1 /* -*- mode: C; c-basic-offset: 3; -*- */
2
3 /*--------------------------------------------------------------------*/
4 /*--- begin guest_arm64_toIR.c ---*/
5 /*--------------------------------------------------------------------*/
6
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
11 Copyright (C) 2013-2013 OpenWorks
12 info@open-works.net
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30 */
31
32 /* KNOWN LIMITATIONS 2014-Nov-16
33
34 * Correctness: FMAXNM, FMINNM are implemented the same as FMAX/FMIN.
35
36 Also FP comparison "unordered" .. is implemented as normal FP
37 comparison.
38
39 Both should be fixed. They behave incorrectly in the presence of
40 NaNs.
41
42 FMULX is treated the same as FMUL. That's also not correct.
43
44 * Floating multiply-add (etc) insns are split into a multiply and
45 an add, and so suffer double rounding; hence the least significant
46 mantissa bit is sometimes incorrect. Fix: use the IR
47 multiply-add IROps instead.
48
49 * FRINTA, FRINTN are kludged .. they just round to nearest. No special
50 handling for the "ties" case. FRINTX might be dubious too.
51
52 * Ditto FCVTXN. No idea what "round to odd" means. This implementation
53 just rounds to nearest.
54 */
55
56 /* "Special" instructions.
57
58 This instruction decoder can decode four special instructions
59 which mean nothing natively (are no-ops as far as regs/mem are
60 concerned) but have meaning for supporting Valgrind. A special
61 instruction is flagged by a 16-byte preamble:
62
63 93CC0D8C 93CC358C 93CCCD8C 93CCF58C
64 (ror x12, x12, #3; ror x12, x12, #13
65 ror x12, x12, #51; ror x12, x12, #61)
66
67 Following that, one of the following 4 is allowed
68 (standard interpretation in parentheses):
69
70 AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 )
71 AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR
72 AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8
73 AA090129 (orr x9,x9,x9) IR injection
74
75 Any other bytes following the 16-byte preamble are illegal and
76 constitute a failure in instruction decoding. This all assumes
77 that the preamble will never occur except in specific code
78 fragments designed for Valgrind to catch.
79 */
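/* Purely illustrative sketch (not the decoder's actual code, which
   appears later in this file): one way to recognise the preamble and
   marker words described above, using the getUIntLittleEndianly
   helper defined below.  'code' is assumed to point at the guest
   instruction stream.

      const UChar* code = ...;
      Bool isPreamble
         =    getUIntLittleEndianly(code +  0) == 0x93CC0D8C
           && getUIntLittleEndianly(code +  4) == 0x93CC358C
           && getUIntLittleEndianly(code +  8) == 0x93CCCD8C
           && getUIntLittleEndianly(code + 12) == 0x93CCF58C;
      if (isPreamble) {
         switch (getUIntLittleEndianly(code + 16)) {
            case 0xAA0A014A: break; // X3 = client_request ( X4 )
            case 0xAA0B016B: break; // X3 = guest_NRADDR
            case 0xAA0C018C: break; // branch-and-link-to-noredir X8
            case 0xAA090129: break; // IR injection
            default:         break; // decode failure
         }
      }
*/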
80
81 /* Translates ARM64 code to IR. */
82
83 #include "libvex_basictypes.h"
84 #include "libvex_ir.h"
85 #include "libvex.h"
86 #include "libvex_guest_arm64.h"
87
88 #include "main_util.h"
89 #include "main_globals.h"
90 #include "guest_generic_bb_to_IR.h"
91 #include "guest_arm64_defs.h"
92
93
94 /*------------------------------------------------------------*/
95 /*--- Globals ---*/
96 /*------------------------------------------------------------*/
97
98 /* These are set at the start of the translation of an instruction, so
99 that we don't have to pass them around endlessly. CONST means does
100 not change during translation of the instruction.
101 */
102
103 /* CONST: what is the host's endianness? We need to know this in
104 order to do sub-register accesses to the SIMD/FP registers
105 correctly. */
106 static VexEndness host_endness;
107
108 /* CONST: The guest address for the instruction currently being
109 translated. */
110 static Addr64 guest_PC_curr_instr;
111
112 /* MOD: The IRSB* into which we're generating code. */
113 static IRSB* irsb;
114
115
116 /*------------------------------------------------------------*/
117 /*--- Debugging output ---*/
118 /*------------------------------------------------------------*/
119
120 #define DIP(format, args...) \
121 if (vex_traceflags & VEX_TRACE_FE) \
122 vex_printf(format, ## args)
123
124 #define DIS(buf, format, args...) \
125 if (vex_traceflags & VEX_TRACE_FE) \
126 vex_sprintf(buf, format, ## args)
127
128
129 /*------------------------------------------------------------*/
130 /*--- Helper bits and pieces for deconstructing the ---*/
131 /*--- arm insn stream. ---*/
132 /*------------------------------------------------------------*/
133
134 /* Do a little-endian load of a 32-bit word, regardless of the
135 endianness of the underlying host. */
136 static inline UInt getUIntLittleEndianly ( const UChar* p )
137 {
138 UInt w = 0;
139 w = (w << 8) | p[3];
140 w = (w << 8) | p[2];
141 w = (w << 8) | p[1];
142 w = (w << 8) | p[0];
143 return w;
144 }
145
146 /* Sign extend a N-bit value up to 64 bits, by copying
147 bit N-1 into all higher positions. */
148 static ULong sx_to_64 ( ULong x, UInt n )
149 {
150 vassert(n > 1 && n < 64);
151 Long r = (Long)x;
152 r = (r << (64-n)) >> (64-n);
153 return (ULong)r;
154 }
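/* Worked example (illustrative only, not used by the code): sign
   extending the 8-bit value 0x80 gives
   sx_to_64(0x80, 8) == 0xFFFFFFFFFFFFFF80ULL, whereas
   sx_to_64(0x7F, 8) == 0x7FULL, since bit 7 is copied upwards. */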
155
156 //ZZ /* Do a little-endian load of a 16-bit word, regardless of the
157 //ZZ endianness of the underlying host. */
158 //ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
159 //ZZ {
160 //ZZ UShort w = 0;
161 //ZZ w = (w << 8) | p[1];
162 //ZZ w = (w << 8) | p[0];
163 //ZZ return w;
164 //ZZ }
165 //ZZ
166 //ZZ static UInt ROR32 ( UInt x, UInt sh ) {
167 //ZZ vassert(sh >= 0 && sh < 32);
168 //ZZ if (sh == 0)
169 //ZZ return x;
170 //ZZ else
171 //ZZ return (x << (32-sh)) | (x >> sh);
172 //ZZ }
173 //ZZ
174 //ZZ static Int popcount32 ( UInt x )
175 //ZZ {
176 //ZZ Int res = 0, i;
177 //ZZ for (i = 0; i < 32; i++) {
178 //ZZ res += (x & 1);
179 //ZZ x >>= 1;
180 //ZZ }
181 //ZZ return res;
182 //ZZ }
183 //ZZ
184 //ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
185 //ZZ {
186 //ZZ UInt mask = 1 << ix;
187 //ZZ x &= ~mask;
188 //ZZ x |= ((b << ix) & mask);
189 //ZZ return x;
190 //ZZ }
191
192 #define BITS2(_b1,_b0) \
193 (((_b1) << 1) | (_b0))
194
195 #define BITS3(_b2,_b1,_b0) \
196 (((_b2) << 2) | ((_b1) << 1) | (_b0))
197
198 #define BITS4(_b3,_b2,_b1,_b0) \
199 (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
200
201 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
202 ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
203 | BITS4((_b3),(_b2),(_b1),(_b0)))
204
205 #define BITS5(_b4,_b3,_b2,_b1,_b0) \
206 (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
207 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
208 (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
209 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
210 (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
211
212 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
213 (((_b8) << 8) \
214 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
215
216 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
217 (((_b9) << 9) | ((_b8) << 8) \
218 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
219
220 #define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
221 (((_b10) << 10) \
222 | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
223
224 #define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
225 (((_b11) << 11) \
226 | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
227
228 #define X00 BITS2(0,0)
229 #define X01 BITS2(0,1)
230 #define X10 BITS2(1,0)
231 #define X11 BITS2(1,1)
232
233 // produces _uint[_bMax:_bMin]
234 #define SLICE_UInt(_uint,_bMax,_bMin) \
235 (( ((UInt)(_uint)) >> (_bMin)) \
236 & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
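// Example (illustrative only): for the instruction word 0xD2800001
// (movz x1, #0), SLICE_UInt(0xD2800001, 4, 0) == 1 extracts the Rd
// field, and SLICE_UInt(0xD2800001, 20, 5) == 0 extracts imm16.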
237
238
239 /*------------------------------------------------------------*/
240 /*--- Helper bits and pieces for creating IR fragments. ---*/
241 /*------------------------------------------------------------*/
242
243 static IRExpr* mkV128 ( UShort w )
244 {
245 return IRExpr_Const(IRConst_V128(w));
246 }
247
248 static IRExpr* mkU64 ( ULong i )
249 {
250 return IRExpr_Const(IRConst_U64(i));
251 }
252
253 static IRExpr* mkU32 ( UInt i )
254 {
255 return IRExpr_Const(IRConst_U32(i));
256 }
257
258 static IRExpr* mkU16 ( UInt i )
259 {
260 vassert(i < 65536);
261 return IRExpr_Const(IRConst_U16(i));
262 }
263
264 static IRExpr* mkU8 ( UInt i )
265 {
266 vassert(i < 256);
267 return IRExpr_Const(IRConst_U8( (UChar)i ));
268 }
269
270 static IRExpr* mkexpr ( IRTemp tmp )
271 {
272 return IRExpr_RdTmp(tmp);
273 }
274
275 static IRExpr* unop ( IROp op, IRExpr* a )
276 {
277 return IRExpr_Unop(op, a);
278 }
279
280 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
281 {
282 return IRExpr_Binop(op, a1, a2);
283 }
284
285 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
286 {
287 return IRExpr_Triop(op, a1, a2, a3);
288 }
289
290 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
291 {
292 return IRExpr_Load(Iend_LE, ty, addr);
293 }
294
295 /* Add a statement to the list held by "irbb". */
296 static void stmt ( IRStmt* st )
297 {
298 addStmtToIRSB( irsb, st );
299 }
300
301 static void assign ( IRTemp dst, IRExpr* e )
302 {
303 stmt( IRStmt_WrTmp(dst, e) );
304 }
305
306 static void storeLE ( IRExpr* addr, IRExpr* data )
307 {
308 stmt( IRStmt_Store(Iend_LE, addr, data) );
309 }
310
311 //ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
312 //ZZ {
313 //ZZ if (guardT == IRTemp_INVALID) {
314 //ZZ /* unconditional */
315 //ZZ storeLE(addr, data);
316 //ZZ } else {
317 //ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data,
318 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
319 //ZZ }
320 //ZZ }
321 //ZZ
322 //ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
323 //ZZ IRExpr* addr, IRExpr* alt,
324 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
325 //ZZ {
326 //ZZ if (guardT == IRTemp_INVALID) {
327 //ZZ /* unconditional */
328 //ZZ IRExpr* loaded = NULL;
329 //ZZ switch (cvt) {
330 //ZZ case ILGop_Ident32:
331 //ZZ loaded = loadLE(Ity_I32, addr); break;
332 //ZZ case ILGop_8Uto32:
333 //ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
334 //ZZ case ILGop_8Sto32:
335 //ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
336 //ZZ case ILGop_16Uto32:
337 //ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
338 //ZZ case ILGop_16Sto32:
339 //ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
340 //ZZ default:
341 //ZZ vassert(0);
342 //ZZ }
343 //ZZ vassert(loaded != NULL);
344 //ZZ assign(dst, loaded);
345 //ZZ } else {
346 //ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the
347 //ZZ loaded data before putting the data in 'dst'. If the load
348 //ZZ does not take place, 'alt' is placed directly in 'dst'. */
349 //ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
350 //ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
351 //ZZ }
352 //ZZ }
353
354 /* Generate a new temporary of the given type. */
355 static IRTemp newTemp ( IRType ty )
356 {
357 vassert(isPlausibleIRType(ty));
358 return newIRTemp( irsb->tyenv, ty );
359 }
360
361 /* This is used in many places, so the brevity is an advantage. */
362 static IRTemp newTempV128(void)
363 {
364 return newTemp(Ity_V128);
365 }
366
367 /* Initialise V128 temporaries en masse. */
368 static
369 void newTempsV128_2(IRTemp* t1, IRTemp* t2)
370 {
371 vassert(t1 && *t1 == IRTemp_INVALID);
372 vassert(t2 && *t2 == IRTemp_INVALID);
373 *t1 = newTempV128();
374 *t2 = newTempV128();
375 }
376
377 static
378 void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
379 {
380 vassert(t1 && *t1 == IRTemp_INVALID);
381 vassert(t2 && *t2 == IRTemp_INVALID);
382 vassert(t3 && *t3 == IRTemp_INVALID);
383 *t1 = newTempV128();
384 *t2 = newTempV128();
385 *t3 = newTempV128();
386 }
387
388 static
389 void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
390 {
391 vassert(t1 && *t1 == IRTemp_INVALID);
392 vassert(t2 && *t2 == IRTemp_INVALID);
393 vassert(t3 && *t3 == IRTemp_INVALID);
394 vassert(t4 && *t4 == IRTemp_INVALID);
395 *t1 = newTempV128();
396 *t2 = newTempV128();
397 *t3 = newTempV128();
398 *t4 = newTempV128();
399 }
400
401 static
402 void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
403 IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
404 {
405 vassert(t1 && *t1 == IRTemp_INVALID);
406 vassert(t2 && *t2 == IRTemp_INVALID);
407 vassert(t3 && *t3 == IRTemp_INVALID);
408 vassert(t4 && *t4 == IRTemp_INVALID);
409 vassert(t5 && *t5 == IRTemp_INVALID);
410 vassert(t6 && *t6 == IRTemp_INVALID);
411 vassert(t7 && *t7 == IRTemp_INVALID);
412 *t1 = newTempV128();
413 *t2 = newTempV128();
414 *t3 = newTempV128();
415 *t4 = newTempV128();
416 *t5 = newTempV128();
417 *t6 = newTempV128();
418 *t7 = newTempV128();
419 }
420
421 //ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
422 //ZZ IRRoundingMode. */
423 //ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
424 //ZZ {
425 //ZZ return mkU32(Irrm_NEAREST);
426 //ZZ }
427 //ZZ
428 //ZZ /* Generate an expression for SRC rotated right by ROT. */
429 //ZZ static IRExpr* genROR32( IRTemp src, Int rot )
430 //ZZ {
431 //ZZ vassert(rot >= 0 && rot < 32);
432 //ZZ if (rot == 0)
433 //ZZ return mkexpr(src);
434 //ZZ return
435 //ZZ binop(Iop_Or32,
436 //ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
437 //ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
438 //ZZ }
439 //ZZ
440 //ZZ static IRExpr* mkU128 ( ULong i )
441 //ZZ {
442 //ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
443 //ZZ }
444 //ZZ
445 //ZZ /* Generate a 4-aligned version of the given expression if
446 //ZZ the given condition is true. Else return it unchanged. */
447 //ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
448 //ZZ {
449 //ZZ if (b)
450 //ZZ return binop(Iop_And32, e, mkU32(~3));
451 //ZZ else
452 //ZZ return e;
453 //ZZ }
454
455 /* Other IR construction helpers. */
456 static IROp mkAND ( IRType ty ) {
457 switch (ty) {
458 case Ity_I32: return Iop_And32;
459 case Ity_I64: return Iop_And64;
460 default: vpanic("mkAND");
461 }
462 }
463
464 static IROp mkOR ( IRType ty ) {
465 switch (ty) {
466 case Ity_I32: return Iop_Or32;
467 case Ity_I64: return Iop_Or64;
468 default: vpanic("mkOR");
469 }
470 }
471
472 static IROp mkXOR ( IRType ty ) {
473 switch (ty) {
474 case Ity_I32: return Iop_Xor32;
475 case Ity_I64: return Iop_Xor64;
476 default: vpanic("mkXOR");
477 }
478 }
479
480 static IROp mkSHL ( IRType ty ) {
481 switch (ty) {
482 case Ity_I32: return Iop_Shl32;
483 case Ity_I64: return Iop_Shl64;
484 default: vpanic("mkSHL");
485 }
486 }
487
488 static IROp mkSHR ( IRType ty ) {
489 switch (ty) {
490 case Ity_I32: return Iop_Shr32;
491 case Ity_I64: return Iop_Shr64;
492 default: vpanic("mkSHR");
493 }
494 }
495
496 static IROp mkSAR ( IRType ty ) {
497 switch (ty) {
498 case Ity_I32: return Iop_Sar32;
499 case Ity_I64: return Iop_Sar64;
500 default: vpanic("mkSAR");
501 }
502 }
503
504 static IROp mkNOT ( IRType ty ) {
505 switch (ty) {
506 case Ity_I32: return Iop_Not32;
507 case Ity_I64: return Iop_Not64;
508 default: vpanic("mkNOT");
509 }
510 }
511
512 static IROp mkADD ( IRType ty ) {
513 switch (ty) {
514 case Ity_I32: return Iop_Add32;
515 case Ity_I64: return Iop_Add64;
516 default: vpanic("mkADD");
517 }
518 }
519
520 static IROp mkSUB ( IRType ty ) {
521 switch (ty) {
522 case Ity_I32: return Iop_Sub32;
523 case Ity_I64: return Iop_Sub64;
524 default: vpanic("mkSUB");
525 }
526 }
527
528 static IROp mkADDF ( IRType ty ) {
529 switch (ty) {
530 case Ity_F32: return Iop_AddF32;
531 case Ity_F64: return Iop_AddF64;
532 default: vpanic("mkADDF");
533 }
534 }
535
536 static IROp mkSUBF ( IRType ty ) {
537 switch (ty) {
538 case Ity_F32: return Iop_SubF32;
539 case Ity_F64: return Iop_SubF64;
540 default: vpanic("mkSUBF");
541 }
542 }
543
544 static IROp mkMULF ( IRType ty ) {
545 switch (ty) {
546 case Ity_F32: return Iop_MulF32;
547 case Ity_F64: return Iop_MulF64;
548 default: vpanic("mkMULF");
549 }
550 }
551
552 static IROp mkDIVF ( IRType ty ) {
553 switch (ty) {
554 case Ity_F32: return Iop_DivF32;
555 case Ity_F64: return Iop_DivF64;
556 default: vpanic("mkDIVF");
557 }
558 }
559
560 static IROp mkNEGF ( IRType ty ) {
561 switch (ty) {
562 case Ity_F32: return Iop_NegF32;
563 case Ity_F64: return Iop_NegF64;
564 default: vpanic("mkNEGF");
565 }
566 }
567
568 static IROp mkABSF ( IRType ty ) {
569 switch (ty) {
570 case Ity_F32: return Iop_AbsF32;
571 case Ity_F64: return Iop_AbsF64;
572 default: vpanic("mkABSF");
573 }
574 }
575
576 static IROp mkSQRTF ( IRType ty ) {
577 switch (ty) {
578 case Ity_F32: return Iop_SqrtF32;
579 case Ity_F64: return Iop_SqrtF64;
580 default: vpanic("mkSQRTF");
581 }
582 }
583
584 static IROp mkVecADD ( UInt size ) {
585 const IROp ops[4]
586 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
587 vassert(size < 4);
588 return ops[size];
589 }
590
591 static IROp mkVecQADDU ( UInt size ) {
592 const IROp ops[4]
593 = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
594 vassert(size < 4);
595 return ops[size];
596 }
597
598 static IROp mkVecQADDS ( UInt size ) {
599 const IROp ops[4]
600 = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
601 vassert(size < 4);
602 return ops[size];
603 }
604
605 static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
606 const IROp ops[4]
607 = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
608 Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
609 vassert(size < 4);
610 return ops[size];
611 }
612
613 static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
614 const IROp ops[4]
615 = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
616 Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
617 vassert(size < 4);
618 return ops[size];
619 }
620
621 static IROp mkVecSUB ( UInt size ) {
622 const IROp ops[4]
623 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
624 vassert(size < 4);
625 return ops[size];
626 }
627
628 static IROp mkVecQSUBU ( UInt size ) {
629 const IROp ops[4]
630 = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
631 vassert(size < 4);
632 return ops[size];
633 }
634
635 static IROp mkVecQSUBS ( UInt size ) {
636 const IROp ops[4]
637 = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
638 vassert(size < 4);
639 return ops[size];
640 }
641
642 static IROp mkVecSARN ( UInt size ) {
643 const IROp ops[4]
644 = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
645 vassert(size < 4);
646 return ops[size];
647 }
648
649 static IROp mkVecSHRN ( UInt size ) {
650 const IROp ops[4]
651 = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
652 vassert(size < 4);
653 return ops[size];
654 }
655
656 static IROp mkVecSHLN ( UInt size ) {
657 const IROp ops[4]
658 = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
659 vassert(size < 4);
660 return ops[size];
661 }
662
663 static IROp mkVecCATEVENLANES ( UInt size ) {
664 const IROp ops[4]
665 = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
666 Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
667 vassert(size < 4);
668 return ops[size];
669 }
670
671 static IROp mkVecCATODDLANES ( UInt size ) {
672 const IROp ops[4]
673 = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
674 Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
675 vassert(size < 4);
676 return ops[size];
677 }
678
679 static IROp mkVecINTERLEAVELO ( UInt size ) {
680 const IROp ops[4]
681 = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
682 Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
683 vassert(size < 4);
684 return ops[size];
685 }
686
687 static IROp mkVecINTERLEAVEHI ( UInt size ) {
688 const IROp ops[4]
689 = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
690 Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
691 vassert(size < 4);
692 return ops[size];
693 }
694
695 static IROp mkVecMAXU ( UInt size ) {
696 const IROp ops[4]
697 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
698 vassert(size < 4);
699 return ops[size];
700 }
701
702 static IROp mkVecMAXS ( UInt size ) {
703 const IROp ops[4]
704 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
705 vassert(size < 4);
706 return ops[size];
707 }
708
709 static IROp mkVecMINU ( UInt size ) {
710 const IROp ops[4]
711 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
712 vassert(size < 4);
713 return ops[size];
714 }
715
716 static IROp mkVecMINS ( UInt size ) {
717 const IROp ops[4]
718 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
719 vassert(size < 4);
720 return ops[size];
721 }
722
723 static IROp mkVecMUL ( UInt size ) {
724 const IROp ops[4]
725 = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
726 vassert(size < 3);
727 return ops[size];
728 }
729
730 static IROp mkVecMULLU ( UInt sizeNarrow ) {
731 const IROp ops[4]
732 = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
733 vassert(sizeNarrow < 3);
734 return ops[sizeNarrow];
735 }
736
737 static IROp mkVecMULLS ( UInt sizeNarrow ) {
738 const IROp ops[4]
739 = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
740 vassert(sizeNarrow < 3);
741 return ops[sizeNarrow];
742 }
743
744 static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
745 const IROp ops[4]
746 = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
747 vassert(sizeNarrow < 3);
748 return ops[sizeNarrow];
749 }
750
751 static IROp mkVecCMPEQ ( UInt size ) {
752 const IROp ops[4]
753 = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
754 vassert(size < 4);
755 return ops[size];
756 }
757
758 static IROp mkVecCMPGTU ( UInt size ) {
759 const IROp ops[4]
760 = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
761 vassert(size < 4);
762 return ops[size];
763 }
764
765 static IROp mkVecCMPGTS ( UInt size ) {
766 const IROp ops[4]
767 = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
768 vassert(size < 4);
769 return ops[size];
770 }
771
772 static IROp mkVecABS ( UInt size ) {
773 const IROp ops[4]
774 = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
775 vassert(size < 4);
776 return ops[size];
777 }
778
779 static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
780 const IROp ops[4]
781 = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
782 Iop_ZeroHI96ofV128, Iop_ZeroHI64ofV128 };
783 vassert(size < 4);
784 return ops[size];
785 }
786
787 static IRExpr* mkU ( IRType ty, ULong imm ) {
788 switch (ty) {
789 case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
790 case Ity_I64: return mkU64(imm);
791 default: vpanic("mkU");
792 }
793 }
794
795 static IROp mkVecQDMULHIS ( UInt size ) {
796 const IROp ops[4]
797 = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
798 vassert(size < 4);
799 return ops[size];
800 }
801
802 static IROp mkVecQRDMULHIS ( UInt size ) {
803 const IROp ops[4]
804 = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
805 vassert(size < 4);
806 return ops[size];
807 }
808
809 static IROp mkVecQANDUQSH ( UInt size ) {
810 const IROp ops[4]
811 = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
812 Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
813 vassert(size < 4);
814 return ops[size];
815 }
816
817 static IROp mkVecQANDSQSH ( UInt size ) {
818 const IROp ops[4]
819 = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
820 Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
821 vassert(size < 4);
822 return ops[size];
823 }
824
825 static IROp mkVecQANDUQRSH ( UInt size ) {
826 const IROp ops[4]
827 = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
828 Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
829 vassert(size < 4);
830 return ops[size];
831 }
832
833 static IROp mkVecQANDSQRSH ( UInt size ) {
834 const IROp ops[4]
835 = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
836 Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
837 vassert(size < 4);
838 return ops[size];
839 }
840
841 static IROp mkVecSHU ( UInt size ) {
842 const IROp ops[4]
843 = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
844 vassert(size < 4);
845 return ops[size];
846 }
847
848 static IROp mkVecSHS ( UInt size ) {
849 const IROp ops[4]
850 = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
851 vassert(size < 4);
852 return ops[size];
853 }
854
855 static IROp mkVecRSHU ( UInt size ) {
856 const IROp ops[4]
857 = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
858 vassert(size < 4);
859 return ops[size];
860 }
861
862 static IROp mkVecRSHS ( UInt size ) {
863 const IROp ops[4]
864 = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
865 vassert(size < 4);
866 return ops[size];
867 }
868
869 static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
870 const IROp ops[4]
871 = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
872 Iop_NarrowUn64to32x2, Iop_INVALID };
873 vassert(sizeNarrow < 4);
874 return ops[sizeNarrow];
875 }
876
877 static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
878 const IROp ops[4]
879 = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4,
880 Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
881 vassert(sizeNarrow < 4);
882 return ops[sizeNarrow];
883 }
884
885 static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
886 const IROp ops[4]
887 = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4,
888 Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
889 vassert(sizeNarrow < 4);
890 return ops[sizeNarrow];
891 }
892
893 static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
894 const IROp ops[4]
895 = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4,
896 Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
897 vassert(sizeNarrow < 4);
898 return ops[sizeNarrow];
899 }
900
901 static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
902 const IROp ops[4]
903 = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
904 Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
905 vassert(sizeNarrow < 4);
906 return ops[sizeNarrow];
907 }
908
909 static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
910 const IROp ops[4]
911 = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4,
912 Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
913 vassert(sizeNarrow < 4);
914 return ops[sizeNarrow];
915 }
916
917 static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
918 const IROp ops[4]
919 = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4,
920 Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
921 vassert(sizeNarrow < 4);
922 return ops[sizeNarrow];
923 }
924
925 static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
926 const IROp ops[4]
927 = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4,
928 Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
929 vassert(sizeNarrow < 4);
930 return ops[sizeNarrow];
931 }
932
933 static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
934 const IROp ops[4]
935 = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4,
936 Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
937 vassert(sizeNarrow < 4);
938 return ops[sizeNarrow];
939 }
940
941 static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
942 const IROp ops[4]
943 = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4,
944 Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
945 vassert(sizeNarrow < 4);
946 return ops[sizeNarrow];
947 }
948
949 static IROp mkVecQSHLNSATUU ( UInt size ) {
950 const IROp ops[4]
951 = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
952 Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
953 vassert(size < 4);
954 return ops[size];
955 }
956
957 static IROp mkVecQSHLNSATSS ( UInt size ) {
958 const IROp ops[4]
959 = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
960 Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
961 vassert(size < 4);
962 return ops[size];
963 }
964
965 static IROp mkVecQSHLNSATSU ( UInt size ) {
966 const IROp ops[4]
967 = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
968 Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
969 vassert(size < 4);
970 return ops[size];
971 }
972
973 static IROp mkVecADDF ( UInt size ) {
974 const IROp ops[4]
975 = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 };
976 vassert(size < 4);
977 return ops[size];
978 }
979
980 static IROp mkVecMAXF ( UInt size ) {
981 const IROp ops[4]
982 = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
983 vassert(size < 4);
984 return ops[size];
985 }
986
987 static IROp mkVecMINF ( UInt size ) {
988 const IROp ops[4]
989 = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
990 vassert(size < 4);
991 return ops[size];
992 }
993
994 /* Generate IR to create 'arg rotated right by imm', for sane values
995 of 'ty' and 'imm'. */
996 static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
997 {
998 UInt w = 0;
999 if (ty == Ity_I64) {
1000 w = 64;
1001 } else {
1002 vassert(ty == Ity_I32);
1003 w = 32;
1004 }
1005 vassert(w != 0);
1006 vassert(imm < w);
1007 if (imm == 0) {
1008 return arg;
1009 }
1010 IRTemp res = newTemp(ty);
1011 assign(res, binop(mkOR(ty),
1012 binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
1013 binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
1014 return res;
1015 }
1016
1017 /* Generate IR to set the returned temp to either all-zeroes or
1018 all ones, as a copy of arg<imm>. */
1019 static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
1020 {
1021 UInt w = 0;
1022 if (ty == Ity_I64) {
1023 w = 64;
1024 } else {
1025 vassert(ty == Ity_I32);
1026 w = 32;
1027 }
1028 vassert(w != 0);
1029 vassert(imm < w);
1030 IRTemp res = newTemp(ty);
1031 assign(res, binop(mkSAR(ty),
1032 binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
1033 mkU8(w - 1)));
1034 return res;
1035 }
1036
1037 /* U-widen 8/16/32/64 bit int expr to 64. */
1038 static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
1039 {
1040 switch (srcTy) {
1041 case Ity_I64: return e;
1042 case Ity_I32: return unop(Iop_32Uto64, e);
1043 case Ity_I16: return unop(Iop_16Uto64, e);
1044 case Ity_I8: return unop(Iop_8Uto64, e);
1045 default: vpanic("widenUto64(arm64)");
1046 }
1047 }
1048
1049 /* Narrow 64 bit int expr to 8/16/32/64. Clearly only some
1050 of these combinations make sense. */
1051 static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
1052 {
1053 switch (dstTy) {
1054 case Ity_I64: return e;
1055 case Ity_I32: return unop(Iop_64to32, e);
1056 case Ity_I16: return unop(Iop_64to16, e);
1057 case Ity_I8: return unop(Iop_64to8, e);
1058 default: vpanic("narrowFrom64(arm64)");
1059 }
1060 }
1061
1062
1063 /*------------------------------------------------------------*/
1064 /*--- Helpers for accessing guest registers. ---*/
1065 /*------------------------------------------------------------*/
1066
1067 #define OFFB_X0 offsetof(VexGuestARM64State,guest_X0)
1068 #define OFFB_X1 offsetof(VexGuestARM64State,guest_X1)
1069 #define OFFB_X2 offsetof(VexGuestARM64State,guest_X2)
1070 #define OFFB_X3 offsetof(VexGuestARM64State,guest_X3)
1071 #define OFFB_X4 offsetof(VexGuestARM64State,guest_X4)
1072 #define OFFB_X5 offsetof(VexGuestARM64State,guest_X5)
1073 #define OFFB_X6 offsetof(VexGuestARM64State,guest_X6)
1074 #define OFFB_X7 offsetof(VexGuestARM64State,guest_X7)
1075 #define OFFB_X8 offsetof(VexGuestARM64State,guest_X8)
1076 #define OFFB_X9 offsetof(VexGuestARM64State,guest_X9)
1077 #define OFFB_X10 offsetof(VexGuestARM64State,guest_X10)
1078 #define OFFB_X11 offsetof(VexGuestARM64State,guest_X11)
1079 #define OFFB_X12 offsetof(VexGuestARM64State,guest_X12)
1080 #define OFFB_X13 offsetof(VexGuestARM64State,guest_X13)
1081 #define OFFB_X14 offsetof(VexGuestARM64State,guest_X14)
1082 #define OFFB_X15 offsetof(VexGuestARM64State,guest_X15)
1083 #define OFFB_X16 offsetof(VexGuestARM64State,guest_X16)
1084 #define OFFB_X17 offsetof(VexGuestARM64State,guest_X17)
1085 #define OFFB_X18 offsetof(VexGuestARM64State,guest_X18)
1086 #define OFFB_X19 offsetof(VexGuestARM64State,guest_X19)
1087 #define OFFB_X20 offsetof(VexGuestARM64State,guest_X20)
1088 #define OFFB_X21 offsetof(VexGuestARM64State,guest_X21)
1089 #define OFFB_X22 offsetof(VexGuestARM64State,guest_X22)
1090 #define OFFB_X23 offsetof(VexGuestARM64State,guest_X23)
1091 #define OFFB_X24 offsetof(VexGuestARM64State,guest_X24)
1092 #define OFFB_X25 offsetof(VexGuestARM64State,guest_X25)
1093 #define OFFB_X26 offsetof(VexGuestARM64State,guest_X26)
1094 #define OFFB_X27 offsetof(VexGuestARM64State,guest_X27)
1095 #define OFFB_X28 offsetof(VexGuestARM64State,guest_X28)
1096 #define OFFB_X29 offsetof(VexGuestARM64State,guest_X29)
1097 #define OFFB_X30 offsetof(VexGuestARM64State,guest_X30)
1098
1099 #define OFFB_XSP offsetof(VexGuestARM64State,guest_XSP)
1100 #define OFFB_PC offsetof(VexGuestARM64State,guest_PC)
1101
1102 #define OFFB_CC_OP offsetof(VexGuestARM64State,guest_CC_OP)
1103 #define OFFB_CC_DEP1 offsetof(VexGuestARM64State,guest_CC_DEP1)
1104 #define OFFB_CC_DEP2 offsetof(VexGuestARM64State,guest_CC_DEP2)
1105 #define OFFB_CC_NDEP offsetof(VexGuestARM64State,guest_CC_NDEP)
1106
1107 #define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
1108 #define OFFB_NRADDR offsetof(VexGuestARM64State,guest_NRADDR)
1109
1110 #define OFFB_Q0 offsetof(VexGuestARM64State,guest_Q0)
1111 #define OFFB_Q1 offsetof(VexGuestARM64State,guest_Q1)
1112 #define OFFB_Q2 offsetof(VexGuestARM64State,guest_Q2)
1113 #define OFFB_Q3 offsetof(VexGuestARM64State,guest_Q3)
1114 #define OFFB_Q4 offsetof(VexGuestARM64State,guest_Q4)
1115 #define OFFB_Q5 offsetof(VexGuestARM64State,guest_Q5)
1116 #define OFFB_Q6 offsetof(VexGuestARM64State,guest_Q6)
1117 #define OFFB_Q7 offsetof(VexGuestARM64State,guest_Q7)
1118 #define OFFB_Q8 offsetof(VexGuestARM64State,guest_Q8)
1119 #define OFFB_Q9 offsetof(VexGuestARM64State,guest_Q9)
1120 #define OFFB_Q10 offsetof(VexGuestARM64State,guest_Q10)
1121 #define OFFB_Q11 offsetof(VexGuestARM64State,guest_Q11)
1122 #define OFFB_Q12 offsetof(VexGuestARM64State,guest_Q12)
1123 #define OFFB_Q13 offsetof(VexGuestARM64State,guest_Q13)
1124 #define OFFB_Q14 offsetof(VexGuestARM64State,guest_Q14)
1125 #define OFFB_Q15 offsetof(VexGuestARM64State,guest_Q15)
1126 #define OFFB_Q16 offsetof(VexGuestARM64State,guest_Q16)
1127 #define OFFB_Q17 offsetof(VexGuestARM64State,guest_Q17)
1128 #define OFFB_Q18 offsetof(VexGuestARM64State,guest_Q18)
1129 #define OFFB_Q19 offsetof(VexGuestARM64State,guest_Q19)
1130 #define OFFB_Q20 offsetof(VexGuestARM64State,guest_Q20)
1131 #define OFFB_Q21 offsetof(VexGuestARM64State,guest_Q21)
1132 #define OFFB_Q22 offsetof(VexGuestARM64State,guest_Q22)
1133 #define OFFB_Q23 offsetof(VexGuestARM64State,guest_Q23)
1134 #define OFFB_Q24 offsetof(VexGuestARM64State,guest_Q24)
1135 #define OFFB_Q25 offsetof(VexGuestARM64State,guest_Q25)
1136 #define OFFB_Q26 offsetof(VexGuestARM64State,guest_Q26)
1137 #define OFFB_Q27 offsetof(VexGuestARM64State,guest_Q27)
1138 #define OFFB_Q28 offsetof(VexGuestARM64State,guest_Q28)
1139 #define OFFB_Q29 offsetof(VexGuestARM64State,guest_Q29)
1140 #define OFFB_Q30 offsetof(VexGuestARM64State,guest_Q30)
1141 #define OFFB_Q31 offsetof(VexGuestARM64State,guest_Q31)
1142
1143 #define OFFB_FPCR offsetof(VexGuestARM64State,guest_FPCR)
1144 #define OFFB_QCFLAG offsetof(VexGuestARM64State,guest_QCFLAG)
1145
1146 #define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART)
1147 #define OFFB_CMLEN offsetof(VexGuestARM64State,guest_CMLEN)
1148
1149
1150 /* ---------------- Integer registers ---------------- */
1151
1152 static Int offsetIReg64 ( UInt iregNo )
1153 {
1154 /* Do we care about endianness here? We do if sub-parts of integer
1155 registers are accessed. */
1156 switch (iregNo) {
1157 case 0: return OFFB_X0;
1158 case 1: return OFFB_X1;
1159 case 2: return OFFB_X2;
1160 case 3: return OFFB_X3;
1161 case 4: return OFFB_X4;
1162 case 5: return OFFB_X5;
1163 case 6: return OFFB_X6;
1164 case 7: return OFFB_X7;
1165 case 8: return OFFB_X8;
1166 case 9: return OFFB_X9;
1167 case 10: return OFFB_X10;
1168 case 11: return OFFB_X11;
1169 case 12: return OFFB_X12;
1170 case 13: return OFFB_X13;
1171 case 14: return OFFB_X14;
1172 case 15: return OFFB_X15;
1173 case 16: return OFFB_X16;
1174 case 17: return OFFB_X17;
1175 case 18: return OFFB_X18;
1176 case 19: return OFFB_X19;
1177 case 20: return OFFB_X20;
1178 case 21: return OFFB_X21;
1179 case 22: return OFFB_X22;
1180 case 23: return OFFB_X23;
1181 case 24: return OFFB_X24;
1182 case 25: return OFFB_X25;
1183 case 26: return OFFB_X26;
1184 case 27: return OFFB_X27;
1185 case 28: return OFFB_X28;
1186 case 29: return OFFB_X29;
1187 case 30: return OFFB_X30;
1188 /* but not 31 */
1189 default: vassert(0);
1190 }
1191 }
1192
1193 static Int offsetIReg64orSP ( UInt iregNo )
1194 {
1195 return iregNo == 31 ? OFFB_XSP : offsetIReg64(iregNo);
1196 }
1197
1198 static const HChar* nameIReg64orZR ( UInt iregNo )
1199 {
1200 vassert(iregNo < 32);
1201 static const HChar* names[32]
1202 = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
1203 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
1204 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
1205 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
1206 return names[iregNo];
1207 }
1208
1209 static const HChar* nameIReg64orSP ( UInt iregNo )
1210 {
1211 if (iregNo == 31) {
1212 return "sp";
1213 }
1214 vassert(iregNo < 31);
1215 return nameIReg64orZR(iregNo);
1216 }
1217
1218 static IRExpr* getIReg64orSP ( UInt iregNo )
1219 {
1220 vassert(iregNo < 32);
1221 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1222 }
1223
1224 static IRExpr* getIReg64orZR ( UInt iregNo )
1225 {
1226 if (iregNo == 31) {
1227 return mkU64(0);
1228 }
1229 vassert(iregNo < 31);
1230 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1231 }
1232
1233 static void putIReg64orSP ( UInt iregNo, IRExpr* e )
1234 {
1235 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1236 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1237 }
1238
1239 static void putIReg64orZR ( UInt iregNo, IRExpr* e )
1240 {
1241 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1242 if (iregNo == 31) {
1243 return;
1244 }
1245 vassert(iregNo < 31);
1246 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1247 }
1248
1249 static const HChar* nameIReg32orZR ( UInt iregNo )
1250 {
1251 vassert(iregNo < 32);
1252 static const HChar* names[32]
1253 = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
1254 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
1255 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
1256 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
1257 return names[iregNo];
1258 }
1259
1260 static const HChar* nameIReg32orSP ( UInt iregNo )
1261 {
1262 if (iregNo == 31) {
1263 return "wsp";
1264 }
1265 vassert(iregNo < 31);
1266 return nameIReg32orZR(iregNo);
1267 }
1268
1269 static IRExpr* getIReg32orSP ( UInt iregNo )
1270 {
1271 vassert(iregNo < 32);
1272 return unop(Iop_64to32,
1273 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1274 }
1275
1276 static IRExpr* getIReg32orZR ( UInt iregNo )
1277 {
1278 if (iregNo == 31) {
1279 return mkU32(0);
1280 }
1281 vassert(iregNo < 31);
1282 return unop(Iop_64to32,
1283 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1284 }
1285
1286 static void putIReg32orSP ( UInt iregNo, IRExpr* e )
1287 {
1288 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1289 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1290 }
1291
1292 static void putIReg32orZR ( UInt iregNo, IRExpr* e )
1293 {
1294 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1295 if (iregNo == 31) {
1296 return;
1297 }
1298 vassert(iregNo < 31);
1299 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1300 }
1301
1302 static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
1303 {
1304 vassert(is64 == True || is64 == False);
1305 return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
1306 }
1307
1308 static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
1309 {
1310 vassert(is64 == True || is64 == False);
1311 return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
1312 }
1313
1314 static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
1315 {
1316 vassert(is64 == True || is64 == False);
1317 return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
1318 }
1319
1320 static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
1321 {
1322 vassert(is64 == True || is64 == False);
1323 if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
1324 }
1325
1326 static void putPC ( IRExpr* e )
1327 {
1328 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1329 stmt( IRStmt_Put(OFFB_PC, e) );
1330 }
1331
1332
1333 /* ---------------- Vector (Q) registers ---------------- */
1334
1335 static Int offsetQReg128 ( UInt qregNo )
1336 {
1337 /* We don't care about endianness at this point. It only becomes
1338 relevant when dealing with sections of these registers.*/
1339 switch (qregNo) {
1340 case 0: return OFFB_Q0;
1341 case 1: return OFFB_Q1;
1342 case 2: return OFFB_Q2;
1343 case 3: return OFFB_Q3;
1344 case 4: return OFFB_Q4;
1345 case 5: return OFFB_Q5;
1346 case 6: return OFFB_Q6;
1347 case 7: return OFFB_Q7;
1348 case 8: return OFFB_Q8;
1349 case 9: return OFFB_Q9;
1350 case 10: return OFFB_Q10;
1351 case 11: return OFFB_Q11;
1352 case 12: return OFFB_Q12;
1353 case 13: return OFFB_Q13;
1354 case 14: return OFFB_Q14;
1355 case 15: return OFFB_Q15;
1356 case 16: return OFFB_Q16;
1357 case 17: return OFFB_Q17;
1358 case 18: return OFFB_Q18;
1359 case 19: return OFFB_Q19;
1360 case 20: return OFFB_Q20;
1361 case 21: return OFFB_Q21;
1362 case 22: return OFFB_Q22;
1363 case 23: return OFFB_Q23;
1364 case 24: return OFFB_Q24;
1365 case 25: return OFFB_Q25;
1366 case 26: return OFFB_Q26;
1367 case 27: return OFFB_Q27;
1368 case 28: return OFFB_Q28;
1369 case 29: return OFFB_Q29;
1370 case 30: return OFFB_Q30;
1371 case 31: return OFFB_Q31;
1372 default: vassert(0);
1373 }
1374 }
1375
1376 /* Write to a complete Qreg. */
1377 static void putQReg128 ( UInt qregNo, IRExpr* e )
1378 {
1379 vassert(qregNo < 32);
1380 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
1381 stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
1382 }
1383
1384 /* Read a complete Qreg. */
1385 static IRExpr* getQReg128 ( UInt qregNo )
1386 {
1387 vassert(qregNo < 32);
1388 return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
1389 }
1390
1391 /* Produce the IR type for some sub-part of a vector. For 32- and 64-
1392 bit sub-parts we can choose either integer or float types, and
1393 choose float on the basis that that is the common use case and so
1394 will give least interference with Put-to-Get forwarding later
1395 on. */
1396 static IRType preferredVectorSubTypeFromSize ( UInt szB )
1397 {
1398 switch (szB) {
1399 case 1: return Ity_I8;
1400 case 2: return Ity_I16;
1401 case 4: return Ity_I32; //Ity_F32;
1402 case 8: return Ity_F64;
1403 case 16: return Ity_V128;
1404 default: vassert(0);
1405 }
1406 }
1407
1408 /* Find the offset of the laneNo'th lane of type laneTy in the given
1409 Qreg. Since the host is little-endian, the least significant lane
1410 has the lowest offset. */
1411 static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
1412 {
1413 vassert(host_endness == VexEndnessLE);
1414 Int base = offsetQReg128(qregNo);
1415 /* Since the host is little-endian, the least significant lane
1416 will be at the lowest address. */
1417 /* Restrict this to known types, so as to avoid silently accepting
1418 stupid types. */
1419 UInt laneSzB = 0;
1420 switch (laneTy) {
1421 case Ity_I8: laneSzB = 1; break;
1422 case Ity_F16: case Ity_I16: laneSzB = 2; break;
1423 case Ity_F32: case Ity_I32: laneSzB = 4; break;
1424 case Ity_F64: case Ity_I64: laneSzB = 8; break;
1425 case Ity_V128: laneSzB = 16; break;
1426 default: break;
1427 }
1428 vassert(laneSzB > 0);
1429 UInt minOff = laneNo * laneSzB;
1430 UInt maxOff = minOff + laneSzB - 1;
1431 vassert(maxOff < 16);
1432 return base + minOff;
1433 }
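/* Worked example (illustrative): on a little-endian host,
   offsetQRegLane(5, Ity_I64, 1) == offsetQReg128(5) + 8, i.e. the
   most significant 64-bit half of Q5, while lane 0 of the same type
   sits at offsetQReg128(5) + 0. */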
1434
1435 /* Put to the least significant lane of a Qreg. */
1436 static void putQRegLO ( UInt qregNo, IRExpr* e )
1437 {
1438 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1439 Int off = offsetQRegLane(qregNo, ty, 0);
1440 switch (ty) {
1441 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
1442 case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
1443 break;
1444 default:
1445 vassert(0); // Other cases are probably invalid
1446 }
1447 stmt(IRStmt_Put(off, e));
1448 }
1449
1450 /* Get from the least significant lane of a Qreg. */
1451 static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
1452 {
1453 Int off = offsetQRegLane(qregNo, ty, 0);
1454 switch (ty) {
1455 case Ity_I8:
1456 case Ity_F16: case Ity_I16:
1457 case Ity_I32: case Ity_I64:
1458 case Ity_F32: case Ity_F64: case Ity_V128:
1459 break;
1460 default:
1461 vassert(0); // Other cases are ATC
1462 }
1463 return IRExpr_Get(off, ty);
1464 }
1465
1466 static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
1467 {
1468 static const HChar* namesQ[32]
1469 = { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1470 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
1471 "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
1472 "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
1473 static const HChar* namesD[32]
1474 = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
1475 "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
1476 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
1477 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
1478 static const HChar* namesS[32]
1479 = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
1480 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
1481 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
1482 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
1483 static const HChar* namesH[32]
1484 = { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
1485 "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
1486 "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
1487 "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
1488 static const HChar* namesB[32]
1489 = { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
1490 "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
1491 "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
1492 "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
1493 vassert(qregNo < 32);
1494 switch (sizeofIRType(laneTy)) {
1495 case 1: return namesB[qregNo];
1496 case 2: return namesH[qregNo];
1497 case 4: return namesS[qregNo];
1498 case 8: return namesD[qregNo];
1499 case 16: return namesQ[qregNo];
1500 default: vassert(0);
1501 }
1502 /*NOTREACHED*/
1503 }
1504
1505 static const HChar* nameQReg128 ( UInt qregNo )
1506 {
1507 return nameQRegLO(qregNo, Ity_V128);
1508 }
1509
1510 /* Find the offset of the most significant half (8 bytes) of the given
1511 Qreg. This requires knowing the endianness of the host. */
1512 static Int offsetQRegHI64 ( UInt qregNo )
1513 {
1514 return offsetQRegLane(qregNo, Ity_I64, 1);
1515 }
1516
1517 static IRExpr* getQRegHI64 ( UInt qregNo )
1518 {
1519 return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
1520 }
1521
1522 static void putQRegHI64 ( UInt qregNo, IRExpr* e )
1523 {
1524 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1525 Int off = offsetQRegHI64(qregNo);
1526 switch (ty) {
1527 case Ity_I64: case Ity_F64:
1528 break;
1529 default:
1530 vassert(0); // Other cases are plain wrong
1531 }
1532 stmt(IRStmt_Put(off, e));
1533 }
1534
1535 /* Put to a specified lane of a Qreg. */
1536 static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
1537 {
1538 IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
1539 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1540 switch (laneTy) {
1541 case Ity_F64: case Ity_I64:
1542 case Ity_I32: case Ity_F32:
1543 case Ity_I16: case Ity_F16:
1544 case Ity_I8:
1545 break;
1546 default:
1547 vassert(0); // Other cases are ATC
1548 }
1549 stmt(IRStmt_Put(off, e));
1550 }
1551
1552 /* Get from a specified lane of a Qreg. */
1553 static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
1554 {
1555 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1556 switch (laneTy) {
1557 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
1558 case Ity_F64: case Ity_F32: case Ity_F16:
1559 break;
1560 default:
1561 vassert(0); // Other cases are ATC
1562 }
1563 return IRExpr_Get(off, laneTy);
1564 }
1565
1566
1567 //ZZ /* ---------------- Misc registers ---------------- */
1568 //ZZ
1569 //ZZ static void putMiscReg32 ( UInt gsoffset,
1570 //ZZ IRExpr* e, /* :: Ity_I32 */
1571 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
1572 //ZZ {
1573 //ZZ switch (gsoffset) {
1574 //ZZ case OFFB_FPSCR: break;
1575 //ZZ case OFFB_QFLAG32: break;
1576 //ZZ case OFFB_GEFLAG0: break;
1577 //ZZ case OFFB_GEFLAG1: break;
1578 //ZZ case OFFB_GEFLAG2: break;
1579 //ZZ case OFFB_GEFLAG3: break;
1580 //ZZ default: vassert(0); /* awaiting more cases */
1581 //ZZ }
1582 //ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1583 //ZZ
1584 //ZZ if (guardT == IRTemp_INVALID) {
1585 //ZZ /* unconditional write */
1586 //ZZ stmt(IRStmt_Put(gsoffset, e));
1587 //ZZ } else {
1588 //ZZ stmt(IRStmt_Put(
1589 //ZZ gsoffset,
1590 //ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
1591 //ZZ e, IRExpr_Get(gsoffset, Ity_I32) )
1592 //ZZ ));
1593 //ZZ }
1594 //ZZ }
1595 //ZZ
1596 //ZZ static IRTemp get_ITSTATE ( void )
1597 //ZZ {
1598 //ZZ ASSERT_IS_THUMB;
1599 //ZZ IRTemp t = newTemp(Ity_I32);
1600 //ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
1601 //ZZ return t;
1602 //ZZ }
1603 //ZZ
1604 //ZZ static void put_ITSTATE ( IRTemp t )
1605 //ZZ {
1606 //ZZ ASSERT_IS_THUMB;
1607 //ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
1608 //ZZ }
1609 //ZZ
1610 //ZZ static IRTemp get_QFLAG32 ( void )
1611 //ZZ {
1612 //ZZ IRTemp t = newTemp(Ity_I32);
1613 //ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
1614 //ZZ return t;
1615 //ZZ }
1616 //ZZ
1617 //ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
1618 //ZZ {
1619 //ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
1620 //ZZ }
1621 //ZZ
1622 //ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
1623 //ZZ Status Register) to indicate that overflow or saturation occurred.
1624 //ZZ Nb: t must be zero to denote no saturation, and any nonzero
1625 //ZZ value to indicate saturation. */
1626 //ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
1627 //ZZ {
1628 //ZZ IRTemp old = get_QFLAG32();
1629 //ZZ IRTemp nyu = newTemp(Ity_I32);
1630 //ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
1631 //ZZ put_QFLAG32(nyu, condT);
1632 //ZZ }
1633
1634
1635 /* ---------------- FPCR stuff ---------------- */
1636
1637 /* Generate IR to get hold of the rounding mode bits in FPCR, and
1638 convert them to IR format. Bind the final result to the
1639 returned temp. */
1640 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1641 {
1642 /* The ARMvfp encoding for rounding mode bits is:
1643 00 to nearest
1644 01 to +infinity
1645 10 to -infinity
1646 11 to zero
1647 We need to convert that to the IR encoding:
1648 00 to nearest (the default)
1649 10 to +infinity
1650 01 to -infinity
1651 11 to zero
1652 Which can be done by swapping bits 0 and 1.
1653 The rmode bits are at 23:22 in FPCR.
1654 */
1655 IRTemp armEncd = newTemp(Ity_I32);
1656 IRTemp swapped = newTemp(Ity_I32);
1657 /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that
1658 we don't zero out bits 24 and above, since the assignment to
1659 'swapped' will mask them out anyway. */
1660 assign(armEncd,
1661 binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
1662 /* Now swap them. */
1663 assign(swapped,
1664 binop(Iop_Or32,
1665 binop(Iop_And32,
1666 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1667 mkU32(2)),
1668 binop(Iop_And32,
1669 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1670 mkU32(1))
1671 ));
1672 return swapped;
1673 }
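/* Illustrative use only (a sketch, not code taken from this file):
   FP arithmetic IROps take the IR rounding mode as their first
   argument, so a caller might do

      IRTemp rm = mk_get_IR_rounding_mode();
      assign(res, triop(Iop_AddF64, mkexpr(rm), mkexpr(argL), mkexpr(argR)));

   where 'res', 'argL' and 'argR' are hypothetical Ity_F64 temporaries. */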
1674
1675
1676 /*------------------------------------------------------------*/
1677 /*--- Helpers for flag handling and conditional insns ---*/
1678 /*------------------------------------------------------------*/
1679
1680 static const HChar* nameARM64Condcode ( ARM64Condcode cond )
1681 {
1682 switch (cond) {
1683 case ARM64CondEQ: return "eq";
1684 case ARM64CondNE: return "ne";
1685 case ARM64CondCS: return "cs"; // or 'hs'
1686 case ARM64CondCC: return "cc"; // or 'lo'
1687 case ARM64CondMI: return "mi";
1688 case ARM64CondPL: return "pl";
1689 case ARM64CondVS: return "vs";
1690 case ARM64CondVC: return "vc";
1691 case ARM64CondHI: return "hi";
1692 case ARM64CondLS: return "ls";
1693 case ARM64CondGE: return "ge";
1694 case ARM64CondLT: return "lt";
1695 case ARM64CondGT: return "gt";
1696 case ARM64CondLE: return "le";
1697 case ARM64CondAL: return "al";
1698 case ARM64CondNV: return "nv";
1699 default: vpanic("name_ARM64Condcode");
1700 }
1701 }
1702
1703 /* and a handy shorthand for it */
1704 static const HChar* nameCC ( ARM64Condcode cond ) {
1705 return nameARM64Condcode(cond);
1706 }
1707
1708
1709 /* Build IR to calculate some particular condition from stored
1710 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1711 Ity_I64, suitable for narrowing. Although the return type is
1712 Ity_I64, the returned value is either 0 or 1. 'cond' must be
1713 :: Ity_I64 and must denote the condition to compute in
1714 bits 7:4, and be zero everywhere else.
1715 */
1716 static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
1717 {
1718 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
1719 /* And 'cond' had better produce a value in which only bits 7:4 are
1720 nonzero. However, obviously we can't assert for that. */
1721
1722 /* So what we're constructing for the first argument is
1723 "(cond << 4) | stored-operation".
1724 However, as per comments above, 'cond' must be supplied
1725 pre-shifted to this function.
1726
1727 This pairing scheme requires that the ARM64_CC_OP_ values all fit
1728 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
1729 8 bits of the first argument. */
1730 IRExpr** args
1731 = mkIRExprVec_4(
1732 binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
1733 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1734 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1735 IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
1736 );
1737 IRExpr* call
1738 = mkIRExprCCall(
1739 Ity_I64,
1740 0/*regparm*/,
1741 "arm64g_calculate_condition", &arm64g_calculate_condition,
1742 args
1743 );
1744
1745 /* Exclude the requested condition, OP and NDEP from definedness
1746 checking. We're only interested in DEP1 and DEP2. */
1747 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1748 return call;
1749 }
1750
1751
1752 /* Build IR to calculate some particular condition from stored
1753 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1754 Ity_I64, suitable for narrowing. Although the return type is
1755 Ity_I64, the returned value is either 0 or 1.
1756 */
1757 static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1758 {
1759 /* First arg is "(cond << 4) | condition". This requires that the
1760 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a
1761 (COND, OP) pair in the lowest 8 bits of the first argument. */
1762 vassert(cond >= 0 && cond <= 15);
1763 return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
1764 }
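
/* Example of the (COND, OP) packing, assuming the thunk currently holds
   CC_OP = ARM64G_CC_OP_SUB64: a request for ARM64CondNE (value 1) passes
   mkU64(1 << 4), so the first argument of the helper call evaluates to
   (ARM64G_CC_OP_SUB64 | 0x10) -- condition in bits 7:4, operation in
   bits 3:0, and zero everywhere above.  This relies on all of the
   ARM64G_CC_OP_ values fitting in 4 bits, as noted above. */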
1765
1766
1767 /* Build IR to calculate just the carry flag from stored
1768 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1769 Ity_I64. */
1770 static IRExpr* mk_arm64g_calculate_flag_c ( void )
1771 {
1772 IRExpr** args
1773 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1774 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1775 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1776 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1777 IRExpr* call
1778 = mkIRExprCCall(
1779 Ity_I64,
1780 0/*regparm*/,
1781 "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
1782 args
1783 );
1784 /* Exclude OP and NDEP from definedness checking. We're only
1785 interested in DEP1 and DEP2. */
1786 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1787 return call;
1788 }
1789
1790
1791 //ZZ /* Build IR to calculate just the overflow flag from stored
1792 //ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1793 //ZZ Ity_I32. */
1794 //ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1795 //ZZ {
1796 //ZZ IRExpr** args
1797 //ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1798 //ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1799 //ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1800 //ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1801 //ZZ IRExpr* call
1802 //ZZ = mkIRExprCCall(
1803 //ZZ Ity_I32,
1804 //ZZ 0/*regparm*/,
1805 //ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1806 //ZZ args
1807 //ZZ );
1808 //ZZ /* Exclude OP and NDEP from definedness checking. We're only
1809 //ZZ interested in DEP1 and DEP2. */
1810 //ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1811 //ZZ return call;
1812 //ZZ }
1813
1814
1815 /* Build IR to calculate N Z C V in bits 31:28 of the
1816 returned word. */
1817 static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1818 {
1819 IRExpr** args
1820 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1821 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1822 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1823 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1824 IRExpr* call
1825 = mkIRExprCCall(
1826 Ity_I64,
1827 0/*regparm*/,
1828 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1829 args
1830 );
1831 /* Exclude OP and NDEP from definedness checking. We're only
1832 interested in DEP1 and DEP2. */
1833 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1834 return call;
1835 }
1836
1837
1838 /* Build IR to set the flags thunk, in the most general case. */
1839 static
1840 void setFlags_D1_D2_ND ( UInt cc_op,
1841 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1842 {
1843 vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
1844 vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
1845 vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
1846 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1847 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
1848 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1849 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1850 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1851 }
1852
1853 /* Build IR to set the flags thunk after ADD or SUB. */
1854 static
1855 void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1856 {
1857 IRTemp argL64 = IRTemp_INVALID;
1858 IRTemp argR64 = IRTemp_INVALID;
1859 IRTemp z64 = newTemp(Ity_I64);
1860 if (is64) {
1861 argL64 = argL;
1862 argR64 = argR;
1863 } else {
1864 argL64 = newTemp(Ity_I64);
1865 argR64 = newTemp(Ity_I64);
1866 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1867 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1868 }
1869 assign(z64, mkU64(0));
1870 UInt cc_op = ARM64G_CC_OP_NUMBER;
1871 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
1872 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1873 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
1874 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1875 else { vassert(0); }
1876 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
1877 }
1878
1879 /* Build IR to set the flags thunk after ADC or SBC. */
1880 static
1881 void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
1882 IRTemp argL, IRTemp argR, IRTemp oldC )
1883 {
1884 IRTemp argL64 = IRTemp_INVALID;
1885 IRTemp argR64 = IRTemp_INVALID;
1886 IRTemp oldC64 = IRTemp_INVALID;
1887 if (is64) {
1888 argL64 = argL;
1889 argR64 = argR;
1890 oldC64 = oldC;
1891 } else {
1892 argL64 = newTemp(Ity_I64);
1893 argR64 = newTemp(Ity_I64);
1894 oldC64 = newTemp(Ity_I64);
1895 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1896 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1897 assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
1898 }
1899 UInt cc_op = ARM64G_CC_OP_NUMBER;
1900 /**/ if ( isSBC && is64) { cc_op = ARM64G_CC_OP_SBC64; }
1901 else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
1902 else if (!isSBC && is64) { cc_op = ARM64G_CC_OP_ADC64; }
1903 else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
1904 else { vassert(0); }
1905 setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
1906 }
1907
1908 /* Build IR to set the flags thunk after ADD or SUB, if the given
1909 condition evaluates to True at run time. If not, the flags are set
1910 to the specified NZCV value. */
1911 static
1912 void setFlags_ADD_SUB_conditionally (
1913 Bool is64, Bool isSUB,
1914 IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1915 )
1916 {
1917 /* Generate IR as follows:
1918 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1919 CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1920 CC_DEP2 = ITE(cond, argR64, 0)
1921 CC_NDEP = 0
1922 */
1923
1924 IRTemp z64 = newTemp(Ity_I64);
1925 assign(z64, mkU64(0));
1926
1927 /* Establish the operation and operands for the True case. */
1928 IRTemp t_dep1 = IRTemp_INVALID;
1929 IRTemp t_dep2 = IRTemp_INVALID;
1930 UInt t_op = ARM64G_CC_OP_NUMBER;
1931 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; }
1932 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1933 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; }
1934 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1935 else { vassert(0); }
1936 /* */
1937 if (is64) {
1938 t_dep1 = argL;
1939 t_dep2 = argR;
1940 } else {
1941 t_dep1 = newTemp(Ity_I64);
1942 t_dep2 = newTemp(Ity_I64);
1943 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1944 assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1945 }
1946
1947 /* Establish the operation and operands for the False case. */
1948 IRTemp f_dep1 = newTemp(Ity_I64);
1949 IRTemp f_dep2 = z64;
1950 UInt f_op = ARM64G_CC_OP_COPY;
1951 assign(f_dep1, mkU64(nzcv << 28));
1952
1953 /* Final thunk values */
1954 IRTemp dep1 = newTemp(Ity_I64);
1955 IRTemp dep2 = newTemp(Ity_I64);
1956 IRTemp op = newTemp(Ity_I64);
1957
1958 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
1959 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
1960 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
1961
1962 /* finally .. */
1963 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) ));
1964 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
1965 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
1966 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
1967 }
1968
1969 /* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1970 static
1971 void setFlags_LOGIC ( Bool is64, IRTemp res )
1972 {
1973 IRTemp res64 = IRTemp_INVALID;
1974 IRTemp z64 = newTemp(Ity_I64);
1975 UInt cc_op = ARM64G_CC_OP_NUMBER;
1976 if (is64) {
1977 res64 = res;
1978 cc_op = ARM64G_CC_OP_LOGIC64;
1979 } else {
1980 res64 = newTemp(Ity_I64);
1981 assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1982 cc_op = ARM64G_CC_OP_LOGIC32;
1983 }
1984 assign(z64, mkU64(0));
1985 setFlags_D1_D2_ND(cc_op, res64, z64, z64);
1986 }
1987
1988 /* Build IR to set the flags thunk to a given NZCV value. NZCV is
1989 located in bits 31:28 of the supplied value. */
1990 static
1991 void setFlags_COPY ( IRTemp nzcv_28x0 )
1992 {
1993 IRTemp z64 = newTemp(Ity_I64);
1994 assign(z64, mkU64(0));
1995 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
1996 }
1997
1998
1999 //ZZ /* Minor variant of the above that sets NDEP to zero (if it
2000 //ZZ sets it at all) */
2001 //ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
2002 //ZZ IRTemp t_dep2,
2003 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2004 //ZZ {
2005 //ZZ IRTemp z32 = newTemp(Ity_I32);
2006 //ZZ assign( z32, mkU32(0) );
2007 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
2008 //ZZ }
2009 //ZZ
2010 //ZZ
2011 //ZZ /* Minor variant of the above that sets DEP2 to zero (if it
2012 //ZZ sets it at all) */
2013 //ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
2014 //ZZ IRTemp t_ndep,
2015 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2016 //ZZ {
2017 //ZZ IRTemp z32 = newTemp(Ity_I32);
2018 //ZZ assign( z32, mkU32(0) );
2019 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
2020 //ZZ }
2021 //ZZ
2022 //ZZ
2023 //ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
2024 //ZZ sets them at all) */
2025 //ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
2026 //ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2027 //ZZ {
2028 //ZZ IRTemp z32 = newTemp(Ity_I32);
2029 //ZZ assign( z32, mkU32(0) );
2030 //ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
2031 //ZZ }
2032
2033
2034 /*------------------------------------------------------------*/
2035 /*--- Misc math helpers ---*/
2036 /*------------------------------------------------------------*/
2037
2038 /* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
2039 static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
2040 {
2041 IRTemp maskT = newTemp(Ity_I64);
2042 IRTemp res = newTemp(Ity_I64);
2043 vassert(sh >= 1 && sh <= 63);
2044 assign(maskT, mkU64(mask));
2045 assign( res,
2046 binop(Iop_Or64,
2047 binop(Iop_Shr64,
2048 binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
2049 mkU8(sh)),
2050 binop(Iop_And64,
2051 binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
2052 mkexpr(maskT))
2053 )
2054 );
2055 return res;
2056 }
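
/* Worked example: with x = 0x1122334455667788, mask = 0xFF00FF00FF00FF00
   and sh = 8, the two halves are 0x0011003300550077 and 0x2200440066008800,
   whose OR is 0x2211443366558877 -- adjacent bytes swapped within each
   16-bit lane. */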
2057
2058 /* Generates byte swaps within 32-bit lanes. */
2059 static IRTemp math_UINTSWAP64 ( IRTemp src )
2060 {
2061 IRTemp res;
2062 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2063 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2064 return res;
2065 }
2066
2067 /* Generates byte swaps within 16-bit lanes. */
2068 static IRTemp math_USHORTSWAP64 ( IRTemp src )
2069 {
2070 IRTemp res;
2071 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2072 return res;
2073 }
2074
2075 /* Generates a 64-bit byte swap. */
2076 static IRTemp math_BYTESWAP64 ( IRTemp src )
2077 {
2078 IRTemp res;
2079 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2080 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2081 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
2082 return res;
2083 }
2084
2085 /* Generates a 64-bit bit swap. */
2086 static IRTemp math_BITSWAP64 ( IRTemp src )
2087 {
2088 IRTemp res;
2089 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
2090 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
2091 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
2092 return math_BYTESWAP64(res);
2093 }
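
/* For clarity, a host-side reference version of the same bit reversal
   (a sketch only; the helper name is made up here, and each line mirrors
   one math_SWAPHELPER step above, including the final byte swap):

      static ULong bitswap64_ref ( ULong x )
      {
         x = ((x & 0xAAAAAAAAAAAAAAAAULL) >> 1)  | ((x << 1)  & 0xAAAAAAAAAAAAAAAAULL);
         x = ((x & 0xCCCCCCCCCCCCCCCCULL) >> 2)  | ((x << 2)  & 0xCCCCCCCCCCCCCCCCULL);
         x = ((x & 0xF0F0F0F0F0F0F0F0ULL) >> 4)  | ((x << 4)  & 0xF0F0F0F0F0F0F0F0ULL);
         x = ((x & 0xFF00FF00FF00FF00ULL) >> 8)  | ((x << 8)  & 0xFF00FF00FF00FF00ULL);
         x = ((x & 0xFFFF0000FFFF0000ULL) >> 16) | ((x << 16) & 0xFFFF0000FFFF0000ULL);
         x = ((x & 0xFFFFFFFF00000000ULL) >> 32) | ((x << 32) & 0xFFFFFFFF00000000ULL);
         return x;
      }
*/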
2094
2095 /* Duplicates the bits at the bottom of the given word to fill the
2096 whole word. src :: Ity_I64 is assumed to have zeroes everywhere
2097 except for the bottom bits. */
2098 static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
2099 {
2100 if (srcTy == Ity_I8) {
2101 IRTemp t16 = newTemp(Ity_I64);
2102 assign(t16, binop(Iop_Or64, mkexpr(src),
2103 binop(Iop_Shl64, mkexpr(src), mkU8(8))));
2104 IRTemp t32 = newTemp(Ity_I64);
2105 assign(t32, binop(Iop_Or64, mkexpr(t16),
2106 binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
2107 IRTemp t64 = newTemp(Ity_I64);
2108 assign(t64, binop(Iop_Or64, mkexpr(t32),
2109 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2110 return t64;
2111 }
2112 if (srcTy == Ity_I16) {
2113 IRTemp t32 = newTemp(Ity_I64);
2114 assign(t32, binop(Iop_Or64, mkexpr(src),
2115 binop(Iop_Shl64, mkexpr(src), mkU8(16))));
2116 IRTemp t64 = newTemp(Ity_I64);
2117 assign(t64, binop(Iop_Or64, mkexpr(t32),
2118 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2119 return t64;
2120 }
2121 if (srcTy == Ity_I32) {
2122 IRTemp t64 = newTemp(Ity_I64);
2123 assign(t64, binop(Iop_Or64, mkexpr(src),
2124 binop(Iop_Shl64, mkexpr(src), mkU8(32))));
2125 return t64;
2126 }
2127 if (srcTy == Ity_I64) {
2128 return src;
2129 }
2130 vassert(0);
2131 }
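
/* Example: an Ity_I8 source holding 0x5A (already zero-extended into the
   I64 temp) yields 0x5A5A5A5A5A5A5A5A; an Ity_I16 source holding 0xABCD
   yields 0xABCDABCDABCDABCD. */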
2132
2133
2134 /* Duplicates the src element exactly so as to fill a V128 value. */
2135 static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
2136 {
2137 IRTemp res = newTempV128();
2138 if (srcTy == Ity_F64) {
2139 IRTemp i64 = newTemp(Ity_I64);
2140 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
2141 assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
2142 return res;
2143 }
2144 if (srcTy == Ity_F32) {
2145 IRTemp i64a = newTemp(Ity_I64);
2146 assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
2147 IRTemp i64b = newTemp(Ity_I64);
2148 assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
2149 mkexpr(i64a)));
2150 assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
2151 return res;
2152 }
2153 if (srcTy == Ity_I64) {
2154 assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
2155 return res;
2156 }
2157 if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
2158 IRTemp t1 = newTemp(Ity_I64);
2159 assign(t1, widenUto64(srcTy, mkexpr(src)));
2160 IRTemp t2 = math_DUP_TO_64(t1, srcTy);
2161 assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
2162 return res;
2163 }
2164 vassert(0);
2165 }
2166
2167
2168 /* |fullWidth| is a full V128 width result. Depending on bitQ,
2169 zero out the upper half. */
2170 static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
2171 {
2172 if (bitQ == 1) return mkexpr(fullWidth);
2173 if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
2174 vassert(0);
2175 }
2176
2177 /* The same, but from an expression instead. */
2178 static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
2179 {
2180 IRTemp fullWidthT = newTempV128();
2181 assign(fullWidthT, fullWidth);
2182 return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
2183 }
2184
2185
2186 /*------------------------------------------------------------*/
2187 /*--- FP comparison helpers ---*/
2188 /*------------------------------------------------------------*/
2189
2190 /* irRes :: Ity_I32 holds a floating point comparison result encoded
2191 as an IRCmpF64Result. Generate code to convert it to an
2192 ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
2193 Assign a new temp to hold that value, and return the temp. */
2194 static
2195 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
2196 {
2197 IRTemp ix = newTemp(Ity_I64);
2198 IRTemp termL = newTemp(Ity_I64);
2199 IRTemp termR = newTemp(Ity_I64);
2200 IRTemp nzcv = newTemp(Ity_I64);
2201 IRTemp irRes = newTemp(Ity_I64);
2202
2203 /* This is where the fun starts. We have to convert 'irRes' from
2204 an IR-convention return result (IRCmpF64Result) to an
2205 ARM-encoded (N,Z,C,V) group. The final result is in the bottom
2206 4 bits of 'nzcv'. */
2207 /* Map compare result from IR to ARM(nzcv) */
2208 /*
2209 FP cmp result | IR | ARM(nzcv)
2210 --------------------------------
2211 UN 0x45 0011
2212 LT 0x01 1000
2213 GT 0x00 0010
2214 EQ 0x40 0110
2215 */
2216 /* Now since you're probably wondering WTF ..
2217
2218 ix fishes the useful bits out of the IR value, bits 6 and 0, and
2219 places them side by side, giving a number which is 0, 1, 2 or 3.
2220
2221 termL is a sequence cooked up by GNU superopt. It converts ix
2222 into an almost correct NZCV value (incredibly), except
2223 for the case of UN, where it produces 0100 instead of the
2224 required 0011.
2225
2226 termR is therefore a correction term, also computed from ix. It
2227 is 1 in the UN case and 0 for LT, GT and EQ. Hence, to get
2228 the final correct value, we subtract termR from termL.
2229
2230 Don't take my word for it. There's a test program at the bottom
2231 of guest_arm_toIR.c, to try this out with.
2232 */
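/* Hand-checked: ix takes the values 0 (GT), 1 (LT), 2 (EQ) and 3 (UN).
   termL then evaluates to 2 (0010), 8 (1000), 6 (0110) and 4 (0100)
   respectively, and termR to 0, 0, 0 and 1, so termL - termR yields the
   required 0010, 1000, 0110 and 0011. */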
2233 assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));
2234
2235 assign(
2236 ix,
2237 binop(Iop_Or64,
2238 binop(Iop_And64,
2239 binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
2240 mkU64(3)),
2241 binop(Iop_And64, mkexpr(irRes), mkU64(1))));
2242
2243 assign(
2244 termL,
2245 binop(Iop_Add64,
2246 binop(Iop_Shr64,
2247 binop(Iop_Sub64,
2248 binop(Iop_Shl64,
2249 binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
2250 mkU8(62)),
2251 mkU64(1)),
2252 mkU8(61)),
2253 mkU64(1)));
2254
2255 assign(
2256 termR,
2257 binop(Iop_And64,
2258 binop(Iop_And64,
2259 mkexpr(ix),
2260 binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
2261 mkU64(1)));
2262
2263 assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
2264 return nzcv;
2265 }
2266
2267
2268 /*------------------------------------------------------------*/
2269 /*--- Data processing (immediate) ---*/
2270 /*------------------------------------------------------------*/
2271
2272 /* Helper functions for supporting "DecodeBitMasks" */
2273
2274 static ULong dbm_ROR ( Int width, ULong x, Int rot )
2275 {
2276 vassert(width > 0 && width <= 64);
2277 vassert(rot >= 0 && rot < width);
2278 if (rot == 0) return x;
2279 ULong res = x >> rot;
2280 res |= (x << (width - rot));
2281 if (width < 64)
2282 res &= ((1ULL << width) - 1);
2283 return res;
2284 }
2285
2286 static ULong dbm_RepTo64( Int esize, ULong x )
2287 {
2288 switch (esize) {
2289 case 64:
2290 return x;
2291 case 32:
2292 x &= 0xFFFFFFFF; x |= (x << 32);
2293 return x;
2294 case 16:
2295 x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
2296 return x;
2297 case 8:
2298 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
2299 return x;
2300 case 4:
2301 x &= 0xF; x |= (x << 4); x |= (x << 8);
2302 x |= (x << 16); x |= (x << 32);
2303 return x;
2304 case 2:
2305 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
2306 x |= (x << 16); x |= (x << 32);
2307 return x;
2308 default:
2309 break;
2310 }
2311 vpanic("dbm_RepTo64");
2312 /*NOTREACHED*/
2313 return 0;
2314 }
2315
2316 static Int dbm_highestSetBit ( ULong x )
2317 {
2318 Int i;
2319 for (i = 63; i >= 0; i--) {
2320 if (x & (1ULL << i))
2321 return i;
2322 }
2323 vassert(x == 0);
2324 return -1;
2325 }
2326
2327 static
2328 Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
2329 ULong immN, ULong imms, ULong immr, Bool immediate,
2330 UInt M /*32 or 64*/)
2331 {
2332 vassert(immN < (1ULL << 1));
2333 vassert(imms < (1ULL << 6));
2334 vassert(immr < (1ULL << 6));
2335 vassert(immediate == False || immediate == True);
2336 vassert(M == 32 || M == 64);
2337
2338 Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
2339 if (len < 1) { /* printf("fail1\n"); */ return False; }
2340 vassert(len <= 6);
2341 vassert(M >= (1 << len));
2342
2343 vassert(len >= 1 && len <= 6);
2344 ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
2345 (1 << len) - 1;
2346 vassert(levels >= 1 && levels <= 63);
2347
2348 if (immediate && ((imms & levels) == levels)) {
2349 /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
2350 return False;
2351 }
2352
2353 ULong S = imms & levels;
2354 ULong R = immr & levels;
2355 Int diff = S - R;
2356 diff &= 63;
2357 Int esize = 1 << len;
2358 vassert(2 <= esize && esize <= 64);
2359
2360 /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
2361 same below with d. S can be 63 in which case we have an out of
2362 range and hence undefined shift. */
2363 vassert(S >= 0 && S <= 63);
2364 vassert(esize >= (S+1));
2365 ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
2366 //(1ULL << (S+1)) - 1;
2367 ((1ULL << S) - 1) + (1ULL << S);
2368
2369 Int d = // diff<len-1:0>
2370 diff & ((1 << len)-1);
2371 vassert(esize >= (d+1));
2372 vassert(d >= 0 && d <= 63);
2373
2374 ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
2375 //(1ULL << (d+1)) - 1;
2376 ((1ULL << d) - 1) + (1ULL << d);
2377
2378 if (esize != 64) vassert(elem_s < (1ULL << esize));
2379 if (esize != 64) vassert(elem_d < (1ULL << esize));
2380
2381 if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
2382 if (tmask) *tmask = dbm_RepTo64(esize, elem_d);
2383
2384 return True;
2385 }
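
/* Worked example (hand-checked): for the logical-immediate encoding
   immN=1, immr=0b000000, imms=0b000111 with M=64, len comes out as 6,
   so esize=64, levels=63, S=7, R=0.  Hence elem_s = 0xFF, wmask =
   ROR(0xFF,0) replicated over one 64-bit element = 0x00000000000000FF,
   and tmask = 0xFF too -- i.e. this is the encoding of the 64-bit
   immediate 0xFF. */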
2386
2387
2388 static
2389 Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
2390 UInt insn)
2391 {
2392 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2393
2394 /* insn[28:23]
2395 10000x PC-rel addressing
2396 10001x Add/subtract (immediate)
2397 100100 Logical (immediate)
2398 100101 Move Wide (immediate)
2399 100110 Bitfield
2400 100111 Extract
2401 */
2402
2403 /* ------------------ ADD/SUB{,S} imm12 ------------------ */
2404 if (INSN(28,24) == BITS5(1,0,0,0,1)) {
2405 Bool is64 = INSN(31,31) == 1;
2406 Bool isSub = INSN(30,30) == 1;
2407 Bool setCC = INSN(29,29) == 1;
2408 UInt sh = INSN(23,22);
2409 UInt uimm12 = INSN(21,10);
2410 UInt nn = INSN(9,5);
2411 UInt dd = INSN(4,0);
2412 const HChar* nm = isSub ? "sub" : "add";
2413 if (sh >= 2) {
2414 /* Invalid; fall through */
2415 } else {
2416 vassert(sh <= 1);
2417 uimm12 <<= (12 * sh);
2418 if (is64) {
2419 IRTemp argL = newTemp(Ity_I64);
2420 IRTemp argR = newTemp(Ity_I64);
2421 IRTemp res = newTemp(Ity_I64);
2422 assign(argL, getIReg64orSP(nn));
2423 assign(argR, mkU64(uimm12));
2424 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
2425 mkexpr(argL), mkexpr(argR)));
2426 if (setCC) {
2427 putIReg64orZR(dd, mkexpr(res));
2428 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
2429 DIP("%ss %s, %s, 0x%x\n",
2430 nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
2431 } else {
2432 putIReg64orSP(dd, mkexpr(res));
2433 DIP("%s %s, %s, 0x%x\n",
2434 nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
2435 }
2436 } else {
2437 IRTemp argL = newTemp(Ity_I32);
2438 IRTemp argR = newTemp(Ity_I32);
2439 IRTemp res = newTemp(Ity_I32);
2440 assign(argL, getIReg32orSP(nn));
2441 assign(argR, mkU32(uimm12));
2442 assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32,
2443 mkexpr(argL), mkexpr(argR)));
2444 if (setCC) {
2445 putIReg32orZR(dd, mkexpr(res));
2446 setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
2447 DIP("%ss %s, %s, 0x%x\n",
2448 nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
2449 } else {
2450 putIReg32orSP(dd, mkexpr(res));
2451 DIP("%s %s, %s, 0x%x\n",
2452 nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
2453 }
2454 }
2455 return True;
2456 }
2457 }
2458
2459 /* -------------------- ADR/ADRP -------------------- */
2460 if (INSN(28,24) == BITS5(1,0,0,0,0)) {
2461 UInt bP = INSN(31,31);
2462 UInt immLo = INSN(30,29);
2463 UInt immHi = INSN(23,5);
2464 UInt rD = INSN(4,0);
2465 ULong uimm = (immHi << 2) | immLo;
2466 ULong simm = sx_to_64(uimm, 21);
2467 ULong val;
2468 if (bP) {
2469 val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
2470 } else {
2471 val = guest_PC_curr_instr + simm;
2472 }
2473 putIReg64orZR(rD, mkU64(val));
2474 DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
2475 return True;
2476 }
2477
2478 /* -------------------- LOGIC(imm) -------------------- */
2479 if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
2480 /* 31 30 28 22 21 15 9 4
2481 sf op 100100 N immr imms Rn Rd
2482 op=00: AND Rd|SP, Rn, #imm
2483 op=01: ORR Rd|SP, Rn, #imm
2484 op=10: EOR Rd|SP, Rn, #imm
2485 op=11: ANDS Rd|ZR, Rn, #imm
2486 */
2487 Bool is64 = INSN(31,31) == 1;
2488 UInt op = INSN(30,29);
2489 UInt N = INSN(22,22);
2490 UInt immR = INSN(21,16);
2491 UInt immS = INSN(15,10);
2492 UInt nn = INSN(9,5);
2493 UInt dd = INSN(4,0);
2494 ULong imm = 0;
2495 Bool ok;
2496 if (N == 1 && !is64)
2497 goto after_logic_imm; /* not allowed; fall through */
2498 ok = dbm_DecodeBitMasks(&imm, NULL,
2499 N, immS, immR, True, is64 ? 64 : 32);
2500 if (!ok)
2501 goto after_logic_imm;
2502
2503 const HChar* names[4] = { "and", "orr", "eor", "ands" };
2504 const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
2505 const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };
2506
2507 vassert(op < 4);
2508 if (is64) {
2509 IRExpr* argL = getIReg64orZR(nn);
2510 IRExpr* argR = mkU64(imm);
2511 IRTemp res = newTemp(Ity_I64);
2512 assign(res, binop(ops64[op], argL, argR));
2513 if (op < 3) {
2514 putIReg64orSP(dd, mkexpr(res));
2515 DIP("%s %s, %s, 0x%llx\n", names[op],
2516 nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
2517 } else {
2518 putIReg64orZR(dd, mkexpr(res));
2519 setFlags_LOGIC(True/*is64*/, res);
2520 DIP("%s %s, %s, 0x%llx\n", names[op],
2521 nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
2522 }
2523 } else {
2524 IRExpr* argL = getIReg32orZR(nn);
2525 IRExpr* argR = mkU32((UInt)imm);
2526 IRTemp res = newTemp(Ity_I32);
2527 assign(res, binop(ops32[op], argL, argR));
2528 if (op < 3) {
2529 putIReg32orSP(dd, mkexpr(res));
2530 DIP("%s %s, %s, 0x%x\n", names[op],
2531 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
2532 } else {
2533 putIReg32orZR(dd, mkexpr(res));
2534 setFlags_LOGIC(False/*!is64*/, res);
2535 DIP("%s %s, %s, 0x%x\n", names[op],
2536 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
2537 }
2538 }
2539 return True;
2540 }
2541 after_logic_imm:
2542
2543 /* -------------------- MOV{Z,N,K} -------------------- */
2544 if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
2545 /* 31 30 28 22 20 4
2546 | | | | | |
2547 sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw))
2548 sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw))
2549 sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw))
2550 */
2551 Bool is64 = INSN(31,31) == 1;
2552 UInt subopc = INSN(30,29);
2553 UInt hw = INSN(22,21);
2554 UInt imm16 = INSN(20,5);
2555 UInt dd = INSN(4,0);
2556 if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
2557 /* invalid; fall through */
2558 } else {
2559 ULong imm64 = ((ULong)imm16) << (16 * hw);
2560 if (!is64)
2561 vassert(imm64 < 0x100000000ULL);
2562 switch (subopc) {
2563 case BITS2(1,0): // MOVZ
2564 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2565 DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2566 break;
2567 case BITS2(0,0): // MOVN
2568 imm64 = ~imm64;
2569 if (!is64)
2570 imm64 &= 0xFFFFFFFFULL;
2571 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2572 DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2573 break;
2574 case BITS2(1,1): // MOVK
2575 /* This is more complex. We are inserting a slice into
2576 the destination register, so we need to have the old
2577 value of it. */
2578 if (is64) {
2579 IRTemp old = newTemp(Ity_I64);
2580 assign(old, getIReg64orZR(dd));
2581 ULong mask = 0xFFFFULL << (16 * hw);
2582 IRExpr* res
2583 = binop(Iop_Or64,
2584 binop(Iop_And64, mkexpr(old), mkU64(~mask)),
2585 mkU64(imm64));
2586 putIReg64orZR(dd, res);
2587 DIP("movk %s, 0x%x, lsl %u\n",
2588 nameIReg64orZR(dd), imm16, 16*hw);
2589 } else {
2590 IRTemp old = newTemp(Ity_I32);
2591 assign(old, getIReg32orZR(dd));
2592 vassert(hw <= 1);
2593 UInt mask = 0xFFFF << (16 * hw);
2594 IRExpr* res
2595 = binop(Iop_Or32,
2596 binop(Iop_And32, mkexpr(old), mkU32(~mask)),
2597 mkU32((UInt)imm64));
2598 putIReg32orZR(dd, res);
2599 DIP("movk %s, 0x%x, lsl %u\n",
2600 nameIReg32orZR(dd), imm16, 16*hw);
2601 }
2602 break;
2603 default:
2604 vassert(0);
2605 }
2606 return True;
2607 }
2608 }
2609
2610 /* -------------------- {U,S,}BFM -------------------- */
2611 /* 30 28 22 21 15 9 4
2612
2613 sf 10 100110 N immr imms nn dd
2614 UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2615 UBFM Xd, Xn, #immr, #imms when sf=1, N=1
2616
2617 sf 00 100110 N immr imms nn dd
2618 SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2619 SBFM Xd, Xn, #immr, #imms when sf=1, N=1
2620
2621 sf 01 100110 N immr imms nn dd
2622 BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2623 BFM Xd, Xn, #immr, #imms when sf=1, N=1
2624 */
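/* Informal orientation note (not part of the decode logic): the usual
   assembler aliases map onto these -- UBFX Rd,Rn,#lsb,#width is UBFM with
   immr=lsb, imms=lsb+width-1; LSR Rd,Rn,#sh is UBFM with immr=sh and
   imms=31 (W) or 63 (X); and SXTB/SXTH/SXTW are SBFM with immr=0 and
   imms=7/15/31. */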
2625 if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
2626 UInt sf = INSN(31,31);
2627 UInt opc = INSN(30,29);
2628 UInt N = INSN(22,22);
2629 UInt immR = INSN(21,16);
2630 UInt immS = INSN(15,10);
2631 UInt nn = INSN(9,5);
2632 UInt dd = INSN(4,0);
2633 Bool inZero = False;
2634 Bool extend = False;
2635 const HChar* nm = "???";
2636 /* skip invalid combinations */
2637 switch (opc) {
2638 case BITS2(0,0):
2639 inZero = True; extend = True; nm = "sbfm"; break;
2640 case BITS2(0,1):
2641 inZero = False; extend = False; nm = "bfm"; break;
2642 case BITS2(1,0):
2643 inZero = True; extend = False; nm = "ubfm"; break;
2644 case BITS2(1,1):
2645 goto after_bfm; /* invalid */
2646 default:
2647 vassert(0);
2648 }
2649 if (sf == 1 && N != 1) goto after_bfm;
2650 if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
2651 || ((immS >> 5) & 1) != 0)) goto after_bfm;
2652 ULong wmask = 0, tmask = 0;
2653 Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
2654 N, immS, immR, False, sf == 1 ? 64 : 32);
2655 if (!ok) goto after_bfm; /* hmmm */
2656
2657 Bool is64 = sf == 1;
2658 IRType ty = is64 ? Ity_I64 : Ity_I32;
2659
2660 IRTemp dst = newTemp(ty);
2661 IRTemp src = newTemp(ty);
2662 IRTemp bot = newTemp(ty);
2663 IRTemp top = newTemp(ty);
2664 IRTemp res = newTemp(ty);
2665 assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
2666 assign(src, getIRegOrZR(is64, nn));
2667 /* perform bitfield move on low bits */
2668 assign(bot, binop(mkOR(ty),
2669 binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
2670 binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
2671 mkU(ty, wmask))));
2672 /* determine extension bits (sign, zero or dest register) */
2673 assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
2674 /* combine extension bits and result bits */
2675 assign(res, binop(mkOR(ty),
2676 binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
2677 binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
2678 putIRegOrZR(is64, dd, mkexpr(res));
2679 DIP("%s %s, %s, immR=%u, immS=%u\n",
2680 nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
2681 return True;
2682 }
2683 after_bfm:
2684
2685 /* ---------------------- EXTR ---------------------- */
2686 /* 30 28 22 20 15 9 4
2687 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6
2688 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
2689 */
2690 if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
2691 Bool is64 = INSN(31,31) == 1;
2692 UInt mm = INSN(20,16);
2693 UInt imm6 = INSN(15,10);
2694 UInt nn = INSN(9,5);
2695 UInt dd = INSN(4,0);
2696 Bool valid = True;
2697 if (INSN(31,31) != INSN(22,22))
2698 valid = False;
2699 if (!is64 && imm6 >= 32)
2700 valid = False;
2701 if (!valid) goto after_extr;
2702 IRType ty = is64 ? Ity_I64 : Ity_I32;
2703 IRTemp srcHi = newTemp(ty);
2704 IRTemp srcLo = newTemp(ty);
2705 IRTemp res = newTemp(ty);
2706 assign(srcHi, getIRegOrZR(is64, nn));
2707 assign(srcLo, getIRegOrZR(is64, mm));
2708 if (imm6 == 0) {
2709 assign(res, mkexpr(srcLo));
2710 } else {
2711 UInt szBits = 8 * sizeofIRType(ty);
2712 vassert(imm6 > 0 && imm6 < szBits);
2713 assign(res, binop(mkOR(ty),
2714 binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
2715 binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
2716 }
2717 putIRegOrZR(is64, dd, mkexpr(res));
2718 DIP("extr %s, %s, %s, #%u\n",
2719 nameIRegOrZR(is64,dd),
2720 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
2721 return True;
2722 }
2723 after_extr:
2724
2725 vex_printf("ARM64 front end: data_processing_immediate\n");
2726 return False;
2727 # undef INSN
2728 }
2729
2730
2731 /*------------------------------------------------------------*/
2732 /*--- Data processing (register) instructions ---*/
2733 /*------------------------------------------------------------*/
2734
2735 static const HChar* nameSH ( UInt sh ) {
2736 switch (sh) {
2737 case 0: return "lsl";
2738 case 1: return "lsr";
2739 case 2: return "asr";
2740 case 3: return "ror";
2741 default: vassert(0);
2742 }
2743 }
2744
2745 /* Generate IR to get a register value, possibly shifted by an
2746 immediate. Returns either a 32- or 64-bit temporary holding the
2747 result. After the shift, the value can optionally be NOT-ed
2748 too.
2749
2750 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be
2751 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR
2752 isn't allowed, but it's the job of the caller to check that.
2753 */
2754 static IRTemp getShiftedIRegOrZR ( Bool is64,
2755 UInt sh_how, UInt sh_amt, UInt regNo,
2756 Bool invert )
2757 {
2758 vassert(sh_how < 4);
2759 vassert(sh_amt < (is64 ? 64 : 32));
2760 IRType ty = is64 ? Ity_I64 : Ity_I32;
2761 IRTemp t0 = newTemp(ty);
2762 assign(t0, getIRegOrZR(is64, regNo));
2763 IRTemp t1 = newTemp(ty);
2764 switch (sh_how) {
2765 case BITS2(0,0):
2766 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2767 break;
2768 case BITS2(0,1):
2769 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2770 break;
2771 case BITS2(1,0):
2772 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2773 break;
2774 case BITS2(1,1):
2775 assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2776 break;
2777 default:
2778 vassert(0);
2779 }
2780 if (invert) {
2781 IRTemp t2 = newTemp(ty);
2782 assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2783 return t2;
2784 } else {
2785 return t1;
2786 }
2787 }
2788
2789
2790 static
2791 Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
2792 UInt insn)
2793 {
2794 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2795
2796 /* ------------------- ADD/SUB(reg) ------------------- */
2797 /* x==0 => 32 bit op x==1 => 64 bit op
2798 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
2799
2800 31 30 29 28 23 21 20 15 9 4
2801 | | | | | | | | | |
2802 x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6)
2803 x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6)
2804 x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6)
2805 x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6)
2806 */
2807 if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
2808 UInt bX = INSN(31,31);
2809 UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */
2810 UInt bS = INSN(29, 29); /* set flags? */
2811 UInt sh = INSN(23,22);
2812 UInt rM = INSN(20,16);
2813 UInt imm6 = INSN(15,10);
2814 UInt rN = INSN(9,5);
2815 UInt rD = INSN(4,0);
2816 Bool isSUB = bOP == 1;
2817 Bool is64 = bX == 1;
2818 IRType ty = is64 ? Ity_I64 : Ity_I32;
2819 if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
2820 /* invalid; fall through */
2821 } else {
2822 IRTemp argL = newTemp(ty);
2823 assign(argL, getIRegOrZR(is64, rN));
2824 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
2825 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2826 IRTemp res = newTemp(ty);
2827 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2828 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2829 if (bS) {
2830 setFlags_ADD_SUB(is64, isSUB, argL, argR);
2831 }
2832 DIP("%s%s %s, %s, %s, %s #%u\n",
2833 bOP ? "sub" : "add", bS ? "s" : "",
2834 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2835 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2836 return True;
2837 }
2838 }
2839
2840 /* ------------------- ADC/SBC(reg) ------------------- */
2841 /* x==0 => 32 bit op x==1 => 64 bit op
2842
2843 31 30 29 28 23 21 20 15 9 4
2844 | | | | | | | | | |
2845 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm
2846 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm
2847 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm
2848 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm
2849 */
2850
2851 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2852 UInt bX = INSN(31,31);
2853 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */
2854 UInt bS = INSN(29,29); /* set flags */
2855 UInt rM = INSN(20,16);
2856 UInt rN = INSN(9,5);
2857 UInt rD = INSN(4,0);
2858
2859 Bool isSUB = bOP == 1;
2860 Bool is64 = bX == 1;
2861 IRType ty = is64 ? Ity_I64 : Ity_I32;
2862
2863 IRTemp oldC = newTemp(ty);
2864 assign(oldC,
2865 is64 ? mk_arm64g_calculate_flag_c()
2866 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
2867
2868 IRTemp argL = newTemp(ty);
2869 assign(argL, getIRegOrZR(is64, rN));
2870 IRTemp argR = newTemp(ty);
2871 assign(argR, getIRegOrZR(is64, rM));
2872
2873 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2874 IRTemp res = newTemp(ty);
2875 if (isSUB) {
2876 IRExpr* one = is64 ? mkU64(1) : mkU32(1);
2877 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
2878 assign(res,
2879 binop(op,
2880 binop(op, mkexpr(argL), mkexpr(argR)),
2881 binop(xorOp, mkexpr(oldC), one)));
2882 } else {
2883 assign(res,
2884 binop(op,
2885 binop(op, mkexpr(argL), mkexpr(argR)),
2886 mkexpr(oldC)));
2887 }
2888
2889 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2890
2891 if (bS) {
2892 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
2893 }
2894
2895 DIP("%s%s %s, %s, %s\n",
2896 bOP ? "sbc" : "adc", bS ? "s" : "",
2897 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2898 nameIRegOrZR(is64, rM));
2899 return True;
2900 }
2901
2902 /* -------------------- LOGIC(reg) -------------------- */
2903 /* x==0 => 32 bit op x==1 => 64 bit op
2904 N==0 => inv? is no-op (no inversion)
2905 N==1 => inv? is NOT
2906 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
2907
2908 31 30 28 23 21 20 15 9 4
2909 | | | | | | | | |
2910 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6))
2911 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6))
2912 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6))
2913 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6))
2914 With N=1, the names are: BIC ORN EON BICS
2915 */
2916 if (INSN(28,24) == BITS5(0,1,0,1,0)) {
2917 UInt bX = INSN(31,31);
2918 UInt sh = INSN(23,22);
2919 UInt bN = INSN(21,21);
2920 UInt rM = INSN(20,16);
2921 UInt imm6 = INSN(15,10);
2922 UInt rN = INSN(9,5);
2923 UInt rD = INSN(4,0);
2924 Bool is64 = bX == 1;
2925 IRType ty = is64 ? Ity_I64 : Ity_I32;
2926 if (!is64 && imm6 > 31) {
2927 /* invalid; fall through */
2928 } else {
2929 IRTemp argL = newTemp(ty);
2930 assign(argL, getIRegOrZR(is64, rN));
2931 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
2932 IROp op = Iop_INVALID;
2933 switch (INSN(30,29)) {
2934 case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
2935 case BITS2(0,1): op = mkOR(ty); break;
2936 case BITS2(1,0): op = mkXOR(ty); break;
2937 default: vassert(0);
2938 }
2939 IRTemp res = newTemp(ty);
2940 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2941 if (INSN(30,29) == BITS2(1,1)) {
2942 setFlags_LOGIC(is64, res);
2943 }
2944 putIRegOrZR(is64, rD, mkexpr(res));
2945
2946 static const HChar* names_op[8]
2947 = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
2948 vassert(((bN << 2) | INSN(30,29)) < 8);
2949 const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
2950 /* Special-case the printing of "MOV" */
2951 if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
2952 DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
2953 nameIRegOrZR(is64, rM));
2954 } else {
2955 DIP("%s %s, %s, %s, %s #%u\n", nm_op,
2956 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2957 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2958 }
2959 return True;
2960 }
2961 }
2962
2963 /* -------------------- {U,S}MULH -------------------- */
2964 /* 31 23 22 20 15 9 4
2965 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm
2966 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm
2967 */
2968 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
2969 && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
2970 Bool isU = INSN(23,23) == 1;
2971 UInt mm = INSN(20,16);
2972 UInt nn = INSN(9,5);
2973 UInt dd = INSN(4,0);
2974 putIReg64orZR(dd, unop(Iop_128HIto64,
2975 binop(isU ? Iop_MullU64 : Iop_MullS64,
2976 getIReg64orZR(nn), getIReg64orZR(mm))));
2977 DIP("%cmulh %s, %s, %s\n",
2978 isU ? 'u' : 's',
2979 nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
2980 return True;
2981 }
2982
2983 /* -------------------- M{ADD,SUB} -------------------- */
2984 /* 31 30 20 15 14 9 4
2985 sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n
2986 sf 00 11011 000 m 1 a n r MSUB Rd,Rn,Rm,Ra d = a-m*n
2987 */
2988 if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
2989 Bool is64 = INSN(31,31) == 1;
2990 UInt mm = INSN(20,16);
2991 Bool isAdd = INSN(15,15) == 0;
2992 UInt aa = INSN(14,10);
2993 UInt nn = INSN(9,5);
2994 UInt dd = INSN(4,0);
2995 if (is64) {
2996 putIReg64orZR(
2997 dd,
2998 binop(isAdd ? Iop_Add64 : Iop_Sub64,
2999 getIReg64orZR(aa),
3000 binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
3001 } else {
3002 putIReg32orZR(
3003 dd,
3004 binop(isAdd ? Iop_Add32 : Iop_Sub32,
3005 getIReg32orZR(aa),
3006 binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
3007 }
3008 DIP("%s %s, %s, %s, %s\n",
3009 isAdd ? "madd" : "msub",
3010 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3011 nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
3012 return True;
3013 }
3014
3015 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
3016 /* 31 30 28 20 15 11 9 4
3017 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm
3018 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm
3019 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm
3020 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm
3021 In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
3022 */
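/* Informal note, not part of the decode logic: the common assembler
   aliases build on these, e.g. CSET Rd,cond is CSINC Rd,ZR,ZR with the
   condition inverted, and CINC Rd,Rn,cond is CSINC Rd,Rn,Rn with the
   condition inverted. */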
3023 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
3024 Bool is64 = INSN(31,31) == 1;
3025 UInt b30 = INSN(30,30);
3026 UInt mm = INSN(20,16);
3027 UInt cond = INSN(15,12);
3028 UInt b10 = INSN(10,10);
3029 UInt nn = INSN(9,5);
3030 UInt dd = INSN(4,0);
3031 UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
3032 IRType ty = is64 ? Ity_I64 : Ity_I32;
3033 IRExpr* argL = getIRegOrZR(is64, nn);
3034 IRExpr* argR = getIRegOrZR(is64, mm);
3035 switch (op) {
3036 case BITS2(0,0):
3037 break;
3038 case BITS2(0,1):
3039 argR = binop(mkADD(ty), argR, mkU(ty,1));
3040 break;
3041 case BITS2(1,0):
3042 argR = unop(mkNOT(ty), argR);
3043 break;
3044 case BITS2(1,1):
3045 argR = binop(mkSUB(ty), mkU(ty,0), argR);
3046 break;
3047 default:
3048 vassert(0);
3049 }
3050 putIRegOrZR(
3051 is64, dd,
3052 IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
3053 argL, argR)
3054 );
3055 const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
3056 DIP("%s %s, %s, %s, %s\n", op_nm[op],
3057 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3058 nameIRegOrZR(is64, mm), nameCC(cond));
3059 return True;
3060 }
3061
3062 /* -------------- ADD/SUB(extended reg) -------------- */
3063 /* 28 20 15 12 9 4
3064 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld
3065 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld
3066
3067 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld
3068 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld
3069
3070 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld
3071 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld
3072
3073 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld
3074 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld
3075
3076 The 'm' operand is extended per opt, thusly:
3077
3078 000 Xm & 0xFF UXTB
3079 001 Xm & 0xFFFF UXTH
3080 010 Xm & (2^32)-1 UXTW
3081 011 Xm UXTX
3082
3083 100 Xm sx from bit 7 SXTB
3084 101 Xm sx from bit 15 SXTH
3085 110 Xm sx from bit 31 SXTW
3086 111 Xm SXTX
3087
3088 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
3089 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX
3090 are the identity operation on Wm.
3091
3092 After extension, the value is shifted left by imm3 bits, which
3093 may only be in the range 0 .. 4 inclusive.
3094 */
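/* Concrete example (illustrative only): "add x0, x1, w2, uxtw #2"
   zero-extends W2 to 64 bits, shifts it left by 2, and adds it to X1 --
   the typical array-indexing form of this instruction. */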
3095 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
3096 Bool is64 = INSN(31,31) == 1;
3097 Bool isSub = INSN(30,30) == 1;
3098 Bool setCC = INSN(29,29) == 1;
3099 UInt mm = INSN(20,16);
3100 UInt opt = INSN(15,13);
3101 UInt imm3 = INSN(12,10);
3102 UInt nn = INSN(9,5);
3103 UInt dd = INSN(4,0);
3104 const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
3105 "sxtb", "sxth", "sxtw", "sxtx" };
3106 /* Do almost the same thing in the 32- and 64-bit cases. */
3107 IRTemp xN = newTemp(Ity_I64);
3108 IRTemp xM = newTemp(Ity_I64);
3109 assign(xN, getIReg64orSP(nn));
3110 assign(xM, getIReg64orZR(mm));
3111 IRExpr* xMw = mkexpr(xM); /* "xM widened" */
3112 Int shSX = 0;
3113 /* widen Xm .. */
3114 switch (opt) {
3115 case BITS3(0,0,0): // UXTB
3116 xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
3117 case BITS3(0,0,1): // UXTH
3118 xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
3119 case BITS3(0,1,0): // UXTW -- noop for the 32bit case
3120 if (is64) {
3121 xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
3122 }
3123 break;
3124 case BITS3(0,1,1): // UXTX -- always a noop
3125 break;
3126 case BITS3(1,0,0): // SXTB
3127 shSX = 56; goto sxTo64;
3128 case BITS3(1,0,1): // SXTH
3129 shSX = 48; goto sxTo64;
3130 case BITS3(1,1,0): // SXTW -- noop for the 32bit case
3131 if (is64) {
3132 shSX = 32; goto sxTo64;
3133 }
3134 break;
3135 case BITS3(1,1,1): // SXTX -- always a noop
3136 break;
3137 sxTo64:
3138 vassert(shSX >= 32);
3139 xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
3140 mkU8(shSX));
3141 break;
3142 default:
3143 vassert(0);
3144 }
3145 /* and now shift */
3146 IRTemp argL = xN;
3147 IRTemp argR = newTemp(Ity_I64);
3148 assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
3149 IRTemp res = newTemp(Ity_I64);
3150 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
3151 mkexpr(argL), mkexpr(argR)));
3152 if (is64) {
3153 if (setCC) {
3154 putIReg64orZR(dd, mkexpr(res));
3155 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
3156 } else {
3157 putIReg64orSP(dd, mkexpr(res));
3158 }
3159 } else {
3160 if (setCC) {
3161 IRTemp argL32 = newTemp(Ity_I32);
3162 IRTemp argR32 = newTemp(Ity_I32);
3163 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
3164 assign(argL32, unop(Iop_64to32, mkexpr(argL)));
3165 assign(argR32, unop(Iop_64to32, mkexpr(argR)));
3166 setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
3167 } else {
3168 putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
3169 }
3170 }
3171 DIP("%s%s %s, %s, %s %s lsl %u\n",
3172 isSub ? "sub" : "add", setCC ? "s" : "",
3173 setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
3174 nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
3175 nameExt[opt], imm3);
3176 return True;
3177 }
3178
3179 /* ---------------- CCMP/CCMN(imm) ---------------- */
3180 /* Bizarrely, these appear in the "data processing register"
3181 category, even though they are operations against an
3182 immediate. */
3183 /* 31 29 20 15 11 9 3
3184 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond
3185 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond
3186
3187 Operation is:
3188 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
3189 (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
3190 */
3191 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3192 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
3193 Bool is64 = INSN(31,31) == 1;
3194 Bool isSUB = INSN(30,30) == 1;
3195 UInt imm5 = INSN(20,16);
3196 UInt cond = INSN(15,12);
3197 UInt nn = INSN(9,5);
3198 UInt nzcv = INSN(3,0);
3199
3200 IRTemp condT = newTemp(Ity_I1);
3201 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3202
3203 IRType ty = is64 ? Ity_I64 : Ity_I32;
3204 IRTemp argL = newTemp(ty);
3205 IRTemp argR = newTemp(ty);
3206
3207 if (is64) {
3208 assign(argL, getIReg64orZR(nn));
3209 assign(argR, mkU64(imm5));
3210 } else {
3211 assign(argL, getIReg32orZR(nn));
3212 assign(argR, mkU32(imm5));
3213 }
3214 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3215
3216 DIP("ccm%c %s, #%u, #%u, %s\n",
3217 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3218 imm5, nzcv, nameCC(cond));
3219 return True;
3220 }
3221
3222 /* ---------------- CCMP/CCMN(reg) ---------------- */
3223 /* 31 29 20 15 11 9 3
3224 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond
3225 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond
3226 Operation is:
3227 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
3228 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
3229 */
3230 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3231 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
3232 Bool is64 = INSN(31,31) == 1;
3233 Bool isSUB = INSN(30,30) == 1;
3234 UInt mm = INSN(20,16);
3235 UInt cond = INSN(15,12);
3236 UInt nn = INSN(9,5);
3237 UInt nzcv = INSN(3,0);
3238
3239 IRTemp condT = newTemp(Ity_I1);
3240 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3241
3242 IRType ty = is64 ? Ity_I64 : Ity_I32;
3243 IRTemp argL = newTemp(ty);
3244 IRTemp argR = newTemp(ty);
3245
3246 if (is64) {
3247 assign(argL, getIReg64orZR(nn));
3248 assign(argR, getIReg64orZR(mm));
3249 } else {
3250 assign(argL, getIReg32orZR(nn));
3251 assign(argR, getIReg32orZR(mm));
3252 }
3253 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3254
3255 DIP("ccm%c %s, %s, #%u, %s\n",
3256 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3257 nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
3258 return True;
3259 }
3260
3261
3262 /* -------------- REV/REV16/REV32/RBIT -------------- */
3263 /* 31 30 28 20 15 11 9 4
3264
3265 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn
3266 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn
3267
3268 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn
3269 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn
3270
3271 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn
3272 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn
3273
3274 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn
3275 */
3276 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3277 && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
3278 UInt b31 = INSN(31,31);
3279 UInt opc = INSN(11,10);
3280
3281 UInt ix = 0;
3282 /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
3283 else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
3284 else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
3285 else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
3286 else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
3287 else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
3288 else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
3289 if (ix >= 1 && ix <= 7) {
3290 Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
3291 UInt nn = INSN(9,5);
3292 UInt dd = INSN(4,0);
3293 IRTemp src = newTemp(Ity_I64);
3294 IRTemp dst = IRTemp_INVALID;
3295 IRTemp (*math)(IRTemp) = NULL;
3296 switch (ix) {
3297 case 1: case 2: math = math_BYTESWAP64; break;
3298 case 3: case 4: math = math_BITSWAP64; break;
3299 case 5: case 6: math = math_USHORTSWAP64; break;
3300 case 7: math = math_UINTSWAP64; break;
3301 default: vassert(0);
3302 }
3303 const HChar* names[7]
3304 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
3305 const HChar* nm = names[ix-1];
3306 vassert(math);
3307 if (ix == 6) {
3308 /* This has to be special cased, since the logic below doesn't
3309 handle it correctly. */
3310 assign(src, getIReg64orZR(nn));
3311 dst = math(src);
3312 putIReg64orZR(dd,
3313 unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
3314 } else if (is64) {
3315 assign(src, getIReg64orZR(nn));
3316 dst = math(src);
3317 putIReg64orZR(dd, mkexpr(dst));
3318 } else {
3319 assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
3320 dst = math(src);
3321 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3322 }
3323 DIP("%s %s, %s\n", nm,
3324 nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
3325 return True;
3326 }
3327 /* else fall through */
3328 }
3329
3330 /* -------------------- CLZ/CLS -------------------- */
3331 /* 30 28 24 20 15 9 4
3332 sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn
3333 sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn
3334 */
3335 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3336 && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
3337 Bool is64 = INSN(31,31) == 1;
3338 Bool isCLS = INSN(10,10) == 1;
3339 UInt nn = INSN(9,5);
3340 UInt dd = INSN(4,0);
3341 IRTemp src = newTemp(Ity_I64);
3342 IRTemp srcZ = newTemp(Ity_I64);
3343 IRTemp dst = newTemp(Ity_I64);
3344 /* Get the argument, widened out to 64 bit */
3345 if (is64) {
3346 assign(src, getIReg64orZR(nn));
3347 } else {
3348 assign(src, binop(Iop_Shl64,
3349 unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
3350 }
3351 /* If this is CLS, mash the arg around accordingly */
3352 if (isCLS) {
3353 IRExpr* one = mkU8(1);
3354 assign(srcZ,
3355 binop(Iop_Xor64,
3356 binop(Iop_Shl64, mkexpr(src), one),
3357 binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one), one)));
3358 } else {
3359 assign(srcZ, mkexpr(src));
3360 }
3361 /* And compute CLZ. */
3362 if (is64) {
3363 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3364 mkU64(isCLS ? 63 : 64),
3365 unop(Iop_Clz64, mkexpr(srcZ))));
3366 putIReg64orZR(dd, mkexpr(dst));
3367 } else {
3368 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3369 mkU64(isCLS ? 31 : 32),
3370 unop(Iop_Clz64, mkexpr(srcZ))));
3371 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3372 }
3373 DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
3374 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
3375 return True;
3376 }
3377
3378 /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
3379 /* 30 28 20 15 11 9 4
3380 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
3381 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
3382 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
3383 sf 00 1101 0110 m 0010 11 n d RORV Rd,Rn,Rm
3384 */
3385 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3386 && INSN(15,12) == BITS4(0,0,1,0)) {
3387 Bool is64 = INSN(31,31) == 1;
3388 UInt mm = INSN(20,16);
3389 UInt op = INSN(11,10);
3390 UInt nn = INSN(9,5);
3391 UInt dd = INSN(4,0);
3392 IRType ty = is64 ? Ity_I64 : Ity_I32;
3393 IRTemp srcL = newTemp(ty);
3394 IRTemp srcR = newTemp(Ity_I64);
3395 IRTemp res = newTemp(ty);
3396 IROp iop = Iop_INVALID;
3397 assign(srcL, getIRegOrZR(is64, nn));
3398 assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
3399 mkU64(is64 ? 63 : 31)));
3400 if (op < 3) {
3401 // LSLV, LSRV, ASRV
3402 switch (op) {
3403 case BITS2(0,0): iop = mkSHL(ty); break;
3404 case BITS2(0,1): iop = mkSHR(ty); break;
3405 case BITS2(1,0): iop = mkSAR(ty); break;
3406 default: vassert(0);
3407 }
3408 assign(res, binop(iop, mkexpr(srcL),
3409 unop(Iop_64to8, mkexpr(srcR))));
3410 } else {
3411 // RORV
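         // For a nonzero rotate amount r, ROR(x, r) == (x >>u r) | (x << (W - r)),
         // where W is 32 or 64 as appropriate.  The r == 0 case is handled
         // separately by the ITE below, since it would otherwise require a
         // shift by W, which is out of range.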
3412 IROp opSHL = mkSHL(ty);
3413 IROp opSHR = mkSHR(ty);
3414 IROp opOR = mkOR(ty);
3415 IRExpr* width = mkU64(is64 ? 64: 32);
3416 assign(
3417 res,
3418 IRExpr_ITE(
3419 binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
3420 mkexpr(srcL),
3421 binop(opOR,
3422 binop(opSHL,
3423 mkexpr(srcL),
3424 unop(Iop_64to8, binop(Iop_Sub64, width,
3425 mkexpr(srcR)))),
3426 binop(opSHR,
3427 mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
3428 ));
3429 }
3430 putIRegOrZR(is64, dd, mkexpr(res));
3431 vassert(op < 4);
3432 const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
3433 DIP("%s %s, %s, %s\n",
3434 names[op], nameIRegOrZR(is64,dd),
3435 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
3436 return True;
3437 }
3438
3439 /* -------------------- SDIV/UDIV -------------------- */
3440 /* 30 28 20 15 10 9 4
3441 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
3442 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
3443 */
3444 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3445 && INSN(15,11) == BITS5(0,0,0,0,1)) {
3446 Bool is64 = INSN(31,31) == 1;
3447 UInt mm = INSN(20,16);
3448 Bool isS = INSN(10,10) == 1;
3449 UInt nn = INSN(9,5);
3450 UInt dd = INSN(4,0);
3451 if (isS) {
3452 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
3453 getIRegOrZR(is64, nn),
3454 getIRegOrZR(is64, mm)));
3455 } else {
3456 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
3457 getIRegOrZR(is64, nn),
3458 getIRegOrZR(is64, mm)));
3459 }
3460 DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
3461 nameIRegOrZR(is64, dd),
3462 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
3463 return True;
3464 }
3465
3466 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
3467 /* 31 23 20 15 14 9 4
3468 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
3469 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
3470 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
3471 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
3472 with operation
3473 Xd = Xa +/- (Wn *u/s Wm)
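      For example, "umaddl x0, w1, w2, x3" computes
         X0 = X3 + (zero-extended W1) * (zero-extended W2)
      and SMSUBL instead uses a signed widening multiply and a subtraction.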
3474 */
3475 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
3476 Bool isU = INSN(23,23) == 1;
3477 UInt mm = INSN(20,16);
3478 Bool isAdd = INSN(15,15) == 0;
3479 UInt aa = INSN(14,10);
3480 UInt nn = INSN(9,5);
3481 UInt dd = INSN(4,0);
3482 IRTemp wN = newTemp(Ity_I32);
3483 IRTemp wM = newTemp(Ity_I32);
3484 IRTemp xA = newTemp(Ity_I64);
3485 IRTemp muld = newTemp(Ity_I64);
3486 IRTemp res = newTemp(Ity_I64);
3487 assign(wN, getIReg32orZR(nn));
3488 assign(wM, getIReg32orZR(mm));
3489 assign(xA, getIReg64orZR(aa));
3490 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
3491 mkexpr(wN), mkexpr(wM)));
3492 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
3493 mkexpr(xA), mkexpr(muld)));
3494 putIReg64orZR(dd, mkexpr(res));
3495 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
3496 nameIReg64orZR(dd), nameIReg32orZR(nn),
3497 nameIReg32orZR(mm), nameIReg64orZR(aa));
3498 return True;
3499 }
3500 vex_printf("ARM64 front end: data_processing_register\n");
3501 return False;
3502 # undef INSN
3503 }
3504
3505
3506 /*------------------------------------------------------------*/
3507 /*--- Math helpers for vector interleave/deinterleave ---*/
3508 /*------------------------------------------------------------*/
3509
3510 #define EX(_tmp) \
3511 mkexpr(_tmp)
3512 #define SL(_hi128,_lo128,_nbytes) \
3513 ( (_nbytes) == 0 \
3514 ? (_lo128) \
3515 : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
3516 #define ROR(_v128,_nbytes) \
3517 SL((_v128),(_v128),(_nbytes))
3518 #define ROL(_v128,_nbytes) \
3519 SL((_v128),(_v128),16-(_nbytes))
3520 #define SHR(_v128,_nbytes) \
3521 binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
3522 #define SHL(_v128,_nbytes) \
3523 binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
3524 #define ILO64x2(_argL,_argR) \
3525 binop(Iop_InterleaveLO64x2,(_argL),(_argR))
3526 #define IHI64x2(_argL,_argR) \
3527 binop(Iop_InterleaveHI64x2,(_argL),(_argR))
3528 #define ILO32x4(_argL,_argR) \
3529 binop(Iop_InterleaveLO32x4,(_argL),(_argR))
3530 #define IHI32x4(_argL,_argR) \
3531 binop(Iop_InterleaveHI32x4,(_argL),(_argR))
3532 #define ILO16x8(_argL,_argR) \
3533 binop(Iop_InterleaveLO16x8,(_argL),(_argR))
3534 #define IHI16x8(_argL,_argR) \
3535 binop(Iop_InterleaveHI16x8,(_argL),(_argR))
3536 #define ILO8x16(_argL,_argR) \
3537 binop(Iop_InterleaveLO8x16,(_argL),(_argR))
3538 #define IHI8x16(_argL,_argR) \
3539 binop(Iop_InterleaveHI8x16,(_argL),(_argR))
3540 #define CEV32x4(_argL,_argR) \
3541 binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
3542 #define COD32x4(_argL,_argR) \
3543 binop(Iop_CatOddLanes32x4,(_argL),(_argR))
3544 #define COD16x8(_argL,_argR) \
3545 binop(Iop_CatOddLanes16x8,(_argL),(_argR))
3546 #define COD8x16(_argL,_argR) \
3547 binop(Iop_CatOddLanes8x16,(_argL),(_argR))
3548 #define CEV8x16(_argL,_argR) \
3549 binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
3550 #define AND(_arg1,_arg2) \
3551 binop(Iop_AndV128,(_arg1),(_arg2))
3552 #define OR2(_arg1,_arg2) \
3553 binop(Iop_OrV128,(_arg1),(_arg2))
3554 #define OR3(_arg1,_arg2,_arg3) \
3555 binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
3556 #define OR4(_arg1,_arg2,_arg3,_arg4) \
3557 binop(Iop_OrV128, \
3558 binop(Iop_OrV128,(_arg1),(_arg2)), \
3559 binop(Iop_OrV128,(_arg3),(_arg4)))
3560
3561
3562 /* Do interleaving for 1 128 bit vector, for ST1 insns. */
3563 static
3564 void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
3565 UInt laneSzBlg2, IRTemp u0 )
3566 {
3567 assign(*i0, mkexpr(u0));
3568 }
3569
3570
3571 /* Do interleaving for 2 128 bit vectors, for ST2 insns. */
3572 static
3573 void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
3574 UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
3575 {
3576 /* This is pretty easy, since we have primitives directly to
3577 hand. */
3578 if (laneSzBlg2 == 3) {
3579 // 64x2
3580 // u1 == B1 B0, u0 == A1 A0
3581 // i1 == B1 A1, i0 == B0 A0
3582 assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
3583 assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
3584 return;
3585 }
3586 if (laneSzBlg2 == 2) {
3587 // 32x4
3588 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3589 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3590 assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
3591 assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
3592 return;
3593 }
3594 if (laneSzBlg2 == 1) {
3595 // 16x8
3596 // u1 == B{7..0}, u0 == A{7..0}
3597 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3598 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3599 assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
3600 assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
3601 return;
3602 }
3603 if (laneSzBlg2 == 0) {
3604 // 8x16
3605 // u1 == B{f..0}, u0 == A{f..0}
3606 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3607 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3608 assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
3609 assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
3610 return;
3611 }
3612 /*NOTREACHED*/
3613 vassert(0);
3614 }
3615
3616
3617 /* Do interleaving for 3 128 bit vectors, for ST3 insns. */
3618 static
3619 void math_INTERLEAVE3_128(
3620 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
3621 UInt laneSzBlg2,
3622 IRTemp u0, IRTemp u1, IRTemp u2 )
3623 {
3624 if (laneSzBlg2 == 3) {
3625 // 64x2
3626 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
3627 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
3628 assign(*i2, IHI64x2( EX(u2), EX(u1) ));
3629 assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
3630 assign(*i0, ILO64x2( EX(u1), EX(u0) ));
3631 return;
3632 }
3633
3634 if (laneSzBlg2 == 2) {
3635 // 32x4
3636 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
3637 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
3638      // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
3639 IRTemp p0 = newTempV128();
3640 IRTemp p1 = newTempV128();
3641 IRTemp p2 = newTempV128();
3642 IRTemp c1100 = newTempV128();
3643 IRTemp c0011 = newTempV128();
3644 IRTemp c0110 = newTempV128();
3645 assign(c1100, mkV128(0xFF00));
3646 assign(c0011, mkV128(0x00FF));
3647 assign(c0110, mkV128(0x0FF0));
3648 // First interleave them at 64x2 granularity,
3649 // generating partial ("p") values.
3650 math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
3651 // And more shuffling around for the final answer
3652 assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
3653 AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
3654 assign(*i1, OR3( SHL(EX(p2),12),
3655 AND(EX(p1),EX(c0110)),
3656 SHR(EX(p0),12) ));
3657 assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
3658 AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
3659 return;
3660 }
3661
3662 if (laneSzBlg2 == 1) {
3663 // 16x8
3664 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
3665 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
3666 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
3667 //
3668 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
3669 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
3670 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
3671 //
3672 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
3673      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
3674 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
3675 IRTemp p0 = newTempV128();
3676 IRTemp p1 = newTempV128();
3677 IRTemp p2 = newTempV128();
3678 IRTemp c1000 = newTempV128();
3679 IRTemp c0100 = newTempV128();
3680 IRTemp c0010 = newTempV128();
3681 IRTemp c0001 = newTempV128();
3682 assign(c1000, mkV128(0xF000));
3683 assign(c0100, mkV128(0x0F00));
3684 assign(c0010, mkV128(0x00F0));
3685 assign(c0001, mkV128(0x000F));
3686 // First interleave them at 32x4 granularity,
3687 // generating partial ("p") values.
3688 math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
3689 // And more shuffling around for the final answer
3690 assign(*i2,
3691 OR4( AND( IHI16x8( EX(p2), ROL(EX(p2),4) ), EX(c1000) ),
3692 AND( IHI16x8( ROL(EX(p2),6), EX(p2) ), EX(c0100) ),
3693 AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
3694 AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
3695 ));
3696 assign(*i1,
3697 OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
3698 AND( IHI16x8( EX(p1), ROL(EX(p1),4) ), EX(c0100) ),
3699 AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
3700 AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
3701 ));
3702 assign(*i0,
3703 OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
3704 AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
3705 AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
3706 AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
3707 ));
3708 return;
3709 }
3710
3711 if (laneSzBlg2 == 0) {
3712 // 8x16. It doesn't seem worth the hassle of first doing a
3713 // 16x8 interleave, so just generate all 24 partial results
3714 // directly :-(
3715 // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
3716 // i2 == Cf Bf Af Ce .. Bb Ab Ca
3717 // i1 == Ba Aa C9 B9 .. A6 C5 B5
3718 // i0 == A5 C4 B4 A4 .. C0 B0 A0
3719
3720 IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
3721 IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
3722 IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
3723 IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
3724 IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
3725 IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
3726 IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
3727 IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
3728 IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();
3729
3730 // eg XXXX(qqq, CC, 0xF, BB, 0xA)) sets qqq to be a vector
3731 // of the form 14 bytes junk : CC[0xF] : BB[0xA]
3732 //
3733 # define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
3734 IRTemp t_##_tempName = newTempV128(); \
3735 assign(t_##_tempName, \
3736 ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
3737 ROR(EX(_srcVec2),(_srcShift2)) ) )
3738
3739 // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
3740 IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;
3741
3742 // The slicing and reassembly are done as interleavedly as possible,
3743 // so as to minimise the demand for registers in the back end, which
3744 // was observed to be a problem in testing.
3745
3746 XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
3747 XXXX(AfCe, AA, 0xf, CC, 0xe);
3748 assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));
3749
3750 XXXX(BeAe, BB, 0xe, AA, 0xe);
3751 XXXX(CdBd, CC, 0xd, BB, 0xd);
3752 assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
3753 assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));
3754
3755 XXXX(AdCc, AA, 0xd, CC, 0xc);
3756 XXXX(BcAc, BB, 0xc, AA, 0xc);
3757 assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));
3758
3759 XXXX(CbBb, CC, 0xb, BB, 0xb);
3760 XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
3761 assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
3762 assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
3763 assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));
3764
3765 XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
3766 XXXX(C9B9, CC, 0x9, BB, 0x9);
3767 assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));
3768
3769 XXXX(A9C8, AA, 0x9, CC, 0x8);
3770 XXXX(B8A8, BB, 0x8, AA, 0x8);
3771 assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
3772 assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));
3773
3774 XXXX(C7B7, CC, 0x7, BB, 0x7);
3775 XXXX(A7C6, AA, 0x7, CC, 0x6);
3776 assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));
3777
3778 XXXX(B6A6, BB, 0x6, AA, 0x6);
3779 XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
3780 assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
3781 assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
3782 assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));
3783
3784 XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
3785 XXXX(B4A4, BB, 0x4, AA, 0x4);
3786 assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));
3787
3788 XXXX(C3B3, CC, 0x3, BB, 0x3);
3789 XXXX(A3C2, AA, 0x3, CC, 0x2);
3790 assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
3791 assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));
3792
3793 XXXX(B2A2, BB, 0x2, AA, 0x2);
3794 XXXX(C1B1, CC, 0x1, BB, 0x1);
3795 assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));
3796
3797 XXXX(A1C0, AA, 0x1, CC, 0x0);
3798 XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
3799 assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
3800 assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
3801 assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));
3802
3803 # undef XXXX
3804 return;
3805 }
3806
3807 /*NOTREACHED*/
3808 vassert(0);
3809 }
3810
3811
3812 /* Do interleaving for 4 128 bit vectors, for ST4 insns. */
3813 static
3814 void math_INTERLEAVE4_128(
3815 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
3816 UInt laneSzBlg2,
3817 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
3818 {
3819 if (laneSzBlg2 == 3) {
3820 // 64x2
3821 assign(*i0, ILO64x2(EX(u1), EX(u0)));
3822 assign(*i1, ILO64x2(EX(u3), EX(u2)));
3823 assign(*i2, IHI64x2(EX(u1), EX(u0)));
3824 assign(*i3, IHI64x2(EX(u3), EX(u2)));
3825 return;
3826 }
3827 if (laneSzBlg2 == 2) {
3828 // 32x4
3829 // First, interleave at the 64-bit lane size.
3830 IRTemp p0 = newTempV128();
3831 IRTemp p1 = newTempV128();
3832 IRTemp p2 = newTempV128();
3833 IRTemp p3 = newTempV128();
3834 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
3835 // And interleave (cat) at the 32 bit size.
3836 assign(*i0, CEV32x4(EX(p1), EX(p0)));
3837 assign(*i1, COD32x4(EX(p1), EX(p0)));
3838 assign(*i2, CEV32x4(EX(p3), EX(p2)));
3839 assign(*i3, COD32x4(EX(p3), EX(p2)));
3840 return;
3841 }
3842 if (laneSzBlg2 == 1) {
3843 // 16x8
3844 // First, interleave at the 32-bit lane size.
3845 IRTemp p0 = newTempV128();
3846 IRTemp p1 = newTempV128();
3847 IRTemp p2 = newTempV128();
3848 IRTemp p3 = newTempV128();
3849 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
3850 // And rearrange within each vector, to get the right 16 bit lanes.
3851 assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
3852 assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
3853 assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
3854 assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
3855 return;
3856 }
3857 if (laneSzBlg2 == 0) {
3858 // 8x16
3859 // First, interleave at the 16-bit lane size.
3860 IRTemp p0 = newTempV128();
3861 IRTemp p1 = newTempV128();
3862 IRTemp p2 = newTempV128();
3863 IRTemp p3 = newTempV128();
3864 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
3865 // And rearrange within each vector, to get the right 8 bit lanes.
3866 assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
3867 assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
3868 assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
3869 assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
3870 return;
3871 }
3872 /*NOTREACHED*/
3873 vassert(0);
3874 }
3875
3876
3877 /* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
3878 static
3879 void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
3880 UInt laneSzBlg2, IRTemp i0 )
3881 {
3882 assign(*u0, mkexpr(i0));
3883 }
3884
3885
3886 /* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
3887 static
3888 void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
3889 UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
3890 {
3891 /* This is pretty easy, since we have primitives directly to
3892 hand. */
3893 if (laneSzBlg2 == 3) {
3894 // 64x2
3895 // i1 == B1 A1, i0 == B0 A0
3896 // u1 == B1 B0, u0 == A1 A0
3897 assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
3898 assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
3899 return;
3900 }
3901 if (laneSzBlg2 == 2) {
3902 // 32x4
3903 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3904 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3905 assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
3906 assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0)));
3907 return;
3908 }
3909 if (laneSzBlg2 == 1) {
3910 // 16x8
3911 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3912 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3913 // u1 == B{7..0}, u0 == A{7..0}
3914 assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
3915 assign(*u1, binop(Iop_CatOddLanes16x8, mkexpr(i1), mkexpr(i0)));
3916 return;
3917 }
3918 if (laneSzBlg2 == 0) {
3919 // 8x16
3920 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3921 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3922 // u1 == B{f..0}, u0 == A{f..0}
3923 assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
3924 assign(*u1, binop(Iop_CatOddLanes8x16, mkexpr(i1), mkexpr(i0)));
3925 return;
3926 }
3927 /*NOTREACHED*/
3928 vassert(0);
3929 }
3930
3931
3932 /* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
3933 static
3934 void math_DEINTERLEAVE3_128(
3935 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
3936 UInt laneSzBlg2,
3937 IRTemp i0, IRTemp i1, IRTemp i2 )
3938 {
3939 if (laneSzBlg2 == 3) {
3940 // 64x2
3941 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
3942 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
3943 assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1) ));
3944 assign(*u1, ILO64x2( EX(i2), ROL(EX(i0),8) ));
3945 assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0) ));
3946 return;
3947 }
3948
3949 if (laneSzBlg2 == 2) {
3950 // 32x4
3951      // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
3952 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
3953 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
3954 IRTemp t_a1c0b0a0 = newTempV128();
3955 IRTemp t_a2c1b1a1 = newTempV128();
3956 IRTemp t_a3c2b2a2 = newTempV128();
3957 IRTemp t_a0c3b3a3 = newTempV128();
3958 IRTemp p0 = newTempV128();
3959 IRTemp p1 = newTempV128();
3960 IRTemp p2 = newTempV128();
3961 // Compute some intermediate values.
3962 assign(t_a1c0b0a0, EX(i0));
3963 assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
3964 assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
3965 assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
3966 // First deinterleave into lane-pairs
3967 assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
3968 assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
3969 IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
3970 assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
3971 // Then deinterleave at 64x2 granularity.
3972 math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
3973 return;
3974 }
3975
3976 if (laneSzBlg2 == 1) {
3977 // 16x8
3978 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
3979 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
3980 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
3981 //
3982 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
3983      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
3984 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
3985 //
3986 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
3987 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
3988 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
3989
3990 IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
3991 s0 = s1 = s2 = s3
3992 = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
3993 newTempsV128_4(&s0, &s1, &s2, &s3);
3994 newTempsV128_4(&t0, &t1, &t2, &t3);
3995 newTempsV128_4(&p0, &p1, &p2, &c00111111);
3996
3997 // s0 == b2a2 c1b1a1 c0b0a0
3998      // s1 == b4a4 c3b3a3 c2b2a2
3999 // s2 == b6a6 c5b5a5 c4b4a4
4000 // s3 == b0a0 c7b7a7 c6b6a6
4001 assign(s0, EX(i0));
4002 assign(s1, SL(EX(i1),EX(i0),6*2));
4003 assign(s2, SL(EX(i2),EX(i1),4*2));
4004 assign(s3, SL(EX(i0),EX(i2),2*2));
4005
4006 // t0 == 0 0 c1c0 b1b0 a1a0
4007 // t1 == 0 0 c3c2 b3b2 a3a2
4008 // t2 == 0 0 c5c4 b5b4 a5a4
4009 // t3 == 0 0 c7c6 b7b6 a7a6
4010 assign(c00111111, mkV128(0x0FFF));
4011 assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
4012 assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
4013 assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
4014 assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));
4015
4016 assign(p0, OR2(EX(t0), SHL(EX(t1),6*2)));
4017 assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
4018 assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));
4019
4020 // Then deinterleave at 32x4 granularity.
4021 math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
4022 return;
4023 }
4024
4025 if (laneSzBlg2 == 0) {
4026 // 8x16. This is the same scheme as for 16x8, with twice the
4027 // number of intermediate values.
4028 //
4029 // u2 == C{f..0}
4030 // u1 == B{f..0}
4031 // u0 == A{f..0}
4032 //
4033 // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
4034 // i1 == BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
4035 // i0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4036 //
4037 // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
4038 // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
4039 // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
4040 //
4041 IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
4042 t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
4043 s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
4044 = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
4045 = IRTemp_INVALID;
4046 newTempsV128_4(&s0, &s1, &s2, &s3);
4047 newTempsV128_4(&s4, &s5, &s6, &s7);
4048 newTempsV128_4(&t0, &t1, &t2, &t3);
4049 newTempsV128_4(&t4, &t5, &t6, &t7);
4050 newTempsV128_4(&p0, &p1, &p2, &cMASK);
4051
4052 // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4053 // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
4054 // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
4055 // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
4056 // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
4057 // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
4058 // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
4059 // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
4060 assign(s0, SL(EX(i1),EX(i0), 0));
4061 assign(s1, SL(EX(i1),EX(i0), 6));
4062 assign(s2, SL(EX(i1),EX(i0),12));
4063 assign(s3, SL(EX(i2),EX(i1), 2));
4064 assign(s4, SL(EX(i2),EX(i1), 8));
4065 assign(s5, SL(EX(i2),EX(i1),14));
4066 assign(s6, SL(EX(i0),EX(i2), 4));
4067 assign(s7, SL(EX(i0),EX(i2),10));
4068
4069 // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
4070 // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
4071 // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
4072 // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
4073 // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
4074 // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
4075 // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
4076 // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
4077 assign(cMASK, mkV128(0x003F));
4078 assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
4079 assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
4080 assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
4081 assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
4082 assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
4083 assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
4084 assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
4085 assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));
4086
4087 assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
4088 assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
4089 SHL(EX(t3),2), SHR(EX(t2),4) ));
4090 assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));
4091
4092 // Then deinterleave at 16x8 granularity.
4093 math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
4094 return;
4095 }
4096
4097 /*NOTREACHED*/
4098 vassert(0);
4099 }
4100
4101
4102 /* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
4103 static
4104 void math_DEINTERLEAVE4_128(
4105 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4106 UInt laneSzBlg2,
4107 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4108 {
4109 if (laneSzBlg2 == 3) {
4110 // 64x2
4111 assign(*u0, ILO64x2(EX(i2), EX(i0)));
4112 assign(*u1, IHI64x2(EX(i2), EX(i0)));
4113 assign(*u2, ILO64x2(EX(i3), EX(i1)));
4114 assign(*u3, IHI64x2(EX(i3), EX(i1)));
4115 return;
4116 }
4117 if (laneSzBlg2 == 2) {
4118 // 32x4
4119 IRTemp p0 = newTempV128();
4120 IRTemp p2 = newTempV128();
4121 IRTemp p1 = newTempV128();
4122 IRTemp p3 = newTempV128();
4123 assign(p0, ILO32x4(EX(i1), EX(i0)));
4124 assign(p1, IHI32x4(EX(i1), EX(i0)));
4125 assign(p2, ILO32x4(EX(i3), EX(i2)));
4126 assign(p3, IHI32x4(EX(i3), EX(i2)));
4127 // And now do what we did for the 64-bit case.
4128 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
4129 return;
4130 }
4131 if (laneSzBlg2 == 1) {
4132 // 16x8
4133 // Deinterleave into 32-bit chunks, then do as the 32-bit case.
4134 IRTemp p0 = newTempV128();
4135 IRTemp p1 = newTempV128();
4136 IRTemp p2 = newTempV128();
4137 IRTemp p3 = newTempV128();
4138 assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
4139 assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
4140 assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
4141 assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
4142 // From here on is like the 32 bit case.
4143 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
4144 return;
4145 }
4146 if (laneSzBlg2 == 0) {
4147 // 8x16
4148 // Deinterleave into 16-bit chunks, then do as the 16-bit case.
4149 IRTemp p0 = newTempV128();
4150 IRTemp p1 = newTempV128();
4151 IRTemp p2 = newTempV128();
4152 IRTemp p3 = newTempV128();
4153 assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
4154 ILO8x16(EX(i0),ROL(EX(i0),4)) ));
4155 assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
4156 ILO8x16(EX(i1),ROL(EX(i1),4)) ));
4157 assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
4158 ILO8x16(EX(i2),ROL(EX(i2),4)) ));
4159 assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
4160 ILO8x16(EX(i3),ROL(EX(i3),4)) ));
4161 // From here on is like the 16 bit case.
4162 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
4163 return;
4164 }
4165 /*NOTREACHED*/
4166 vassert(0);
4167 }
4168
4169
4170 /* Wrappers that use the full-width (de)interleavers to do half-width
4171 (de)interleaving. The scheme is to clone each input lane in the
4172 lower half of each incoming value, do a full width (de)interleave
4173    at the next lane size up, and remove every other lane of the
4174 result. The returned values may have any old junk in the upper
4175 64 bits -- the caller must ignore that. */
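/* For example, with laneSzBlg2 == 2 (32 bit lanes) and inputs
   u1 == B1 B0, u0 == A1 A0 (in the lower 64 bits): the doubling step
   produces B1 B1 B0 B0 and A1 A1 A0 A0, the full-width 64x2 interleave
   then gives B1 B1 A1 A1 and B0 B0 A0 A0, and keeping every other 32 bit
   lane leaves i1 == B1 A1 and i0 == B0 A0 in the lower halves, which is
   the required interleaving. */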
4176
4177 /* Helper function -- get doubling and narrowing operations. */
4178 static
4179 void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
4180 /*OUT*/IROp* halver,
4181 UInt laneSzBlg2 )
4182 {
4183 switch (laneSzBlg2) {
4184 case 2:
4185 *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
4186 break;
4187 case 1:
4188 *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
4189 break;
4190 case 0:
4191 *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
4192 break;
4193 default:
4194 vassert(0);
4195 }
4196 }
4197
4198 /* Do interleaving for 1 64 bit vector, for ST1 insns. */
4199 static
4200 void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
4201 UInt laneSzBlg2, IRTemp u0 )
4202 {
4203 assign(*i0, mkexpr(u0));
4204 }
4205
4206
4207 /* Do interleaving for 2 64 bit vectors, for ST2 insns. */
4208 static
4209 void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
4210 UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
4211 {
4212 if (laneSzBlg2 == 3) {
4213 // 1x64, degenerate case
4214 assign(*i0, EX(u0));
4215 assign(*i1, EX(u1));
4216 return;
4217 }
4218
4219 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4220 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4221 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4222
4223 IRTemp du0 = newTempV128();
4224 IRTemp du1 = newTempV128();
4225 assign(du0, binop(doubler, EX(u0), EX(u0)));
4226 assign(du1, binop(doubler, EX(u1), EX(u1)));
4227 IRTemp di0 = newTempV128();
4228 IRTemp di1 = newTempV128();
4229 math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
4230 assign(*i0, binop(halver, EX(di0), EX(di0)));
4231 assign(*i1, binop(halver, EX(di1), EX(di1)));
4232 }
4233
4234
4235 /* Do interleaving for 3 64 bit vectors, for ST3 insns. */
4236 static
4237 void math_INTERLEAVE3_64(
4238 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
4239 UInt laneSzBlg2,
4240 IRTemp u0, IRTemp u1, IRTemp u2 )
4241 {
4242 if (laneSzBlg2 == 3) {
4243 // 1x64, degenerate case
4244 assign(*i0, EX(u0));
4245 assign(*i1, EX(u1));
4246 assign(*i2, EX(u2));
4247 return;
4248 }
4249
4250 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4251 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4252 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4253
4254 IRTemp du0 = newTempV128();
4255 IRTemp du1 = newTempV128();
4256 IRTemp du2 = newTempV128();
4257 assign(du0, binop(doubler, EX(u0), EX(u0)));
4258 assign(du1, binop(doubler, EX(u1), EX(u1)));
4259 assign(du2, binop(doubler, EX(u2), EX(u2)));
4260 IRTemp di0 = newTempV128();
4261 IRTemp di1 = newTempV128();
4262 IRTemp di2 = newTempV128();
4263 math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
4264 assign(*i0, binop(halver, EX(di0), EX(di0)));
4265 assign(*i1, binop(halver, EX(di1), EX(di1)));
4266 assign(*i2, binop(halver, EX(di2), EX(di2)));
4267 }
4268
4269
4270 /* Do interleaving for 4 64 bit vectors, for ST4 insns. */
4271 static
4272 void math_INTERLEAVE4_64(
4273 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
4274 UInt laneSzBlg2,
4275 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
4276 {
4277 if (laneSzBlg2 == 3) {
4278 // 1x64, degenerate case
4279 assign(*i0, EX(u0));
4280 assign(*i1, EX(u1));
4281 assign(*i2, EX(u2));
4282 assign(*i3, EX(u3));
4283 return;
4284 }
4285
4286 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4287 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4288 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4289
4290 IRTemp du0 = newTempV128();
4291 IRTemp du1 = newTempV128();
4292 IRTemp du2 = newTempV128();
4293 IRTemp du3 = newTempV128();
4294 assign(du0, binop(doubler, EX(u0), EX(u0)));
4295 assign(du1, binop(doubler, EX(u1), EX(u1)));
4296 assign(du2, binop(doubler, EX(u2), EX(u2)));
4297 assign(du3, binop(doubler, EX(u3), EX(u3)));
4298 IRTemp di0 = newTempV128();
4299 IRTemp di1 = newTempV128();
4300 IRTemp di2 = newTempV128();
4301 IRTemp di3 = newTempV128();
4302 math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
4303 laneSzBlg2 + 1, du0, du1, du2, du3);
4304 assign(*i0, binop(halver, EX(di0), EX(di0)));
4305 assign(*i1, binop(halver, EX(di1), EX(di1)));
4306 assign(*i2, binop(halver, EX(di2), EX(di2)));
4307 assign(*i3, binop(halver, EX(di3), EX(di3)));
4308 }
4309
4310
4311 /* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
4312 static
4313 void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
4314 UInt laneSzBlg2, IRTemp i0 )
4315 {
4316 assign(*u0, mkexpr(i0));
4317 }
4318
4319
4320 /* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
4321 static
4322 void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
4323 UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
4324 {
4325 if (laneSzBlg2 == 3) {
4326 // 1x64, degenerate case
4327 assign(*u0, EX(i0));
4328 assign(*u1, EX(i1));
4329 return;
4330 }
4331
4332 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4333 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4334 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4335
4336 IRTemp di0 = newTempV128();
4337 IRTemp di1 = newTempV128();
4338 assign(di0, binop(doubler, EX(i0), EX(i0)));
4339 assign(di1, binop(doubler, EX(i1), EX(i1)));
4340
4341 IRTemp du0 = newTempV128();
4342 IRTemp du1 = newTempV128();
4343 math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
4344 assign(*u0, binop(halver, EX(du0), EX(du0)));
4345 assign(*u1, binop(halver, EX(du1), EX(du1)));
4346 }
4347
4348
4349 /* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
4350 static
4351 void math_DEINTERLEAVE3_64(
4352 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
4353 UInt laneSzBlg2,
4354 IRTemp i0, IRTemp i1, IRTemp i2 )
4355 {
4356 if (laneSzBlg2 == 3) {
4357 // 1x64, degenerate case
4358 assign(*u0, EX(i0));
4359 assign(*u1, EX(i1));
4360 assign(*u2, EX(i2));
4361 return;
4362 }
4363
4364 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4365 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4366 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4367
4368 IRTemp di0 = newTempV128();
4369 IRTemp di1 = newTempV128();
4370 IRTemp di2 = newTempV128();
4371 assign(di0, binop(doubler, EX(i0), EX(i0)));
4372 assign(di1, binop(doubler, EX(i1), EX(i1)));
4373 assign(di2, binop(doubler, EX(i2), EX(i2)));
4374 IRTemp du0 = newTempV128();
4375 IRTemp du1 = newTempV128();
4376 IRTemp du2 = newTempV128();
4377 math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
4378 assign(*u0, binop(halver, EX(du0), EX(du0)));
4379 assign(*u1, binop(halver, EX(du1), EX(du1)));
4380 assign(*u2, binop(halver, EX(du2), EX(du2)));
4381 }
4382
4383
4384 /* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
4385 static
4386 void math_DEINTERLEAVE4_64(
4387 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4388 UInt laneSzBlg2,
4389 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4390 {
4391 if (laneSzBlg2 == 3) {
4392 // 1x64, degenerate case
4393 assign(*u0, EX(i0));
4394 assign(*u1, EX(i1));
4395 assign(*u2, EX(i2));
4396 assign(*u3, EX(i3));
4397 return;
4398 }
4399
4400 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4401 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4402 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4403
4404 IRTemp di0 = newTempV128();
4405 IRTemp di1 = newTempV128();
4406 IRTemp di2 = newTempV128();
4407 IRTemp di3 = newTempV128();
4408 assign(di0, binop(doubler, EX(i0), EX(i0)));
4409 assign(di1, binop(doubler, EX(i1), EX(i1)));
4410 assign(di2, binop(doubler, EX(i2), EX(i2)));
4411 assign(di3, binop(doubler, EX(i3), EX(i3)));
4412 IRTemp du0 = newTempV128();
4413 IRTemp du1 = newTempV128();
4414 IRTemp du2 = newTempV128();
4415 IRTemp du3 = newTempV128();
4416 math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
4417 laneSzBlg2 + 1, di0, di1, di2, di3);
4418 assign(*u0, binop(halver, EX(du0), EX(du0)));
4419 assign(*u1, binop(halver, EX(du1), EX(du1)));
4420 assign(*u2, binop(halver, EX(du2), EX(du2)));
4421 assign(*u3, binop(halver, EX(du3), EX(du3)));
4422 }
4423
4424
4425 #undef EX
4426 #undef SL
4427 #undef ROR
4428 #undef ROL
4429 #undef SHR
4430 #undef SHL
4431 #undef ILO64x2
4432 #undef IHI64x2
4433 #undef ILO32x4
4434 #undef IHI32x4
4435 #undef ILO16x8
4436 #undef IHI16x8
4437 #undef ILO8x16
4438 #undef IHI8x16
4439 #undef CEV32x4
4440 #undef COD32x4
4441 #undef COD16x8
4442 #undef COD8x16
4443 #undef CEV8x16
4444 #undef AND
4445 #undef OR2
4446 #undef OR3
4447 #undef OR4
4448
4449
4450 /*------------------------------------------------------------*/
4451 /*--- Load and Store instructions ---*/
4452 /*------------------------------------------------------------*/
4453
4454 /* Generate the EA for a "reg + reg" style amode. This is done from
4455 parts of the insn, but for sanity checking sake it takes the whole
4456 insn. This appears to depend on insn[15:12], with opt=insn[15:13]
4457 and S=insn[12]:
4458
4459 The possible forms, along with their opt:S values, are:
4460 011:0 Xn|SP + Xm
4461 111:0 Xn|SP + Xm
4462 011:1 Xn|SP + Xm * transfer_szB
4463 111:1 Xn|SP + Xm * transfer_szB
4464 010:0 Xn|SP + 32Uto64(Wm)
4465 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
4466 110:0 Xn|SP + 32Sto64(Wm)
4467 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
4468
4469 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
4470 the transfer size is insn[23,31,30]. For integer loads/stores,
4471 insn[23] is zero, hence szLg2 can be at most 3 in such cases.
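   For example, "ldr x0, [x1, x2, lsl #3]" uses the 011:1 form with
   transfer_szB == 8, and "ldr w0, [x1, w2, sxtw]" uses the 110:0 form.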
4472
4473 If the decoding fails, it returns IRTemp_INVALID.
4474
4475    isInt is True iff this decoding is for transfers to/from integer
4476 registers. If False it is for transfers to/from vector registers.
4477 */
4478 static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
4479 {
4480 UInt optS = SLICE_UInt(insn, 15, 12);
4481 UInt mm = SLICE_UInt(insn, 20, 16);
4482 UInt nn = SLICE_UInt(insn, 9, 5);
4483 UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
4484 | SLICE_UInt(insn, 31, 30); // Log2 of the size
4485
4486 buf[0] = 0;
4487
4488 /* Sanity checks, that this really is a load/store insn. */
4489 if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
4490 goto fail;
4491
4492 if (isInt
4493 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
4494 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
4495 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
4496 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
4497 goto fail;
4498
4499 if (!isInt
4500 && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
4501 goto fail;
4502
4503 /* Throw out non-verified but possibly valid cases. */
4504 switch (szLg2) {
4505 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
4506 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
4507 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
4508 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
4509 case BITS3(1,0,0): // can only ever be valid for the vector case
4510 if (isInt) goto fail; else break;
4511 case BITS3(1,0,1): // these sizes are never valid
4512 case BITS3(1,1,0):
4513 case BITS3(1,1,1): goto fail;
4514
4515 default: vassert(0);
4516 }
4517
4518 IRExpr* rhs = NULL;
4519 switch (optS) {
4520 case BITS4(1,1,1,0): goto fail; //ATC
4521 case BITS4(0,1,1,0):
4522 rhs = getIReg64orZR(mm);
4523 vex_sprintf(buf, "[%s, %s]",
4524 nameIReg64orZR(nn), nameIReg64orZR(mm));
4525 break;
4526 case BITS4(1,1,1,1): goto fail; //ATC
4527 case BITS4(0,1,1,1):
4528 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
4529 vex_sprintf(buf, "[%s, %s lsl %u]",
4530 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
4531 break;
4532 case BITS4(0,1,0,0):
4533 rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
4534 vex_sprintf(buf, "[%s, %s uxtx]",
4535 nameIReg64orZR(nn), nameIReg32orZR(mm));
4536 break;
4537 case BITS4(0,1,0,1):
4538 rhs = binop(Iop_Shl64,
4539 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
4540 vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
4541 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4542 break;
4543 case BITS4(1,1,0,0):
4544 rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
4545 vex_sprintf(buf, "[%s, %s sxtx]",
4546 nameIReg64orZR(nn), nameIReg32orZR(mm));
4547 break;
4548 case BITS4(1,1,0,1):
4549 rhs = binop(Iop_Shl64,
4550 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
4551 vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
4552 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4553 break;
4554 default:
4555 /* The rest appear to be genuinely invalid */
4556 goto fail;
4557 }
4558
4559 vassert(rhs);
4560 IRTemp res = newTemp(Ity_I64);
4561 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
4562 return res;
4563
4564 fail:
4565 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
4566 return IRTemp_INVALID;
4567 }
4568
4569
4570 /* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
4571 bits of DATAE :: Ity_I64. */
4572 static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
4573 {
4574 IRExpr* addrE = mkexpr(addr);
4575 switch (szB) {
4576 case 8:
4577 storeLE(addrE, dataE);
4578 break;
4579 case 4:
4580 storeLE(addrE, unop(Iop_64to32, dataE));
4581 break;
4582 case 2:
4583 storeLE(addrE, unop(Iop_64to16, dataE));
4584 break;
4585 case 1:
4586 storeLE(addrE, unop(Iop_64to8, dataE));
4587 break;
4588 default:
4589 vassert(0);
4590 }
4591 }
4592
4593
4594 /* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
4595 placing the result in an Ity_I64 temporary. */
4596 static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
4597 {
4598 IRTemp res = newTemp(Ity_I64);
4599 IRExpr* addrE = mkexpr(addr);
4600 switch (szB) {
4601 case 8:
4602 assign(res, loadLE(Ity_I64,addrE));
4603 break;
4604 case 4:
4605 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
4606 break;
4607 case 2:
4608 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
4609 break;
4610 case 1:
4611 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
4612 break;
4613 default:
4614 vassert(0);
4615 }
4616 return res;
4617 }
4618
4619
4620 /* Generate a "standard 7" name, from bitQ and size. But also
4621 allow ".1d" since that's occasionally useful. */
4622 static
4623 const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
4624 {
4625 vassert(bitQ <= 1 && size <= 3);
4626 const HChar* nms[8]
4627 = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
4628 UInt ix = (bitQ << 2) | size;
4629 vassert(ix < 8);
4630 return nms[ix];
4631 }
4632
4633
4634 static
4635 Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
4636 {
4637 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
4638
4639 /* ------------ LDR,STR (immediate, uimm12) ----------- */
4640 /* uimm12 is scaled by the transfer size
4641
4642 31 29 26 21 9 4
4643 | | | | | |
4644 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
4645 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
4646
4647 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
4648 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
4649
4650 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
4651 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
4652
4653 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
4654 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
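      For example, "ldr x3, [sp, #40]" is the 64 bit form with
      imm12 == 5, since the immediate is scaled by the transfer size (8).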
4655 */
4656 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
4657 UInt szLg2 = INSN(31,30);
4658 UInt szB = 1 << szLg2;
4659 Bool isLD = INSN(22,22) == 1;
4660 UInt offs = INSN(21,10) * szB;
4661 UInt nn = INSN(9,5);
4662 UInt tt = INSN(4,0);
4663 IRTemp ta = newTemp(Ity_I64);
4664 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
4665 if (nn == 31) { /* FIXME generate stack alignment check */ }
4666 vassert(szLg2 < 4);
4667 if (isLD) {
4668 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
4669 } else {
4670 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
4671 }
4672 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
4673 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
4674 DIP("%s %s, [%s, #%u]\n",
4675 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
4676 nameIReg64orSP(nn), offs);
4677 return True;
4678 }
4679
4680 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
4681 /*
4682 31 29 26 20 11 9 4
4683 | | | | | | |
4684 (at-Rn-then-Rn=EA) | | |
4685 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
4686 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
4687
4688 (at-EA-then-Rn=EA)
4689 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
4690 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
4691
4692 (at-EA)
4693 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
4694 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
4695
4696 simm9 is unscaled.
4697
4698 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
4699      load case this is because it would create two competing values for
4700 Rt. In the store case the reason is unclear, but the spec
4701 disallows it anyway.
4702
4703 Stores are narrowing, loads are unsigned widening. sz encodes
4704 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
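      For example, "str x30, [sp, #-16]!" is the at-EA-then-Rn=EA form
      with simm9 == -16, and "ldr x0, [x1], #8" is the at-Rn-then-Rn=EA
      form with simm9 == 8.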
4705 */
4706 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
4707 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
4708 UInt szLg2 = INSN(31,30);
4709 UInt szB = 1 << szLg2;
4710 Bool isLoad = INSN(22,22) == 1;
4711 UInt imm9 = INSN(20,12);
4712 UInt nn = INSN(9,5);
4713 UInt tt = INSN(4,0);
4714 Bool wBack = INSN(10,10) == 1;
4715 UInt how = INSN(11,10);
4716 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
4717 /* undecodable; fall through */
4718 } else {
4719 if (nn == 31) { /* FIXME generate stack alignment check */ }
4720
4721 // Compute the transfer address TA and the writeback address WA.
4722 IRTemp tRN = newTemp(Ity_I64);
4723 assign(tRN, getIReg64orSP(nn));
4724 IRTemp tEA = newTemp(Ity_I64);
4725 Long simm9 = (Long)sx_to_64(imm9, 9);
4726 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4727
4728 IRTemp tTA = newTemp(Ity_I64);
4729 IRTemp tWA = newTemp(Ity_I64);
4730 switch (how) {
4731 case BITS2(0,1):
4732 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4733 case BITS2(1,1):
4734 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4735 case BITS2(0,0):
4736 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4737 default:
4738 vassert(0); /* NOTREACHED */
4739 }
4740
4741 /* Normally rN would be updated after the transfer. However, in
4742          the special case typified by
4743 str x30, [sp,#-16]!
4744 it is necessary to update SP before the transfer, (1)
4745 because Memcheck will otherwise complain about a write
4746 below the stack pointer, and (2) because the segfault
4747 stack extension mechanism will otherwise extend the stack
4748 only down to SP before the instruction, which might not be
4749          far enough, if the -16 offset takes the actual access
4750 address to the next page.
4751 */
4752 Bool earlyWBack
4753 = wBack && simm9 < 0 && szB == 8
4754 && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
4755
4756 if (wBack && earlyWBack)
4757 putIReg64orSP(nn, mkexpr(tEA));
4758
4759 if (isLoad) {
4760 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
4761 } else {
4762 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
4763 }
4764
4765 if (wBack && !earlyWBack)
4766 putIReg64orSP(nn, mkexpr(tEA));
4767
4768 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
4769 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
4770 const HChar* fmt_str = NULL;
4771 switch (how) {
4772 case BITS2(0,1):
4773 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4774 break;
4775 case BITS2(1,1):
4776 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4777 break;
4778 case BITS2(0,0):
4779 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
4780 break;
4781 default:
4782 vassert(0);
4783 }
4784 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
4785 nameIRegOrZR(szB == 8, tt),
4786 nameIReg64orSP(nn), simm9);
4787 return True;
4788 }
4789 }
4790
4791 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
4792 /* L==1 => mm==LD
4793 L==0 => mm==ST
4794 x==0 => 32 bit transfers, and zero extended loads
4795 x==1 => 64 bit transfers
4796 simm7 is scaled by the (single-register) transfer size
4797
4798 (at-Rn-then-Rn=EA)
4799 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
4800
4801 (at-EA-then-Rn=EA)
4802 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
4803
4804 (at-EA)
4805 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
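      For example, "stp x29, x30, [sp, #-112]!" is the at-EA-then-Rn=EA
      form with x==1, L==0 and imm7 == -14, imm7 being scaled by 8 for
      64 bit transfers.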
4806 */
4807
4808 UInt insn_30_23 = INSN(30,23);
4809 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
4810 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
4811 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
4812 UInt bL = INSN(22,22);
4813 UInt bX = INSN(31,31);
4814 UInt bWBack = INSN(23,23);
4815 UInt rT1 = INSN(4,0);
4816 UInt rN = INSN(9,5);
4817 UInt rT2 = INSN(14,10);
4818 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
4819 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
4820 || (bL && rT1 == rT2)) {
4821 /* undecodable; fall through */
4822 } else {
4823 if (rN == 31) { /* FIXME generate stack alignment check */ }
4824
4825 // Compute the transfer address TA and the writeback address WA.
4826 IRTemp tRN = newTemp(Ity_I64);
4827 assign(tRN, getIReg64orSP(rN));
4828 IRTemp tEA = newTemp(Ity_I64);
4829 simm7 = (bX ? 8 : 4) * simm7;
4830 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
4831
4832 IRTemp tTA = newTemp(Ity_I64);
4833 IRTemp tWA = newTemp(Ity_I64);
4834 switch (INSN(24,23)) {
4835 case BITS2(0,1):
4836 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4837 case BITS2(1,1):
4838 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4839 case BITS2(1,0):
4840 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4841 default:
4842 vassert(0); /* NOTREACHED */
4843 }
4844
4845 /* Normally rN would be updated after the transfer. However, in
4846             the special case typified by
4847 stp x29, x30, [sp,#-112]!
4848 it is necessary to update SP before the transfer, (1)
4849 because Memcheck will otherwise complain about a write
4850 below the stack pointer, and (2) because the segfault
4851 stack extension mechanism will otherwise extend the stack
4852 only down to SP before the instruction, which might not be
4853             far enough, if the -112 offset takes the actual access
4854 address to the next page.
4855 */
4856 Bool earlyWBack
4857 = bWBack && simm7 < 0
4858 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
4859
4860 if (bWBack && earlyWBack)
4861 putIReg64orSP(rN, mkexpr(tEA));
4862
4863 /**/ if (bL == 1 && bX == 1) {
4864 // 64 bit load
4865 putIReg64orZR(rT1, loadLE(Ity_I64,
4866 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4867 putIReg64orZR(rT2, loadLE(Ity_I64,
4868 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
4869 } else if (bL == 1 && bX == 0) {
4870 // 32 bit load
4871 putIReg32orZR(rT1, loadLE(Ity_I32,
4872 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4873 putIReg32orZR(rT2, loadLE(Ity_I32,
4874 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
4875 } else if (bL == 0 && bX == 1) {
4876 // 64 bit store
4877 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4878 getIReg64orZR(rT1));
4879 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
4880 getIReg64orZR(rT2));
4881 } else {
4882 vassert(bL == 0 && bX == 0);
4883 // 32 bit store
4884 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4885 getIReg32orZR(rT1));
4886 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
4887 getIReg32orZR(rT2));
4888 }
4889
4890 if (bWBack && !earlyWBack)
4891 putIReg64orSP(rN, mkexpr(tEA));
4892
4893 const HChar* fmt_str = NULL;
4894 switch (INSN(24,23)) {
4895 case BITS2(0,1):
4896 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4897 break;
4898 case BITS2(1,1):
4899 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4900 break;
4901 case BITS2(1,0):
4902 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
4903 break;
4904 default:
4905 vassert(0);
4906 }
4907 DIP(fmt_str, bL == 0 ? "st" : "ld",
4908 nameIRegOrZR(bX == 1, rT1),
4909 nameIRegOrZR(bX == 1, rT2),
4910 nameIReg64orSP(rN), simm7);
4911 return True;
4912 }
4913 }
4914
4915 /* ---------------- LDR (literal, int reg) ---------------- */
4916 /* 31 29 23 4
4917 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
4918 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
4919 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
4920 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
4921 Just handles the first two cases for now.
4922 */
4923 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
4924 UInt imm19 = INSN(23,5);
4925 UInt rT = INSN(4,0);
4926 UInt bX = INSN(30,30);
4927 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
4928 if (bX) {
4929 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
4930 } else {
4931 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
4932 }
4933 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
4934 return True;
4935 }
4936
4937 /* -------------- {LD,ST}R (integer register) --------------- */
4938 /* 31 29 20 15 12 11 9 4
4939 | | | | | | | |
4940 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
4941 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
4942 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
4943 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
4944
4945 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
4946 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
4947 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
4948 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
4949 */
4950 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
4951 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
4952 HChar dis_buf[64];
4953 UInt szLg2 = INSN(31,30);
4954 Bool isLD = INSN(22,22) == 1;
4955 UInt tt = INSN(4,0);
4956 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
4957 if (ea != IRTemp_INVALID) {
4958 switch (szLg2) {
4959 case 3: /* 64 bit */
4960 if (isLD) {
4961 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
4962 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
4963 } else {
4964 storeLE(mkexpr(ea), getIReg64orZR(tt));
4965 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
4966 }
4967 break;
4968 case 2: /* 32 bit */
4969 if (isLD) {
4970 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
4971 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
4972 } else {
4973 storeLE(mkexpr(ea), getIReg32orZR(tt));
4974 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
4975 }
4976 break;
4977 case 1: /* 16 bit */
4978 if (isLD) {
4979 putIReg64orZR(tt, unop(Iop_16Uto64,
4980 loadLE(Ity_I16, mkexpr(ea))));
4981 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
4982 } else {
4983 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
4984 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
4985 }
4986 break;
4987 case 0: /* 8 bit */
4988 if (isLD) {
4989 putIReg64orZR(tt, unop(Iop_8Uto64,
4990 loadLE(Ity_I8, mkexpr(ea))));
4991 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
4992 } else {
4993 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
4994 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
4995 }
4996 break;
4997 default:
4998 vassert(0);
4999 }
5000 return True;
5001 }
5002 }
5003
5004 /* -------------- LDRS{B,H,W} (uimm12) -------------- */
5005 /* 31 29 26 23 21 9 4
5006 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
5007 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
5008 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
5009 where
5010 Rt is Wt when x==1, Xt when x==0
5011 */
5012 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
5013 /* Further checks on bits 31:30 and 22 */
5014 Bool valid = False;
5015 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5016 case BITS3(1,0,0):
5017 case BITS3(0,1,0): case BITS3(0,1,1):
5018 case BITS3(0,0,0): case BITS3(0,0,1):
5019 valid = True;
5020 break;
5021 }
5022 if (valid) {
5023 UInt szLg2 = INSN(31,30);
5024 UInt bitX = INSN(22,22);
5025 UInt imm12 = INSN(21,10);
5026 UInt nn = INSN(9,5);
5027 UInt tt = INSN(4,0);
5028 UInt szB = 1 << szLg2;
5029 IRExpr* ea = binop(Iop_Add64,
5030 getIReg64orSP(nn), mkU64(imm12 * szB));
5031 switch (szB) {
5032 case 4:
5033 vassert(bitX == 0);
5034 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
5035 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
5036 nameIReg64orSP(nn), imm12 * szB);
5037 break;
5038 case 2:
5039 if (bitX == 1) {
5040 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
5041 } else {
5042 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
5043 }
5044 DIP("ldrsh %s, [%s, #%u]\n",
5045 nameIRegOrZR(bitX == 0, tt),
5046 nameIReg64orSP(nn), imm12 * szB);
5047 break;
5048 case 1:
5049 if (bitX == 1) {
5050 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
5051 } else {
5052 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
5053 }
5054 DIP("ldrsb %s, [%s, #%u]\n",
5055 nameIRegOrZR(bitX == 0, tt),
5056 nameIReg64orSP(nn), imm12 * szB);
5057 break;
5058 default:
5059 vassert(0);
5060 }
5061 return True;
5062 }
5063 /* else fall through */
5064 }
5065
5066 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
5067 /* (at-Rn-then-Rn=EA)
5068 31 29 23 21 20 11 9 4
5069 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
5070 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
5071 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
5072
5073 (at-EA-then-Rn=EA)
5074 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
5075 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
5076 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
5077 where
5078 Rt is Wt when x==1, Xt when x==0
5079 transfer-at-Rn when [11]==0, at EA when [11]==1
5080 */
5081 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5082 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5083 /* Further checks on bits 31:30 and 22 */
5084 Bool valid = False;
5085 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5086 case BITS3(1,0,0): // LDRSW Xt
5087 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
5088 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
5089 valid = True;
5090 break;
5091 }
5092 if (valid) {
5093 UInt szLg2 = INSN(31,30);
5094 UInt imm9 = INSN(20,12);
5095 Bool atRN = INSN(11,11) == 0;
5096 UInt nn = INSN(9,5);
5097 UInt tt = INSN(4,0);
5098 IRTemp tRN = newTemp(Ity_I64);
5099 IRTemp tEA = newTemp(Ity_I64);
5100 IRTemp tTA = IRTemp_INVALID;
5101 ULong simm9 = sx_to_64(imm9, 9);
5102 Bool is64 = INSN(22,22) == 0;
5103 assign(tRN, getIReg64orSP(nn));
5104 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5105 tTA = atRN ? tRN : tEA;
5106 HChar ch = '?';
5107 /* There are 5 cases:
5108 byte load, SX to 64
5109 byte load, SX to 32, ZX to 64
5110 halfword load, SX to 64
5111 halfword load, SX to 32, ZX to 64
5112 word load, SX to 64
5113 The ifs below handle them in the listed order.
5114 */
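/* Illustrative example (not from the original comments): for
   "ldrsb x3, [x7], #4" we have szLg2 == 0 and is64 == True, so an
   8-bit load at [x7] is sign-extended to 64 bits into x3, and then
   x7 is advanced by 4. */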
5115 if (szLg2 == 0) {
5116 ch = 'b';
5117 if (is64) {
5118 putIReg64orZR(tt, unop(Iop_8Sto64,
5119 loadLE(Ity_I8, mkexpr(tTA))));
5120 } else {
5121 putIReg32orZR(tt, unop(Iop_8Sto32,
5122 loadLE(Ity_I8, mkexpr(tTA))));
5123 }
5124 }
5125 else if (szLg2 == 1) {
5126 ch = 'h';
5127 if (is64) {
5128 putIReg64orZR(tt, unop(Iop_16Sto64,
5129 loadLE(Ity_I16, mkexpr(tTA))));
5130 } else {
5131 putIReg32orZR(tt, unop(Iop_16Sto32,
5132 loadLE(Ity_I16, mkexpr(tTA))));
5133 }
5134 }
5135 else if (szLg2 == 2 && is64) {
5136 ch = 'w';
5137 putIReg64orZR(tt, unop(Iop_32Sto64,
5138 loadLE(Ity_I32, mkexpr(tTA))));
5139 }
5140 else {
5141 vassert(0);
5142 }
5143 putIReg64orSP(nn, mkexpr(tEA));
5144 DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!\n",
5145 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
5146 return True;
5147 }
5148 /* else fall through */
5149 }
5150
5151 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
5152 /* 31 29 23 21 20 11 9 4
5153 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
5154 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
5155 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
5156 where
5157 Rt is Wt when x==1, Xt when x==0
5158 */
5159 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5160 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5161 /* Further checks on bits 31:30 and 22 */
5162 Bool valid = False;
5163 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5164 case BITS3(1,0,0): // LDURSW Xt
5165 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
5166 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
5167 valid = True;
5168 break;
5169 }
5170 if (valid) {
5171 UInt szLg2 = INSN(31,30);
5172 UInt imm9 = INSN(20,12);
5173 UInt nn = INSN(9,5);
5174 UInt tt = INSN(4,0);
5175 IRTemp tRN = newTemp(Ity_I64);
5176 IRTemp tEA = newTemp(Ity_I64);
5177 ULong simm9 = sx_to_64(imm9, 9);
5178 Bool is64 = INSN(22,22) == 0;
5179 assign(tRN, getIReg64orSP(nn));
5180 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5181 HChar ch = '?';
5182 /* There are 5 cases:
5183 byte load, SX to 64
5184 byte load, SX to 32, ZX to 64
5185 halfword load, SX to 64
5186 halfword load, SX to 32, ZX to 64
5187 word load, SX to 64
5188 The ifs below handle them in the listed order.
5189 */
5190 if (szLg2 == 0) {
5191 ch = 'b';
5192 if (is64) {
5193 putIReg64orZR(tt, unop(Iop_8Sto64,
5194 loadLE(Ity_I8, mkexpr(tEA))));
5195 } else {
5196 putIReg32orZR(tt, unop(Iop_8Sto32,
5197 loadLE(Ity_I8, mkexpr(tEA))));
5198 }
5199 }
5200 else if (szLg2 == 1) {
5201 ch = 'h';
5202 if (is64) {
5203 putIReg64orZR(tt, unop(Iop_16Sto64,
5204 loadLE(Ity_I16, mkexpr(tEA))));
5205 } else {
5206 putIReg32orZR(tt, unop(Iop_16Sto32,
5207 loadLE(Ity_I16, mkexpr(tEA))));
5208 }
5209 }
5210 else if (szLg2 == 2 && is64) {
5211 ch = 'w';
5212 putIReg64orZR(tt, unop(Iop_32Sto64,
5213 loadLE(Ity_I32, mkexpr(tEA))));
5214 }
5215 else {
5216 vassert(0);
5217 }
5218 DIP("ldurs%c %s, [%s, #%lld]\n",
5219 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
5220 return True;
5221 }
5222 /* else fall through */
5223 }
5224
5225 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
5226 /* L==1 => mm==LD
5227 L==0 => mm==ST
5228 sz==00 => 32 bit (S) transfers
5229 sz==01 => 64 bit (D) transfers
5230 sz==10 => 128 bit (Q) transfers
5231 sz==11 isn't allowed
5232 simm7 is scaled by the (single-register) transfer size
5233
5234 31 29 26 22 21 14 9 4
5235
5236 sz 101 1000 L imm7 t2 n t1 mmNP SDQt1, SDQt2, [Xn|SP, #imm]
5237 (at-EA, with nontemporal hint)
5238
5239 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm
5240 (at-Rn-then-Rn=EA)
5241
5242 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]
5243 (at-EA)
5244
5245 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]!
5246 (at-EA-then-Rn=EA)
5247 */
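/* Worked example (illustrative only): for "ldp q2, q3, [x0, #32]" the
   per-register size is 16 bytes, so imm7 == 2; q2 is loaded from
   [x0 + 32] and q3 from [x0 + 48], with no writeback. */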
5248 if (INSN(29,25) == BITS5(1,0,1,1,0)) {
5249 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
5250 Bool isLD = INSN(22,22) == 1;
5251 Bool wBack = INSN(23,23) == 1;
5252 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
5253 UInt tt2 = INSN(14,10);
5254 UInt nn = INSN(9,5);
5255 UInt tt1 = INSN(4,0);
5256 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
5257 /* undecodable; fall through */
5258 } else {
5259 if (nn == 31) { /* FIXME generate stack alignment check */ }
5260
5261 // Compute the transfer address TA and the writeback address WA.
5262 UInt szB = 4 << szSlg2; /* szB is the per-register size */
5263 IRTemp tRN = newTemp(Ity_I64);
5264 assign(tRN, getIReg64orSP(nn));
5265 IRTemp tEA = newTemp(Ity_I64);
5266 simm7 = szB * simm7;
5267 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5268
5269 IRTemp tTA = newTemp(Ity_I64);
5270 IRTemp tWA = newTemp(Ity_I64);
5271 switch (INSN(24,23)) {
5272 case BITS2(0,1):
5273 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5274 case BITS2(1,1):
5275 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5276 case BITS2(1,0):
5277 case BITS2(0,0):
5278 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5279 default:
5280 vassert(0); /* NOTREACHED */
5281 }
5282
5283 IRType ty = Ity_INVALID;
5284 switch (szB) {
5285 case 4: ty = Ity_F32; break;
5286 case 8: ty = Ity_F64; break;
5287 case 16: ty = Ity_V128; break;
5288 default: vassert(0);
5289 }
5290
5291 /* Normally rN would be updated after the transfer. However, in
5292 the special cases typified by
5293 stp q0, q1, [sp,#-512]!
5294 stp d0, d1, [sp,#-512]!
5295 stp s0, s1, [sp,#-512]!
5296 it is necessary to update SP before the transfer, (1)
5297 because Memcheck will otherwise complain about a write
5298 below the stack pointer, and (2) because the segfault
5299 stack extension mechanism will otherwise extend the stack
5300 only down to SP before the instruction, which might not be
5301 far enough, if the -512 offset takes the actual access
5302 address onto the next page down.
5303 */
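/* Rough ordering sketch for the earlyWBack case, e.g.
   "stp q0, q1, [sp,#-512]!" (illustrative): first SP := SP - 512,
   then q0 is stored at [SP] and q1 at [SP+16].  In all other cases
   the two transfers happen first and any writeback of Rn follows. */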
5304 Bool earlyWBack
5305 = wBack && simm7 < 0
5306 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
5307
5308 if (wBack && earlyWBack)
5309 putIReg64orSP(nn, mkexpr(tEA));
5310
5311 if (isLD) {
5312 if (szB < 16) {
5313 putQReg128(tt1, mkV128(0x0000));
5314 }
5315 putQRegLO(tt1,
5316 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
5317 if (szB < 16) {
5318 putQReg128(tt2, mkV128(0x0000));
5319 }
5320 putQRegLO(tt2,
5321 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
5322 } else {
5323 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
5324 getQRegLO(tt1, ty));
5325 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
5326 getQRegLO(tt2, ty));
5327 }
5328
5329 if (wBack && !earlyWBack)
5330 putIReg64orSP(nn, mkexpr(tEA));
5331
5332 const HChar* fmt_str = NULL;
5333 switch (INSN(24,23)) {
5334 case BITS2(0,1):
5335 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5336 break;
5337 case BITS2(1,1):
5338 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5339 break;
5340 case BITS2(1,0):
5341 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
5342 break;
5343 case BITS2(0,0):
5344 fmt_str = "%snp %s, %s, [%s, #%lld] (at-Rn)\n";
5345 break;
5346 default:
5347 vassert(0);
5348 }
5349 DIP(fmt_str, isLD ? "ld" : "st",
5350 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
5351 nameIReg64orSP(nn), simm7);
5352 return True;
5353 }
5354 }
5355
5356 /* -------------- {LD,ST}R (vector register) --------------- */
5357 /* 31 29 23 20 15 12 11 9 4
5358 | | | | | | | | |
5359 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
5360 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
5361 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
5362 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
5363 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
5364
5365 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
5366 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
5367 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
5368 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
5369 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
5370 */
5371 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5372 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5373 HChar dis_buf[64];
5374 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5375 Bool isLD = INSN(22,22) == 1;
5376 UInt tt = INSN(4,0);
5377 if (szLg2 > 4) goto after_LDR_STR_vector_register;
5378 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
5379 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
5380 switch (szLg2) {
5381 case 0: /* 8 bit */
5382 if (isLD) {
5383 putQReg128(tt, mkV128(0x0000));
5384 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
5385 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5386 } else {
5387 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
5388 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5389 }
5390 break;
5391 case 1:
5392 if (isLD) {
5393 putQReg128(tt, mkV128(0x0000));
5394 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
5395 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5396 } else {
5397 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
5398 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5399 }
5400 break;
5401 case 2: /* 32 bit */
5402 if (isLD) {
5403 putQReg128(tt, mkV128(0x0000));
5404 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
5405 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5406 } else {
5407 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
5408 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5409 }
5410 break;
5411 case 3: /* 64 bit */
5412 if (isLD) {
5413 putQReg128(tt, mkV128(0x0000));
5414 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
5415 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5416 } else {
5417 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
5418 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5419 }
5420 break;
5421 case 4:
5422 if (isLD) {
5423 putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
5424 DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
5425 } else {
5426 storeLE(mkexpr(ea), getQReg128(tt));
5427 DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
5428 }
5429 break;
5430 default:
5431 vassert(0);
5432 }
5433 return True;
5434 }
5435 after_LDR_STR_vector_register:
5436
5437 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
5438 /* 31 29 22 20 15 12 11 9 4
5439 | | | | | | | | |
5440 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
5441
5442 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
5443 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
5444
5445 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
5446 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
5447 */
5448 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5449 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5450 HChar dis_buf[64];
5451 UInt szLg2 = INSN(31,30);
5452 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
5453 UInt tt = INSN(4,0);
5454 if (szLg2 == 3) goto after_LDRS_integer_register;
5455 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5456 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
5457 /* Enumerate the 5 variants explicitly. */
5458 if (szLg2 == 2/*32 bit*/ && sxTo64) {
5459 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
5460 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
5461 return True;
5462 }
5463 else
5464 if (szLg2 == 1/*16 bit*/) {
5465 if (sxTo64) {
5466 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
5467 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
5468 } else {
5469 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
5470 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5471 }
5472 return True;
5473 }
5474 else
5475 if (szLg2 == 0/*8 bit*/) {
5476 if (sxTo64) {
5477 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
5478 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
5479 } else {
5480 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
5481 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5482 }
5483 return True;
5484 }
5485 /* else it's an invalid combination */
5486 }
5487 after_LDRS_integer_register:
5488
5489 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
5490 /* This is the Unsigned offset variant only. The Post-Index and
5491 Pre-Index variants are below.
5492
5493 31 29 23 21 9 4
5494 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
5495 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
5496 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
5497 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
5498 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
5499
5500 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
5501 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
5502 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
5503 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
5504 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
5505 */
5506 if (INSN(29,24) == BITS6(1,1,1,1,0,1)
5507 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
5508 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5509 Bool isLD = INSN(22,22) == 1;
5510 UInt pimm12 = INSN(21,10) << szLg2;
5511 UInt nn = INSN(9,5);
5512 UInt tt = INSN(4,0);
5513 IRTemp tEA = newTemp(Ity_I64);
5514 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5515 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
5516 if (isLD) {
5517 if (szLg2 < 4) {
5518 putQReg128(tt, mkV128(0x0000));
5519 }
5520 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5521 } else {
5522 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5523 }
5524 DIP("%s %s, [%s, #%u]\n",
5525 isLD ? "ldr" : "str",
5526 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
5527 return True;
5528 }
5529
5530 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
5531 /* These are the Post-Index and Pre-Index variants.
5532
5533 31 29 23 20 11 9 4
5534 (at-Rn-then-Rn=EA)
5535 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
5536 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
5537 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
5538 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
5539 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
5540
5541 (at-EA-then-Rn=EA)
5542 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
5543 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
5544 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
5545 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
5546 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
5547
5548 Stores are the same except with bit 22 set to 0.
5549 */
5550 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5551 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5552 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5553 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5554 Bool isLD = INSN(22,22) == 1;
5555 UInt imm9 = INSN(20,12);
5556 Bool atRN = INSN(11,11) == 0;
5557 UInt nn = INSN(9,5);
5558 UInt tt = INSN(4,0);
5559 IRTemp tRN = newTemp(Ity_I64);
5560 IRTemp tEA = newTemp(Ity_I64);
5561 IRTemp tTA = IRTemp_INVALID;
5562 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5563 ULong simm9 = sx_to_64(imm9, 9);
5564 assign(tRN, getIReg64orSP(nn));
5565 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5566 tTA = atRN ? tRN : tEA;
5567 if (isLD) {
5568 if (szLg2 < 4) {
5569 putQReg128(tt, mkV128(0x0000));
5570 }
5571 putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
5572 } else {
5573 storeLE(mkexpr(tTA), getQRegLO(tt, ty));
5574 }
5575 putIReg64orSP(nn, mkexpr(tEA));
5576 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
5577 isLD ? "ldr" : "str",
5578 nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
5579 return True;
5580 }
5581
5582 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
5583 /* 31 29 23 20 11 9 4
5584 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm]
5585 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm]
5586 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm]
5587 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm]
5588 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm]
5589
5590 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm]
5591 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm]
5592 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm]
5593 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm]
5594 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm]
5595 */
5596 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5597 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5598 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5599 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5600 Bool isLD = INSN(22,22) == 1;
5601 UInt imm9 = INSN(20,12);
5602 UInt nn = INSN(9,5);
5603 UInt tt = INSN(4,0);
5604 ULong simm9 = sx_to_64(imm9, 9);
5605 IRTemp tEA = newTemp(Ity_I64);
5606 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5607 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
5608 if (isLD) {
5609 if (szLg2 < 4) {
5610 putQReg128(tt, mkV128(0x0000));
5611 }
5612 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5613 } else {
5614 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5615 }
5616 DIP("%s %s, [%s, #%lld]\n",
5617 isLD ? "ldur" : "stur",
5618 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5619 return True;
5620 }
5621
5622 /* ---------------- LDR (literal, SIMD&FP) ---------------- */
5623 /* 31 29 23 4
5624 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
5625 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
5626 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
5627 */
5628 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
5629 UInt szB = 4 << INSN(31,30);
5630 UInt imm19 = INSN(23,5);
5631 UInt tt = INSN(4,0);
5632 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5633 IRType ty = preferredVectorSubTypeFromSize(szB);
5634 putQReg128(tt, mkV128(0x0000));
5635 putQRegLO(tt, loadLE(ty, mkU64(ea)));
5636 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
5637 return True;
5638 }
5639
5640 /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg ------ */
5641 /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs ------ */
5642 /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs ------ */
5643 /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs ------ */
5644 /* 31 29 26 22 21 20 15 11 9 4
5645
5646 0q 001 1000 L 0 00000 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP]
5647 0q 001 1001 L 0 m 0000 sz n t xx4 {Vt..t+3.T}, [Xn|SP], step
5648
5649 0q 001 1000 L 0 00000 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP]
5650 0q 001 1001 L 0 m 0100 sz n t xx3 {Vt..t+2.T}, [Xn|SP], step
5651
5652 0q 001 1000 L 0 00000 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP]
5653 0q 001 1001 L 0 m 1000 sz n t xx2 {Vt..t+1.T}, [Xn|SP], step
5654
5655 0q 001 1000 L 0 00000 0111 sz n t xx1 {Vt.T}, [Xn|SP]
5656 0q 001 1001 L 0 m 0111 sz n t xx1 {Vt.T}, [Xn|SP], step
5657
5658 T = defined by Q and sz in the normal way
5659 step = if m == 11111 then transfer-size else Xm
5660 xx = case L of 1 -> LD ; 0 -> ST
5661 */
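/* Worked example (illustrative only): "ld2 {v7.4s, v8.4s}, [x3], #32"
   has q == 1, opc == 1000 and sz == 10, so nRegs == 2 and
   xferSzB == 32; the 32 bytes at [x3] are loaded, de-interleaved into
   v7 and v8, and x3 is advanced by 32 (since m == 11111). */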
5662 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
5663 && INSN(21,21) == 0) {
5664 Bool bitQ = INSN(30,30);
5665 Bool isPX = INSN(23,23) == 1;
5666 Bool isLD = INSN(22,22) == 1;
5667 UInt mm = INSN(20,16);
5668 UInt opc = INSN(15,12);
5669 UInt sz = INSN(11,10);
5670 UInt nn = INSN(9,5);
5671 UInt tt = INSN(4,0);
5672 Bool isQ = bitQ == 1;
5673 Bool is1d = sz == BITS2(1,1) && !isQ;
5674 UInt nRegs = 0;
5675 switch (opc) {
5676 case BITS4(0,0,0,0): nRegs = 4; break;
5677 case BITS4(0,1,0,0): nRegs = 3; break;
5678 case BITS4(1,0,0,0): nRegs = 2; break;
5679 case BITS4(0,1,1,1): nRegs = 1; break;
5680 default: break;
5681 }
5682
5683 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
5684 If we see it, set nRegs to 0 so as to cause the next conditional
5685 to fail. */
5686 if (!isPX && mm != 0)
5687 nRegs = 0;
5688
5689 if (nRegs == 1 /* .1d is allowed */
5690 || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) {
5691
5692 UInt xferSzB = (isQ ? 16 : 8) * nRegs;
5693
5694 /* Generate the transfer address (TA) and if necessary the
5695 writeback address (WB) */
5696 IRTemp tTA = newTemp(Ity_I64);
5697 assign(tTA, getIReg64orSP(nn));
5698 if (nn == 31) { /* FIXME generate stack alignment check */ }
5699 IRTemp tWB = IRTemp_INVALID;
5700 if (isPX) {
5701 tWB = newTemp(Ity_I64);
5702 assign(tWB, binop(Iop_Add64,
5703 mkexpr(tTA),
5704 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
5705 : getIReg64orZR(mm)));
5706 }
5707
5708 /* -- BEGIN generate the transfers -- */
5709
5710 IRTemp u0, u1, u2, u3, i0, i1, i2, i3;
5711 u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID;
5712 switch (nRegs) {
5713 case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */
5714 case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */
5715 case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */
5716 case 1: u0 = newTempV128(); i0 = newTempV128(); break;
5717 default: vassert(0);
5718 }
5719
5720 /* -- Multiple 128 or 64 bit stores -- */
5721 if (!isLD) {
5722 switch (nRegs) {
5723 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
5724 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
5725 case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */
5726 case 1: assign(u0, getQReg128((tt+0) % 32)); break;
5727 default: vassert(0);
5728 }
5729 switch (nRegs) {
5730 case 4: (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64)
5731 (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3);
5732 break;
5733 case 3: (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64)
5734 (&i0, &i1, &i2, sz, u0, u1, u2);
5735 break;
5736 case 2: (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64)
5737 (&i0, &i1, sz, u0, u1);
5738 break;
5739 case 1: (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64)
5740 (&i0, sz, u0);
5741 break;
5742 default: vassert(0);
5743 }
5744 # define MAYBE_NARROW_TO_64(_expr) \
5745 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
5746 UInt step = isQ ? 16 : 8;
5747 switch (nRegs) {
5748 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
5749 MAYBE_NARROW_TO_64(mkexpr(i3)) );
5750 /* fallthru */
5751 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
5752 MAYBE_NARROW_TO_64(mkexpr(i2)) );
5753 /* fallthru */
5754 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
5755 MAYBE_NARROW_TO_64(mkexpr(i1)) );
5756 /* fallthru */
5757 case 1: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
5758 MAYBE_NARROW_TO_64(mkexpr(i0)) );
5759 break;
5760 default: vassert(0);
5761 }
5762 # undef MAYBE_NARROW_TO_64
5763 }
5764
5765 /* -- Multiple 128 or 64 bit loads -- */
5766 else /* isLD */ {
5767 UInt step = isQ ? 16 : 8;
5768 IRType loadTy = isQ ? Ity_V128 : Ity_I64;
5769 # define MAYBE_WIDEN_FROM_64(_expr) \
5770 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
5771 switch (nRegs) {
5772 case 4:
5773 assign(i3, MAYBE_WIDEN_FROM_64(
5774 loadLE(loadTy,
5775 binop(Iop_Add64, mkexpr(tTA),
5776 mkU64(3 * step)))));
5777 /* fallthru */
5778 case 3:
5779 assign(i2, MAYBE_WIDEN_FROM_64(
5780 loadLE(loadTy,
5781 binop(Iop_Add64, mkexpr(tTA),
5782 mkU64(2 * step)))));
5783 /* fallthru */
5784 case 2:
5785 assign(i1, MAYBE_WIDEN_FROM_64(
5786 loadLE(loadTy,
5787 binop(Iop_Add64, mkexpr(tTA),
5788 mkU64(1 * step)))));
5789 /* fallthru */
5790 case 1:
5791 assign(i0, MAYBE_WIDEN_FROM_64(
5792 loadLE(loadTy,
5793 binop(Iop_Add64, mkexpr(tTA),
5794 mkU64(0 * step)))));
5795 break;
5796 default:
5797 vassert(0);
5798 }
5799 # undef MAYBE_WIDEN_FROM_64
5800 switch (nRegs) {
5801 case 4: (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64)
5802 (&u0, &u1, &u2, &u3, sz, i0,i1,i2,i3);
5803 break;
5804 case 3: (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64)
5805 (&u0, &u1, &u2, sz, i0, i1, i2);
5806 break;
5807 case 2: (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64)
5808 (&u0, &u1, sz, i0, i1);
5809 break;
5810 case 1: (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64)
5811 (&u0, sz, i0);
5812 break;
5813 default: vassert(0);
5814 }
5815 switch (nRegs) {
5816 case 4: putQReg128( (tt+3) % 32,
5817 math_MAYBE_ZERO_HI64(bitQ, u3));
5818 /* fallthru */
5819 case 3: putQReg128( (tt+2) % 32,
5820 math_MAYBE_ZERO_HI64(bitQ, u2));
5821 /* fallthru */
5822 case 2: putQReg128( (tt+1) % 32,
5823 math_MAYBE_ZERO_HI64(bitQ, u1));
5824 /* fallthru */
5825 case 1: putQReg128( (tt+0) % 32,
5826 math_MAYBE_ZERO_HI64(bitQ, u0));
5827 break;
5828 default: vassert(0);
5829 }
5830 }
5831
5832 /* -- END generate the transfers -- */
5833
5834 /* Do the writeback, if necessary */
5835 if (isPX) {
5836 putIReg64orSP(nn, mkexpr(tWB));
5837 }
5838
5839 HChar pxStr[20];
5840 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
5841 if (isPX) {
5842 if (mm == BITS5(1,1,1,1,1))
5843 vex_sprintf(pxStr, ", #%u", xferSzB);
5844 else
5845 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
5846 }
5847 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
5848 DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
5849 isLD ? "ld" : "st", nRegs,
5850 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
5851 pxStr);
5852
5853 return True;
5854 }
5855 /* else fall through */
5856 }
5857
5858 /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs ------ */
5859 /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs ------ */
5860 /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs ------ */
5861 /* 31 29 26 22 21 20 15 11 9 4
5862
5863 0q 001 1000 L 0 00000 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP]
5864 0q 001 1001 L 0 m 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP], step
5865
5866 0q 001 1000 L 0 00000 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP]
5867 0q 001 1001 L 0 m 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP], step
5868
5869 0q 001 1000 L 0 00000 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP]
5870 0q 001 1001 L 0 m 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP], step
5871
5872 T = defined by Q and sz in the normal way
5873 step = if m == 11111 then transfer-size else Xm
5874 xx = case L of 1 -> LD ; 0 -> ST
5875 */
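/* Unlike the LD2/LD3/LD4 cases above, these LD1/ST1 variants do no
   (de)interleaving: each register is simply transferred as one
   contiguous 64- or 128-bit block. */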
5876 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
5877 && INSN(21,21) == 0) {
5878 Bool bitQ = INSN(30,30);
5879 Bool isPX = INSN(23,23) == 1;
5880 Bool isLD = INSN(22,22) == 1;
5881 UInt mm = INSN(20,16);
5882 UInt opc = INSN(15,12);
5883 UInt sz = INSN(11,10);
5884 UInt nn = INSN(9,5);
5885 UInt tt = INSN(4,0);
5886 Bool isQ = bitQ == 1;
5887 UInt nRegs = 0;
5888 switch (opc) {
5889 case BITS4(0,0,1,0): nRegs = 4; break;
5890 case BITS4(0,1,1,0): nRegs = 3; break;
5891 case BITS4(1,0,1,0): nRegs = 2; break;
5892 default: break;
5893 }
5894
5895 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
5896 If we see it, set nRegs to 0 so as to cause the next conditional
5897 to fail. */
5898 if (!isPX && mm != 0)
5899 nRegs = 0;
5900
5901 if (nRegs >= 2 && nRegs <= 4) {
5902
5903 UInt xferSzB = (isQ ? 16 : 8) * nRegs;
5904
5905 /* Generate the transfer address (TA) and if necessary the
5906 writeback address (WB) */
5907 IRTemp tTA = newTemp(Ity_I64);
5908 assign(tTA, getIReg64orSP(nn));
5909 if (nn == 31) { /* FIXME generate stack alignment check */ }
5910 IRTemp tWB = IRTemp_INVALID;
5911 if (isPX) {
5912 tWB = newTemp(Ity_I64);
5913 assign(tWB, binop(Iop_Add64,
5914 mkexpr(tTA),
5915 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
5916 : getIReg64orZR(mm)));
5917 }
5918
5919 /* -- BEGIN generate the transfers -- */
5920
5921 IRTemp u0, u1, u2, u3;
5922 u0 = u1 = u2 = u3 = IRTemp_INVALID;
5923 switch (nRegs) {
5924 case 4: u3 = newTempV128(); /* fallthru */
5925 case 3: u2 = newTempV128(); /* fallthru */
5926 case 2: u1 = newTempV128();
5927 u0 = newTempV128(); break;
5928 default: vassert(0);
5929 }
5930
5931 /* -- Multiple 128 or 64 bit stores -- */
5932 if (!isLD) {
5933 switch (nRegs) {
5934 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
5935 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
5936 case 2: assign(u1, getQReg128((tt+1) % 32));
5937 assign(u0, getQReg128((tt+0) % 32)); break;
5938 default: vassert(0);
5939 }
5940 # define MAYBE_NARROW_TO_64(_expr) \
5941 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
5942 UInt step = isQ ? 16 : 8;
5943 switch (nRegs) {
5944 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
5945 MAYBE_NARROW_TO_64(mkexpr(u3)) );
5946 /* fallthru */
5947 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
5948 MAYBE_NARROW_TO_64(mkexpr(u2)) );
5949 /* fallthru */
5950 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
5951 MAYBE_NARROW_TO_64(mkexpr(u1)) );
5952 storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
5953 MAYBE_NARROW_TO_64(mkexpr(u0)) );
5954 break;
5955 default: vassert(0);
5956 }
5957 # undef MAYBE_NARROW_TO_64
5958 }
5959
5960 /* -- Multiple 128 or 64 bit loads -- */
5961 else /* isLD */ {
5962 UInt step = isQ ? 16 : 8;
5963 IRType loadTy = isQ ? Ity_V128 : Ity_I64;
5964 # define MAYBE_WIDEN_FROM_64(_expr) \
5965 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
5966 switch (nRegs) {
5967 case 4:
5968 assign(u3, MAYBE_WIDEN_FROM_64(
5969 loadLE(loadTy,
5970 binop(Iop_Add64, mkexpr(tTA),
5971 mkU64(3 * step)))));
5972 /* fallthru */
5973 case 3:
5974 assign(u2, MAYBE_WIDEN_FROM_64(
5975 loadLE(loadTy,
5976 binop(Iop_Add64, mkexpr(tTA),
5977 mkU64(2 * step)))));
5978 /* fallthru */
5979 case 2:
5980 assign(u1, MAYBE_WIDEN_FROM_64(
5981 loadLE(loadTy,
5982 binop(Iop_Add64, mkexpr(tTA),
5983 mkU64(1 * step)))));
5984 assign(u0, MAYBE_WIDEN_FROM_64(
5985 loadLE(loadTy,
5986 binop(Iop_Add64, mkexpr(tTA),
5987 mkU64(0 * step)))));
5988 break;
5989 default:
5990 vassert(0);
5991 }
5992 # undef MAYBE_WIDEN_FROM_64
5993 switch (nRegs) {
5994 case 4: putQReg128( (tt+3) % 32,
5995 math_MAYBE_ZERO_HI64(bitQ, u3));
5996 /* fallthru */
5997 case 3: putQReg128( (tt+2) % 32,
5998 math_MAYBE_ZERO_HI64(bitQ, u2));
5999 /* fallthru */
6000 case 2: putQReg128( (tt+1) % 32,
6001 math_MAYBE_ZERO_HI64(bitQ, u1));
6002 putQReg128( (tt+0) % 32,
6003 math_MAYBE_ZERO_HI64(bitQ, u0));
6004 break;
6005 default: vassert(0);
6006 }
6007 }
6008
6009 /* -- END generate the transfers -- */
6010
6011 /* Do the writeback, if necessary */
6012 if (isPX) {
6013 putIReg64orSP(nn, mkexpr(tWB));
6014 }
6015
6016 HChar pxStr[20];
6017 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6018 if (isPX) {
6019 if (mm == BITS5(1,1,1,1,1))
6020 vex_sprintf(pxStr, ", #%u", xferSzB);
6021 else
6022 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6023 }
6024 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6025 DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
6026 isLD ? "ld" : "st",
6027 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6028 pxStr);
6029
6030 return True;
6031 }
6032 /* else fall through */
6033 }
6034
6035 /* ---------- LD1R (single structure, replicate) ---------- */
6036 /* ---------- LD2R (single structure, replicate) ---------- */
6037 /* ---------- LD3R (single structure, replicate) ---------- */
6038 /* ---------- LD4R (single structure, replicate) ---------- */
6039 /* 31 29 22 20 15 11 9 4
6040 0q 001 1010 10 00000 110 0 sz n t LD1R {Vt.T}, [Xn|SP]
6041 0q 001 1011 10 m 110 0 sz n t LD1R {Vt.T}, [Xn|SP], step
6042
6043 0q 001 1010 11 00000 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP]
6044 0q 001 1011 11 m 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP], step
6045
6046 0q 001 1010 10 00000 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP]
6047 0q 001 1011 10 m 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP], step
6048
6049 0q 001 1010 11 00000 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP]
6050 0q 001 1011 11 m 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP], step
6051
6052 step = if m == 11111 then transfer-size else Xm
6053 */
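/* Illustrative example: "ld4r {v0.8h, v1.8h, v2.8h, v3.8h}, [x2]"
   loads four consecutive 16-bit elements from [x2], [x2+2], [x2+4]
   and [x2+6] and replicates each across all 8 lanes of v0, v1, v2
   and v3 respectively (no writeback, since insn[23] == 0). */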
6054 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
6055 && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
6056 && INSN(12,12) == 0) {
6057 UInt bitQ = INSN(30,30);
6058 Bool isPX = INSN(23,23) == 1;
6059 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6060 UInt mm = INSN(20,16);
6061 UInt sz = INSN(11,10);
6062 UInt nn = INSN(9,5);
6063 UInt tt = INSN(4,0);
6064
6065 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6066 if (isPX || mm == 0) {
6067
6068 IRType ty = integerIRTypeOfSize(1 << sz);
6069
6070 UInt laneSzB = 1 << sz;
6071 UInt xferSzB = laneSzB * nRegs;
6072
6073 /* Generate the transfer address (TA) and if necessary the
6074 writeback address (WB) */
6075 IRTemp tTA = newTemp(Ity_I64);
6076 assign(tTA, getIReg64orSP(nn));
6077 if (nn == 31) { /* FIXME generate stack alignment check */ }
6078 IRTemp tWB = IRTemp_INVALID;
6079 if (isPX) {
6080 tWB = newTemp(Ity_I64);
6081 assign(tWB, binop(Iop_Add64,
6082 mkexpr(tTA),
6083 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6084 : getIReg64orZR(mm)));
6085 }
6086
6087 /* Do the writeback, if necessary */
6088 if (isPX) {
6089 putIReg64orSP(nn, mkexpr(tWB));
6090 }
6091
6092 IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
6093 e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
6094 switch (nRegs) {
6095 case 4:
6096 e3 = newTemp(ty);
6097 assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6098 mkU64(3 * laneSzB))));
6099 v3 = math_DUP_TO_V128(e3, ty);
6100 putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
6101 /* fallthrough */
6102 case 3:
6103 e2 = newTemp(ty);
6104 assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6105 mkU64(2 * laneSzB))));
6106 v2 = math_DUP_TO_V128(e2, ty);
6107 putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
6108 /* fallthrough */
6109 case 2:
6110 e1 = newTemp(ty);
6111 assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6112 mkU64(1 * laneSzB))));
6113 v1 = math_DUP_TO_V128(e1, ty);
6114 putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
6115 /* fallthrough */
6116 case 1:
6117 e0 = newTemp(ty);
6118 assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6119 mkU64(0 * laneSzB))));
6120 v0 = math_DUP_TO_V128(e0, ty);
6121 putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
6122 break;
6123 default:
6124 vassert(0);
6125 }
6126
6127 HChar pxStr[20];
6128 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6129 if (isPX) {
6130 if (mm == BITS5(1,1,1,1,1))
6131 vex_sprintf(pxStr, ", #%u", xferSzB);
6132 else
6133 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6134 }
6135 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6136 DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
6137 nRegs,
6138 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6139 pxStr);
6140
6141 return True;
6142 }
6143 /* else fall through */
6144 }
6145
6146 /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
6147 /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
6148 /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
6149 /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
6150 /* 31 29 22 21 20 15 11 9 4
6151 0q 001 1010 L 0 00000 xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP]
6152 0q 001 1011 L 0 m xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP], step
6153
6154 0q 001 1010 L 1 00000 xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP]
6155 0q 001 1011 L 1 m xx0 S sz n t op2 {Vt..t+1.T}[ix], [Xn|SP], step
6156
6157 0q 001 1010 L 0 00000 xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP]
6158 0q 001 1011 L 0 m xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP], step
6159
6160 0q 001 1010 L 1 00000 xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP]
6161 0q 001 1011 L 1 m xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP], step
6162
6163 step = if m == 11111 then transfer-size else Xm
6164 op = case L of 1 -> LD ; 0 -> ST
6165
6166 laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
6167 01:b:b:b0 -> 2, bbb
6168 10:b:b:00 -> 4, bb
6169 10:b:0:01 -> 8, b
6170 */
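/* Worked example of the table above (illustrative only): for
   "st3 {v4.h, v5.h, v6.h}[5], [x1]" we have xx == 01 and
   q:S:sz == 1:0:10, so laneSzB == 2 and ix == 0b101 == 5; lane 5 of
   v4, v5 and v6 is stored at [x1], [x1+2] and [x1+4] respectively. */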
6171 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
6172 UInt bitQ = INSN(30,30);
6173 Bool isPX = INSN(23,23) == 1;
6174 Bool isLD = INSN(22,22) == 1;
6175 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6176 UInt mm = INSN(20,16);
6177 UInt xx = INSN(15,14);
6178 UInt bitS = INSN(12,12);
6179 UInt sz = INSN(11,10);
6180 UInt nn = INSN(9,5);
6181 UInt tt = INSN(4,0);
6182
6183 Bool valid = True;
6184
6185 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6186 if (!isPX && mm != 0)
6187 valid = False;
6188
6189 UInt laneSzB = 0; /* invalid */
6190 UInt ix = 16; /* invalid */
6191
6192 UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
6193 switch (xx_q_S_sz) {
6194 case 0x00: case 0x01: case 0x02: case 0x03:
6195 case 0x04: case 0x05: case 0x06: case 0x07:
6196 case 0x08: case 0x09: case 0x0A: case 0x0B:
6197 case 0x0C: case 0x0D: case 0x0E: case 0x0F:
6198 laneSzB = 1; ix = xx_q_S_sz & 0xF;
6199 break;
6200 case 0x10: case 0x12: case 0x14: case 0x16:
6201 case 0x18: case 0x1A: case 0x1C: case 0x1E:
6202 laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
6203 break;
6204 case 0x20: case 0x24: case 0x28: case 0x2C:
6205 laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
6206 break;
6207 case 0x21: case 0x29:
6208 laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
6209 break;
6210 default:
6211 break;
6212 }
6213
6214 if (valid && laneSzB != 0) {
6215
6216 IRType ty = integerIRTypeOfSize(laneSzB);
6217 UInt xferSzB = laneSzB * nRegs;
6218
6219 /* Generate the transfer address (TA) and if necessary the
6220 writeback address (WB) */
6221 IRTemp tTA = newTemp(Ity_I64);
6222 assign(tTA, getIReg64orSP(nn));
6223 if (nn == 31) { /* FIXME generate stack alignment check */ }
6224 IRTemp tWB = IRTemp_INVALID;
6225 if (isPX) {
6226 tWB = newTemp(Ity_I64);
6227 assign(tWB, binop(Iop_Add64,
6228 mkexpr(tTA),
6229 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6230 : getIReg64orZR(mm)));
6231 }
6232
6233 /* Do the writeback, if necessary */
6234 if (isPX) {
6235 putIReg64orSP(nn, mkexpr(tWB));
6236 }
6237
6238 switch (nRegs) {
6239 case 4: {
6240 IRExpr* addr
6241 = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
6242 if (isLD) {
6243 putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
6244 } else {
6245 storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
6246 }
6247 /* fallthrough */
6248 }
6249 case 3: {
6250 IRExpr* addr
6251 = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
6252 if (isLD) {
6253 putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
6254 } else {
6255 storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
6256 }
6257 /* fallthrough */
6258 }
6259 case 2: {
6260 IRExpr* addr
6261 = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
6262 if (isLD) {
6263 putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
6264 } else {
6265 storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
6266 }
6267 /* fallthrough */
6268 }
6269 case 1: {
6270 IRExpr* addr
6271 = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
6272 if (isLD) {
6273 putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
6274 } else {
6275 storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
6276 }
6277 break;
6278 }
6279 default:
6280 vassert(0);
6281 }
6282
6283 HChar pxStr[20];
6284 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6285 if (isPX) {
6286 if (mm == BITS5(1,1,1,1,1))
6287 vex_sprintf(pxStr, ", #%u", xferSzB);
6288 else
6289 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6290 }
6291 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6292 DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
6293 isLD ? "ld" : "st", nRegs,
6294 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
6295 ix, nameIReg64orSP(nn), pxStr);
6296
6297 return True;
6298 }
6299 /* else fall through */
6300 }
6301
6302 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
6303 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
6304 /* 31 29 23 20 14 9 4
6305 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP]
6306 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP]
6307 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP]
6308 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP]
6309 */
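/* These are modelled with IRStmt_LLSC.  For reference, a typical
   user-level retry loop built from them looks like (illustrative only):
      loop: ldxr w1, [x0]
            add  w1, w1, #1
            stxr w2, w1, [x0]     ; w2 == 0 on success, 1 on failure
            cbnz w2, loop
*/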
6310 if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
6311 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
6312 && INSN(14,10) == BITS5(1,1,1,1,1)) {
6313 UInt szBlg2 = INSN(31,30);
6314 Bool isLD = INSN(22,22) == 1;
6315 Bool isAcqOrRel = INSN(15,15) == 1;
6316 UInt ss = INSN(20,16);
6317 UInt nn = INSN(9,5);
6318 UInt tt = INSN(4,0);
6319
6320 vassert(szBlg2 < 4);
6321 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6322 IRType ty = integerIRTypeOfSize(szB);
6323 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6324
6325 IRTemp ea = newTemp(Ity_I64);
6326 assign(ea, getIReg64orSP(nn));
6327 /* FIXME generate check that ea is szB-aligned */
6328
6329 if (isLD && ss == BITS5(1,1,1,1,1)) {
6330 IRTemp res = newTemp(ty);
6331 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
6332 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6333 if (isAcqOrRel) {
6334 stmt(IRStmt_MBE(Imbe_Fence));
6335 }
6336 DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6337 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6338 return True;
6339 }
6340 if (!isLD) {
6341 if (isAcqOrRel) {
6342 stmt(IRStmt_MBE(Imbe_Fence));
6343 }
6344 IRTemp res = newTemp(Ity_I1);
6345 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6346 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
6347 /* IR semantics: res is 1 if store succeeds, 0 if it fails.
6348 Need to set rS to 1 on failure, 0 on success. */
6349 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
6350 mkU64(1)));
6351 DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6352 nameIRegOrZR(False, ss),
6353 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6354 return True;
6355 }
6356 /* else fall through */
6357 }
6358
6359 /* ------------------ LDA{R,RH,RB} ------------------ */
6360 /* ------------------ STL{R,RH,RB} ------------------ */
6361 /* 31 29 23 20 14 9 4
6362 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP]
6363 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP]
6364 */
6365 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
6366 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
6367 UInt szBlg2 = INSN(31,30);
6368 Bool isLD = INSN(22,22) == 1;
6369 UInt nn = INSN(9,5);
6370 UInt tt = INSN(4,0);
6371
6372 vassert(szBlg2 < 4);
6373 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6374 IRType ty = integerIRTypeOfSize(szB);
6375 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6376
6377 IRTemp ea = newTemp(Ity_I64);
6378 assign(ea, getIReg64orSP(nn));
6379 /* FIXME generate check that ea is szB-aligned */
6380
6381 if (isLD) {
6382 IRTemp res = newTemp(ty);
6383 assign(res, loadLE(ty, mkexpr(ea)));
6384 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6385 stmt(IRStmt_MBE(Imbe_Fence));
6386 DIP("lda%s %s, [%s]\n", suffix[szBlg2],
6387 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6388 } else {
6389 stmt(IRStmt_MBE(Imbe_Fence));
6390 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6391 storeLE(mkexpr(ea), data);
6392 DIP("stl%s %s, [%s]\n", suffix[szBlg2],
6393 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6394 }
6395 return True;
6396 }
6397
6398 /* ------------------ PRFM (immediate) ------------------ */
6399 /* 31 21 9 4
6400 11 111 00110 imm12 n t PRFM prfop=Rt, [Xn|SP, #pimm]
6401 */
6402 if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
6403 UInt imm12 = INSN(21,10);
6404 UInt nn = INSN(9,5);
6405 UInt tt = INSN(4,0);
6406 /* Generating any IR here is pointless, except for documentation
6407 purposes, as it will get optimised away later. */
6408 IRTemp ea = newTemp(Ity_I64);
6409 assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
6410 DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
6411 return True;
6412 }
6413
6414 vex_printf("ARM64 front end: load_store\n");
6415 return False;
6416 # undef INSN
6417 }
6418
6419
6420 /*------------------------------------------------------------*/
6421 /*--- Control flow and misc instructions ---*/
6422 /*------------------------------------------------------------*/
6423
6424 static
6425 Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
6426 const VexArchInfo* archinfo)
6427 {
6428 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
6429
6430 /* ---------------------- B cond ----------------------- */
6431 /* 31 24 4 3
6432 0101010 0 imm19 0 cond */
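/* Translation scheme (sketch): emit a conditional side-exit to the
   branch target, then fall through by setting PC to the next
   instruction and stopping the block.  E.g. "b.eq 0x1000" becomes,
   roughly: if (EQ holds) goto 0x1000; else PC = here + 4. */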
6433 if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
6434 UInt cond = INSN(3,0);
6435 ULong uimm64 = INSN(23,5) << 2;
6436 Long simm64 = (Long)sx_to_64(uimm64, 21);
6437 vassert(dres->whatNext == Dis_Continue);
6438 vassert(dres->len == 4);
6439 vassert(dres->continueAt == 0);
6440 vassert(dres->jk_StopHere == Ijk_INVALID);
6441 stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
6442 Ijk_Boring,
6443 IRConst_U64(guest_PC_curr_instr + simm64),
6444 OFFB_PC) );
6445 putPC(mkU64(guest_PC_curr_instr + 4));
6446 dres->whatNext = Dis_StopHere;
6447 dres->jk_StopHere = Ijk_Boring;
6448 DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
6449 return True;
6450 }
6451
6452 /* -------------------- B{L} uncond -------------------- */
6453 if (INSN(30,26) == BITS5(0,0,1,0,1)) {
6454 /* 000101 imm26 B (PC + sxTo64(imm26 << 2))
6455 100101 imm26 BL (PC + sxTo64(imm26 << 2))
6456 */
6457 UInt bLink = INSN(31,31);
6458 ULong uimm64 = INSN(25,0) << 2;
6459 Long simm64 = (Long)sx_to_64(uimm64, 28);
6460 if (bLink) {
6461 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
6462 }
6463 putPC(mkU64(guest_PC_curr_instr + simm64));
6464 dres->whatNext = Dis_StopHere;
6465 dres->jk_StopHere = Ijk_Call;
6466 DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
6467 guest_PC_curr_instr + simm64);
6468 return True;
6469 }
6470
6471 /* --------------------- B{L} reg --------------------- */
6472 /* 31 24 22 20 15 9 4
6473 1101011 00 10 11111 000000 nn 00000 RET Rn
6474 1101011 00 01 11111 000000 nn 00000 CALL Rn
6475 1101011 00 00 11111 000000 nn 00000 JMP Rn
6476 */
6477 if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
6478 && INSN(20,16) == BITS5(1,1,1,1,1)
6479 && INSN(15,10) == BITS6(0,0,0,0,0,0)
6480 && INSN(4,0) == BITS5(0,0,0,0,0)) {
6481 UInt branch_type = INSN(22,21);
6482 UInt nn = INSN(9,5);
6483 if (branch_type == BITS2(1,0) /* RET */) {
6484 putPC(getIReg64orZR(nn));
6485 dres->whatNext = Dis_StopHere;
6486 dres->jk_StopHere = Ijk_Ret;
6487 DIP("ret %s\n", nameIReg64orZR(nn));
6488 return True;
6489 }
6490 if (branch_type == BITS2(0,1) /* CALL */) {
6491 IRTemp dst = newTemp(Ity_I64);
6492 assign(dst, getIReg64orZR(nn));
6493 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
6494 putPC(mkexpr(dst));
6495 dres->whatNext = Dis_StopHere;
6496 dres->jk_StopHere = Ijk_Call;
6497 DIP("blr %s\n", nameIReg64orZR(nn));
6498 return True;
6499 }
6500 if (branch_type == BITS2(0,0) /* JMP */) {
6501 putPC(getIReg64orZR(nn));
6502 dres->whatNext = Dis_StopHere;
6503 dres->jk_StopHere = Ijk_Boring;
6504 DIP("jmp %s\n", nameIReg64orZR(nn));
6505 return True;
6506 }
6507 }
6508
6509 /* -------------------- CB{N}Z -------------------- */
6510 /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
6511 sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2))
6512 */
6513 if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
6514 Bool is64 = INSN(31,31) == 1;
6515 Bool bIfZ = INSN(24,24) == 0;
6516 ULong uimm64 = INSN(23,5) << 2;
6517 UInt rT = INSN(4,0);
6518 Long simm64 = (Long)sx_to_64(uimm64, 21);
6519 IRExpr* cond = NULL;
6520 if (is64) {
6521 cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
6522 getIReg64orZR(rT), mkU64(0));
6523 } else {
6524 cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
6525 getIReg32orZR(rT), mkU32(0));
6526 }
6527 stmt( IRStmt_Exit(cond,
6528 Ijk_Boring,
6529 IRConst_U64(guest_PC_curr_instr + simm64),
6530 OFFB_PC) );
6531 putPC(mkU64(guest_PC_curr_instr + 4));
6532 dres->whatNext = Dis_StopHere;
6533 dres->jk_StopHere = Ijk_Boring;
6534 DIP("cb%sz %s, 0x%llx\n",
6535 bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
6536 guest_PC_curr_instr + simm64);
6537 return True;
6538 }
6539
6540 /* -------------------- TB{N}Z -------------------- */
6541 /* 31 30 24 23 18 5 4
6542 b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
6543 b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
6544 */
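/* Illustrative example: "tbz x5, #33, <target>" has b5 == 1 and
   b40 == 00001, so bitNo == 33; the branch is taken iff bit 33 of x5
   is zero. */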
6545 if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
6546 UInt b5 = INSN(31,31);
6547 Bool bIfZ = INSN(24,24) == 0;
6548 UInt b40 = INSN(23,19);
6549 UInt imm14 = INSN(18,5);
6550 UInt tt = INSN(4,0);
6551 UInt bitNo = (b5 << 5) | b40;
6552 ULong uimm64 = imm14 << 2;
6553 Long simm64 = sx_to_64(uimm64, 16);
6554 IRExpr* cond
6555 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
6556 binop(Iop_And64,
6557 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
6558 mkU64(1)),
6559 mkU64(0));
6560 stmt( IRStmt_Exit(cond,
6561 Ijk_Boring,
6562 IRConst_U64(guest_PC_curr_instr + simm64),
6563 OFFB_PC) );
6564 putPC(mkU64(guest_PC_curr_instr + 4));
6565 dres->whatNext = Dis_StopHere;
6566 dres->jk_StopHere = Ijk_Boring;
6567 DIP("tb%sz %s, #%u, 0x%llx\n",
6568 bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
6569 guest_PC_curr_instr + simm64);
6570 return True;
6571 }
6572
6573 /* -------------------- SVC -------------------- */
6574 /* 11010100 000 imm16 000 01
6575 Don't bother with anything except the imm16==0 case.
6576 */
6577 if (INSN(31,0) == 0xD4000001) {
6578 putPC(mkU64(guest_PC_curr_instr + 4));
6579 dres->whatNext = Dis_StopHere;
6580 dres->jk_StopHere = Ijk_Sys_syscall;
6581 DIP("svc #0\n");
6582 return True;
6583 }
6584
6585 /* ------------------ M{SR,RS} ------------------ */
6586 /* ---- Cases for TPIDR_EL0 ----
6587 0xD51BD0 010 Rt MSR tpidr_el0, rT
6588 0xD53BD0 010 Rt MRS rT, tpidr_el0
6589 */
6590 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
6591 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
6592 Bool toSys = INSN(21,21) == 0;
6593 UInt tt = INSN(4,0);
6594 if (toSys) {
6595 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
6596 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
6597 } else {
6598 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
6599 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
6600 }
6601 return True;
6602 }
6603 /* ---- Cases for FPCR ----
6604 0xD51B44 000 Rt MSR fpcr, rT
6605 0xD53B44 000 Rt MRS rT, fpcr
6606 */
6607 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
6608 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
6609 Bool toSys = INSN(21,21) == 0;
6610 UInt tt = INSN(4,0);
6611 if (toSys) {
6612 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
6613 DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
6614 } else {
6615 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
6616 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
6617 }
6618 return True;
6619 }
6620 /* ---- Cases for FPSR ----
6621 0xD51B44 001 Rt MSR fpsr, rT
6622 0xD53B44 001 Rt MRS rT, fpsr
6623 The only part of this we model is FPSR.QC. All other bits
6624 are ignored when writing to it and RAZ when reading from it.
6625 */
6626 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
6627 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
6628 Bool toSys = INSN(21,21) == 0;
6629 UInt tt = INSN(4,0);
6630 if (toSys) {
6631 /* Just deal with FPSR.QC. Make up a V128 value which is
6632 zero if Xt[27] is zero and any other value if Xt[27] is
6633 nonzero. */
6634 IRTemp qc64 = newTemp(Ity_I64);
6635 assign(qc64, binop(Iop_And64,
6636 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
6637 mkU64(1)));
6638 IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
6639 stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
6640 DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
6641 } else {
6642 /* Generate a value which is all zeroes except for bit 27,
6643 which must be zero if QCFLAG is all zeroes and one otherwise. */
6644 IRTemp qcV128 = newTempV128();
6645 assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
6646 IRTemp qc64 = newTemp(Ity_I64);
6647 assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
6648 unop(Iop_V128to64, mkexpr(qcV128))));
6649 IRExpr* res = binop(Iop_Shl64,
6650 unop(Iop_1Uto64,
6651 binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
6652 mkU8(27));
6653 putIReg64orZR(tt, res);
6654 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
6655 }
6656 return True;
6657 }
6658 /* ---- Cases for NZCV ----
6659 D51B42 000 Rt MSR nzcv, rT
6660 D53B42 000 Rt MRS rT, nzcv
6661 The only parts of NZCV that actually exist are bits 31:28, which
6662 are the N Z C and V bits themselves. Hence the flags thunk provides
6663 all the state we need.
6664 */
6665 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
6666 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
6667 Bool toSys = INSN(21,21) == 0;
6668 UInt tt = INSN(4,0);
6669 if (toSys) {
6670 IRTemp t = newTemp(Ity_I64);
6671 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
6672 setFlags_COPY(t);
6673 DIP("msr nzcv, %s\n", nameIReg32orZR(tt));
6674 } else {
6675 IRTemp res = newTemp(Ity_I64);
6676 assign(res, mk_arm64g_calculate_flags_nzcv());
6677 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
6678 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
6679 }
6680 return True;
6681 }
6682 /* ---- Cases for DCZID_EL0 ----
6683 Don't support arbitrary reads and writes to this register. Just
6684 return the value 16, which indicates that the DC ZVA instruction
6685 is not permitted, so we don't have to emulate it.
6686 D5 3B 00 111 Rt MRS rT, dczid_el0
6687 */
6688 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
6689 UInt tt = INSN(4,0);
6690 putIReg64orZR(tt, mkU64(1<<4));
6691 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
6692 return True;
6693 }
6694 /* ---- Cases for CTR_EL0 ----
6695 We just handle reads, and make up a value from the D and I line
6696 sizes in the VexArchInfo we are given, and patch in the following
6697 fields that the Foundation model gives ("natively"):
6698 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
6699      D5 3B 00 001 Rt  MRS rT, ctr_el0
6700 */
6701 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
6702 UInt tt = INSN(4,0);
6703       /* Need to generate a value from dMinLine_lg2_szB and
6704          iMinLine_lg2_szB.  The register expresses the line sizes in
6705          32-bit units, so we need to subtract 2 from the values in the
6706          VexArchInfo.  We can assume that the values here are valid --
6707 disInstr_ARM64 checks them -- so there's no need to deal with
6708 out-of-range cases. */
6709 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
6710 && archinfo->arm64_dMinLine_lg2_szB <= 17
6711 && archinfo->arm64_iMinLine_lg2_szB >= 2
6712 && archinfo->arm64_iMinLine_lg2_szB <= 17);
6713 UInt val
6714 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
6715 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
6716 putIReg64orZR(tt, mkU64(val));
6717 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
6718 return True;
6719 }
6720 /* ---- Cases for CNTVCT_EL0 ----
6721      This is the Counter-timer Virtual Count register.  Support reads of it only
6722 by passing through to the host.
6723 D5 3B E0 010 Rt MRS Xt, cntvct_el0
6724 */
6725 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
6726 UInt tt = INSN(4,0);
6727 IRTemp val = newTemp(Ity_I64);
6728 IRExpr** args = mkIRExprVec_0();
6729 IRDirty* d = unsafeIRDirty_1_N (
6730 val,
6731 0/*regparms*/,
6732 "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
6733 &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
6734 args
6735 );
6736 /* execute the dirty call, dumping the result in val. */
6737 stmt( IRStmt_Dirty(d) );
6738 putIReg64orZR(tt, mkexpr(val));
6739 DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
6740 return True;
6741 }
6742
6743 /* ------------------ IC_IVAU ------------------ */
6744 /* D5 0B 75 001 Rt ic ivau, rT
6745 */
6746 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
6747 /* We will always be provided with a valid iMinLine value. */
6748 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
6749 && archinfo->arm64_iMinLine_lg2_szB <= 17);
6750 /* Round the requested address, in rT, down to the start of the
6751 containing block. */
6752 UInt tt = INSN(4,0);
6753 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
6754 IRTemp addr = newTemp(Ity_I64);
6755 assign( addr, binop( Iop_And64,
6756 getIReg64orZR(tt),
6757 mkU64(~(lineszB - 1))) );
6758 /* Set the invalidation range, request exit-and-invalidate, with
6759 continuation at the next instruction. */
6760 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
6761 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
6762 /* be paranoid ... */
6763 stmt( IRStmt_MBE(Imbe_Fence) );
6764 putPC(mkU64( guest_PC_curr_instr + 4 ));
6765 dres->whatNext = Dis_StopHere;
6766 dres->jk_StopHere = Ijk_InvalICache;
6767 DIP("ic ivau, %s\n", nameIReg64orZR(tt));
6768 return True;
6769 }
6770
6771 /* ------------------ DC_CVAU ------------------ */
6772 /* D5 0B 7B 001 Rt dc cvau, rT
6773 */
6774 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
6775 /* Exactly the same scheme as for IC IVAU, except we observe the
6776 dMinLine size, and request an Ijk_FlushDCache instead of
6777 Ijk_InvalICache. */
6778 /* We will always be provided with a valid dMinLine value. */
6779 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
6780 && archinfo->arm64_dMinLine_lg2_szB <= 17);
6781 /* Round the requested address, in rT, down to the start of the
6782 containing block. */
6783 UInt tt = INSN(4,0);
6784 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
6785 IRTemp addr = newTemp(Ity_I64);
6786 assign( addr, binop( Iop_And64,
6787 getIReg64orZR(tt),
6788 mkU64(~(lineszB - 1))) );
6789 /* Set the flush range, request exit-and-flush, with
6790 continuation at the next instruction. */
6791 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
6792 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
6793 /* be paranoid ... */
6794 stmt( IRStmt_MBE(Imbe_Fence) );
6795 putPC(mkU64( guest_PC_curr_instr + 4 ));
6796 dres->whatNext = Dis_StopHere;
6797 dres->jk_StopHere = Ijk_FlushDCache;
6798 DIP("dc cvau, %s\n", nameIReg64orZR(tt));
6799 return True;
6800 }
6801
6802 /* ------------------ ISB, DMB, DSB ------------------ */
6803 /* 31 21 11 7 6 4
6804 11010 10100 0 00 011 0011 CRm 1 01 11111 DMB opt
6805 11010 10100 0 00 011 0011 CRm 1 00 11111 DSB opt
6806 11010 10100 0 00 011 0011 CRm 1 10 11111 ISB opt
6807 */
6808 if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
6809 && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
6810 && INSN(7,7) == 1
6811 && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
6812 UInt opc = INSN(6,5);
6813 UInt CRm = INSN(11,8);
6814 vassert(opc <= 2 && CRm <= 15);
6815 stmt(IRStmt_MBE(Imbe_Fence));
6816 const HChar* opNames[3]
6817 = { "dsb", "dmb", "isb" };
6818 const HChar* howNames[16]
6819 = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
6820 "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
6821 DIP("%s %s\n", opNames[opc], howNames[CRm]);
6822 return True;
6823 }
6824
6825 /* -------------------- NOP -------------------- */
6826 if (INSN(31,0) == 0xD503201F) {
6827 DIP("nop\n");
6828 return True;
6829 }
6830
6831 /* -------------------- BRK -------------------- */
6832 /* 31 23 20 4
6833 1101 0100 001 imm16 00000 BRK #imm16
6834 */
6835 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
6836 && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
6837 UInt imm16 = INSN(20,5);
6838 /* Request SIGTRAP and then restart of this insn. */
6839 putPC(mkU64(guest_PC_curr_instr + 0));
6840 dres->whatNext = Dis_StopHere;
6841 dres->jk_StopHere = Ijk_SigTRAP;
6842 DIP("brk #%u\n", imm16);
6843 return True;
6844 }
6845
6846 //fail:
6847 vex_printf("ARM64 front end: branch_etc\n");
6848 return False;
6849 # undef INSN
6850 }
6851
6852
6853 /*------------------------------------------------------------*/
6854 /*--- SIMD and FP instructions: helper functions ---*/
6855 /*------------------------------------------------------------*/
6856
6857 /* Some constructors for interleave/deinterleave expressions. */
6858
6859 static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
6860 // returns a0 b0
6861 return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
6862 }
6863
6864 static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
6865 // returns a1 b1
6866 return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
6867 }
6868
6869 static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
6870 // returns a2 a0 b2 b0
6871 return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
6872 }
6873
6874 static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
6875 // returns a3 a1 b3 b1
6876 return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
6877 }
6878
6879 static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
6880 // returns a1 b1 a0 b0
6881 return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
6882 }
6883
6884 static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
6885 // returns a3 b3 a2 b2
6886 return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
6887 }
6888
6889 static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6890 // returns a6 a4 a2 a0 b6 b4 b2 b0
6891 return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
6892 }
6893
6894 static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6895 // returns a7 a5 a3 a1 b7 b5 b3 b1
6896 return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
6897 }
6898
6899 static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6900 // returns a3 b3 a2 b2 a1 b1 a0 b0
6901 return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
6902 }
6903
6904 static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6905 // returns a7 b7 a6 b6 a5 b5 a4 b4
6906 return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
6907 }
6908
6909 static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
6910 IRTemp bFEDCBA9876543210 ) {
6911 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
6912 return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
6913 mkexpr(bFEDCBA9876543210));
6914 }
6915
6916 static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
6917 IRTemp bFEDCBA9876543210 ) {
6918 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
6919 return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
6920 mkexpr(bFEDCBA9876543210));
6921 }
6922
6923 static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
6924 IRTemp bFEDCBA9876543210 ) {
6925 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
6926 return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
6927 mkexpr(bFEDCBA9876543210));
6928 }
6929
6930 static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
6931 IRTemp bFEDCBA9876543210 ) {
6932 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
6933 return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
6934 mkexpr(bFEDCBA9876543210));
6935 }
6936
6937 /* Generate N copies of |bit| in the bottom of a ULong. */
6938 static ULong Replicate ( ULong bit, Int N )
6939 {
6940 vassert(bit <= 1 && N >= 1 && N < 64);
6941 if (bit == 0) {
6942 return 0;
6943 } else {
6944 /* Careful. This won't work for N == 64. */
6945 return (1ULL << N) - 1;
6946 }
6947 }
6948
6949 static ULong Replicate32x2 ( ULong bits32 )
6950 {
6951 vassert(0 == (bits32 & ~0xFFFFFFFFULL));
6952 return (bits32 << 32) | bits32;
6953 }
6954
6955 static ULong Replicate16x4 ( ULong bits16 )
6956 {
6957 vassert(0 == (bits16 & ~0xFFFFULL));
6958 return Replicate32x2((bits16 << 16) | bits16);
6959 }
6960
6961 static ULong Replicate8x8 ( ULong bits8 )
6962 {
6963 vassert(0 == (bits8 & ~0xFFULL));
6964 return Replicate16x4((bits8 << 8) | bits8);
6965 }
6966
6967 /* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
6968 |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
6969 is 64. In the former case, the upper 32 bits of the returned value
6970 are guaranteed to be zero. */
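/* A worked example (sketch): VFPExpandImm(0x70, 64) gives
   0x3FF0000000000000, which is 1.0 as an IEEE754 double -- the imm8
   encoding used by "fmov d0, #1.0". */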
6971 static ULong VFPExpandImm ( ULong imm8, Int N )
6972 {
6973 vassert(imm8 <= 0xFF);
6974 vassert(N == 32 || N == 64);
6975 Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
6976 Int F = N - E - 1;
6977 ULong imm8_6 = (imm8 >> 6) & 1;
6978 /* sign: 1 bit */
6979 /* exp: E bits */
6980 /* frac: F bits */
6981 ULong sign = (imm8 >> 7) & 1;
6982 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
6983 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
6984 vassert(sign < (1ULL << 1));
6985 vassert(exp < (1ULL << E));
6986 vassert(frac < (1ULL << F));
6987 vassert(1 + E + F == N);
6988 ULong res = (sign << (E+F)) | (exp << F) | frac;
6989 return res;
6990 }
6991
6992 /* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
6993 This might fail, as indicated by the returned Bool. Page 2530 of
6994 the manual. */
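/* A worked example: op=0, cmode=0b0000, imm8=0xAB yields
   imm64 = 0x000000AB000000AB, i.e. the byte replicated into each
   32-bit lane. */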
6995 static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
6996 UInt op, UInt cmode, UInt imm8 )
6997 {
6998 vassert(op <= 1);
6999 vassert(cmode <= 15);
7000 vassert(imm8 <= 255);
7001
7002 *res = 0; /* will overwrite iff returning True */
7003
7004 ULong imm64 = 0;
7005 Bool testimm8 = False;
7006
7007 switch (cmode >> 1) {
7008 case 0:
7009 testimm8 = False; imm64 = Replicate32x2(imm8); break;
7010 case 1:
7011 testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
7012 case 2:
7013 testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
7014 case 3:
7015 testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
7016 case 4:
7017 testimm8 = False; imm64 = Replicate16x4(imm8); break;
7018 case 5:
7019 testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
7020 case 6:
7021 testimm8 = True;
7022 if ((cmode & 1) == 0)
7023 imm64 = Replicate32x2((imm8 << 8) | 0xFF);
7024 else
7025 imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
7026 break;
7027 case 7:
7028 testimm8 = False;
7029 if ((cmode & 1) == 0 && op == 0)
7030 imm64 = Replicate8x8(imm8);
7031 if ((cmode & 1) == 0 && op == 1) {
7032 imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
7033 imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
7034 imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
7035 imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
7036 imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
7037 imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
7038 imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
7039 imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
7040 }
7041 if ((cmode & 1) == 1 && op == 0) {
7042 ULong imm8_7 = (imm8 >> 7) & 1;
7043 ULong imm8_6 = (imm8 >> 6) & 1;
7044 ULong imm8_50 = imm8 & 63;
7045 ULong imm32 = (imm8_7 << (1 + 5 + 6 + 19))
7046 | ((imm8_6 ^ 1) << (5 + 6 + 19))
7047 | (Replicate(imm8_6, 5) << (6 + 19))
7048 | (imm8_50 << 19);
7049 imm64 = Replicate32x2(imm32);
7050 }
7051 if ((cmode & 1) == 1 && op == 1) {
7052 // imm64 = imm8<7>:NOT(imm8<6>)
7053 // :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
7054 ULong imm8_7 = (imm8 >> 7) & 1;
7055 ULong imm8_6 = (imm8 >> 6) & 1;
7056 ULong imm8_50 = imm8 & 63;
7057 imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
7058 | (Replicate(imm8_6, 8) << 54)
7059 | (imm8_50 << 48);
7060 }
7061 break;
7062 default:
7063 vassert(0);
7064 }
7065
7066 if (testimm8 && imm8 == 0)
7067 return False;
7068
7069 *res = imm64;
7070 return True;
7071 }
7072
7073 /* Helper for decoding laneage for vector operations that can be
7074 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
7075 and SZ bits, typically for vector floating point. */
7076 static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
7077 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
7078 /*OUT*/const HChar** arrSpec,
7079 Bool bitQ, Bool bitSZ )
7080 {
7081 vassert(bitQ == True || bitQ == False);
7082 vassert(bitSZ == True || bitSZ == False);
7083 if (bitQ && bitSZ) { // 2x64
7084 if (tyI) *tyI = Ity_I64;
7085 if (tyF) *tyF = Ity_F64;
7086 if (nLanes) *nLanes = 2;
7087 if (zeroUpper) *zeroUpper = False;
7088 if (arrSpec) *arrSpec = "2d";
7089 return True;
7090 }
7091 if (bitQ && !bitSZ) { // 4x32
7092 if (tyI) *tyI = Ity_I32;
7093 if (tyF) *tyF = Ity_F32;
7094 if (nLanes) *nLanes = 4;
7095 if (zeroUpper) *zeroUpper = False;
7096 if (arrSpec) *arrSpec = "4s";
7097 return True;
7098 }
7099 if (!bitQ && !bitSZ) { // 2x32
7100 if (tyI) *tyI = Ity_I32;
7101 if (tyF) *tyF = Ity_F32;
7102 if (nLanes) *nLanes = 2;
7103 if (zeroUpper) *zeroUpper = True;
7104 if (arrSpec) *arrSpec = "2s";
7105 return True;
7106 }
7107 // Else impliedly 1x64, which isn't allowed.
7108 return False;
7109 }
7110
7111 /* Helper for decoding laneage for shift-style vector operations
7112 that involve an immediate shift amount. */
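/* For example, immh=0b0001, immb=0b101 gives immhb = 13, hence 8-bit
   lanes (szBlg2 = 0) and a right-shift amount of 16 - 13 = 3. */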
7113 static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
7114 UInt immh, UInt immb )
7115 {
7116 vassert(immh < (1<<4));
7117 vassert(immb < (1<<3));
7118 UInt immhb = (immh << 3) | immb;
7119 if (immh & 8) {
7120 if (shift) *shift = 128 - immhb;
7121 if (szBlg2) *szBlg2 = 3;
7122 return True;
7123 }
7124 if (immh & 4) {
7125 if (shift) *shift = 64 - immhb;
7126 if (szBlg2) *szBlg2 = 2;
7127 return True;
7128 }
7129 if (immh & 2) {
7130 if (shift) *shift = 32 - immhb;
7131 if (szBlg2) *szBlg2 = 1;
7132 return True;
7133 }
7134 if (immh & 1) {
7135 if (shift) *shift = 16 - immhb;
7136 if (szBlg2) *szBlg2 = 0;
7137 return True;
7138 }
7139 return False;
7140 }
7141
7142 /* Generate IR to fold all lanes of the V128 value in 'src' as
7143 characterised by the operator 'op', and return the result in the
7144 bottom bits of a V128, with all other bits set to zero. */
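/* For instance, with op = Iop_Add32x4 and src = [a3 a2 a1 a0], the
   result is [0 0 0 (a3+a2+a1+a0)]. */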
7145 static IRTemp math_FOLDV ( IRTemp src, IROp op )
7146 {
7147 /* The basic idea is to use repeated applications of Iop_CatEven*
7148 and Iop_CatOdd* operators to 'src' so as to clone each lane into
7149 a complete vector. Then fold all those vectors with 'op' and
7150 zero out all but the least significant lane. */
7151 switch (op) {
7152 case Iop_Min8Sx16: case Iop_Min8Ux16:
7153 case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
7154 /* NB: temp naming here is misleading -- the naming is for 8
7155 lanes of 16 bit, whereas what is being operated on is 16
7156 lanes of 8 bits. */
7157 IRTemp x76543210 = src;
7158 IRTemp x76547654 = newTempV128();
7159 IRTemp x32103210 = newTempV128();
7160 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7161 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
7162 IRTemp x76767676 = newTempV128();
7163 IRTemp x54545454 = newTempV128();
7164 IRTemp x32323232 = newTempV128();
7165 IRTemp x10101010 = newTempV128();
7166 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7167 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7168 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7169 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
7170 IRTemp x77777777 = newTempV128();
7171 IRTemp x66666666 = newTempV128();
7172 IRTemp x55555555 = newTempV128();
7173 IRTemp x44444444 = newTempV128();
7174 IRTemp x33333333 = newTempV128();
7175 IRTemp x22222222 = newTempV128();
7176 IRTemp x11111111 = newTempV128();
7177 IRTemp x00000000 = newTempV128();
7178 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7179 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7180 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7181 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7182 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7183 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7184 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7185 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
7186 /* Naming not misleading after here. */
7187 IRTemp xAllF = newTempV128();
7188 IRTemp xAllE = newTempV128();
7189 IRTemp xAllD = newTempV128();
7190 IRTemp xAllC = newTempV128();
7191 IRTemp xAllB = newTempV128();
7192 IRTemp xAllA = newTempV128();
7193 IRTemp xAll9 = newTempV128();
7194 IRTemp xAll8 = newTempV128();
7195 IRTemp xAll7 = newTempV128();
7196 IRTemp xAll6 = newTempV128();
7197 IRTemp xAll5 = newTempV128();
7198 IRTemp xAll4 = newTempV128();
7199 IRTemp xAll3 = newTempV128();
7200 IRTemp xAll2 = newTempV128();
7201 IRTemp xAll1 = newTempV128();
7202 IRTemp xAll0 = newTempV128();
7203 assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
7204 assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
7205 assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
7206 assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
7207 assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
7208 assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
7209 assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
7210 assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
7211 assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
7212 assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
7213 assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
7214 assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
7215 assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
7216 assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
7217 assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
7218 assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
7219 IRTemp maxFE = newTempV128();
7220 IRTemp maxDC = newTempV128();
7221 IRTemp maxBA = newTempV128();
7222 IRTemp max98 = newTempV128();
7223 IRTemp max76 = newTempV128();
7224 IRTemp max54 = newTempV128();
7225 IRTemp max32 = newTempV128();
7226 IRTemp max10 = newTempV128();
7227 assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
7228 assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
7229 assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
7230 assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
7231 assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
7232 assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
7233 assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
7234 assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
7235 IRTemp maxFEDC = newTempV128();
7236 IRTemp maxBA98 = newTempV128();
7237 IRTemp max7654 = newTempV128();
7238 IRTemp max3210 = newTempV128();
7239 assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
7240 assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
7241 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7242 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7243 IRTemp maxFEDCBA98 = newTempV128();
7244 IRTemp max76543210 = newTempV128();
7245 assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
7246 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
7247 IRTemp maxAllLanes = newTempV128();
7248 assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
7249 mkexpr(max76543210)));
7250 IRTemp res = newTempV128();
7251 assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
7252 return res;
7253 }
7254 case Iop_Min16Sx8: case Iop_Min16Ux8:
7255 case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
7256 IRTemp x76543210 = src;
7257 IRTemp x76547654 = newTempV128();
7258 IRTemp x32103210 = newTempV128();
7259 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7260 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
7261 IRTemp x76767676 = newTempV128();
7262 IRTemp x54545454 = newTempV128();
7263 IRTemp x32323232 = newTempV128();
7264 IRTemp x10101010 = newTempV128();
7265 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7266 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7267 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7268 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
7269 IRTemp x77777777 = newTempV128();
7270 IRTemp x66666666 = newTempV128();
7271 IRTemp x55555555 = newTempV128();
7272 IRTemp x44444444 = newTempV128();
7273 IRTemp x33333333 = newTempV128();
7274 IRTemp x22222222 = newTempV128();
7275 IRTemp x11111111 = newTempV128();
7276 IRTemp x00000000 = newTempV128();
7277 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7278 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7279 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7280 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7281 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7282 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7283 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7284 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
7285 IRTemp max76 = newTempV128();
7286 IRTemp max54 = newTempV128();
7287 IRTemp max32 = newTempV128();
7288 IRTemp max10 = newTempV128();
7289 assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
7290 assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
7291 assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
7292 assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
7293 IRTemp max7654 = newTempV128();
7294 IRTemp max3210 = newTempV128();
7295 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7296 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7297 IRTemp max76543210 = newTempV128();
7298 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
7299 IRTemp res = newTempV128();
7300 assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
7301 return res;
7302 }
7303 case Iop_Max32Fx4: case Iop_Min32Fx4:
7304 case Iop_Min32Sx4: case Iop_Min32Ux4:
7305 case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
7306 IRTemp x3210 = src;
7307 IRTemp x3232 = newTempV128();
7308 IRTemp x1010 = newTempV128();
7309 assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
7310 assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
7311 IRTemp x3333 = newTempV128();
7312 IRTemp x2222 = newTempV128();
7313 IRTemp x1111 = newTempV128();
7314 IRTemp x0000 = newTempV128();
7315 assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
7316 assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
7317 assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
7318 assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
7319 IRTemp max32 = newTempV128();
7320 IRTemp max10 = newTempV128();
7321 assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
7322 assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
7323 IRTemp max3210 = newTempV128();
7324 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
7325 IRTemp res = newTempV128();
7326 assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
7327 return res;
7328 }
7329 case Iop_Add64x2: {
7330 IRTemp x10 = src;
7331 IRTemp x00 = newTempV128();
7332 IRTemp x11 = newTempV128();
7333 assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
7334 assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
7335 IRTemp max10 = newTempV128();
7336 assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
7337 IRTemp res = newTempV128();
7338 assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
7339 return res;
7340 }
7341 default:
7342 vassert(0);
7343 }
7344 }
7345
7346
7347 /* Generate IR for TBL and TBX. This deals with the 128 bit case
7348 only. */
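/* Roughly: each byte of |src| indexes into the 16*(len+1)-byte table
   formed by tab[0..len]; an out-of-range index selects the
   corresponding byte of |oor_values| instead (all zeroes for TBL, the
   old destination value for TBX). */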
7349 static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
7350 IRTemp oor_values )
7351 {
7352 vassert(len >= 0 && len <= 3);
7353
7354 /* Generate some useful constants as concisely as possible. */
7355 IRTemp half15 = newTemp(Ity_I64);
7356 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
7357 IRTemp half16 = newTemp(Ity_I64);
7358 assign(half16, mkU64(0x1010101010101010ULL));
7359
7360 /* A zero vector */
7361 IRTemp allZero = newTempV128();
7362 assign(allZero, mkV128(0x0000));
7363 /* A vector containing 15 in each 8-bit lane */
7364 IRTemp all15 = newTempV128();
7365 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
7366 /* A vector containing 16 in each 8-bit lane */
7367 IRTemp all16 = newTempV128();
7368 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
7369 /* A vector containing 32 in each 8-bit lane */
7370 IRTemp all32 = newTempV128();
7371 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
7372 /* A vector containing 48 in each 8-bit lane */
7373 IRTemp all48 = newTempV128();
7374 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
7375 /* A vector containing 64 in each 8-bit lane */
7376 IRTemp all64 = newTempV128();
7377 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
7378
7379 /* Group the 16/32/48/64 vectors so as to be indexable. */
7380 IRTemp allXX[4] = { all16, all32, all48, all64 };
7381
7382 /* Compute the result for each table vector, with zeroes in places
7383 where the index values are out of range, and OR them into the
7384 running vector. */
7385 IRTemp running_result = newTempV128();
7386 assign(running_result, mkV128(0));
7387
7388 UInt tabent;
7389 for (tabent = 0; tabent <= len; tabent++) {
7390 vassert(tabent >= 0 && tabent < 4);
7391 IRTemp bias = newTempV128();
7392 assign(bias,
7393 mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
7394 IRTemp biased_indices = newTempV128();
7395 assign(biased_indices,
7396 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
7397 IRTemp valid_mask = newTempV128();
7398 assign(valid_mask,
7399 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
7400 IRTemp safe_biased_indices = newTempV128();
7401 assign(safe_biased_indices,
7402 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
7403 IRTemp results_or_junk = newTempV128();
7404 assign(results_or_junk,
7405 binop(Iop_Perm8x16, mkexpr(tab[tabent]),
7406 mkexpr(safe_biased_indices)));
7407 IRTemp results_or_zero = newTempV128();
7408 assign(results_or_zero,
7409 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
7410 /* And OR that into the running result. */
7411 IRTemp tmp = newTempV128();
7412 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
7413 mkexpr(running_result)));
7414 running_result = tmp;
7415 }
7416
7417 /* So now running_result holds the overall result where the indices
7418 are in range, and zero in out-of-range lanes. Now we need to
7419 compute an overall validity mask and use this to copy in the
7420 lanes in the oor_values for out of range indices. This is
7421 unnecessary for TBL but will get folded out by iropt, so we lean
7422 on that and generate the same code for TBL and TBX here. */
7423 IRTemp overall_valid_mask = newTempV128();
7424 assign(overall_valid_mask,
7425 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
7426 IRTemp result = newTempV128();
7427 assign(result,
7428 binop(Iop_OrV128,
7429 mkexpr(running_result),
7430 binop(Iop_AndV128,
7431 mkexpr(oor_values),
7432 unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
7433 return result;
7434 }
7435
7436
7437 /* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
7438 an op which takes two I64s and produces a V128. That is, a widening
7439 operator. Generate IR which applies |opI64x2toV128| to either the
7440 lower (if |is2| is False) or upper (if |is2| is True) halves of
7441 |argL| and |argR|, and return the value in a new IRTemp.
7442 */
7443 static
7444 IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
7445 IRExpr* argL, IRExpr* argR )
7446 {
7447 IRTemp res = newTempV128();
7448 IROp slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
7449 assign(res, binop(opI64x2toV128, unop(slice, argL),
7450 unop(slice, argR)));
7451 return res;
7452 }
7453
7454
7455 /* Generate signed/unsigned absolute difference vector IR. */
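/* Sketch of the scheme: a greater-than mask selects, per lane,
   whichever of (argL - argR) and (argR - argL) is the non-negative
   one, so e.g. lanes holding 5 and 9 (in either order) produce 4. */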
7456 static
7457 IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
7458 {
7459 vassert(size <= 3);
7460 IRTemp argL = newTempV128();
7461 IRTemp argR = newTempV128();
7462 IRTemp msk = newTempV128();
7463 IRTemp res = newTempV128();
7464 assign(argL, argLE);
7465 assign(argR, argRE);
7466 assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
7467 mkexpr(argL), mkexpr(argR)));
7468 assign(res,
7469 binop(Iop_OrV128,
7470 binop(Iop_AndV128,
7471 binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
7472 mkexpr(msk)),
7473 binop(Iop_AndV128,
7474 binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
7475 unop(Iop_NotV128, mkexpr(msk)))));
7476 return res;
7477 }
7478
7479
7480 /* Generate IR that takes a V128 and sign- or zero-widens
7481 either the lower or upper set of lanes to twice-as-wide,
7482 resulting in a new V128 value. */
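/* The scheme, roughly: interleaving |src| with itself places each
   selected narrow lane in both halves of a wide lane, and the
   following right shift by the narrow width (logical when
   zero-widening, arithmetic when sign-widening) turns the upper copy
   into the extension bits. */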
7483 static
7484 IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
7485 UInt sizeNarrow, IRExpr* srcE )
7486 {
7487 IRTemp src = newTempV128();
7488 IRTemp res = newTempV128();
7489 assign(src, srcE);
7490 switch (sizeNarrow) {
7491 case X10:
7492 assign(res,
7493 binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
7494 binop(fromUpperHalf ? Iop_InterleaveHI32x4
7495 : Iop_InterleaveLO32x4,
7496 mkexpr(src),
7497 mkexpr(src)),
7498 mkU8(32)));
7499 break;
7500 case X01:
7501 assign(res,
7502 binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
7503 binop(fromUpperHalf ? Iop_InterleaveHI16x8
7504 : Iop_InterleaveLO16x8,
7505 mkexpr(src),
7506 mkexpr(src)),
7507 mkU8(16)));
7508 break;
7509 case X00:
7510 assign(res,
7511 binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
7512 binop(fromUpperHalf ? Iop_InterleaveHI8x16
7513 : Iop_InterleaveLO8x16,
7514 mkexpr(src),
7515 mkexpr(src)),
7516 mkU8(8)));
7517 break;
7518 default:
7519 vassert(0);
7520 }
7521 return res;
7522 }
7523
7524
7525 /* Generate IR that takes a V128 and sign- or zero-widens
7526 either the even or odd lanes to twice-as-wide,
7527 resulting in a new V128 value. */
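/* Roughly: viewed at the wide lane size, the odd narrow lanes already
   occupy the top halves, so a single right shift by the narrow width
   extends them; for the even lanes we first shift left by the narrow
   width and then shift right again. */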
7528 static
7529 IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
7530 UInt sizeNarrow, IRExpr* srcE )
7531 {
7532 IRTemp src = newTempV128();
7533 IRTemp res = newTempV128();
7534 IROp opSAR = mkVecSARN(sizeNarrow+1);
7535 IROp opSHR = mkVecSHRN(sizeNarrow+1);
7536 IROp opSHL = mkVecSHLN(sizeNarrow+1);
7537 IROp opSxR = zWiden ? opSHR : opSAR;
7538 UInt amt = 0;
7539 switch (sizeNarrow) {
7540 case X10: amt = 32; break;
7541 case X01: amt = 16; break;
7542 case X00: amt = 8; break;
7543 default: vassert(0);
7544 }
7545 assign(src, srcE);
7546 if (fromOdd) {
7547 assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
7548 } else {
7549 assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
7550 mkU8(amt)));
7551 }
7552 return res;
7553 }
7554
7555
7556 /* Generate IR that takes two V128s and narrows (takes lower half)
7557 of each lane, producing a single V128 value. */
7558 static
7559 IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
7560 {
7561 IRTemp res = newTempV128();
7562 assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
7563 mkexpr(argHi), mkexpr(argLo)));
7564 return res;
7565 }
7566
7567
7568 /* Return a temp which holds the vector dup of the lane of width
7569 (1 << size) obtained from src[laneNo]. */
7570 static
7571 IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
7572 {
7573 vassert(size <= 3);
7574 /* Normalise |laneNo| so it is of the form
7575 x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
7576 This puts the bits we want to inspect at constant offsets
7577 regardless of the value of |size|.
7578 */
7579 UInt ix = laneNo << size;
7580 vassert(ix <= 15);
7581 IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
7582 switch (size) {
7583 case 0: /* B */
7584 ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
7585 /* fallthrough */
7586 case 1: /* H */
7587 ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
7588 /* fallthrough */
7589 case 2: /* S */
7590 ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
7591 /* fallthrough */
7592 case 3: /* D */
7593 ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
7594 break;
7595 default:
7596 vassert(0);
7597 }
7598 IRTemp res = newTempV128();
7599 assign(res, src);
7600 Int i;
7601 for (i = 3; i >= 0; i--) {
7602 if (ops[i] == Iop_INVALID)
7603 break;
7604 IRTemp tmp = newTempV128();
7605 assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
7606 res = tmp;
7607 }
7608 return res;
7609 }
7610
7611
7612 /* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
7613 selector encoded as shown below. Return a new V128 holding the
7614 selected lane from |srcV| dup'd out to V128, and also return the
7615 lane number, log2 of the lane size in bytes, and width-character via
7616 *laneNo, *laneSzLg2 and *laneCh respectively. It may be that imm5
7617 is an invalid selector, in which case return
7618 IRTemp_INVALID, 0, 0 and '?' respectively.
7619
7620 imm5 = xxxx1 signifies .b[xxxx]
7621 = xxx10 .h[xxx]
7622 = xx100 .s[xx]
7623 = x1000 .d[x]
7624 otherwise invalid
7625 */
7626 static
7627 IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
7628 /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
7629 IRExpr* srcV, UInt imm5 )
7630 {
7631 *laneNo = 0;
7632 *laneSzLg2 = 0;
7633 *laneCh = '?';
7634
7635 if (imm5 & 1) {
7636 *laneNo = (imm5 >> 1) & 15;
7637 *laneSzLg2 = 0;
7638 *laneCh = 'b';
7639 }
7640 else if (imm5 & 2) {
7641 *laneNo = (imm5 >> 2) & 7;
7642 *laneSzLg2 = 1;
7643 *laneCh = 'h';
7644 }
7645 else if (imm5 & 4) {
7646 *laneNo = (imm5 >> 3) & 3;
7647 *laneSzLg2 = 2;
7648 *laneCh = 's';
7649 }
7650 else if (imm5 & 8) {
7651 *laneNo = (imm5 >> 4) & 1;
7652 *laneSzLg2 = 3;
7653 *laneCh = 'd';
7654 }
7655 else {
7656 /* invalid */
7657 return IRTemp_INVALID;
7658 }
7659
7660 return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
7661 }
7662
7663
7664 /* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
7665 static
7666 IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
7667 {
7668 IRType ty = Ity_INVALID;
7669 IRTemp rcS = IRTemp_INVALID;
7670 switch (size) {
7671 case X01:
7672 vassert(imm <= 0xFFFFULL);
7673 ty = Ity_I16;
7674 rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
7675 break;
7676 case X10:
7677 vassert(imm <= 0xFFFFFFFFULL);
7678 ty = Ity_I32;
7679 rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
7680 break;
7681 case X11:
7682 ty = Ity_I64;
7683 rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
7684 default:
7685 vassert(0);
7686 }
7687 IRTemp rcV = math_DUP_TO_V128(rcS, ty);
7688 return rcV;
7689 }
7690
7691
7692 /* Let |new64| be a V128 in which only the lower 64 bits are interesting,
7693 and the upper can contain any value -- it is ignored. If |is2| is False,
7694 generate IR to put |new64| in the lower half of vector reg |dd| and zero
7695 the upper half. If |is2| is True, generate IR to put |new64| in the upper
7696 half of vector reg |dd| and leave the lower half unchanged. This
7697 simulates the behaviour of the "foo/foo2" instructions in which the
7698 destination is half the width of sources, for example addhn/addhn2.
7699 */
7700 static
7701 void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
7702 {
7703 if (is2) {
7704       /* Get the old contents of Vdd, zero the upper half, and replace
7705          it with |new64|. */
7706 IRTemp t_zero_oldLO = newTempV128();
7707 assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
7708 IRTemp t_newHI_zero = newTempV128();
7709 assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
7710 mkV128(0x0000)));
7711 IRTemp res = newTempV128();
7712 assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
7713 mkexpr(t_newHI_zero)));
7714 putQReg128(dd, mkexpr(res));
7715 } else {
7716 /* This is simple. */
7717 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
7718 }
7719 }
7720
7721
7722 /* Compute vector SQABS at lane size |size| for |srcE|, returning
7723 the q result in |*qabs| and the normal result in |*nabs|. */
7724 static
7725 void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
7726 IRExpr* srcE, UInt size )
7727 {
7728 IRTemp src, mask, maskn, nsub, qsub;
7729 src = mask = maskn = nsub = qsub = IRTemp_INVALID;
7730 newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
7731 assign(src, srcE);
7732 assign(mask, binop(mkVecCMPGTS(size), mkV128(0x0000), mkexpr(src)));
7733 assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
7734 assign(nsub, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
7735 assign(qsub, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
7736 assign(*nabs, binop(Iop_OrV128,
7737 binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
7738 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
7739 assign(*qabs, binop(Iop_OrV128,
7740 binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
7741 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
7742 }
7743
7744
7745 /* Compute vector SQNEG at lane size |size| for |srcE|, returning
7746 the q result in |*qneg| and the normal result in |*nneg|. */
7747 static
7748 void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
7749 IRExpr* srcE, UInt size )
7750 {
7751 IRTemp src = IRTemp_INVALID;
7752 newTempsV128_3(&src, nneg, qneg);
7753 assign(src, srcE);
7754 assign(*nneg, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
7755 assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
7756 }
7757
7758
7759 /* Zero all except the least significant lane of |srcE|, where |size|
7760 indicates the lane size in the usual way. */
7761 static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
7762 {
7763 vassert(size < 4);
7764 IRTemp t = newTempV128();
7765 assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
7766 return t;
7767 }
7768
7769
7770 /* Generate IR to compute vector widening MULL from either the lower
7771 (is2==False) or upper (is2==True) halves of vecN and vecM. The
7772 widening multiplies are unsigned when isU==True and signed when
7773 isU==False. |size| is the narrow lane size indication. Optionally,
7774 the product may be added to or subtracted from vecD, at the wide lane
7775 size. This happens when |mas| is 'a' (add) or 's' (sub). When |mas|
7776 is 'm' (only multiply) then the accumulate part does not happen, and
7777 |vecD| is expected to == IRTemp_INVALID.
7778
7779 Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
7780    are allowed.  The result is placed in a new IRTemp and
7781    returned via *res. */
7782 static
7783 void math_MULL_ACC ( /*OUT*/IRTemp* res,
7784 Bool is2, Bool isU, UInt size, HChar mas,
7785 IRTemp vecN, IRTemp vecM, IRTemp vecD )
7786 {
7787 vassert(res && *res == IRTemp_INVALID);
7788 vassert(size <= 2);
7789 vassert(mas == 'm' || mas == 'a' || mas == 's');
7790 if (mas == 'm') vassert(vecD == IRTemp_INVALID);
7791 IROp mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
7792 IROp accOp = (mas == 'a') ? mkVecADD(size+1)
7793 : (mas == 's' ? mkVecSUB(size+1)
7794 : Iop_INVALID);
7795 IRTemp mul = math_BINARY_WIDENING_V128(is2, mulOp,
7796 mkexpr(vecN), mkexpr(vecM));
7797 *res = newTempV128();
7798 assign(*res, mas == 'm' ? mkexpr(mul)
7799 : binop(accOp, mkexpr(vecD), mkexpr(mul)));
7800 }
7801
7802
7803 /* Same as math_MULL_ACC, except the multiply is signed widening,
7804 the multiplied value is then doubled, before being added to or
7805 subtracted from the accumulated value. And everything is
7806 saturated. In all cases, saturation residuals are returned
7807 via (sat1q, sat1n), and in the accumulate cases,
7808 via (sat2q, sat2n) too. All results are returned in new temporaries.
7809 In the no-accumulate case, *sat2q and *sat2n are never instantiated,
7810 so the caller can tell this has happened. */
7811 static
7812 void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
7813 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
7814 /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
7815 Bool is2, UInt size, HChar mas,
7816 IRTemp vecN, IRTemp vecM, IRTemp vecD )
7817 {
7818 vassert(size <= 2);
7819 vassert(mas == 'm' || mas == 'a' || mas == 's');
7820 /* Compute
7821 sat1q = vecN.D[is2] *sq vecM.d[is2] *q 2
7822 sat1n = vecN.D[is2] *s vecM.d[is2] * 2
7823 IOW take either the low or high halves of vecN and vecM, signed widen,
7824 multiply, double that, and signedly saturate. Also compute the same
7825 but without saturation.
7826 */
7827 vassert(sat2q && *sat2q == IRTemp_INVALID);
7828 vassert(sat2n && *sat2n == IRTemp_INVALID);
7829 newTempsV128_3(sat1q, sat1n, res);
7830 IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
7831 mkexpr(vecN), mkexpr(vecM));
7832 IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
7833 mkexpr(vecN), mkexpr(vecM));
7834 assign(*sat1q, mkexpr(tq));
7835 assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));
7836
7837 /* If there is no accumulation, the final result is sat1q,
7838 and there's no assignment to sat2q or sat2n. */
7839 if (mas == 'm') {
7840 assign(*res, mkexpr(*sat1q));
7841 return;
7842 }
7843
7844 /* Compute
7845 sat2q = vecD +sq/-sq sat1q
7846 sat2n = vecD +/- sat1n
7847 result = sat2q
7848 */
7849 newTempsV128_2(sat2q, sat2n);
7850 assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
7851 mkexpr(vecD), mkexpr(*sat1q)));
7852 assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
7853 mkexpr(vecD), mkexpr(*sat1n)));
7854 assign(*res, mkexpr(*sat2q));
7855 }
7856
7857
7858 /* Generate IR for widening signed vector multiplies. The operands
7859 have their lane width signedly widened, and they are then multiplied
7860 at the wider width, returning results in two new IRTemps. */
7861 static
7862 void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
7863 UInt sizeNarrow, IRTemp argL, IRTemp argR )
7864 {
7865 vassert(sizeNarrow <= 2);
7866 newTempsV128_2(resHI, resLO);
7867 IRTemp argLhi = newTemp(Ity_I64);
7868 IRTemp argLlo = newTemp(Ity_I64);
7869 IRTemp argRhi = newTemp(Ity_I64);
7870 IRTemp argRlo = newTemp(Ity_I64);
7871 assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
7872 assign(argLlo, unop(Iop_V128to64, mkexpr(argL)));
7873 assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
7874 assign(argRlo, unop(Iop_V128to64, mkexpr(argR)));
7875 IROp opMulls = mkVecMULLS(sizeNarrow);
7876 assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
7877 assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
7878 }
7879
7880
7881 /* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
7882 double that, possibly add a rounding constant (R variants), and take
7883 the high half. */
7884 static
7885 void math_SQDMULH ( /*OUT*/IRTemp* res,
7886 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
7887 Bool isR, UInt size, IRTemp vN, IRTemp vM )
7888 {
7889 vassert(size == X01 || size == X10); /* s or h only */
7890
7891 newTempsV128_3(res, sat1q, sat1n);
7892
7893 IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
7894 math_MULLS(&mullsHI, &mullsLO, size, vN, vM);
7895
7896    IROp addWide = mkVecADD(size+1);
7897
7898 if (isR) {
7899 assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));
7900
7901 Int rcShift = size == X01 ? 15 : 31;
7902 IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
7903 assign(*sat1n,
7904 binop(mkVecCATODDLANES(size),
7905 binop(addWide,
7906 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
7907 mkexpr(roundConst)),
7908 binop(addWide,
7909 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
7910 mkexpr(roundConst))));
7911 } else {
7912 assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));
7913
7914 assign(*sat1n,
7915 binop(mkVecCATODDLANES(size),
7916 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
7917 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
7918 }
7919
7920 assign(*res, mkexpr(*sat1q));
7921 }
7922
7923
7924 /* Generate IR for SQSHL, UQSHL, SQSHLU by imm. Put the result in
7925 a new temp in *res, and the Q difference pair in new temps in
7926 *qDiff1 and *qDiff2 respectively. |nm| denotes which of the
7927 three operations it is. */
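/* For example (uqshl, 8-bit lanes, shift=3): *qDiff1 is src >> 5 and
   *qDiff2 is zero, so the pair differ exactly when some shifted-out
   bit was nonzero -- that is, when the lane saturated -- and the
   caller can set QCFLAG on that basis. */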
7928 static
7929 void math_QSHL_IMM ( /*OUT*/IRTemp* res,
7930 /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
7931 IRTemp src, UInt size, UInt shift, const HChar* nm )
7932 {
7933 vassert(size <= 3);
7934 UInt laneBits = 8 << size;
7935 vassert(shift < laneBits);
7936 newTempsV128_3(res, qDiff1, qDiff2);
7937 IRTemp z128 = newTempV128();
7938 assign(z128, mkV128(0x0000));
7939
7940 /* UQSHL */
7941 if (vex_streq(nm, "uqshl")) {
7942 IROp qop = mkVecQSHLNSATUU(size);
7943 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
7944 if (shift == 0) {
7945 /* No shift means no saturation. */
7946 assign(*qDiff1, mkexpr(z128));
7947 assign(*qDiff2, mkexpr(z128));
7948 } else {
7949 /* Saturation has occurred if any of the shifted-out bits are
7950 nonzero. We get the shifted-out bits by right-shifting the
7951 original value. */
7952 UInt rshift = laneBits - shift;
7953 vassert(rshift >= 1 && rshift < laneBits);
7954 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
7955 assign(*qDiff2, mkexpr(z128));
7956 }
7957 return;
7958 }
7959
7960 /* SQSHL */
7961 if (vex_streq(nm, "sqshl")) {
7962 IROp qop = mkVecQSHLNSATSS(size);
7963 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
7964 if (shift == 0) {
7965 /* No shift means no saturation. */
7966 assign(*qDiff1, mkexpr(z128));
7967 assign(*qDiff2, mkexpr(z128));
7968 } else {
7969 /* Saturation has occurred if any of the shifted-out bits are
7970 different from the top bit of the original value. */
7971 UInt rshift = laneBits - 1 - shift;
7972 vassert(rshift >= 0 && rshift < laneBits-1);
7973 /* qDiff1 is the shifted out bits, and the top bit of the original
7974 value, preceded by zeroes. */
7975 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
7976 /* qDiff2 is the top bit of the original value, cloned the
7977 correct number of times. */
7978 assign(*qDiff2, binop(mkVecSHRN(size),
7979 binop(mkVecSARN(size), mkexpr(src),
7980 mkU8(laneBits-1)),
7981 mkU8(rshift)));
7982 /* This also succeeds in comparing the top bit of the original
7983 value to itself, which is a bit stupid, but not wrong. */
7984 }
7985 return;
7986 }
7987
7988 /* SQSHLU */
7989 if (vex_streq(nm, "sqshlu")) {
7990 IROp qop = mkVecQSHLNSATSU(size);
7991 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
7992 if (shift == 0) {
7993 /* If there's no shift, saturation depends on the top bit
7994 of the source. */
7995 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
7996 assign(*qDiff2, mkexpr(z128));
7997 } else {
7998 /* Saturation has occurred if any of the shifted-out bits are
7999 nonzero. We get the shifted-out bits by right-shifting the
8000 original value. */
8001 UInt rshift = laneBits - shift;
8002 vassert(rshift >= 1 && rshift < laneBits);
8003 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8004 assign(*qDiff2, mkexpr(z128));
8005 }
8006 return;
8007 }
8008
8009 vassert(0);
8010 }
8011
8012
8013 /* Generate IR to do SRHADD and URHADD. */
8014 static
8015 IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
8016 {
8017 /* Generate this:
8018 (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
8019 */
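   /* This equals (A + B + 1) >> 1 per lane, computed without overflow,
      since each operand is halved before the addition.  For example,
      with unsigned 8-bit lanes A = 0xFF and B = 0xFE:
      0x7F + 0x7F + ((1 + 0 + 1) >> 1) = 0xFF, which matches
      (0xFF + 0xFE + 1) >> 1 computed at a wider width. */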
8020 vassert(size <= 3);
8021 IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
8022 IROp opADD = mkVecADD(size);
8023 /* The only tricky bit is to generate the correct vector 1 constant. */
8024 const ULong ones64[4]
8025 = { 0x0101010101010101ULL, 0x0001000100010001ULL,
8026 0x0000000100000001ULL, 0x0000000000000001ULL };
8027 IRTemp imm64 = newTemp(Ity_I64);
8028 assign(imm64, mkU64(ones64[size]));
8029 IRTemp vecOne = newTempV128();
8030 assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
8031 IRTemp scaOne = newTemp(Ity_I8);
8032 assign(scaOne, mkU8(1));
8033 IRTemp res = newTempV128();
8034 assign(res,
8035 binop(opADD,
8036 binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
8037 binop(opADD,
8038 binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
8039 binop(opSHR,
8040 binop(opADD,
8041 binop(opADD,
8042 binop(Iop_AndV128, mkexpr(aa),
8043 mkexpr(vecOne)),
8044 binop(Iop_AndV128, mkexpr(bb),
8045 mkexpr(vecOne))
8046 ),
8047 mkexpr(vecOne)
8048 ),
8049 mkexpr(scaOne)
8050 )
8051 )
8052 )
8053 );
8054 return res;
8055 }
8056
8057
8058 /* QCFLAG tracks the SIMD sticky saturation status. Update the status
8059 thusly: if, after application of |opZHI| to both |qres| and |nres|,
8060 they have the same value, leave QCFLAG unchanged. Otherwise, set it
8061 (implicitly) to 1. |opZHI| may only be one of the Iop_ZeroHIxxofV128
8062 operators, or Iop_INVALID, in which case |qres| and |nres| are used
8063    unmodified.  The presence of |opZHI| means this function can be used to
8064 generate QCFLAG update code for both scalar and vector SIMD operations.
8065 */
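/* (For scalar operations only the lowest lane of |qres| and |nres| is
   meaningful, which is why the caller passes an Iop_ZeroHIxxofV128 op
   to mask the comparison down to that lane.) */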
8066 static
8067 void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
8068 {
8069 IRTemp diff = newTempV128();
8070 IRTemp oldQCFLAG = newTempV128();
8071 IRTemp newQCFLAG = newTempV128();
8072 if (opZHI == Iop_INVALID) {
8073 assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
8074 } else {
8075 vassert(opZHI == Iop_ZeroHI64ofV128
8076 || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
8077 assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
8078 }
8079 assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
8080 assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
8081 stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
8082 }
8083
8084
8085 /* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
8086 are used unmodified, hence suitable for QCFLAG updates for whole-vector
8087 operations. */
8088 static
8089 void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
8090 {
8091 updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
8092 }
8093
8094
8095 /* Generate IR to rearrange two vector values in a way which is useful
8096 for doing S/D add-pair etc operations. There are 3 cases:
8097
8098 2d: [m1 m0] [n1 n0] --> [m1 n1] [m0 n0]
8099
8100 4s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [m3 m1 n3 n1] [m2 m0 n2 n0]
8101
8102    2s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [0 0 m1 n1] [0 0 m0 n0]
8103
8104 The cases are distinguished as follows:
8105 isD == True, bitQ == 1 => 2d
8106 isD == False, bitQ == 1 => 4s
8107 isD == False, bitQ == 0 => 2s
8108 */
8109 static
8110 void math_REARRANGE_FOR_FLOATING_PAIRWISE (
8111 /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
8112 IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ
8113 )
8114 {
8115 vassert(rearrL && *rearrL == IRTemp_INVALID);
8116 vassert(rearrR && *rearrR == IRTemp_INVALID);
8117 *rearrL = newTempV128();
8118 *rearrR = newTempV128();
8119 if (isD) {
8120 // 2d case
8121 vassert(bitQ == 1);
8122 assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN)));
8123 assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN)));
8124 }
8125 else if (!isD && bitQ == 1) {
8126 // 4s case
8127 assign(*rearrL, binop(Iop_CatOddLanes32x4, mkexpr(vecM), mkexpr(vecN)));
8128 assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN)));
8129 } else {
8130 // 2s case
8131 vassert(!isD && bitQ == 0);
8132 IRTemp m1n1m0n0 = newTempV128();
8133 IRTemp m0n0m1n1 = newTempV128();
8134 assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
8135 mkexpr(vecM), mkexpr(vecN)));
8136 assign(m0n0m1n1, triop(Iop_SliceV128,
8137 mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
8138 assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
8139 assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
8140 }
8141 }
8142
8143
8144 /* Returns 2.0 ^ (-n) for n in 1 .. 64 */
8145 static Double two_to_the_minus ( Int n )
8146 {
8147 if (n == 1) return 0.5;
8148 vassert(n >= 2 && n <= 64);
8149 Int half = n / 2;
8150 return two_to_the_minus(half) * two_to_the_minus(n - half);
8151 }
8152
8153
8154 /* Returns 2.0 ^ n for n in 1 .. 64 */
8155 static Double two_to_the_plus ( Int n )
8156 {
8157 if (n == 1) return 2.0;
8158 vassert(n >= 2 && n <= 64);
8159 Int half = n / 2;
8160 return two_to_the_plus(half) * two_to_the_plus(n - half);
8161 }
8162
8163
8164 /*------------------------------------------------------------*/
8165 /*--- SIMD and FP instructions ---*/
8166 /*------------------------------------------------------------*/
8167
8168 static
8169 Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
8170 {
8171 /* 31 29 23 21 20 15 14 10 9 4
8172 0 q 101110 op2 0 m 0 imm4 0 n d
8173 Decode fields: op2
8174 */
8175 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8176 if (INSN(31,31) != 0
8177 || INSN(29,24) != BITS6(1,0,1,1,1,0)
8178 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
8179 return False;
8180 }
8181 UInt bitQ = INSN(30,30);
8182 UInt op2 = INSN(23,22);
8183 UInt mm = INSN(20,16);
8184 UInt imm4 = INSN(14,11);
8185 UInt nn = INSN(9,5);
8186 UInt dd = INSN(4,0);
8187
8188 if (op2 == BITS2(0,0)) {
8189 /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
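      /* Worked example for the 16b form: Iop_SliceV128(Vm, Vn, imm4)
         yields the low 128 bits of (Vm:Vn) >> (8*imm4), i.e. bytes
         imm4 .. imm4+15 of the 32-byte concatenation, which is exactly
         what EXT Vd.16b, Vn.16b, Vm.16b, #imm4 produces. */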
8190 IRTemp sHi = newTempV128();
8191 IRTemp sLo = newTempV128();
8192 IRTemp res = newTempV128();
8193 assign(sHi, getQReg128(mm));
8194 assign(sLo, getQReg128(nn));
8195 if (bitQ == 1) {
8196 if (imm4 == 0) {
8197 assign(res, mkexpr(sLo));
8198 } else {
8199 vassert(imm4 >= 1 && imm4 <= 15);
8200 assign(res, triop(Iop_SliceV128,
8201 mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
8202 }
8203 putQReg128(dd, mkexpr(res));
8204 DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
8205 } else {
8206 if (imm4 >= 8) return False;
8207 if (imm4 == 0) {
8208 assign(res, mkexpr(sLo));
8209 } else {
8210 vassert(imm4 >= 1 && imm4 <= 7);
8211 IRTemp hi64lo64 = newTempV128();
8212 assign(hi64lo64, binop(Iop_InterleaveLO64x2,
8213 mkexpr(sHi), mkexpr(sLo)));
8214 assign(res, triop(Iop_SliceV128,
8215 mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4)));
8216 }
8217 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
8218 DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
8219 }
8220 return True;
8221 }
8222
8223 return False;
8224 # undef INSN
8225 }
8226
8227
8228 static
8229 Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
8230 {
8231 /* 31 29 23 21 20 15 14 12 11 9 4
8232 0 q 001110 op2 0 m 0 len op 00 n d
8233 Decode fields: op2,len,op
8234 */
8235 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8236 if (INSN(31,31) != 0
8237 || INSN(29,24) != BITS6(0,0,1,1,1,0)
8238 || INSN(21,21) != 0
8239 || INSN(15,15) != 0
8240 || INSN(11,10) != BITS2(0,0)) {
8241 return False;
8242 }
8243 UInt bitQ = INSN(30,30);
8244 UInt op2 = INSN(23,22);
8245 UInt mm = INSN(20,16);
8246 UInt len = INSN(14,13);
8247 UInt bitOP = INSN(12,12);
8248 UInt nn = INSN(9,5);
8249 UInt dd = INSN(4,0);
8250
8251 if (op2 == X00) {
8252 /* -------- 00,xx,0 TBL, xx register table -------- */
8253 /* -------- 00,xx,1 TBX, xx register table -------- */
8254 /* 31 28 20 15 14 12 9 4
8255 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
8256 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
8257 where Ta = 16b(q=1) or 8b(q=0)
8258 */
8259 Bool isTBX = bitOP == 1;
8260 /* The out-of-range values to use. */
8261 IRTemp oor_values = newTempV128();
8262 assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
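      /* (For out-of-range table indices, TBL writes zero into the result
         element whereas TBX leaves the corresponding element of Vd
         unchanged; hence the choice of fill value above.) */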
8263 /* src value */
8264 IRTemp src = newTempV128();
8265 assign(src, getQReg128(mm));
8266 /* The table values */
8267 IRTemp tab[4];
8268 UInt i;
8269 for (i = 0; i <= len; i++) {
8270 vassert(i < 4);
8271 tab[i] = newTempV128();
8272 assign(tab[i], getQReg128((nn + i) % 32));
8273 }
8274 IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
8275 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8276 const HChar* Ta = bitQ == 1 ? "16b" : "8b";
8277 const HChar* nm = isTBX ? "tbx" : "tbl";
8278 DIP("%s %s.%s, {v%d.16b .. v%d.16b}, %s.%s\n",
8279 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
8280 return True;
8281 }
8282
8283 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8284 return False;
8285 # undef INSN
8286 }
8287
8288
8289 static
8290 Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
8291 {
8292 /* 31 29 23 21 20 15 14 11 9 4
8293 0 q 001110 size 0 m 0 opcode 10 n d
8294 Decode fields: opcode
8295 */
8296 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8297 if (INSN(31,31) != 0
8298 || INSN(29,24) != BITS6(0,0,1,1,1,0)
8299 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
8300 return False;
8301 }
8302 UInt bitQ = INSN(30,30);
8303 UInt size = INSN(23,22);
8304 UInt mm = INSN(20,16);
8305 UInt opcode = INSN(14,12);
8306 UInt nn = INSN(9,5);
8307 UInt dd = INSN(4,0);
8308
8309 if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
8310 /* -------- 001 UZP1 std7_std7_std7 -------- */
8311 /* -------- 101 UZP2 std7_std7_std7 -------- */
8312 if (bitQ == 0 && size == X11) return False; // implied 1d case
8313 Bool isUZP1 = opcode == BITS3(0,0,1);
8314 IROp op = isUZP1 ? mkVecCATEVENLANES(size)
8315 : mkVecCATODDLANES(size);
8316 IRTemp preL = newTempV128();
8317 IRTemp preR = newTempV128();
8318 IRTemp res = newTempV128();
8319 if (bitQ == 0) {
8320 assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
8321 getQReg128(nn)));
8322 assign(preR, mkexpr(preL));
8323 } else {
8324 assign(preL, getQReg128(mm));
8325 assign(preR, getQReg128(nn));
8326 }
8327 assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
8328 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8329 const HChar* nm = isUZP1 ? "uzp1" : "uzp2";
8330 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8331 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8332 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8333 return True;
8334 }
8335
8336 if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
8337 /* -------- 010 TRN1 std7_std7_std7 -------- */
8338 /* -------- 110 TRN2 std7_std7_std7 -------- */
8339 if (bitQ == 0 && size == X11) return False; // implied 1d case
8340 Bool isTRN1 = opcode == BITS3(0,1,0);
8341 IROp op1 = isTRN1 ? mkVecCATEVENLANES(size)
8342 : mkVecCATODDLANES(size);
8343 IROp op2 = mkVecINTERLEAVEHI(size);
8344 IRTemp srcM = newTempV128();
8345 IRTemp srcN = newTempV128();
8346 IRTemp res = newTempV128();
8347 assign(srcM, getQReg128(mm));
8348 assign(srcN, getQReg128(nn));
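      /* Illustration for the 4s TRN1 case: op1 is CatEvenLanes32x4, so
         op1(M,M) = [m2 m0 m2 m0] and op1(N,N) = [n2 n0 n2 n0];
         interleaving their high halves gives [m2 n2 m0 n0], the TRN1
         result.  TRN2 works the same way using the odd lanes. */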
8349 assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
8350 binop(op1, mkexpr(srcN), mkexpr(srcN))));
8351 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8352 const HChar* nm = isTRN1 ? "trn1" : "trn2";
8353 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8354 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8355 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8356 return True;
8357 }
8358
8359 if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
8360 /* -------- 011 ZIP1 std7_std7_std7 -------- */
8361 /* -------- 111 ZIP2 std7_std7_std7 -------- */
8362 if (bitQ == 0 && size == X11) return False; // implied 1d case
8363 Bool isZIP1 = opcode == BITS3(0,1,1);
8364 IROp op = isZIP1 ? mkVecINTERLEAVELO(size)
8365 : mkVecINTERLEAVEHI(size);
8366 IRTemp preL = newTempV128();
8367 IRTemp preR = newTempV128();
8368 IRTemp res = newTempV128();
8369 if (bitQ == 0 && !isZIP1) {
8370 IRTemp z128 = newTempV128();
8371 assign(z128, mkV128(0x0000));
8372 // preL = Vm shifted left 32 bits
8373 // preR = Vn shifted left 32 bits
8374 assign(preL, triop(Iop_SliceV128,
8375 getQReg128(mm), mkexpr(z128), mkU8(12)));
8376 assign(preR, triop(Iop_SliceV128,
8377 getQReg128(nn), mkexpr(z128), mkU8(12)));
8378
8379 } else {
8380 assign(preL, getQReg128(mm));
8381 assign(preR, getQReg128(nn));
8382 }
8383 assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
8384 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8385 const HChar* nm = isZIP1 ? "zip1" : "zip2";
8386 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8387 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8388 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8389 return True;
8390 }
8391
8392 return False;
8393 # undef INSN
8394 }
8395
8396
8397 static
8398 Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
8399 {
8400 /* 31 28 23 21 16 11 9 4
8401 0 q u 01110 size 11000 opcode 10 n d
8402 Decode fields: u,size,opcode
8403 */
8404 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8405 if (INSN(31,31) != 0
8406 || INSN(28,24) != BITS5(0,1,1,1,0)
8407 || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
8408 return False;
8409 }
8410 UInt bitQ = INSN(30,30);
8411 UInt bitU = INSN(29,29);
8412 UInt size = INSN(23,22);
8413 UInt opcode = INSN(16,12);
8414 UInt nn = INSN(9,5);
8415 UInt dd = INSN(4,0);
8416
8417 if (opcode == BITS5(0,0,0,1,1)) {
8418 /* -------- 0,xx,00011 SADDLV -------- */
8419 /* -------- 1,xx,00011 UADDLV -------- */
8420 /* size is the narrow size */
8421 if (size == X11 || (size == X10 && bitQ == 0)) return False;
8422 Bool isU = bitU == 1;
8423 IRTemp src = newTempV128();
8424 assign(src, getQReg128(nn));
8425 /* The basic plan is to widen the lower half, and if Q = 1,
8426 the upper half too. Add them together (if Q = 1), and in
8427 either case fold with add at twice the lane width.
8428 */
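      /* For example, UADDLV h0, v1.8b (Q=0, size=00) zero-widens the 8
         bytes in the lower half of v1 to eight 16-bit lanes, and
         math_FOLDV then sums them with Iop_Add16x8, leaving the total
         in the lowest 16-bit lane of the result. */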
8429 IRExpr* widened
8430 = mkexpr(math_WIDEN_LO_OR_HI_LANES(
8431 isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
8432 if (bitQ == 1) {
8433 widened
8434 = binop(mkVecADD(size+1),
8435 widened,
8436 mkexpr(math_WIDEN_LO_OR_HI_LANES(
8437 isU, True/*fromUpperHalf*/, size, mkexpr(src)))
8438 );
8439 }
8440 /* Now fold. */
8441 IRTemp tWi = newTempV128();
8442 assign(tWi, widened);
8443 IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
8444 putQReg128(dd, mkexpr(res));
8445 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8446 const HChar ch = "bhsd"[size];
8447 DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
8448 nameQReg128(dd), ch, nameQReg128(nn), arr);
8449 return True;
8450 }
8451
8452 UInt ix = 0;
8453 /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
8454 else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
8455 else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
8456 /**/
8457 if (ix != 0) {
8458 /* -------- 0,xx,01010: SMAXV -------- (1) */
8459 /* -------- 1,xx,01010: UMAXV -------- (2) */
8460 /* -------- 0,xx,11010: SMINV -------- (3) */
8461 /* -------- 1,xx,11010: UMINV -------- (4) */
8462 /* -------- 0,xx,11011: ADDV -------- (5) */
8463 vassert(ix >= 1 && ix <= 5);
8464 if (size == X11) return False; // 1d,2d cases not allowed
8465 if (size == X10 && bitQ == 0) return False; // 2s case not allowed
8466 const IROp opMAXS[3]
8467 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
8468 const IROp opMAXU[3]
8469 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
8470 const IROp opMINS[3]
8471 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
8472 const IROp opMINU[3]
8473 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
8474 const IROp opADD[3]
8475 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 };
8476 vassert(size < 3);
8477 IROp op = Iop_INVALID;
8478 const HChar* nm = NULL;
8479 switch (ix) {
8480 case 1: op = opMAXS[size]; nm = "smaxv"; break;
8481 case 2: op = opMAXU[size]; nm = "umaxv"; break;
8482 case 3: op = opMINS[size]; nm = "sminv"; break;
8483 case 4: op = opMINU[size]; nm = "uminv"; break;
8484 case 5: op = opADD[size]; nm = "addv"; break;
8485 default: vassert(0);
8486 }
8487 vassert(op != Iop_INVALID && nm != NULL);
8488 IRTemp tN1 = newTempV128();
8489 assign(tN1, getQReg128(nn));
8490 /* If Q == 0, we're just folding lanes in the lower half of
8491 the value. In which case, copy the lower half of the
8492 source into the upper half, so we can then treat it the
8493 same as the full width case. Except for the addition case,
8494 in which we have to zero out the upper half. */
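      /* (Duplicating the lower half is harmless for min/max because
         folding is idempotent; for ADDV the duplicated lanes would be
         counted twice, hence the zeroing instead.) */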
8495 IRTemp tN2 = newTempV128();
8496 assign(tN2, bitQ == 0
8497 ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
8498 : mk_CatEvenLanes64x2(tN1,tN1))
8499 : mkexpr(tN1));
8500 IRTemp res = math_FOLDV(tN2, op);
8501 if (res == IRTemp_INVALID)
8502 return False; /* means math_FOLDV
8503 doesn't handle this case yet */
8504 putQReg128(dd, mkexpr(res));
8505 const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
8506 IRType laneTy = tys[size];
8507 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8508 DIP("%s %s, %s.%s\n", nm,
8509 nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
8510 return True;
8511 }
8512
8513 if ((size == X00 || size == X10)
8514 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
8515 /* -------- 0,00,01100: FMAXNMV s_4s -------- */
8516 /* -------- 0,10,01100: FMINNMV s_4s -------- */
8517 /* -------- 1,00,01111: FMAXV s_4s -------- */
8518 /* -------- 1,10,01111: FMINV s_4s -------- */
8519 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
8520 if (bitQ == 0) return False; // Only 4s is allowed
8521 Bool isMIN = (size & 2) == 2;
8522 Bool isNM = opcode == BITS5(0,1,1,0,0);
8523 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
8524 IRTemp src = newTempV128();
8525 assign(src, getQReg128(nn));
8526 IRTemp res = math_FOLDV(src, opMXX);
8527 putQReg128(dd, mkexpr(res));
8528 DIP("%s%sv s%u, v%u.4s\n",
8529 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
8530 return True;
8531 }
8532
8533 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8534 return False;
8535 # undef INSN
8536 }
8537
8538
8539 static
8540 Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
8541 {
8542 /* 31 28 20 15 14 10 9 4
8543 0 q op 01110000 imm5 0 imm4 1 n d
8544 Decode fields: q,op,imm4
8545 */
8546 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8547 if (INSN(31,31) != 0
8548 || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
8549 || INSN(15,15) != 0 || INSN(10,10) != 1) {
8550 return False;
8551 }
8552 UInt bitQ = INSN(30,30);
8553 UInt bitOP = INSN(29,29);
8554 UInt imm5 = INSN(20,16);
8555 UInt imm4 = INSN(14,11);
8556 UInt nn = INSN(9,5);
8557 UInt dd = INSN(4,0);
8558
8559 /* -------- x,0,0000: DUP (element, vector) -------- */
8560 /* 31 28 20 15 9 4
8561 0q0 01110000 imm5 000001 n d DUP Vd.T, Vn.Ts[index]
8562 */
8563 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
8564 UInt laneNo = 0;
8565 UInt laneSzLg2 = 0;
8566 HChar laneCh = '?';
8567 IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
8568 getQReg128(nn), imm5);
8569 if (res == IRTemp_INVALID)
8570 return False;
8571 if (bitQ == 0 && laneSzLg2 == X11)
8572 return False; /* .1d case */
8573 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8574 const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
8575 DIP("dup %s.%s, %s.%c[%u]\n",
8576 nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
8577 return True;
8578 }
8579
8580 /* -------- x,0,0001: DUP (general, vector) -------- */
8581 /* 31 28 20 15 9 4
8582 0q0 01110000 imm5 0 0001 1 n d DUP Vd.T, Rn
8583 Q=0 writes 64, Q=1 writes 128
8584 imm5: xxxx1 8B(q=0) or 16b(q=1), R=W
8585 xxx10 4H(q=0) or 8H(q=1), R=W
8586 xx100 2S(q=0) or 4S(q=1), R=W
8587 x1000 Invalid(q=0) or 2D(q=1), R=X
8588 x0000 Invalid(q=0) or Invalid(q=1)
8589 Require op=0, imm4=0001
8590 */
8591 if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
8592 Bool isQ = bitQ == 1;
8593 IRTemp w0 = newTemp(Ity_I64);
8594 const HChar* arT = "??";
8595 IRType laneTy = Ity_INVALID;
8596 if (imm5 & 1) {
8597 arT = isQ ? "16b" : "8b";
8598 laneTy = Ity_I8;
8599 assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
8600 }
8601 else if (imm5 & 2) {
8602 arT = isQ ? "8h" : "4h";
8603 laneTy = Ity_I16;
8604 assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
8605 }
8606 else if (imm5 & 4) {
8607 arT = isQ ? "4s" : "2s";
8608 laneTy = Ity_I32;
8609 assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
8610 }
8611 else if ((imm5 & 8) && isQ) {
8612 arT = "2d";
8613 laneTy = Ity_I64;
8614 assign(w0, getIReg64orZR(nn));
8615 }
8616 else {
8617 /* invalid; leave laneTy unchanged. */
8618 }
8619 /* */
8620 if (laneTy != Ity_INVALID) {
8621 IRTemp w1 = math_DUP_TO_64(w0, laneTy);
8622 putQReg128(dd, binop(Iop_64HLtoV128,
8623 isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
8624 DIP("dup %s.%s, %s\n",
8625 nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
8626 return True;
8627 }
8628 /* invalid */
8629 return False;
8630 }
8631
8632 /* -------- 1,0,0011: INS (general) -------- */
8633 /* 31 28 20 15 9 4
8634 010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn
8635 where Ts,ix = case imm5 of xxxx1 -> B, xxxx
8636 xxx10 -> H, xxx
8637 xx100 -> S, xx
8638 x1000 -> D, x
8639 */
8640 if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
8641 HChar ts = '?';
8642 UInt laneNo = 16;
8643 IRExpr* src = NULL;
8644 if (imm5 & 1) {
8645 src = unop(Iop_64to8, getIReg64orZR(nn));
8646 laneNo = (imm5 >> 1) & 15;
8647 ts = 'b';
8648 }
8649 else if (imm5 & 2) {
8650 src = unop(Iop_64to16, getIReg64orZR(nn));
8651 laneNo = (imm5 >> 2) & 7;
8652 ts = 'h';
8653 }
8654 else if (imm5 & 4) {
8655 src = unop(Iop_64to32, getIReg64orZR(nn));
8656 laneNo = (imm5 >> 3) & 3;
8657 ts = 's';
8658 }
8659 else if (imm5 & 8) {
8660 src = getIReg64orZR(nn);
8661 laneNo = (imm5 >> 4) & 1;
8662 ts = 'd';
8663 }
8664 /* */
8665 if (src) {
8666 vassert(laneNo < 16);
8667 putQRegLane(dd, laneNo, src);
8668 DIP("ins %s.%c[%u], %s\n",
8669 nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
8670 return True;
8671 }
8672 /* invalid */
8673 return False;
8674 }
8675
8676 /* -------- x,0,0101: SMOV -------- */
8677 /* -------- x,0,0111: UMOV -------- */
8678 /* 31 28 20 15 9 4
8679 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index]
8680 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index]
8681 dest is Xd when q==1, Wd when q==0
8682 UMOV:
8683 Ts,index,ops = case q:imm5 of
8684 0:xxxx1 -> B, xxxx, 8Uto64
8685 1:xxxx1 -> invalid
8686 0:xxx10 -> H, xxx, 16Uto64
8687 1:xxx10 -> invalid
8688 0:xx100 -> S, xx, 32Uto64
8689 1:xx100 -> invalid
8690 1:x1000 -> D, x, copy64
8691 other -> invalid
8692 SMOV:
8693 Ts,index,ops = case q:imm5 of
8694 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
8695 1:xxxx1 -> B, xxxx, 8Sto64
8696 0:xxx10 -> H, xxx, (32Uto64 . 16Sto32)
8697 1:xxx10 -> H, xxx, 16Sto64
8698 0:xx100 -> invalid
8699 1:xx100 -> S, xx, 32Sto64
8700 1:x1000 -> invalid
8701 other -> invalid
8702 */
8703 if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
8704 Bool isU = (imm4 & 2) == 2;
8705 const HChar* arTs = "??";
8706 UInt laneNo = 16; /* invalid */
8707 // Setting 'res' to non-NULL determines valid/invalid
8708 IRExpr* res = NULL;
8709 if (!bitQ && (imm5 & 1)) { // 0:xxxx1
8710 laneNo = (imm5 >> 1) & 15;
8711 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
8712 res = isU ? unop(Iop_8Uto64, lane)
8713 : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
8714 arTs = "b";
8715 }
8716 else if (bitQ && (imm5 & 1)) { // 1:xxxx1
8717 laneNo = (imm5 >> 1) & 15;
8718 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
8719 res = isU ? NULL
8720 : unop(Iop_8Sto64, lane);
8721 arTs = "b";
8722 }
8723 else if (!bitQ && (imm5 & 2)) { // 0:xxx10
8724 laneNo = (imm5 >> 2) & 7;
8725 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
8726 res = isU ? unop(Iop_16Uto64, lane)
8727 : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
8728 arTs = "h";
8729 }
8730 else if (bitQ && (imm5 & 2)) { // 1:xxx10
8731 laneNo = (imm5 >> 2) & 7;
8732 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
8733 res = isU ? NULL
8734 : unop(Iop_16Sto64, lane);
8735 arTs = "h";
8736 }
8737 else if (!bitQ && (imm5 & 4)) { // 0:xx100
8738 laneNo = (imm5 >> 3) & 3;
8739 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
8740 res = isU ? unop(Iop_32Uto64, lane)
8741 : NULL;
8742 arTs = "s";
8743 }
8744 else if (bitQ && (imm5 & 4)) { // 1:xx100
8745 laneNo = (imm5 >> 3) & 3;
8746 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
8747 res = isU ? NULL
8748 : unop(Iop_32Sto64, lane);
8749 arTs = "s";
8750 }
8751 else if (bitQ && (imm5 & 8)) { // 1:x1000
8752 laneNo = (imm5 >> 4) & 1;
8753 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
8754 res = isU ? lane
8755 : NULL;
8756 arTs = "d";
8757 }
8758 /* */
8759 if (res) {
8760 vassert(laneNo < 16);
8761 putIReg64orZR(dd, res);
8762 DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
8763 nameIRegOrZR(bitQ == 1, dd),
8764 nameQReg128(nn), arTs, laneNo);
8765 return True;
8766 }
8767 /* invalid */
8768 return False;
8769 }
8770
8771 /* -------- 1,1,xxxx: INS (element) -------- */
8772 /* 31 28 20 14 9 4
8773 011 01110000 imm5 0 imm4 n d INS Vd.Ts[ix1], Vn.Ts[ix2]
8774 where Ts,ix1,ix2
8775 = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
8776 xxx10 -> H, xxx, imm4[3:1]
8777 xx100 -> S, xx, imm4[3:2]
8778 x1000 -> D, x, imm4[3:3]
8779 */
8780 if (bitQ == 1 && bitOP == 1) {
8781 HChar ts = '?';
8782 IRType ity = Ity_INVALID;
8783 UInt ix1 = 16;
8784 UInt ix2 = 16;
8785 if (imm5 & 1) {
8786 ts = 'b';
8787 ity = Ity_I8;
8788 ix1 = (imm5 >> 1) & 15;
8789 ix2 = (imm4 >> 0) & 15;
8790 }
8791 else if (imm5 & 2) {
8792 ts = 'h';
8793 ity = Ity_I16;
8794 ix1 = (imm5 >> 2) & 7;
8795 ix2 = (imm4 >> 1) & 7;
8796 }
8797 else if (imm5 & 4) {
8798 ts = 's';
8799 ity = Ity_I32;
8800 ix1 = (imm5 >> 3) & 3;
8801 ix2 = (imm4 >> 2) & 3;
8802 }
8803 else if (imm5 & 8) {
8804 ts = 'd';
8805 ity = Ity_I64;
8806 ix1 = (imm5 >> 4) & 1;
8807 ix2 = (imm4 >> 3) & 1;
8808 }
8809 /* */
8810 if (ity != Ity_INVALID) {
8811 vassert(ix1 < 16);
8812 vassert(ix2 < 16);
8813 putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
8814 DIP("ins %s.%c[%u], %s.%c[%u]\n",
8815 nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
8816 return True;
8817 }
8818 /* invalid */
8819 return False;
8820 }
8821
8822 return False;
8823 # undef INSN
8824 }
8825
8826
8827 static
8828 Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
8829 {
8830 /* 31 28 18 15 11 9 4
8831 0q op 01111 00000 abc cmode 01 defgh d
8832 Decode fields: q,op,cmode
8833 Bit 11 is really "o2", but it is always zero.
8834 */
8835 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8836 if (INSN(31,31) != 0
8837 || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
8838 || INSN(11,10) != BITS2(0,1)) {
8839 return False;
8840 }
8841 UInt bitQ = INSN(30,30);
8842 UInt bitOP = INSN(29,29);
8843 UInt cmode = INSN(15,12);
8844 UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
8845 UInt dd = INSN(4,0);
8846
8847 ULong imm64lo = 0;
8848 UInt op_cmode = (bitOP << 4) | cmode;
8849 Bool ok = False;
8850 Bool isORR = False;
8851 Bool isBIC = False;
8852 Bool isMOV = False;
8853 Bool isMVN = False;
8854 Bool isFMOV = False;
8855 switch (op_cmode) {
8856 /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
8857 /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
8858 /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
8859 /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
8860 case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
8861 case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
8862 ok = True; isMOV = True; break;
8863
8864 /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
8865 /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
8866 /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
8867 /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
8868 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
8869 case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
8870 ok = True; isORR = True; break;
8871
8872 /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
8873 /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
8874 case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
8875 ok = True; isMOV = True; break;
8876
8877 /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
8878 /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
8879 case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
8880 ok = True; isORR = True; break;
8881
8882 /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
8883 /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
8884 case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
8885 ok = True; isMOV = True; break;
8886
8887 /* -------- x,0,1110 MOVI 8-bit -------- */
8888 case BITS5(0,1,1,1,0):
8889 ok = True; isMOV = True; break;
8890
8891 /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
8892 case BITS5(0,1,1,1,1): // 0:1111
8893 ok = True; isFMOV = True; break;
8894
8895 /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
8896 /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
8897 /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
8898 /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
8899 case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
8900 case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
8901 ok = True; isMVN = True; break;
8902
8903 /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
8904 /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
8905 /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
8906 /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
8907 case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
8908 case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
8909 ok = True; isBIC = True; break;
8910
8911 /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
8912 /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
8913 case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
8914 ok = True; isMVN = True; break;
8915
8916 /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
8917 /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
8918 case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
8919 ok = True; isBIC = True; break;
8920
8921 /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
8922 /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
8923 case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
8924 ok = True; isMVN = True; break;
8925
8926 /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
8927 /* -------- 1,1,1110 MOVI 64-bit vector -------- */
8928 case BITS5(1,1,1,1,0):
8929 ok = True; isMOV = True; break;
8930
8931 /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
8932 case BITS5(1,1,1,1,1): // 1:1111
8933 ok = bitQ == 1; isFMOV = True; break;
8934
8935 default:
8936 break;
8937 }
8938 if (ok) {
8939 vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
8940 + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
8941 ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
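      /* AdvSIMDExpandImm follows the ARM pseudocode of the same name:
         for instance, with op=0, cmode=1110 (MOVI 8-bit) the byte
         abcdefgh is replicated into all eight bytes of imm64lo, and with
         op=0, cmode=0000 (MOVI 32-bit, shift 0) it is replicated,
         unshifted, into both 32-bit halves. */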
8942 }
8943 if (ok) {
8944 if (isORR || isBIC) {
8945 ULong inv
8946 = isORR ? 0ULL : ~0ULL;
8947 IRExpr* immV128
8948 = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
8949 IRExpr* res
8950 = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
8951 const HChar* nm = isORR ? "orr" : "bic";
8952 if (bitQ == 0) {
8953 putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
8954 DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo);
8955 } else {
8956 putQReg128(dd, res);
8957 DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
8958 nameQReg128(dd), imm64lo, imm64lo);
8959 }
8960 }
8961 else if (isMOV || isMVN || isFMOV) {
8962 if (isMVN) imm64lo = ~imm64lo;
8963 ULong imm64hi = bitQ == 0 ? 0 : imm64lo;
8964 IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
8965 mkU64(imm64lo));
8966 putQReg128(dd, immV128);
8967 DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
8968 }
8969 return True;
8970 }
8971 /* else fall through */
8972
8973 return False;
8974 # undef INSN
8975 }
8976
8977
8978 static
8979 Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
8980 {
8981 /* 31 28 20 15 14 10 9 4
8982 01 op 11110000 imm5 0 imm4 1 n d
8983 Decode fields: op,imm4
8984 */
8985 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8986 if (INSN(31,30) != BITS2(0,1)
8987 || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
8988 || INSN(15,15) != 0 || INSN(10,10) != 1) {
8989 return False;
8990 }
8991 UInt bitOP = INSN(29,29);
8992 UInt imm5 = INSN(20,16);
8993 UInt imm4 = INSN(14,11);
8994 UInt nn = INSN(9,5);
8995 UInt dd = INSN(4,0);
8996
8997 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
8998 /* -------- 0,0000 DUP (element, scalar) -------- */
8999 IRTemp w0 = newTemp(Ity_I64);
9000 const HChar* arTs = "??";
9001 IRType laneTy = Ity_INVALID;
9002 UInt laneNo = 16; /* invalid */
9003 if (imm5 & 1) {
9004 arTs = "b";
9005 laneNo = (imm5 >> 1) & 15;
9006 laneTy = Ity_I8;
9007 assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
9008 }
9009 else if (imm5 & 2) {
9010 arTs = "h";
9011 laneNo = (imm5 >> 2) & 7;
9012 laneTy = Ity_I16;
9013 assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
9014 }
9015 else if (imm5 & 4) {
9016 arTs = "s";
9017 laneNo = (imm5 >> 3) & 3;
9018 laneTy = Ity_I32;
9019 assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
9020 }
9021 else if (imm5 & 8) {
9022 arTs = "d";
9023 laneNo = (imm5 >> 4) & 1;
9024 laneTy = Ity_I64;
9025 assign(w0, getQRegLane(nn, laneNo, laneTy));
9026 }
9027 else {
9028 /* invalid; leave laneTy unchanged. */
9029 }
9030 /* */
9031 if (laneTy != Ity_INVALID) {
9032 vassert(laneNo < 16);
9033 putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
9034 DIP("dup %s, %s.%s[%u]\n",
9035 nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
9036 return True;
9037 }
9038 /* else fall through */
9039 }
9040
9041 return False;
9042 # undef INSN
9043 }
9044
9045
9046 static
9047 Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
9048 {
9049 /* 31 28 23 21 16 11 9 4
9050 01 u 11110 sz 11000 opcode 10 n d
9051 Decode fields: u,sz,opcode
9052 */
9053 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9054 if (INSN(31,30) != BITS2(0,1)
9055 || INSN(28,24) != BITS5(1,1,1,1,0)
9056 || INSN(21,17) != BITS5(1,1,0,0,0)
9057 || INSN(11,10) != BITS2(1,0)) {
9058 return False;
9059 }
9060 UInt bitU = INSN(29,29);
9061 UInt sz = INSN(23,22);
9062 UInt opcode = INSN(16,12);
9063 UInt nn = INSN(9,5);
9064 UInt dd = INSN(4,0);
9065
9066 if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
9067 /* -------- 0,11,11011 ADDP d_2d -------- */
9068 IRTemp xy = newTempV128();
9069 IRTemp xx = newTempV128();
9070 assign(xy, getQReg128(nn));
9071 assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
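      /* xx = [x1 x1], so lane 0 of the sum below is x1 + x0, the
         pairwise-add result; the upper lane is then zeroed out. */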
9072 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9073 binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
9074 DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
9075 return True;
9076 }
9077
9078 if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) {
9079 /* -------- 1,00,01101 FADDP s_2s -------- */
9080 /* -------- 1,01,01101 FADDP d_2d -------- */
9081 Bool isD = sz == X01;
9082 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9083 IROp opADD = mkVecADDF(isD ? 3 : 2);
9084 IRTemp src = newTempV128();
9085 IRTemp argL = newTempV128();
9086 IRTemp argR = newTempV128();
9087 assign(src, getQReg128(nn));
9088 assign(argL, unop(opZHI, mkexpr(src)));
9089 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9090 mkU8(isD ? 8 : 4))));
9091 putQReg128(dd, unop(opZHI,
9092 triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
9093 mkexpr(argL), mkexpr(argR))));
9094 DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn);
9095 return True;
9096 }
9097
9098 if (bitU == 1
9099 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
9100 /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
9101 /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
9102 /* -------- 1,0x,01111 FMAXP d_2d, s_2s -------- */
9103 /* -------- 1,1x,01111 FMINP d_2d, s_2s -------- */
9104 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
9105 Bool isD = (sz & 1) == 1;
9106 Bool isMIN = (sz & 2) == 2;
9107 Bool isNM = opcode == BITS5(0,1,1,0,0);
9108 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9109 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
9110 IRTemp src = newTempV128();
9111 IRTemp argL = newTempV128();
9112 IRTemp argR = newTempV128();
9113 assign(src, getQReg128(nn));
9114 assign(argL, unop(opZHI, mkexpr(src)));
9115 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9116 mkU8(isD ? 8 : 4))));
9117 putQReg128(dd, unop(opZHI,
9118 binop(opMXX, mkexpr(argL), mkexpr(argR))));
9119 HChar c = isD ? 'd' : 's';
9120 DIP("%s%sp %c%u, v%u.2%c\n",
9121 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
9122 return True;
9123 }
9124
9125 return False;
9126 # undef INSN
9127 }
9128
9129
9130 static
9131 Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
9132 {
9133 /* 31 28 22 18 15 10 9 4
9134 01 u 111110 immh immb opcode 1 n d
9135 Decode fields: u,immh,opcode
9136 */
9137 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9138 if (INSN(31,30) != BITS2(0,1)
9139 || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
9140 return False;
9141 }
9142 UInt bitU = INSN(29,29);
9143 UInt immh = INSN(22,19);
9144 UInt immb = INSN(18,16);
9145 UInt opcode = INSN(15,11);
9146 UInt nn = INSN(9,5);
9147 UInt dd = INSN(4,0);
9148 UInt immhb = (immh << 3) | immb;
9149
9150 if ((immh & 8) == 8
9151 && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
9152 /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
9153 /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
9154 /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
9155 /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
9156 Bool isU = bitU == 1;
9157 Bool isAcc = opcode == BITS5(0,0,0,1,0);
9158 UInt sh = 128 - immhb;
9159 vassert(sh >= 1 && sh <= 64);
9160 IROp op = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
9161 IRExpr* src = getQReg128(nn);
9162 IRTemp shf = newTempV128();
9163 IRTemp res = newTempV128();
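      /* The immediate-shift IROps require an amount strictly less than
         the lane width, so a shift of 64 needs special handling: USHR
         #64 simply yields zero, while SSHR #64 gives the same result as
         SSHR #63 (all copies of the sign bit), hence the "nudge". */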
9164 if (sh == 64 && isU) {
9165 assign(shf, mkV128(0x0000));
9166 } else {
9167 UInt nudge = 0;
9168 if (sh == 64) {
9169 vassert(!isU);
9170 nudge = 1;
9171 }
9172 assign(shf, binop(op, src, mkU8(sh - nudge)));
9173 }
9174 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9175 : mkexpr(shf));
9176 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9177 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
9178 : (isU ? "ushr" : "sshr");
9179 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
9180 return True;
9181 }
9182
9183 if ((immh & 8) == 8
9184 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
9185 /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
9186 /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
9187 /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
9188 /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
9189 Bool isU = bitU == 1;
9190 Bool isAcc = opcode == BITS5(0,0,1,1,0);
9191 UInt sh = 128 - immhb;
9192 vassert(sh >= 1 && sh <= 64);
9193 IROp op = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
9195 IRExpr* src = getQReg128(nn);
9196 IRTemp imm8 = newTemp(Ity_I8);
9197 assign(imm8, mkU8((UChar)(-sh)));
9198 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
9199 IRTemp shf = newTempV128();
9200 IRTemp res = newTempV128();
9201 assign(shf, binop(op, src, amt));
9202 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9203 : mkexpr(shf));
9204 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9205 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
9206 : (isU ? "urshr" : "srshr");
9207 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
9208 return True;
9209 }
9210
9211 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
9212 /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
9213 UInt sh = 128 - immhb;
9214 vassert(sh >= 1 && sh <= 64);
9215 if (sh == 64) {
9216 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
9217 } else {
9218 /* sh is in range 1 .. 63 */
9219 ULong nmask = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
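         /* nmask has the top |sh| bits set: those bits of Dd are
            preserved by SRI, while the remaining low bits come from
            Dn >> sh.  E.g. sh == 3 gives nmask == 0xE000000000000000. */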
9220 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
9221 IRTemp res = newTempV128();
9222 assign(res, binop(Iop_OrV128,
9223 binop(Iop_AndV128, getQReg128(dd), nmaskV),
9224 binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
9225 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9226 }
9227 DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
9228 return True;
9229 }
9230
9231 if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
9232 /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
9233 UInt sh = immhb - 64;
9234 vassert(sh >= 0 && sh < 64);
9235 putQReg128(dd,
9236 unop(Iop_ZeroHI64ofV128,
9237 sh == 0 ? getQReg128(nn)
9238 : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
9239 DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
9240 return True;
9241 }
9242
9243 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
9244 /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
9245 UInt sh = immhb - 64;
9246 vassert(sh >= 0 && sh < 64);
9247 if (sh == 0) {
9248 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
9249 } else {
9250 /* sh is in range 1 .. 63 */
9251 ULong nmask = (1ULL << sh) - 1;
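         /* Conversely to SRI, nmask has the low |sh| bits set: those
            bits of Dd are preserved, and the rest come from Dn << sh. */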
9252 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
9253 IRTemp res = newTempV128();
9254 assign(res, binop(Iop_OrV128,
9255 binop(Iop_AndV128, getQReg128(dd), nmaskV),
9256 binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
9257 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9258 }
9259 DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
9260 return True;
9261 }
9262
9263 if (opcode == BITS5(0,1,1,1,0)
9264 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
9265 /* -------- 0,01110 SQSHL #imm -------- */
9266 /* -------- 1,01110 UQSHL #imm -------- */
9267 /* -------- 1,01100 SQSHLU #imm -------- */
9268 UInt size = 0;
9269 UInt shift = 0;
9270 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
9271 if (!ok) return False;
9272 vassert(size >= 0 && size <= 3);
9273 /* The shift encoding has opposite sign for the leftwards case.
9274 Adjust shift to compensate. */
9275 UInt lanebits = 8 << size;
9276 shift = lanebits - shift;
9277 vassert(shift >= 0 && shift < lanebits);
9278 const HChar* nm = NULL;
9279 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
9280 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
9281 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
9282 else vassert(0);
9283 IRTemp qDiff1 = IRTemp_INVALID;
9284 IRTemp qDiff2 = IRTemp_INVALID;
9285 IRTemp res = IRTemp_INVALID;
9286 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
9287 /* This relies on the fact that the zeroed out lanes generate zeroed
9288 result lanes and don't saturate, so there's no point in trimming
9289 the resulting res, qDiff1 or qDiff2 values. */
9290 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
9291 putQReg128(dd, mkexpr(res));
9292 updateQCFLAGwithDifference(qDiff1, qDiff2);
9293 const HChar arr = "bhsd"[size];
9294 DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
9295 return True;
9296 }
9297
9298 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
9299 || (bitU == 1
9300 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
9301 /* -------- 0,10010 SQSHRN #imm -------- */
9302 /* -------- 1,10010 UQSHRN #imm -------- */
9303 /* -------- 0,10011 SQRSHRN #imm -------- */
9304 /* -------- 1,10011 UQRSHRN #imm -------- */
9305 /* -------- 1,10000 SQSHRUN #imm -------- */
9306 /* -------- 1,10001 SQRSHRUN #imm -------- */
9307 UInt size = 0;
9308 UInt shift = 0;
9309 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
9310 if (!ok || size == X11) return False;
9311 vassert(size >= X00 && size <= X10);
9312 vassert(shift >= 1 && shift <= (8 << size));
9313 const HChar* nm = "??";
9314 IROp op = Iop_INVALID;
9315 /* Decide on the name and the operation. */
9316 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
9317 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
9318 }
9319 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
9320 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
9321 }
9322 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
9323 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
9324 }
9325 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
9326 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
9327 }
9328 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
9329 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
9330 }
9331 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
9332 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
9333 }
9334 else vassert(0);
9335 /* Compute the result (Q, shifted value) pair. */
9336 IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
9337 IRTemp pair = newTempV128();
9338 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
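      /* These QAND..NARROW ops return a pair: the narrowed, shifted
         result in the lower 64 bits and the saturation ("Q") indication
         in the upper 64 bits, which is peeled off below to update
         QCFLAG. */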
9339 /* Update the result reg */
9340 IRTemp res64in128 = newTempV128();
9341 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
9342 putQReg128(dd, mkexpr(res64in128));
9343 /* Update the Q flag. */
9344 IRTemp q64q64 = newTempV128();
9345 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
9346 IRTemp z128 = newTempV128();
9347 assign(z128, mkV128(0x0000));
9348 updateQCFLAGwithDifference(q64q64, z128);
9349 /* */
9350 const HChar arrNarrow = "bhsd"[size];
9351 const HChar arrWide = "bhsd"[size+1];
9352 DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
9353 return True;
9354 }
9355
9356 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,0,0)) {
9357 /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */
9358 /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */
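      /* The fixed-point conversion scales by 2^-fbits after the int->FP
         convert: e.g. "scvtf s0, s1, #8" with S1 holding 640 produces
         640 * 2^-8 = 2.5. */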
9359 UInt size = 0;
9360 UInt fbits = 0;
9361 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
9362 /* The following holds because immh is never zero. */
9363 vassert(ok);
9364 /* The following holds because immh >= 0100. */
9365 vassert(size == X10 || size == X11);
9366 Bool isD = size == X11;
9367 Bool isU = bitU == 1;
9368 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
9369 Double scale = two_to_the_minus(fbits);
9370 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
9371 : IRExpr_Const(IRConst_F32( (Float)scale ));
9372 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
9373 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
9374 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
9375 IRType tyF = isD ? Ity_F64 : Ity_F32;
9376 IRType tyI = isD ? Ity_I64 : Ity_I32;
9377 IRTemp src = newTemp(tyI);
9378 IRTemp res = newTemp(tyF);
9379 IRTemp rm = mk_get_IR_rounding_mode();
9380 assign(src, getQRegLane(nn, 0, tyI));
9381 assign(res, triop(opMUL, mkexpr(rm),
9382 binop(opCVT, mkexpr(rm), mkexpr(src)), scaleE));
9383 putQRegLane(dd, 0, mkexpr(res));
9384 if (!isD) {
9385 putQRegLane(dd, 1, mkU32(0));
9386 }
9387 putQRegLane(dd, 1, mkU64(0));
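      /* Note the lane typing above: the I32 lane 1 is bits 32..63
         (zeroed only in the S case), whereas the I64 lane 1 is bits
         64..127 and is always zeroed. */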
9388 const HChar ch = isD ? 'd' : 's';
9389 DIP("%s %c%u, %c%u, #%u\n", isU ? "ucvtf" : "scvtf",
9390 ch, dd, ch, nn, fbits);
9391 return True;
9392 }
9393
9394 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,1,1)) {
9395 /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */
9396 /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */
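      /* Converse of the SCVTF/UCVTF case above: scale by 2^fbits first,
         then convert with round-towards-zero.  E.g. "fcvtzs s0, s1, #4"
         with S1 holding 2.71875 computes 43.5 and yields 43. */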
9397 UInt size = 0;
9398 UInt fbits = 0;
9399 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
9400 /* The following holds because immh is never zero. */
9401 vassert(ok);
9402 /* The following holds because immh >= 0100. */
9403 vassert(size == X10 || size == X11);
9404 Bool isD = size == X11;
9405 Bool isU = bitU == 1;
9406 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
9407 Double scale = two_to_the_plus(fbits);
9408 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
9409 : IRExpr_Const(IRConst_F32( (Float)scale ));
9410 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
9411 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
9412 : (isD ? Iop_F64toI64S : Iop_F32toI32S);
9413 IRType tyF = isD ? Ity_F64 : Ity_F32;
9414 IRType tyI = isD ? Ity_I64 : Ity_I32;
9415 IRTemp src = newTemp(tyF);
9416 IRTemp res = newTemp(tyI);
9417 IRTemp rm = newTemp(Ity_I32);
9418 assign(src, getQRegLane(nn, 0, tyF));
9419 assign(rm, mkU32(Irrm_ZERO));
9420 assign(res, binop(opCVT, mkexpr(rm),
9421 triop(opMUL, mkexpr(rm), mkexpr(src), scaleE)));
9422 putQRegLane(dd, 0, mkexpr(res));
9423 if (!isD) {
9424 putQRegLane(dd, 1, mkU32(0));
9425 }
9426 putQRegLane(dd, 1, mkU64(0));
9427 const HChar ch = isD ? 'd' : 's';
9428 DIP("%s %c%u, %c%u, #%u\n", isU ? "fcvtzu" : "fcvtzs",
9429 ch, dd, ch, nn, fbits);
9430 return True;
9431 }
9432
9433 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9434 return False;
9435 # undef INSN
9436 }
9437
9438
9439 static
9440 Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
9441 {
9442 /* 31 29 28 23 21 20 15 11 9 4
9443 01 U 11110 size 1 m opcode 00 n d
9444 Decode fields: u,opcode
9445 */
9446 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9447 if (INSN(31,30) != BITS2(0,1)
9448 || INSN(28,24) != BITS5(1,1,1,1,0)
9449 || INSN(21,21) != 1
9450 || INSN(11,10) != BITS2(0,0)) {
9451 return False;
9452 }
9453 UInt bitU = INSN(29,29);
9454 UInt size = INSN(23,22);
9455 UInt mm = INSN(20,16);
9456 UInt opcode = INSN(15,12);
9457 UInt nn = INSN(9,5);
9458 UInt dd = INSN(4,0);
9459 vassert(size < 4);
9460
9461 if (bitU == 0
9462 && (opcode == BITS4(1,1,0,1)
9463 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
9464 /* -------- 0,1101 SQDMULL -------- */ // 0 (ks)
9465 /* -------- 0,1001 SQDMLAL -------- */ // 1
9466 /* -------- 0,1011 SQDMLSL -------- */ // 2
9467 /* Widens, and size refers to the narrowed lanes. */
9468 UInt ks = 3;
9469 switch (opcode) {
9470 case BITS4(1,1,0,1): ks = 0; break;
9471 case BITS4(1,0,0,1): ks = 1; break;
9472 case BITS4(1,0,1,1): ks = 2; break;
9473 default: vassert(0);
9474 }
9475 vassert(ks >= 0 && ks <= 2);
9476 if (size == X00 || size == X11) return False;
9477 vassert(size <= 2);
9478 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
9479 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
9480 newTempsV128_3(&vecN, &vecM, &vecD);
9481 assign(vecN, getQReg128(nn));
9482 assign(vecM, getQReg128(mm));
9483 assign(vecD, getQReg128(dd));
9484 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
9485 False/*!is2*/, size, "mas"[ks],
9486 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
9487 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
9488 putQReg128(dd, unop(opZHI, mkexpr(res)));
9489 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
9490 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
9491 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
9492 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
9493 }
9494 const HChar* nm = ks == 0 ? "sqdmull"
9495 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
9496 const HChar arrNarrow = "bhsd"[size];
9497 const HChar arrWide = "bhsd"[size+1];
9498 DIP("%s %c%d, %c%d, %c%d\n",
9499 nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
9500 return True;
9501 }
9502
9503 return False;
9504 # undef INSN
9505 }
9506
9507
9508 static
9509 Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
9510 {
9511 /* 31 29 28 23 21 20 15 10 9 4
9512 01 U 11110 size 1 m opcode 1 n d
9513 Decode fields: u,size,opcode
9514 */
9515 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9516 if (INSN(31,30) != BITS2(0,1)
9517 || INSN(28,24) != BITS5(1,1,1,1,0)
9518 || INSN(21,21) != 1
9519 || INSN(10,10) != 1) {
9520 return False;
9521 }
9522 UInt bitU = INSN(29,29);
9523 UInt size = INSN(23,22);
9524 UInt mm = INSN(20,16);
9525 UInt opcode = INSN(15,11);
9526 UInt nn = INSN(9,5);
9527 UInt dd = INSN(4,0);
9528 vassert(size < 4);
9529
9530 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
9531 /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
9532 /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
9533 /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
9534 /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
9535 Bool isADD = opcode == BITS5(0,0,0,0,1);
9536 Bool isU = bitU == 1;
9537 IROp qop = Iop_INVALID;
9538 IROp nop = Iop_INVALID;
9539 if (isADD) {
9540 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
9541 nop = mkVecADD(size);
9542 } else {
9543 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
9544 nop = mkVecSUB(size);
9545 }
9546 IRTemp argL = newTempV128();
9547 IRTemp argR = newTempV128();
9548 IRTemp qres = newTempV128();
9549 IRTemp nres = newTempV128();
9550 assign(argL, getQReg128(nn));
9551 assign(argR, getQReg128(mm));
9552 assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9553 size, binop(qop, mkexpr(argL), mkexpr(argR)))));
9554 assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9555 size, binop(nop, mkexpr(argL), mkexpr(argR)))));
9556 putQReg128(dd, mkexpr(qres));
9557 updateQCFLAGwithDifference(qres, nres);
9558 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
9559 : (isU ? "uqsub" : "sqsub");
9560 const HChar arr = "bhsd"[size];
9561 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
9562 return True;
9563 }
9564
9565 if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
9566 /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
9567 /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
9568 Bool isGT = bitU == 0;
9569 IRExpr* argL = getQReg128(nn);
9570 IRExpr* argR = getQReg128(mm);
9571 IRTemp res = newTempV128();
9572 assign(res,
9573 isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
9574 : binop(Iop_CmpGT64Ux2, argL, argR));
9575 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9576 DIP("%s %s, %s, %s\n",isGT ? "cmgt" : "cmhi",
9577 nameQRegLO(dd, Ity_I64),
9578 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9579 return True;
9580 }
9581
9582 if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
9583 /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
9584 /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
9585 Bool isGE = bitU == 0;
9586 IRExpr* argL = getQReg128(nn);
9587 IRExpr* argR = getQReg128(mm);
9588 IRTemp res = newTempV128();
9589 assign(res,
9590 isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
9591 : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
9592 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9593 DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
9594 nameQRegLO(dd, Ity_I64),
9595 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9596 return True;
9597 }
9598
9599 if (size == X11 && (opcode == BITS5(0,1,0,0,0)
9600 || opcode == BITS5(0,1,0,1,0))) {
9601 /* -------- 0,xx,01000 SSHL d_d_d -------- */
9602 /* -------- 0,xx,01010 SRSHL d_d_d -------- */
9603 /* -------- 1,xx,01000 USHL d_d_d -------- */
9604 /* -------- 1,xx,01010 URSHL d_d_d -------- */
9605 Bool isU = bitU == 1;
9606 Bool isR = opcode == BITS5(0,1,0,1,0);
9607 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
9608 : (isU ? mkVecSHU(size) : mkVecSHS(size));
9609 IRTemp res = newTempV128();
9610 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
9611 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9612 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
9613 : (isU ? "ushl" : "sshl");
9614 DIP("%s %s, %s, %s\n", nm,
9615 nameQRegLO(dd, Ity_I64),
9616 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9617 return True;
9618 }
9619
9620 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
9621 /* -------- 0,xx,01001 SQSHL std4_std4_std4 -------- */
9622 /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
9623 /* -------- 1,xx,01001 UQSHL std4_std4_std4 -------- */
9624 /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
9625 Bool isU = bitU == 1;
9626 Bool isR = opcode == BITS5(0,1,0,1,1);
9627 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
9628 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
9629 /* This is a bit tricky. Since we're only interested in the lowest
9630 lane of the result, we zero out all the rest in the operands, so
9631 as to ensure that other lanes don't pollute the returned Q value.
9632 This works because it means, for the lanes we don't care about, we
9633 are shifting zero by zero, which can never saturate. */
9634 IRTemp res256 = newTemp(Ity_V256);
9635 IRTemp resSH = newTempV128();
9636 IRTemp resQ = newTempV128();
9637 IRTemp zero = newTempV128();
9638 assign(
9639 res256,
9640 binop(op,
9641 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
9642 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
9643 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
9644 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
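      /* The 256-bit result packs the shifted value in its lower 128 bits
         and the saturation indication in its upper 128 bits; the latter
         is compared against zero to update QCFLAG. */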
9645 assign(zero, mkV128(0x0000));
9646 putQReg128(dd, mkexpr(resSH));
9647 updateQCFLAGwithDifference(resQ, zero);
9648 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
9649 : (isU ? "uqshl" : "sqshl");
9650 const HChar arr = "bhsd"[size];
9651 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
9652 return True;
9653 }
9654
9655 if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
9656 /* -------- 0,11,10000 ADD d_d_d -------- */
9657 /* -------- 1,11,10000 SUB d_d_d -------- */
9658 Bool isSUB = bitU == 1;
9659 IRTemp res = newTemp(Ity_I64);
9660 assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
9661 getQRegLane(nn, 0, Ity_I64),
9662 getQRegLane(mm, 0, Ity_I64)));
9663 putQRegLane(dd, 0, mkexpr(res));
9664 putQRegLane(dd, 1, mkU64(0));
9665 DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
9666 nameQRegLO(dd, Ity_I64),
9667 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9668 return True;
9669 }
9670
9671 if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
9672 /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
9673 /* -------- 1,11,10001 CMEQ d_d_d -------- */ // ==
9674 Bool isEQ = bitU == 1;
9675 IRExpr* argL = getQReg128(nn);
9676 IRExpr* argR = getQReg128(mm);
9677 IRTemp res = newTempV128();
9678 assign(res,
9679 isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
9680 : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
9681 binop(Iop_AndV128, argL, argR),
9682 mkV128(0x0000))));
9683 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9684 DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
9685 nameQRegLO(dd, Ity_I64),
9686 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9687 return True;
9688 }
9689
9690 if (opcode == BITS5(1,0,1,1,0)) {
9691 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
9692 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
9693 if (size == X00 || size == X11) return False;
9694 Bool isR = bitU == 1;
9695 IRTemp res, sat1q, sat1n, vN, vM;
9696 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
9697 newTempsV128_2(&vN, &vM);
9698 assign(vN, getQReg128(nn));
9699 assign(vM, getQReg128(mm));
9700 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
9701 putQReg128(dd,
9702 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
9703 updateQCFLAGwithDifference(
9704 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
9705 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
9706 const HChar arr = "bhsd"[size];
9707 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
9708 DIP("%s %c%d, %c%d, %c%d\n", nm, arr, dd, arr, nn, arr, mm);
9709 return True;
9710 }
9711
9712 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
9713 /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
9714 IRType ity = size == X11 ? Ity_F64 : Ity_F32;
9715 IRTemp res = newTemp(ity);
9716 assign(res, unop(mkABSF(ity),
9717 triop(mkSUBF(ity),
9718 mkexpr(mk_get_IR_rounding_mode()),
9719 getQRegLO(nn,ity), getQRegLO(mm,ity))));
9720 putQReg128(dd, mkV128(0x0000));
9721 putQRegLO(dd, mkexpr(res));
9722 DIP("fabd %s, %s, %s\n",
9723 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9724 return True;
9725 }
9726
9727 if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
9728 /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
9729 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
9730 IRType ity = size == X01 ? Ity_F64 : Ity_F32;
9731 IRTemp res = newTemp(ity);
9732 assign(res, triop(mkMULF(ity),
9733 mkexpr(mk_get_IR_rounding_mode()),
9734 getQRegLO(nn,ity), getQRegLO(mm,ity)));
9735 putQReg128(dd, mkV128(0x0000));
9736 putQRegLO(dd, mkexpr(res));
9737 DIP("fmulx %s, %s, %s\n",
9738 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9739 return True;
9740 }
9741
9742 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
9743 /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
9744 /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
9745 Bool isD = size == X01;
9746 IRType ity = isD ? Ity_F64 : Ity_F32;
9747 Bool isGE = bitU == 1;
9748 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
9749 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
9750 IRTemp res = newTempV128();
9751 assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
9752 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
9753 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9754 mkexpr(res))));
9755 DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
9756 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9757 return True;
9758 }
9759
9760 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
9761 /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
9762 Bool isD = size == X11;
9763 IRType ity = isD ? Ity_F64 : Ity_F32;
9764 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
9765 IRTemp res = newTempV128();
9766 assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
9767 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9768 mkexpr(res))));
9769 DIP("%s %s, %s, %s\n", "fcmgt",
9770 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9771 return True;
9772 }
9773
9774 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
9775 /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
9776 /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
9777 Bool isD = (size & 1) == 1;
9778 IRType ity = isD ? Ity_F64 : Ity_F32;
9779 Bool isGT = (size & 2) == 2;
9780 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
9781 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
9782 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
9783 IRTemp res = newTempV128();
9784 assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
9785 unop(opABS, getQReg128(nn)))); // swapd
9786 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9787 mkexpr(res))));
9788 DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
9789 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9790 return True;
9791 }
9792
9793 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
9794 /* -------- 0,0x,11111: FRECPS d_d_d, s_s_s -------- */
9795 /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
9796 Bool isSQRT = (size & 2) == 2;
9797 Bool isD = (size & 1) == 1;
9798 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
9799 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
9800 IRTemp res = newTempV128();
9801 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
9802 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9803 mkexpr(res))));
9804 HChar c = isD ? 'd' : 's';
9805 DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
9806 c, dd, c, nn, c, mm);
9807 return True;
9808 }
9809
9810 return False;
9811 # undef INSN
9812 }
9813
9814
9815 static
9816 Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
9817 {
9818 /* 31 29 28 23 21 16 11 9 4
9819 01 U 11110 size 10000 opcode 10 n d
9820 Decode fields: u,size,opcode
9821 */
9822 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9823 if (INSN(31,30) != BITS2(0,1)
9824 || INSN(28,24) != BITS5(1,1,1,1,0)
9825 || INSN(21,17) != BITS5(1,0,0,0,0)
9826 || INSN(11,10) != BITS2(1,0)) {
9827 return False;
9828 }
9829 UInt bitU = INSN(29,29);
9830 UInt size = INSN(23,22);
9831 UInt opcode = INSN(16,12);
9832 UInt nn = INSN(9,5);
9833 UInt dd = INSN(4,0);
9834 vassert(size < 4);
9835
9836 if (opcode == BITS5(0,0,0,1,1)) {
9837 /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
9838 /* -------- 1,xx,00011: USQADD std4_std4 -------- */
9839 /* These are a bit tricky (to say the least). See comments on
9840 the vector variants (in dis_AdvSIMD_two_reg_misc) below for
9841 details. */
9842 Bool isUSQADD = bitU == 1;
9843 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
9844 : mkVecQADDEXTUSSATSS(size);
9845 IROp nop = mkVecADD(size);
9846 IRTemp argL = newTempV128();
9847 IRTemp argR = newTempV128();
9848 assign(argL, getQReg128(nn));
9849 assign(argR, getQReg128(dd));
9850 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9851 size, binop(qop, mkexpr(argL), mkexpr(argR)));
9852 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9853 size, binop(nop, mkexpr(argL), mkexpr(argR)));
9854 putQReg128(dd, mkexpr(qres));
9855 updateQCFLAGwithDifference(qres, nres);
9856 const HChar arr = "bhsd"[size];
9857 DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
9858 return True;
9859 }
9860
9861 if (opcode == BITS5(0,0,1,1,1)) {
9862 /* -------- 0,xx,00111 SQABS std4_std4 -------- */
9863 /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
9864 Bool isNEG = bitU == 1;
9865 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
9866 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
9867 getQReg128(nn), size );
9868 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
9869 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
9870 putQReg128(dd, mkexpr(qres));
9871 updateQCFLAGwithDifference(qres, nres);
9872 const HChar arr = "bhsd"[size];
9873 DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
9874 return True;
9875 }
9876
9877 if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
9878 /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
9879 /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
9880 Bool isGT = bitU == 0;
9881 IRExpr* argL = getQReg128(nn);
9882 IRExpr* argR = mkV128(0x0000);
9883 IRTemp res = newTempV128();
9884 assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
9885 : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
9886 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9887 DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
9888 return True;
9889 }
9890
9891 if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
9892 /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
9893 /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
9894 Bool isEQ = bitU == 0;
9895 IRExpr* argL = getQReg128(nn);
9896 IRExpr* argR = mkV128(0x0000);
9897 IRTemp res = newTempV128();
9898 assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
9899 : unop(Iop_NotV128,
9900 binop(Iop_CmpGT64Sx2, argL, argR)));
9901 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9902 DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
9903 return True;
9904 }
9905
9906 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
9907 /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
9908 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9909 binop(Iop_CmpGT64Sx2, mkV128(0x0000),
9910 getQReg128(nn))));
9911 DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
9912 return True;
9913 }
9914
9915 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
9916 /* -------- 0,11,01011 ABS d_d -------- */
9917 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9918 unop(Iop_Abs64x2, getQReg128(nn))));
9919 DIP("abs d%u, d%u\n", dd, nn);
9920 return True;
9921 }
9922
9923 if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
9924 /* -------- 1,11,01011 NEG d_d -------- */
9925 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9926 binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
9927 DIP("neg d%u, d%u\n", dd, nn);
9928 return True;
9929 }
9930
9931 UInt ix = 0; /*INVALID*/
9932 if (size >= X10) {
9933 switch (opcode) {
9934 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
9935 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
9936 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
9937 default: break;
9938 }
9939 }
9940 if (ix > 0) {
9941 /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
9942 /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
9943 /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
9944 /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
9945 /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
9946 Bool isD = size == X11;
9947 IRType ity = isD ? Ity_F64 : Ity_F32;
9948 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
9949 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
9950 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
9951 IROp opCmp = Iop_INVALID;
9952 Bool swap = False;
9953 const HChar* nm = "??";
9954 switch (ix) {
9955 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
9956 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
9957 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
9958 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
9959 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
9960 default: vassert(0);
9961 }
9962 IRExpr* zero = mkV128(0x0000);
9963 IRTemp res = newTempV128();
9964 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
9965 : binop(opCmp, getQReg128(nn), zero));
9966 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9967 mkexpr(res))));
9968
9969 DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
9970 return True;
9971 }
9972
9973 if (opcode == BITS5(1,0,1,0,0)
9974 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
9975 /* -------- 0,xx,10100: SQXTN -------- */
9976 /* -------- 1,xx,10100: UQXTN -------- */
9977 /* -------- 1,xx,10010: SQXTUN -------- */
9978 if (size == X11) return False;
9979 vassert(size < 3);
9980 IROp opN = Iop_INVALID;
9981 Bool zWiden = True;
9982 const HChar* nm = "??";
9983 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
9984 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
9985 }
9986 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
9987 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
9988 }
9989 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
9990 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
9991 }
9992 else vassert(0);
9993 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9994 size+1, getQReg128(nn));
9995 IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
9996 size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
9997 putQReg128(dd, mkexpr(resN));
9998 /* This widens zero lanes to zero, and compares it against zero, so all
9999 of the non-participating lanes make no contribution to the
10000 Q flag state. */
10001 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
10002 size, mkexpr(resN));
10003 updateQCFLAGwithDifference(src, resW);
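      /* Worked example: for sqxtn b0, h1 with the source lane holding
         0x0123 (291), the narrowed result saturates to 0x7F (127);
         sign-widening 0x7F back gives 0x007F, which differs from 0x0123,
         so QC gets set.  A source of, say, 0x0042 survives the round trip
         unchanged and leaves QC alone. */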
10004 const HChar arrNarrow = "bhsd"[size];
10005 const HChar arrWide = "bhsd"[size+1];
10006 DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
10007 return True;
10008 }
10009
10010 if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) {
10011 /* -------- 1,01,10110 FCVTXN s_d -------- */
10012 /* Using Irrm_NEAREST here isn't right. The docs say "round to
10013 odd" but I don't know what that really means. */
10014 putQRegLO(dd,
10015 binop(Iop_F64toF32, mkU32(Irrm_NEAREST),
10016 getQRegLO(nn, Ity_F64)));
10017 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10018 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10019 DIP("fcvtxn s%u, d%u\n", dd, nn);
10020 return True;
10021 }
10022
10023 ix = 0; /*INVALID*/
10024 switch (opcode) {
10025 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
10026 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
10027 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
10028 default: break;
10029 }
10030 if (ix > 0) {
10031 /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
10032 /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
10033 /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
10034 /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
10035 /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
10036 /* -------- 1,0x,11010 FCVTNU d_d, s_s (ix 1) -------- */
10037 /* -------- 1,0x,11011 FCVTMU d_d, s_s (ix 2) -------- */
10038 /* -------- 1,0x,11100 FCVTAU d_d, s_s (ix 3) -------- */
10039 /* -------- 1,1x,11010 FCVTPU d_d, s_s (ix 4) -------- */
10040 /* -------- 1,1x,11011 FCVTZU d_d, s_s (ix 5) -------- */
10041 Bool isD = (size & 1) == 1;
10042 IRType tyF = isD ? Ity_F64 : Ity_F32;
10043 IRType tyI = isD ? Ity_I64 : Ity_I32;
10044 IRRoundingMode irrm = 8; /*impossible*/
10045 HChar ch = '?';
10046 switch (ix) {
10047 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
10048 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
10049 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
10050 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
10051 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
10052 default: vassert(0);
10053 }
10054 IROp cvt = Iop_INVALID;
10055 if (bitU == 1) {
10056 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
10057 } else {
10058 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
10059 }
10060 IRTemp src = newTemp(tyF);
10061 IRTemp res = newTemp(tyI);
10062 assign(src, getQRegLane(nn, 0, tyF));
10063 assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
10064 putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
10065 if (!isD) {
10066 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10067 }
10068 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10069 HChar sOrD = isD ? 'd' : 's';
10070 DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
10071 sOrD, dd, sOrD, nn);
10072 return True;
10073 }
10074
10075 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
10076 /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */
10077 /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */
10078 Bool isU = bitU == 1;
10079 Bool isD = (size & 1) == 1;
10080 IRType tyI = isD ? Ity_I64 : Ity_I32;
10081 IROp iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
10082 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
10083 IRTemp rm = mk_get_IR_rounding_mode();
10084 putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI)));
10085 if (!isD) {
10086 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10087 }
10088 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10089 HChar c = isD ? 'd' : 's';
10090 DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn);
10091 return True;
10092 }
10093
10094 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
10095 /* -------- 0,1x,11101: FRECPE d_d, s_s -------- */
10096 /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
10097 Bool isSQRT = bitU == 1;
10098 Bool isD = (size & 1) == 1;
10099 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
10100 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
10101 IRTemp resV = newTempV128();
10102 assign(resV, unop(op, getQReg128(nn)));
10103 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10104 mkexpr(resV))));
10105 HChar c = isD ? 'd' : 's';
10106 DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
10107 return True;
10108 }
10109
10110 if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
10111 /* -------- 0,1x,11111: FRECPX d_d, s_s -------- */
10112 Bool isD = (size & 1) == 1;
10113 IRType ty = isD ? Ity_F64 : Ity_F32;
10114 IROp op = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
10115 IRTemp res = newTemp(ty);
10116 IRTemp rm = mk_get_IR_rounding_mode();
10117 assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
10118 putQReg128(dd, mkV128(0x0000));
10119 putQRegLane(dd, 0, mkexpr(res));
10120 HChar c = isD ? 'd' : 's';
10121 DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
10122 return True;
10123 }
10124
10125 return False;
10126 # undef INSN
10127 }
10128
10129
10130 static
10131 Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
10132 {
10133 /* 31 28 23 21 20 19 15 11 9 4
10134 01 U 11111 size L M m opcode H 0 n d
10135 Decode fields are: u,size,opcode
10136 M is really part of the mm register number. Individual
10137 cases need to inspect L and H though.
10138 */
10139 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10140 if (INSN(31,30) != BITS2(0,1)
10141 || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
10142 return False;
10143 }
10144 UInt bitU = INSN(29,29);
10145 UInt size = INSN(23,22);
10146 UInt bitL = INSN(21,21);
10147 UInt bitM = INSN(20,20);
10148 UInt mmLO4 = INSN(19,16);
10149 UInt opcode = INSN(15,12);
10150 UInt bitH = INSN(11,11);
10151 UInt nn = INSN(9,5);
10152 UInt dd = INSN(4,0);
10153 vassert(size < 4);
10154 vassert(bitH < 2 && bitM < 2 && bitL < 2);
10155
10156 if (bitU == 0 && size >= X10
10157 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
10158 /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
10159 /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
10160 Bool isD = (size & 1) == 1;
10161 Bool isSUB = opcode == BITS4(0,1,0,1);
10162 UInt index;
10163 if (!isD) index = (bitH << 1) | bitL;
10164 else if (isD && bitL == 0) index = bitH;
10165 else return False; // sz:L == x11 => unallocated encoding
10166 vassert(index < (isD ? 2 : 4));
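      /* Index decoding, for example: in the S form the element index is
         H:L, so an operand of v2.s[3] is encoded with H=1, L=1; in the D
         form only H is used and L must be 0 (sz:L == x11 is unallocated,
         as rejected above). */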
10167 IRType ity = isD ? Ity_F64 : Ity_F32;
10168 IRTemp elem = newTemp(ity);
10169 UInt mm = (bitM << 4) | mmLO4;
10170 assign(elem, getQRegLane(mm, index, ity));
10171 IRTemp dupd = math_DUP_TO_V128(elem, ity);
10172 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
10173 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
10174 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10175 IRTemp rm = mk_get_IR_rounding_mode();
10176 IRTemp t1 = newTempV128();
10177 IRTemp t2 = newTempV128();
10178 // FIXME: double rounding; use FMA primops instead
10179 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10180 assign(t2, triop(isSUB ? opSUB : opADD,
10181 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
10182 putQReg128(dd,
10183 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10184 mkexpr(t2))));
10185 const HChar c = isD ? 'd' : 's';
10186 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
10187 c, dd, c, nn, nameQReg128(mm), c, index);
10188 return True;
10189 }
10190
10191 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
10192 /* -------- 0,1x,1001 FMUL d_d_d[], s_s_s[] -------- */
10193 /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
10194 Bool isD = (size & 1) == 1;
10195 Bool isMULX = bitU == 1;
10196 UInt index;
10197 if (!isD) index = (bitH << 1) | bitL;
10198 else if (isD && bitL == 0) index = bitH;
10199 else return False; // sz:L == x11 => unallocated encoding
10200 vassert(index < (isD ? 2 : 4));
10201 IRType ity = isD ? Ity_F64 : Ity_F32;
10202 IRTemp elem = newTemp(ity);
10203 UInt mm = (bitM << 4) | mmLO4;
10204 assign(elem, getQRegLane(mm, index, ity));
10205 IRTemp dupd = math_DUP_TO_V128(elem, ity);
10206 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10207 IRTemp rm = mk_get_IR_rounding_mode();
10208 IRTemp t1 = newTempV128();
10209 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
10210 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10211 putQReg128(dd,
10212 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10213 mkexpr(t1))));
10214 const HChar c = isD ? 'd' : 's';
10215 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
10216 c, dd, c, nn, nameQReg128(mm), c, index);
10217 return True;
10218 }
10219
10220 if (bitU == 0
10221 && (opcode == BITS4(1,0,1,1)
10222 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
10223 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
10224 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
10225 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
10226 /* Widens, and size refers to the narrowed lanes. */
10227 UInt ks = 3;
10228 switch (opcode) {
10229 case BITS4(1,0,1,1): ks = 0; break;
10230 case BITS4(0,0,1,1): ks = 1; break;
10231 case BITS4(0,1,1,1): ks = 2; break;
10232 default: vassert(0);
10233 }
10234 vassert(ks >= 0 && ks <= 2);
10235 UInt mm = 32; // invalid
10236 UInt ix = 16; // invalid
10237 switch (size) {
10238 case X00:
10239 return False; // h_b_b[] case is not allowed
10240 case X01:
10241 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10242 case X10:
10243 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10244 case X11:
10245 return False; // q_d_d[] case is not allowed
10246 default:
10247 vassert(0);
10248 }
10249 vassert(mm < 32 && ix < 16);
10250 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
10251 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
10252 newTempsV128_2(&vecN, &vecD);
10253 assign(vecN, getQReg128(nn));
10254 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10255 assign(vecD, getQReg128(dd));
10256 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
10257 False/*!is2*/, size, "mas"[ks],
10258 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
10259 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
10260 putQReg128(dd, unop(opZHI, mkexpr(res)));
10261 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
10262 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10263 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
10264 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
10265 }
10266 const HChar* nm = ks == 0 ? "sqdmull"
10267 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
10268 const HChar arrNarrow = "bhsd"[size];
10269 const HChar arrWide = "bhsd"[size+1];
10270 DIP("%s %c%d, %c%d, v%d.%c[%u]\n",
10271 nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
10272 return True;
10273 }
10274
10275 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
10276 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
10277 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
10278 UInt mm = 32; // invalid
10279 UInt ix = 16; // invalid
10280 switch (size) {
10281 case X00:
10282 return False; // b case is not allowed
10283 case X01:
10284 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10285 case X10:
10286 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10287 case X11:
10288 return False; // q case is not allowed
10289 default:
10290 vassert(0);
10291 }
10292 vassert(mm < 32 && ix < 16);
10293 Bool isR = opcode == BITS4(1,1,0,1);
10294 IRTemp res, sat1q, sat1n, vN, vM;
10295 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
10296 vN = newTempV128();
10297 assign(vN, getQReg128(nn));
10298 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10299 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
10300 IROp opZHI = mkVecZEROHIxxOFV128(size);
10301 putQReg128(dd, unop(opZHI, mkexpr(res)));
10302 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10303 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
10304 HChar ch = size == X01 ? 'h' : 's';
10305 DIP("%s %c%d, %c%d, v%d.%c[%u]\n", nm, ch, dd, ch, nn, mm, ch, ix);
10306 return True;
10307 }
10308
10309 return False;
10310 # undef INSN
10311 }
10312
10313
10314 static
10315 Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
10316 {
10317 /* 31 28 22 18 15 10 9 4
10318 0 q u 011110 immh immb opcode 1 n d
10319 Decode fields: u,opcode
10320 */
10321 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10322 if (INSN(31,31) != 0
10323 || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
10324 return False;
10325 }
10326 UInt bitQ = INSN(30,30);
10327 UInt bitU = INSN(29,29);
10328 UInt immh = INSN(22,19);
10329 UInt immb = INSN(18,16);
10330 UInt opcode = INSN(15,11);
10331 UInt nn = INSN(9,5);
10332 UInt dd = INSN(4,0);
10333
10334 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
10335 /* -------- 0,00000 SSHR std7_std7_#imm -------- */
10336 /* -------- 1,00000 USHR std7_std7_#imm -------- */
10337 /* -------- 0,00010 SSRA std7_std7_#imm -------- */
10338 /* -------- 1,00010 USRA std7_std7_#imm -------- */
10339 /* laneTy, shift = case immh:immb of
10340 0001:xxx -> B, SHR:8-xxx
10341 001x:xxx -> H, SHR:16-xxxx
10342 01xx:xxx -> S, SHR:32-xxxxx
10343 1xxx:xxx -> D, SHR:64-xxxxxx
10344 other -> invalid
10345 */
10346 UInt size = 0;
10347 UInt shift = 0;
10348 Bool isQ = bitQ == 1;
10349 Bool isU = bitU == 1;
10350 Bool isAcc = opcode == BITS5(0,0,0,1,0);
10351 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10352 if (!ok || (bitQ == 0 && size == X11)) return False;
10353 vassert(size >= 0 && size <= 3);
10354 UInt lanebits = 8 << size;
10355 vassert(shift >= 1 && shift <= lanebits);
10356 IROp op = isU ? mkVecSHRN(size) : mkVecSARN(size);
10357 IRExpr* src = getQReg128(nn);
10358 IRTemp shf = newTempV128();
10359 IRTemp res = newTempV128();
10360 if (shift == lanebits && isU) {
10361 assign(shf, mkV128(0x0000));
10362 } else {
10363 UInt nudge = 0;
10364 if (shift == lanebits) {
10365 vassert(!isU);
10366 nudge = 1;
10367 }
10368 assign(shf, binop(op, src, mkU8(shift - nudge)));
10369 }
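      /* Note on the nudge: the SAR/SHR lane ops only accept shift amounts
         up to lanebits-1.  An unsigned shift by exactly lanebits is just
         zero (handled above), and a signed shift by lanebits-1 already
         replicates the sign bit into every bit position, which is the
         same result a shift by lanebits would give. */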
10370 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
10371 : mkexpr(shf));
10372 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10373 HChar laneCh = "bhsd"[size];
10374 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10375 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
10376 : (isU ? "ushr" : "sshr");
10377 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10378 nameQReg128(dd), nLanes, laneCh,
10379 nameQReg128(nn), nLanes, laneCh, shift);
10380 return True;
10381 }
10382
10383 if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
10384 /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
10385 /* -------- 1,00100 URSHR std7_std7_#imm -------- */
10386 /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
10387 /* -------- 1,00110 URSRA std7_std7_#imm -------- */
10388 /* laneTy, shift = case immh:immb of
10389 0001:xxx -> B, SHR:8-xxx
10390 001x:xxx -> H, SHR:16-xxxx
10391 01xx:xxx -> S, SHR:32-xxxxx
10392 1xxx:xxx -> D, SHR:64-xxxxxx
10393 other -> invalid
10394 */
10395 UInt size = 0;
10396 UInt shift = 0;
10397 Bool isQ = bitQ == 1;
10398 Bool isU = bitU == 1;
10399 Bool isAcc = opcode == BITS5(0,0,1,1,0);
10400 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10401 if (!ok || (bitQ == 0 && size == X11)) return False;
10402 vassert(size >= 0 && size <= 3);
10403 UInt lanebits = 8 << size;
10404 vassert(shift >= 1 && shift <= lanebits);
10405 IROp op = isU ? mkVecRSHU(size) : mkVecRSHS(size);
10406 IRExpr* src = getQReg128(nn);
10407 IRTemp imm8 = newTemp(Ity_I8);
10408 assign(imm8, mkU8((UChar)(-shift)));
10409 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
10410 IRTemp shf = newTempV128();
10411 IRTemp res = newTempV128();
10412 assign(shf, binop(op, src, amt));
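      /* These rounding-shift ops take a vector of per-lane signed shift
         amounts, with negative meaning "shift right"; hence the negated,
         duplicated immediate above.  Rounding adds 2^(shift-1) before the
         bits are discarded, e.g. urshr by 3 on a lane holding 13 computes
         (13 + 4) >> 3 = 2, where a plain ushr would give 1. */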
10413 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
10414 : mkexpr(shf));
10415 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10416 HChar laneCh = "bhsd"[size];
10417 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10418 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
10419 : (isU ? "urshr" : "srshr");
10420 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10421 nameQReg128(dd), nLanes, laneCh,
10422 nameQReg128(nn), nLanes, laneCh, shift);
10423 return True;
10424 }
10425
10426 if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
10427 /* -------- 1,01000 SRI std7_std7_#imm -------- */
10428 /* laneTy, shift = case immh:immb of
10429 0001:xxx -> B, SHR:8-xxx
10430 001x:xxx -> H, SHR:16-xxxx
10431 01xx:xxx -> S, SHR:32-xxxxx
10432 1xxx:xxx -> D, SHR:64-xxxxxx
10433 other -> invalid
10434 */
10435 UInt size = 0;
10436 UInt shift = 0;
10437 Bool isQ = bitQ == 1;
10438 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10439 if (!ok || (bitQ == 0 && size == X11)) return False;
10440 vassert(size >= 0 && size <= 3);
10441 UInt lanebits = 8 << size;
10442 vassert(shift >= 1 && shift <= lanebits);
10443 IRExpr* src = getQReg128(nn);
10444 IRTemp res = newTempV128();
10445 if (shift == lanebits) {
10446 assign(res, getQReg128(dd));
10447 } else {
10448 assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
10449 IRExpr* nmask = binop(mkVecSHLN(size),
10450 mkV128(0xFFFF), mkU8(lanebits - shift));
10451 IRTemp tmp = newTempV128();
10452 assign(tmp, binop(Iop_OrV128,
10453 mkexpr(res),
10454 binop(Iop_AndV128, getQReg128(dd), nmask)));
10455 res = tmp;
10456 }
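      /* Illustration: for a .16b SRI with shift 3, the shifted source
         occupies the low 5 bits of each lane and nmask is 0xFF << 5 = 0xE0
         per lane, so the top 3 bits of each destination lane are kept and
         the rest are replaced by the shifted-in source bits. */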
10457 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10458 HChar laneCh = "bhsd"[size];
10459 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10460 DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
10461 nameQReg128(dd), nLanes, laneCh,
10462 nameQReg128(nn), nLanes, laneCh, shift);
10463 return True;
10464 }
10465
10466 if (opcode == BITS5(0,1,0,1,0)) {
10467 /* -------- 0,01010 SHL std7_std7_#imm -------- */
10468 /* -------- 1,01010 SLI std7_std7_#imm -------- */
10469 /* laneTy, shift = case immh:immb of
10470 0001:xxx -> B, xxx
10471 001x:xxx -> H, xxxx
10472 01xx:xxx -> S, xxxxx
10473 1xxx:xxx -> D, xxxxxx
10474 other -> invalid
10475 */
10476 UInt size = 0;
10477 UInt shift = 0;
10478 Bool isSLI = bitU == 1;
10479 Bool isQ = bitQ == 1;
10480 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10481 if (!ok || (bitQ == 0 && size == X11)) return False;
10482 vassert(size >= 0 && size <= 3);
10483 /* The shift encoding has opposite sign for the leftwards case.
10484 Adjust shift to compensate. */
10485 UInt lanebits = 8 << size;
10486 shift = lanebits - shift;
10487 vassert(shift >= 0 && shift < lanebits);
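      /* For example, a .4s SHL by 5 encodes immh:immb as 32+5;
         getLaneInfo_IMMH_IMMB hands back the right-shift-style value
         32-5 = 27, and the subtraction above recovers the actual
         left-shift amount, 32 - 27 = 5. */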
10488 IROp op = mkVecSHLN(size);
10489 IRExpr* src = getQReg128(nn);
10490 IRTemp res = newTempV128();
10491 if (shift == 0) {
10492 assign(res, src);
10493 } else {
10494 assign(res, binop(op, src, mkU8(shift)));
10495 if (isSLI) {
10496 IRExpr* nmask = binop(mkVecSHRN(size),
10497 mkV128(0xFFFF), mkU8(lanebits - shift));
10498 IRTemp tmp = newTempV128();
10499 assign(tmp, binop(Iop_OrV128,
10500 mkexpr(res),
10501 binop(Iop_AndV128, getQReg128(dd), nmask)));
10502 res = tmp;
10503 }
10504 }
10505 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10506 HChar laneCh = "bhsd"[size];
10507 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10508 const HChar* nm = isSLI ? "sli" : "shl";
10509 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10510 nameQReg128(dd), nLanes, laneCh,
10511 nameQReg128(nn), nLanes, laneCh, shift);
10512 return True;
10513 }
10514
10515 if (opcode == BITS5(0,1,1,1,0)
10516 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
10517 /* -------- 0,01110 SQSHL std7_std7_#imm -------- */
10518 /* -------- 1,01110 UQSHL std7_std7_#imm -------- */
10519 /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */
10520 UInt size = 0;
10521 UInt shift = 0;
10522 Bool isQ = bitQ == 1;
10523 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10524 if (!ok || (bitQ == 0 && size == X11)) return False;
10525 vassert(size >= 0 && size <= 3);
10526 /* The shift encoding has opposite sign for the leftwards case.
10527 Adjust shift to compensate. */
10528 UInt lanebits = 8 << size;
10529 shift = lanebits - shift;
10530 vassert(shift >= 0 && shift < lanebits);
10531 const HChar* nm = NULL;
10532 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
10533 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
10534 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
10535 else vassert(0);
10536 IRTemp qDiff1 = IRTemp_INVALID;
10537 IRTemp qDiff2 = IRTemp_INVALID;
10538 IRTemp res = IRTemp_INVALID;
10539 IRTemp src = newTempV128();
10540 assign(src, getQReg128(nn));
10541 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
10542 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10543 updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
10544 isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
10545 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10546 DIP("%s %s.%s, %s.%s, #%u\n", nm,
10547 nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
10548 return True;
10549 }
10550
10551 if (bitU == 0
10552 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
10553 /* -------- 0,10000 SHRN{,2} #imm -------- */
10554 /* -------- 0,10001 RSHRN{,2} #imm -------- */
10555 /* Narrows, and size is the narrow size. */
10556 UInt size = 0;
10557 UInt shift = 0;
10558 Bool is2 = bitQ == 1;
10559 Bool isR = opcode == BITS5(1,0,0,0,1);
10560 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10561 if (!ok || size == X11) return False;
10562 vassert(shift >= 1);
10563 IRTemp t1 = newTempV128();
10564 IRTemp t2 = newTempV128();
10565 IRTemp t3 = newTempV128();
10566 assign(t1, getQReg128(nn));
10567 assign(t2, isR ? binop(mkVecADD(size+1),
10568 mkexpr(t1),
10569 mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
10570 : mkexpr(t1));
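      /* The rounding variant adds half of the weight of the last bit to be
         shifted out, e.g. rshrn by 4 on a 16-bit lane holding 0x0038 (56)
         computes (56 + 8) >> 4 = 4, where plain shrn gives 56 >> 4 = 3. */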
10571 assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
10572 IRTemp t4 = math_NARROW_LANES(t3, t3, size);
10573 putLO64andZUorPutHI64(is2, dd, t4);
10574 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10575 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10576 DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
10577 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
10578 return True;
10579 }
10580
10581 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
10582 || (bitU == 1
10583 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
10584 /* -------- 0,10010 SQSHRN{,2} #imm -------- */
10585 /* -------- 1,10010 UQSHRN{,2} #imm -------- */
10586 /* -------- 0,10011 SQRSHRN{,2} #imm -------- */
10587 /* -------- 1,10011 UQRSHRN{,2} #imm -------- */
10588 /* -------- 1,10000 SQSHRUN{,2} #imm -------- */
10589 /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
10590 UInt size = 0;
10591 UInt shift = 0;
10592 Bool is2 = bitQ == 1;
10593 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10594 if (!ok || size == X11) return False;
10595 vassert(shift >= 1 && shift <= (8 << size));
10596 const HChar* nm = "??";
10597 IROp op = Iop_INVALID;
10598 /* Decide on the name and the operation. */
10599 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
10600 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
10601 }
10602 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10603 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
10604 }
10605 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
10606 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
10607 }
10608 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
10609 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
10610 }
10611 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
10612 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
10613 }
10614 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
10615 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
10616 }
10617 else vassert(0);
10618 /* Compute the result (Q, shifted value) pair. */
10619 IRTemp src128 = newTempV128();
10620 assign(src128, getQReg128(nn));
10621 IRTemp pair = newTempV128();
10622 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
10623 /* Update the result reg */
10624 IRTemp res64in128 = newTempV128();
10625 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
10626 putLO64andZUorPutHI64(is2, dd, res64in128);
10627 /* Update the Q flag. */
10628 IRTemp q64q64 = newTempV128();
10629 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
10630 IRTemp z128 = newTempV128();
10631 assign(z128, mkV128(0x0000));
10632 updateQCFLAGwithDifference(q64q64, z128);
10633 /* */
10634 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10635 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10636 DIP("%s %s.%s, %s.%s, #%u\n", nm,
10637 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
10638 return True;
10639 }
10640
10641 if (opcode == BITS5(1,0,1,0,0)) {
10642 /* -------- 0,10100 SSHLL{,2} #imm -------- */
10643 /* -------- 1,10100 USHLL{,2} #imm -------- */
10644 /* 31 28 22 18 15 9 4
10645 0q0 011110 immh immb 101001 n d SSHLL Vd.Ta, Vn.Tb, #sh
10646 0q1 011110 immh immb 101001 n d USHLL Vd.Ta, Vn.Tb, #sh
10647 where Ta,Tb,sh
10648 = case immh of 1xxx -> invalid
10649 01xx -> 2d, 2s(q0)/4s(q1), immh:immb - 32 (0..31)
10650 001x -> 4s, 4h(q0)/8h(q1), immh:immb - 16 (0..15)
10651 0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8 (0..7)
10652 0000 -> AdvSIMD modified immediate (???)
10653 */
10654 Bool isQ = bitQ == 1;
10655 Bool isU = bitU == 1;
10656 UInt immhb = (immh << 3) | immb;
10657 IRTemp src = newTempV128();
10658 IRTemp zero = newTempV128();
10659 IRExpr* res = NULL;
10660 UInt sh = 0;
10661 const HChar* ta = "??";
10662 const HChar* tb = "??";
10663 assign(src, getQReg128(nn));
10664 assign(zero, mkV128(0x0000));
10665 if (immh & 8) {
10666 /* invalid; don't assign to res */
10667 }
10668 else if (immh & 4) {
10669 sh = immhb - 32;
10670 vassert(sh < 32); /* so 32-sh is 1..32 */
10671 ta = "2d";
10672 tb = isQ ? "4s" : "2s";
10673 IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
10674 : mk_InterleaveLO32x4(src, zero);
10675 res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
10676 }
10677 else if (immh & 2) {
10678 sh = immhb - 16;
10679 vassert(sh < 16); /* so 16-sh is 1..16 */
10680 ta = "4s";
10681 tb = isQ ? "8h" : "4h";
10682 IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
10683 : mk_InterleaveLO16x8(src, zero);
10684 res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
10685 }
10686 else if (immh & 1) {
10687 sh = immhb - 8;
10688 vassert(sh < 8); /* so 8-sh is 1..8 */
10689 ta = "8h";
10690 tb = isQ ? "16b" : "8b";
10691 IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
10692 : mk_InterleaveLO8x16(src, zero);
10693 res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
10694 } else {
10695 vassert(immh == 0);
10696 /* invalid; don't assign to res */
10697 }
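      /* How the widening works: interleaving the source with an all-zero
         vector leaves each selected narrow element in the top half of a
         wide lane, with zeroes beneath it.  Shifting that right by
         (narrow-lane-bits - sh) -- arithmetically for SSHLL, logically for
         USHLL -- sign- or zero-extends the element and leaves it shifted
         left by sh, all in one operation. */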
10698 /* */
10699 if (res) {
10700 putQReg128(dd, res);
10701 DIP("%cshll%s %s.%s, %s.%s, #%d\n",
10702 isU ? 'u' : 's', isQ ? "2" : "",
10703 nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
10704 return True;
10705 }
10706 return False;
10707 }
10708
10709 if (opcode == BITS5(1,1,1,0,0)) {
10710 /* -------- 0,11100 SCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
10711 /* -------- 1,11100 UCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
10712 /* If immh is of the form 00xx, the insn is invalid. */
10713 if (immh < BITS4(0,1,0,0)) return False;
10714 UInt size = 0;
10715 UInt fbits = 0;
10716 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
10717 /* The following holds because immh is never zero. */
10718 vassert(ok);
10719 /* The following holds because immh >= 0100. */
10720 vassert(size == X10 || size == X11);
10721 Bool isD = size == X11;
10722 Bool isU = bitU == 1;
10723 Bool isQ = bitQ == 1;
10724 if (isD && !isQ) return False; /* reject .1d case */
10725 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
10726 Double scale = two_to_the_minus(fbits);
10727 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
10728 : IRExpr_Const(IRConst_F32( (Float)scale ));
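      /* The immediate gives the number of fraction bits, so an integer
         lane I denotes the value I * 2^-fbits.  For example, with #8,
         scvtf turns a lane holding 0x00000180 (384) into 1.5. */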
10729 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
10730 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
10731 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
10732 IRType tyF = isD ? Ity_F64 : Ity_F32;
10733 IRType tyI = isD ? Ity_I64 : Ity_I32;
10734 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
10735 vassert(nLanes == 2 || nLanes == 4);
10736 for (UInt i = 0; i < nLanes; i++) {
10737 IRTemp src = newTemp(tyI);
10738 IRTemp res = newTemp(tyF);
10739 IRTemp rm = mk_get_IR_rounding_mode();
10740 assign(src, getQRegLane(nn, i, tyI));
10741 assign(res, triop(opMUL, mkexpr(rm),
10742 binop(opCVT, mkexpr(rm), mkexpr(src)),
10743 scaleE));
10744 putQRegLane(dd, i, mkexpr(res));
10745 }
10746 if (!isQ) {
10747 putQRegLane(dd, 1, mkU64(0));
10748 }
10749 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10750 DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf",
10751 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
10752 return True;
10753 }
10754
10755 if (opcode == BITS5(1,1,1,1,1)) {
10756 /* -------- 0,11111 FCVTZS {2d_2d,4s_4s,2s_2s}_imm -------- */
10757 /* -------- 1,11111 FCVTZU {2d_2d,4s_4s,2s_2s}_imm -------- */
10758 /* If immh is of the form 00xx, the insn is invalid. */
10759 if (immh < BITS4(0,1,0,0)) return False;
10760 UInt size = 0;
10761 UInt fbits = 0;
10762 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
10763 /* The following holds because immh is never zero. */
10764 vassert(ok);
10765 /* The following holds because immh >= 0100. */
10766 vassert(size == X10 || size == X11);
10767 Bool isD = size == X11;
10768 Bool isU = bitU == 1;
10769 Bool isQ = bitQ == 1;
10770 if (isD && !isQ) return False; /* reject .1d case */
10771 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
10772 Double scale = two_to_the_plus(fbits);
10773 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
10774 : IRExpr_Const(IRConst_F32( (Float)scale ));
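      /* Converse of the SCVTF/UCVTF case above: the float is scaled up by
         2^fbits and then truncated towards zero, so with #8, fcvtzs maps
         1.5 to 0x00000180 (384). */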
10775 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
10776 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
10777 : (isD ? Iop_F64toI64S : Iop_F32toI32S);
10778 IRType tyF = isD ? Ity_F64 : Ity_F32;
10779 IRType tyI = isD ? Ity_I64 : Ity_I32;
10780 UInt nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
10781 vassert(nLanes == 2 || nLanes == 4);
10782 for (UInt i = 0; i < nLanes; i++) {
10783 IRTemp src = newTemp(tyF);
10784 IRTemp res = newTemp(tyI);
10785 IRTemp rm = newTemp(Ity_I32);
10786 assign(src, getQRegLane(nn, i, tyF));
10787 assign(rm, mkU32(Irrm_ZERO));
10788 assign(res, binop(opCVT, mkexpr(rm),
10789 triop(opMUL, mkexpr(rm),
10790 mkexpr(src), scaleE)));
10791 putQRegLane(dd, i, mkexpr(res));
10792 }
10793 if (!isQ) {
10794 putQRegLane(dd, 1, mkU64(0));
10795 }
10796 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10797 DIP("%s %s.%s, %s.%s, #%u\n", isU ? "fcvtzu" : "fcvtzs",
10798 nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
10799 return True;
10800 }
10801
10802 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10803 return False;
10804 # undef INSN
10805 }
10806
10807
10808 static
10809 Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
10810 {
10811 /* 31 30 29 28 23 21 20 15 11 9 4
10812 0 Q U 01110 size 1 m opcode 00 n d
10813 Decode fields: u,opcode
10814 */
10815 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10816 if (INSN(31,31) != 0
10817 || INSN(28,24) != BITS5(0,1,1,1,0)
10818 || INSN(21,21) != 1
10819 || INSN(11,10) != BITS2(0,0)) {
10820 return False;
10821 }
10822 UInt bitQ = INSN(30,30);
10823 UInt bitU = INSN(29,29);
10824 UInt size = INSN(23,22);
10825 UInt mm = INSN(20,16);
10826 UInt opcode = INSN(15,12);
10827 UInt nn = INSN(9,5);
10828 UInt dd = INSN(4,0);
10829 vassert(size < 4);
10830 Bool is2 = bitQ == 1;
10831
10832 if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
10833 /* -------- 0,0000 SADDL{2} -------- */
10834 /* -------- 1,0000 UADDL{2} -------- */
10835 /* -------- 0,0010 SSUBL{2} -------- */
10836 /* -------- 1,0010 USUBL{2} -------- */
10837 /* Widens, and size refers to the narrowed lanes. */
10838 if (size == X11) return False;
10839 vassert(size <= 2);
10840 Bool isU = bitU == 1;
10841 Bool isADD = opcode == BITS4(0,0,0,0);
10842 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
10843 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
10844 IRTemp res = newTempV128();
10845 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
10846 mkexpr(argL), mkexpr(argR)));
10847 putQReg128(dd, mkexpr(res));
10848 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10849 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10850 const HChar* nm = isADD ? (isU ? "uaddl" : "saddl")
10851 : (isU ? "usubl" : "ssubl");
10852 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
10853 nameQReg128(dd), arrWide,
10854 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
10855 return True;
10856 }
10857
10858 if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
10859 /* -------- 0,0001 SADDW{2} -------- */
10860 /* -------- 1,0001 UADDW{2} -------- */
10861 /* -------- 0,0011 SSUBW{2} -------- */
10862 /* -------- 1,0011 USUBW{2} -------- */
10863 /* Widens, and size refers to the narrowed lanes. */
10864 if (size == X11) return False;
10865 vassert(size <= 2);
10866 Bool isU = bitU == 1;
10867 Bool isADD = opcode == BITS4(0,0,0,1);
10868 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
10869 IRTemp res = newTempV128();
10870 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
10871 getQReg128(nn), mkexpr(argR)));
10872 putQReg128(dd, mkexpr(res));
10873 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10874 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10875 const HChar* nm = isADD ? (isU ? "uaddw" : "saddw")
10876 : (isU ? "usubw" : "ssubw");
10877 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
10878 nameQReg128(dd), arrWide,
10879 nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
10880 return True;
10881 }
10882
10883 if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
10884 /* -------- 0,0100 ADDHN{2} -------- */
10885 /* -------- 1,0100 RADDHN{2} -------- */
10886 /* -------- 0,0110 SUBHN{2} -------- */
10887 /* -------- 1,0110 RSUBHN{2} -------- */
10888 /* Narrows, and size refers to the narrowed lanes. */
10889 if (size == X11) return False;
10890 vassert(size <= 2);
10891 const UInt shift[3] = { 8, 16, 32 };
10892 Bool isADD = opcode == BITS4(0,1,0,0);
10893 Bool isR = bitU == 1;
10894 /* Combined elements in wide lanes */
10895 IRTemp wide = newTempV128();
10896 IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
10897 getQReg128(nn), getQReg128(mm));
10898 if (isR) {
10899 wideE = binop(mkVecADD(size+1),
10900 wideE,
10901 mkexpr(math_VEC_DUP_IMM(size+1,
10902 1ULL << (shift[size]-1))));
10903 }
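      /* The rounding forms add half the weight of the lowest retained bit
         first: e.g. for the 8h->8b case the constant is 1 << 7, so a
         16-bit sum of 0x0180 narrows to 0x02 with raddhn but to 0x01 with
         plain addhn. */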
10904 assign(wide, wideE);
10905 /* Top halves of elements, still in wide lanes */
10906 IRTemp shrd = newTempV128();
10907 assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
10908 /* Elements now compacted into lower 64 bits */
10909 IRTemp new64 = newTempV128();
10910 assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
10911 putLO64andZUorPutHI64(is2, dd, new64);
10912 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10913 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10914 const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
10915 : (isR ? "rsubhn" : "subhn");
10916 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
10917 nameQReg128(dd), arrNarrow,
10918 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
10919 return True;
10920 }
10921
10922 if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
10923 /* -------- 0,0101 SABAL{2} -------- */
10924 /* -------- 1,0101 UABAL{2} -------- */
10925 /* -------- 0,0111 SABDL{2} -------- */
10926 /* -------- 1,0111 UABDL{2} -------- */
10927 /* Widens, and size refers to the narrowed lanes. */
10928 if (size == X11) return False;
10929 vassert(size <= 2);
10930 Bool isU = bitU == 1;
10931 Bool isACC = opcode == BITS4(0,1,0,1);
10932 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
10933 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
10934 IRTemp abd = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
10935 IRTemp res = newTempV128();
10936 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
10937 : mkexpr(abd));
10938 putQReg128(dd, mkexpr(res));
10939 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10940 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10941 const HChar* nm = isACC ? (isU ? "uabal" : "sabal")
10942 : (isU ? "uabdl" : "sabdl");
10943 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
10944 nameQReg128(dd), arrWide,
10945 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
10946 return True;
10947 }
10948
10949 if (opcode == BITS4(1,1,0,0)
10950 || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
10951 /* -------- 0,1100 SMULL{2} -------- */ // 0 (ks)
10952 /* -------- 1,1100 UMULL{2} -------- */ // 0
10953 /* -------- 0,1000 SMLAL{2} -------- */ // 1
10954 /* -------- 1,1000 UMLAL{2} -------- */ // 1
10955 /* -------- 0,1010 SMLSL{2} -------- */ // 2
10956 /* -------- 1,1010 UMLSL{2} -------- */ // 2
10957 /* Widens, and size refers to the narrowed lanes. */
10958 UInt ks = 3;
10959 switch (opcode) {
10960 case BITS4(1,1,0,0): ks = 0; break;
10961 case BITS4(1,0,0,0): ks = 1; break;
10962 case BITS4(1,0,1,0): ks = 2; break;
10963 default: vassert(0);
10964 }
10965 vassert(ks >= 0 && ks <= 2);
10966 if (size == X11) return False;
10967 vassert(size <= 2);
10968 Bool isU = bitU == 1;
10969 IRTemp vecN = newTempV128();
10970 IRTemp vecM = newTempV128();
10971 IRTemp vecD = newTempV128();
10972 assign(vecN, getQReg128(nn));
10973 assign(vecM, getQReg128(mm));
10974 assign(vecD, getQReg128(dd));
10975 IRTemp res = IRTemp_INVALID;
10976 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
10977 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
10978 putQReg128(dd, mkexpr(res));
10979 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10980 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10981 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
10982 DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
10983 nameQReg128(dd), arrWide,
10984 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
10985 return True;
10986 }
10987
10988 if (bitU == 0
10989 && (opcode == BITS4(1,1,0,1)
10990 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
10991 /* -------- 0,1101 SQDMULL{2} -------- */ // 0 (ks)
10992 /* -------- 0,1001 SQDMLAL{2} -------- */ // 1
10993 /* -------- 0,1011 SQDMLSL{2} -------- */ // 2
10994 /* Widens, and size refers to the narrowed lanes. */
10995 UInt ks = 3;
10996 switch (opcode) {
10997 case BITS4(1,1,0,1): ks = 0; break;
10998 case BITS4(1,0,0,1): ks = 1; break;
10999 case BITS4(1,0,1,1): ks = 2; break;
11000 default: vassert(0);
11001 }
11002 vassert(ks >= 0 && ks <= 2);
11003 if (size == X00 || size == X11) return False;
11004 vassert(size <= 2);
11005 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
11006 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
11007 newTempsV128_3(&vecN, &vecM, &vecD);
11008 assign(vecN, getQReg128(nn));
11009 assign(vecM, getQReg128(mm));
11010 assign(vecD, getQReg128(dd));
11011 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
11012 is2, size, "mas"[ks],
11013 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11014 putQReg128(dd, mkexpr(res));
11015 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
11016 updateQCFLAGwithDifference(sat1q, sat1n);
11017 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
11018 updateQCFLAGwithDifference(sat2q, sat2n);
11019 }
11020 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11021 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11022 const HChar* nm = ks == 0 ? "sqdmull"
11023 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
11024 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
11025 nameQReg128(dd), arrWide,
11026 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
11027 return True;
11028 }
11029
11030 if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
11031 /* -------- 0,1110 PMULL{2} -------- */
11032 /* Widens, and size refers to the narrowed lanes. */
11033 if (size != X00) return False;
11034 IRTemp res
11035 = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
11036 getQReg128(nn), getQReg128(mm));
11037 putQReg128(dd, mkexpr(res));
11038 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11039 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11040 DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
11041 nameQReg128(dd), arrNarrow,
11042 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
11043 return True;
11044 }
11045
11046 return False;
11047 # undef INSN
11048 }
11049
11050
11051 static
11052 Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
11053 {
11054 /* 31 30 29 28 23 21 20 15 10 9 4
11055 0 Q U 01110 size 1 m opcode 1 n d
11056 Decode fields: u,size,opcode
11057 */
11058 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11059 if (INSN(31,31) != 0
11060 || INSN(28,24) != BITS5(0,1,1,1,0)
11061 || INSN(21,21) != 1
11062 || INSN(10,10) != 1) {
11063 return False;
11064 }
11065 UInt bitQ = INSN(30,30);
11066 UInt bitU = INSN(29,29);
11067 UInt size = INSN(23,22);
11068 UInt mm = INSN(20,16);
11069 UInt opcode = INSN(15,11);
11070 UInt nn = INSN(9,5);
11071 UInt dd = INSN(4,0);
11072 vassert(size < 4);
11073
11074 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
11075 /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
11076 /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
11077 /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
11078 /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
11079 if (size == X11) return False;
11080 Bool isADD = opcode == BITS5(0,0,0,0,0);
11081 Bool isU = bitU == 1;
11082 /* Widen both args out, do the math, narrow to final result. */
11083 IRTemp argL = newTempV128();
11084 IRTemp argLhi = IRTemp_INVALID;
11085 IRTemp argLlo = IRTemp_INVALID;
11086 IRTemp argR = newTempV128();
11087 IRTemp argRhi = IRTemp_INVALID;
11088 IRTemp argRlo = IRTemp_INVALID;
11089 IRTemp resHi = newTempV128();
11090 IRTemp resLo = newTempV128();
11091 IRTemp res = IRTemp_INVALID;
11092 assign(argL, getQReg128(nn));
11093 argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
11094 argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argL));
11095 assign(argR, getQReg128(mm));
11096 argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
11097 argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argR));
11098 IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
11099 IROp opSxR = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
11100 assign(resHi, binop(opSxR,
11101 binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
11102 mkU8(1)));
11103 assign(resLo, binop(opSxR,
11104 binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
11105 mkU8(1)));
11106 res = math_NARROW_LANES ( resHi, resLo, size );
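      /* The widen/add/halve/narrow sequence avoids the carry that an
         in-lane add or sub could lose: e.g. uhadd on byte lanes 0xFF and
         0x01 widens to 0x00FF + 0x0001 = 0x0100, halves to 0x0080, and
         narrows back to 0x80. */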
11107 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11108 const HChar* nm = isADD ? (isU ? "uhadd" : "shadd")
11109 : (isU ? "uhsub" : "shsub");
11110 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11111 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11112 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11113 return True;
11114 }
11115
11116 if (opcode == BITS5(0,0,0,1,0)) {
11117 /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
11118 /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
11119 if (bitQ == 0 && size == X11) return False; // implied 1d case
11120 Bool isU = bitU == 1;
11121 IRTemp argL = newTempV128();
11122 IRTemp argR = newTempV128();
11123 assign(argL, getQReg128(nn));
11124 assign(argR, getQReg128(mm));
11125 IRTemp res = math_RHADD(size, isU, argL, argR);
11126 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11127 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11128 DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
11129 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11130 return True;
11131 }
11132
11133 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
11134 /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
11135 /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
11136 /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
11137 /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
11138 if (bitQ == 0 && size == X11) return False; // implied 1d case
11139 Bool isADD = opcode == BITS5(0,0,0,0,1);
11140 Bool isU = bitU == 1;
11141 IROp qop = Iop_INVALID;
11142 IROp nop = Iop_INVALID;
11143 if (isADD) {
11144 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
11145 nop = mkVecADD(size);
11146 } else {
11147 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
11148 nop = mkVecSUB(size);
11149 }
11150 IRTemp argL = newTempV128();
11151 IRTemp argR = newTempV128();
11152 IRTemp qres = newTempV128();
11153 IRTemp nres = newTempV128();
11154 assign(argL, getQReg128(nn));
11155 assign(argR, getQReg128(mm));
11156 assign(qres, math_MAYBE_ZERO_HI64_fromE(
11157 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
11158 assign(nres, math_MAYBE_ZERO_HI64_fromE(
11159 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
11160 putQReg128(dd, mkexpr(qres));
11161 updateQCFLAGwithDifference(qres, nres);
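      /* QC is set by comparing the saturating result against the plain
         wrapping one; they differ exactly when saturation happened.  E.g.
         for sqadd on byte lanes, 0x7F + 0x01 gives 0x7F saturated but
         0x80 wrapped, so QC gets set. */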
11162 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
11163 : (isU ? "uqsub" : "sqsub");
11164 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11165 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11166 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11167 return True;
11168 }
11169
11170 if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
11171 /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
11172 /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
11173 /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
11174 /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
11175 Bool isORx = (size & 2) == 2;
11176 Bool invert = (size & 1) == 1;
11177 IRTemp res = newTempV128();
11178 assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
11179 getQReg128(nn),
11180 invert ? unop(Iop_NotV128, getQReg128(mm))
11181 : getQReg128(mm)));
11182 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11183 const HChar* names[4] = { "and", "bic", "orr", "orn" };
11184 const HChar* ar = bitQ == 1 ? "16b" : "8b";
11185 DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
11186 nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
11187 return True;
11188 }
11189
11190 if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
11191 /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
11192 /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
11193 /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
11194 /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
11195 IRTemp argD = newTempV128();
11196 IRTemp argN = newTempV128();
11197 IRTemp argM = newTempV128();
11198 assign(argD, getQReg128(dd));
11199 assign(argN, getQReg128(nn));
11200 assign(argM, getQReg128(mm));
11201 const IROp opXOR = Iop_XorV128;
11202 const IROp opAND = Iop_AndV128;
11203 const IROp opNOT = Iop_NotV128;
11204 IRTemp res = newTempV128();
11205 switch (size) {
11206 case BITS2(0,0): /* EOR */
11207 assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
11208 break;
11209 case BITS2(0,1): /* BSL */
11210 assign(res, binop(opXOR, mkexpr(argM),
11211 binop(opAND,
11212 binop(opXOR, mkexpr(argM), mkexpr(argN)),
11213 mkexpr(argD))));
11214 break;
11215 case BITS2(1,0): /* BIT */
11216 assign(res, binop(opXOR, mkexpr(argD),
11217 binop(opAND,
11218 binop(opXOR, mkexpr(argD), mkexpr(argN)),
11219 mkexpr(argM))));
11220 break;
11221 case BITS2(1,1): /* BIF */
11222 assign(res, binop(opXOR, mkexpr(argD),
11223 binop(opAND,
11224 binop(opXOR, mkexpr(argD), mkexpr(argN)),
11225 unop(opNOT, mkexpr(argM)))));
11226 break;
11227 default:
11228 vassert(0);
11229 }
11230 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11231 const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
11232 const HChar* arr = bitQ == 1 ? "16b" : "8b";
11233 DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
11234 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11235 return True;
11236 }
11237
11238 if (opcode == BITS5(0,0,1,1,0)) {
11239 /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
11240 /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
11241 if (bitQ == 0 && size == X11) return False; // implied 1d case
11242 Bool isGT = bitU == 0;
11243 IRExpr* argL = getQReg128(nn);
11244 IRExpr* argR = getQReg128(mm);
11245 IRTemp res = newTempV128();
11246 assign(res,
11247 isGT ? binop(mkVecCMPGTS(size), argL, argR)
11248 : binop(mkVecCMPGTU(size), argL, argR));
11249 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11250 const HChar* nm = isGT ? "cmgt" : "cmhi";
11251 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11252 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11253 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11254 return True;
11255 }
11256
11257 if (opcode == BITS5(0,0,1,1,1)) {
11258 /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
11259 /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
11260 if (bitQ == 0 && size == X11) return False; // implied 1d case
11261 Bool isGE = bitU == 0;
11262 IRExpr* argL = getQReg128(nn);
11263 IRExpr* argR = getQReg128(mm);
11264 IRTemp res = newTempV128();
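      /* >= is synthesised as the complement of the swapped >:
         x >= y  <=>  !(y > x), for both the signed and unsigned cases. */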
11265 assign(res,
11266 isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
11267 : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
11268 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11269 const HChar* nm = isGE ? "cmge" : "cmhs";
11270 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11271 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11272 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11273 return True;
11274 }
11275
11276 if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
11277 /* -------- 0,xx,01000 SSHL std7_std7_std7 -------- */
11278 /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
11279 /* -------- 1,xx,01000 USHL std7_std7_std7 -------- */
11280 /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
11281 if (bitQ == 0 && size == X11) return False; // implied 1d case
11282 Bool isU = bitU == 1;
11283 Bool isR = opcode == BITS5(0,1,0,1,0);
11284 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
11285 : (isU ? mkVecSHU(size) : mkVecSHS(size));
11286 IRTemp res = newTempV128();
11287 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
11288 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11289 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
11290 : (isU ? "ushl" : "sshl");
11291 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11292 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11293 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11294 return True;
11295 }
11296
11297 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
11298 /* -------- 0,xx,01001 SQSHL std7_std7_std7 -------- */
11299 /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
11300 /* -------- 1,xx,01001 UQSHL std7_std7_std7 -------- */
11301 /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
11302 if (bitQ == 0 && size == X11) return False; // implied 1d case
11303 Bool isU = bitU == 1;
11304 Bool isR = opcode == BITS5(0,1,0,1,1);
11305 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
11306 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
11307 /* This is a bit tricky. If we're only interested in the lowest 64 bits
11308 of the result (viz, bitQ == 0), then we must adjust the operands to
11309 ensure that the upper part of the result, that we don't care about,
11310 doesn't pollute the returned Q value. To do this, zero out the upper
11311 operand halves beforehand. This works because it means, for the
11312 lanes we don't care about, we are shifting zero by zero, which can
11313 never saturate. */
11314 IRTemp res256 = newTemp(Ity_V256);
11315 IRTemp resSH = newTempV128();
11316 IRTemp resQ = newTempV128();
11317 IRTemp zero = newTempV128();
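      /* The QAND*SH primops return a V256: the half extracted with
         V256toV128_0 is the shifted result written to Vd, and the
         V256toV128_1 half carries the per-lane saturation indication,
         which is compared against zero below to update QC. */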
11318 assign(res256, binop(op,
11319 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
11320 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
11321 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
11322 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
11323 assign(zero, mkV128(0x0000));
11324 putQReg128(dd, mkexpr(resSH));
11325 updateQCFLAGwithDifference(resQ, zero);
11326 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
11327 : (isU ? "uqshl" : "sqshl");
11328 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11329 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11330 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11331 return True;
11332 }
11333
11334 if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
11335 /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
11336 /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
11337 /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
11338 /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
11339 if (bitQ == 0 && size == X11) return False; // implied 1d case
11340 Bool isU = bitU == 1;
11341 Bool isMAX = (opcode & 1) == 0;
11342 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
11343 : (isU ? mkVecMINU(size) : mkVecMINS(size));
11344 IRTemp t = newTempV128();
11345 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
11346 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
11347 const HChar* nm = isMAX ? (isU ? "umax" : "smax")
11348 : (isU ? "umin" : "smin");
11349 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11350 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11351 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11352 return True;
11353 }
11354
11355 if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
11356 /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
11357 /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
11358 /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
11359 /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
11360 if (size == X11) return False; // 1d/2d cases not allowed
11361 Bool isU = bitU == 1;
11362 Bool isACC = opcode == BITS5(0,1,1,1,1);
11363 vassert(size <= 2);
11364 IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
11365 IRTemp t2 = newTempV128();
11366 assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
11367 : mkexpr(t1));
11368 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
11369 const HChar* nm = isACC ? (isU ? "uaba" : "saba")
11370 : (isU ? "uabd" : "sabd");
11371 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11372 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11373 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11374 return True;
11375 }
11376
11377 if (opcode == BITS5(1,0,0,0,0)) {
11378 /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
11379 /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
11380 if (bitQ == 0 && size == X11) return False; // implied 1d case
11381 Bool isSUB = bitU == 1;
11382 IROp op = isSUB ? mkVecSUB(size) : mkVecADD(size);
11383 IRTemp t = newTempV128();
11384 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
11385 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
11386 const HChar* nm = isSUB ? "sub" : "add";
11387 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11388 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11389 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11390 return True;
11391 }
11392
11393 if (opcode == BITS5(1,0,0,0,1)) {
11394 /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
11395 /* -------- 1,xx,10001 CMEQ std7_std7_std7 -------- */ // ==
11396 if (bitQ == 0 && size == X11) return False; // implied 1d case
11397 Bool isEQ = bitU == 1;
11398 IRExpr* argL = getQReg128(nn);
11399 IRExpr* argR = getQReg128(mm);
11400 IRTemp res = newTempV128();
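      /* CMTST sets a lane to all ones iff (Vn & Vm) is nonzero in that
         lane, hence NOT(CMPEQ(Vn & Vm, 0)). */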
11401 assign(res,
11402 isEQ ? binop(mkVecCMPEQ(size), argL, argR)
11403 : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
11404 binop(Iop_AndV128, argL, argR),
11405 mkV128(0x0000))));
11406 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11407 const HChar* nm = isEQ ? "cmeq" : "cmtst";
11408 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11409 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11410 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11411 return True;
11412 }
11413
11414 if (opcode == BITS5(1,0,0,1,0)) {
11415 /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
11416 /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
11417 if (bitQ == 0 && size == X11) return False; // implied 1d case
11418 Bool isMLS = bitU == 1;
11419 IROp opMUL = mkVecMUL(size);
11420 IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
11421 IRTemp res = newTempV128();
11422 if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
11423 assign(res, binop(opADDSUB,
11424 getQReg128(dd),
11425 binop(opMUL, getQReg128(nn), getQReg128(mm))));
11426 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11427 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11428 DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
11429 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11430 return True;
11431 }
11432 return False;
11433 }
11434
11435 if (opcode == BITS5(1,0,0,1,1)) {
11436 /* -------- 0,xx,10011 MUL std7_std7_std7 -------- */
11437 /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
11438 if (bitQ == 0 && size == X11) return False; // implied 1d case
11439 Bool isPMUL = bitU == 1;
11440 const IROp opsPMUL[4]
11441 = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
11442 IROp opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
11443 IRTemp res = newTempV128();
11444 if (opMUL != Iop_INVALID) {
11445 assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
11446 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11447 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11448 DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
11449 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11450 return True;
11451 }
11452 return False;
11453 }
11454
11455 if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
11456 /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
11457 /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
11458 /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
11459 /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
11460 if (size == X11) return False;
11461 Bool isU = bitU == 1;
11462 Bool isMAX = opcode == BITS5(1,0,1,0,0);
11463 IRTemp vN = newTempV128();
11464 IRTemp vM = newTempV128();
11465 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
11466 : (isU ? mkVecMINU(size) : mkVecMINS(size));
11467 assign(vN, getQReg128(nn));
11468 assign(vM, getQReg128(mm));
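      /* Pairwise max/min: each result lane is the op applied to an
         adjacent even/odd pair of lanes drawn from the concatenation of
         the two source registers. */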
11469 IRTemp res128 = newTempV128();
11470 assign(res128,
11471 binop(op,
11472 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
11473 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
11474 /* In the half-width case, use CatEL32x4 to extract the half-width
11475 result from the full-width result. */
11476 IRExpr* res
11477 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
11478 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
11479 mkexpr(res128)))
11480 : mkexpr(res128);
11481 putQReg128(dd, res);
11482 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11483 const HChar* nm = isMAX ? (isU ? "umaxp" : "smaxp")
11484 : (isU ? "uminp" : "sminp");
11485 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11486 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11487 return True;
11488 }
11489
11490 if (opcode == BITS5(1,0,1,1,0)) {
11491 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
11492 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
11493 if (size == X00 || size == X11) return False;
11494 Bool isR = bitU == 1;
11495 IRTemp res, sat1q, sat1n, vN, vM;
11496 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
11497 newTempsV128_2(&vN, &vM);
11498 assign(vN, getQReg128(nn));
11499 assign(vM, getQReg128(mm));
11500 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
11501 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11502 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
11503 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
11504 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11505 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
11506 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
11507 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11508 return True;
11509 }
11510
11511 if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
11512 /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
11513 if (bitQ == 0 && size == X11) return False; // implied 1d case
11514 IRTemp vN = newTempV128();
11515 IRTemp vM = newTempV128();
11516 assign(vN, getQReg128(nn));
11517 assign(vM, getQReg128(mm));
11518 IRTemp res128 = newTempV128();
11519 assign(res128,
11520 binop(mkVecADD(size),
11521 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
11522 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
11523 /* In the half-width case, use CatEL32x4 to extract the half-width
11524 result from the full-width result. */
11525 IRExpr* res
11526 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
11527 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
11528 mkexpr(res128)))
11529 : mkexpr(res128);
11530 putQReg128(dd, res);
11531 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11532 DIP("addp %s.%s, %s.%s, %s.%s\n",
11533 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11534 return True;
11535 }
11536
11537 if (bitU == 0
11538 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
11539 /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11540 /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11541 /* -------- 0,0x,11110 FMAX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11542 /* -------- 0,1x,11110 FMIN 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11543 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
11544 Bool isD = (size & 1) == 1;
11545 if (bitQ == 0 && isD) return False; // implied 1d case
11546 Bool isMIN = (size & 2) == 2;
11547 Bool isNM = opcode == BITS5(1,1,0,0,0);
11548 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
11549 IRTemp res = newTempV128();
11550 assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
11551 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11552 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11553 DIP("%s%s %s.%s, %s.%s, %s.%s\n",
11554 isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
11555 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11556 return True;
11557 }
11558
11559 if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
11560 /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11561 /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11562 Bool isD = (size & 1) == 1;
11563 Bool isSUB = (size & 2) == 2;
11564 if (bitQ == 0 && isD) return False; // implied 1d case
11565 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
11566 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
11567 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
11568 IRTemp rm = mk_get_IR_rounding_mode();
11569 IRTemp t1 = newTempV128();
11570 IRTemp t2 = newTempV128();
11571 // FIXME: double rounding; use FMA primops instead
11572 assign(t1, triop(opMUL,
11573 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
11574 assign(t2, triop(isSUB ? opSUB : opADD,
11575 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
11576 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
11577 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11578 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
11579 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11580 return True;
11581 }
11582
11583 if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
11584 /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11585 /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11586 Bool isD = (size & 1) == 1;
11587 Bool isSUB = (size & 2) == 2;
11588 if (bitQ == 0 && isD) return False; // implied 1d case
11589 const IROp ops[4]
11590 = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
11591 IROp op = ops[size];
11592 IRTemp rm = mk_get_IR_rounding_mode();
11593 IRTemp t1 = newTempV128();
11594 IRTemp t2 = newTempV128();
11595 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
11596 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
11597 putQReg128(dd, mkexpr(t2));
11598 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11599 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
11600 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11601 return True;
11602 }
11603
11604 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
11605 /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11606 Bool isD = (size & 1) == 1;
11607 if (bitQ == 0 && isD) return False; // implied 1d case
11608 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
11609 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
11610 IRTemp rm = mk_get_IR_rounding_mode();
11611 IRTemp t1 = newTempV128();
11612 IRTemp t2 = newTempV128();
11613 // FIXME: use Abd primop instead?
11614 assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
11615 assign(t2, unop(opABS, mkexpr(t1)));
11616 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
11617 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11618 DIP("fabd %s.%s, %s.%s, %s.%s\n",
11619 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11620 return True;
11621 }
11622
11623 if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
11624 /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11625 /* -------- 1,0x,11011 FMUL 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11626 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
11627 Bool isD = (size & 1) == 1;
11628 Bool isMULX = bitU == 0;
11629 if (bitQ == 0 && isD) return False; // implied 1d case
11630 IRTemp rm = mk_get_IR_rounding_mode();
11631 IRTemp t1 = newTempV128();
11632 assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
11633 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
11634 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
11635 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11636 DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
11637 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11638 return True;
11639 }
11640
11641 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
11642 /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11643 /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11644 Bool isD = (size & 1) == 1;
11645 if (bitQ == 0 && isD) return False; // implied 1d case
11646 Bool isGE = bitU == 1;
11647 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
11648 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
11649 IRTemp t1 = newTempV128();
11650 assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
11651 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
11652 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
11653 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11654 DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
11655 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11656 return True;
11657 }
11658
11659 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
11660 /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11661 Bool isD = (size & 1) == 1;
11662 if (bitQ == 0 && isD) return False; // implied 1d case
11663 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
11664 IRTemp t1 = newTempV128();
11665 assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
11666 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
11667 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11668 DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
11669 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11670 return True;
11671 }
11672
11673 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
11674 /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11675 /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11676 Bool isD = (size & 1) == 1;
11677 Bool isGT = (size & 2) == 2;
11678 if (bitQ == 0 && isD) return False; // implied 1d case
11679 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
11680 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
11681 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
11682 IRTemp t1 = newTempV128();
11683 assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
11684 unop(opABS, getQReg128(nn)))); // swapd
11685 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
11686 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11687 DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
11688 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11689 return True;
11690 }
11691
11692 if (bitU == 1
11693 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
11694 /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11695 /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11696 /* -------- 1,0x,11110 FMAXP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11697 /* -------- 1,1x,11110 FMINP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11698 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
11699 Bool isD = (size & 1) == 1;
11700 if (bitQ == 0 && isD) return False; // implied 1d case
11701 Bool isMIN = (size & 2) == 2;
11702 Bool isNM = opcode == BITS5(1,1,0,0,0);
11703 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
11704 IRTemp srcN = newTempV128();
11705 IRTemp srcM = newTempV128();
11706 IRTemp preL = IRTemp_INVALID;
11707 IRTemp preR = IRTemp_INVALID;
11708 assign(srcN, getQReg128(nn));
11709 assign(srcM, getQReg128(mm));
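      /* math_REARRANGE_FOR_FLOATING_PAIRWISE shuffles the lanes of the two
         sources into preL/preR so that a plain vector MAX/MIN of those two
         values yields the pairwise result. */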
11710 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
11711 srcM, srcN, isD, bitQ);
11712 putQReg128(
11713 dd, math_MAYBE_ZERO_HI64_fromE(
11714 bitQ,
11715 binop(opMXX, mkexpr(preL), mkexpr(preR))));
11716 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11717 DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
11718 isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
11719 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11720 return True;
11721 }
11722
11723 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
11724 /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11725 Bool isD = size == X01;
11726 if (bitQ == 0 && isD) return False; // implied 1d case
11727 IRTemp srcN = newTempV128();
11728 IRTemp srcM = newTempV128();
11729 IRTemp preL = IRTemp_INVALID;
11730 IRTemp preR = IRTemp_INVALID;
11731 assign(srcN, getQReg128(nn));
11732 assign(srcM, getQReg128(mm));
11733 math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
11734 srcM, srcN, isD, bitQ);
11735 putQReg128(
11736 dd, math_MAYBE_ZERO_HI64_fromE(
11737 bitQ,
11738 triop(mkVecADDF(isD ? 3 : 2),
11739 mkexpr(mk_get_IR_rounding_mode()),
11740 mkexpr(preL), mkexpr(preR))));
11741 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11742 DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
11743 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11744 return True;
11745 }
11746
11747 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
11748 /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11749 Bool isD = (size & 1) == 1;
11750 if (bitQ == 0 && isD) return False; // implied 1d case
11751 vassert(size <= 1);
11752 const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
11753 IROp op = ops[size];
11754 IRTemp rm = mk_get_IR_rounding_mode();
11755 IRTemp t1 = newTempV128();
11756 IRTemp t2 = newTempV128();
11757 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
11758 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
11759 putQReg128(dd, mkexpr(t2));
11760 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11761 DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
11762 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11763 return True;
11764 }
11765
11766 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
11767 /* -------- 0,0x,11111: FRECPS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11768 /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11769 Bool isSQRT = (size & 2) == 2;
11770 Bool isD = (size & 1) == 1;
11771 if (bitQ == 0 && isD) return False; // implied 1d case
11772 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
11773 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
11774 IRTemp res = newTempV128();
11775 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
11776 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11777 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11778 DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
11779 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11780 return True;
11781 }
11782
11783 return False;
11784 # undef INSN
11785 }
11786
11787
11788 static
11789 Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
11790 {
11791 /* 31 30 29 28 23 21 16 11 9 4
11792 0 Q U 01110 size 10000 opcode 10 n d
11793 Decode fields: U,size,opcode
11794 */
11795 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11796 if (INSN(31,31) != 0
11797 || INSN(28,24) != BITS5(0,1,1,1,0)
11798 || INSN(21,17) != BITS5(1,0,0,0,0)
11799 || INSN(11,10) != BITS2(1,0)) {
11800 return False;
11801 }
11802 UInt bitQ = INSN(30,30);
11803 UInt bitU = INSN(29,29);
11804 UInt size = INSN(23,22);
11805 UInt opcode = INSN(16,12);
11806 UInt nn = INSN(9,5);
11807 UInt dd = INSN(4,0);
11808 vassert(size < 4);
11809
11810 if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
11811 /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
11812 /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
11813 /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
11814 const IROp iops[3] = { Iop_Reverse8sIn64_x2,
11815 Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
11816 vassert(size <= 2);
11817 IRTemp res = newTempV128();
11818 assign(res, unop(iops[size], getQReg128(nn)));
11819 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11820 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11821 DIP("%s %s.%s, %s.%s\n", "rev64",
11822 nameQReg128(dd), arr, nameQReg128(nn), arr);
11823 return True;
11824 }
11825
11826 if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
11827 /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
11828 /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
11829 Bool isH = size == X01;
11830 IRTemp res = newTempV128();
11831 IROp iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
11832 assign(res, unop(iop, getQReg128(nn)));
11833 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11834 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11835 DIP("%s %s.%s, %s.%s\n", "rev32",
11836 nameQReg128(dd), arr, nameQReg128(nn), arr);
11837 return True;
11838 }
11839
11840 if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
11841 /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
11842 IRTemp res = newTempV128();
11843 assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
11844 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11845 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11846 DIP("%s %s.%s, %s.%s\n", "rev16",
11847 nameQReg128(dd), arr, nameQReg128(nn), arr);
11848 return True;
11849 }
11850
11851 if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
11852 /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
11853 /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
11854 /* -------- 0,xx,00110: SADALP std6_std6 -------- */
11855 /* -------- 1,xx,00110: UADALP std6_std6 -------- */
11856 /* Widens, and size refers to the narrow size. */
11857 if (size == X11) return False; // no 1d or 2d cases
11858 Bool isU = bitU == 1;
11859 Bool isACC = opcode == BITS5(0,0,1,1,0);
11860 IRTemp src = newTempV128();
11861 IRTemp sum = newTempV128();
11862 IRTemp res = newTempV128();
11863 assign(src, getQReg128(nn));
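      /* Each wide result lane is the sum of a widened adjacent pair of
         narrow source lanes: dst[i] = widen(src[2i]) + widen(src[2i+1]). */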
11864 assign(sum,
11865 binop(mkVecADD(size+1),
11866 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
11867 isU, True/*fromOdd*/, size, mkexpr(src))),
11868 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
11869 isU, False/*!fromOdd*/, size, mkexpr(src)))));
11870 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
11871 : mkexpr(sum));
11872 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11873 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11874 const HChar* arrWide = nameArr_Q_SZ(bitQ, size+1);
11875 DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
11876 : (isU ? "uaddlp" : "saddlp"),
11877 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
11878 return True;
11879 }
11880
11881 if (opcode == BITS5(0,0,0,1,1)) {
11882 /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
11883 /* -------- 1,xx,00011: USQADD std7_std7 -------- */
11884 if (bitQ == 0 && size == X11) return False; // implied 1d case
11885 Bool isUSQADD = bitU == 1;
11886 /* This is switched (in the US vs SU sense) deliberately.
11887 SUQADD corresponds to the ExtUSsatSS variants and
11888 USQADD corresponds to the ExtSUsatUU variants.
11889 See libvex_ir for more details. */
11890 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
11891 : mkVecQADDEXTUSSATSS(size);
11892 IROp nop = mkVecADD(size);
11893 IRTemp argL = newTempV128();
11894 IRTemp argR = newTempV128();
11895 IRTemp qres = newTempV128();
11896 IRTemp nres = newTempV128();
11897 /* Because the two arguments to the addition are implicitly
11898 extended differently (one signedly, the other unsignedly) it is
11899 important to present them to the primop in the correct order. */
11900 assign(argL, getQReg128(nn));
11901 assign(argR, getQReg128(dd));
11902 assign(qres, math_MAYBE_ZERO_HI64_fromE(
11903 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
11904 assign(nres, math_MAYBE_ZERO_HI64_fromE(
11905 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
11906 putQReg128(dd, mkexpr(qres));
11907 updateQCFLAGwithDifference(qres, nres);
11908 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11909 DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
11910 nameQReg128(dd), arr, nameQReg128(nn), arr);
11911 return True;
11912 }
11913
11914 if (opcode == BITS5(0,0,1,0,0)) {
11915 /* -------- 0,xx,00100: CLS std6_std6 -------- */
11916 /* -------- 1,xx,00100: CLZ std6_std6 -------- */
11917 if (size == X11) return False; // no 1d or 2d cases
11918 const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
11919 const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
11920 Bool isCLZ = bitU == 1;
11921 IRTemp res = newTempV128();
11922 vassert(size <= 2);
11923 assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
11924 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11925 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11926 DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
11927 nameQReg128(dd), arr, nameQReg128(nn), arr);
11928 return True;
11929 }
11930
11931 if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
11932 /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
11933 /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
11934 IRTemp res = newTempV128();
11935 assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
11936 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11937 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
11938 DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
11939 nameQReg128(dd), arr, nameQReg128(nn), arr);
11940 return True;
11941 }
11942
11943 if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
11944 /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */
11945 IRTemp res = newTempV128();
11946 assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
11947 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11948 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
11949 DIP("%s %s.%s, %s.%s\n", "rbit",
11950 nameQReg128(dd), arr, nameQReg128(nn), arr);
11951 return True;
11952 }
11953
11954 if (opcode == BITS5(0,0,1,1,1)) {
11955 /* -------- 0,xx,00111 SQABS std7_std7 -------- */
11956 /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
11957 if (bitQ == 0 && size == X11) return False; // implied 1d case
11958 Bool isNEG = bitU == 1;
11959 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
11960 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
11961 getQReg128(nn), size );
11962 IRTemp qres = newTempV128(), nres = newTempV128();
11963 assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
11964 assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
11965 putQReg128(dd, mkexpr(qres));
11966 updateQCFLAGwithDifference(qres, nres);
11967 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11968 DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
11969 nameQReg128(dd), arr, nameQReg128(nn), arr);
11970 return True;
11971 }
11972
11973 if (opcode == BITS5(0,1,0,0,0)) {
11974 /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
11975 /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
11976 if (bitQ == 0 && size == X11) return False; // implied 1d case
11977 Bool isGT = bitU == 0;
11978 IRExpr* argL = getQReg128(nn);
11979 IRExpr* argR = mkV128(0x0000);
11980 IRTemp res = newTempV128();
11981 IROp opGTS = mkVecCMPGTS(size);
11982 assign(res, isGT ? binop(opGTS, argL, argR)
11983 : unop(Iop_NotV128, binop(opGTS, argR, argL)));
11984 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11985 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11986 DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
11987 nameQReg128(dd), arr, nameQReg128(nn), arr);
11988 return True;
11989 }
11990
11991 if (opcode == BITS5(0,1,0,0,1)) {
11992 /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
11993 /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
11994 if (bitQ == 0 && size == X11) return False; // implied 1d case
11995 Bool isEQ = bitU == 0;
11996 IRExpr* argL = getQReg128(nn);
11997 IRExpr* argR = mkV128(0x0000);
11998 IRTemp res = newTempV128();
11999 assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
12000 : unop(Iop_NotV128,
12001 binop(mkVecCMPGTS(size), argL, argR)));
12002 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12003 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12004 DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
12005 nameQReg128(dd), arr, nameQReg128(nn), arr);
12006 return True;
12007 }
12008
12009 if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
12010 /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
12011 if (bitQ == 0 && size == X11) return False; // implied 1d case
12012 IRExpr* argL = getQReg128(nn);
12013 IRExpr* argR = mkV128(0x0000);
12014 IRTemp res = newTempV128();
12015 assign(res, binop(mkVecCMPGTS(size), argR, argL));
12016 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12017 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12018 DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
12019 nameQReg128(dd), arr, nameQReg128(nn), arr);
12020 return True;
12021 }
12022
12023 if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
12024 /* -------- 0,xx,01011: ABS std7_std7 -------- */
12025 if (bitQ == 0 && size == X11) return False; // implied 1d case
12026 IRTemp res = newTempV128();
12027 assign(res, unop(mkVecABS(size), getQReg128(nn)));
12028 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12029 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12030 DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
12031 return True;
12032 }
12033
12034 if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
12035 /* -------- 1,xx,01011: NEG std7_std7 -------- */
12036 if (bitQ == 0 && size == X11) return False; // implied 1d case
12037 IRTemp res = newTempV128();
12038 assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
12039 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12040 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12041 DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
12042 return True;
12043 }
12044
12045 UInt ix = 0; /*INVALID*/
12046 if (size >= X10) {
12047 switch (opcode) {
12048 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
12049 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
12050 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
12051 default: break;
12052 }
12053 }
12054 if (ix > 0) {
12055 /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
12056 /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
12057 /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
12058 /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
12059 /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
12060 if (bitQ == 0 && size == X11) return False; // implied 1d case
12061 Bool isD = size == X11;
12062 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
12063 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
12064 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
12065 IROp opCmp = Iop_INVALID;
12066 Bool swap = False;
12067 const HChar* nm = "??";
12068 switch (ix) {
12069 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
12070 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
12071 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
12072 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
12073 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
12074 default: vassert(0);
12075 }
12076 IRExpr* zero = mkV128(0x0000);
12077 IRTemp res = newTempV128();
12078 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
12079 : binop(opCmp, getQReg128(nn), zero));
12080 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12081 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12082 DIP("%s %s.%s, %s.%s, #0.0\n", nm,
12083 nameQReg128(dd), arr, nameQReg128(nn), arr);
12084 return True;
12085 }
12086
12087 if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
12088 /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
12089 /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
12090 if (bitQ == 0 && size == X11) return False; // implied 1d case
12091 Bool isFNEG = bitU == 1;
12092 IROp op = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
12093 : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
12094 IRTemp res = newTempV128();
12095 assign(res, unop(op, getQReg128(nn)));
12096 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12097 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12098 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
12099 nameQReg128(dd), arr, nameQReg128(nn), arr);
12100 return True;
12101 }
12102
12103 if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
12104 /* -------- 0,xx,10010: XTN{,2} -------- */
12105 if (size == X11) return False;
12106 vassert(size < 3);
12107 Bool is2 = bitQ == 1;
12108 IROp opN = mkVecNARROWUN(size);
12109 IRTemp resN = newTempV128();
12110 assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
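      /* XTN writes the narrowed result to the low 64 bits of Vd and zeroes
         the top half; XTN2 writes it to the top half, leaving the low half
         of Vd unchanged. */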
12111 putLO64andZUorPutHI64(is2, dd, resN);
12112 const HChar* nm = "xtn";
12113 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12114 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12115 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
12116 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12117 return True;
12118 }
12119
12120 if (opcode == BITS5(1,0,1,0,0)
12121 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
12122 /* -------- 0,xx,10100: SQXTN{,2} -------- */
12123 /* -------- 1,xx,10100: UQXTN{,2} -------- */
12124 /* -------- 1,xx,10010: SQXTUN{,2} -------- */
12125 if (size == X11) return False;
12126 vassert(size < 3);
12127 Bool is2 = bitQ == 1;
12128 IROp opN = Iop_INVALID;
12129 Bool zWiden = True;
12130 const HChar* nm = "??";
12131 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
12132 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
12133 }
12134 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
12135 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
12136 }
12137 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
12138 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
12139 }
12140 else vassert(0);
12141 IRTemp src = newTempV128();
12142 assign(src, getQReg128(nn));
12143 IRTemp resN = newTempV128();
12144 assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
12145 putLO64andZUorPutHI64(is2, dd, resN);
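      /* Saturation detection: re-widen the narrowed result and compare it
         with the original source; any difference means some lane
         saturated, so QC must be set. */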
12146 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
12147 size, mkexpr(resN));
12148 updateQCFLAGwithDifference(src, resW);
12149 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12150 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12151 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
12152 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12153 return True;
12154 }
12155
12156 if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
12157 /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
12158 /* Widens, and size is the narrow size. */
12159 if (size == X11) return False;
12160 Bool is2 = bitQ == 1;
12161 IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
12162 IROp opSHL = mkVecSHLN(size+1);
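      /* Widen by interleaving each narrow lane with itself, then shift the
         doubled lane left by the narrow lane width: (x:x) << w leaves x in
         the top half with zeroes below, which is exactly what SHLL
         produces. */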
12163 IRTemp src = newTempV128();
12164 IRTemp res = newTempV128();
12165 assign(src, getQReg128(nn));
12166 assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
12167 mkU8(8 << size)));
12168 putQReg128(dd, mkexpr(res));
12169 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12170 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12171 DIP("shll%s %s.%s, %s.%s, #%u\n", is2 ? "2" : "",
12172 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
12173 return True;
12174 }
12175
12176 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
12177 /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
12178 UInt nLanes = size == X00 ? 4 : 2;
12179 IRType srcTy = size == X00 ? Ity_F32 : Ity_F64;
12180 IROp opCvt = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
12181 IRTemp rm = mk_get_IR_rounding_mode();
12182 IRTemp src[nLanes];
12183 for (UInt i = 0; i < nLanes; i++) {
12184 src[i] = newTemp(srcTy);
12185 assign(src[i], getQRegLane(nn, i, srcTy));
12186 }
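      /* FCVTN writes the narrowed lanes into the low half of Vd and zeroes
         the top half; FCVTN2 (bitQ == 1) writes them into the top half and
         leaves the low half unchanged, hence the 'nLanes * bitQ + i'
         destination lane index. */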
12187 for (UInt i = 0; i < nLanes; i++) {
12188 putQRegLane(dd, nLanes * bitQ + i,
12189 binop(opCvt, mkexpr(rm), mkexpr(src[i])));
12190 }
12191 if (bitQ == 0) {
12192 putQRegLane(dd, 1, mkU64(0));
12193 }
12194 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12195 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
12196 DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12197 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12198 return True;
12199 }
12200
12201 if (bitU == 1 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
12202 /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */
12203 /* Using Irrm_NEAREST here isn't right. The docs say "round to
12204 odd" but I don't know what that really means. */
12205 IRType srcTy = Ity_F64;
12206 IROp opCvt = Iop_F64toF32;
12207 IRTemp src[2];
12208 for (UInt i = 0; i < 2; i++) {
12209 src[i] = newTemp(srcTy);
12210 assign(src[i], getQRegLane(nn, i, srcTy));
12211 }
12212 for (UInt i = 0; i < 2; i++) {
12213 putQRegLane(dd, 2 * bitQ + i,
12214 binop(opCvt, mkU32(Irrm_NEAREST), mkexpr(src[i])));
12215 }
12216 if (bitQ == 0) {
12217 putQRegLane(dd, 1, mkU64(0));
12218 }
12219 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12220 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
12221 DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12222 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
12223 return True;
12224 }
12225
12226 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
12227 /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
12228 UInt nLanes = size == X00 ? 4 : 2;
12229 IRType srcTy = size == X00 ? Ity_F16 : Ity_F32;
12230 IROp opCvt = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
12231 IRTemp src[nLanes];
12232 for (UInt i = 0; i < nLanes; i++) {
12233 src[i] = newTemp(srcTy);
12234 assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
12235 }
12236 for (UInt i = 0; i < nLanes; i++) {
12237 putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
12238 }
12239 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12240 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
12241 DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12242 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
12243 return True;
12244 }
12245
12246 ix = 0;
12247 if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
12248 ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
12249 // = 1 + bitU[0]:size[1]:opcode[0]
12250 vassert(ix >= 1 && ix <= 8);
12251 if (ix == 7) ix = 0;
12252 }
12253 if (ix > 0) {
12254 /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
12255 /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
12256 /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
12257 /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
12258 /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
12259 /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
12260 /* -------- 1,1x,11000 (apparently unassigned) (7) -------- */
12261 /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
12262 /* rm plan:
12263 FRINTN: tieeven -- !! FIXME KLUDGED !!
12264 FRINTM: -inf
12265 FRINTP: +inf
12266 FRINTZ: zero
12267 FRINTA: tieaway -- !! FIXME KLUDGED !!
12268 FRINTX: per FPCR + "exact = TRUE"
12269 FRINTI: per FPCR
12270 */
12271 Bool isD = (size & 1) == 1;
12272 if (bitQ == 0 && isD) return False; // implied 1d case
12273
12274 IRTemp irrmRM = mk_get_IR_rounding_mode();
12275
12276 UChar ch = '?';
12277 IRTemp irrm = newTemp(Ity_I32);
12278 switch (ix) {
12279 case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
12280 case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
12281 case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
12282 case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
12283 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
12284 case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
12285 // I am unsure about the following, due to the "integral exact"
12286 // description in the manual. What does it mean? (frintx, that is)
12287 case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
12288 case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
12289 default: vassert(0);
12290 }
12291
12292 IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
12293 if (isD) {
12294 for (UInt i = 0; i < 2; i++) {
12295 putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
12296 getQRegLane(nn, i, Ity_F64)));
12297 }
12298 } else {
12299 UInt n = bitQ==1 ? 4 : 2;
12300 for (UInt i = 0; i < n; i++) {
12301 putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
12302 getQRegLane(nn, i, Ity_F32)));
12303 }
12304 if (bitQ == 0)
12305 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
12306 }
12307 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12308 DIP("frint%c %s.%s, %s.%s\n", ch,
12309 nameQReg128(dd), arr, nameQReg128(nn), arr);
12310 return True;
12311 }
12312
12313 ix = 0; /*INVALID*/
12314 switch (opcode) {
12315 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
12316 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
12317 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
12318 default: break;
12319 }
12320 if (ix > 0) {
12321 /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
12322 /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
12323 /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
12324 /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
12325 /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
12326 /* -------- 1,0x,11010 FCVTNU 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
12327 /* -------- 1,0x,11011 FCVTMU 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
12328 /* -------- 1,0x,11100 FCVTAU 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
12329 /* -------- 1,1x,11010 FCVTPU 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
12330 /* -------- 1,1x,11011 FCVTZU 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
12331 Bool isD = (size & 1) == 1;
12332 if (bitQ == 0 && isD) return False; // implied 1d case
12333
12334 IRRoundingMode irrm = 8; /*impossible*/
12335 HChar ch = '?';
12336 switch (ix) {
12337 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
12338 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
12339 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
12340 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
12341 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
12342 default: vassert(0);
12343 }
12344 IROp cvt = Iop_INVALID;
12345 if (bitU == 1) {
12346 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
12347 } else {
12348 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
12349 }
12350 if (isD) {
12351 for (UInt i = 0; i < 2; i++) {
12352 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
12353 getQRegLane(nn, i, Ity_F64)));
12354 }
12355 } else {
12356 UInt n = bitQ==1 ? 4 : 2;
12357 for (UInt i = 0; i < n; i++) {
12358 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
12359 getQRegLane(nn, i, Ity_F32)));
12360 }
12361 if (bitQ == 0)
12362 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
12363 }
12364 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12365 DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's',
12366 nameQReg128(dd), arr, nameQReg128(nn), arr);
12367 return True;
12368 }
12369
12370 if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
12371 /* -------- 0,10,11100: URECPE 4s_4s, 2s_2s -------- */
12372 /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
12373 Bool isREC = bitU == 0;
12374 IROp op = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
12375 IRTemp res = newTempV128();
12376 assign(res, unop(op, getQReg128(nn)));
12377 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12378 const HChar* nm = isREC ? "urecpe" : "ursqrte";
12379 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12380 DIP("%s %s.%s, %s.%s\n", nm,
12381 nameQReg128(dd), arr, nameQReg128(nn), arr);
12382 return True;
12383 }
12384
12385 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
12386 /* -------- 0,0x,11101: SCVTF -------- */
12387 /* -------- 1,0x,11101: UCVTF -------- */
12388 /* 31 28 22 21 15 9 4
12389 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn
12390 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn
12391 with laneage:
12392 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
12393 */
12394 Bool isQ = bitQ == 1;
12395 Bool isU = bitU == 1;
12396 Bool isF64 = (size & 1) == 1;
12397 if (isQ || !isF64) {
12398 IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
12399 UInt nLanes = 0;
12400 Bool zeroHI = False;
12401 const HChar* arrSpec = NULL;
12402 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
12403 isQ, isF64 );
12404 IROp iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
12405 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
12406 IRTemp rm = mk_get_IR_rounding_mode();
12407 UInt i;
12408 vassert(ok); /* the 'if' above should ensure this */
12409 for (i = 0; i < nLanes; i++) {
12410 putQRegLane(dd, i,
12411 binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
12412 }
12413 if (zeroHI) {
12414 putQRegLane(dd, 1, mkU64(0));
12415 }
12416 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
12417 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
12418 return True;
12419 }
12420 /* else fall through */
12421 }
12422
12423 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
12424 /* -------- 0,1x,11101: FRECPE 2d_2d, 4s_4s, 2s_2s -------- */
12425 /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
12426 Bool isSQRT = bitU == 1;
12427 Bool isD = (size & 1) == 1;
12428 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
12429 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
12430 if (bitQ == 0 && isD) return False; // implied 1d case
12431 IRTemp resV = newTempV128();
12432 assign(resV, unop(op, getQReg128(nn)));
12433 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
12434 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12435 DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
12436 nameQReg128(dd), arr, nameQReg128(nn), arr);
12437 return True;
12438 }
12439
12440 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
12441 /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */
12442 Bool isD = (size & 1) == 1;
12443 IROp op = isD ? Iop_Sqrt64Fx2 : Iop_Sqrt32Fx4;
12444 if (bitQ == 0 && isD) return False; // implied 1d case
12445 IRTemp resV = newTempV128();
12446 assign(resV, binop(op, mkexpr(mk_get_IR_rounding_mode()),
12447 getQReg128(nn)));
12448 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
12449 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12450 DIP("%s %s.%s, %s.%s\n", "fsqrt",
12451 nameQReg128(dd), arr, nameQReg128(nn), arr);
12452 return True;
12453 }
12454
12455 return False;
12456 # undef INSN
12457 }
12458
12459
12460 static
12461 Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
12462 {
12463 /* 31 28 23 21 20 19 15 11 9 4
12464 0 Q U 01111 size L M m opcode H 0 n d
12465 Decode fields are: u,size,opcode
12466 M is really part of the mm register number. Individual
12467 cases need to inspect L and H though.
12468 */
12469 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12470 if (INSN(31,31) != 0
12471 || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) !=0) {
12472 return False;
12473 }
12474 UInt bitQ = INSN(30,30);
12475 UInt bitU = INSN(29,29);
12476 UInt size = INSN(23,22);
12477 UInt bitL = INSN(21,21);
12478 UInt bitM = INSN(20,20);
12479 UInt mmLO4 = INSN(19,16);
12480 UInt opcode = INSN(15,12);
12481 UInt bitH = INSN(11,11);
12482 UInt nn = INSN(9,5);
12483 UInt dd = INSN(4,0);
12484 vassert(size < 4);
12485 vassert(bitH < 2 && bitM < 2 && bitL < 2);
12486
12487 if (bitU == 0 && size >= X10
12488 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
12489 /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12490 /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12491 if (bitQ == 0 && size == X11) return False; // implied 1d case
12492 Bool isD = (size & 1) == 1;
12493 Bool isSUB = opcode == BITS4(0,1,0,1);
12494 UInt index;
12495 if (!isD) index = (bitH << 1) | bitL;
12496 else if (isD && bitL == 0) index = bitH;
12497 else return False; // sz:L == x11 => unallocated encoding
12498 vassert(index < (isD ? 2 : 4));
12499 IRType ity = isD ? Ity_F64 : Ity_F32;
12500 IRTemp elem = newTemp(ity);
12501 UInt mm = (bitM << 4) | mmLO4;
12502 assign(elem, getQRegLane(mm, index, ity));
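      /* The selected scalar element is duplicated across all lanes of a
         temporary vector, so the by-element operation can be done with
         ordinary full-width vector ops. */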
12503 IRTemp dupd = math_DUP_TO_V128(elem, ity);
12504 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
12505 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
12506 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
12507 IRTemp rm = mk_get_IR_rounding_mode();
12508 IRTemp t1 = newTempV128();
12509 IRTemp t2 = newTempV128();
12510 // FIXME: double rounding; use FMA primops instead
12511 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
12512 assign(t2, triop(isSUB ? opSUB : opADD,
12513 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
12514 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
12515 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12516 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
12517 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
12518 isD ? 'd' : 's', index);
12519 return True;
12520 }
12521
12522 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
12523 /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12524 /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12525 if (bitQ == 0 && size == X11) return False; // implied 1d case
12526 Bool isD = (size & 1) == 1;
12527 Bool isMULX = bitU == 1;
12528 UInt index;
12529 if (!isD) index = (bitH << 1) | bitL;
12530 else if (isD && bitL == 0) index = bitH;
12531 else return False; // sz:L == x11 => unallocated encoding
12532 vassert(index < (isD ? 2 : 4));
12533 IRType ity = isD ? Ity_F64 : Ity_F32;
12534 IRTemp elem = newTemp(ity);
12535 UInt mm = (bitM << 4) | mmLO4;
12536 assign(elem, getQRegLane(mm, index, ity));
12537 IRTemp dupd = math_DUP_TO_V128(elem, ity);
12538 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
12539 IRTemp res = newTempV128();
12540 assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
12541 mkexpr(mk_get_IR_rounding_mode()),
12542 getQReg128(nn), mkexpr(dupd)));
12543 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12544 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12545 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n",
12546 isMULX ? "fmulx" : "fmul", nameQReg128(dd), arr,
12547 nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
12548 return True;
12549 }
12550
12551 if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
12552 || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
12553 /* -------- 1,xx,0000 MLA s/h variants only -------- */
12554 /* -------- 1,xx,0100 MLS s/h variants only -------- */
12555 /* -------- 0,xx,1000 MUL s/h variants only -------- */
12556 Bool isMLA = opcode == BITS4(0,0,0,0);
12557 Bool isMLS = opcode == BITS4(0,1,0,0);
12558 UInt mm = 32; // invalid
12559 UInt ix = 16; // invalid
12560 switch (size) {
12561 case X00:
12562 return False; // b case is not allowed
12563 case X01:
12564 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12565 case X10:
12566 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12567 case X11:
12568 return False; // d case is not allowed
12569 default:
12570 vassert(0);
12571 }
12572 vassert(mm < 32 && ix < 16);
12573 IROp opMUL = mkVecMUL(size);
12574 IROp opADD = mkVecADD(size);
12575 IROp opSUB = mkVecSUB(size);
12576 HChar ch = size == X01 ? 'h' : 's';
12577 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12578 IRTemp vecD = newTempV128();
12579 IRTemp vecN = newTempV128();
12580 IRTemp res = newTempV128();
12581 assign(vecD, getQReg128(dd));
12582 assign(vecN, getQReg128(nn));
12583 IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
12584 if (isMLA || isMLS) {
12585 assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
12586 } else {
12587 assign(res, prod);
12588 }
12589 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12590 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12591 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
12592 : (isMLS ? "mls" : "mul"),
12593 nameQReg128(dd), arr,
12594 nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
12595 return True;
12596 }
12597
12598 if (opcode == BITS4(1,0,1,0)
12599 || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
12600 /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
12601 /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
12602 /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
12603 /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
12604 /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
12605 /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
12606 /* Widens, and size refers to the narrowed lanes. */
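/* Illustrative example: with bitU==1, size==X01 and bitQ==1 this decodes
   as umlal2 Vd.4s, Vn.8h, Vm.h[ix]; the upper four h lanes of Vn are
   widened to s and multiplied by the chosen h lane of Vm. */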
12607 UInt ks = 3;
12608 switch (opcode) {
12609 case BITS4(1,0,1,0): ks = 0; break;
12610 case BITS4(0,0,1,0): ks = 1; break;
12611 case BITS4(0,1,1,0): ks = 2; break;
12612 default: vassert(0);
12613 }
12614 vassert(ks >= 0 && ks <= 2);
12615 Bool isU = bitU == 1;
12616 Bool is2 = bitQ == 1;
12617 UInt mm = 32; // invalid
12618 UInt ix = 16; // invalid
12619 switch (size) {
12620 case X00:
12621 return False; // h_b_b[] case is not allowed
12622 case X01:
12623 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12624 case X10:
12625 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12626 case X11:
12627 return False; // q_d_d[] case is not allowed
12628 default:
12629 vassert(0);
12630 }
12631 vassert(mm < 32 && ix < 16);
12632 IRTemp vecN = newTempV128();
12633 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12634 IRTemp vecD = newTempV128();
12635 assign(vecN, getQReg128(nn));
12636 assign(vecD, getQReg128(dd));
12637 IRTemp res = IRTemp_INVALID;
12638 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
12639 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
12640 putQReg128(dd, mkexpr(res));
12641 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
12642 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12643 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12644 HChar ch = size == X01 ? 'h' : 's';
12645 DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
12646 isU ? 'u' : 's', nm, is2 ? "2" : "",
12647 nameQReg128(dd), arrWide,
12648 nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
12649 return True;
12650 }
12651
12652 if (bitU == 0
12653 && (opcode == BITS4(1,0,1,1)
12654 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
12655 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
12656 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
12657 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
12658 /* Widens, and size refers to the narrowed lanes. */
12659 UInt ks = 3;
12660 switch (opcode) {
12661 case BITS4(1,0,1,1): ks = 0; break;
12662 case BITS4(0,0,1,1): ks = 1; break;
12663 case BITS4(0,1,1,1): ks = 2; break;
12664 default: vassert(0);
12665 }
12666 vassert(ks >= 0 && ks <= 2);
12667 Bool is2 = bitQ == 1;
12668 UInt mm = 32; // invalid
12669 UInt ix = 16; // invalid
12670 switch (size) {
12671 case X00:
12672 return False; // h_b_b[] case is not allowed
12673 case X01:
12674 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12675 case X10:
12676 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12677 case X11:
12678 return False; // q_d_d[] case is not allowed
12679 default:
12680 vassert(0);
12681 }
12682 vassert(mm < 32 && ix < 16);
12683 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
12684 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
12685 newTempsV128_2(&vecN, &vecD);
12686 assign(vecN, getQReg128(nn));
12687 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12688 assign(vecD, getQReg128(dd));
12689 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
12690 is2, size, "mas"[ks],
12691 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
12692 putQReg128(dd, mkexpr(res));
12693 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
12694 updateQCFLAGwithDifference(sat1q, sat1n);
12695 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
12696 updateQCFLAGwithDifference(sat2q, sat2n);
12697 }
12698 const HChar* nm = ks == 0 ? "sqdmull"
12699 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
12700 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12701 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12702 HChar ch = size == X01 ? 'h' : 's';
12703 DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
12704 nm, is2 ? "2" : "",
12705 nameQReg128(dd), arrWide,
12706 nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
12707 return True;
12708 }
12709
12710 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
12711 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
12712 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
12713 UInt mm = 32; // invalid
12714 UInt ix = 16; // invalid
12715 switch (size) {
12716 case X00:
12717 return False; // b case is not allowed
12718 case X01:
12719 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12720 case X10:
12721 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12722 case X11:
12723 return False; // q case is not allowed
12724 default:
12725 vassert(0);
12726 }
12727 vassert(mm < 32 && ix < 16);
12728 Bool isR = opcode == BITS4(1,1,0,1);
12729 IRTemp res, sat1q, sat1n, vN, vM;
12730 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
12731 vN = newTempV128();
12732 assign(vN, getQReg128(nn));
12733 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12734 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
12735 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12736 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
12737 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
12738 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
12739 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12740 HChar ch = size == X01 ? 'h' : 's';
12741 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
12742 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
12743 return True;
12744 }
12745
12746 return False;
12747 # undef INSN
12748 }
12749
12750
12751 static
12752 Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
12753 {
12754 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12755 return False;
12756 # undef INSN
12757 }
12758
12759
12760 static
12761 Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
12762 {
12763 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12764 return False;
12765 # undef INSN
12766 }
12767
12768
12769 static
12770 Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
12771 {
12772 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12773 return False;
12774 # undef INSN
12775 }
12776
12777
12778 static
12779 Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
12780 {
12781 /* 31 28 23 21 20 15 13 9 4
12782 000 11110 ty 1 m op 1000 n opcode2
12783 The first 3 bits are really "M 0 S", but M and S are always zero.
12784 Decode fields are: ty,op,opcode2
12785 */
12786 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12787 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12788 || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
12789 return False;
12790 }
12791 UInt ty = INSN(23,22);
12792 UInt mm = INSN(20,16);
12793 UInt op = INSN(15,14);
12794 UInt nn = INSN(9,5);
12795 UInt opcode2 = INSN(4,0);
12796 vassert(ty < 4);
12797
12798 if (ty <= X01 && op == X00
12799 && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
12800 /* -------- 0x,00,00000 FCMP d_d, s_s -------- */
12801 /* -------- 0x,00,01000 FCMP d_#0, s_#0 -------- */
12802 /* -------- 0x,00,10000 FCMPE d_d, s_s -------- */
12803 /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
12804 /* 31 23 20 15 9 4
12805 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
12806 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
12807 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
12808 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
12809
12810 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
12811 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
12812 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
12813 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
12814
12815 FCMPE generates Invalid Operation exn if either arg is any kind
12816 of NaN. FCMP generates Invalid Operation exn if either arg is a
12817 signalling NaN. We ignore this detail here and produce the same
12818 IR for both.
12819 */
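/* For reference (architectural values, not taken from this file): the FP
   compare sets NZCV = 0110 for equal, 1000 for less than, 0010 for greater
   than and 0011 for unordered; mk_convert_IRCmpF64Result_to_NZCV is
   expected to produce exactly these, which are then shifted into place
   below. */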
12820 Bool isD = (ty & 1) == 1;
12821 Bool isCMPE = (opcode2 & 16) == 16;
12822 Bool cmpZero = (opcode2 & 8) == 8;
12823 IRType ity = isD ? Ity_F64 : Ity_F32;
12824 Bool valid = True;
12825 if (cmpZero && mm != 0) valid = False;
12826 if (valid) {
12827 IRTemp argL = newTemp(ity);
12828 IRTemp argR = newTemp(ity);
12829 IRTemp irRes = newTemp(Ity_I32);
12830 assign(argL, getQRegLO(nn, ity));
12831 assign(argR,
12832 cmpZero
12833 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
12834 : getQRegLO(mm, ity));
12835 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
12836 mkexpr(argL), mkexpr(argR)));
12837 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
12838 IRTemp nzcv_28x0 = newTemp(Ity_I64);
12839 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
12840 setFlags_COPY(nzcv_28x0);
12841 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
12842 cmpZero ? "#0.0" : nameQRegLO(mm, ity));
12843 return True;
12844 }
12845 return False;
12846 }
12847
12848 return False;
12849 # undef INSN
12850 }
12851
12852
12853 static
12854 Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
12855 {
12856 /* 31 28 23 21 20 15 11 9 4 3
12857 000 11110 ty 1 m cond 01 n op nzcv
12858 The first 3 bits are really "M 0 S", but M and S are always zero.
12859 Decode fields are: ty,op
12860 */
12861 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12862 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12863 || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) {
12864 return False;
12865 }
12866 UInt ty = INSN(23,22);
12867 UInt mm = INSN(20,16);
12868 UInt cond = INSN(15,12);
12869 UInt nn = INSN(9,5);
12870 UInt op = INSN(4,4);
12871 UInt nzcv = INSN(3,0);
12872 vassert(ty < 4 && op <= 1);
12873
12874 if (ty <= BITS2(0,1)) {
12875 /* -------- 00,0 FCCMP s_s -------- */
12876 /* -------- 00,1 FCCMPE s_s -------- */
12877 /* -------- 01,0 FCCMP d_d -------- */
12878 /* -------- 01,1 FCCMPE d_d -------- */
12879
12880 /* FCCMPE generates Invalid Operation exn if either arg is any kind
12881 of NaN. FCCMP generates Invalid Operation exn if either arg is a
12882 signalling NaN. We ignore this detail here and produce the same
12883 IR for both.
12884 */
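/* Illustrative example: "fccmp s0, s1, #8, ge" sets NZCV from the
   comparison of s0 with s1 if the ge condition currently holds, and
   otherwise sets NZCV to the literal 0b1000 (just N), exactly as the
   ITE below computes. */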
12885 Bool isD = (ty & 1) == 1;
12886 Bool isCMPE = op == 1;
12887 IRType ity = isD ? Ity_F64 : Ity_F32;
12888 IRTemp argL = newTemp(ity);
12889 IRTemp argR = newTemp(ity);
12890 IRTemp irRes = newTemp(Ity_I32);
12891 assign(argL, getQRegLO(nn, ity));
12892 assign(argR, getQRegLO(mm, ity));
12893 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
12894 mkexpr(argL), mkexpr(argR)));
12895 IRTemp condT = newTemp(Ity_I1);
12896 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
12897 IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes);
12898
12899 IRTemp nzcvT_28x0 = newTemp(Ity_I64);
12900 assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28)));
12901
12902 IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28);
12903
12904 IRTemp nzcv_28x0 = newTemp(Ity_I64);
12905 assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT),
12906 mkexpr(nzcvT_28x0), nzcvF_28x0));
12907 setFlags_COPY(nzcv_28x0);
12908 DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "",
12909 nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond));
12910 return True;
12911 }
12912
12913 return False;
12914 # undef INSN
12915 }
12916
12917
12918 static
12919 Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
12920 {
12921 /* 31 23 21 20 15 11 9 5
12922 000 11110 ty 1 m cond 11 n d
12923 The first 3 bits are really "M 0 S", but M and S are always zero.
12924 Decode fields: ty
12925 */
12926 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12927 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
12928 || INSN(11,10) != BITS2(1,1)) {
12929 return False;
12930 }
12931 UInt ty = INSN(23,22);
12932 UInt mm = INSN(20,16);
12933 UInt cond = INSN(15,12);
12934 UInt nn = INSN(9,5);
12935 UInt dd = INSN(4,0);
12936 if (ty <= X01) {
12937 /* -------- 00: FCSEL s_s -------- */
12938 /* -------- 01: FCSEL d_d -------- */
12939 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
12940 IRTemp srcT = newTemp(ity);
12941 IRTemp srcF = newTemp(ity);
12942 IRTemp res = newTemp(ity);
12943 assign(srcT, getQRegLO(nn, ity));
12944 assign(srcF, getQRegLO(mm, ity));
12945 assign(res, IRExpr_ITE(
12946 unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
12947 mkexpr(srcT), mkexpr(srcF)));
12948 putQReg128(dd, mkV128(0x0000));
12949 putQRegLO(dd, mkexpr(res));
12950 DIP("fcsel %s, %s, %s, %s\n",
12951 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
12952 nameCC(cond));
12953 return True;
12954 }
12955 return False;
12956 # undef INSN
12957 }
12958
12959
12960 static
12961 Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
12962 {
12963 /* 31 28 23 21 20 14 9 4
12964 000 11110 ty 1 opcode 10000 n d
12965 The first 3 bits are really "M 0 S", but M and S are always zero.
12966 Decode fields: ty,opcode
12967 */
12968 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12969 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12970 || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
12971 return False;
12972 }
12973 UInt ty = INSN(23,22);
12974 UInt opcode = INSN(20,15);
12975 UInt nn = INSN(9,5);
12976 UInt dd = INSN(4,0);
12977
12978 if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
12979 /* -------- 0x,000000: FMOV d_d, s_s -------- */
12980 /* -------- 0x,000001: FABS d_d, s_s -------- */
12981 /* -------- 0x,000010: FNEG d_d, s_s -------- */
12982 /* -------- 0x,000011: FSQRT d_d, s_s -------- */
12983 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
12984 IRTemp src = newTemp(ity);
12985 IRTemp res = newTemp(ity);
12986 const HChar* nm = "??";
12987 assign(src, getQRegLO(nn, ity));
12988 switch (opcode) {
12989 case BITS6(0,0,0,0,0,0):
12990 nm = "fmov"; assign(res, mkexpr(src)); break;
12991 case BITS6(0,0,0,0,0,1):
12992 nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
12993 case BITS6(0,0,0,0,1,0):
12994 nm = "fneg"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
12995 case BITS6(0,0,0,0,1,1):
12996 nm = "fsqrt";
12997 assign(res, binop(mkSQRTF(ity),
12998 mkexpr(mk_get_IR_rounding_mode()),
12999 mkexpr(src))); break;
13000 default:
13001 vassert(0);
13002 }
13003 putQReg128(dd, mkV128(0x0000));
13004 putQRegLO(dd, mkexpr(res));
13005 DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
13006 return True;
13007 }
13008
13009 if ( (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
13010 || opcode == BITS6(0,0,0,1,0,1)))
13011 || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
13012 || opcode == BITS6(0,0,0,1,0,1)))
13013 || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
13014 || opcode == BITS6(0,0,0,1,0,0)))) {
13015 /* -------- 11,000100: FCVT s_h -------- */
13016 /* -------- 11,000101: FCVT d_h -------- */
13017 /* -------- 00,000111: FCVT h_s -------- */
13018 /* -------- 00,000101: FCVT d_s -------- */
13019 /* -------- 01,000111: FCVT h_d -------- */
13020 /* -------- 01,000100: FCVT s_d -------- */
13021 /* 31 23 21 16 14 9 4
13022 000 11110 11 10001 00 10000 n d FCVT Sd, Hn
13023 --------- 11 ----- 01 --------- FCVT Dd, Hn
13024 --------- 00 ----- 11 --------- FCVT Hd, Sn
13025 --------- 00 ----- 01 --------- FCVT Dd, Sn
13026 --------- 01 ----- 11 --------- FCVT Hd, Dn
13027 --------- 01 ----- 00 --------- FCVT Sd, Dn
13028 Rounding, when dst is smaller than src, is per the FPCR.
13029 */
13030 UInt b2322 = ty;
13031 UInt b1615 = opcode & BITS2(1,1);
13032 switch ((b2322 << 2) | b1615) {
13033 case BITS4(0,0,0,1): // S -> D
13034 case BITS4(1,1,0,1): { // H -> D
13035 Bool srcIsH = b2322 == BITS2(1,1);
13036 IRType srcTy = srcIsH ? Ity_F16 : Ity_F32;
13037 IRTemp res = newTemp(Ity_F64);
13038 assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
13039 getQRegLO(nn, srcTy)));
13040 putQReg128(dd, mkV128(0x0000));
13041 putQRegLO(dd, mkexpr(res));
13042 DIP("fcvt %s, %s\n",
13043 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
13044 return True;
13045 }
13046 case BITS4(0,1,0,0): // D -> S
13047 case BITS4(0,1,1,1): { // D -> H
13048 Bool dstIsH = b1615 == BITS2(1,1);
13049 IRType dstTy = dstIsH ? Ity_F16 : Ity_F32;
13050 IRTemp res = newTemp(dstTy);
13051 assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
13052 mkexpr(mk_get_IR_rounding_mode()),
13053 getQRegLO(nn, Ity_F64)));
13054 putQReg128(dd, mkV128(0x0000));
13055 putQRegLO(dd, mkexpr(res));
13056 DIP("fcvt %s, %s\n",
13057 nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
13058 return True;
13059 }
13060 case BITS4(0,0,1,1): // S -> H
13061 case BITS4(1,1,0,0): { // H -> S
13062 Bool toH = b1615 == BITS2(1,1);
13063 IRType srcTy = toH ? Ity_F32 : Ity_F16;
13064 IRType dstTy = toH ? Ity_F16 : Ity_F32;
13065 IRTemp res = newTemp(dstTy);
13066 if (toH) {
13067 assign(res, binop(Iop_F32toF16,
13068 mkexpr(mk_get_IR_rounding_mode()),
13069 getQRegLO(nn, srcTy)));
13070
13071 } else {
13072 assign(res, unop(Iop_F16toF32,
13073 getQRegLO(nn, srcTy)));
13074 }
13075 putQReg128(dd, mkV128(0x0000));
13076 putQRegLO(dd, mkexpr(res));
13077 DIP("fcvt %s, %s\n",
13078 nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
13079 return True;
13080 }
13081 default:
13082 break;
13083 }
13084 /* else unhandled */
13085 return False;
13086 }
13087
13088 if (ty <= X01
13089 && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
13090 && opcode != BITS6(0,0,1,1,0,1)) {
13091 /* -------- 0x,001000 FRINTN d_d, s_s -------- */
13092 /* -------- 0x,001001 FRINTP d_d, s_s -------- */
13093 /* -------- 0x,001010 FRINTM d_d, s_s -------- */
13094 /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
13095 /* -------- 0x,001100 FRINTA d_d, s_s -------- */
13096 /* -------- 0x,001110 FRINTX d_d, s_s -------- */
13097 /* -------- 0x,001111 FRINTI d_d, s_s -------- */
13098 /* 31 23 21 17 14 9 4
13099 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fn (round per FPCR)
13100 rm
13101 x==0 => S-registers, x==1 => D-registers
13102 rm (17:15) encodings:
13103 111 per FPCR (FRINTI)
13104 001 +inf (FRINTP)
13105 010 -inf (FRINTM)
13106 011 zero (FRINTZ)
13107 000 tieeven (FRINTN) -- !! FIXME KLUDGED !!
13108 100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
13109 110 per FPCR + "exact = TRUE" (FRINTX)
13110 101 unallocated
13111 */
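/* Worked example (illustrative): for the source value 2.5, FRINTP gives
   3.0, FRINTM gives 2.0, FRINTZ gives 2.0 and FRINTN gives 2.0 (ties to
   even).  FRINTA should give 3.0 (ties away from zero), but with the
   kludge noted above it also produces 2.0 here. */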
13112 Bool isD = (ty & 1) == 1;
13113 UInt rm = opcode & BITS6(0,0,0,1,1,1);
13114 IRType ity = isD ? Ity_F64 : Ity_F32;
13115 IRExpr* irrmE = NULL;
13116 UChar ch = '?';
13117 switch (rm) {
13118 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
13119 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
13120 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
13121 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
13122 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
13123 // FRINTX ("round to integral exact") rounds per the FPCR and also raises
13124 // Inexact if result != source; not modelled here, so treated like FRINTI.
13125 case BITS3(1,1,0):
13126 ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
13127 case BITS3(1,1,1):
13128 ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
13129 // The following is a kludge. There's no Irrm_ value to represent
13130 // this ("to nearest, with ties to even")
13131 case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
13132 default: break;
13133 }
13134 if (irrmE) {
13135 IRTemp src = newTemp(ity);
13136 IRTemp dst = newTemp(ity);
13137 assign(src, getQRegLO(nn, ity));
13138 assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13139 irrmE, mkexpr(src)));
13140 putQReg128(dd, mkV128(0x0000));
13141 putQRegLO(dd, mkexpr(dst));
13142 DIP("frint%c %s, %s\n",
13143 ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
13144 return True;
13145 }
13146 return False;
13147 }
13148
13149 return False;
13150 # undef INSN
13151 }
13152
13153
13154 static
13155 Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
13156 {
13157 /* 31 28 23 21 20 15 11 9 4
13158 000 11110 ty 1 m opcode 10 n d
13159 The first 3 bits are really "M 0 S", but M and S are always zero.
13160 Decode fields: ty, opcode
13161 */
13162 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13163 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13164 || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
13165 return False;
13166 }
13167 UInt ty = INSN(23,22);
13168 UInt mm = INSN(20,16);
13169 UInt opcode = INSN(15,12);
13170 UInt nn = INSN(9,5);
13171 UInt dd = INSN(4,0);
13172
13173 if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
13174 /* ------- 0x,0000: FMUL d_d, s_s ------- */
13175 /* ------- 0x,0001: FDIV d_d, s_s ------- */
13176 /* ------- 0x,0010: FADD d_d, s_s ------- */
13177 /* ------- 0x,0011: FSUB d_d, s_s ------- */
13178 /* ------- 0x,0100: FMAX d_d, s_s ------- */
13179 /* ------- 0x,0101: FMIN d_d, s_s ------- */
13180 /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
13181 /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
13182 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
13183 IROp iop = Iop_INVALID;
13184 const HChar* nm = "???";
13185 switch (opcode) {
13186 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
13187 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
13188 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
13189 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
13190 case BITS4(0,1,0,0): nm = "fmax"; iop = mkVecMAXF(ty+2); break;
13191 case BITS4(0,1,0,1): nm = "fmin"; iop = mkVecMINF(ty+2); break;
13192 case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
13193 case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
13194 default: vassert(0);
13195 }
13196 if (opcode <= BITS4(0,0,1,1)) {
13197 // This is really not good code. TODO: avoid width-changing
13198 IRTemp res = newTemp(ity);
13199 assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
13200 getQRegLO(nn, ity), getQRegLO(mm, ity)));
13201 putQReg128(dd, mkV128(0));
13202 putQRegLO(dd, mkexpr(res));
13203 } else {
13204 putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
13205 binop(iop, getQReg128(nn), getQReg128(mm))));
13206 }
13207 DIP("%s %s, %s, %s\n",
13208 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
13209 return True;
13210 }
13211
13212 if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
13213 /* ------- 0x,1000: FNMUL d_d, s_s ------- */
13214 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
13215 IROp iop = mkMULF(ity);
13216 IROp iopn = mkNEGF(ity);
13217 const HChar* nm = "fnmul";
13218 IRExpr* resE = unop(iopn,
13219 triop(iop, mkexpr(mk_get_IR_rounding_mode()),
13220 getQRegLO(nn, ity), getQRegLO(mm, ity)));
13221 IRTemp res = newTemp(ity);
13222 assign(res, resE);
13223 putQReg128(dd, mkV128(0));
13224 putQRegLO(dd, mkexpr(res));
13225 DIP("%s %s, %s, %s\n",
13226 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
13227 return True;
13228 }
13229
13230 return False;
13231 # undef INSN
13232 }
13233
13234
13235 static
13236 Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
13237 {
13238 /* 31 28 23 21 20 15 14 9 4
13239 000 11111 ty o1 m o0 a n d
13240 The first 3 bits are really "M 0 S", but M and S are always zero.
13241 Decode fields: ty,o1,o0
13242 */
13243 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13244 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
13245 return False;
13246 }
13247 UInt ty = INSN(23,22);
13248 UInt bitO1 = INSN(21,21);
13249 UInt mm = INSN(20,16);
13250 UInt bitO0 = INSN(15,15);
13251 UInt aa = INSN(14,10);
13252 UInt nn = INSN(9,5);
13253 UInt dd = INSN(4,0);
13254 vassert(ty < 4);
13255
13256 if (ty <= X01) {
13257 /* -------- 0x,0,0 FMADD d_d_d_d, s_s_s_s -------- */
13258 /* -------- 0x,0,1 FMSUB d_d_d_d, s_s_s_s -------- */
13259 /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
13260 /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
13261 /* -------------------- F{N}M{ADD,SUB} -------------------- */
13262 /* 31 22 20 15 14 9 4 ix
13263 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
13264 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
13265 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
13266 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
13267 where Fx=Dx when sz=1, Fx=Sx when sz=0
13268
13269 -----SPEC------ ----IMPL----
13270 fmadd a + n * m a + n * m
13271 fmsub a + (-n) * m a - n * m
13272 fnmadd (-a) + (-n) * m -(a + n * m)
13273 fnmsub (-a) + n * m -(a - n * m)
13274 */
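/* Illustrative consequence of the SPEC vs IMPL rewriting above: for
   fnmsub with a == +0.0 and n*m == +0.0, the spec form (-a) + n*m gives
   +0.0, whereas the impl form -(a - n*m) gives -0.0.  So the sign of
   zero results can differ from hardware, independently of the
   double-rounding issue. */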
13275 Bool isD = (ty & 1) == 1;
13276 UInt ix = (bitO1 << 1) | bitO0;
13277 IRType ity = isD ? Ity_F64 : Ity_F32;
13278 IROp opADD = mkADDF(ity);
13279 IROp opSUB = mkSUBF(ity);
13280 IROp opMUL = mkMULF(ity);
13281 IROp opNEG = mkNEGF(ity);
13282 IRTemp res = newTemp(ity);
13283 IRExpr* eA = getQRegLO(aa, ity);
13284 IRExpr* eN = getQRegLO(nn, ity);
13285 IRExpr* eM = getQRegLO(mm, ity);
13286 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
13287 IRExpr* eNxM = triop(opMUL, rm, eN, eM);
13288 switch (ix) {
13289 case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
13290 case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
13291 case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
13292 case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
13293 default: vassert(0);
13294 }
13295 putQReg128(dd, mkV128(0x0000));
13296 putQRegLO(dd, mkexpr(res));
13297 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
13298 DIP("%s %s, %s, %s, %s\n",
13299 names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
13300 nameQRegLO(mm, ity), nameQRegLO(aa, ity));
13301 return True;
13302 }
13303
13304 return False;
13305 # undef INSN
13306 }
13307
13308
13309 static
13310 Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
13311 {
13312 /* 31 28 23 21 20 12 9 4
13313 000 11110 ty 1 imm8 100 imm5 d
13314 The first 3 bits are really "M 0 S", but M and S are always zero.
13315 */
13316 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13317 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13318 || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
13319 return False;
13320 }
13321 UInt ty = INSN(23,22);
13322 UInt imm8 = INSN(20,13);
13323 UInt imm5 = INSN(9,5);
13324 UInt dd = INSN(4,0);
13325
13326 /* ------- 00,00000: FMOV s_imm ------- */
13327 /* ------- 01,00000: FMOV d_imm ------- */
13328 if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
13329 Bool isD = (ty & 1) == 1;
13330 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
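/* For example (illustrative): imm8 == 0x70 expands to 1.0 and
   imm8 == 0xF0 to -1.0, in both the 32- and 64-bit formats. */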
13331 if (!isD) {
13332 vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
13333 }
13334 putQReg128(dd, mkV128(0));
13335 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
13336 DIP("fmov %s, #0x%llx\n",
13337 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
13338 return True;
13339 }
13340
13341 return False;
13342 # undef INSN
13343 }
13344
13345
13346 static
13347 Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
13348 {
13350 /* 31 30 29 28 23 21 20 18 15 9 4
13351 sf 0 0 11110 type 0 rmode opcode scale n d
13352 The first 3 bits are really "sf 0 S", but S is always zero.
13353 Decode fields: sf,type,rmode,opcode
13354 */
13355 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13356 if (INSN(30,29) != BITS2(0,0)
13357 || INSN(28,24) != BITS5(1,1,1,1,0)
13358 || INSN(21,21) != 0) {
13359 return False;
13360 }
13361 UInt bitSF = INSN(31,31);
13362 UInt ty = INSN(23,22); // type
13363 UInt rm = INSN(20,19); // rmode
13364 UInt op = INSN(18,16); // opcode
13365 UInt sc = INSN(15,10); // scale
13366 UInt nn = INSN(9,5);
13367 UInt dd = INSN(4,0);
13368
13369 if (ty <= X01 && rm == X11
13370 && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
13371 /* -------- (ix) sf ty rm opc -------- */
13372 /* -------- 0 0 00 11 000: FCVTZS w_s_#fbits -------- */
13373 /* -------- 1 0 01 11 000: FCVTZS w_d_#fbits -------- */
13374 /* -------- 2 1 00 11 000: FCVTZS x_s_#fbits -------- */
13375 /* -------- 3 1 01 11 000: FCVTZS x_d_#fbits -------- */
13376
13377 /* -------- 4 0 00 11 001: FCVTZU w_s_#fbits -------- */
13378 /* -------- 5 0 01 11 001: FCVTZU w_d_#fbits -------- */
13379 /* -------- 6 1 00 11 001: FCVTZU x_s_#fbits -------- */
13380 /* -------- 7 1 01 11 001: FCVTZU x_d_#fbits -------- */
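/* Worked example (illustrative): FCVTZS Wd, Sn, #4 with Sn == 2.75
   computes round-to-zero(2.75 * 2^4) = 44; the source is scaled up by
   2^fbits and then truncated, as the code below does. */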
13381 Bool isI64 = bitSF == 1;
13382 Bool isF64 = (ty & 1) == 1;
13383 Bool isU = (op & 1) == 1;
13384 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
13385
13386 Int fbits = 64 - sc;
13387 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
13388
13389 Double scale = two_to_the_plus(fbits);
13390 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
13391 : IRExpr_Const(IRConst_F32( (Float)scale ));
13392 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
13393
13394 const IROp ops[8]
13395 = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S,
13396 Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U };
13397 IRTemp irrm = newTemp(Ity_I32);
13398 assign(irrm, mkU32(Irrm_ZERO));
13399
13400 IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32);
13401 IRExpr* res = binop(ops[ix], mkexpr(irrm),
13402 triop(opMUL, mkexpr(irrm), src, scaleE));
13403 putIRegOrZR(isI64, dd, res);
13404
13405 DIP("fcvtz%c %s, %s, #%d\n",
13406 isU ? 'u' : 's', nameIRegOrZR(isI64, dd),
13407 nameQRegLO(nn, isF64 ? Ity_F64 : Ity_F32), fbits);
13408 return True;
13409 }
13410
13411 /* ------ sf,ty,rm,opc ------ */
13412 /* ------ x,0x,00,010 SCVTF s/d, w/x, #fbits ------ */
13413 /* ------ x,0x,00,011 UCVTF s/d, w/x, #fbits ------ */
13414 /* (ix) sf S 28 ty rm opc 15 9 4
13415 0 0 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Wn, #fbits
13416 1 0 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Wn, #fbits
13417 2 1 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Xn, #fbits
13418 3 1 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Xn, #fbits
13419
13420 4 0 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Wn, #fbits
13421 5 0 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Wn, #fbits
13422 6 1 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Xn, #fbits
13423 7 1 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Xn, #fbits
13424
13425 These are signed/unsigned conversion from integer registers to
13426 FP registers, all 4 32/64-bit combinations, rounded per FPCR,
13427 scaled per |scale|.
13428 */
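/* Worked example (illustrative): UCVTF Sd, Wn, #8 with Wn == 384 gives
   384 * 2^-8 = 1.5; the integer is converted to FP first and then
   multiplied by 2^-fbits, which is what the code below does. */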
13429 if (ty <= X01 && rm == X00
13430 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
13431 && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
13432 Bool isI64 = bitSF == 1;
13433 Bool isF64 = (ty & 1) == 1;
13434 Bool isU = (op & 1) == 1;
13435 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
13436
13437 Int fbits = 64 - sc;
13438 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
13439
13440 Double scale = two_to_the_minus(fbits);
13441 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
13442 : IRExpr_Const(IRConst_F32( (Float)scale ));
13443 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
13444
13445 const IROp ops[8]
13446 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
13447 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
13448 IRExpr* src = getIRegOrZR(isI64, nn);
13449 IRExpr* res = (isF64 && !isI64)
13450 ? unop(ops[ix], src)
13451 : binop(ops[ix],
13452 mkexpr(mk_get_IR_rounding_mode()), src);
13453 putQReg128(dd, mkV128(0));
13454 putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));
13455
13456 DIP("%ccvtf %s, %s, #%d\n",
13457 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
13458 nameIRegOrZR(isI64, nn), fbits);
13459 return True;
13460 }
13461
13462 return False;
13463 # undef INSN
13464 }
13465
13466
13467 static
13468 Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
13469 {
13470 /* 31 30 29 28 23 21 20 18 15 9 4
13471 sf 0 0 11110 type 1 rmode opcode 000000 n d
13472 The first 3 bits are really "sf 0 S", but S is always zero.
13473 Decode fields: sf,type,rmode,opcode
13474 */
13475 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13476 if (INSN(30,29) != BITS2(0,0)
13477 || INSN(28,24) != BITS5(1,1,1,1,0)
13478 || INSN(21,21) != 1
13479 || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
13480 return False;
13481 }
13482 UInt bitSF = INSN(31,31);
13483 UInt ty = INSN(23,22); // type
13484 UInt rm = INSN(20,19); // rmode
13485 UInt op = INSN(18,16); // opcode
13486 UInt nn = INSN(9,5);
13487 UInt dd = INSN(4,0);
13488
13489 // op = 000, 001
13490 /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
13491 /* 30 23 20 18 15 9 4
13492 sf 00 11110 0x 1 00 000 000000 n d FCVTNS Rd, Fn (round to
13493 sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest)
13494 ---------------- 01 -------------- FCVTP-------- (round to +inf)
13495 ---------------- 10 -------------- FCVTM-------- (round to -inf)
13496 ---------------- 11 -------------- FCVTZ-------- (round to zero)
13497 ---------------- 00 100 ---------- FCVTAS------- (nearest, ties away)
13498 ---------------- 00 101 ---------- FCVTAU------- (nearest, ties away)
13499
13500 Rd is Xd when sf==1, Wd when sf==0
13501 Fn is Dn when x==1, Sn when x==0
13502 20:19 carry the rounding mode, using the same encoding as FPCR
13503 */
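/* Worked example (illustrative): for a source value of 2.5, FCVTMS gives
   2, FCVTPS gives 3, FCVTZS gives 2 and FCVTNS gives 2 (ties to even).
   FCVTAS should give 3 (ties away from zero), but the kludged use of
   Irrm_NEAREST below makes it produce 2 for this input. */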
13504 if (ty <= X01
13505 && ( ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True)
13506 || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
13507 )
13508 ) {
13509 Bool isI64 = bitSF == 1;
13510 Bool isF64 = (ty & 1) == 1;
13511 Bool isU = (op & 1) == 1;
13512 /* Decide on the IR rounding mode to use. */
13513 IRRoundingMode irrm = 8; /*impossible*/
13514 HChar ch = '?';
13515 if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
13516 switch (rm) {
13517 case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
13518 case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
13519 case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
13520 case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
13521 default: vassert(0);
13522 }
13523 } else {
13524 vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
13525 switch (rm) {
13526 case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
13527 default: vassert(0);
13528 }
13529 }
13530 vassert(irrm != 8);
13531 /* Decide on the conversion primop, based on the source size,
13532 dest size and signedness (8 possibilities). Case coding:
13533 F32 ->s I32 0
13534 F32 ->u I32 1
13535 F32 ->s I64 2
13536 F32 ->u I64 3
13537 F64 ->s I32 4
13538 F64 ->u I32 5
13539 F64 ->s I64 6
13540 F64 ->u I64 7
13541 */
13542 UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
13543 vassert(ix < 8);
13544 const IROp iops[8]
13545 = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
13546 Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
13547 IROp iop = iops[ix];
13548 // A bit of ATCery: bounce all cases we haven't seen an example of.
13549 if (/* F32toI32S */
13550 (iop == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */
13551 || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
13552 || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
13553 || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
13554 /* F32toI32U */
13555 || (iop == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */
13556 || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
13557 || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
13558 || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
13559 /* F32toI64S */
13560 || (iop == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */
13561 || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
13562 || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
13563 || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
13564 /* F32toI64U */
13565 || (iop == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */
13566 || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
13567 || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
13568 || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */
13569 /* F64toI32S */
13570 || (iop == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */
13571 || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
13572 || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
13573 || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
13574 /* F64toI32U */
13575 || (iop == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */
13576 || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
13577 || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
13578 || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */
13579 /* F64toI64S */
13580 || (iop == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */
13581 || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
13582 || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
13583 || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
13584 /* F64toI64U */
13585 || (iop == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */
13586 || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
13587 || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
13588 || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
13589 ) {
13590 /* validated */
13591 } else {
13592 return False;
13593 }
13594 IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
13595 IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
13596 IRTemp src = newTemp(srcTy);
13597 IRTemp dst = newTemp(dstTy);
13598 assign(src, getQRegLO(nn, srcTy));
13599 assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
13600 putIRegOrZR(isI64, dd, mkexpr(dst));
13601 DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
13602 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
13603 return True;
13604 }
13605
13606 // op = 010, 011
13607 /* -------------- {S,U}CVTF (scalar, integer) -------------- */
13608 /* (ix) sf S 28 ty rm op 15 9 4
13609 0 0 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Wn
13610 1 0 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Wn
13611 2 1 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Xn
13612 3 1 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Xn
13613
13614 4 0 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Wn
13615 5 0 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Wn
13616 6 1 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Xn
13617 7 1 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Xn
13618
13619 These are signed/unsigned conversion from integer registers to
13620 FP registers, all 4 32/64-bit combinations, rounded per FPCR.
13621 */
13622 if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
13623 Bool isI64 = bitSF == 1;
13624 Bool isF64 = (ty & 1) == 1;
13625 Bool isU = (op & 1) == 1;
13626 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
13627 const IROp ops[8]
13628 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
13629 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
13630 IRExpr* src = getIRegOrZR(isI64, nn);
13631 IRExpr* res = (isF64 && !isI64)
13632 ? unop(ops[ix], src)
13633 : binop(ops[ix],
13634 mkexpr(mk_get_IR_rounding_mode()), src);
13635 putQReg128(dd, mkV128(0));
13636 putQRegLO(dd, res);
13637 DIP("%ccvtf %s, %s\n",
13638 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
13639 nameIRegOrZR(isI64, nn));
13640 return True;
13641 }
13642
13643 // op = 110, 111
13644 /* -------- FMOV (general) -------- */
13645 /* case sf S ty rm op 15 9 4
13646 (1) 0 0 0 11110 00 1 00 111 000000 n d FMOV Sd, Wn
13647 (2) 1 0 0 11110 01 1 00 111 000000 n d FMOV Dd, Xn
13648 (3) 1 0 0 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn
13649
13650 (4) 0 0 0 11110 00 1 00 110 000000 n d FMOV Wd, Sn
13651 (5) 1 0 0 11110 01 1 00 110 000000 n d FMOV Xd, Dn
13652 (6) 1 0 0 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1]
13653 */
13654 if (1) {
13655 UInt ix = 0; // case
13656 if (bitSF == 0) {
13657 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
13658 ix = 1;
13659 else
13660 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
13661 ix = 4;
13662 } else {
13663 vassert(bitSF == 1);
13664 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
13665 ix = 2;
13666 else
13667 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
13668 ix = 5;
13669 else
13670 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
13671 ix = 3;
13672 else
13673 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
13674 ix = 6;
13675 }
13676 if (ix > 0) {
13677 switch (ix) {
13678 case 1:
13679 putQReg128(dd, mkV128(0));
13680 putQRegLO(dd, getIReg32orZR(nn));
13681 DIP("fmov s%u, w%u\n", dd, nn);
13682 break;
13683 case 2:
13684 putQReg128(dd, mkV128(0));
13685 putQRegLO(dd, getIReg64orZR(nn));
13686 DIP("fmov d%u, x%u\n", dd, nn);
13687 break;
13688 case 3:
13689 putQRegHI64(dd, getIReg64orZR(nn));
13690 DIP("fmov v%u.d[1], x%u\n", dd, nn);
13691 break;
13692 case 4:
13693 putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
13694 DIP("fmov w%u, s%u\n", dd, nn);
13695 break;
13696 case 5:
13697 putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
13698 DIP("fmov x%u, d%u\n", dd, nn);
13699 break;
13700 case 6:
13701 putIReg64orZR(dd, getQRegHI64(nn));
13702 DIP("fmov x%u, v%u.d[1]\n", dd, nn);
13703 break;
13704 default:
13705 vassert(0);
13706 }
13707 return True;
13708 }
13709 /* undecodable; fall through */
13710 }
13711
13712 return False;
13713 # undef INSN
13714 }
13715
13716
13717 static
13718 Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
13719 {
13720 Bool ok;
13721 ok = dis_AdvSIMD_EXT(dres, insn);
13722 if (UNLIKELY(ok)) return True;
13723 ok = dis_AdvSIMD_TBL_TBX(dres, insn);
13724 if (UNLIKELY(ok)) return True;
13725 ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
13726 if (UNLIKELY(ok)) return True;
13727 ok = dis_AdvSIMD_across_lanes(dres, insn);
13728 if (UNLIKELY(ok)) return True;
13729 ok = dis_AdvSIMD_copy(dres, insn);
13730 if (UNLIKELY(ok)) return True;
13731 ok = dis_AdvSIMD_modified_immediate(dres, insn);
13732 if (UNLIKELY(ok)) return True;
13733 ok = dis_AdvSIMD_scalar_copy(dres, insn);
13734 if (UNLIKELY(ok)) return True;
13735 ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
13736 if (UNLIKELY(ok)) return True;
13737 ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
13738 if (UNLIKELY(ok)) return True;
13739 ok = dis_AdvSIMD_scalar_three_different(dres, insn);
13740 if (UNLIKELY(ok)) return True;
13741 ok = dis_AdvSIMD_scalar_three_same(dres, insn);
13742 if (UNLIKELY(ok)) return True;
13743 ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
13744 if (UNLIKELY(ok)) return True;
13745 ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
13746 if (UNLIKELY(ok)) return True;
13747 ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
13748 if (UNLIKELY(ok)) return True;
13749 ok = dis_AdvSIMD_three_different(dres, insn);
13750 if (UNLIKELY(ok)) return True;
13751 ok = dis_AdvSIMD_three_same(dres, insn);
13752 if (UNLIKELY(ok)) return True;
13753 ok = dis_AdvSIMD_two_reg_misc(dres, insn);
13754 if (UNLIKELY(ok)) return True;
13755 ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
13756 if (UNLIKELY(ok)) return True;
13757 ok = dis_AdvSIMD_crypto_aes(dres, insn);
13758 if (UNLIKELY(ok)) return True;
13759 ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
13760 if (UNLIKELY(ok)) return True;
13761 ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
13762 if (UNLIKELY(ok)) return True;
13763 ok = dis_AdvSIMD_fp_compare(dres, insn);
13764 if (UNLIKELY(ok)) return True;
13765 ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
13766 if (UNLIKELY(ok)) return True;
13767 ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
13768 if (UNLIKELY(ok)) return True;
13769 ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
13770 if (UNLIKELY(ok)) return True;
13771 ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
13772 if (UNLIKELY(ok)) return True;
13773 ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
13774 if (UNLIKELY(ok)) return True;
13775 ok = dis_AdvSIMD_fp_immediate(dres, insn);
13776 if (UNLIKELY(ok)) return True;
13777 ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
13778 if (UNLIKELY(ok)) return True;
13779 ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
13780 if (UNLIKELY(ok)) return True;
13781 return False;
13782 }
13783
13784
13785 /*------------------------------------------------------------*/
13786 /*--- Disassemble a single ARM64 instruction ---*/
13787 /*------------------------------------------------------------*/
13788
13789 /* Disassemble a single ARM64 instruction into IR. The instruction
13790 is located at |guest_instr| and has guest IP of
13791 |guest_PC_curr_instr|, which will have been set before the call
13792 here. Returns True iff the instruction was decoded, in which case
13793 *dres will be set accordingly, or False, in which case *dres should
13794 be ignored by the caller. */
13795
13796 static
13797 Bool disInstr_ARM64_WRK (
13798 /*MB_OUT*/DisResult* dres,
13799 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
13800 Bool resteerCisOk,
13801 void* callback_opaque,
13802 const UChar* guest_instr,
13803 const VexArchInfo* archinfo,
13804 const VexAbiInfo* abiinfo
13805 )
13806 {
13807 // A macro to fish bits out of 'insn'.
13808 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13809
13810 //ZZ DisResult dres;
13811 //ZZ UInt insn;
13812 //ZZ //Bool allow_VFP = False;
13813 //ZZ //UInt hwcaps = archinfo->hwcaps;
13814 //ZZ IRTemp condT; /* :: Ity_I32 */
13815 //ZZ UInt summary;
13816 //ZZ HChar dis_buf[128]; // big enough to hold LDMIA etc text
13817 //ZZ
13818 //ZZ /* What insn variants are we supporting today? */
13819 //ZZ //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
13820 //ZZ // etc etc
13821
13822 /* Set result defaults. */
13823 dres->whatNext = Dis_Continue;
13824 dres->len = 4;
13825 dres->continueAt = 0;
13826 dres->jk_StopHere = Ijk_INVALID;
13827
13828 /* At least this is simple on ARM64: insns are all 4 bytes long, and
13829 4-aligned. So just fish the whole thing out of memory right now
13830 and have done. */
13831 UInt insn = getUIntLittleEndianly( guest_instr );
13832
13833 if (0) vex_printf("insn: 0x%x\n", insn);
13834
13835 DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr);
13836
13837 vassert(0 == (guest_PC_curr_instr & 3ULL));
13838
13839 /* ----------------------------------------------------------- */
13840
13841 /* Spot "Special" instructions (see comment at top of file). */
13842 {
13843 const UChar* code = guest_instr;
13844 /* Spot the 16-byte preamble:
13845 93CC0D8C ror x12, x12, #3
13846 93CC358C ror x12, x12, #13
13847 93CCCD8C ror x12, x12, #51
13848 93CCF58C ror x12, x12, #61
13849 */
13850 UInt word1 = 0x93CC0D8C;
13851 UInt word2 = 0x93CC358C;
13852 UInt word3 = 0x93CCCD8C;
13853 UInt word4 = 0x93CCF58C;
13854 if (getUIntLittleEndianly(code+ 0) == word1 &&
13855 getUIntLittleEndianly(code+ 4) == word2 &&
13856 getUIntLittleEndianly(code+ 8) == word3 &&
13857 getUIntLittleEndianly(code+12) == word4) {
13858 /* Got a "Special" instruction preamble. Which one is it? */
13859 if (getUIntLittleEndianly(code+16) == 0xAA0A014A
13860 /* orr x10,x10,x10 */) {
13861 /* X3 = client_request ( X4 ) */
13862 DIP("x3 = client_request ( x4 )\n");
13863 putPC(mkU64( guest_PC_curr_instr + 20 ));
13864 dres->jk_StopHere = Ijk_ClientReq;
13865 dres->whatNext = Dis_StopHere;
13866 return True;
13867 }
13868 else
13869 if (getUIntLittleEndianly(code+16) == 0xAA0B016B
13870 /* orr x11,x11,x11 */) {
13871 /* X3 = guest_NRADDR */
13872 DIP("x3 = guest_NRADDR\n");
13873 dres->len = 20;
13874 putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
13875 return True;
13876 }
13877 else
13878 if (getUIntLittleEndianly(code+16) == 0xAA0C018C
13879 /* orr x12,x12,x12 */) {
13880 /* branch-and-link-to-noredir X8 */
13881 DIP("branch-and-link-to-noredir x8\n");
13882 putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
13883 putPC(getIReg64orZR(8));
13884 dres->jk_StopHere = Ijk_NoRedir;
13885 dres->whatNext = Dis_StopHere;
13886 return True;
13887 }
13888 else
13889 if (getUIntLittleEndianly(code+16) == 0xAA090129
13890 /* orr x9,x9,x9 */) {
13891 /* IR injection */
13892 DIP("IR injection\n");
13893 vex_inject_ir(irsb, Iend_LE);
13894 // Invalidate the current insn. The reason is that the IRop we're
13895 // injecting here can change. In which case the translation has to
13896 // be redone. For ease of handling, we simply invalidate all the
13897 // time.
13898 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
13899 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(20)));
13900 putPC(mkU64( guest_PC_curr_instr + 20 ));
13901 dres->whatNext = Dis_StopHere;
13902 dres->jk_StopHere = Ijk_InvalICache;
13903 return True;
13904 }
13905 /* We don't know what it is. */
13906 return False;
13907 /*NOTREACHED*/
13908 }
13909 }
13910
13911 /* ----------------------------------------------------------- */
13912
13913 /* Main ARM64 instruction decoder starts here. */
13914
13915 Bool ok = False;
13916
13917 /* insn[28:25] determines the top-level grouping, so let's start
13918 off with that.
13919
13920 For all of these dis_ARM64_ functions, we pass *dres with the
13921 normal default results "insn OK, 4 bytes long, keep decoding" so
13922 they don't need to change it. However, decodes of control-flow
13923 insns may cause *dres to change.
13924 */
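/* Worked example (illustrative): insn == 0x91000421 is "add x1, x1, #1";
   its bits [28:25] are 1000, so it is routed to
   dis_ARM64_data_processing_immediate below. */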
13925 switch (INSN(28,25)) {
13926 case BITS4(1,0,0,0): case BITS4(1,0,0,1):
13927 // Data processing - immediate
13928 ok = dis_ARM64_data_processing_immediate(dres, insn);
13929 break;
13930 case BITS4(1,0,1,0): case BITS4(1,0,1,1):
13931 // Branch, exception generation and system instructions
13932 ok = dis_ARM64_branch_etc(dres, insn, archinfo);
13933 break;
13934 case BITS4(0,1,0,0): case BITS4(0,1,1,0):
13935 case BITS4(1,1,0,0): case BITS4(1,1,1,0):
13936 // Loads and stores
13937 ok = dis_ARM64_load_store(dres, insn);
13938 break;
13939 case BITS4(0,1,0,1): case BITS4(1,1,0,1):
13940 // Data processing - register
13941 ok = dis_ARM64_data_processing_register(dres, insn);
13942 break;
13943 case BITS4(0,1,1,1): case BITS4(1,1,1,1):
13944 // Data processing - SIMD and floating point
13945 ok = dis_ARM64_simd_and_fp(dres, insn);
13946 break;
13947 case BITS4(0,0,0,0): case BITS4(0,0,0,1):
13948 case BITS4(0,0,1,0): case BITS4(0,0,1,1):
13949 // UNALLOCATED
13950 break;
13951 default:
13952 vassert(0); /* Can't happen */
13953 }
13954
13955 /* If the next-level down decoders failed, make sure |dres| didn't
13956 get changed. */
13957 if (!ok) {
13958 vassert(dres->whatNext == Dis_Continue);
13959 vassert(dres->len == 4);
13960 vassert(dres->continueAt == 0);
13961 vassert(dres->jk_StopHere == Ijk_INVALID);
13962 }
13963
13964 return ok;
13965
13966 # undef INSN
13967 }
13968
13969
13970 /*------------------------------------------------------------*/
13971 /*--- Top-level fn ---*/
13972 /*------------------------------------------------------------*/
13973
13974 /* Disassemble a single instruction into IR. The instruction
13975 is located in host memory at &guest_code[delta]. */
13976
13977 DisResult disInstr_ARM64 ( IRSB* irsb_IN,
13978 Bool (*resteerOkFn) ( void*, Addr ),
13979 Bool resteerCisOk,
13980 void* callback_opaque,
13981 const UChar* guest_code_IN,
13982 Long delta_IN,
13983 Addr guest_IP,
13984 VexArch guest_arch,
13985 const VexArchInfo* archinfo,
13986 const VexAbiInfo* abiinfo,
13987 VexEndness host_endness_IN,
13988 Bool sigill_diag_IN )
13989 {
13990 DisResult dres;
13991 vex_bzero(&dres, sizeof(dres));
13992
13993 /* Set globals (see top of this file) */
13994 vassert(guest_arch == VexArchARM64);
13995
13996 irsb = irsb_IN;
13997 host_endness = host_endness_IN;
13998 guest_PC_curr_instr = (Addr64)guest_IP;
13999
14000 /* Sanity checks */
14001 /* (x::UInt - 2) <= 15 === x >= 2 && x <= 17 (I hope) */
14002 vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
14003 vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);
14004
14005 /* Try to decode */
14006 Bool ok = disInstr_ARM64_WRK( &dres,
14007 resteerOkFn, resteerCisOk, callback_opaque,
14008 &guest_code_IN[delta_IN],
14009 archinfo, abiinfo );
14010 if (ok) {
14011 /* All decode successes end up here. */
14012 vassert(dres.len == 4 || dres.len == 20);
14013 switch (dres.whatNext) {
14014 case Dis_Continue:
14015 putPC( mkU64(dres.len + guest_PC_curr_instr) );
14016 break;
14017 case Dis_ResteerU:
14018 case Dis_ResteerC:
14019 putPC(mkU64(dres.continueAt));
14020 break;
14021 case Dis_StopHere:
14022 break;
14023 default:
14024 vassert(0);
14025 }
14026 DIP("\n");
14027 } else {
14028 /* All decode failures end up here. */
14029 if (sigill_diag_IN) {
14030 Int i, j;
14031 UChar buf[64];
14032 UInt insn
14033 = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
14034 vex_bzero(buf, sizeof(buf));
14035 for (i = j = 0; i < 32; i++) {
14036 if (i > 0) {
14037 if ((i & 7) == 0) buf[j++] = ' ';
14038 else if ((i & 3) == 0) buf[j++] = '\'';
14039 }
14040 buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
14041 }
14042 vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
14043 vex_printf("disInstr(arm64): %s\n", buf);
14044 }
14045
14046 /* Tell the dispatcher that this insn cannot be decoded, and so
14047 has not been executed, and (is currently) the next to be
14048 executed. PC should be up-to-date since it is made so at the
14049 start of each insn, but nevertheless be paranoid and update
14050 it again right now. */
14051 putPC( mkU64(guest_PC_curr_instr) );
14052 dres.len = 0;
14053 dres.whatNext = Dis_StopHere;
14054 dres.jk_StopHere = Ijk_NoDecode;
14055 dres.continueAt = 0;
14056 }
14057 return dres;
14058 }
14059
14060
14061 /*--------------------------------------------------------------------*/
14062 /*--- end guest_arm64_toIR.c ---*/
14063 /*--------------------------------------------------------------------*/
14064