1
2 /*--------------------------------------------------------------------*/
3 /*--- Instrument IR to perform memory checking operations. ---*/
4 /*--- mc_translate.c ---*/
5 /*--------------------------------------------------------------------*/
6
7 /*
8 This file is part of MemCheck, a heavyweight Valgrind tool for
9 detecting memory errors.
10
11 Copyright (C) 2000-2015 Julian Seward
12 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30 */
31
32 #include "pub_tool_basics.h"
33 #include "pub_tool_poolalloc.h" // For mc_include.h
34 #include "pub_tool_hashtable.h" // For mc_include.h
35 #include "pub_tool_libcassert.h"
36 #include "pub_tool_libcprint.h"
37 #include "pub_tool_tooliface.h"
38 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
39 #include "pub_tool_xarray.h"
40 #include "pub_tool_mallocfree.h"
41 #include "pub_tool_libcbase.h"
42
43 #include "mc_include.h"
44
45
46 /* FIXMEs JRS 2011-June-16.
47
48 Check the interpretation for vector narrowing and widening ops,
49 particularly the saturating ones. I suspect they are either overly
50 pessimistic and/or wrong.
51
52 Iop_QandSQsh64x2 and friends (vector-by-vector bidirectional
53 saturating shifts): the interpretation is overly pessimistic.
54 See comments on the relevant cases below for details.
55
56 Iop_Sh64Sx2 and friends (vector-by-vector bidirectional shifts,
57 both rounding and non-rounding variants): ditto
58 */
59
60 /* This file implements the Memcheck instrumentation, and in
61 particular contains the core of its undefined value detection
62 machinery. For a comprehensive background of the terminology,
63 algorithms and rationale used herein, read:
64
65 Using Valgrind to detect undefined value errors with
66 bit-precision
67
68 Julian Seward and Nicholas Nethercote
69
70 2005 USENIX Annual Technical Conference (General Track),
71 Anaheim, CA, USA, April 10-15, 2005.
72
73 ----
74
75 Here is as good a place as any to record exactly when V bits are and
76 should be checked, why, and what function is responsible.
77
78
79 Memcheck complains when an undefined value is used:
80
81 1. In the condition of a conditional branch. Because it could cause
82 incorrect control flow, and thus cause incorrect externally-visible
83 behaviour. [mc_translate.c:complainIfUndefined]
84
85 2. As an argument to a system call, or as the value that specifies
86 the system call number. Because it could cause an incorrect
87 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
88
89 3. As the address in a load or store. Because it could cause an
90 incorrect value to be used later, which could cause externally-visible
91 behaviour (eg. via incorrect control flow or an incorrect system call
92 argument) [complainIfUndefined]
93
94 4. As the target address of a branch. Because it could cause incorrect
95 control flow. [complainIfUndefined]
96
97 5. As an argument to setenv, unsetenv, or putenv. Because it could put
98 an incorrect value into the external environment.
99 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
100
101 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
102 [complainIfUndefined]
103
104 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
105 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
106 requested it. [in memcheck.h]
107
108
109 Memcheck also complains, but should not, when an undefined value is used:
110
111 8. As the shift value in certain SIMD shift operations (but not in the
112 standard integer shift operations). This inconsistency is due to
113 historical reasons. [complainIfUndefined]
114
115
116 Memcheck does not complain, but should, when an undefined value is used:
117
118 9. As an input to a client request. Because the client request may
119 affect the visible behaviour -- see bug #144362 for an example
120 involving the malloc replacements in vg_replace_malloc.c and
121 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
122 isn't identified. That bug report also has some info on how to solve
123 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
124
125
126 In practice, 1 and 2 account for the vast majority of cases.
127 */
128
129 /* Generation of addr-definedness, addr-validity and
130 guard-definedness checks pertaining to loads and stores (Iex_Load,
131 Ist_Store, IRLoadG, IRStoreG, LLSC, CAS and Dirty memory
132 loads/stores) was re-checked 11 May 2013. */
133
134 /*------------------------------------------------------------*/
135 /*--- Forward decls ---*/
136 /*------------------------------------------------------------*/
137
138 struct _MCEnv;
139
140 static IRType shadowTypeV ( IRType ty );
141 static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
142 static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
143
144 static IRExpr *i128_const_zero(void);
145
146 /*------------------------------------------------------------*/
147 /*--- Memcheck running state, and tmp management. ---*/
148 /*------------------------------------------------------------*/
149
150 /* Carries info about a particular tmp. The tmp's number is not
151 recorded, as this is implied by (equal to) its index in the tmpMap
152 in MCEnv. The tmp's type is also not recorded, as this is present
153 in MCEnv.sb->tyenv.
154
155 When .kind is Orig, .shadowV and .shadowB may give the identities
156 of the temps currently holding the associated definedness (shadowV)
157 and origin (shadowB) values, or these may be IRTemp_INVALID if code
158 to compute such values has not yet been emitted.
159
160 When .kind is VSh or BSh then the tmp holds a V- or B- value,
161 and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
162 illogical for a shadow tmp itself to be shadowed.
163 */
164 typedef
165 enum { Orig=1, VSh=2, BSh=3 }
166 TempKind;
167
168 typedef
169 struct {
170 TempKind kind;
171 IRTemp shadowV;
172 IRTemp shadowB;
173 }
174 TempMapEnt;
175
176
177 /* Carries around state during memcheck instrumentation. */
178 typedef
179 struct _MCEnv {
180 /* MODIFIED: the superblock being constructed. IRStmts are
181 added. */
182 IRSB* sb;
183 Bool trace;
184
185 /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
186 current kind and possibly shadow temps for each temp in the
187 IRSB being constructed. Note that it does not contain the
188 type of each tmp. If you want to know the type, look at the
189 relevant entry in sb->tyenv. It follows that at all times
190 during the instrumentation process, the valid indices for
191 tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
192 total number of Orig, V- and B- temps allocated so far.
193
194 The reason for this strange split (types in one place, all
195 other info in another) is that we need the types to be
196 attached to sb so as to make it possible to do
197 "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
198 instrumentation process. */
199 XArray* /* of TempMapEnt */ tmpMap;
200
201 /* MODIFIED: indicates whether "bogus" literals have so far been
202 found. Starts off False, and may change to True. */
203 Bool bogusLiterals;
204
205 /* READONLY: indicates whether we should use expensive
206 interpretations of integer adds, since unfortunately LLVM
207 uses them to do ORs in some circumstances. Defaulted to True
208 on MacOS and False everywhere else. */
209 Bool useLLVMworkarounds;
210
211 /* READONLY: the guest layout. This indicates which parts of
212 the guest state should be regarded as 'always defined'. */
213 const VexGuestLayout* layout;
214
215 /* READONLY: the host word type. Needed for constructing
216 arguments of type 'HWord' to be passed to helper functions.
217 Ity_I32 or Ity_I64 only. */
218 IRType hWordTy;
219 }
220 MCEnv;
221
222 /* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
223 demand), as they are encountered. This is for two reasons.
224
225 (1) (less important reason): Many original tmps are unused due to
226 initial IR optimisation, and we do not want to waste space in tables
227 tracking them.
228
229 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
230 table indexed [0 .. n_types-1], which gives the current shadow for
231 each original tmp, or INVALID_IRTEMP if none is so far assigned.
232 It is necessary to support making multiple assignments to a shadow
233 -- specifically, after testing a shadow for definedness, it needs
234 to be made defined. But IR's SSA property disallows this.
235
236 (2) (more important reason): Therefore, when a shadow needs to get
237 a new value, a new temporary is created, the value is assigned to
238 that, and the tmpMap is updated to reflect the new binding.
239
240 A corollary is that if the tmpMap maps a given tmp to
241 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
242 there's a read-before-write error in the original tmps. The IR
243 sanity checker should catch all such anomalies, however.
244 */
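/* Illustrative example of the rebinding described above (tmp names
   are made up): suppose original tmp t5 is about to be tested for
   definedness and then marked as defined.  Conceptually:

      t5v_1 = <current V bits of t5>     -- found via findShadowTmpV
      ...emit the definedness test using t5v_1...
      t5v_2 = <all zero bits>            -- fresh tmp from newShadowTmpV,
                                            since t5v_1 cannot be
                                            reassigned (SSA)
      tmpMap[t5].shadowV := t5v_2        -- later reads of t5's shadow
                                            now see t5v_2
*/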
245
246 /* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
247 both the table in mce->sb and to our auxiliary mapping. Note that
248 newTemp may cause mce->tmpMap to resize, hence previous results
249 from VG_(indexXA)(mce->tmpMap) are invalidated. */
250 static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
251 {
252 Word newIx;
253 TempMapEnt ent;
254 IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
255 ent.kind = kind;
256 ent.shadowV = IRTemp_INVALID;
257 ent.shadowB = IRTemp_INVALID;
258 newIx = VG_(addToXA)( mce->tmpMap, &ent );
259 tl_assert(newIx == (Word)tmp);
260 return tmp;
261 }
262
263
264 /* Find the tmp currently shadowing the given original tmp. If none
265 so far exists, allocate one. */
266 static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
267 {
268 TempMapEnt* ent;
269 /* VG_(indexXA) range-checks 'orig', hence no need to check
270 here. */
271 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
272 tl_assert(ent->kind == Orig);
273 if (ent->shadowV == IRTemp_INVALID) {
274 IRTemp tmpV
275 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
276 /* newTemp may cause mce->tmpMap to resize, hence previous results
277 from VG_(indexXA) are invalid. */
278 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
279 tl_assert(ent->kind == Orig);
280 tl_assert(ent->shadowV == IRTemp_INVALID);
281 ent->shadowV = tmpV;
282 }
283 return ent->shadowV;
284 }
285
286 /* Allocate a new shadow for the given original tmp. This means any
287 previous shadow is abandoned. This is needed because it is
288 necessary to give a new value to a shadow once it has been tested
289 for undefinedness, but unfortunately IR's SSA property disallows
290 this. Instead we must abandon the old shadow, allocate a new one
291 and use that instead.
292
293 This is the same as findShadowTmpV, except we don't bother to see
294 if a shadow temp already existed -- we simply allocate a new one
295 regardless. */
296 static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
297 {
298 TempMapEnt* ent;
299 /* VG_(indexXA) range-checks 'orig', hence no need to check
300 here. */
301 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
302 tl_assert(ent->kind == Orig);
303 if (1) {
304 IRTemp tmpV
305 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
306 /* newTemp may cause mce->tmpMap to resize, hence previous results
307 from VG_(indexXA) are invalid. */
308 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
309 tl_assert(ent->kind == Orig);
310 ent->shadowV = tmpV;
311 }
312 }
313
314
315 /*------------------------------------------------------------*/
316 /*--- IRAtoms -- a subset of IRExprs ---*/
317 /*------------------------------------------------------------*/
318
319 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
320 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
321 input, most of this code deals in atoms. Usefully, a value atom
322 always has a V-value which is also an atom: constants are shadowed
323 by constants, and temps are shadowed by the corresponding shadow
324 temporary. */
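/* Example: in the flat expression Add32(t7, 0x10:I32) both arguments
   are atoms.  t7 is shadowed by the tmp recorded in tmpMap[t7].shadowV,
   and the constant 0x10:I32 is shadowed by the constant 0x0:I32 (all
   bits defined).  Hence each value atom has a V-value which is itself
   an atom. */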
325
326 typedef IRExpr IRAtom;
327
328 /* (used for sanity checks only): is this an atom which looks
329 like it's from original code? */
330 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
331 {
332 if (a1->tag == Iex_Const)
333 return True;
334 if (a1->tag == Iex_RdTmp) {
335 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
336 return ent->kind == Orig;
337 }
338 return False;
339 }
340
341 /* (used for sanity checks only): is this an atom which looks
342 like it's from shadow code? */
343 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
344 {
345 if (a1->tag == Iex_Const)
346 return True;
347 if (a1->tag == Iex_RdTmp) {
348 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
349 return ent->kind == VSh || ent->kind == BSh;
350 }
351 return False;
352 }
353
354 /* (used for sanity checks only): check that both args are atoms and
355 are identically-kinded. */
356 static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
357 {
358 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
359 return True;
360 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
361 return True;
362 return False;
363 }
364
365
366 /*------------------------------------------------------------*/
367 /*--- Type management ---*/
368 /*------------------------------------------------------------*/
369
370 /* Shadow state is always accessed using integer types. This returns
371 an integer type with the same size (as per sizeofIRType) as the
372 given type. The only valid shadow types are Bit, I8, I16, I32,
373 I64, I128, V128, V256. */
374
375 static IRType shadowTypeV ( IRType ty )
376 {
377 switch (ty) {
378 case Ity_I1:
379 case Ity_I8:
380 case Ity_I16:
381 case Ity_I32:
382 case Ity_I64:
383 case Ity_I128: return ty;
384 case Ity_F16: return Ity_I16;
385 case Ity_F32: return Ity_I32;
386 case Ity_D32: return Ity_I32;
387 case Ity_F64: return Ity_I64;
388 case Ity_D64: return Ity_I64;
389 case Ity_F128: return Ity_I128;
390 case Ity_D128: return Ity_I128;
391 case Ity_V128: return Ity_V128;
392 case Ity_V256: return Ity_V256;
393 default: ppIRType(ty);
394 VG_(tool_panic)("memcheck:shadowTypeV");
395 }
396 }
397
398 /* Produce a 'defined' value of the given shadow type. Should only be
399 supplied shadow types (I1/I8/I16/I32/I64/I128/V128/V256). */
400 static IRExpr* definedOfType ( IRType ty ) {
401 switch (ty) {
402 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
403 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
404 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
405 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
406 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
407 case Ity_I128: return i128_const_zero();
408 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
409 case Ity_V256: return IRExpr_Const(IRConst_V256(0x00000000));
410 default: VG_(tool_panic)("memcheck:definedOfType");
411 }
412 }
413
414
415 /*------------------------------------------------------------*/
416 /*--- Constructing IR fragments ---*/
417 /*------------------------------------------------------------*/
418
419 /* add stmt to a bb */
420 static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
421 if (mce->trace) {
422 VG_(printf)(" %c: ", cat);
423 ppIRStmt(st);
424 VG_(printf)("\n");
425 }
426 addStmtToIRSB(mce->sb, st);
427 }
428
429 /* assign value to tmp */
430 static inline
431 void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
432 stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
433 }
434
435 /* build various kinds of expressions */
436 #define triop(_op, _arg1, _arg2, _arg3) \
437 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
438 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
439 #define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
440 #define mkU1(_n) IRExpr_Const(IRConst_U1(_n))
441 #define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
442 #define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
443 #define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
444 #define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
445 #define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
446 #define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
447
448 /* Bind the given expression to a new temporary, and return the
449 temporary. This effectively converts an arbitrary expression into
450 an atom.
451
452 'ty' is the type of 'e' and hence the type that the new temporary
453 needs to be. But passing it in is redundant, since we can deduce
454 the type merely by inspecting 'e'. So at least use that fact to
455 assert that the two types agree. */
456 static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
457 {
458 TempKind k;
459 IRTemp t;
460 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e);
461
462 tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
463 switch (cat) {
464 case 'V': k = VSh; break;
465 case 'B': k = BSh; break;
466 case 'C': k = Orig; break;
467 /* happens when we are making up new "orig"
468 expressions, for IRCAS handling */
469 default: tl_assert(0);
470 }
471 t = newTemp(mce, ty, k);
472 assign(cat, mce, t, e);
473 return mkexpr(t);
474 }
475
476
477 /*------------------------------------------------------------*/
478 /*--- Helper functions for 128-bit ops ---*/
479 /*------------------------------------------------------------*/
480
481 static IRExpr *i128_const_zero(void)
482 {
483 IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
484 return binop(Iop_64HLto128, z64, z64);
485 }
486
487 /* There are no I128-bit loads and/or stores [as generated by any
488 current front ends]. So we do not need to worry about that in
489 expr2vbits_Load */
490
491
492 /*------------------------------------------------------------*/
493 /*--- Constructing definedness primitive ops ---*/
494 /*------------------------------------------------------------*/
495
496 /* --------- Defined-if-either-defined --------- */
497
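/* Worked example (V bits: 0 = defined, 1 = undefined):
      DifD8(0b00001111, 0b00111100) = 0b00001100
   A result bit is treated as defined if it is defined in either
   argument, hence the bitwise AND of the V bits. */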
498 static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
499 tl_assert(isShadowAtom(mce,a1));
500 tl_assert(isShadowAtom(mce,a2));
501 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
502 }
503
504 static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
505 tl_assert(isShadowAtom(mce,a1));
506 tl_assert(isShadowAtom(mce,a2));
507 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
508 }
509
510 static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
511 tl_assert(isShadowAtom(mce,a1));
512 tl_assert(isShadowAtom(mce,a2));
513 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
514 }
515
516 static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
517 tl_assert(isShadowAtom(mce,a1));
518 tl_assert(isShadowAtom(mce,a2));
519 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
520 }
521
522 static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
523 tl_assert(isShadowAtom(mce,a1));
524 tl_assert(isShadowAtom(mce,a2));
525 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
526 }
527
528 static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
529 tl_assert(isShadowAtom(mce,a1));
530 tl_assert(isShadowAtom(mce,a2));
531 return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
532 }
533
534 /* --------- Undefined-if-either-undefined --------- */
535
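/* Worked example (V bits: 0 = defined, 1 = undefined):
      UifU8(0b00001111, 0b00111100) = 0b00111111
   A result bit is treated as undefined if it is undefined in either
   argument, hence the bitwise OR of the V bits. */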
536 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
537 tl_assert(isShadowAtom(mce,a1));
538 tl_assert(isShadowAtom(mce,a2));
539 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
540 }
541
542 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
543 tl_assert(isShadowAtom(mce,a1));
544 tl_assert(isShadowAtom(mce,a2));
545 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
546 }
547
548 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
549 tl_assert(isShadowAtom(mce,a1));
550 tl_assert(isShadowAtom(mce,a2));
551 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
552 }
553
554 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
555 tl_assert(isShadowAtom(mce,a1));
556 tl_assert(isShadowAtom(mce,a2));
557 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
558 }
559
560 static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
561 IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
562 tl_assert(isShadowAtom(mce,a1));
563 tl_assert(isShadowAtom(mce,a2));
564 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
565 tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
566 tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
567 tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
568 tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
569 tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
570
571 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
572 }
573
574 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
575 tl_assert(isShadowAtom(mce,a1));
576 tl_assert(isShadowAtom(mce,a2));
577 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
578 }
579
580 static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
581 tl_assert(isShadowAtom(mce,a1));
582 tl_assert(isShadowAtom(mce,a2));
583 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
584 }
585
586 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
587 switch (vty) {
588 case Ity_I8: return mkUifU8(mce, a1, a2);
589 case Ity_I16: return mkUifU16(mce, a1, a2);
590 case Ity_I32: return mkUifU32(mce, a1, a2);
591 case Ity_I64: return mkUifU64(mce, a1, a2);
592 case Ity_I128: return mkUifU128(mce, a1, a2);
593 case Ity_V128: return mkUifUV128(mce, a1, a2);
594 case Ity_V256: return mkUifUV256(mce, a1, a2);
595 default:
596 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
597 VG_(tool_panic)("memcheck:mkUifU");
598 }
599 }
600
601 /* --------- The Left-family of operations. --------- */
602
603 static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
604 tl_assert(isShadowAtom(mce,a1));
605 return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
606 }
607
608 static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
609 tl_assert(isShadowAtom(mce,a1));
610 return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
611 }
612
613 static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
614 tl_assert(isShadowAtom(mce,a1));
615 return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
616 }
617
618 static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
619 tl_assert(isShadowAtom(mce,a1));
620 return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
621 }
622
623 /* --------- 'Improvement' functions for AND/OR. --------- */
624
625 /* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
626 defined (0); all other -> undefined (1).
627 */
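/* Worked example (0 = defined):
      data  = 0b00001010, vbits = 0b00000110
      ImproveAND8(data, vbits) = data | vbits = 0b00001110
   The 0 (defined) bits of the result are exactly where data has a
   defined 0 bit.  For an AND, such a bit forces the corresponding
   result bit to a known 0, so that bit may be claimed defined when
   this term is DifD'd with the naive UifU interpretation. */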
628 static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
629 {
630 tl_assert(isOriginalAtom(mce, data));
631 tl_assert(isShadowAtom(mce, vbits));
632 tl_assert(sameKindedAtoms(data, vbits));
633 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
634 }
635
636 static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
637 {
638 tl_assert(isOriginalAtom(mce, data));
639 tl_assert(isShadowAtom(mce, vbits));
640 tl_assert(sameKindedAtoms(data, vbits));
641 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
642 }
643
644 static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
645 {
646 tl_assert(isOriginalAtom(mce, data));
647 tl_assert(isShadowAtom(mce, vbits));
648 tl_assert(sameKindedAtoms(data, vbits));
649 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
650 }
651
652 static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
653 {
654 tl_assert(isOriginalAtom(mce, data));
655 tl_assert(isShadowAtom(mce, vbits));
656 tl_assert(sameKindedAtoms(data, vbits));
657 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
658 }
659
660 static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
661 {
662 tl_assert(isOriginalAtom(mce, data));
663 tl_assert(isShadowAtom(mce, vbits));
664 tl_assert(sameKindedAtoms(data, vbits));
665 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
666 }
667
668 static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
669 {
670 tl_assert(isOriginalAtom(mce, data));
671 tl_assert(isShadowAtom(mce, vbits));
672 tl_assert(sameKindedAtoms(data, vbits));
673 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
674 }
675
676 /* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
677 defined (0); all other -> undefined (1).
678 */
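/* Worked example (0 = defined):
      data  = 0b00001010, vbits = 0b00000110
      ImproveOR8(data, vbits) = ~data | vbits = 0b11110111
   The 0 (defined) bits of the result are exactly where data has a
   defined 1 bit.  For an OR, such a bit forces the corresponding
   result bit to a known 1, so that bit may be claimed defined when
   this term is DifD'd with the naive UifU interpretation. */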
679 static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
680 {
681 tl_assert(isOriginalAtom(mce, data));
682 tl_assert(isShadowAtom(mce, vbits));
683 tl_assert(sameKindedAtoms(data, vbits));
684 return assignNew(
685 'V', mce, Ity_I8,
686 binop(Iop_Or8,
687 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
688 vbits) );
689 }
690
691 static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
692 {
693 tl_assert(isOriginalAtom(mce, data));
694 tl_assert(isShadowAtom(mce, vbits));
695 tl_assert(sameKindedAtoms(data, vbits));
696 return assignNew(
697 'V', mce, Ity_I16,
698 binop(Iop_Or16,
699 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
700 vbits) );
701 }
702
703 static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
704 {
705 tl_assert(isOriginalAtom(mce, data));
706 tl_assert(isShadowAtom(mce, vbits));
707 tl_assert(sameKindedAtoms(data, vbits));
708 return assignNew(
709 'V', mce, Ity_I32,
710 binop(Iop_Or32,
711 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
712 vbits) );
713 }
714
715 static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
716 {
717 tl_assert(isOriginalAtom(mce, data));
718 tl_assert(isShadowAtom(mce, vbits));
719 tl_assert(sameKindedAtoms(data, vbits));
720 return assignNew(
721 'V', mce, Ity_I64,
722 binop(Iop_Or64,
723 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
724 vbits) );
725 }
726
727 static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
728 {
729 tl_assert(isOriginalAtom(mce, data));
730 tl_assert(isShadowAtom(mce, vbits));
731 tl_assert(sameKindedAtoms(data, vbits));
732 return assignNew(
733 'V', mce, Ity_V128,
734 binop(Iop_OrV128,
735 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
736 vbits) );
737 }
738
739 static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
740 {
741 tl_assert(isOriginalAtom(mce, data));
742 tl_assert(isShadowAtom(mce, vbits));
743 tl_assert(sameKindedAtoms(data, vbits));
744 return assignNew(
745 'V', mce, Ity_V256,
746 binop(Iop_OrV256,
747 assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
748 vbits) );
749 }
750
751 /* --------- Pessimising casts. --------- */
752
753 /* The function returns an expression of type DST_TY. If any of the VBITS
754 is undefined (value == 1) the resulting expression has all bits set to
755 1. Otherwise, all bits are 0. */
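/* Worked example (0 = defined): pessimistically casting the I32
   V-bits value 0x00000400 (a single undefined bit) to Ity_I32 gives
   0xFFFFFFFF (everything undefined), whereas casting 0x00000000
   gives 0x00000000 (everything defined).  One undefined input bit
   therefore poisons the entire result; the widening and narrowing
   cases simply replicate or truncate that all-or-nothing summary. */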
756
757 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
758 {
759 IRType src_ty;
760 IRAtom* tmp1;
761
762 /* Note, dst_ty is a shadow type, not an original type. */
763 tl_assert(isShadowAtom(mce,vbits));
764 src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);
765
766 /* Fast-track some common cases */
767 if (src_ty == Ity_I32 && dst_ty == Ity_I32)
768 return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
769
770 if (src_ty == Ity_I64 && dst_ty == Ity_I64)
771 return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
772
773 if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
774 /* PCast the arg, then clone it. */
775 IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
776 return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
777 }
778
779 if (src_ty == Ity_I32 && dst_ty == Ity_V128) {
780 /* PCast the arg, then clone it 4 times. */
781 IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
782 tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
783 return assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
784 }
785
786 if (src_ty == Ity_I32 && dst_ty == Ity_V256) {
787 /* PCast the arg, then clone it 8 times. */
788 IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
789 tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
790 tmp = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
791 return assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256, tmp, tmp));
792 }
793
794 if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
795 /* PCast the arg. This gives all 0s or all 1s. Then throw away
796 the top half. */
797 IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
798 return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
799 }
800
801 if (src_ty == Ity_V128 && dst_ty == Ity_I64) {
802 /* Use InterleaveHI64x2 to copy the top half of the vector into
803 the bottom half. Then we can UifU it with the original, throw
804 away the upper half of the result, and PCast-I64-to-I64
805 the lower half. */
806 // Generates vbits[127:64] : vbits[127:64]
807 IRAtom* hi64hi64
808 = assignNew('V', mce, Ity_V128,
809 binop(Iop_InterleaveHI64x2, vbits, vbits));
810 // Generates
811 // UifU(vbits[127:64],vbits[127:64]) : UifU(vbits[127:64],vbits[63:0])
812 // == vbits[127:64] : UifU(vbits[127:64],vbits[63:0])
813 IRAtom* lohi64
814 = mkUifUV128(mce, hi64hi64, vbits);
815 // Generates UifU(vbits[127:64],vbits[63:0])
816 IRAtom* lo64
817 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, lohi64));
818 // Generates
819 // PCast-to-I64( UifU(vbits[127:64], vbits[63:0] )
820 // == PCast-to-I64( vbits[127:0] )
821 IRAtom* res
822 = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, lo64));
823 return res;
824 }
825
826 /* Else do it the slow way .. */
827 /* First of all, collapse vbits down to a single bit. */
828 tmp1 = NULL;
829 switch (src_ty) {
830 case Ity_I1:
831 tmp1 = vbits;
832 break;
833 case Ity_I8:
834 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
835 break;
836 case Ity_I16:
837 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
838 break;
839 case Ity_I32:
840 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
841 break;
842 case Ity_I64:
843 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
844 break;
845 case Ity_I128: {
846 /* Gah. Chop it in half, OR the halves together, and compare
847 that with zero. */
848 IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
849 IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
850 IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
851 tmp1 = assignNew('V', mce, Ity_I1,
852 unop(Iop_CmpNEZ64, tmp4));
853 break;
854 }
855 default:
856 ppIRType(src_ty);
857 VG_(tool_panic)("mkPCastTo(1)");
858 }
859 tl_assert(tmp1);
860 /* Now widen up to the dst type. */
861 switch (dst_ty) {
862 case Ity_I1:
863 return tmp1;
864 case Ity_I8:
865 return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
866 case Ity_I16:
867 return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
868 case Ity_I32:
869 return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
870 case Ity_I64:
871 return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
872 case Ity_V128:
873 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
874 tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
875 return tmp1;
876 case Ity_I128:
877 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
878 tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
879 return tmp1;
880 case Ity_V256:
881 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
882 tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128,
883 tmp1, tmp1));
884 tmp1 = assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256,
885 tmp1, tmp1));
886 return tmp1;
887 default:
888 ppIRType(dst_ty);
889 VG_(tool_panic)("mkPCastTo(2)");
890 }
891 }
892
893 /* This is a minor variant. It takes an arg of some type and returns
894 a value of the same type. The result consists entirely of Defined
895 (zero) bits except its least significant bit, which is a PCast of
896 the entire argument down to a single bit. */
897 static IRAtom* mkPCastXXtoXXlsb ( MCEnv* mce, IRAtom* varg, IRType ty )
898 {
899 if (ty == Ity_V128) {
900 /* --- Case for V128 --- */
901 IRAtom* varg128 = varg;
902 // generates: PCast-to-I64(varg128)
903 IRAtom* pcdTo64 = mkPCastTo(mce, Ity_I64, varg128);
904 // Now introduce zeros (defined bits) in the top 63 places
905 // generates: Def--(63)--Def PCast-to-I1(varg128)
906 IRAtom* d63pc
907 = assignNew('V', mce, Ity_I64, binop(Iop_And64, pcdTo64, mkU64(1)));
908 // generates: Def--(64)--Def
909 IRAtom* d64
910 = definedOfType(Ity_I64);
911 // generates: Def--(127)--Def PCast-to-I1(varg128)
912 IRAtom* res
913 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, d64, d63pc));
914 return res;
915 }
916 if (ty == Ity_I64) {
917 /* --- Case for I64 --- */
918 // PCast to 64
919 IRAtom* pcd = mkPCastTo(mce, Ity_I64, varg);
920 // Zero (Def) out the top 63 bits
921 IRAtom* res
922 = assignNew('V', mce, Ity_I64, binop(Iop_And64, pcd, mkU64(1)));
923 return res;
924 }
925 /*NOTREACHED*/
926 tl_assert(0);
927 }
928
929 /* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
930 /*
931 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
932 PCasting to Ity_I1. However, sometimes it is necessary to be more
933 accurate. The insight is that the result is defined if two
934 corresponding bits can be found, one from each argument, so that
935 both bits are defined but are different -- that makes EQ say "No"
936 and NE say "Yes". Hence, we compute an improvement term and DifD
937 it onto the "normal" (UifU) result.
938
939 The result is:
940
941 PCastTo<1> (
942 -- naive version
943 PCastTo<sz>( UifU<sz>(vxx, vyy) )
944
945 `DifD<sz>`
946
947 -- improvement term
948 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
949 )
950
951 where
952 vec contains 0 (defined) bits where the corresponding arg bits
953 are defined but different, and 1 bits otherwise.
954
955 vec = Or<sz>( vxx, // 0 iff bit defined
956 vyy, // 0 iff bit defined
957 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
958 )
959
960 If any bit of vec is 0, the result is defined and so the
961 improvement term should produce 0...0, else it should produce
962 1...1.
963
964 Hence require for the improvement term:
965
966 if vec == 1...1 then 1...1 else 0...0
967 ->
968 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
969
970 This was extensively re-analysed and checked on 6 July 05.
971 */
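/* Worked example (I32; V bits: 0 = defined), with made-up values:

      xx = 0x000000A5, vxx = 0xFFFFFF00    (only the low bytes
      yy = 0x00000033, vyy = 0xFFFFFF00     are defined)

      naive            = PCast(vxx `UifU` vyy)       = 1...1
      vec              = vxx | vyy | ~(xx ^ yy)      = 0xFFFFFF69
      improvement_term = PCast(CmpEQ(vec, 1...1))    = 0...0
      improved         = naive `DifD` improvement    = 0...0
      final_cast       = PCast-to-I1(improved)       = 0 (defined)

   The low bytes are defined and differ, so the EQ/NE outcome is
   knowable despite the undefined upper bytes, and the improvement
   term rescues the result from the naive 'undefined' verdict. */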
972 static IRAtom* expensiveCmpEQorNE ( MCEnv* mce,
973 IRType ty,
974 IRAtom* vxx, IRAtom* vyy,
975 IRAtom* xx, IRAtom* yy )
976 {
977 IRAtom *naive, *vec, *improvement_term;
978 IRAtom *improved, *final_cast, *top;
979 IROp opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;
980
981 tl_assert(isShadowAtom(mce,vxx));
982 tl_assert(isShadowAtom(mce,vyy));
983 tl_assert(isOriginalAtom(mce,xx));
984 tl_assert(isOriginalAtom(mce,yy));
985 tl_assert(sameKindedAtoms(vxx,xx));
986 tl_assert(sameKindedAtoms(vyy,yy));
987
988 switch (ty) {
989 case Ity_I16:
990 opOR = Iop_Or16;
991 opDIFD = Iop_And16;
992 opUIFU = Iop_Or16;
993 opNOT = Iop_Not16;
994 opXOR = Iop_Xor16;
995 opCMP = Iop_CmpEQ16;
996 top = mkU16(0xFFFF);
997 break;
998 case Ity_I32:
999 opOR = Iop_Or32;
1000 opDIFD = Iop_And32;
1001 opUIFU = Iop_Or32;
1002 opNOT = Iop_Not32;
1003 opXOR = Iop_Xor32;
1004 opCMP = Iop_CmpEQ32;
1005 top = mkU32(0xFFFFFFFF);
1006 break;
1007 case Ity_I64:
1008 opOR = Iop_Or64;
1009 opDIFD = Iop_And64;
1010 opUIFU = Iop_Or64;
1011 opNOT = Iop_Not64;
1012 opXOR = Iop_Xor64;
1013 opCMP = Iop_CmpEQ64;
1014 top = mkU64(0xFFFFFFFFFFFFFFFFULL);
1015 break;
1016 default:
1017 VG_(tool_panic)("expensiveCmpEQorNE");
1018 }
1019
1020 naive
1021 = mkPCastTo(mce,ty,
1022 assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));
1023
1024 vec
1025 = assignNew(
1026 'V', mce,ty,
1027 binop( opOR,
1028 assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
1029 assignNew(
1030 'V', mce,ty,
1031 unop( opNOT,
1032 assignNew('V', mce,ty, binop(opXOR, xx, yy))))));
1033
1034 improvement_term
1035 = mkPCastTo( mce,ty,
1036 assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));
1037
1038 improved
1039 = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );
1040
1041 final_cast
1042 = mkPCastTo( mce, Ity_I1, improved );
1043
1044 return final_cast;
1045 }
1046
1047
1048 /* --------- Semi-accurate interpretation of CmpORD. --------- */
1049
1050 /* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
1051
1052 CmpORD32S(x,y) = 1<<3 if x <s y
1053 = 1<<2 if x >s y
1054 = 1<<1 if x == y
1055
1056 and similarly the unsigned variant. The default interpretation is:
1057
1058 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
1059 & (7<<1)
1060
1061 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
1062 are zero and therefore defined (viz, zero).
1063
1064 Also deal with a special case better:
1065
1066 CmpORD32S(x,0)
1067
1068 Here, bit 3 (LT) of the result is a copy of the top bit of x and
1069 will be defined even if the rest of x isn't. In which case we do:
1070
1071 CmpORD32S#(x,x#,0,{impliedly 0}#)
1072 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
1073 | (x# >>u 31) << 3 -- LT# = x#[31]
1074
1075 Analogous handling for CmpORD64{S,U}.
1076 */
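/* Worked example for the special case: CmpORD32S(x, 0) where only
   x's sign bit is defined, i.e. x# = 0x7FFFFFFF:

      PCast(x#) & (3<<1)    = 0b0110    -- GT#, EQ# undefined
      (x# >>u 31) << 3      = 0b0000    -- LT# defined

   so bits 2 and 1 of the result are reported undefined, but bit 3
   (the "less than" flag) is reported defined, since only x's sign
   bit is needed to compute it. */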
1077 static Bool isZeroU32 ( IRAtom* e )
1078 {
1079 return
1080 toBool( e->tag == Iex_Const
1081 && e->Iex.Const.con->tag == Ico_U32
1082 && e->Iex.Const.con->Ico.U32 == 0 );
1083 }
1084
1085 static Bool isZeroU64 ( IRAtom* e )
1086 {
1087 return
1088 toBool( e->tag == Iex_Const
1089 && e->Iex.Const.con->tag == Ico_U64
1090 && e->Iex.Const.con->Ico.U64 == 0 );
1091 }
1092
1093 static IRAtom* doCmpORD ( MCEnv* mce,
1094 IROp cmp_op,
1095 IRAtom* xxhash, IRAtom* yyhash,
1096 IRAtom* xx, IRAtom* yy )
1097 {
1098 Bool m64 = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
1099 Bool syned = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
1100 IROp opOR = m64 ? Iop_Or64 : Iop_Or32;
1101 IROp opAND = m64 ? Iop_And64 : Iop_And32;
1102 IROp opSHL = m64 ? Iop_Shl64 : Iop_Shl32;
1103 IROp opSHR = m64 ? Iop_Shr64 : Iop_Shr32;
1104 IRType ty = m64 ? Ity_I64 : Ity_I32;
1105 Int width = m64 ? 64 : 32;
1106
1107 Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
1108
1109 IRAtom* threeLeft1 = NULL;
1110 IRAtom* sevenLeft1 = NULL;
1111
1112 tl_assert(isShadowAtom(mce,xxhash));
1113 tl_assert(isShadowAtom(mce,yyhash));
1114 tl_assert(isOriginalAtom(mce,xx));
1115 tl_assert(isOriginalAtom(mce,yy));
1116 tl_assert(sameKindedAtoms(xxhash,xx));
1117 tl_assert(sameKindedAtoms(yyhash,yy));
1118 tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
1119 || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);
1120
1121 if (0) {
1122 ppIROp(cmp_op); VG_(printf)(" ");
1123 ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
1124 }
1125
1126 if (syned && isZero(yy)) {
1127 /* fancy interpretation */
1128 /* if yy is zero, then it must be fully defined (zero#). */
1129 tl_assert(isZero(yyhash));
1130 threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
1131 return
1132 binop(
1133 opOR,
1134 assignNew(
1135 'V', mce,ty,
1136 binop(
1137 opAND,
1138 mkPCastTo(mce,ty, xxhash),
1139 threeLeft1
1140 )),
1141 assignNew(
1142 'V', mce,ty,
1143 binop(
1144 opSHL,
1145 assignNew(
1146 'V', mce,ty,
1147 binop(opSHR, xxhash, mkU8(width-1))),
1148 mkU8(3)
1149 ))
1150 );
1151 } else {
1152 /* standard interpretation */
1153 sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
1154 return
1155 binop(
1156 opAND,
1157 mkPCastTo( mce,ty,
1158 mkUifU(mce,ty, xxhash,yyhash)),
1159 sevenLeft1
1160 );
1161 }
1162 }
1163
1164
1165 /*------------------------------------------------------------*/
1166 /*--- Emit a test and complaint if something is undefined. ---*/
1167 /*------------------------------------------------------------*/
1168
1169 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1170
1171
1172 /* Set the annotations on a dirty helper to indicate that the stack
1173 pointer and instruction pointers might be read. This is the
1174 behaviour of all 'emit-a-complaint' style functions we might
1175 call. */
1176
1177 static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1178 di->nFxState = 2;
1179 di->fxState[0].fx = Ifx_Read;
1180 di->fxState[0].offset = mce->layout->offset_SP;
1181 di->fxState[0].size = mce->layout->sizeof_SP;
1182 di->fxState[0].nRepeats = 0;
1183 di->fxState[0].repeatLen = 0;
1184 di->fxState[1].fx = Ifx_Read;
1185 di->fxState[1].offset = mce->layout->offset_IP;
1186 di->fxState[1].size = mce->layout->sizeof_IP;
1187 di->fxState[1].nRepeats = 0;
1188 di->fxState[1].repeatLen = 0;
1189 }
1190
1191
1192 /* Check the supplied *original* |atom| for undefinedness, and emit a
1193 complaint if so. Once that happens, mark it as defined. This is
1194 possible because the atom is either a tmp or literal. If it's a
1195 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1196 be defined. In fact as mentioned above, we will have to allocate a
1197 new tmp to carry the new 'defined' shadow value, and update the
1198 original->tmp mapping accordingly; we cannot simply assign a new
1199 value to an existing shadow tmp as this breaks SSAness.
1200
1201 The checks are performed, any resulting complaint emitted, and
1202 |atom|'s shadow temp set to 'defined', ONLY in the case that
1203 |guard| evaluates to True at run-time. If it evaluates to False
1204 then no action is performed. If |guard| is NULL (the usual case)
1205 then it is assumed to be always-true, and hence these actions are
1206 performed unconditionally.
1207
1208 This routine does not generate code to check the definedness of
1209 |guard|. The caller is assumed to have taken care of that already.
1210 */
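/* Sketch of the code emitted below for a 4-byte |atom| held in a tmp,
   with no guard and origin tracking disabled (tmp names are made up):

      t_v  = <V bits of atom>                  -- via expr2vbits
      cond = PCast-to-I1(t_v)                  -- 1 iff any bit undefined
      DIRTY cond ::: MC_(helperc_value_check4_fail_no_o)()
      t_v2 = 0x0:I32                           -- fresh shadow, all defined
      tmpMap[atom].shadowV := t_v2
*/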
1211 static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
1212 {
1213 IRAtom* vatom;
1214 IRType ty;
1215 Int sz;
1216 IRDirty* di;
1217 IRAtom* cond;
1218 IRAtom* origin;
1219 void* fn;
1220 const HChar* nm;
1221 IRExpr** args;
1222 Int nargs;
1223
1224 // Don't do V bit tests if we're not reporting undefined value errors.
1225 if (MC_(clo_mc_level) == 1)
1226 return;
1227
1228 if (guard)
1229 tl_assert(isOriginalAtom(mce, guard));
1230
1231 /* Since the original expression is atomic, there's no duplicated
1232 work generated by making multiple V-expressions for it. So we
1233 don't really care about the possibility that someone else may
1234 also create a V-interpretation for it. */
1235 tl_assert(isOriginalAtom(mce, atom));
1236 vatom = expr2vbits( mce, atom );
1237 tl_assert(isShadowAtom(mce, vatom));
1238 tl_assert(sameKindedAtoms(atom, vatom));
1239
1240 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1241
1242 /* sz is only used for constructing the error message */
1243 sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
1244
1245 cond = mkPCastTo( mce, Ity_I1, vatom );
1246 /* cond will be 0 if all defined, and 1 if any not defined. */
1247
1248 /* Get the origin info for the value we are about to check. At
1249 least, if we are doing origin tracking. If not, use a dummy
1250 zero origin. */
1251 if (MC_(clo_mc_level) == 3) {
1252 origin = schemeE( mce, atom );
1253 if (mce->hWordTy == Ity_I64) {
1254 origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
1255 }
1256 } else {
1257 origin = NULL;
1258 }
1259
1260 fn = NULL;
1261 nm = NULL;
1262 args = NULL;
1263 nargs = -1;
1264
1265 switch (sz) {
1266 case 0:
1267 if (origin) {
1268 fn = &MC_(helperc_value_check0_fail_w_o);
1269 nm = "MC_(helperc_value_check0_fail_w_o)";
1270 args = mkIRExprVec_1(origin);
1271 nargs = 1;
1272 } else {
1273 fn = &MC_(helperc_value_check0_fail_no_o);
1274 nm = "MC_(helperc_value_check0_fail_no_o)";
1275 args = mkIRExprVec_0();
1276 nargs = 0;
1277 }
1278 break;
1279 case 1:
1280 if (origin) {
1281 fn = &MC_(helperc_value_check1_fail_w_o);
1282 nm = "MC_(helperc_value_check1_fail_w_o)";
1283 args = mkIRExprVec_1(origin);
1284 nargs = 1;
1285 } else {
1286 fn = &MC_(helperc_value_check1_fail_no_o);
1287 nm = "MC_(helperc_value_check1_fail_no_o)";
1288 args = mkIRExprVec_0();
1289 nargs = 0;
1290 }
1291 break;
1292 case 4:
1293 if (origin) {
1294 fn = &MC_(helperc_value_check4_fail_w_o);
1295 nm = "MC_(helperc_value_check4_fail_w_o)";
1296 args = mkIRExprVec_1(origin);
1297 nargs = 1;
1298 } else {
1299 fn = &MC_(helperc_value_check4_fail_no_o);
1300 nm = "MC_(helperc_value_check4_fail_no_o)";
1301 args = mkIRExprVec_0();
1302 nargs = 0;
1303 }
1304 break;
1305 case 8:
1306 if (origin) {
1307 fn = &MC_(helperc_value_check8_fail_w_o);
1308 nm = "MC_(helperc_value_check8_fail_w_o)";
1309 args = mkIRExprVec_1(origin);
1310 nargs = 1;
1311 } else {
1312 fn = &MC_(helperc_value_check8_fail_no_o);
1313 nm = "MC_(helperc_value_check8_fail_no_o)";
1314 args = mkIRExprVec_0();
1315 nargs = 0;
1316 }
1317 break;
1318 case 2:
1319 case 16:
1320 if (origin) {
1321 fn = &MC_(helperc_value_checkN_fail_w_o);
1322 nm = "MC_(helperc_value_checkN_fail_w_o)";
1323 args = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
1324 nargs = 2;
1325 } else {
1326 fn = &MC_(helperc_value_checkN_fail_no_o);
1327 nm = "MC_(helperc_value_checkN_fail_no_o)";
1328 args = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
1329 nargs = 1;
1330 }
1331 break;
1332 default:
1333 VG_(tool_panic)("unexpected szB");
1334 }
1335
1336 tl_assert(fn);
1337 tl_assert(nm);
1338 tl_assert(args);
1339 tl_assert(nargs >= 0 && nargs <= 2);
1340 tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
1341 || (MC_(clo_mc_level) == 2 && origin == NULL) );
1342
1343 di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
1344 VG_(fnptr_to_fnentry)( fn ), args );
1345 di->guard = cond; // and cond is PCast-to-1(atom#)
1346
1347 /* If the complaint is to be issued under a guard condition, AND
1348 that into the guard condition for the helper call. */
1349 if (guard) {
1350 IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
1351 IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
1352 IRAtom *e = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
1353 di->guard = assignNew('V', mce, Ity_I1, unop(Iop_32to1, e));
1354 }
1355
1356 setHelperAnns( mce, di );
1357 stmt( 'V', mce, IRStmt_Dirty(di));
1358
1359 /* If |atom| is shadowed by an IRTemp, set the shadow tmp to be
1360 defined -- but only in the case where the guard evaluates to
1361 True at run-time. Do the update by setting the orig->shadow
1362 mapping for tmp to reflect the fact that this shadow is getting
1363 a new value. */
1364 tl_assert(isIRAtom(vatom));
1365 /* sameKindedAtoms ... */
1366 if (vatom->tag == Iex_RdTmp) {
1367 tl_assert(atom->tag == Iex_RdTmp);
1368 if (guard == NULL) {
1369 // guard is 'always True', hence update unconditionally
1370 newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1371 assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
1372 definedOfType(ty));
1373 } else {
1374 // update the temp only conditionally. Do this by copying
1375 // its old value when the guard is False.
1376 // The old value ..
1377 IRTemp old_tmpV = findShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1378 newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1379 IRAtom* new_tmpV
1380 = assignNew('V', mce, shadowTypeV(ty),
1381 IRExpr_ITE(guard, definedOfType(ty),
1382 mkexpr(old_tmpV)));
1383 assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp), new_tmpV);
1384 }
1385 }
1386 }
1387
1388
1389 /*------------------------------------------------------------*/
1390 /*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1391 /*------------------------------------------------------------*/
1392
1393 /* Examine the always-defined sections declared in layout to see if
1394 the (offset,size) section is within one. Note, it is an error to
1395 partially fall into such a region: (offset,size) should either be
1396 completely in such a region or completely not-in such a region.
1397 */
1398 static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1399 {
1400 Int minoffD, maxoffD, i;
1401 Int minoff = offset;
1402 Int maxoff = minoff + size - 1;
1403 tl_assert((minoff & ~0xFFFF) == 0);
1404 tl_assert((maxoff & ~0xFFFF) == 0);
1405
1406 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1407 minoffD = mce->layout->alwaysDefd[i].offset;
1408 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1409 tl_assert((minoffD & ~0xFFFF) == 0);
1410 tl_assert((maxoffD & ~0xFFFF) == 0);
1411
1412 if (maxoff < minoffD || maxoffD < minoff)
1413 continue; /* no overlap */
1414 if (minoff >= minoffD && maxoff <= maxoffD)
1415 return True; /* completely contained in an always-defd section */
1416
1417 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1418 }
1419 return False; /* could not find any containing section */
1420 }
1421
1422
1423 /* Generate into bb suitable actions to shadow this Put. If the state
1424 slice is marked 'always defined', do nothing. Otherwise, write the
1425 supplied V bits to the shadow state. We can pass in either an
1426 original atom or a V-atom, but not both. In the former case the
1427 relevant V-bits are then generated from the original.
1428 We assume here that the definedness of GUARD has already been checked.
1429 */
1430 static
1431 void do_shadow_PUT ( MCEnv* mce, Int offset,
1432 IRAtom* atom, IRAtom* vatom, IRExpr *guard )
1433 {
1434 IRType ty;
1435
1436 // Don't do shadow PUTs if we're not doing undefined value checking.
1437 // Their absence lets Vex's optimiser remove all the shadow computation
1438 // that they depend on, which includes GETs of the shadow registers.
1439 if (MC_(clo_mc_level) == 1)
1440 return;
1441
1442 if (atom) {
1443 tl_assert(!vatom);
1444 tl_assert(isOriginalAtom(mce, atom));
1445 vatom = expr2vbits( mce, atom );
1446 } else {
1447 tl_assert(vatom);
1448 tl_assert(isShadowAtom(mce, vatom));
1449 }
1450
1451 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1452 tl_assert(ty != Ity_I1);
1453 tl_assert(ty != Ity_I128);
1454 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1455 /* later: no ... */
1456 /* emit code to emit a complaint if any of the vbits are 1. */
1457 /* complainIfUndefined(mce, atom); */
1458 } else {
1459 /* Do a plain shadow Put. */
1460 if (guard) {
1461 /* If the guard expression evaluates to false we simply Put the value
1462 that is already stored in the guest state slot */
1463 IRAtom *cond, *iffalse;
1464
1465 cond = assignNew('V', mce, Ity_I1, guard);
1466 iffalse = assignNew('V', mce, ty,
1467 IRExpr_Get(offset + mce->layout->total_sizeB, ty));
1468 vatom = assignNew('V', mce, ty, IRExpr_ITE(cond, vatom, iffalse));
1469 }
1470 stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
1471 }
1472 }
1473
1474
1475 /* Generate into bb suitable actions to shadow this PutI. If the state
1476 slice is marked 'always defined', do nothing. Otherwise, write the V
1477 bits derived from the PutI's data to the shadow state. */
1478 static
1479 void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
1480 {
1481 IRAtom* vatom;
1482 IRType ty, tyS;
1483 Int arrSize;
1484 IRRegArray* descr = puti->descr;
1485 IRAtom* ix = puti->ix;
1486 Int bias = puti->bias;
1487 IRAtom* atom = puti->data;
1488
1489 // Don't do shadow PUTIs if we're not doing undefined value checking.
1490 // Their absence lets Vex's optimiser remove all the shadow computation
1491 // that they depend on, which includes GETIs of the shadow registers.
1492 if (MC_(clo_mc_level) == 1)
1493 return;
1494
1495 tl_assert(isOriginalAtom(mce,atom));
1496 vatom = expr2vbits( mce, atom );
1497 tl_assert(sameKindedAtoms(atom, vatom));
1498 ty = descr->elemTy;
1499 tyS = shadowTypeV(ty);
1500 arrSize = descr->nElems * sizeofIRType(ty);
1501 tl_assert(ty != Ity_I1);
1502 tl_assert(isOriginalAtom(mce,ix));
1503 complainIfUndefined(mce, ix, NULL);
1504 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1505 /* later: no ... */
1506 /* emit code to emit a complaint if any of the vbits are 1. */
1507 /* complainIfUndefined(mce, atom); */
1508 } else {
1509 /* Do a cloned version of the Put that refers to the shadow
1510 area. */
1511 IRRegArray* new_descr
1512 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1513 tyS, descr->nElems);
1514 stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
1515 }
1516 }
1517
1518
1519 /* Return an expression which contains the V bits corresponding to the
1520 given GET (passed in in pieces).
1521 */
1522 static
1523 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1524 {
1525 IRType tyS = shadowTypeV(ty);
1526 tl_assert(ty != Ity_I1);
1527 tl_assert(ty != Ity_I128);
1528 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1529 /* Always defined, return all zeroes of the relevant type */
1530 return definedOfType(tyS);
1531 } else {
1532 /* return a cloned version of the Get that refers to the shadow
1533 area. */
1534 /* FIXME: this isn't an atom! */
1535 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1536 }
1537 }
1538
1539
1540 /* Return an expression which contains the V bits corresponding to the
1541 given GETI (passed in in pieces).
1542 */
1543 static
1544 IRExpr* shadow_GETI ( MCEnv* mce,
1545 IRRegArray* descr, IRAtom* ix, Int bias )
1546 {
1547 IRType ty = descr->elemTy;
1548 IRType tyS = shadowTypeV(ty);
1549 Int arrSize = descr->nElems * sizeofIRType(ty);
1550 tl_assert(ty != Ity_I1);
1551 tl_assert(isOriginalAtom(mce,ix));
1552 complainIfUndefined(mce, ix, NULL);
1553 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1554 /* Always defined, return all zeroes of the relevant type */
1555 return definedOfType(tyS);
1556 } else {
1557 /* return a cloned version of the Get that refers to the shadow
1558 area. */
1559 IRRegArray* new_descr
1560 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1561 tyS, descr->nElems);
1562 return IRExpr_GetI( new_descr, ix, bias );
1563 }
1564 }
1565
1566
1567 /*------------------------------------------------------------*/
1568 /*--- Generating approximations for unknown operations, ---*/
1569 /*--- using lazy-propagate semantics ---*/
1570 /*------------------------------------------------------------*/
1571
1572 /* Lazy propagation of undefinedness from two values, resulting in the
1573 specified shadow type.
1574 */
1575 static
1576 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1577 {
1578 IRAtom* at;
1579 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1580 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1581 tl_assert(isShadowAtom(mce,va1));
1582 tl_assert(isShadowAtom(mce,va2));
1583
1584 /* The general case is inefficient because PCast is an expensive
1585 operation. Here are some special cases which use PCast only
1586 once rather than twice. */
1587
1588 /* I64 x I64 -> I64 */
1589 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1590 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1591 at = mkUifU(mce, Ity_I64, va1, va2);
1592 at = mkPCastTo(mce, Ity_I64, at);
1593 return at;
1594 }
1595
1596 /* I64 x I64 -> I32 */
1597 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1598 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1599 at = mkUifU(mce, Ity_I64, va1, va2);
1600 at = mkPCastTo(mce, Ity_I32, at);
1601 return at;
1602 }
1603
1604 if (0) {
1605 VG_(printf)("mkLazy2 ");
1606 ppIRType(t1);
1607 VG_(printf)("_");
1608 ppIRType(t2);
1609 VG_(printf)("_");
1610 ppIRType(finalVty);
1611 VG_(printf)("\n");
1612 }
1613
1614 /* General case: force everything via 32-bit intermediaries. */
1615 at = mkPCastTo(mce, Ity_I32, va1);
1616 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1617 at = mkPCastTo(mce, finalVty, at);
1618 return at;
1619 }
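
/* A minimal standalone sketch (illustrative only, hence kept under
   #if 0) of what the scheme above computes for the I64 x I64 -> I64
   special case, written on concrete vbit words: UifU is the union of
   the undefinedness masks, and PCast smears "any bit undefined"
   across the whole result. */
#if 0
static ULong mkLazy2_concrete_sketch ( ULong vbits1, ULong vbits2 )
{
   ULong u = vbits1 | vbits2;       /* UifU64: either input undefined */
   return u == 0 ? 0 : ~(ULong)0;   /* PCastTo(I64): all-or-nothing   */
}
#endif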
1620
1621
1622 /* 3-arg version of the above. */
1623 static
1624 IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
1625 IRAtom* va1, IRAtom* va2, IRAtom* va3 )
1626 {
1627 IRAtom* at;
1628 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1629 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1630 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1631 tl_assert(isShadowAtom(mce,va1));
1632 tl_assert(isShadowAtom(mce,va2));
1633 tl_assert(isShadowAtom(mce,va3));
1634
1635 /* The general case is inefficient because PCast is an expensive
1636 operation. Here are some special cases which use PCast only
1637 twice rather than three times. */
1638
1639 /* I32 x I64 x I64 -> I64 */
1640 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1641 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1642 && finalVty == Ity_I64) {
1643 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
1644 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
1645 mode indication which is fully defined, this should get
1646 folded out later. */
1647 at = mkPCastTo(mce, Ity_I64, va1);
1648 /* Now fold in 2nd and 3rd args. */
1649 at = mkUifU(mce, Ity_I64, at, va2);
1650 at = mkUifU(mce, Ity_I64, at, va3);
1651 /* and PCast once again. */
1652 at = mkPCastTo(mce, Ity_I64, at);
1653 return at;
1654 }
1655
1656 /* I32 x I8 x I64 -> I64 */
1657 if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I64
1658 && finalVty == Ity_I64) {
1659 if (0) VG_(printf)("mkLazy3: I32 x I8 x I64 -> I64\n");
1660 /* Widen 1st and 2nd args to I64. Since 1st arg is typically a
1661 * rounding mode indication which is fully defined, this should
1662 * get folded out later.
1663 */
1664 IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
1665 IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
1666 at = mkUifU(mce, Ity_I64, at1, at2); // UifU(PCast(va1), PCast(va2))
1667 at = mkUifU(mce, Ity_I64, at, va3);
1668 /* and PCast once again. */
1669 at = mkPCastTo(mce, Ity_I64, at);
1670 return at;
1671 }
1672
1673 /* I32 x I64 x I64 -> I32 */
1674 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1675 && finalVty == Ity_I32) {
1676 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
1677 at = mkPCastTo(mce, Ity_I64, va1);
1678 at = mkUifU(mce, Ity_I64, at, va2);
1679 at = mkUifU(mce, Ity_I64, at, va3);
1680 at = mkPCastTo(mce, Ity_I32, at);
1681 return at;
1682 }
1683
1684 /* I32 x I32 x I32 -> I32 */
1685 /* 32-bit FP idiom, as (eg) happens on ARM */
1686 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
1687 && finalVty == Ity_I32) {
1688 if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
1689 at = va1;
1690 at = mkUifU(mce, Ity_I32, at, va2);
1691 at = mkUifU(mce, Ity_I32, at, va3);
1692 at = mkPCastTo(mce, Ity_I32, at);
1693 return at;
1694 }
1695
1696 /* I32 x I128 x I128 -> I128 */
1697 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1698 if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
1699 && finalVty == Ity_I128) {
1700 if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
1701 /* Widen 1st arg to I128. Since 1st arg is typically a rounding
1702 mode indication which is fully defined, this should get
1703 folded out later. */
1704 at = mkPCastTo(mce, Ity_I128, va1);
1705 /* Now fold in 2nd and 3rd args. */
1706 at = mkUifU(mce, Ity_I128, at, va2);
1707 at = mkUifU(mce, Ity_I128, at, va3);
1708 /* and PCast once again. */
1709 at = mkPCastTo(mce, Ity_I128, at);
1710 return at;
1711 }
1712
1713 /* I32 x I8 x I128 -> I128 */
1714 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1715 if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I128
1716 && finalVty == Ity_I128) {
1717 if (0) VG_(printf)("mkLazy3: I32 x I8 x I128 -> I128\n");
1718 /* Use I64 as an intermediate type, which means PCasting all 3
1719 args to I64 to start with. 1st arg is typically a rounding
1720 mode indication which is fully defined, so we hope that it
1721 will get folded out later. */
1722 IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
1723 IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
1724 IRAtom* at3 = mkPCastTo(mce, Ity_I64, va3);
1725 /* Now UifU all three together. */
1726 at = mkUifU(mce, Ity_I64, at1, at2); // UifU(PCast(va1), PCast(va2))
1727 at = mkUifU(mce, Ity_I64, at, at3); // ... `UifU` PCast(va3)
1728 /* and PCast once again. */
1729 at = mkPCastTo(mce, Ity_I128, at);
1730 return at;
1731 }
1732 if (1) {
1733 VG_(printf)("mkLazy3: ");
1734 ppIRType(t1);
1735 VG_(printf)(" x ");
1736 ppIRType(t2);
1737 VG_(printf)(" x ");
1738 ppIRType(t3);
1739 VG_(printf)(" -> ");
1740 ppIRType(finalVty);
1741 VG_(printf)("\n");
1742 }
1743
1744 tl_assert(0);
1745 /* General case: force everything via 32-bit intermediaries. */
1746 /*
1747 at = mkPCastTo(mce, Ity_I32, va1);
1748 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1749 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
1750 at = mkPCastTo(mce, finalVty, at);
1751 return at;
1752 */
1753 }
1754
1755
1756 /* 4-arg version of the above. */
1757 static
1758 IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
1759 IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
1760 {
1761 IRAtom* at;
1762 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1763 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1764 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1765 IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
1766 tl_assert(isShadowAtom(mce,va1));
1767 tl_assert(isShadowAtom(mce,va2));
1768 tl_assert(isShadowAtom(mce,va3));
1769 tl_assert(isShadowAtom(mce,va4));
1770
1771 /* The general case is inefficient because PCast is an expensive
1772 operation. Here are some special cases which use fewer PCasts
1773 than the one-per-argument general approach would need. */
1774
1775 /* I32 x I64 x I64 x I64 -> I64 */
1776 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1777 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
1778 && finalVty == Ity_I64) {
1779 if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
1780 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
1781 mode indication which is fully defined, this should get
1782 folded out later. */
1783 at = mkPCastTo(mce, Ity_I64, va1);
1784 /* Now fold in 2nd, 3rd, 4th args. */
1785 at = mkUifU(mce, Ity_I64, at, va2);
1786 at = mkUifU(mce, Ity_I64, at, va3);
1787 at = mkUifU(mce, Ity_I64, at, va4);
1788 /* and PCast once again. */
1789 at = mkPCastTo(mce, Ity_I64, at);
1790 return at;
1791 }
1792 /* I32 x I32 x I32 x I32 -> I32 */
1793 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1794 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
1795 && finalVty == Ity_I32) {
1796 if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
1797 at = va1;
1798 /* Now fold in 2nd, 3rd, 4th args. */
1799 at = mkUifU(mce, Ity_I32, at, va2);
1800 at = mkUifU(mce, Ity_I32, at, va3);
1801 at = mkUifU(mce, Ity_I32, at, va4);
1802 at = mkPCastTo(mce, Ity_I32, at);
1803 return at;
1804 }
1805
1806 if (1) {
1807 VG_(printf)("mkLazy4: ");
1808 ppIRType(t1);
1809 VG_(printf)(" x ");
1810 ppIRType(t2);
1811 VG_(printf)(" x ");
1812 ppIRType(t3);
1813 VG_(printf)(" x ");
1814 ppIRType(t4);
1815 VG_(printf)(" -> ");
1816 ppIRType(finalVty);
1817 VG_(printf)("\n");
1818 }
1819
1820 tl_assert(0);
1821 }
1822
1823
1824 /* Do the lazy propagation game from a null-terminated vector of
1825 atoms. This is presumably the arguments to a helper call, so the
1826 IRCallee info is also supplied in order that we can know which
1827 arguments should be ignored (via the .mcx_mask field).
1828 */
1829 static
1830 IRAtom* mkLazyN ( MCEnv* mce,
1831 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1832 {
1833 Int i;
1834 IRAtom* here;
1835 IRAtom* curr;
1836 IRType mergeTy;
1837 Bool mergeTy64 = True;
1838
1839 /* Decide on the type of the merge intermediary. If all relevant
1840 args are I64, then it's I64. In all other circumstances, use
1841 I32. */
1842 for (i = 0; exprvec[i]; i++) {
1843 tl_assert(i < 32);
1844 tl_assert(isOriginalAtom(mce, exprvec[i]));
1845 if (cee->mcx_mask & (1<<i))
1846 continue;
1847 if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
1848 mergeTy64 = False;
1849 }
1850
1851 mergeTy = mergeTy64 ? Ity_I64 : Ity_I32;
1852 curr = definedOfType(mergeTy);
1853
1854 for (i = 0; exprvec[i]; i++) {
1855 tl_assert(i < 32);
1856 tl_assert(isOriginalAtom(mce, exprvec[i]));
1857 /* Only take notice of this arg if the callee's mc-exclusion
1858 mask does not say it is to be excluded. */
1859 if (cee->mcx_mask & (1<<i)) {
1860 /* the arg is to be excluded from definedness checking. Do
1861 nothing. */
1862 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1863 } else {
1864 /* calculate the arg's definedness, and pessimistically merge
1865 it in. */
1866 here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1867 curr = mergeTy64
1868 ? mkUifU64(mce, here, curr)
1869 : mkUifU32(mce, here, curr);
1870 }
1871 }
1872 return mkPCastTo(mce, finalVtype, curr );
1873 }
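
/* For example, if cee->mcx_mask is 0x5 then args 0 and 2 of the call
   are skipped entirely: their vbits are neither computed nor merged
   into 'curr', so undefinedness in those two arguments can never be
   reported through this path. */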
1874
1875
1876 /*------------------------------------------------------------*/
1877 /*--- Generating expensive sequences for exact carry-chain ---*/
1878 /*--- propagation in add/sub and related operations. ---*/
1879 /*------------------------------------------------------------*/
1880
1881 static
1882 IRAtom* expensiveAddSub ( MCEnv* mce,
1883 Bool add,
1884 IRType ty,
1885 IRAtom* qaa, IRAtom* qbb,
1886 IRAtom* aa, IRAtom* bb )
1887 {
1888 IRAtom *a_min, *b_min, *a_max, *b_max;
1889 IROp opAND, opOR, opXOR, opNOT, opADD, opSUB;
1890
1891 tl_assert(isShadowAtom(mce,qaa));
1892 tl_assert(isShadowAtom(mce,qbb));
1893 tl_assert(isOriginalAtom(mce,aa));
1894 tl_assert(isOriginalAtom(mce,bb));
1895 tl_assert(sameKindedAtoms(qaa,aa));
1896 tl_assert(sameKindedAtoms(qbb,bb));
1897
1898 switch (ty) {
1899 case Ity_I32:
1900 opAND = Iop_And32;
1901 opOR = Iop_Or32;
1902 opXOR = Iop_Xor32;
1903 opNOT = Iop_Not32;
1904 opADD = Iop_Add32;
1905 opSUB = Iop_Sub32;
1906 break;
1907 case Ity_I64:
1908 opAND = Iop_And64;
1909 opOR = Iop_Or64;
1910 opXOR = Iop_Xor64;
1911 opNOT = Iop_Not64;
1912 opADD = Iop_Add64;
1913 opSUB = Iop_Sub64;
1914 break;
1915 default:
1916 VG_(tool_panic)("expensiveAddSub");
1917 }
1918
1919 // a_min = aa & ~qaa
1920 a_min = assignNew('V', mce,ty,
1921 binop(opAND, aa,
1922 assignNew('V', mce,ty, unop(opNOT, qaa))));
1923
1924 // b_min = bb & ~qbb
1925 b_min = assignNew('V', mce,ty,
1926 binop(opAND, bb,
1927 assignNew('V', mce,ty, unop(opNOT, qbb))));
1928
1929 // a_max = aa | qaa
1930 a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));
1931
1932 // b_max = bb | qbb
1933 b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));
1934
1935 if (add) {
1936 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1937 return
1938 assignNew('V', mce,ty,
1939 binop( opOR,
1940 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1941 assignNew('V', mce,ty,
1942 binop( opXOR,
1943 assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
1944 assignNew('V', mce,ty, binop(opADD, a_max, b_max))
1945 )
1946 )
1947 )
1948 );
1949 } else {
1950 // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
1951 return
1952 assignNew('V', mce,ty,
1953 binop( opOR,
1954 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1955 assignNew('V', mce,ty,
1956 binop( opXOR,
1957 assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
1958 assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
1959 )
1960 )
1961 )
1962 );
1963 }
1964
1965 }
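
/* Worked example for the 'add' case (illustrative, 8 bits for
   brevity): aa = 0b00000011 with qaa = 0b00000010 (bit 1 undefined),
   and bb = 0b00000001 with qbb = 0.  Then a_min + b_min = 0b00000010
   and a_max + b_max = 0b00000100, whose XOR is 0b00000110; OR-ing in
   (qaa | qbb) leaves 0b00000110.  So bit 1 and the carry position it
   can disturb are flagged undefined, while bit 0 and all higher bits
   are correctly reported as defined. */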
1966
1967
1968 static
1969 IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
1970 IRAtom* atom, IRAtom* vatom )
1971 {
1972 IRType ty;
1973 IROp xorOp, subOp, andOp;
1974 IRExpr *one;
1975 IRAtom *improver, *improved;
1976 tl_assert(isShadowAtom(mce,vatom));
1977 tl_assert(isOriginalAtom(mce,atom));
1978 tl_assert(sameKindedAtoms(atom,vatom));
1979
1980 switch (czop) {
1981 case Iop_Ctz32:
1982 ty = Ity_I32;
1983 xorOp = Iop_Xor32;
1984 subOp = Iop_Sub32;
1985 andOp = Iop_And32;
1986 one = mkU32(1);
1987 break;
1988 case Iop_Ctz64:
1989 ty = Ity_I64;
1990 xorOp = Iop_Xor64;
1991 subOp = Iop_Sub64;
1992 andOp = Iop_And64;
1993 one = mkU64(1);
1994 break;
1995 default:
1996 ppIROp(czop);
1997 VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes");
1998 }
1999
2000 // improver = atom ^ (atom - 1)
2001 //
2002 // That is, improver has ones in its low ctz(atom)+1 bits (up to and
2003 // including the lowest set bit of atom); higher bits (if any) are zero.
2004 improver = assignNew('V', mce,ty,
2005 binop(xorOp,
2006 atom,
2007 assignNew('V', mce, ty,
2008 binop(subOp, atom, one))));
2009
2010 // improved = vatom & improver
2011 //
2012 // That is, treat any V bits above the lowest set bit of atom as
2013 // "defined", since they cannot affect the count.
2014 improved = assignNew('V', mce, ty,
2015 binop(andOp, vatom, improver));
2016
2017 // Return pessimizing cast of improved.
2018 return mkPCastTo(mce, ty, improved);
2019 }
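
/* Worked example (illustrative): if atom = 0b...10100 then
   atom - 1 = 0b...10011 and improver = 0b...00111, so 'improved'
   retains only the V bits at or below the lowest set bit of atom.
   Any undefinedness there still pessimises the whole result through
   the final PCast; undefined bits strictly above the lowest set bit
   are ignored, since they cannot change the trailing-zero count. */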
2020
2021
2022 /*------------------------------------------------------------*/
2023 /*--- Scalar shifts. ---*/
2024 /*------------------------------------------------------------*/
2025
2026 /* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
2027 idea is to shift the definedness bits by the original shift amount.
2028 This introduces 0s ("defined") in new positions for left shifts and
2029 unsigned right shifts, and copies the top definedness bit for
2030 signed right shifts. So, conveniently, applying the original shift
2031 operator to the definedness bits for the left arg is exactly the
2032 right thing to do:
2033
2034 (qaa << bb)
2035
2036 However if the shift amount is undefined then the whole result
2037 is undefined. Hence need:
2038
2039 (qaa << bb) `UifU` PCast(qbb)
2040
2041 If the shift amount bb is a literal then qbb will say 'all defined'
2042 and the UifU and PCast will get folded out by post-instrumentation
2043 optimisation.
2044 */
2045 static IRAtom* scalarShift ( MCEnv* mce,
2046 IRType ty,
2047 IROp original_op,
2048 IRAtom* qaa, IRAtom* qbb,
2049 IRAtom* aa, IRAtom* bb )
2050 {
2051 tl_assert(isShadowAtom(mce,qaa));
2052 tl_assert(isShadowAtom(mce,qbb));
2053 tl_assert(isOriginalAtom(mce,aa));
2054 tl_assert(isOriginalAtom(mce,bb));
2055 tl_assert(sameKindedAtoms(qaa,aa));
2056 tl_assert(sameKindedAtoms(qbb,bb));
2057 return
2058 assignNew(
2059 'V', mce, ty,
2060 mkUifU( mce, ty,
2061 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
2062 mkPCastTo(mce, ty, qbb)
2063 )
2064 );
2065 }
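
/* For instance, instrumenting (aa << 8) with an immediate shift amount
   yields (qaa << 8) `UifU` PCast(qbb); qbb is then the constant
   'all defined', so the UifU/PCast terms fold away and only the
   shifted vbits of aa remain.  Had bb been even partly undefined,
   PCast(qbb) would be all-ones and the entire result would be flagged
   undefined. */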
2066
2067
2068 /*------------------------------------------------------------*/
2069 /*--- Helpers for dealing with vector primops. ---*/
2070 /*------------------------------------------------------------*/
2071
2072 /* Vector pessimisation -- pessimise within each lane individually. */
2073
2074 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
2075 {
2076 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
2077 }
2078
2079 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
2080 {
2081 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
2082 }
2083
2084 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
2085 {
2086 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
2087 }
2088
2089 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
2090 {
2091 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
2092 }
2093
2094 static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
2095 {
2096 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
2097 }
2098
2099 static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
2100 {
2101 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
2102 }
2103
2104 static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
2105 {
2106 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
2107 }
2108
2109 static IRAtom* mkPCast16x16 ( MCEnv* mce, IRAtom* at )
2110 {
2111 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ16x16, at));
2112 }
2113
2114 static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
2115 {
2116 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
2117 }
2118
2119 static IRAtom* mkPCast8x32 ( MCEnv* mce, IRAtom* at )
2120 {
2121 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ8x32, at));
2122 }
2123
2124 static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
2125 {
2126 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
2127 }
2128
2129 static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
2130 {
2131 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
2132 }
2133
2134 static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
2135 {
2136 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
2137 }
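
/* Concretely, each of these lane-wise PCasts maps any lane whose vbits
   are nonzero (that is, containing at least one undefined bit) to an
   all-ones lane, and leaves a fully defined, all-zeroes lane
   unchanged: the CmpNEZ*x* ops are simply being applied to shadow
   values instead of data. */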
2138
2139
2140 /* Here's a simple scheme capable of handling ops derived from SSE1
2141 code and while only generating ops that can be efficiently
2142 implemented in SSE1. */
2143
2144 /* All-lanes versions are straightforward:
2145
2146 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
2147
2148 unary32Fx4(x) ==> PCast32x4(x#)
2149
2150 Lowest-lane-only versions are more complex:
2151
2152 binary32F0x4(x,y) ==> SetV128lo32(
2153 x#,
2154 PCast32(V128to32(UifUV128(x#,y#)))
2155 )
2156
2157 This is perhaps not so obvious. In particular, it's faster to
2158 do a V128-bit UifU and then take the bottom 32 bits than the more
2159 obvious scheme of taking the bottom 32 bits of each operand
2160 and doing a 32-bit UifU. Basically since UifU is fast and
2161 chopping lanes off vector values is slow.
2162
2163 Finally:
2164
2165 unary32F0x4(x) ==> SetV128lo32(
2166 x#,
2167 PCast32(V128to32(x#))
2168 )
2169
2170 Where:
2171
2172 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
2173 PCast32x4(v#) = CmpNEZ32x4(v#)
2174 */
2175
2176 static
2177 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2178 {
2179 IRAtom* at;
2180 tl_assert(isShadowAtom(mce, vatomX));
2181 tl_assert(isShadowAtom(mce, vatomY));
2182 at = mkUifUV128(mce, vatomX, vatomY);
2183 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
2184 return at;
2185 }
2186
2187 static
2188 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
2189 {
2190 IRAtom* at;
2191 tl_assert(isShadowAtom(mce, vatomX));
2192 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
2193 return at;
2194 }
2195
2196 static
2197 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2198 {
2199 IRAtom* at;
2200 tl_assert(isShadowAtom(mce, vatomX));
2201 tl_assert(isShadowAtom(mce, vatomY));
2202 at = mkUifUV128(mce, vatomX, vatomY);
2203 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
2204 at = mkPCastTo(mce, Ity_I32, at);
2205 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
2206 return at;
2207 }
2208
2209 static
2210 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
2211 {
2212 IRAtom* at;
2213 tl_assert(isShadowAtom(mce, vatomX));
2214 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
2215 at = mkPCastTo(mce, Ity_I32, at);
2216 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
2217 return at;
2218 }
2219
2220 /* --- ... and ... 64Fx2 versions of the same ... --- */
2221
2222 static
2223 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2224 {
2225 IRAtom* at;
2226 tl_assert(isShadowAtom(mce, vatomX));
2227 tl_assert(isShadowAtom(mce, vatomY));
2228 at = mkUifUV128(mce, vatomX, vatomY);
2229 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
2230 return at;
2231 }
2232
2233 static
2234 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
2235 {
2236 IRAtom* at;
2237 tl_assert(isShadowAtom(mce, vatomX));
2238 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
2239 return at;
2240 }
2241
2242 static
2243 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2244 {
2245 IRAtom* at;
2246 tl_assert(isShadowAtom(mce, vatomX));
2247 tl_assert(isShadowAtom(mce, vatomY));
2248 at = mkUifUV128(mce, vatomX, vatomY);
2249 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
2250 at = mkPCastTo(mce, Ity_I64, at);
2251 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2252 return at;
2253 }
2254
2255 static
2256 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
2257 {
2258 IRAtom* at;
2259 tl_assert(isShadowAtom(mce, vatomX));
2260 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
2261 at = mkPCastTo(mce, Ity_I64, at);
2262 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2263 return at;
2264 }
2265
2266 /* --- --- ... and ... 32Fx2 versions of the same --- --- */
2267
2268 static
2269 IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2270 {
2271 IRAtom* at;
2272 tl_assert(isShadowAtom(mce, vatomX));
2273 tl_assert(isShadowAtom(mce, vatomY));
2274 at = mkUifU64(mce, vatomX, vatomY);
2275 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
2276 return at;
2277 }
2278
2279 static
2280 IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
2281 {
2282 IRAtom* at;
2283 tl_assert(isShadowAtom(mce, vatomX));
2284 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
2285 return at;
2286 }
2287
2288 /* --- ... and ... 64Fx4 versions of the same ... --- */
2289
2290 static
2291 IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2292 {
2293 IRAtom* at;
2294 tl_assert(isShadowAtom(mce, vatomX));
2295 tl_assert(isShadowAtom(mce, vatomY));
2296 at = mkUifUV256(mce, vatomX, vatomY);
2297 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
2298 return at;
2299 }
2300
2301 static
2302 IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
2303 {
2304 IRAtom* at;
2305 tl_assert(isShadowAtom(mce, vatomX));
2306 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
2307 return at;
2308 }
2309
2310 /* --- ... and ... 32Fx8 versions of the same ... --- */
2311
2312 static
2313 IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2314 {
2315 IRAtom* at;
2316 tl_assert(isShadowAtom(mce, vatomX));
2317 tl_assert(isShadowAtom(mce, vatomY));
2318 at = mkUifUV256(mce, vatomX, vatomY);
2319 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
2320 return at;
2321 }
2322
2323 static
2324 IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
2325 {
2326 IRAtom* at;
2327 tl_assert(isShadowAtom(mce, vatomX));
2328 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
2329 return at;
2330 }
2331
2332 /* --- 64Fx2 binary FP ops, with rounding mode --- */
2333
2334 static
2335 IRAtom* binary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM,
2336 IRAtom* vatomX, IRAtom* vatomY )
2337 {
2338 /* This is the same as binary64Fx2, except that we subsequently
2339 pessimise vRM (definedness of the rounding mode), widen to 128
2340 bits and UifU it into the result. As with the scalar cases, if
2341 the RM is a constant then it is defined and so this extra bit
2342 will get constant-folded out later. */
2343 // "do" the vector args
2344 IRAtom* t1 = binary64Fx2(mce, vatomX, vatomY);
2345 // PCast the RM, and widen it to 128 bits
2346 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2347 // Roll it into the result
2348 t1 = mkUifUV128(mce, t1, t2);
2349 return t1;
2350 }
2351
2352 /* --- ... and ... 32Fx4 versions of the same --- */
2353
2354 static
2355 IRAtom* binary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
2356 IRAtom* vatomX, IRAtom* vatomY )
2357 {
2358 IRAtom* t1 = binary32Fx4(mce, vatomX, vatomY);
2359 // PCast the RM, and widen it to 128 bits
2360 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2361 // Roll it into the result
2362 t1 = mkUifUV128(mce, t1, t2);
2363 return t1;
2364 }
2365
2366 /* --- ... and ... 64Fx4 versions of the same --- */
2367
2368 static
2369 IRAtom* binary64Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
2370 IRAtom* vatomX, IRAtom* vatomY )
2371 {
2372 IRAtom* t1 = binary64Fx4(mce, vatomX, vatomY);
2373 // PCast the RM, and widen it to 256 bits
2374 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2375 // Roll it into the result
2376 t1 = mkUifUV256(mce, t1, t2);
2377 return t1;
2378 }
2379
2380 /* --- ... and ... 32Fx8 versions of the same --- */
2381
2382 static
2383 IRAtom* binary32Fx8_w_rm ( MCEnv* mce, IRAtom* vRM,
2384 IRAtom* vatomX, IRAtom* vatomY )
2385 {
2386 IRAtom* t1 = binary32Fx8(mce, vatomX, vatomY);
2387 // PCast the RM, and widen it to 256 bits
2388 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2389 // Roll it into the result
2390 t1 = mkUifUV256(mce, t1, t2);
2391 return t1;
2392 }
2393
2394 /* --- 64Fx2 unary FP ops, with rounding mode --- */
2395
2396 static
2397 IRAtom* unary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
2398 {
2399 /* Same scheme as binary64Fx2_w_rm. */
2400 // "do" the vector arg
2401 IRAtom* t1 = unary64Fx2(mce, vatomX);
2402 // PCast the RM, and widen it to 128 bits
2403 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2404 // Roll it into the result
2405 t1 = mkUifUV128(mce, t1, t2);
2406 return t1;
2407 }
2408
2409 /* --- ... and ... 32Fx4 versions of the same --- */
2410
2411 static
2412 IRAtom* unary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
2413 {
2414 /* Same scheme as unary64Fx2_w_rm. */
2415 IRAtom* t1 = unary32Fx4(mce, vatomX);
2416 // PCast the RM, and widen it to 128 bits
2417 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2418 // Roll it into the result
2419 t1 = mkUifUV128(mce, t1, t2);
2420 return t1;
2421 }
2422
2423
2424 /* --- --- Vector saturated narrowing --- --- */
2425
2426 /* We used to do something very clever here, but on closer inspection
2427 (2011-Jun-15), and in particular bug #279698, it turns out to be
2428 wrong. Part of the problem came from the fact that for a long
2429 time, the IR primops to do with saturated narrowing were
2430 underspecified and managed to confuse multiple cases which needed
2431 to be separate: the op names had a signedness qualifier, but in
2432 fact the source and destination signednesses needed to be specified
2433 independently, so the op names really need two independent
2434 signedness specifiers.
2435
2436 As of 2011-Jun-15 (ish) the underspecification was sorted out
2437 properly. The incorrect instrumentation remained, though. That
2438 has now (2011-Oct-22) been fixed.
2439
2440 What we now do is simple:
2441
2442 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2443 number of lanes, X is the source lane width and signedness, and Y
2444 is the destination lane width and signedness. In all cases the
2445 destination lane width is half the source lane width, so the names
2446 have a bit of redundancy, but are at least easy to read.
2447
2448 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2449 to unsigned 16s.
2450
2451 Let Vanilla(OP) be a function that takes OP, one of these
2452 saturating narrowing ops, and produces the same "shaped" narrowing
2453 op which is not saturating, but merely dumps the most significant
2454 bits. "same shape" means that the lane numbers and widths are the
2455 same as with OP.
2456
2457 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2458 = Iop_NarrowBin32to16x8,
2459 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2460 dumping the top half of each lane.
2461
2462 So, with that in place, the scheme is simple, and it is simple to
2463 pessimise each lane individually and then apply Vanilla(OP) so as
2464 to get the result in the right "shape". If the original OP is
2465 QNarrowBinXtoYxZ then we produce
2466
2467 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
2468
2469 or for the case when OP is unary (Iop_QNarrowUn*)
2470
2471 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
2472 */
2473 static
2474 IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
2475 {
2476 switch (qnarrowOp) {
2477 /* Binary: (128, 128) -> 128 */
2478 case Iop_QNarrowBin16Sto8Ux16:
2479 case Iop_QNarrowBin16Sto8Sx16:
2480 case Iop_QNarrowBin16Uto8Ux16:
2481 case Iop_QNarrowBin64Sto32Sx4:
2482 case Iop_QNarrowBin64Uto32Ux4:
2483 return Iop_NarrowBin16to8x16;
2484 case Iop_QNarrowBin32Sto16Ux8:
2485 case Iop_QNarrowBin32Sto16Sx8:
2486 case Iop_QNarrowBin32Uto16Ux8:
2487 return Iop_NarrowBin32to16x8;
2488 /* Binary: (64, 64) -> 64 */
2489 case Iop_QNarrowBin32Sto16Sx4:
2490 return Iop_NarrowBin32to16x4;
2491 case Iop_QNarrowBin16Sto8Ux8:
2492 case Iop_QNarrowBin16Sto8Sx8:
2493 return Iop_NarrowBin16to8x8;
2494 /* Unary: 128 -> 64 */
2495 case Iop_QNarrowUn64Uto32Ux2:
2496 case Iop_QNarrowUn64Sto32Sx2:
2497 case Iop_QNarrowUn64Sto32Ux2:
2498 return Iop_NarrowUn64to32x2;
2499 case Iop_QNarrowUn32Uto16Ux4:
2500 case Iop_QNarrowUn32Sto16Sx4:
2501 case Iop_QNarrowUn32Sto16Ux4:
2502 return Iop_NarrowUn32to16x4;
2503 case Iop_QNarrowUn16Uto8Ux8:
2504 case Iop_QNarrowUn16Sto8Sx8:
2505 case Iop_QNarrowUn16Sto8Ux8:
2506 return Iop_NarrowUn16to8x8;
2507 default:
2508 ppIROp(qnarrowOp);
2509 VG_(tool_panic)("vanillaNarrowOpOfShape");
2510 }
2511 }
2512
2513 static
2514 IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
2515 IRAtom* vatom1, IRAtom* vatom2)
2516 {
2517 IRAtom *at1, *at2, *at3;
2518 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2519 switch (narrow_op) {
2520 case Iop_QNarrowBin64Sto32Sx4: pcast = mkPCast32x4; break;
2521 case Iop_QNarrowBin64Uto32Ux4: pcast = mkPCast32x4; break;
2522 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
2523 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
2524 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2525 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2526 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2527 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2528 default: VG_(tool_panic)("vectorNarrowBinV128");
2529 }
2530 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2531 tl_assert(isShadowAtom(mce,vatom1));
2532 tl_assert(isShadowAtom(mce,vatom2));
2533 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2534 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
2535 at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
2536 return at3;
2537 }
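
/* As a concrete instance, for Iop_QNarrowBin32Sto16Sx8 the code above
   generates
      NarrowBin32to16x8( CmpNEZ32x4(vatom1), CmpNEZ32x4(vatom2) )
   so each 16-bit lane of the shadow result is all-ones exactly when
   the 32-bit source lane it was narrowed from contained any undefined
   bits, as per the scheme described before vanillaNarrowingOpOfShape. */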
2538
2539 static
2540 IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2541 IRAtom* vatom1, IRAtom* vatom2)
2542 {
2543 IRAtom *at1, *at2, *at3;
2544 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2545 switch (narrow_op) {
2546 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2547 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break;
2548 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break;
2549 default: VG_(tool_panic)("vectorNarrowBin64");
2550 }
2551 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2552 tl_assert(isShadowAtom(mce,vatom1));
2553 tl_assert(isShadowAtom(mce,vatom2));
2554 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2555 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
2556 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
2557 return at3;
2558 }
2559
2560 static
2561 IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
2562 IRAtom* vatom1)
2563 {
2564 IRAtom *at1, *at2;
2565 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2566 tl_assert(isShadowAtom(mce,vatom1));
2567 /* For vanilla narrowing (non-saturating), we can just apply
2568 the op directly to the V bits. */
2569 switch (narrow_op) {
2570 case Iop_NarrowUn16to8x8:
2571 case Iop_NarrowUn32to16x4:
2572 case Iop_NarrowUn64to32x2:
2573 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
2574 return at1;
2575 default:
2576 break; /* Do Plan B */
2577 }
2578 /* Plan B: for ops that involve a saturation operation on the args,
2579 we must PCast before the vanilla narrow. */
2580 switch (narrow_op) {
2581 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break;
2582 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break;
2583 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break;
2584 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
2585 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
2586 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
2587 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
2588 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
2589 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
2590 default: VG_(tool_panic)("vectorNarrowUnV128");
2591 }
2592 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2593 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2594 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
2595 return at2;
2596 }
2597
2598 static
2599 IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
2600 IRAtom* vatom1)
2601 {
2602 IRAtom *at1, *at2;
2603 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2604 switch (longen_op) {
2605 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break;
2606 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break;
2607 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
2608 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
2609 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
2610 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
2611 default: VG_(tool_panic)("vectorWidenI64");
2612 }
2613 tl_assert(isShadowAtom(mce,vatom1));
2614 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
2615 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2616 return at2;
2617 }
2618
2619
2620 /* --- --- Vector integer arithmetic --- --- */
2621
2622 /* Simple ... UifU the args and per-lane pessimise the results. */
2623
2624 /* --- V256-bit versions --- */
2625
2626 static
2627 IRAtom* binary8Ix32 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2628 {
2629 IRAtom* at;
2630 at = mkUifUV256(mce, vatom1, vatom2);
2631 at = mkPCast8x32(mce, at);
2632 return at;
2633 }
2634
2635 static
2636 IRAtom* binary16Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2637 {
2638 IRAtom* at;
2639 at = mkUifUV256(mce, vatom1, vatom2);
2640 at = mkPCast16x16(mce, at);
2641 return at;
2642 }
2643
2644 static
2645 IRAtom* binary32Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2646 {
2647 IRAtom* at;
2648 at = mkUifUV256(mce, vatom1, vatom2);
2649 at = mkPCast32x8(mce, at);
2650 return at;
2651 }
2652
2653 static
2654 IRAtom* binary64Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2655 {
2656 IRAtom* at;
2657 at = mkUifUV256(mce, vatom1, vatom2);
2658 at = mkPCast64x4(mce, at);
2659 return at;
2660 }
2661
2662 /* --- V128-bit versions --- */
2663
2664 static
2665 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2666 {
2667 IRAtom* at;
2668 at = mkUifUV128(mce, vatom1, vatom2);
2669 at = mkPCast8x16(mce, at);
2670 return at;
2671 }
2672
2673 static
2674 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2675 {
2676 IRAtom* at;
2677 at = mkUifUV128(mce, vatom1, vatom2);
2678 at = mkPCast16x8(mce, at);
2679 return at;
2680 }
2681
2682 static
2683 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2684 {
2685 IRAtom* at;
2686 at = mkUifUV128(mce, vatom1, vatom2);
2687 at = mkPCast32x4(mce, at);
2688 return at;
2689 }
2690
2691 static
2692 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2693 {
2694 IRAtom* at;
2695 at = mkUifUV128(mce, vatom1, vatom2);
2696 at = mkPCast64x2(mce, at);
2697 return at;
2698 }
2699
2700 /* --- 64-bit versions --- */
2701
2702 static
2703 IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2704 {
2705 IRAtom* at;
2706 at = mkUifU64(mce, vatom1, vatom2);
2707 at = mkPCast8x8(mce, at);
2708 return at;
2709 }
2710
2711 static
2712 IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2713 {
2714 IRAtom* at;
2715 at = mkUifU64(mce, vatom1, vatom2);
2716 at = mkPCast16x4(mce, at);
2717 return at;
2718 }
2719
2720 static
2721 IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2722 {
2723 IRAtom* at;
2724 at = mkUifU64(mce, vatom1, vatom2);
2725 at = mkPCast32x2(mce, at);
2726 return at;
2727 }
2728
2729 static
2730 IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2731 {
2732 IRAtom* at;
2733 at = mkUifU64(mce, vatom1, vatom2);
2734 at = mkPCastTo(mce, Ity_I64, at);
2735 return at;
2736 }
2737
2738 /* --- 32-bit versions --- */
2739
2740 static
2741 IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2742 {
2743 IRAtom* at;
2744 at = mkUifU32(mce, vatom1, vatom2);
2745 at = mkPCast8x4(mce, at);
2746 return at;
2747 }
2748
2749 static
2750 IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2751 {
2752 IRAtom* at;
2753 at = mkUifU32(mce, vatom1, vatom2);
2754 at = mkPCast16x2(mce, at);
2755 return at;
2756 }
2757
2758
2759 /*------------------------------------------------------------*/
2760 /*--- Generate shadow values from all kinds of IRExprs. ---*/
2761 /*------------------------------------------------------------*/
2762
2763 static
2764 IRAtom* expr2vbits_Qop ( MCEnv* mce,
2765 IROp op,
2766 IRAtom* atom1, IRAtom* atom2,
2767 IRAtom* atom3, IRAtom* atom4 )
2768 {
2769 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2770 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2771 IRAtom* vatom3 = expr2vbits( mce, atom3 );
2772 IRAtom* vatom4 = expr2vbits( mce, atom4 );
2773
2774 tl_assert(isOriginalAtom(mce,atom1));
2775 tl_assert(isOriginalAtom(mce,atom2));
2776 tl_assert(isOriginalAtom(mce,atom3));
2777 tl_assert(isOriginalAtom(mce,atom4));
2778 tl_assert(isShadowAtom(mce,vatom1));
2779 tl_assert(isShadowAtom(mce,vatom2));
2780 tl_assert(isShadowAtom(mce,vatom3));
2781 tl_assert(isShadowAtom(mce,vatom4));
2782 tl_assert(sameKindedAtoms(atom1,vatom1));
2783 tl_assert(sameKindedAtoms(atom2,vatom2));
2784 tl_assert(sameKindedAtoms(atom3,vatom3));
2785 tl_assert(sameKindedAtoms(atom4,vatom4));
2786 switch (op) {
2787 case Iop_MAddF64:
2788 case Iop_MAddF64r32:
2789 case Iop_MSubF64:
2790 case Iop_MSubF64r32:
2791 /* I32(rm) x F64 x F64 x F64 -> F64 */
2792 return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
2793
2794 case Iop_MAddF32:
2795 case Iop_MSubF32:
2796 /* I32(rm) x F32 x F32 x F32 -> F32 */
2797 return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
2798
2799 /* V256-bit data-steering */
2800 case Iop_64x4toV256:
2801 return assignNew('V', mce, Ity_V256,
2802 IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));
2803
2804 default:
2805 ppIROp(op);
2806 VG_(tool_panic)("memcheck:expr2vbits_Qop");
2807 }
2808 }
2809
2810
2811 static
2812 IRAtom* expr2vbits_Triop ( MCEnv* mce,
2813 IROp op,
2814 IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
2815 {
2816 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2817 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2818 IRAtom* vatom3 = expr2vbits( mce, atom3 );
2819
2820 tl_assert(isOriginalAtom(mce,atom1));
2821 tl_assert(isOriginalAtom(mce,atom2));
2822 tl_assert(isOriginalAtom(mce,atom3));
2823 tl_assert(isShadowAtom(mce,vatom1));
2824 tl_assert(isShadowAtom(mce,vatom2));
2825 tl_assert(isShadowAtom(mce,vatom3));
2826 tl_assert(sameKindedAtoms(atom1,vatom1));
2827 tl_assert(sameKindedAtoms(atom2,vatom2));
2828 tl_assert(sameKindedAtoms(atom3,vatom3));
2829 switch (op) {
2830 case Iop_AddF128:
2831 case Iop_AddD128:
2832 case Iop_SubF128:
2833 case Iop_SubD128:
2834 case Iop_MulF128:
2835 case Iop_MulD128:
2836 case Iop_DivF128:
2837 case Iop_DivD128:
2838 case Iop_QuantizeD128:
2839 /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
2840 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
2841 case Iop_AddF64:
2842 case Iop_AddD64:
2843 case Iop_AddF64r32:
2844 case Iop_SubF64:
2845 case Iop_SubD64:
2846 case Iop_SubF64r32:
2847 case Iop_MulF64:
2848 case Iop_MulD64:
2849 case Iop_MulF64r32:
2850 case Iop_DivF64:
2851 case Iop_DivD64:
2852 case Iop_DivF64r32:
2853 case Iop_ScaleF64:
2854 case Iop_Yl2xF64:
2855 case Iop_Yl2xp1F64:
2856 case Iop_AtanF64:
2857 case Iop_PRemF64:
2858 case Iop_PRem1F64:
2859 case Iop_QuantizeD64:
2860 /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
2861 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
2862 case Iop_PRemC3210F64:
2863 case Iop_PRem1C3210F64:
2864 /* I32(rm) x F64 x F64 -> I32 */
2865 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2866 case Iop_AddF32:
2867 case Iop_SubF32:
2868 case Iop_MulF32:
2869 case Iop_DivF32:
2870 /* I32(rm) x F32 x F32 -> F32 */
2871 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2872 case Iop_SignificanceRoundD64:
2873 /* IRRoundingMode(I32) x I8 x D64 -> D64 */
2874 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
2875 case Iop_SignificanceRoundD128:
2876 /* IRRoundingMode(I32) x I8 x D128 -> D128 */
2877 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
2878 case Iop_SliceV128:
2879 /* (V128, V128, I8) -> V128 */
2880 complainIfUndefined(mce, atom3, NULL);
2881 return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
2882 case Iop_Slice64:
2883 /* (I64, I64, I8) -> I64 */
2884 complainIfUndefined(mce, atom3, NULL);
2885 return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
2886 case Iop_SetElem8x8:
2887 case Iop_SetElem16x4:
2888 case Iop_SetElem32x2:
2889 complainIfUndefined(mce, atom2, NULL);
2890 return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
2891 /* BCDIops */
2892 case Iop_BCDAdd:
2893 case Iop_BCDSub:
2894 complainIfUndefined(mce, atom3, NULL);
2895 return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
2896
2897 /* Vector FP with rounding mode as the first arg */
2898 case Iop_Add64Fx2:
2899 case Iop_Sub64Fx2:
2900 case Iop_Mul64Fx2:
2901 case Iop_Div64Fx2:
2902 return binary64Fx2_w_rm(mce, vatom1, vatom2, vatom3);
2903
2904 case Iop_Add32Fx4:
2905 case Iop_Sub32Fx4:
2906 case Iop_Mul32Fx4:
2907 case Iop_Div32Fx4:
2908 return binary32Fx4_w_rm(mce, vatom1, vatom2, vatom3);
2909
2910 case Iop_Add64Fx4:
2911 case Iop_Sub64Fx4:
2912 case Iop_Mul64Fx4:
2913 case Iop_Div64Fx4:
2914 return binary64Fx4_w_rm(mce, vatom1, vatom2, vatom3);
2915
2916 case Iop_Add32Fx8:
2917 case Iop_Sub32Fx8:
2918 case Iop_Mul32Fx8:
2919 case Iop_Div32Fx8:
2920 return binary32Fx8_w_rm(mce, vatom1, vatom2, vatom3);
2921
2922 default:
2923 ppIROp(op);
2924 VG_(tool_panic)("memcheck:expr2vbits_Triop");
2925 }
2926 }
2927
2928
2929 static
2930 IRAtom* expr2vbits_Binop ( MCEnv* mce,
2931 IROp op,
2932 IRAtom* atom1, IRAtom* atom2 )
2933 {
2934 IRType and_or_ty;
2935 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
2936 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
2937 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2938
2939 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2940 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2941
2942 tl_assert(isOriginalAtom(mce,atom1));
2943 tl_assert(isOriginalAtom(mce,atom2));
2944 tl_assert(isShadowAtom(mce,vatom1));
2945 tl_assert(isShadowAtom(mce,vatom2));
2946 tl_assert(sameKindedAtoms(atom1,vatom1));
2947 tl_assert(sameKindedAtoms(atom2,vatom2));
2948 switch (op) {
2949
2950 /* 32-bit SIMD */
2951
2952 case Iop_Add16x2:
2953 case Iop_HAdd16Ux2:
2954 case Iop_HAdd16Sx2:
2955 case Iop_Sub16x2:
2956 case Iop_HSub16Ux2:
2957 case Iop_HSub16Sx2:
2958 case Iop_QAdd16Sx2:
2959 case Iop_QSub16Sx2:
2960 case Iop_QSub16Ux2:
2961 case Iop_QAdd16Ux2:
2962 return binary16Ix2(mce, vatom1, vatom2);
2963
2964 case Iop_Add8x4:
2965 case Iop_HAdd8Ux4:
2966 case Iop_HAdd8Sx4:
2967 case Iop_Sub8x4:
2968 case Iop_HSub8Ux4:
2969 case Iop_HSub8Sx4:
2970 case Iop_QSub8Ux4:
2971 case Iop_QAdd8Ux4:
2972 case Iop_QSub8Sx4:
2973 case Iop_QAdd8Sx4:
2974 return binary8Ix4(mce, vatom1, vatom2);
2975
2976 /* 64-bit SIMD */
2977
2978 case Iop_ShrN8x8:
2979 case Iop_ShrN16x4:
2980 case Iop_ShrN32x2:
2981 case Iop_SarN8x8:
2982 case Iop_SarN16x4:
2983 case Iop_SarN32x2:
2984 case Iop_ShlN16x4:
2985 case Iop_ShlN32x2:
2986 case Iop_ShlN8x8:
2987 /* Same scheme as with all other shifts. */
2988 complainIfUndefined(mce, atom2, NULL);
2989 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
2990
2991 case Iop_QNarrowBin32Sto16Sx4:
2992 case Iop_QNarrowBin16Sto8Sx8:
2993 case Iop_QNarrowBin16Sto8Ux8:
2994 return vectorNarrowBin64(mce, op, vatom1, vatom2);
2995
2996 case Iop_Min8Ux8:
2997 case Iop_Min8Sx8:
2998 case Iop_Max8Ux8:
2999 case Iop_Max8Sx8:
3000 case Iop_Avg8Ux8:
3001 case Iop_QSub8Sx8:
3002 case Iop_QSub8Ux8:
3003 case Iop_Sub8x8:
3004 case Iop_CmpGT8Sx8:
3005 case Iop_CmpGT8Ux8:
3006 case Iop_CmpEQ8x8:
3007 case Iop_QAdd8Sx8:
3008 case Iop_QAdd8Ux8:
3009 case Iop_QSal8x8:
3010 case Iop_QShl8x8:
3011 case Iop_Add8x8:
3012 case Iop_Mul8x8:
3013 case Iop_PolynomialMul8x8:
3014 return binary8Ix8(mce, vatom1, vatom2);
3015
3016 case Iop_Min16Sx4:
3017 case Iop_Min16Ux4:
3018 case Iop_Max16Sx4:
3019 case Iop_Max16Ux4:
3020 case Iop_Avg16Ux4:
3021 case Iop_QSub16Ux4:
3022 case Iop_QSub16Sx4:
3023 case Iop_Sub16x4:
3024 case Iop_Mul16x4:
3025 case Iop_MulHi16Sx4:
3026 case Iop_MulHi16Ux4:
3027 case Iop_CmpGT16Sx4:
3028 case Iop_CmpGT16Ux4:
3029 case Iop_CmpEQ16x4:
3030 case Iop_QAdd16Sx4:
3031 case Iop_QAdd16Ux4:
3032 case Iop_QSal16x4:
3033 case Iop_QShl16x4:
3034 case Iop_Add16x4:
3035 case Iop_QDMulHi16Sx4:
3036 case Iop_QRDMulHi16Sx4:
3037 return binary16Ix4(mce, vatom1, vatom2);
3038
3039 case Iop_Sub32x2:
3040 case Iop_Mul32x2:
3041 case Iop_Max32Sx2:
3042 case Iop_Max32Ux2:
3043 case Iop_Min32Sx2:
3044 case Iop_Min32Ux2:
3045 case Iop_CmpGT32Sx2:
3046 case Iop_CmpGT32Ux2:
3047 case Iop_CmpEQ32x2:
3048 case Iop_Add32x2:
3049 case Iop_QAdd32Ux2:
3050 case Iop_QAdd32Sx2:
3051 case Iop_QSub32Ux2:
3052 case Iop_QSub32Sx2:
3053 case Iop_QSal32x2:
3054 case Iop_QShl32x2:
3055 case Iop_QDMulHi32Sx2:
3056 case Iop_QRDMulHi32Sx2:
3057 return binary32Ix2(mce, vatom1, vatom2);
3058
3059 case Iop_QSub64Ux1:
3060 case Iop_QSub64Sx1:
3061 case Iop_QAdd64Ux1:
3062 case Iop_QAdd64Sx1:
3063 case Iop_QSal64x1:
3064 case Iop_QShl64x1:
3065 case Iop_Sal64x1:
3066 return binary64Ix1(mce, vatom1, vatom2);
3067
3068 case Iop_QShlNsatSU8x8:
3069 case Iop_QShlNsatUU8x8:
3070 case Iop_QShlNsatSS8x8:
3071 complainIfUndefined(mce, atom2, NULL);
3072 return mkPCast8x8(mce, vatom1);
3073
3074 case Iop_QShlNsatSU16x4:
3075 case Iop_QShlNsatUU16x4:
3076 case Iop_QShlNsatSS16x4:
3077 complainIfUndefined(mce, atom2, NULL);
3078 return mkPCast16x4(mce, vatom1);
3079
3080 case Iop_QShlNsatSU32x2:
3081 case Iop_QShlNsatUU32x2:
3082 case Iop_QShlNsatSS32x2:
3083 complainIfUndefined(mce, atom2, NULL);
3084 return mkPCast32x2(mce, vatom1);
3085
3086 case Iop_QShlNsatSU64x1:
3087 case Iop_QShlNsatUU64x1:
3088 case Iop_QShlNsatSS64x1:
3089 complainIfUndefined(mce, atom2, NULL);
3090 return mkPCast32x2(mce, vatom1);
3091
3092 case Iop_PwMax32Sx2:
3093 case Iop_PwMax32Ux2:
3094 case Iop_PwMin32Sx2:
3095 case Iop_PwMin32Ux2:
3096 case Iop_PwMax32Fx2:
3097 case Iop_PwMin32Fx2:
3098 return assignNew('V', mce, Ity_I64,
3099 binop(Iop_PwMax32Ux2,
3100 mkPCast32x2(mce, vatom1),
3101 mkPCast32x2(mce, vatom2)));
3102
3103 case Iop_PwMax16Sx4:
3104 case Iop_PwMax16Ux4:
3105 case Iop_PwMin16Sx4:
3106 case Iop_PwMin16Ux4:
3107 return assignNew('V', mce, Ity_I64,
3108 binop(Iop_PwMax16Ux4,
3109 mkPCast16x4(mce, vatom1),
3110 mkPCast16x4(mce, vatom2)));
3111
3112 case Iop_PwMax8Sx8:
3113 case Iop_PwMax8Ux8:
3114 case Iop_PwMin8Sx8:
3115 case Iop_PwMin8Ux8:
3116 return assignNew('V', mce, Ity_I64,
3117 binop(Iop_PwMax8Ux8,
3118 mkPCast8x8(mce, vatom1),
3119 mkPCast8x8(mce, vatom2)));
3120
3121 case Iop_PwAdd32x2:
3122 case Iop_PwAdd32Fx2:
3123 return mkPCast32x2(mce,
3124 assignNew('V', mce, Ity_I64,
3125 binop(Iop_PwAdd32x2,
3126 mkPCast32x2(mce, vatom1),
3127 mkPCast32x2(mce, vatom2))));
3128
3129 case Iop_PwAdd16x4:
3130 return mkPCast16x4(mce,
3131 assignNew('V', mce, Ity_I64,
3132 binop(op, mkPCast16x4(mce, vatom1),
3133 mkPCast16x4(mce, vatom2))));
3134
3135 case Iop_PwAdd8x8:
3136 return mkPCast8x8(mce,
3137 assignNew('V', mce, Ity_I64,
3138 binop(op, mkPCast8x8(mce, vatom1),
3139 mkPCast8x8(mce, vatom2))));
3140
3141 case Iop_Shl8x8:
3142 case Iop_Shr8x8:
3143 case Iop_Sar8x8:
3144 case Iop_Sal8x8:
3145 return mkUifU64(mce,
3146 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3147 mkPCast8x8(mce,vatom2)
3148 );
3149
3150 case Iop_Shl16x4:
3151 case Iop_Shr16x4:
3152 case Iop_Sar16x4:
3153 case Iop_Sal16x4:
3154 return mkUifU64(mce,
3155 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3156 mkPCast16x4(mce,vatom2)
3157 );
3158
3159 case Iop_Shl32x2:
3160 case Iop_Shr32x2:
3161 case Iop_Sar32x2:
3162 case Iop_Sal32x2:
3163 return mkUifU64(mce,
3164 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3165 mkPCast32x2(mce,vatom2)
3166 );
3167
3168 /* 64-bit data-steering */
3169 case Iop_InterleaveLO32x2:
3170 case Iop_InterleaveLO16x4:
3171 case Iop_InterleaveLO8x8:
3172 case Iop_InterleaveHI32x2:
3173 case Iop_InterleaveHI16x4:
3174 case Iop_InterleaveHI8x8:
3175 case Iop_CatOddLanes8x8:
3176 case Iop_CatEvenLanes8x8:
3177 case Iop_CatOddLanes16x4:
3178 case Iop_CatEvenLanes16x4:
3179 case Iop_InterleaveOddLanes8x8:
3180 case Iop_InterleaveEvenLanes8x8:
3181 case Iop_InterleaveOddLanes16x4:
3182 case Iop_InterleaveEvenLanes16x4:
3183 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
3184
3185 case Iop_GetElem8x8:
3186 complainIfUndefined(mce, atom2, NULL);
3187 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3188 case Iop_GetElem16x4:
3189 complainIfUndefined(mce, atom2, NULL);
3190 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3191 case Iop_GetElem32x2:
3192 complainIfUndefined(mce, atom2, NULL);
3193 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3194
3195 /* Perm8x8: rearrange values in left arg using steering values
3196 from right arg. So rearrange the vbits in the same way but
3197 pessimise wrt steering values. */
3198 case Iop_Perm8x8:
3199 return mkUifU64(
3200 mce,
3201 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3202 mkPCast8x8(mce, vatom2)
3203 );
3204
3205 /* V128-bit SIMD */
3206
3207 case Iop_Sqrt32Fx4:
3208 return unary32Fx4_w_rm(mce, vatom1, vatom2);
3209 case Iop_Sqrt64Fx2:
3210 return unary64Fx2_w_rm(mce, vatom1, vatom2);
3211
3212 case Iop_ShrN8x16:
3213 case Iop_ShrN16x8:
3214 case Iop_ShrN32x4:
3215 case Iop_ShrN64x2:
3216 case Iop_SarN8x16:
3217 case Iop_SarN16x8:
3218 case Iop_SarN32x4:
3219 case Iop_SarN64x2:
3220 case Iop_ShlN8x16:
3221 case Iop_ShlN16x8:
3222 case Iop_ShlN32x4:
3223 case Iop_ShlN64x2:
3224 /* Same scheme as with all other shifts. Note: 22 Oct 05:
3225 this is wrong now, scalar shifts are done properly lazily.
3226 Vector shifts should be fixed too. */
3227 complainIfUndefined(mce, atom2, NULL);
3228 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3229
3230 /* V x V shifts/rotates are done using the standard lazy scheme. */
3231 /* For the non-rounding variants of bi-di vector x vector
3232 shifts (the Iop_Sh.. ops, that is) we use the lazy scheme.
3233 But note that this is overly pessimistic, because in fact only
3234 the bottom 8 bits of each lane of the second argument are taken
3235 into account when shifting. So really we ought to ignore
3236 undefinedness in bits 8 and above of each lane in the
3237 second argument. */
3238 case Iop_Shl8x16:
3239 case Iop_Shr8x16:
3240 case Iop_Sar8x16:
3241 case Iop_Sal8x16:
3242 case Iop_Rol8x16:
3243 case Iop_Sh8Sx16:
3244 case Iop_Sh8Ux16:
3245 return mkUifUV128(mce,
3246 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3247 mkPCast8x16(mce,vatom2)
3248 );
3249
3250 case Iop_Shl16x8:
3251 case Iop_Shr16x8:
3252 case Iop_Sar16x8:
3253 case Iop_Sal16x8:
3254 case Iop_Rol16x8:
3255 case Iop_Sh16Sx8:
3256 case Iop_Sh16Ux8:
3257 return mkUifUV128(mce,
3258 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3259 mkPCast16x8(mce,vatom2)
3260 );
3261
3262 case Iop_Shl32x4:
3263 case Iop_Shr32x4:
3264 case Iop_Sar32x4:
3265 case Iop_Sal32x4:
3266 case Iop_Rol32x4:
3267 case Iop_Sh32Sx4:
3268 case Iop_Sh32Ux4:
3269 return mkUifUV128(mce,
3270 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3271 mkPCast32x4(mce,vatom2)
3272 );
3273
3274 case Iop_Shl64x2:
3275 case Iop_Shr64x2:
3276 case Iop_Sar64x2:
3277 case Iop_Sal64x2:
3278 case Iop_Rol64x2:
3279 case Iop_Sh64Sx2:
3280 case Iop_Sh64Ux2:
3281 return mkUifUV128(mce,
3282 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3283 mkPCast64x2(mce,vatom2)
3284 );
3285
3286 /* For the rounding variants of bi-di vector x vector shifts, the
3287 rounding adjustment can cause undefinedness to propagate through
3288 the entire lane, in the worst case. Too complex to handle
3289 properly .. just UifU the arguments and then PCast them.
3290 Suboptimal but safe. */
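      /* Informal note: binaryNIxM (binary8Ix16 etc, below) UifUs the
         two shadow args and then pessimises (PCasts) each N-bit lane,
         so any undefined bit in either operand's lane marks the whole
         corresponding result lane as undefined. */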
3291 case Iop_Rsh8Sx16:
3292 case Iop_Rsh8Ux16:
3293 return binary8Ix16(mce, vatom1, vatom2);
3294 case Iop_Rsh16Sx8:
3295 case Iop_Rsh16Ux8:
3296 return binary16Ix8(mce, vatom1, vatom2);
3297 case Iop_Rsh32Sx4:
3298 case Iop_Rsh32Ux4:
3299 return binary32Ix4(mce, vatom1, vatom2);
3300 case Iop_Rsh64Sx2:
3301 case Iop_Rsh64Ux2:
3302 return binary64Ix2(mce, vatom1, vatom2);
3303
3304 case Iop_F32ToFixed32Ux4_RZ:
3305 case Iop_F32ToFixed32Sx4_RZ:
3306 case Iop_Fixed32UToF32x4_RN:
3307 case Iop_Fixed32SToF32x4_RN:
3308 complainIfUndefined(mce, atom2, NULL);
3309 return mkPCast32x4(mce, vatom1);
3310
3311 case Iop_F32ToFixed32Ux2_RZ:
3312 case Iop_F32ToFixed32Sx2_RZ:
3313 case Iop_Fixed32UToF32x2_RN:
3314 case Iop_Fixed32SToF32x2_RN:
3315 complainIfUndefined(mce, atom2, NULL);
3316 return mkPCast32x2(mce, vatom1);
3317
3318 case Iop_QSub8Ux16:
3319 case Iop_QSub8Sx16:
3320 case Iop_Sub8x16:
3321 case Iop_Min8Ux16:
3322 case Iop_Min8Sx16:
3323 case Iop_Max8Ux16:
3324 case Iop_Max8Sx16:
3325 case Iop_CmpGT8Sx16:
3326 case Iop_CmpGT8Ux16:
3327 case Iop_CmpEQ8x16:
3328 case Iop_Avg8Ux16:
3329 case Iop_Avg8Sx16:
3330 case Iop_QAdd8Ux16:
3331 case Iop_QAdd8Sx16:
3332 case Iop_QAddExtUSsatSS8x16:
3333 case Iop_QAddExtSUsatUU8x16:
3334 case Iop_QSal8x16:
3335 case Iop_QShl8x16:
3336 case Iop_Add8x16:
3337 case Iop_Mul8x16:
3338 case Iop_PolynomialMul8x16:
3339 case Iop_PolynomialMulAdd8x16:
3340 return binary8Ix16(mce, vatom1, vatom2);
3341
3342 case Iop_QSub16Ux8:
3343 case Iop_QSub16Sx8:
3344 case Iop_Sub16x8:
3345 case Iop_Mul16x8:
3346 case Iop_MulHi16Sx8:
3347 case Iop_MulHi16Ux8:
3348 case Iop_Min16Sx8:
3349 case Iop_Min16Ux8:
3350 case Iop_Max16Sx8:
3351 case Iop_Max16Ux8:
3352 case Iop_CmpGT16Sx8:
3353 case Iop_CmpGT16Ux8:
3354 case Iop_CmpEQ16x8:
3355 case Iop_Avg16Ux8:
3356 case Iop_Avg16Sx8:
3357 case Iop_QAdd16Ux8:
3358 case Iop_QAdd16Sx8:
3359 case Iop_QAddExtUSsatSS16x8:
3360 case Iop_QAddExtSUsatUU16x8:
3361 case Iop_QSal16x8:
3362 case Iop_QShl16x8:
3363 case Iop_Add16x8:
3364 case Iop_QDMulHi16Sx8:
3365 case Iop_QRDMulHi16Sx8:
3366 case Iop_PolynomialMulAdd16x8:
3367 return binary16Ix8(mce, vatom1, vatom2);
3368
3369 case Iop_Sub32x4:
3370 case Iop_CmpGT32Sx4:
3371 case Iop_CmpGT32Ux4:
3372 case Iop_CmpEQ32x4:
3373 case Iop_QAdd32Sx4:
3374 case Iop_QAdd32Ux4:
3375 case Iop_QSub32Sx4:
3376 case Iop_QSub32Ux4:
3377 case Iop_QAddExtUSsatSS32x4:
3378 case Iop_QAddExtSUsatUU32x4:
3379 case Iop_QSal32x4:
3380 case Iop_QShl32x4:
3381 case Iop_Avg32Ux4:
3382 case Iop_Avg32Sx4:
3383 case Iop_Add32x4:
3384 case Iop_Max32Ux4:
3385 case Iop_Max32Sx4:
3386 case Iop_Min32Ux4:
3387 case Iop_Min32Sx4:
3388 case Iop_Mul32x4:
3389 case Iop_QDMulHi32Sx4:
3390 case Iop_QRDMulHi32Sx4:
3391 case Iop_PolynomialMulAdd32x4:
3392 return binary32Ix4(mce, vatom1, vatom2);
3393
3394 case Iop_Sub64x2:
3395 case Iop_Add64x2:
3396 case Iop_Max64Sx2:
3397 case Iop_Max64Ux2:
3398 case Iop_Min64Sx2:
3399 case Iop_Min64Ux2:
3400 case Iop_CmpEQ64x2:
3401 case Iop_CmpGT64Sx2:
3402 case Iop_CmpGT64Ux2:
3403 case Iop_QSal64x2:
3404 case Iop_QShl64x2:
3405 case Iop_QAdd64Ux2:
3406 case Iop_QAdd64Sx2:
3407 case Iop_QSub64Ux2:
3408 case Iop_QSub64Sx2:
3409 case Iop_QAddExtUSsatSS64x2:
3410 case Iop_QAddExtSUsatUU64x2:
3411 case Iop_PolynomialMulAdd64x2:
3412 case Iop_CipherV128:
3413 case Iop_CipherLV128:
3414 case Iop_NCipherV128:
3415 case Iop_NCipherLV128:
3416 return binary64Ix2(mce, vatom1, vatom2);
3417
3418 case Iop_QNarrowBin64Sto32Sx4:
3419 case Iop_QNarrowBin64Uto32Ux4:
3420 case Iop_QNarrowBin32Sto16Sx8:
3421 case Iop_QNarrowBin32Uto16Ux8:
3422 case Iop_QNarrowBin32Sto16Ux8:
3423 case Iop_QNarrowBin16Sto8Sx16:
3424 case Iop_QNarrowBin16Uto8Ux16:
3425 case Iop_QNarrowBin16Sto8Ux16:
3426 return vectorNarrowBinV128(mce, op, vatom1, vatom2);
3427
3428 case Iop_Min64Fx2:
3429 case Iop_Max64Fx2:
3430 case Iop_CmpLT64Fx2:
3431 case Iop_CmpLE64Fx2:
3432 case Iop_CmpEQ64Fx2:
3433 case Iop_CmpUN64Fx2:
3434 case Iop_RecipStep64Fx2:
3435 case Iop_RSqrtStep64Fx2:
3436 return binary64Fx2(mce, vatom1, vatom2);
3437
3438 case Iop_Sub64F0x2:
3439 case Iop_Mul64F0x2:
3440 case Iop_Min64F0x2:
3441 case Iop_Max64F0x2:
3442 case Iop_Div64F0x2:
3443 case Iop_CmpLT64F0x2:
3444 case Iop_CmpLE64F0x2:
3445 case Iop_CmpEQ64F0x2:
3446 case Iop_CmpUN64F0x2:
3447 case Iop_Add64F0x2:
3448 return binary64F0x2(mce, vatom1, vatom2);
3449
3450 case Iop_Min32Fx4:
3451 case Iop_Max32Fx4:
3452 case Iop_CmpLT32Fx4:
3453 case Iop_CmpLE32Fx4:
3454 case Iop_CmpEQ32Fx4:
3455 case Iop_CmpUN32Fx4:
3456 case Iop_CmpGT32Fx4:
3457 case Iop_CmpGE32Fx4:
3458 case Iop_RecipStep32Fx4:
3459 case Iop_RSqrtStep32Fx4:
3460 return binary32Fx4(mce, vatom1, vatom2);
3461
3462 case Iop_Sub32Fx2:
3463 case Iop_Mul32Fx2:
3464 case Iop_Min32Fx2:
3465 case Iop_Max32Fx2:
3466 case Iop_CmpEQ32Fx2:
3467 case Iop_CmpGT32Fx2:
3468 case Iop_CmpGE32Fx2:
3469 case Iop_Add32Fx2:
3470 case Iop_RecipStep32Fx2:
3471 case Iop_RSqrtStep32Fx2:
3472 return binary32Fx2(mce, vatom1, vatom2);
3473
3474 case Iop_Sub32F0x4:
3475 case Iop_Mul32F0x4:
3476 case Iop_Min32F0x4:
3477 case Iop_Max32F0x4:
3478 case Iop_Div32F0x4:
3479 case Iop_CmpLT32F0x4:
3480 case Iop_CmpLE32F0x4:
3481 case Iop_CmpEQ32F0x4:
3482 case Iop_CmpUN32F0x4:
3483 case Iop_Add32F0x4:
3484 return binary32F0x4(mce, vatom1, vatom2);
3485
3486 case Iop_QShlNsatSU8x16:
3487 case Iop_QShlNsatUU8x16:
3488 case Iop_QShlNsatSS8x16:
3489 complainIfUndefined(mce, atom2, NULL);
3490 return mkPCast8x16(mce, vatom1);
3491
3492 case Iop_QShlNsatSU16x8:
3493 case Iop_QShlNsatUU16x8:
3494 case Iop_QShlNsatSS16x8:
3495 complainIfUndefined(mce, atom2, NULL);
3496 return mkPCast16x8(mce, vatom1);
3497
3498 case Iop_QShlNsatSU32x4:
3499 case Iop_QShlNsatUU32x4:
3500 case Iop_QShlNsatSS32x4:
3501 complainIfUndefined(mce, atom2, NULL);
3502 return mkPCast32x4(mce, vatom1);
3503
3504 case Iop_QShlNsatSU64x2:
3505 case Iop_QShlNsatUU64x2:
3506 case Iop_QShlNsatSS64x2:
3507 complainIfUndefined(mce, atom2, NULL);
3508          return mkPCast64x2(mce, vatom1);
3509
3510 /* Q-and-Qshift-by-imm-and-narrow of the form (V128, I8) -> V128.
3511 To make this simpler, do the following:
3512 * complain if the shift amount (the I8) is undefined
3513 * pcast each lane at the wide width
3514 * truncate each lane to half width
3515 * pcast the resulting 64-bit value to a single bit and use
3516 that as the least significant bit of the upper half of the
3517 result. */
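      /* Informal sketch (helper names as used below): for, e.g.,
         Iop_QandQShrNnarrow32Uto16Ux4 the shadow value assembled is
         roughly

            narrowed = NarrowUn32to16x4( PCast32x4(vatom1) )
            result   = 64HLtoV128( PCast-to-lsb-of-I64(narrowed),
                                   narrowed )

         with the shift amount (atom2) checked for definedness first. */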
3518 case Iop_QandQShrNnarrow64Uto32Ux2:
3519 case Iop_QandQSarNnarrow64Sto32Sx2:
3520 case Iop_QandQSarNnarrow64Sto32Ux2:
3521 case Iop_QandQRShrNnarrow64Uto32Ux2:
3522 case Iop_QandQRSarNnarrow64Sto32Sx2:
3523 case Iop_QandQRSarNnarrow64Sto32Ux2:
3524 case Iop_QandQShrNnarrow32Uto16Ux4:
3525 case Iop_QandQSarNnarrow32Sto16Sx4:
3526 case Iop_QandQSarNnarrow32Sto16Ux4:
3527 case Iop_QandQRShrNnarrow32Uto16Ux4:
3528 case Iop_QandQRSarNnarrow32Sto16Sx4:
3529 case Iop_QandQRSarNnarrow32Sto16Ux4:
3530 case Iop_QandQShrNnarrow16Uto8Ux8:
3531 case Iop_QandQSarNnarrow16Sto8Sx8:
3532 case Iop_QandQSarNnarrow16Sto8Ux8:
3533 case Iop_QandQRShrNnarrow16Uto8Ux8:
3534 case Iop_QandQRSarNnarrow16Sto8Sx8:
3535 case Iop_QandQRSarNnarrow16Sto8Ux8:
3536 {
3537 IRAtom* (*fnPessim) (MCEnv*, IRAtom*) = NULL;
3538 IROp opNarrow = Iop_INVALID;
3539 switch (op) {
3540 case Iop_QandQShrNnarrow64Uto32Ux2:
3541 case Iop_QandQSarNnarrow64Sto32Sx2:
3542 case Iop_QandQSarNnarrow64Sto32Ux2:
3543 case Iop_QandQRShrNnarrow64Uto32Ux2:
3544 case Iop_QandQRSarNnarrow64Sto32Sx2:
3545 case Iop_QandQRSarNnarrow64Sto32Ux2:
3546 fnPessim = mkPCast64x2;
3547 opNarrow = Iop_NarrowUn64to32x2;
3548 break;
3549 case Iop_QandQShrNnarrow32Uto16Ux4:
3550 case Iop_QandQSarNnarrow32Sto16Sx4:
3551 case Iop_QandQSarNnarrow32Sto16Ux4:
3552 case Iop_QandQRShrNnarrow32Uto16Ux4:
3553 case Iop_QandQRSarNnarrow32Sto16Sx4:
3554 case Iop_QandQRSarNnarrow32Sto16Ux4:
3555 fnPessim = mkPCast32x4;
3556 opNarrow = Iop_NarrowUn32to16x4;
3557 break;
3558 case Iop_QandQShrNnarrow16Uto8Ux8:
3559 case Iop_QandQSarNnarrow16Sto8Sx8:
3560 case Iop_QandQSarNnarrow16Sto8Ux8:
3561 case Iop_QandQRShrNnarrow16Uto8Ux8:
3562 case Iop_QandQRSarNnarrow16Sto8Sx8:
3563 case Iop_QandQRSarNnarrow16Sto8Ux8:
3564 fnPessim = mkPCast16x8;
3565 opNarrow = Iop_NarrowUn16to8x8;
3566 break;
3567 default:
3568 tl_assert(0);
3569 }
3570 complainIfUndefined(mce, atom2, NULL);
3571 // Pessimised shift result
3572 IRAtom* shV
3573 = fnPessim(mce, vatom1);
3574 // Narrowed, pessimised shift result
3575 IRAtom* shVnarrowed
3576 = assignNew('V', mce, Ity_I64, unop(opNarrow, shV));
3577 // Generates: Def--(63)--Def PCast-to-I1(narrowed)
3578 IRAtom* qV = mkPCastXXtoXXlsb(mce, shVnarrowed, Ity_I64);
3579 // and assemble the result
3580 return assignNew('V', mce, Ity_V128,
3581 binop(Iop_64HLtoV128, qV, shVnarrowed));
3582 }
3583
3584 case Iop_Mull32Sx2:
3585 case Iop_Mull32Ux2:
3586 case Iop_QDMull32Sx2:
3587 return vectorWidenI64(mce, Iop_Widen32Sto64x2,
3588 mkUifU64(mce, vatom1, vatom2));
3589
3590 case Iop_Mull16Sx4:
3591 case Iop_Mull16Ux4:
3592 case Iop_QDMull16Sx4:
3593 return vectorWidenI64(mce, Iop_Widen16Sto32x4,
3594 mkUifU64(mce, vatom1, vatom2));
3595
3596 case Iop_Mull8Sx8:
3597 case Iop_Mull8Ux8:
3598 case Iop_PolynomialMull8x8:
3599 return vectorWidenI64(mce, Iop_Widen8Sto16x8,
3600 mkUifU64(mce, vatom1, vatom2));
3601
3602 case Iop_PwAdd32x4:
3603 return mkPCast32x4(mce,
3604 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
3605 mkPCast32x4(mce, vatom2))));
3606
3607 case Iop_PwAdd16x8:
3608 return mkPCast16x8(mce,
3609 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
3610 mkPCast16x8(mce, vatom2))));
3611
3612 case Iop_PwAdd8x16:
3613 return mkPCast8x16(mce,
3614 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
3615 mkPCast8x16(mce, vatom2))));
3616
3617 /* V128-bit data-steering */
3618 case Iop_SetV128lo32:
3619 case Iop_SetV128lo64:
3620 case Iop_64HLtoV128:
3621 case Iop_InterleaveLO64x2:
3622 case Iop_InterleaveLO32x4:
3623 case Iop_InterleaveLO16x8:
3624 case Iop_InterleaveLO8x16:
3625 case Iop_InterleaveHI64x2:
3626 case Iop_InterleaveHI32x4:
3627 case Iop_InterleaveHI16x8:
3628 case Iop_InterleaveHI8x16:
3629 case Iop_CatOddLanes8x16:
3630 case Iop_CatOddLanes16x8:
3631 case Iop_CatOddLanes32x4:
3632 case Iop_CatEvenLanes8x16:
3633 case Iop_CatEvenLanes16x8:
3634 case Iop_CatEvenLanes32x4:
3635 case Iop_InterleaveOddLanes8x16:
3636 case Iop_InterleaveOddLanes16x8:
3637 case Iop_InterleaveOddLanes32x4:
3638 case Iop_InterleaveEvenLanes8x16:
3639 case Iop_InterleaveEvenLanes16x8:
3640 case Iop_InterleaveEvenLanes32x4:
3641 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
3642
3643 case Iop_GetElem8x16:
3644 complainIfUndefined(mce, atom2, NULL);
3645 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3646 case Iop_GetElem16x8:
3647 complainIfUndefined(mce, atom2, NULL);
3648 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3649 case Iop_GetElem32x4:
3650 complainIfUndefined(mce, atom2, NULL);
3651 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3652 case Iop_GetElem64x2:
3653 complainIfUndefined(mce, atom2, NULL);
3654 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3655
3656 /* Perm8x16: rearrange values in left arg using steering values
3657 from right arg. So rearrange the vbits in the same way but
3658 pessimise wrt steering values. Perm32x4 ditto. */
3659 case Iop_Perm8x16:
3660 return mkUifUV128(
3661 mce,
3662 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3663 mkPCast8x16(mce, vatom2)
3664 );
3665 case Iop_Perm32x4:
3666 return mkUifUV128(
3667 mce,
3668 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3669 mkPCast32x4(mce, vatom2)
3670 );
3671
3672    /* These two take the lower 16-bit half of each 32-bit lane, sign/zero
3673       extend it to 32 bits, and multiply the halves together, producing a
3674       32x4 result (and implicitly ignoring half the operand bits).  So
3675 treat it as a bunch of independent 16x8 operations, but then
3676 do 32-bit shifts left-right to copy the lower half results
3677 (which are all 0s or all 1s due to PCasting in binary16Ix8)
3678 into the upper half of each result lane. */
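      /* Worked example (informal): after binary16Ix8 each 16-bit lane
         is all-0s or all-1s.  If the low half of a 32-bit lane is
         0xFFFF (undefined), ShlN32x4 by 16 gives 0xFFFF0000 and the
         arithmetic SarN32x4 by 16 then gives 0xFFFFFFFF, marking the
         whole 32-bit result lane undefined, as intended. */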
3679 case Iop_MullEven16Ux8:
3680 case Iop_MullEven16Sx8: {
3681 IRAtom* at;
3682 at = binary16Ix8(mce,vatom1,vatom2);
3683 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
3684 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
3685 return at;
3686 }
3687
3688 /* Same deal as Iop_MullEven16{S,U}x8 */
3689 case Iop_MullEven8Ux16:
3690 case Iop_MullEven8Sx16: {
3691 IRAtom* at;
3692 at = binary8Ix16(mce,vatom1,vatom2);
3693 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
3694 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
3695 return at;
3696 }
3697
3698 /* Same deal as Iop_MullEven16{S,U}x8 */
3699 case Iop_MullEven32Ux4:
3700 case Iop_MullEven32Sx4: {
3701 IRAtom* at;
3702 at = binary32Ix4(mce,vatom1,vatom2);
3703 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN64x2, at, mkU8(32)));
3704 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN64x2, at, mkU8(32)));
3705 return at;
3706 }
3707
3708 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
3709 32x4 -> 16x8 laneage, discarding the upper half of each lane.
3710       Simply apply the same op to the V bits, since this is really no
3711       more than a data-steering operation. */
3712 case Iop_NarrowBin32to16x8:
3713 case Iop_NarrowBin16to8x16:
3714 case Iop_NarrowBin64to32x4:
3715 return assignNew('V', mce, Ity_V128,
3716 binop(op, vatom1, vatom2));
3717
3718 case Iop_ShrV128:
3719 case Iop_ShlV128:
3720 /* Same scheme as with all other shifts. Note: 10 Nov 05:
3721 this is wrong now, scalar shifts are done properly lazily.
3722 Vector shifts should be fixed too. */
3723 complainIfUndefined(mce, atom2, NULL);
3724 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3725
3726 /* SHA Iops */
3727 case Iop_SHA256:
3728 case Iop_SHA512:
3729 complainIfUndefined(mce, atom2, NULL);
3730 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3731
3732 /* I128-bit data-steering */
3733 case Iop_64HLto128:
3734 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
3735
3736 /* V256-bit SIMD */
3737
3738 case Iop_Max64Fx4:
3739 case Iop_Min64Fx4:
3740 return binary64Fx4(mce, vatom1, vatom2);
3741
3742 case Iop_Max32Fx8:
3743 case Iop_Min32Fx8:
3744 return binary32Fx8(mce, vatom1, vatom2);
3745
3746 /* V256-bit data-steering */
3747 case Iop_V128HLtoV256:
3748 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
3749
3750 /* Scalar floating point */
3751
3752 case Iop_F32toI64S:
3753 case Iop_F32toI64U:
3754 /* I32(rm) x F32 -> I64 */
3755 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3756
3757 case Iop_I64StoF32:
3758 /* I32(rm) x I64 -> F32 */
3759 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3760
3761 case Iop_RoundF64toInt:
3762 case Iop_RoundF64toF32:
3763 case Iop_F64toI64S:
3764 case Iop_F64toI64U:
3765 case Iop_I64StoF64:
3766 case Iop_I64UtoF64:
3767 case Iop_SinF64:
3768 case Iop_CosF64:
3769 case Iop_TanF64:
3770 case Iop_2xm1F64:
3771 case Iop_SqrtF64:
3772 case Iop_RecpExpF64:
3773 /* I32(rm) x I64/F64 -> I64/F64 */
3774 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3775
3776 case Iop_ShlD64:
3777 case Iop_ShrD64:
3778 case Iop_RoundD64toInt:
3779 /* I32(rm) x D64 -> D64 */
3780 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3781
3782 case Iop_ShlD128:
3783 case Iop_ShrD128:
3784 case Iop_RoundD128toInt:
3785 /* I32(rm) x D128 -> D128 */
3786 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3787
3788 case Iop_RoundF128toInt:
3789 /* I32(rm) x F128 -> F128 */
3790 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3791
3792 case Iop_D64toI64S:
3793 case Iop_D64toI64U:
3794 case Iop_I64StoD64:
3795 case Iop_I64UtoD64:
3796 /* I32(rm) x I64/D64 -> D64/I64 */
3797 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3798
3799 case Iop_F32toD32:
3800 case Iop_F64toD32:
3801 case Iop_F128toD32:
3802 case Iop_D32toF32:
3803 case Iop_D64toF32:
3804 case Iop_D128toF32:
3805 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D32/F32 */
3806 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3807
3808 case Iop_F32toD64:
3809 case Iop_F64toD64:
3810 case Iop_F128toD64:
3811 case Iop_D32toF64:
3812 case Iop_D64toF64:
3813 case Iop_D128toF64:
3814 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D64/F64 */
3815 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3816
3817 case Iop_F32toD128:
3818 case Iop_F64toD128:
3819 case Iop_F128toD128:
3820 case Iop_D32toF128:
3821 case Iop_D64toF128:
3822 case Iop_D128toF128:
3823 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D128/F128 */
3824 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3825
3826 case Iop_RoundF32toInt:
3827 case Iop_SqrtF32:
3828 case Iop_RecpExpF32:
3829 /* I32(rm) x I32/F32 -> I32/F32 */
3830 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3831
3832 case Iop_SqrtF128:
3833 /* I32(rm) x F128 -> F128 */
3834 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3835
3836 case Iop_I32StoF32:
3837 case Iop_I32UtoF32:
3838 case Iop_F32toI32S:
3839 case Iop_F32toI32U:
3840 /* First arg is I32 (rounding mode), second is F32/I32 (data). */
3841 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3842
3843 case Iop_F64toF16:
3844 case Iop_F32toF16:
3845 /* First arg is I32 (rounding mode), second is F64/F32 (data). */
3846 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3847
3848 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */
3849 case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32 */
3850 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */
3851 case Iop_D128toI32S: /* IRRoundingMode(I32) x D128 -> signed I32 */
3852 case Iop_D128toI32U: /* IRRoundingMode(I32) x D128 -> unsigned I32 */
3853 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3854
3855 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */
3856 case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64 */
3857 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */
3858       case Iop_D128toD64:   /* IRRoundingMode(I32) x D128 -> D64 */
3859       case Iop_D128toI64S:  /* IRRoundingMode(I32) x D128 -> signed I64  */
3860 case Iop_D128toI64U: /* IRRoundingMode(I32) x D128 -> unsigned I64 */
3861 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3862
3863 case Iop_F64HLtoF128:
3864 case Iop_D64HLtoD128:
3865 return assignNew('V', mce, Ity_I128,
3866 binop(Iop_64HLto128, vatom1, vatom2));
3867
3868 case Iop_F64toI32U:
3869 case Iop_F64toI32S:
3870 case Iop_F64toF32:
3871 case Iop_I64UtoF32:
3872 case Iop_D64toI32U:
3873 case Iop_D64toI32S:
3874          /* First arg is I32 (rounding mode), second is F64/I64/D64 (data). */
3875 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3876
3877 case Iop_D64toD32:
3878 /* First arg is I32 (rounding mode), second is D64 (data). */
3879 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3880
3881 case Iop_F64toI16S:
3882 /* First arg is I32 (rounding mode), second is F64 (data). */
3883 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3884
3885 case Iop_InsertExpD64:
3886 /* I64 x I64 -> D64 */
3887 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3888
3889 case Iop_InsertExpD128:
3890 /* I64 x I128 -> D128 */
3891 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3892
3893 case Iop_CmpF32:
3894 case Iop_CmpF64:
3895 case Iop_CmpF128:
3896 case Iop_CmpD64:
3897 case Iop_CmpD128:
3898 case Iop_CmpExpD64:
3899 case Iop_CmpExpD128:
3900 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3901
3902 /* non-FP after here */
3903
3904 case Iop_DivModU64to32:
3905 case Iop_DivModS64to32:
3906 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3907
3908 case Iop_DivModU128to64:
3909 case Iop_DivModS128to64:
3910 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3911
3912 case Iop_8HLto16:
3913 return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2));
3914 case Iop_16HLto32:
3915 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
3916 case Iop_32HLto64:
3917 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
3918
3919 case Iop_DivModS64to64:
3920 case Iop_MullS64:
3921 case Iop_MullU64: {
3922 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3923 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
3924 return assignNew('V', mce, Ity_I128,
3925 binop(Iop_64HLto128, vHi64, vLo64));
3926 }
3927
3928 case Iop_MullS32:
3929 case Iop_MullU32: {
3930 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3931 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
3932 return assignNew('V', mce, Ity_I64,
3933 binop(Iop_32HLto64, vHi32, vLo32));
3934 }
3935
3936 case Iop_MullS16:
3937 case Iop_MullU16: {
3938 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3939 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
3940 return assignNew('V', mce, Ity_I32,
3941 binop(Iop_16HLto32, vHi16, vLo16));
3942 }
3943
3944 case Iop_MullS8:
3945 case Iop_MullU8: {
3946 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3947 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
3948 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
3949 }
3950
3951 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. */
3952 case Iop_DivS32:
3953 case Iop_DivU32:
3954 case Iop_DivU32E:
3955 case Iop_DivS32E:
3956 case Iop_QAdd32S: /* could probably do better */
3957 case Iop_QSub32S: /* could probably do better */
3958 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3959
3960 case Iop_DivS64:
3961 case Iop_DivU64:
3962 case Iop_DivS64E:
3963 case Iop_DivU64E:
3964 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3965
3966 case Iop_Add32:
3967 if (mce->bogusLiterals || mce->useLLVMworkarounds)
3968 return expensiveAddSub(mce,True,Ity_I32,
3969 vatom1,vatom2, atom1,atom2);
3970 else
3971 goto cheap_AddSub32;
3972 case Iop_Sub32:
3973 if (mce->bogusLiterals)
3974 return expensiveAddSub(mce,False,Ity_I32,
3975 vatom1,vatom2, atom1,atom2);
3976 else
3977 goto cheap_AddSub32;
3978
3979 cheap_AddSub32:
3980 case Iop_Mul32:
3981 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3982
3983 case Iop_CmpORD32S:
3984 case Iop_CmpORD32U:
3985 case Iop_CmpORD64S:
3986 case Iop_CmpORD64U:
3987 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
3988
3989 case Iop_Add64:
3990 if (mce->bogusLiterals || mce->useLLVMworkarounds)
3991 return expensiveAddSub(mce,True,Ity_I64,
3992 vatom1,vatom2, atom1,atom2);
3993 else
3994 goto cheap_AddSub64;
3995 case Iop_Sub64:
3996 if (mce->bogusLiterals)
3997 return expensiveAddSub(mce,False,Ity_I64,
3998 vatom1,vatom2, atom1,atom2);
3999 else
4000 goto cheap_AddSub64;
4001
4002 cheap_AddSub64:
4003 case Iop_Mul64:
4004 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
4005
4006 case Iop_Mul16:
4007 case Iop_Add16:
4008 case Iop_Sub16:
4009 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
4010
4011 case Iop_Mul8:
4012 case Iop_Sub8:
4013 case Iop_Add8:
4014 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
4015
4016 case Iop_CmpEQ64:
4017 case Iop_CmpNE64:
4018 if (mce->bogusLiterals)
4019 goto expensive_cmp64;
4020 else
4021 goto cheap_cmp64;
4022
4023 expensive_cmp64:
4024 case Iop_ExpCmpNE64:
4025 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
4026
4027 cheap_cmp64:
4028 case Iop_CmpLE64S: case Iop_CmpLE64U:
4029 case Iop_CmpLT64U: case Iop_CmpLT64S:
4030 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
4031
4032 case Iop_CmpEQ32:
4033 case Iop_CmpNE32:
4034 if (mce->bogusLiterals)
4035 goto expensive_cmp32;
4036 else
4037 goto cheap_cmp32;
4038
4039 expensive_cmp32:
4040 case Iop_ExpCmpNE32:
4041 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
4042
4043 cheap_cmp32:
4044 case Iop_CmpLE32S: case Iop_CmpLE32U:
4045 case Iop_CmpLT32U: case Iop_CmpLT32S:
4046 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
4047
4048 case Iop_CmpEQ16: case Iop_CmpNE16:
4049 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
4050
4051 case Iop_ExpCmpNE16:
4052 return expensiveCmpEQorNE(mce,Ity_I16, vatom1,vatom2, atom1,atom2 );
4053
4054 case Iop_CmpEQ8: case Iop_CmpNE8:
4055 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
4056
4057 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
4058 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
4059 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
4060 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
4061 /* Just say these all produce a defined result, regardless
4062 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
4063 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
4064
4065 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
4066 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
4067
4068 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
4069 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
4070
4071 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
4072 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
4073
4074 case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
4075 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
4076
4077 case Iop_AndV256:
4078 uifu = mkUifUV256; difd = mkDifDV256;
4079 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
4080 case Iop_AndV128:
4081 uifu = mkUifUV128; difd = mkDifDV128;
4082 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
4083 case Iop_And64:
4084 uifu = mkUifU64; difd = mkDifD64;
4085 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
4086 case Iop_And32:
4087 uifu = mkUifU32; difd = mkDifD32;
4088 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
4089 case Iop_And16:
4090 uifu = mkUifU16; difd = mkDifD16;
4091 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
4092 case Iop_And8:
4093 uifu = mkUifU8; difd = mkDifD8;
4094 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
4095
4096 case Iop_OrV256:
4097 uifu = mkUifUV256; difd = mkDifDV256;
4098 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
4099 case Iop_OrV128:
4100 uifu = mkUifUV128; difd = mkDifDV128;
4101 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
4102 case Iop_Or64:
4103 uifu = mkUifU64; difd = mkDifD64;
4104 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
4105 case Iop_Or32:
4106 uifu = mkUifU32; difd = mkDifD32;
4107 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
4108 case Iop_Or16:
4109 uifu = mkUifU16; difd = mkDifD16;
4110 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
4111 case Iop_Or8:
4112 uifu = mkUifU8; difd = mkDifD8;
4113 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
4114
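      /* Informal note on the scheme below: the naive shadow for And/Or
         would be just UifU(vatom1, vatom2), but that over-reports.
         For And, a result bit whose corresponding bit is a *defined 0*
         in either operand is itself defined (it must be 0); dually for
         Or with defined 1s.  The improve() terms compute, per operand,
         exactly those forcing bits, and the DifDs then clear the
         corresponding undefinedness out of the pessimistic UifU. */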
4115 do_And_Or:
4116 return
4117 assignNew(
4118 'V', mce,
4119 and_or_ty,
4120 difd(mce, uifu(mce, vatom1, vatom2),
4121 difd(mce, improve(mce, atom1, vatom1),
4122 improve(mce, atom2, vatom2) ) ) );
4123
4124 case Iop_Xor8:
4125 return mkUifU8(mce, vatom1, vatom2);
4126 case Iop_Xor16:
4127 return mkUifU16(mce, vatom1, vatom2);
4128 case Iop_Xor32:
4129 return mkUifU32(mce, vatom1, vatom2);
4130 case Iop_Xor64:
4131 return mkUifU64(mce, vatom1, vatom2);
4132 case Iop_XorV128:
4133 return mkUifUV128(mce, vatom1, vatom2);
4134 case Iop_XorV256:
4135 return mkUifUV256(mce, vatom1, vatom2);
4136
4137 /* V256-bit SIMD */
4138
4139 case Iop_ShrN16x16:
4140 case Iop_ShrN32x8:
4141 case Iop_ShrN64x4:
4142 case Iop_SarN16x16:
4143 case Iop_SarN32x8:
4144 case Iop_ShlN16x16:
4145 case Iop_ShlN32x8:
4146 case Iop_ShlN64x4:
4147 /* Same scheme as with all other shifts. Note: 22 Oct 05:
4148 this is wrong now, scalar shifts are done properly lazily.
4149 Vector shifts should be fixed too. */
4150 complainIfUndefined(mce, atom2, NULL);
4151 return assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2));
4152
4153 case Iop_QSub8Ux32:
4154 case Iop_QSub8Sx32:
4155 case Iop_Sub8x32:
4156 case Iop_Min8Ux32:
4157 case Iop_Min8Sx32:
4158 case Iop_Max8Ux32:
4159 case Iop_Max8Sx32:
4160 case Iop_CmpGT8Sx32:
4161 case Iop_CmpEQ8x32:
4162 case Iop_Avg8Ux32:
4163 case Iop_QAdd8Ux32:
4164 case Iop_QAdd8Sx32:
4165 case Iop_Add8x32:
4166 return binary8Ix32(mce, vatom1, vatom2);
4167
4168 case Iop_QSub16Ux16:
4169 case Iop_QSub16Sx16:
4170 case Iop_Sub16x16:
4171 case Iop_Mul16x16:
4172 case Iop_MulHi16Sx16:
4173 case Iop_MulHi16Ux16:
4174 case Iop_Min16Sx16:
4175 case Iop_Min16Ux16:
4176 case Iop_Max16Sx16:
4177 case Iop_Max16Ux16:
4178 case Iop_CmpGT16Sx16:
4179 case Iop_CmpEQ16x16:
4180 case Iop_Avg16Ux16:
4181 case Iop_QAdd16Ux16:
4182 case Iop_QAdd16Sx16:
4183 case Iop_Add16x16:
4184 return binary16Ix16(mce, vatom1, vatom2);
4185
4186 case Iop_Sub32x8:
4187 case Iop_CmpGT32Sx8:
4188 case Iop_CmpEQ32x8:
4189 case Iop_Add32x8:
4190 case Iop_Max32Ux8:
4191 case Iop_Max32Sx8:
4192 case Iop_Min32Ux8:
4193 case Iop_Min32Sx8:
4194 case Iop_Mul32x8:
4195 return binary32Ix8(mce, vatom1, vatom2);
4196
4197 case Iop_Sub64x4:
4198 case Iop_Add64x4:
4199 case Iop_CmpEQ64x4:
4200 case Iop_CmpGT64Sx4:
4201 return binary64Ix4(mce, vatom1, vatom2);
4202
4203 /* Perm32x8: rearrange values in left arg using steering values
4204 from right arg. So rearrange the vbits in the same way but
4205 pessimise wrt steering values. */
4206 case Iop_Perm32x8:
4207 return mkUifUV256(
4208 mce,
4209 assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2)),
4210 mkPCast32x8(mce, vatom2)
4211 );
4212
4213 /* Q-and-Qshift-by-vector of the form (V128, V128) -> V256.
4214 Handle the shifted results in the same way that other
4215 binary Q ops are handled, eg QSub: UifU the two args,
4216 then pessimise -- which is binaryNIxM. But for the upper
4217      V128, we need to generate just 1 bit, which is the
4218 pessimised shift result, with 127 defined zeroes above it.
4219
4220      Note that this is overly pessimistic, in that only the
4221 bottom 8 bits of each lane of the second arg determine the shift
4222 amount. Really we ought to ignore any undefinedness in the
4223 rest of the lanes of the second arg. */
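      /* Informal sketch (names as used below): with
         shV = binaryNIxM(vatom1, vatom2), the result assembled is

            V128HLtoV256( PCast-to-lsb-of-V128(shV), shV )

         i.e. the lower V128 is the pessimised shift vbits and the
         upper V128 is all-defined except its least significant bit,
         which summarises shV. */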
4224 case Iop_QandSQsh64x2: case Iop_QandUQsh64x2:
4225 case Iop_QandSQRsh64x2: case Iop_QandUQRsh64x2:
4226 case Iop_QandSQsh32x4: case Iop_QandUQsh32x4:
4227 case Iop_QandSQRsh32x4: case Iop_QandUQRsh32x4:
4228 case Iop_QandSQsh16x8: case Iop_QandUQsh16x8:
4229 case Iop_QandSQRsh16x8: case Iop_QandUQRsh16x8:
4230 case Iop_QandSQsh8x16: case Iop_QandUQsh8x16:
4231 case Iop_QandSQRsh8x16: case Iop_QandUQRsh8x16:
4232 {
4233 // The function to generate the pessimised shift result
4234 IRAtom* (*binaryNIxM)(MCEnv*,IRAtom*,IRAtom*) = NULL;
4235 switch (op) {
4236 case Iop_QandSQsh64x2:
4237 case Iop_QandUQsh64x2:
4238 case Iop_QandSQRsh64x2:
4239 case Iop_QandUQRsh64x2:
4240 binaryNIxM = binary64Ix2;
4241 break;
4242 case Iop_QandSQsh32x4:
4243 case Iop_QandUQsh32x4:
4244 case Iop_QandSQRsh32x4:
4245 case Iop_QandUQRsh32x4:
4246 binaryNIxM = binary32Ix4;
4247 break;
4248 case Iop_QandSQsh16x8:
4249 case Iop_QandUQsh16x8:
4250 case Iop_QandSQRsh16x8:
4251 case Iop_QandUQRsh16x8:
4252 binaryNIxM = binary16Ix8;
4253 break;
4254 case Iop_QandSQsh8x16:
4255 case Iop_QandUQsh8x16:
4256 case Iop_QandSQRsh8x16:
4257 case Iop_QandUQRsh8x16:
4258 binaryNIxM = binary8Ix16;
4259 break;
4260 default:
4261 tl_assert(0);
4262 }
4263 tl_assert(binaryNIxM);
4264 // Pessimised shift result, shV[127:0]
4265 IRAtom* shV = binaryNIxM(mce, vatom1, vatom2);
4266 // Generates: Def--(127)--Def PCast-to-I1(shV)
4267 IRAtom* qV = mkPCastXXtoXXlsb(mce, shV, Ity_V128);
4268 // and assemble the result
4269 return assignNew('V', mce, Ity_V256,
4270 binop(Iop_V128HLtoV256, qV, shV));
4271 }
4272
4273 default:
4274 ppIROp(op);
4275 VG_(tool_panic)("memcheck:expr2vbits_Binop");
4276 }
4277 }
4278
4279
4280 static
4281 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
4282 {
4283 /* For the widening operations {8,16,32}{U,S}to{16,32,64}, the
4284 selection of shadow operation implicitly duplicates the logic in
4285 do_shadow_LoadG and should be kept in sync (in the very unlikely
4286 event that the interpretation of such widening ops changes in
4287 future). See comment in do_shadow_LoadG. */
4288 IRAtom* vatom = expr2vbits( mce, atom );
4289 tl_assert(isOriginalAtom(mce,atom));
4290 switch (op) {
4291
4292 case Iop_Abs64Fx2:
4293 case Iop_Neg64Fx2:
4294 case Iop_RSqrtEst64Fx2:
4295 case Iop_RecipEst64Fx2:
4296 return unary64Fx2(mce, vatom);
4297
4298 case Iop_Sqrt64F0x2:
4299 return unary64F0x2(mce, vatom);
4300
4301 case Iop_Sqrt32Fx8:
4302 case Iop_RSqrtEst32Fx8:
4303 case Iop_RecipEst32Fx8:
4304 return unary32Fx8(mce, vatom);
4305
4306 case Iop_Sqrt64Fx4:
4307 return unary64Fx4(mce, vatom);
4308
4309 case Iop_RecipEst32Fx4:
4310 case Iop_I32UtoFx4:
4311 case Iop_I32StoFx4:
4312 case Iop_QFtoI32Ux4_RZ:
4313 case Iop_QFtoI32Sx4_RZ:
4314 case Iop_RoundF32x4_RM:
4315 case Iop_RoundF32x4_RP:
4316 case Iop_RoundF32x4_RN:
4317 case Iop_RoundF32x4_RZ:
4318 case Iop_RecipEst32Ux4:
4319 case Iop_Abs32Fx4:
4320 case Iop_Neg32Fx4:
4321 case Iop_RSqrtEst32Fx4:
4322 return unary32Fx4(mce, vatom);
4323
4324 case Iop_I32UtoFx2:
4325 case Iop_I32StoFx2:
4326 case Iop_RecipEst32Fx2:
4327 case Iop_RecipEst32Ux2:
4328 case Iop_Abs32Fx2:
4329 case Iop_Neg32Fx2:
4330 case Iop_RSqrtEst32Fx2:
4331 return unary32Fx2(mce, vatom);
4332
4333 case Iop_Sqrt32F0x4:
4334 case Iop_RSqrtEst32F0x4:
4335 case Iop_RecipEst32F0x4:
4336 return unary32F0x4(mce, vatom);
4337
4338 case Iop_32UtoV128:
4339 case Iop_64UtoV128:
4340 case Iop_Dup8x16:
4341 case Iop_Dup16x8:
4342 case Iop_Dup32x4:
4343 case Iop_Reverse1sIn8_x16:
4344 case Iop_Reverse8sIn16_x8:
4345 case Iop_Reverse8sIn32_x4:
4346 case Iop_Reverse16sIn32_x4:
4347 case Iop_Reverse8sIn64_x2:
4348 case Iop_Reverse16sIn64_x2:
4349 case Iop_Reverse32sIn64_x2:
4350 case Iop_V256toV128_1: case Iop_V256toV128_0:
4351 case Iop_ZeroHI64ofV128:
4352 case Iop_ZeroHI96ofV128:
4353 case Iop_ZeroHI112ofV128:
4354 case Iop_ZeroHI120ofV128:
4355 return assignNew('V', mce, Ity_V128, unop(op, vatom));
4356
4357 case Iop_F128HItoF64: /* F128 -> high half of F128 */
4358 case Iop_D128HItoD64: /* D128 -> high half of D128 */
4359 return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
4360 case Iop_F128LOtoF64: /* F128 -> low half of F128 */
4361 case Iop_D128LOtoD64: /* D128 -> low half of D128 */
4362 return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));
4363
4364 case Iop_NegF128:
4365 case Iop_AbsF128:
4366 return mkPCastTo(mce, Ity_I128, vatom);
4367
4368 case Iop_I32StoF128: /* signed I32 -> F128 */
4369 case Iop_I64StoF128: /* signed I64 -> F128 */
4370 case Iop_I32UtoF128: /* unsigned I32 -> F128 */
4371 case Iop_I64UtoF128: /* unsigned I64 -> F128 */
4372 case Iop_F32toF128: /* F32 -> F128 */
4373 case Iop_F64toF128: /* F64 -> F128 */
4374       case Iop_I32StoD128: /* signed I32 -> D128 */
4375 case Iop_I64StoD128: /* signed I64 -> D128 */
4376 case Iop_I32UtoD128: /* unsigned I32 -> D128 */
4377 case Iop_I64UtoD128: /* unsigned I64 -> D128 */
4378 return mkPCastTo(mce, Ity_I128, vatom);
4379
4380 case Iop_F16toF64:
4381 case Iop_F32toF64:
4382 case Iop_I32StoF64:
4383 case Iop_I32UtoF64:
4384 case Iop_NegF64:
4385 case Iop_AbsF64:
4386 case Iop_RSqrtEst5GoodF64:
4387 case Iop_RoundF64toF64_NEAREST:
4388 case Iop_RoundF64toF64_NegINF:
4389 case Iop_RoundF64toF64_PosINF:
4390 case Iop_RoundF64toF64_ZERO:
4391 case Iop_Clz64:
4392 case Iop_D32toD64:
4393 case Iop_I32StoD64:
4394 case Iop_I32UtoD64:
4395 case Iop_ExtractExpD64: /* D64 -> I64 */
4396 case Iop_ExtractExpD128: /* D128 -> I64 */
4397 case Iop_ExtractSigD64: /* D64 -> I64 */
4398 case Iop_ExtractSigD128: /* D128 -> I64 */
4399 case Iop_DPBtoBCD:
4400 case Iop_BCDtoDPB:
4401 return mkPCastTo(mce, Ity_I64, vatom);
4402
4403 case Iop_D64toD128:
4404 return mkPCastTo(mce, Ity_I128, vatom);
4405
4406 case Iop_Clz32:
4407 case Iop_TruncF64asF32:
4408 case Iop_NegF32:
4409 case Iop_AbsF32:
4410 case Iop_F16toF32:
4411 return mkPCastTo(mce, Ity_I32, vatom);
4412
4413 case Iop_Ctz32:
4414 case Iop_Ctz64:
4415 return expensiveCountTrailingZeroes(mce, op, atom, vatom);
4416
4417 case Iop_1Uto64:
4418 case Iop_1Sto64:
4419 case Iop_8Uto64:
4420 case Iop_8Sto64:
4421 case Iop_16Uto64:
4422 case Iop_16Sto64:
4423 case Iop_32Sto64:
4424 case Iop_32Uto64:
4425 case Iop_V128to64:
4426 case Iop_V128HIto64:
4427 case Iop_128HIto64:
4428 case Iop_128to64:
4429 case Iop_Dup8x8:
4430 case Iop_Dup16x4:
4431 case Iop_Dup32x2:
4432 case Iop_Reverse8sIn16_x4:
4433 case Iop_Reverse8sIn32_x2:
4434 case Iop_Reverse16sIn32_x2:
4435 case Iop_Reverse8sIn64_x1:
4436 case Iop_Reverse16sIn64_x1:
4437 case Iop_Reverse32sIn64_x1:
4438 case Iop_V256to64_0: case Iop_V256to64_1:
4439 case Iop_V256to64_2: case Iop_V256to64_3:
4440 return assignNew('V', mce, Ity_I64, unop(op, vatom));
4441
4442 case Iop_64to32:
4443 case Iop_64HIto32:
4444 case Iop_1Uto32:
4445 case Iop_1Sto32:
4446 case Iop_8Uto32:
4447 case Iop_16Uto32:
4448 case Iop_16Sto32:
4449 case Iop_8Sto32:
4450 case Iop_V128to32:
4451 return assignNew('V', mce, Ity_I32, unop(op, vatom));
4452
4453 case Iop_8Sto16:
4454 case Iop_8Uto16:
4455 case Iop_32to16:
4456 case Iop_32HIto16:
4457 case Iop_64to16:
4458 case Iop_GetMSBs8x16:
4459 return assignNew('V', mce, Ity_I16, unop(op, vatom));
4460
4461 case Iop_1Uto8:
4462 case Iop_1Sto8:
4463 case Iop_16to8:
4464 case Iop_16HIto8:
4465 case Iop_32to8:
4466 case Iop_64to8:
4467 case Iop_GetMSBs8x8:
4468 return assignNew('V', mce, Ity_I8, unop(op, vatom));
4469
4470 case Iop_32to1:
4471 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
4472
4473 case Iop_64to1:
4474 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
4475
4476 case Iop_ReinterpF64asI64:
4477 case Iop_ReinterpI64asF64:
4478 case Iop_ReinterpI32asF32:
4479 case Iop_ReinterpF32asI32:
4480 case Iop_ReinterpI64asD64:
4481 case Iop_ReinterpD64asI64:
4482 case Iop_NotV256:
4483 case Iop_NotV128:
4484 case Iop_Not64:
4485 case Iop_Not32:
4486 case Iop_Not16:
4487 case Iop_Not8:
4488 case Iop_Not1:
4489 return vatom;
4490
4491 case Iop_CmpNEZ8x8:
4492 case Iop_Cnt8x8:
4493 case Iop_Clz8x8:
4494 case Iop_Cls8x8:
4495 case Iop_Abs8x8:
4496 return mkPCast8x8(mce, vatom);
4497
4498 case Iop_CmpNEZ8x16:
4499 case Iop_Cnt8x16:
4500 case Iop_Clz8x16:
4501 case Iop_Cls8x16:
4502 case Iop_Abs8x16:
4503 return mkPCast8x16(mce, vatom);
4504
4505 case Iop_CmpNEZ16x4:
4506 case Iop_Clz16x4:
4507 case Iop_Cls16x4:
4508 case Iop_Abs16x4:
4509 return mkPCast16x4(mce, vatom);
4510
4511 case Iop_CmpNEZ16x8:
4512 case Iop_Clz16x8:
4513 case Iop_Cls16x8:
4514 case Iop_Abs16x8:
4515 return mkPCast16x8(mce, vatom);
4516
4517 case Iop_CmpNEZ32x2:
4518 case Iop_Clz32x2:
4519 case Iop_Cls32x2:
4520 case Iop_FtoI32Ux2_RZ:
4521 case Iop_FtoI32Sx2_RZ:
4522 case Iop_Abs32x2:
4523 return mkPCast32x2(mce, vatom);
4524
4525 case Iop_CmpNEZ32x4:
4526 case Iop_Clz32x4:
4527 case Iop_Cls32x4:
4528 case Iop_FtoI32Ux4_RZ:
4529 case Iop_FtoI32Sx4_RZ:
4530 case Iop_Abs32x4:
4531 case Iop_RSqrtEst32Ux4:
4532 return mkPCast32x4(mce, vatom);
4533
4534 case Iop_CmpwNEZ32:
4535 return mkPCastTo(mce, Ity_I32, vatom);
4536
4537 case Iop_CmpwNEZ64:
4538 return mkPCastTo(mce, Ity_I64, vatom);
4539
4540 case Iop_CmpNEZ64x2:
4541 case Iop_CipherSV128:
4542 case Iop_Clz64x2:
4543 case Iop_Abs64x2:
4544 return mkPCast64x2(mce, vatom);
4545
4546 case Iop_PwBitMtxXpose64x2:
4547 return assignNew('V', mce, Ity_V128, unop(op, vatom));
4548
4549 case Iop_NarrowUn16to8x8:
4550 case Iop_NarrowUn32to16x4:
4551 case Iop_NarrowUn64to32x2:
4552 case Iop_QNarrowUn16Sto8Sx8:
4553 case Iop_QNarrowUn16Sto8Ux8:
4554 case Iop_QNarrowUn16Uto8Ux8:
4555 case Iop_QNarrowUn32Sto16Sx4:
4556 case Iop_QNarrowUn32Sto16Ux4:
4557 case Iop_QNarrowUn32Uto16Ux4:
4558 case Iop_QNarrowUn64Sto32Sx2:
4559 case Iop_QNarrowUn64Sto32Ux2:
4560 case Iop_QNarrowUn64Uto32Ux2:
4561 return vectorNarrowUnV128(mce, op, vatom);
4562
4563 case Iop_Widen8Sto16x8:
4564 case Iop_Widen8Uto16x8:
4565 case Iop_Widen16Sto32x4:
4566 case Iop_Widen16Uto32x4:
4567 case Iop_Widen32Sto64x2:
4568 case Iop_Widen32Uto64x2:
4569 return vectorWidenI64(mce, op, vatom);
4570
4571 case Iop_PwAddL32Ux2:
4572 case Iop_PwAddL32Sx2:
4573 return mkPCastTo(mce, Ity_I64,
4574 assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
4575
4576 case Iop_PwAddL16Ux4:
4577 case Iop_PwAddL16Sx4:
4578 return mkPCast32x2(mce,
4579 assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
4580
4581 case Iop_PwAddL8Ux8:
4582 case Iop_PwAddL8Sx8:
4583 return mkPCast16x4(mce,
4584 assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
4585
4586 case Iop_PwAddL32Ux4:
4587 case Iop_PwAddL32Sx4:
4588 return mkPCast64x2(mce,
4589 assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
4590
4591 case Iop_PwAddL16Ux8:
4592 case Iop_PwAddL16Sx8:
4593 return mkPCast32x4(mce,
4594 assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
4595
4596 case Iop_PwAddL8Ux16:
4597 case Iop_PwAddL8Sx16:
4598 return mkPCast16x8(mce,
4599 assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
4600
4601 case Iop_I64UtoF32:
4602 default:
4603 ppIROp(op);
4604 VG_(tool_panic)("memcheck:expr2vbits_Unop");
4605 }
4606 }
4607
4608
4609 /* Worker function -- do not call directly. See comments on
4610 expr2vbits_Load for the meaning of |guard|.
4611
4612 Generates IR to (1) perform a definedness test of |addr|, (2)
4613 perform a validity test of |addr|, and (3) return the Vbits for the
4614 location indicated by |addr|. All of this only happens when
4615 |guard| is NULL or |guard| evaluates to True at run time.
4616
4617 If |guard| evaluates to False at run time, the returned value is
4618    the IR-mandated 0x55..55 value, and no checks or shadow loads are
4619 performed.
4620
4621 The definedness of |guard| itself is not checked. That is assumed
4622 to have been done before this point, by the caller. */
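/* Informal sketch of the IR this generates for, e.g., a little-endian
   I64 load with a non-NULL guard (helper names as in this file):

      complainIfUndefined(addr, guard)   -- vbits check on the address
      t_vbits = DIRTY guard ::: MC_(helperc_LOADV64le)(addr + bias)

   and t_vbits is returned as the shadow of the loaded value.  V128
   and V256 loads differ only in returning the vbits via a vector
   out-parameter (IRExpr_VECRET). */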
4623 static
4624 IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
4625 IREndness end, IRType ty,
4626 IRAtom* addr, UInt bias, IRAtom* guard )
4627 {
4628 tl_assert(isOriginalAtom(mce,addr));
4629 tl_assert(end == Iend_LE || end == Iend_BE);
4630
4631 /* First, emit a definedness test for the address. This also sets
4632 the address (shadow) to 'defined' following the test. */
4633 complainIfUndefined( mce, addr, guard );
4634
4635 /* Now cook up a call to the relevant helper function, to read the
4636 data V bits from shadow memory. */
4637 ty = shadowTypeV(ty);
4638
4639 void* helper = NULL;
4640 const HChar* hname = NULL;
4641 Bool ret_via_outparam = False;
4642
4643 if (end == Iend_LE) {
4644 switch (ty) {
4645 case Ity_V256: helper = &MC_(helperc_LOADV256le);
4646 hname = "MC_(helperc_LOADV256le)";
4647 ret_via_outparam = True;
4648 break;
4649 case Ity_V128: helper = &MC_(helperc_LOADV128le);
4650 hname = "MC_(helperc_LOADV128le)";
4651 ret_via_outparam = True;
4652 break;
4653 case Ity_I64: helper = &MC_(helperc_LOADV64le);
4654 hname = "MC_(helperc_LOADV64le)";
4655 break;
4656 case Ity_I32: helper = &MC_(helperc_LOADV32le);
4657 hname = "MC_(helperc_LOADV32le)";
4658 break;
4659 case Ity_I16: helper = &MC_(helperc_LOADV16le);
4660 hname = "MC_(helperc_LOADV16le)";
4661 break;
4662 case Ity_I8: helper = &MC_(helperc_LOADV8);
4663 hname = "MC_(helperc_LOADV8)";
4664 break;
4665 default: ppIRType(ty);
4666 VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(LE)");
4667 }
4668 } else {
4669 switch (ty) {
4670 case Ity_V256: helper = &MC_(helperc_LOADV256be);
4671 hname = "MC_(helperc_LOADV256be)";
4672 ret_via_outparam = True;
4673 break;
4674 case Ity_V128: helper = &MC_(helperc_LOADV128be);
4675 hname = "MC_(helperc_LOADV128be)";
4676 ret_via_outparam = True;
4677 break;
4678 case Ity_I64: helper = &MC_(helperc_LOADV64be);
4679 hname = "MC_(helperc_LOADV64be)";
4680 break;
4681 case Ity_I32: helper = &MC_(helperc_LOADV32be);
4682 hname = "MC_(helperc_LOADV32be)";
4683 break;
4684 case Ity_I16: helper = &MC_(helperc_LOADV16be);
4685 hname = "MC_(helperc_LOADV16be)";
4686 break;
4687 case Ity_I8: helper = &MC_(helperc_LOADV8);
4688 hname = "MC_(helperc_LOADV8)";
4689 break;
4690 default: ppIRType(ty);
4691 VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(BE)");
4692 }
4693 }
4694
4695 tl_assert(helper);
4696 tl_assert(hname);
4697
4698 /* Generate the actual address into addrAct. */
4699 IRAtom* addrAct;
4700 if (bias == 0) {
4701 addrAct = addr;
4702 } else {
4703 IROp mkAdd;
4704 IRAtom* eBias;
4705 IRType tyAddr = mce->hWordTy;
4706 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
4707 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
4708 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
4709 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
4710 }
4711
4712 /* We need to have a place to park the V bits we're just about to
4713 read. */
4714 IRTemp datavbits = newTemp(mce, ty, VSh);
4715
4716 /* Here's the call. */
4717 IRDirty* di;
4718 if (ret_via_outparam) {
4719 di = unsafeIRDirty_1_N( datavbits,
4720 2/*regparms*/,
4721 hname, VG_(fnptr_to_fnentry)( helper ),
4722 mkIRExprVec_2( IRExpr_VECRET(), addrAct ) );
4723 } else {
4724 di = unsafeIRDirty_1_N( datavbits,
4725 1/*regparms*/,
4726 hname, VG_(fnptr_to_fnentry)( helper ),
4727 mkIRExprVec_1( addrAct ) );
4728 }
4729
4730 setHelperAnns( mce, di );
4731 if (guard) {
4732 di->guard = guard;
4733 /* Ideally the didn't-happen return value here would be all-ones
4734 (all-undefined), so it'd be obvious if it got used
4735 inadvertently. We can get by with the IR-mandated default
4736 value (0b01 repeating, 0x55 etc) as that'll still look pretty
4737 undefined if it ever leaks out. */
4738 }
4739 stmt( 'V', mce, IRStmt_Dirty(di) );
4740
4741 return mkexpr(datavbits);
4742 }
4743
4744
4745 /* Generate IR to do a shadow load. The helper is expected to check
4746 the validity of the address and return the V bits for that address.
4747 This can optionally be controlled by a guard, which is assumed to
4748 be True if NULL. In the case where the guard is False at runtime,
4749 the helper will return the didn't-do-the-call value of 0x55..55.
4750 Since that means "completely undefined result", the caller of
4751 this function will need to fix up the result somehow in that
4752 case.
4753
4754 Caller of this function is also expected to have checked the
4755 definedness of |guard| before this point.
4756 */
4757 static
4758 IRAtom* expr2vbits_Load ( MCEnv* mce,
4759 IREndness end, IRType ty,
4760 IRAtom* addr, UInt bias,
4761 IRAtom* guard )
4762 {
4763 tl_assert(end == Iend_LE || end == Iend_BE);
4764 switch (shadowTypeV(ty)) {
4765 case Ity_I8:
4766 case Ity_I16:
4767 case Ity_I32:
4768 case Ity_I64:
4769 case Ity_V128:
4770 case Ity_V256:
4771 return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard);
4772 default:
4773 VG_(tool_panic)("expr2vbits_Load");
4774 }
4775 }
4776
4777
4778 /* The most general handler for guarded loads. Assumes the
4779 definedness of GUARD has already been checked by the caller. A
4780 GUARD of NULL is assumed to mean "always True". Generates code to
4781 check the definedness and validity of ADDR.
4782
4783 Generate IR to do a shadow load from ADDR and return the V bits.
4784 The loaded type is TY. The loaded data is then (shadow) widened by
4785 using VWIDEN, which can be Iop_INVALID to denote a no-op. If GUARD
4786 evaluates to False at run time then the returned Vbits are simply
4787 VALT instead. Note therefore that the argument type of VWIDEN must
4788 be TY and the result type of VWIDEN must equal the type of VALT.
4789 */
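/* Informal sketch: the shadow expression assembled below is

      ITE(GUARD, VWIDEN(load-vbits(ADDR)), VALT)

   with a NULL GUARD turned into the constant True so that iropt can
   fold the ITE away in the unconditional case. */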
4790 static
4791 IRAtom* expr2vbits_Load_guarded_General ( MCEnv* mce,
4792 IREndness end, IRType ty,
4793 IRAtom* addr, UInt bias,
4794 IRAtom* guard,
4795 IROp vwiden, IRAtom* valt )
4796 {
4797 /* Sanity check the conversion operation, and also set TYWIDE. */
4798 IRType tyWide = Ity_INVALID;
4799 switch (vwiden) {
4800 case Iop_INVALID:
4801 tyWide = ty;
4802 break;
4803 case Iop_16Uto32: case Iop_16Sto32: case Iop_8Uto32: case Iop_8Sto32:
4804 tyWide = Ity_I32;
4805 break;
4806 default:
4807 VG_(tool_panic)("memcheck:expr2vbits_Load_guarded_General");
4808 }
4809
4810 /* If the guard evaluates to True, this will hold the loaded V bits
4811 at TY. If the guard evaluates to False, this will be all
4812 ones, meaning "all undefined", in which case we will have to
4813 replace it using an ITE below. */
4814 IRAtom* iftrue1
4815 = assignNew('V', mce, ty,
4816 expr2vbits_Load(mce, end, ty, addr, bias, guard));
4817 /* Now (shadow-) widen the loaded V bits to the desired width. In
4818 the guard-is-False case, the allowable widening operators will
4819 in the worst case (unsigned widening) at least leave the
4820 pre-widened part as being marked all-undefined, and in the best
4821 case (signed widening) mark the whole widened result as
4822 undefined. Anyway, it doesn't matter really, since in this case
4823 we will replace said value with the default value |valt| using an
4824 ITE. */
4825 IRAtom* iftrue2
4826 = vwiden == Iop_INVALID
4827 ? iftrue1
4828 : assignNew('V', mce, tyWide, unop(vwiden, iftrue1));
4829 /* These are the V bits we will return if the load doesn't take
4830 place. */
4831 IRAtom* iffalse
4832 = valt;
4833 /* Prepare the cond for the ITE. Convert a NULL cond into
4834 something that iropt knows how to fold out later. */
4835 IRAtom* cond
4836 = guard == NULL ? mkU1(1) : guard;
4837 /* And assemble the final result. */
4838 return assignNew('V', mce, tyWide, IRExpr_ITE(cond, iftrue2, iffalse));
4839 }
4840
4841
4842 /* A simpler handler for guarded loads, in which there is no
4843 conversion operation, and the default V bit return (when the guard
4844 evaluates to False at runtime) is "all defined". If there is no
4845 guard expression or the guard is always TRUE this function behaves
4846 like expr2vbits_Load. It is assumed that definedness of GUARD has
4847 already been checked at the call site. */
4848 static
4849 IRAtom* expr2vbits_Load_guarded_Simple ( MCEnv* mce,
4850 IREndness end, IRType ty,
4851 IRAtom* addr, UInt bias,
4852 IRAtom *guard )
4853 {
4854 return expr2vbits_Load_guarded_General(
4855 mce, end, ty, addr, bias, guard, Iop_INVALID, definedOfType(ty)
4856 );
4857 }
4858
4859
4860 static
4861 IRAtom* expr2vbits_ITE ( MCEnv* mce,
4862 IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
4863 {
4864 IRAtom *vbitsC, *vbits0, *vbits1;
4865 IRType ty;
4866 /* Given ITE(cond, iftrue, iffalse), generate
4867 ITE(cond, iftrue#, iffalse#) `UifU` PCast(cond#)
4868 That is, steer the V bits like the originals, but trash the
4869 result if the steering value is undefined. This gives
4870 lazy propagation. */
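   /* Worked consequence (informal): if cond# is (partially) undefined,
      PCast(cond#) is all-ones at the result type and the UifU forces
      the entire result to undefined; if cond# is fully defined,
      PCast(cond#) is all-zeroes and the result is just the selected
      operand's vbits. */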
4871 tl_assert(isOriginalAtom(mce, cond));
4872 tl_assert(isOriginalAtom(mce, iftrue));
4873 tl_assert(isOriginalAtom(mce, iffalse));
4874
4875 vbitsC = expr2vbits(mce, cond);
4876 vbits1 = expr2vbits(mce, iftrue);
4877 vbits0 = expr2vbits(mce, iffalse);
4878 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
4879
4880 return
4881 mkUifU(mce, ty, assignNew('V', mce, ty,
4882 IRExpr_ITE(cond, vbits1, vbits0)),
4883 mkPCastTo(mce, ty, vbitsC) );
4884 }
4885
4886 /* --------- This is the main expression-handling function. --------- */
4887
4888 static
4889 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
4890 {
4891 switch (e->tag) {
4892
4893 case Iex_Get:
4894 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
4895
4896 case Iex_GetI:
4897 return shadow_GETI( mce, e->Iex.GetI.descr,
4898 e->Iex.GetI.ix, e->Iex.GetI.bias );
4899
4900 case Iex_RdTmp:
4901 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
4902
4903 case Iex_Const:
4904 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
4905
4906 case Iex_Qop:
4907 return expr2vbits_Qop(
4908 mce,
4909 e->Iex.Qop.details->op,
4910 e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
4911 e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
4912 );
4913
4914 case Iex_Triop:
4915 return expr2vbits_Triop(
4916 mce,
4917 e->Iex.Triop.details->op,
4918 e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
4919 e->Iex.Triop.details->arg3
4920 );
4921
4922 case Iex_Binop:
4923 return expr2vbits_Binop(
4924 mce,
4925 e->Iex.Binop.op,
4926 e->Iex.Binop.arg1, e->Iex.Binop.arg2
4927 );
4928
4929 case Iex_Unop:
4930 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
4931
4932 case Iex_Load:
4933 return expr2vbits_Load( mce, e->Iex.Load.end,
4934 e->Iex.Load.ty,
4935 e->Iex.Load.addr, 0/*addr bias*/,
4936 NULL/* guard == "always True"*/ );
4937
4938 case Iex_CCall:
4939 return mkLazyN( mce, e->Iex.CCall.args,
4940 e->Iex.CCall.retty,
4941 e->Iex.CCall.cee );
4942
4943 case Iex_ITE:
4944 return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
4945 e->Iex.ITE.iffalse);
4946
4947 default:
4948 VG_(printf)("\n");
4949 ppIRExpr(e);
4950 VG_(printf)("\n");
4951 VG_(tool_panic)("memcheck: expr2vbits");
4952 }
4953 }
4954
4955 /*------------------------------------------------------------*/
4956 /*--- Generate shadow stmts from all kinds of IRStmts. ---*/
4957 /*------------------------------------------------------------*/
4958
4959 /* Widen a value to the host word size. */
4960
4961 static
4962 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
4963 {
4964 IRType ty, tyH;
4965
4966    /* vatom is a vbits-value and as such can only have a shadow type. */
4967 tl_assert(isShadowAtom(mce,vatom));
4968
4969 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
4970 tyH = mce->hWordTy;
4971
4972 if (tyH == Ity_I32) {
4973 switch (ty) {
4974 case Ity_I32:
4975 return vatom;
4976 case Ity_I16:
4977 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
4978 case Ity_I8:
4979 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
4980 default:
4981 goto unhandled;
4982 }
4983 } else
4984 if (tyH == Ity_I64) {
4985 switch (ty) {
4986 case Ity_I32:
4987 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
4988 case Ity_I16:
4989 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4990 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
4991 case Ity_I8:
4992 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4993 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
4994 default:
4995 goto unhandled;
4996 }
4997 } else {
4998 goto unhandled;
4999 }
5000 unhandled:
5001 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
5002 VG_(tool_panic)("zwidenToHostWord");
5003 }
5004
5005
5006 /* Generate a shadow store. |addr| is always the original address
5007 atom. You can pass in either originals or V-bits for the data
5008 atom, but obviously not both. This function generates a check for
5009 the definedness and (indirectly) the validity of |addr|, but only
5010 when |guard| evaluates to True at run time (or is NULL).
5011
5012 |guard| :: Ity_I1 controls whether the store really happens; NULL
5013 means it unconditionally does. Note that |guard| itself is not
5014 checked for definedness; the caller of this function must do that
5015 if necessary.
5016 */
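/* Informal sketch of what gets emitted for, e.g., a little-endian I32
   store (helper names as in this file):

      complainIfUndefined(addr, guard)   -- vbits check on the address
      DIRTY guard ::: MC_(helperc_STOREV32le)(addr + bias, vdata)

   V128 stores invoke the 64-bit helper twice, and V256 stores invoke
   it four times, at successive 8-byte offsets. */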
5017 static
5018 void do_shadow_Store ( MCEnv* mce,
5019 IREndness end,
5020 IRAtom* addr, UInt bias,
5021 IRAtom* data, IRAtom* vdata,
5022 IRAtom* guard )
5023 {
5024 IROp mkAdd;
5025 IRType ty, tyAddr;
5026 void* helper = NULL;
5027 const HChar* hname = NULL;
5028 IRConst* c;
5029
5030 tyAddr = mce->hWordTy;
5031 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
5032 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
5033 tl_assert( end == Iend_LE || end == Iend_BE );
5034
5035 if (data) {
5036 tl_assert(!vdata);
5037 tl_assert(isOriginalAtom(mce, data));
5038 tl_assert(bias == 0);
5039 vdata = expr2vbits( mce, data );
5040 } else {
5041 tl_assert(vdata);
5042 }
5043
5044 tl_assert(isOriginalAtom(mce,addr));
5045 tl_assert(isShadowAtom(mce,vdata));
5046
5047 if (guard) {
5048 tl_assert(isOriginalAtom(mce, guard));
5049 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5050 }
5051
5052 ty = typeOfIRExpr(mce->sb->tyenv, vdata);
5053
5054 // If we're not doing undefined value checking, pretend that this value
5055 // is "all valid". That lets Vex's optimiser remove some of the V bit
5056 // shadow computation ops that precede it.
5057 if (MC_(clo_mc_level) == 1) {
5058 switch (ty) {
5059 case Ity_V256: // V256 weirdness -- used four times
5060 c = IRConst_V256(V_BITS32_DEFINED); break;
5061 case Ity_V128: // V128 weirdness -- used twice
5062 c = IRConst_V128(V_BITS16_DEFINED); break;
5063 case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break;
5064 case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break;
5065 case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break;
5066 case Ity_I8: c = IRConst_U8 (V_BITS8_DEFINED); break;
5067 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
5068 }
5069 vdata = IRExpr_Const( c );
5070 }
5071
5072 /* First, emit a definedness test for the address. This also sets
5073 the address (shadow) to 'defined' following the test. Both of
5074 those actions are gated on |guard|. */
5075 complainIfUndefined( mce, addr, guard );
5076
5077 /* Now decide which helper function to call to write the data V
5078 bits into shadow memory. */
5079 if (end == Iend_LE) {
5080 switch (ty) {
5081 case Ity_V256: /* we'll use the helper four times */
5082 case Ity_V128: /* we'll use the helper twice */
5083 case Ity_I64: helper = &MC_(helperc_STOREV64le);
5084 hname = "MC_(helperc_STOREV64le)";
5085 break;
5086 case Ity_I32: helper = &MC_(helperc_STOREV32le);
5087 hname = "MC_(helperc_STOREV32le)";
5088 break;
5089 case Ity_I16: helper = &MC_(helperc_STOREV16le);
5090 hname = "MC_(helperc_STOREV16le)";
5091 break;
5092 case Ity_I8: helper = &MC_(helperc_STOREV8);
5093 hname = "MC_(helperc_STOREV8)";
5094 break;
5095 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
5096 }
5097 } else {
5098 switch (ty) {
5099 case Ity_V128: /* we'll use the helper twice */
5100 case Ity_I64: helper = &MC_(helperc_STOREV64be);
5101 hname = "MC_(helperc_STOREV64be)";
5102 break;
5103 case Ity_I32: helper = &MC_(helperc_STOREV32be);
5104 hname = "MC_(helperc_STOREV32be)";
5105 break;
5106 case Ity_I16: helper = &MC_(helperc_STOREV16be);
5107 hname = "MC_(helperc_STOREV16be)";
5108 break;
5109 case Ity_I8: helper = &MC_(helperc_STOREV8);
5110 hname = "MC_(helperc_STOREV8)";
5111 break;
5112 /* Note, no V256 case here, because no big-endian target that
5113 we support has 256-bit vectors. */
5114 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
5115 }
5116 }
5117
5118 if (UNLIKELY(ty == Ity_V256)) {
5119
5120 /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
5121 Q3 being the most significant lane. */
5122 /* These are the offsets of the Qs in memory. */
5123 Int offQ0, offQ1, offQ2, offQ3;
5124
5125 /* Various bits for constructing the 4 lane helper calls */
5126 IRDirty *diQ0, *diQ1, *diQ2, *diQ3;
5127 IRAtom *addrQ0, *addrQ1, *addrQ2, *addrQ3;
5128 IRAtom *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
5129 IRAtom *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;
5130
5131 if (end == Iend_LE) {
5132 offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
5133 } else {
5134 offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
5135 }
5136
5137 eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
5138 addrQ0 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
5139 vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
5140 diQ0 = unsafeIRDirty_0_N(
5141 1/*regparms*/,
5142 hname, VG_(fnptr_to_fnentry)( helper ),
5143 mkIRExprVec_2( addrQ0, vdataQ0 )
5144 );
5145
5146 eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
5147 addrQ1 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
5148 vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
5149 diQ1 = unsafeIRDirty_0_N(
5150 1/*regparms*/,
5151 hname, VG_(fnptr_to_fnentry)( helper ),
5152 mkIRExprVec_2( addrQ1, vdataQ1 )
5153 );
5154
5155 eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
5156 addrQ2 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
5157 vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
5158 diQ2 = unsafeIRDirty_0_N(
5159 1/*regparms*/,
5160 hname, VG_(fnptr_to_fnentry)( helper ),
5161 mkIRExprVec_2( addrQ2, vdataQ2 )
5162 );
5163
5164 eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
5165 addrQ3 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
5166 vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
5167 diQ3 = unsafeIRDirty_0_N(
5168 1/*regparms*/,
5169 hname, VG_(fnptr_to_fnentry)( helper ),
5170 mkIRExprVec_2( addrQ3, vdataQ3 )
5171 );
5172
5173 if (guard)
5174 diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;
5175
5176 setHelperAnns( mce, diQ0 );
5177 setHelperAnns( mce, diQ1 );
5178 setHelperAnns( mce, diQ2 );
5179 setHelperAnns( mce, diQ3 );
5180 stmt( 'V', mce, IRStmt_Dirty(diQ0) );
5181 stmt( 'V', mce, IRStmt_Dirty(diQ1) );
5182 stmt( 'V', mce, IRStmt_Dirty(diQ2) );
5183 stmt( 'V', mce, IRStmt_Dirty(diQ3) );
5184
5185 }
5186 else if (UNLIKELY(ty == Ity_V128)) {
5187
5188 /* V128-bit case */
5189 /* See comment in next clause re 64-bit regparms */
5190 /* also, need to be careful about endianness */
5191
5192 Int offLo64, offHi64;
5193 IRDirty *diLo64, *diHi64;
5194 IRAtom *addrLo64, *addrHi64;
5195 IRAtom *vdataLo64, *vdataHi64;
5196 IRAtom *eBiasLo64, *eBiasHi64;
5197
5198 if (end == Iend_LE) {
5199 offLo64 = 0;
5200 offHi64 = 8;
5201 } else {
5202 offLo64 = 8;
5203 offHi64 = 0;
5204 }
5205
5206 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
5207 addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
5208 vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
5209 diLo64 = unsafeIRDirty_0_N(
5210 1/*regparms*/,
5211 hname, VG_(fnptr_to_fnentry)( helper ),
5212 mkIRExprVec_2( addrLo64, vdataLo64 )
5213 );
5214 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
5215 addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
5216 vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
5217 diHi64 = unsafeIRDirty_0_N(
5218 1/*regparms*/,
5219 hname, VG_(fnptr_to_fnentry)( helper ),
5220 mkIRExprVec_2( addrHi64, vdataHi64 )
5221 );
5222 if (guard) diLo64->guard = guard;
5223 if (guard) diHi64->guard = guard;
5224 setHelperAnns( mce, diLo64 );
5225 setHelperAnns( mce, diHi64 );
5226 stmt( 'V', mce, IRStmt_Dirty(diLo64) );
5227 stmt( 'V', mce, IRStmt_Dirty(diHi64) );
5228
5229 } else {
5230
5231 IRDirty *di;
5232 IRAtom *addrAct;
5233
5234 /* 8/16/32/64-bit cases */
5235 /* Generate the actual address into addrAct. */
5236 if (bias == 0) {
5237 addrAct = addr;
5238 } else {
5239 IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
5240 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
5241 }
5242
5243 if (ty == Ity_I64) {
5244 /* We can't do this with regparm 2 on 32-bit platforms, since
5245 the back ends aren't clever enough to handle 64-bit
5246 regparm args. Therefore be different. */
5247 di = unsafeIRDirty_0_N(
5248 1/*regparms*/,
5249 hname, VG_(fnptr_to_fnentry)( helper ),
5250 mkIRExprVec_2( addrAct, vdata )
5251 );
5252 } else {
5253 di = unsafeIRDirty_0_N(
5254 2/*regparms*/,
5255 hname, VG_(fnptr_to_fnentry)( helper ),
5256 mkIRExprVec_2( addrAct,
5257 zwidenToHostWord( mce, vdata ))
5258 );
5259 }
5260 if (guard) di->guard = guard;
5261 setHelperAnns( mce, di );
5262 stmt( 'V', mce, IRStmt_Dirty(di) );
5263 }
5264
5265 }
5266
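/* (Compile-guarded illustration, not part of the instrumenter.)  A
   hypothetical standalone model of the lane-offset computation used in
   the V256 case above: lane Qi (i in 0..3, with Q0 the least significant
   64 bits) is stored at byte offset 8*i for a little-endian store and
   8*(3-i) for a big-endian one. */
#if 0
static int model_V256_lane_offset ( int littleEndian, int laneNo /* 0 .. 3 */ )
{
   return 8 * (littleEndian ? laneNo : 3 - laneNo);
}
#endif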
5267
5268 /* Do lazy pessimistic propagation through a dirty helper call, by
5269 looking at the annotations on it. This is the most complex part of
5270 Memcheck. */
5271
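/* (Compile-guarded illustration, not part of the instrumenter.)  A
   hypothetical concrete model, in plain C rather than IR, of the two
   shadow operations this propagation scheme is built from.  A
   pessimising cast (PCast) collapses a V-bit value to all-zeroes
   ("fully defined") or all-ones ("fully undefined"); UifU ("undefined
   if either operand is undefined") is just bitwise OR in the
   1-means-undefined convention. */
#if 0
static unsigned int model_PCastTo32 ( unsigned long long vbits )
{
   return vbits == 0 ? 0x00000000u : 0xFFFFFFFFu;
}
static unsigned int model_UifU32 ( unsigned int vb1, unsigned int vb2 )
{
   return vb1 | vb2;
}
#endif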
5272 static IRType szToITy ( Int n )
5273 {
5274 switch (n) {
5275 case 1: return Ity_I8;
5276 case 2: return Ity_I16;
5277 case 4: return Ity_I32;
5278 case 8: return Ity_I64;
5279 default: VG_(tool_panic)("szToITy(memcheck)");
5280 }
5281 }
5282
5283 static
5284 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
5285 {
5286 Int i, k, n, toDo, gSz, gOff;
5287 IRAtom *src, *here, *curr;
5288 IRType tySrc, tyDst;
5289 IRTemp dst;
5290 IREndness end;
5291
5292 /* What's the native endianness? We need to know this. */
5293 # if defined(VG_BIGENDIAN)
5294 end = Iend_BE;
5295 # elif defined(VG_LITTLEENDIAN)
5296 end = Iend_LE;
5297 # else
5298 # error "Unknown endianness"
5299 # endif
5300
5301 /* First check the guard. */
5302 complainIfUndefined(mce, d->guard, NULL);
5303
5304 /* Now round up all inputs and PCast over them. */
5305 curr = definedOfType(Ity_I32);
5306
5307 /* Inputs: unmasked args
5308 Note: arguments are evaluated REGARDLESS of the guard expression */
5309 for (i = 0; d->args[i]; i++) {
5310 IRAtom* arg = d->args[i];
5311 if ( (d->cee->mcx_mask & (1<<i))
5312 || UNLIKELY(is_IRExpr_VECRET_or_BBPTR(arg)) ) {
5313 /* ignore this arg */
5314 } else {
5315 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, arg) );
5316 curr = mkUifU32(mce, here, curr);
5317 }
5318 }
5319
5320 /* Inputs: guest state that we read. */
5321 for (i = 0; i < d->nFxState; i++) {
5322 tl_assert(d->fxState[i].fx != Ifx_None);
5323 if (d->fxState[i].fx == Ifx_Write)
5324 continue;
5325
5326 /* Enumerate the described state segments */
5327 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
5328 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
5329 gSz = d->fxState[i].size;
5330
5331 /* Ignore any sections marked as 'always defined'. */
5332 if (isAlwaysDefd(mce, gOff, gSz)) {
5333 if (0)
5334 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
5335 gOff, gSz);
5336 continue;
5337 }
5338
5339 /* This state element is read or modified. So we need to
5340 consider it. If larger than 8 bytes, deal with it in
5341 8-byte chunks. */
5342 while (True) {
5343 tl_assert(gSz >= 0);
5344 if (gSz == 0) break;
5345 n = gSz <= 8 ? gSz : 8;
5346 /* update 'curr' with UifU of the state slice
5347 gOff .. gOff+n-1 */
5348 tySrc = szToITy( n );
5349
5350 /* Observe the guard expression. If it is false use an
5351 all-bits-defined bit pattern */
5352 IRAtom *cond, *iffalse, *iftrue;
5353
5354 cond = assignNew('V', mce, Ity_I1, d->guard);
5355 iftrue = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
5356 iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
5357 src = assignNew('V', mce, tySrc,
5358 IRExpr_ITE(cond, iftrue, iffalse));
5359
5360 here = mkPCastTo( mce, Ity_I32, src );
5361 curr = mkUifU32(mce, here, curr);
5362 gSz -= n;
5363 gOff += n;
5364 }
5365 }
5366 }
5367
5368 /* Inputs: memory. First set up some info needed regardless of
5369 whether we're doing reads or writes. */
5370
5371 if (d->mFx != Ifx_None) {
5372 /* Because we may do multiple shadow loads/stores from the same
5373 base address, it's best to do a single test of its
5374 definedness right now. Post-instrumentation optimisation
5375 should remove all but this test. */
5376 IRType tyAddr;
5377 tl_assert(d->mAddr);
5378 complainIfUndefined(mce, d->mAddr, d->guard);
5379
5380 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
5381 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
5382 tl_assert(tyAddr == mce->hWordTy); /* not really right */
5383 }
5384
5385 /* Deal with memory inputs (reads or modifies) */
5386 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
5387 toDo = d->mSize;
5388 /* chew off 32-bit chunks. We don't care about the endianness
5389 since it's all going to be condensed down to a single bit,
5390 but nevertheless choose an endianness which is hopefully
5391 native to the platform. */
5392 while (toDo >= 4) {
5393 here = mkPCastTo(
5394 mce, Ity_I32,
5395 expr2vbits_Load_guarded_Simple(
5396 mce, end, Ity_I32, d->mAddr, d->mSize - toDo, d->guard )
5397 );
5398 curr = mkUifU32(mce, here, curr);
5399 toDo -= 4;
5400 }
5401 /* chew off 16-bit chunks */
5402 while (toDo >= 2) {
5403 here = mkPCastTo(
5404 mce, Ity_I32,
5405 expr2vbits_Load_guarded_Simple(
5406 mce, end, Ity_I16, d->mAddr, d->mSize - toDo, d->guard )
5407 );
5408 curr = mkUifU32(mce, here, curr);
5409 toDo -= 2;
5410 }
5411 /* chew off the remaining 8-bit chunk, if any */
5412 if (toDo == 1) {
5413 here = mkPCastTo(
5414 mce, Ity_I32,
5415 expr2vbits_Load_guarded_Simple(
5416 mce, end, Ity_I8, d->mAddr, d->mSize - toDo, d->guard )
5417 );
5418 curr = mkUifU32(mce, here, curr);
5419 toDo -= 1;
5420 }
5421 tl_assert(toDo == 0);
5422 }
5423
5424 /* Whew! So curr is a 32-bit V-value summarising pessimistically
5425 all the inputs to the helper. Now we need to re-distribute the
5426 results to all destinations. */
5427
5428 /* Outputs: the destination temporary, if there is one. */
5429 if (d->tmp != IRTemp_INVALID) {
5430 dst = findShadowTmpV(mce, d->tmp);
5431 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
5432 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
5433 }
5434
5435 /* Outputs: guest state that we write or modify. */
5436 for (i = 0; i < d->nFxState; i++) {
5437 tl_assert(d->fxState[i].fx != Ifx_None);
5438 if (d->fxState[i].fx == Ifx_Read)
5439 continue;
5440
5441 /* Enumerate the described state segments */
5442 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
5443 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
5444 gSz = d->fxState[i].size;
5445
5446 /* Ignore any sections marked as 'always defined'. */
5447 if (isAlwaysDefd(mce, gOff, gSz))
5448 continue;
5449
5450 /* This state element is written or modified. So we need to
5451 consider it. If larger than 8 bytes, deal with it in
5452 8-byte chunks. */
5453 while (True) {
5454 tl_assert(gSz >= 0);
5455 if (gSz == 0) break;
5456 n = gSz <= 8 ? gSz : 8;
5457 /* Write suitably-casted 'curr' to the state slice
5458 gOff .. gOff+n-1 */
5459 tyDst = szToITy( n );
5460 do_shadow_PUT( mce, gOff,
5461 NULL, /* original atom */
5462 mkPCastTo( mce, tyDst, curr ), d->guard );
5463 gSz -= n;
5464 gOff += n;
5465 }
5466 }
5467 }
5468
5469 /* Outputs: memory that we write or modify. Same comments about
5470 endianness as above apply. */
5471 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
5472 toDo = d->mSize;
5473 /* chew off 32-bit chunks */
5474 while (toDo >= 4) {
5475 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
5476 NULL, /* original data */
5477 mkPCastTo( mce, Ity_I32, curr ),
5478 d->guard );
5479 toDo -= 4;
5480 }
5481 /* chew off 16-bit chunks */
5482 while (toDo >= 2) {
5483 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
5484 NULL, /* original data */
5485 mkPCastTo( mce, Ity_I16, curr ),
5486 d->guard );
5487 toDo -= 2;
5488 }
5489 /* chew off the remaining 8-bit chunk, if any */
5490 if (toDo == 1) {
5491 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
5492 NULL, /* original data */
5493 mkPCastTo( mce, Ity_I8, curr ),
5494 d->guard );
5495 toDo -= 1;
5496 }
5497 tl_assert(toDo == 0);
5498 }
5499
5500 }
5501
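/* (Compile-guarded illustration, not part of the instrumenter.)  A
   hypothetical sketch of the chunking strategy used above for the
   helper's memory effects: an mSize-byte region is consumed as 32-bit
   pieces, then 16-bit pieces, then at most one final byte, so every
   byte is covered exactly once regardless of mSize. */
#if 0
static void model_chunk_region ( int mSize,
                                 void (*doChunk)(int offset, int szB) )
{
   int toDo = mSize;
   while (toDo >= 4) { doChunk(mSize - toDo, 4); toDo -= 4; }
   while (toDo >= 2) { doChunk(mSize - toDo, 2); toDo -= 2; }
   if (toDo == 1)    { doChunk(mSize - toDo, 1); toDo -= 1; }
   /* at this point toDo == 0, as asserted in the real code */
}
#endif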
5502
5503 /* We have an ABI hint telling us that [base .. base+len-1] is to
5504 become undefined ("writable"). Generate code to call a helper to
5505 notify the A/V bit machinery of this fact.
5506
5507 We call
5508 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
5509 Addr nia );
5510 */
5511 static
5512 void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
5513 {
5514 IRDirty* di;
5515 /* Minor optimisation: if not doing origin tracking, ignore the
5516 supplied nia and pass zero instead. This is on the basis that
5517 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
5518 almost always generate a shorter instruction to put zero into a
5519 register than any other value. */
5520 if (MC_(clo_mc_level) < 3)
5521 nia = mkIRExpr_HWord(0);
5522
5523 di = unsafeIRDirty_0_N(
5524 0/*regparms*/,
5525 "MC_(helperc_MAKE_STACK_UNINIT)",
5526 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
5527 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
5528 );
5529 stmt( 'V', mce, IRStmt_Dirty(di) );
5530 }
5531
5532
5533 /* ------ Dealing with IRCAS (big and complex) ------ */
5534
5535 /* FWDS */
5536 static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5537 IRAtom* baseaddr, Int offset );
5538 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
5539 static void gen_store_b ( MCEnv* mce, Int szB,
5540 IRAtom* baseaddr, Int offset, IRAtom* dataB,
5541 IRAtom* guard );
5542
5543 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
5544 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
5545
5546
5547 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
5548 IRExpr.Consts, else this asserts. If they are both Consts, it
5549 doesn't do anything. So that just leaves the RdTmp case.
5550
5551 In which case: this assigns the shadow value SHADOW to the IR
5552 shadow temporary associated with ORIG. That is, ORIG, being an
5553 original temporary, will have a shadow temporary associated with
5554 it. However, in the case envisaged here, there will so far have
5555 been no IR emitted to actually write a shadow value into that
5556 temporary. What this routine does is to (emit IR to) copy the
5557 value in SHADOW into said temporary, so that after this call,
5558 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
5559 value in SHADOW.
5560
5561 Point is to allow callers to compute "by hand" a shadow value for
5562 ORIG, and force it to be associated with ORIG.
5563
5564 How do we know that the shadow associated with ORIG has not so far
5565 been assigned to? Well, we don't per se know that, but supposing
5566 it had. Then this routine would create a second assignment to it,
5567 and later the IR sanity checker would barf. But that never
5568 happens. QED.
5569 */
5570 static void bind_shadow_tmp_to_orig ( UChar how,
5571 MCEnv* mce,
5572 IRAtom* orig, IRAtom* shadow )
5573 {
5574 tl_assert(isOriginalAtom(mce, orig));
5575 tl_assert(isShadowAtom(mce, shadow));
5576 switch (orig->tag) {
5577 case Iex_Const:
5578 tl_assert(shadow->tag == Iex_Const);
5579 break;
5580 case Iex_RdTmp:
5581 tl_assert(shadow->tag == Iex_RdTmp);
5582 if (how == 'V') {
5583 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
5584 shadow);
5585 } else {
5586 tl_assert(how == 'B');
5587 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
5588 shadow);
5589 }
5590 break;
5591 default:
5592 tl_assert(0);
5593 }
5594 }
5595
5596
5597 static
5598 void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
5599 {
5600 /* Scheme is (both single- and double- cases):
5601
5602 1. fetch data#,dataB (the proposed new value)
5603
5604 2. fetch expd#,expdB (what we expect to see at the address)
5605
5606 3. check definedness of address
5607
5608 4. load old#,oldB from shadow memory; this also checks
5609 addressability of the address
5610
5611 5. the CAS itself
5612
5613 6. compute "expected == old". See COMMENT_ON_CasCmpEQ below.
5614
5615 7. if "expected == old" (as computed by (6))
5616 store data#,dataB to shadow memory
5617
5618 Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores
5619 'data' but 7 stores 'data#'. Hence it is possible for the
5620 shadow data to be incorrectly checked and/or updated:
5621
5622 * 7 is at least gated correctly, since the 'expected == old'
5623 condition is derived from outputs of 5. However, the shadow
5624 write could happen too late: imagine after 5 we are
5625 descheduled, a different thread runs, writes a different
5626 (shadow) value at the address, and then we resume, hence
5627 overwriting the shadow value written by the other thread.
5628
5629 Because the original memory access is atomic, there's no way to
5630 make both the original and shadow accesses into a single atomic
5631 thing, hence this is unavoidable.
5632
5633 At least as Valgrind stands, I don't think it's a problem, since
5634 we're single threaded *and* we guarantee that there are no
5635 context switches during the execution of any specific superblock
5636 -- context switches can only happen at superblock boundaries.
5637
5638 If Valgrind ever becomes MT in the future, then it might be more
5639 of a problem. A possible kludge would be to artificially
5640 associate with the location, a lock, which we must acquire and
5641 release around the transaction as a whole. Hmm, that probably
5642 wouldn't work properly since it only guards us against other
5643 threads doing CASs on the same location, not against other
5644 threads doing normal reads and writes.
5645
5646 ------------------------------------------------------------
5647
5648 COMMENT_ON_CasCmpEQ:
5649
5650 Note two things. Firstly, in the sequence above, we compute
5651 "expected == old", but we don't check definedness of it. Why
5652 not? Also, the x86 and amd64 front ends use
5653 Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
5654 determination (expected == old ?) for themselves, and we also
5655 don't check definedness for those primops; we just say that the
5656 result is defined. Why? Details follow.
5657
5658 x86/amd64 contains various forms of locked insns:
5659 * lock prefix before all basic arithmetic insns;
5660 eg lock xorl %reg1,(%reg2)
5661 * atomic exchange reg-mem
5662 * compare-and-swaps
5663
5664 Rather than attempt to represent them all, which would be a
5665 royal PITA, I used a result from Maurice Herlihy
5666 (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
5667 demonstrates that compare-and-swap is a primitive more general
5668 than the other two, and so can be used to represent all of them.
5669 So the translation scheme for (eg) lock incl (%reg) is as
5670 follows:
5671
5672 again:
5673 old = * %reg
5674 new = old + 1
5675 atomically { if (* %reg == old) { * %reg = new } else { goto again } }
5676
5677 The "atomically" is the CAS bit. The scheme is always the same:
5678 get old value from memory, compute new value, atomically stuff
5679 new value back in memory iff the old value has not changed (iow,
5680 no other thread modified it in the meantime). If it has changed
5681 then we've been out-raced and we have to start over.
5682
5683 Now that's all very neat, but it has the bad side effect of
5684 introducing an explicit equality test into the translation.
5685 Consider the behaviour of said code on a memory location which
5686 is uninitialised. We will wind up doing a comparison on
5687 uninitialised data, and mc duly complains.
5688
5689 What's difficult about this is, the common case is that the
5690 location is uncontended, and so we're usually comparing the same
5691 value (* %reg) with itself. So we shouldn't complain even if it
5692 is undefined. But mc doesn't know that.
5693
5694 My solution is to mark the == in the IR specially, so as to tell
5695 mc that it almost certainly compares a value with itself, and we
5696 should just regard the result as always defined. Rather than
5697 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
5698 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
5699
5700 So there's always the question of, can this give a false
5701 negative? eg, imagine that initially, * %reg is defined; and we
5702 read that; but then in the gap between the read and the CAS, a
5703 different thread writes an undefined (and different) value at
5704 the location. Then the CAS in this thread will fail and we will
5705 go back to "again:", but without knowing that the trip back
5706 there was based on an undefined comparison. No matter; at least
5707 the other thread won the race and the location is correctly
5708 marked as undefined. What if it wrote an uninitialised version
5709 of the same value that was there originally, though?
5710
5711 etc etc. Seems like there's a small corner case in which we
5712 might lose the fact that something's defined -- we're out-raced
5713 in between the "old = * reg" and the "atomically {", _and_ the
5714 other thread is writing in an undefined version of what's
5715 already there. Well, that seems pretty unlikely.
5716
5717 ---
5718
5719 If we ever need to reinstate it .. code which generates a
5720 definedness test for "expected == old" was removed at r10432 of
5721 this file.
5722 */
5723 if (cas->oldHi == IRTemp_INVALID) {
5724 do_shadow_CAS_single( mce, cas );
5725 } else {
5726 do_shadow_CAS_double( mce, cas );
5727 }
5728 }
5729
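/* (Compile-guarded illustration, not part of the instrumenter.)  A
   hypothetical plain-C rendering of the "lock incl (%reg)" translation
   scheme described in the comment above, using the GCC/Clang
   __sync_val_compare_and_swap builtin purely for illustration. */
#if 0
static void model_lock_incl ( int* p )
{
   while (1) {
      int old = *p;
      int nyu = old + 1;
      /* atomically { if (*p == old) { *p = nyu; } else { retry } } */
      if (__sync_val_compare_and_swap(p, old, nyu) == old)
         break;   /* CAS succeeded: nobody out-raced us */
      /* else: we were out-raced; go round again */
   }
}
#endif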
5730
5731 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
5732 {
5733 IRAtom *vdataLo = NULL, *bdataLo = NULL;
5734 IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
5735 IRAtom *voldLo = NULL, *boldLo = NULL;
5736 IRAtom *expd_eq_old = NULL;
5737 IROp opCasCmpEQ;
5738 Int elemSzB;
5739 IRType elemTy;
5740 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
5741
5742 /* single CAS */
5743 tl_assert(cas->oldHi == IRTemp_INVALID);
5744 tl_assert(cas->expdHi == NULL);
5745 tl_assert(cas->dataHi == NULL);
5746
5747 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
5748 switch (elemTy) {
5749 case Ity_I8: elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8; break;
5750 case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
5751 case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
5752 case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
5753 default: tl_assert(0); /* IR defn disallows any other types */
5754 }
5755
5756 /* 1. fetch data# (the proposed new value) */
5757 tl_assert(isOriginalAtom(mce, cas->dataLo));
5758 vdataLo
5759 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
5760 tl_assert(isShadowAtom(mce, vdataLo));
5761 if (otrak) {
5762 bdataLo
5763 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
5764 tl_assert(isShadowAtom(mce, bdataLo));
5765 }
5766
5767 /* 2. fetch expected# (what we expect to see at the address) */
5768 tl_assert(isOriginalAtom(mce, cas->expdLo));
5769 vexpdLo
5770 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
5771 tl_assert(isShadowAtom(mce, vexpdLo));
5772 if (otrak) {
5773 bexpdLo
5774 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
5775 tl_assert(isShadowAtom(mce, bexpdLo));
5776 }
5777
5778 /* 3. check definedness of address */
5779 /* 4. fetch old# from shadow memory; this also checks
5780 addressability of the address */
5781 voldLo
5782 = assignNew(
5783 'V', mce, elemTy,
5784 expr2vbits_Load(
5785 mce,
5786 cas->end, elemTy, cas->addr, 0/*Addr bias*/,
5787 NULL/*always happens*/
5788 ));
5789 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
5790 if (otrak) {
5791 boldLo
5792 = assignNew('B', mce, Ity_I32,
5793 gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
5794 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
5795 }
5796
5797 /* 5. the CAS itself */
5798 stmt( 'C', mce, IRStmt_CAS(cas) );
5799
5800 /* 6. compute "expected == old" */
5801 /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
5802 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
5803 tree, but it's not copied from the input block. */
5804 expd_eq_old
5805 = assignNew('C', mce, Ity_I1,
5806 binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
5807
5808 /* 7. if "expected == old"
5809 store data# to shadow memory */
5810 do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
5811 NULL/*data*/, vdataLo/*vdata*/,
5812 expd_eq_old/*guard for store*/ );
5813 if (otrak) {
5814 gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
5815 bdataLo/*bdata*/,
5816 expd_eq_old/*guard for store*/ );
5817 }
5818 }
5819
5820
5821 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
5822 {
5823 IRAtom *vdataHi = NULL, *bdataHi = NULL;
5824 IRAtom *vdataLo = NULL, *bdataLo = NULL;
5825 IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
5826 IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
5827 IRAtom *voldHi = NULL, *boldHi = NULL;
5828 IRAtom *voldLo = NULL, *boldLo = NULL;
5829 IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
5830 IRAtom *expd_eq_old = NULL, *zero = NULL;
5831 IROp opCasCmpEQ, opOr, opXor;
5832 Int elemSzB, memOffsLo, memOffsHi;
5833 IRType elemTy;
5834 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
5835
5836 /* double CAS */
5837 tl_assert(cas->oldHi != IRTemp_INVALID);
5838 tl_assert(cas->expdHi != NULL);
5839 tl_assert(cas->dataHi != NULL);
5840
5841 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
5842 switch (elemTy) {
5843 case Ity_I8:
5844 opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
5845 elemSzB = 1; zero = mkU8(0);
5846 break;
5847 case Ity_I16:
5848 opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
5849 elemSzB = 2; zero = mkU16(0);
5850 break;
5851 case Ity_I32:
5852 opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
5853 elemSzB = 4; zero = mkU32(0);
5854 break;
5855 case Ity_I64:
5856 opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
5857 elemSzB = 8; zero = mkU64(0);
5858 break;
5859 default:
5860 tl_assert(0); /* IR defn disallows any other types */
5861 }
5862
5863 /* 1. fetch data# (the proposed new value) */
5864 tl_assert(isOriginalAtom(mce, cas->dataHi));
5865 tl_assert(isOriginalAtom(mce, cas->dataLo));
5866 vdataHi
5867 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
5868 vdataLo
5869 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
5870 tl_assert(isShadowAtom(mce, vdataHi));
5871 tl_assert(isShadowAtom(mce, vdataLo));
5872 if (otrak) {
5873 bdataHi
5874 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
5875 bdataLo
5876 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
5877 tl_assert(isShadowAtom(mce, bdataHi));
5878 tl_assert(isShadowAtom(mce, bdataLo));
5879 }
5880
5881 /* 2. fetch expected# (what we expect to see at the address) */
5882 tl_assert(isOriginalAtom(mce, cas->expdHi));
5883 tl_assert(isOriginalAtom(mce, cas->expdLo));
5884 vexpdHi
5885 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
5886 vexpdLo
5887 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
5888 tl_assert(isShadowAtom(mce, vexpdHi));
5889 tl_assert(isShadowAtom(mce, vexpdLo));
5890 if (otrak) {
5891 bexpdHi
5892 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
5893 bexpdLo
5894 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
5895 tl_assert(isShadowAtom(mce, bexpdHi));
5896 tl_assert(isShadowAtom(mce, bexpdLo));
5897 }
5898
5899 /* 3. check definedness of address */
5900 /* 4. fetch old# from shadow memory; this also checks
5901 addressability of the address */
5902 if (cas->end == Iend_LE) {
5903 memOffsLo = 0;
5904 memOffsHi = elemSzB;
5905 } else {
5906 tl_assert(cas->end == Iend_BE);
5907 memOffsLo = elemSzB;
5908 memOffsHi = 0;
5909 }
5910 voldHi
5911 = assignNew(
5912 'V', mce, elemTy,
5913 expr2vbits_Load(
5914 mce,
5915 cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/,
5916 NULL/*always happens*/
5917 ));
5918 voldLo
5919 = assignNew(
5920 'V', mce, elemTy,
5921 expr2vbits_Load(
5922 mce,
5923 cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/,
5924 NULL/*always happens*/
5925 ));
5926 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
5927 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
5928 if (otrak) {
5929 boldHi
5930 = assignNew('B', mce, Ity_I32,
5931 gen_load_b(mce, elemSzB, cas->addr,
5932 memOffsHi/*addr bias*/));
5933 boldLo
5934 = assignNew('B', mce, Ity_I32,
5935 gen_load_b(mce, elemSzB, cas->addr,
5936 memOffsLo/*addr bias*/));
5937 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
5938 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
5939 }
5940
5941 /* 5. the CAS itself */
5942 stmt( 'C', mce, IRStmt_CAS(cas) );
5943
5944 /* 6. compute "expected == old" */
5945 /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
5946 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
5947 tree, but it's not copied from the input block. */
5948 /*
5949 xHi = oldHi ^ expdHi;
5950 xLo = oldLo ^ expdLo;
5951 xHL = xHi | xLo;
5952 expd_eq_old = xHL == 0;
5953 */
5954 xHi = assignNew('C', mce, elemTy,
5955 binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
5956 xLo = assignNew('C', mce, elemTy,
5957 binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
5958 xHL = assignNew('C', mce, elemTy,
5959 binop(opOr, xHi, xLo));
5960 expd_eq_old
5961 = assignNew('C', mce, Ity_I1,
5962 binop(opCasCmpEQ, xHL, zero));
5963
5964 /* 7. if "expected == old"
5965 store data# to shadow memory */
5966 do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
5967 NULL/*data*/, vdataHi/*vdata*/,
5968 expd_eq_old/*guard for store*/ );
5969 do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
5970 NULL/*data*/, vdataLo/*vdata*/,
5971 expd_eq_old/*guard for store*/ );
5972 if (otrak) {
5973 gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
5974 bdataHi/*bdata*/,
5975 expd_eq_old/*guard for store*/ );
5976 gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
5977 bdataLo/*bdata*/,
5978 expd_eq_old/*guard for store*/ );
5979 }
5980 }
5981
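/* (Compile-guarded illustration, not part of the instrumenter.)  A
   hypothetical standalone model of step 6 above for the double-CAS
   case: "expected == old" is phrased as
   ((oldHi ^ expdHi) | (oldLo ^ expdLo)) == 0, so that only a single
   CasCmpEQ-against-zero is needed for the pair of halves. */
#if 0
static int model_dcas_expd_eq_old ( unsigned long long oldHi,
                                    unsigned long long oldLo,
                                    unsigned long long expdHi,
                                    unsigned long long expdLo )
{
   unsigned long long xHL = (oldHi ^ expdHi) | (oldLo ^ expdLo);
   return xHL == 0;
}
#endif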
5982
5983 /* ------ Dealing with LL/SC (not difficult) ------ */
5984
5985 static void do_shadow_LLSC ( MCEnv* mce,
5986 IREndness stEnd,
5987 IRTemp stResult,
5988 IRExpr* stAddr,
5989 IRExpr* stStoredata )
5990 {
5991 /* In short: treat a load-linked like a normal load followed by an
5992 assignment of the loaded (shadow) data to the result temporary.
5993 Treat a store-conditional like a normal store, and mark the
5994 result temporary as defined. */
5995 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult);
5996 IRTemp resTmp = findShadowTmpV(mce, stResult);
5997
5998 tl_assert(isIRAtom(stAddr));
5999 if (stStoredata)
6000 tl_assert(isIRAtom(stStoredata));
6001
6002 if (stStoredata == NULL) {
6003 /* Load Linked */
6004 /* Just treat this as a normal load, followed by an assignment of
6005 the value to .result. */
6006 /* Stay sane */
6007 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
6008 || resTy == Ity_I16 || resTy == Ity_I8);
6009 assign( 'V', mce, resTmp,
6010 expr2vbits_Load(
6011 mce, stEnd, resTy, stAddr, 0/*addr bias*/,
6012 NULL/*always happens*/) );
6013 } else {
6014 /* Store Conditional */
6015 /* Stay sane */
6016 IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
6017 stStoredata);
6018 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
6019 || dataTy == Ity_I16 || dataTy == Ity_I8);
6020 do_shadow_Store( mce, stEnd,
6021 stAddr, 0/* addr bias */,
6022 stStoredata,
6023 NULL /* shadow data */,
6024 NULL/*guard*/ );
6025 /* This is a store conditional, so it writes to .result a value
6026 indicating whether or not the store succeeded. Just claim
6027 this value is always defined. In the PowerPC interpretation
6028 of store-conditional, definedness of the success indication
6029 depends on whether the address of the store matches the
6030 reservation address. But we can't tell that here (and
6031 anyway, we're not being PowerPC-specific). At least we are
6032 guaranteed that the definedness of the store address, and its
6033 addressability, will be checked as per normal. So it seems
6034 pretty safe to just say that the success indication is always
6035 defined.
6036
6037 In schemeS, for origin tracking, we must correspondingly set
6038 a no-origin value for the origin shadow of .result.
6039 */
6040 tl_assert(resTy == Ity_I1);
6041 assign( 'V', mce, resTmp, definedOfType(resTy) );
6042 }
6043 }
6044
6045
6046 /* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
6047
6048 static void do_shadow_StoreG ( MCEnv* mce, IRStoreG* sg )
6049 {
6050 complainIfUndefined(mce, sg->guard, NULL);
6051 /* do_shadow_Store will generate code to check the definedness and
6052 validity of sg->addr, in the case where sg->guard evaluates to
6053 True at run-time. */
6054 do_shadow_Store( mce, sg->end,
6055 sg->addr, 0/* addr bias */,
6056 sg->data,
6057 NULL /* shadow data */,
6058 sg->guard );
6059 }
6060
6061 static void do_shadow_LoadG ( MCEnv* mce, IRLoadG* lg )
6062 {
6063 complainIfUndefined(mce, lg->guard, NULL);
6064 /* expr2vbits_Load_guarded_General will generate code to check the
6065 definedness and validity of lg->addr, in the case where
6066 lg->guard evaluates to True at run-time. */
6067
6068 /* Look at the LoadG's built-in conversion operation, to determine
6069 the source (actual loaded data) type, and the equivalent IROp.
6070 NOTE that implicitly we are taking a widening operation to be
6071 applied to original atoms and producing one that applies to V
6072 bits. Since signed and unsigned widening are self-shadowing,
6073 this is a straight copy of the op (modulo swapping from the
6074 IRLoadGOp form to the IROp form). Note also therefore that this
6075 implicitly duplicates the logic to do with said widening ops in
6076 expr2vbits_Unop. See comment at the start of expr2vbits_Unop. */
6077 IROp vwiden = Iop_INVALID;
6078 IRType loadedTy = Ity_INVALID;
6079 switch (lg->cvt) {
6080 case ILGop_IdentV128: loadedTy = Ity_V128; vwiden = Iop_INVALID; break;
6081 case ILGop_Ident64: loadedTy = Ity_I64; vwiden = Iop_INVALID; break;
6082 case ILGop_Ident32: loadedTy = Ity_I32; vwiden = Iop_INVALID; break;
6083 case ILGop_16Uto32: loadedTy = Ity_I16; vwiden = Iop_16Uto32; break;
6084 case ILGop_16Sto32: loadedTy = Ity_I16; vwiden = Iop_16Sto32; break;
6085 case ILGop_8Uto32: loadedTy = Ity_I8; vwiden = Iop_8Uto32; break;
6086 case ILGop_8Sto32: loadedTy = Ity_I8; vwiden = Iop_8Sto32; break;
6087 default: VG_(tool_panic)("do_shadow_LoadG");
6088 }
6089
6090 IRAtom* vbits_alt
6091 = expr2vbits( mce, lg->alt );
6092 IRAtom* vbits_final
6093 = expr2vbits_Load_guarded_General(mce, lg->end, loadedTy,
6094 lg->addr, 0/*addr bias*/,
6095 lg->guard, vwiden, vbits_alt );
6096 /* And finally, bind the V bits to the destination temporary. */
6097 assign( 'V', mce, findShadowTmpV(mce, lg->dst), vbits_final );
6098 }
6099
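/* (Compile-guarded illustration, not part of the instrumenter.)  A
   hypothetical scalar model of the guarded-load shadow semantics
   handled above: when the guard is false the V bits of |alt| are used,
   otherwise the (possibly widened) V bits of the loaded data. */
#if 0
static unsigned int model_LoadG_vbits ( int guard,
                                        unsigned int vbits_loaded_widened,
                                        unsigned int vbits_alt )
{
   return guard ? vbits_loaded_widened : vbits_alt;
}
#endif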
6100
6101 /*------------------------------------------------------------*/
6102 /*--- Memcheck main ---*/
6103 /*------------------------------------------------------------*/
6104
6105 static void schemeS ( MCEnv* mce, IRStmt* st );
6106
6107 static Bool isBogusAtom ( IRAtom* at )
6108 {
6109 ULong n = 0;
6110 IRConst* con;
6111 tl_assert(isIRAtom(at));
6112 if (at->tag == Iex_RdTmp)
6113 return False;
6114 tl_assert(at->tag == Iex_Const);
6115 con = at->Iex.Const.con;
6116 switch (con->tag) {
6117 case Ico_U1: return False;
6118 case Ico_U8: n = (ULong)con->Ico.U8; break;
6119 case Ico_U16: n = (ULong)con->Ico.U16; break;
6120 case Ico_U32: n = (ULong)con->Ico.U32; break;
6121 case Ico_U64: n = (ULong)con->Ico.U64; break;
6122 case Ico_F32: return False;
6123 case Ico_F64: return False;
6124 case Ico_F32i: return False;
6125 case Ico_F64i: return False;
6126 case Ico_V128: return False;
6127 case Ico_V256: return False;
6128 default: ppIRExpr(at); tl_assert(0);
6129 }
6130 /* VG_(printf)("%llx\n", n); */
6131 return (/*32*/ n == 0xFEFEFEFFULL
6132 /*32*/ || n == 0x80808080ULL
6133 /*32*/ || n == 0x7F7F7F7FULL
6134 /*32*/ || n == 0x7EFEFEFFULL
6135 /*32*/ || n == 0x81010100ULL
6136 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
6137 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
6138 /*64*/ || n == 0x0000000000008080ULL
6139 /*64*/ || n == 0x8080808080808080ULL
6140 /*64*/ || n == 0x0101010101010101ULL
6141 );
6142 }
6143
6144 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
6145 {
6146 Int i;
6147 IRExpr* e;
6148 IRDirty* d;
6149 IRCAS* cas;
6150 switch (st->tag) {
6151 case Ist_WrTmp:
6152 e = st->Ist.WrTmp.data;
6153 switch (e->tag) {
6154 case Iex_Get:
6155 case Iex_RdTmp:
6156 return False;
6157 case Iex_Const:
6158 return isBogusAtom(e);
6159 case Iex_Unop:
6160 return isBogusAtom(e->Iex.Unop.arg)
6161 || e->Iex.Unop.op == Iop_GetMSBs8x16;
6162 case Iex_GetI:
6163 return isBogusAtom(e->Iex.GetI.ix);
6164 case Iex_Binop:
6165 return isBogusAtom(e->Iex.Binop.arg1)
6166 || isBogusAtom(e->Iex.Binop.arg2);
6167 case Iex_Triop:
6168 return isBogusAtom(e->Iex.Triop.details->arg1)
6169 || isBogusAtom(e->Iex.Triop.details->arg2)
6170 || isBogusAtom(e->Iex.Triop.details->arg3);
6171 case Iex_Qop:
6172 return isBogusAtom(e->Iex.Qop.details->arg1)
6173 || isBogusAtom(e->Iex.Qop.details->arg2)
6174 || isBogusAtom(e->Iex.Qop.details->arg3)
6175 || isBogusAtom(e->Iex.Qop.details->arg4);
6176 case Iex_ITE:
6177 return isBogusAtom(e->Iex.ITE.cond)
6178 || isBogusAtom(e->Iex.ITE.iftrue)
6179 || isBogusAtom(e->Iex.ITE.iffalse);
6180 case Iex_Load:
6181 return isBogusAtom(e->Iex.Load.addr);
6182 case Iex_CCall:
6183 for (i = 0; e->Iex.CCall.args[i]; i++)
6184 if (isBogusAtom(e->Iex.CCall.args[i]))
6185 return True;
6186 return False;
6187 default:
6188 goto unhandled;
6189 }
6190 case Ist_Dirty:
6191 d = st->Ist.Dirty.details;
6192 for (i = 0; d->args[i]; i++) {
6193 IRAtom* atom = d->args[i];
6194 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(atom))) {
6195 if (isBogusAtom(atom))
6196 return True;
6197 }
6198 }
6199 if (isBogusAtom(d->guard))
6200 return True;
6201 if (d->mAddr && isBogusAtom(d->mAddr))
6202 return True;
6203 return False;
6204 case Ist_Put:
6205 return isBogusAtom(st->Ist.Put.data);
6206 case Ist_PutI:
6207 return isBogusAtom(st->Ist.PutI.details->ix)
6208 || isBogusAtom(st->Ist.PutI.details->data);
6209 case Ist_Store:
6210 return isBogusAtom(st->Ist.Store.addr)
6211 || isBogusAtom(st->Ist.Store.data);
6212 case Ist_StoreG: {
6213 IRStoreG* sg = st->Ist.StoreG.details;
6214 return isBogusAtom(sg->addr) || isBogusAtom(sg->data)
6215 || isBogusAtom(sg->guard);
6216 }
6217 case Ist_LoadG: {
6218 IRLoadG* lg = st->Ist.LoadG.details;
6219 return isBogusAtom(lg->addr) || isBogusAtom(lg->alt)
6220 || isBogusAtom(lg->guard);
6221 }
6222 case Ist_Exit:
6223 return isBogusAtom(st->Ist.Exit.guard);
6224 case Ist_AbiHint:
6225 return isBogusAtom(st->Ist.AbiHint.base)
6226 || isBogusAtom(st->Ist.AbiHint.nia);
6227 case Ist_NoOp:
6228 case Ist_IMark:
6229 case Ist_MBE:
6230 return False;
6231 case Ist_CAS:
6232 cas = st->Ist.CAS.details;
6233 return isBogusAtom(cas->addr)
6234 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
6235 || isBogusAtom(cas->expdLo)
6236 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
6237 || isBogusAtom(cas->dataLo);
6238 case Ist_LLSC:
6239 return isBogusAtom(st->Ist.LLSC.addr)
6240 || (st->Ist.LLSC.storedata
6241 ? isBogusAtom(st->Ist.LLSC.storedata)
6242 : False);
6243 default:
6244 unhandled:
6245 ppIRStmt(st);
6246 VG_(tool_panic)("hasBogusLiterals");
6247 }
6248 }
6249
6250
6251 IRSB* MC_(instrument) ( VgCallbackClosure* closure,
6252 IRSB* sb_in,
6253 const VexGuestLayout* layout,
6254 const VexGuestExtents* vge,
6255 const VexArchInfo* archinfo_host,
6256 IRType gWordTy, IRType hWordTy )
6257 {
6258 Bool verboze = 0||False;
6259 Int i, j, first_stmt;
6260 IRStmt* st;
6261 MCEnv mce;
6262 IRSB* sb_out;
6263
6264 if (gWordTy != hWordTy) {
6265 /* We don't currently support this case. */
6266 VG_(tool_panic)("host/guest word size mismatch");
6267 }
6268
6269 /* Check we're not completely nuts */
6270 tl_assert(sizeof(UWord) == sizeof(void*));
6271 tl_assert(sizeof(Word) == sizeof(void*));
6272 tl_assert(sizeof(Addr) == sizeof(void*));
6273 tl_assert(sizeof(ULong) == 8);
6274 tl_assert(sizeof(Long) == 8);
6275 tl_assert(sizeof(UInt) == 4);
6276 tl_assert(sizeof(Int) == 4);
6277
6278 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
6279
6280 /* Set up SB */
6281 sb_out = deepCopyIRSBExceptStmts(sb_in);
6282
6283 /* Set up the running environment. Both .sb and .tmpMap are
6284 modified as we go along. Note that tmps are added to both
6285 .sb->tyenv and .tmpMap together, so the valid index-set for
6286 those two arrays should always be identical. */
6287 VG_(memset)(&mce, 0, sizeof(mce));
6288 mce.sb = sb_out;
6289 mce.trace = verboze;
6290 mce.layout = layout;
6291 mce.hWordTy = hWordTy;
6292 mce.bogusLiterals = False;
6293
6294 /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on
6295 Darwin. 10.7 is mostly built with LLVM, which uses these for
6296 bitfield inserts, and we get a lot of false errors if the cheap
6297 interpretation is used, alas. Could solve this much better if
6298 we knew which of such adds came from x86/amd64 LEA instructions,
6299 since these are the only ones really needing the expensive
6300 interpretation, but that would require some way to tag them in
6301 the _toIR.c front ends, which is a lot of faffing around. So
6302 for now just use the slow and blunt-instrument solution. */
6303 mce.useLLVMworkarounds = False;
6304 # if defined(VGO_darwin)
6305 mce.useLLVMworkarounds = True;
6306 # endif
6307
6308 mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
6309 sizeof(TempMapEnt));
6310 VG_(hintSizeXA) (mce.tmpMap, sb_in->tyenv->types_used);
6311 for (i = 0; i < sb_in->tyenv->types_used; i++) {
6312 TempMapEnt ent;
6313 ent.kind = Orig;
6314 ent.shadowV = IRTemp_INVALID;
6315 ent.shadowB = IRTemp_INVALID;
6316 VG_(addToXA)( mce.tmpMap, &ent );
6317 }
6318 tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
6319
6320 if (MC_(clo_expensive_definedness_checks)) {
6321 /* For expensive definedness checking skip looking for bogus
6322 literals. */
6323 mce.bogusLiterals = True;
6324 } else {
6325 /* Make a preliminary inspection of the statements, to see if there
6326 are any dodgy-looking literals. If there are, we generate
6327 extra-detailed (hence extra-expensive) instrumentation in
6328 places. Scan the whole bb even if dodginess is found earlier,
6329 so that the flatness assertion is applied to all stmts. */
6330 Bool bogus = False;
6331
6332 for (i = 0; i < sb_in->stmts_used; i++) {
6333 st = sb_in->stmts[i];
6334 tl_assert(st);
6335 tl_assert(isFlatIRStmt(st));
6336
6337 if (!bogus) {
6338 bogus = checkForBogusLiterals(st);
6339 if (0 && bogus) {
6340 VG_(printf)("bogus: ");
6341 ppIRStmt(st);
6342 VG_(printf)("\n");
6343 }
6344 if (bogus) break;
6345 }
6346 }
6347 mce.bogusLiterals = bogus;
6348 }
6349
6350 /* Copy verbatim any IR preamble preceding the first IMark */
6351
6352 tl_assert(mce.sb == sb_out);
6353 tl_assert(mce.sb != sb_in);
6354
6355 i = 0;
6356 while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
6357
6358 st = sb_in->stmts[i];
6359 tl_assert(st);
6360 tl_assert(isFlatIRStmt(st));
6361
6362 stmt( 'C', &mce, sb_in->stmts[i] );
6363 i++;
6364 }
6365
6366 /* Nasty problem. IR optimisation of the pre-instrumented IR may
6367 cause the IR following the preamble to contain references to IR
6368 temporaries defined in the preamble. Because the preamble isn't
6369 instrumented, these temporaries don't have any shadows.
6370 Nevertheless uses of them following the preamble will cause
6371 memcheck to generate references to their shadows. End effect is
6372 to cause IR sanity check failures, due to references to
6373 non-existent shadows. This is only evident for the complex
6374 preambles used for function wrapping on TOC-afflicted platforms
6375 (ppc64-linux).
6376
6377 The following loop therefore scans the preamble looking for
6378 assignments to temporaries. For each one found it creates an
6379 assignment to the corresponding (V) shadow temp, marking it as
6380 'defined'. This is the same resulting IR as if the main
6381 instrumentation loop before had been applied to the statement
6382 'tmp = CONSTANT'.
6383
6384 Similarly, if origin tracking is enabled, we must generate an
6385 assignment for the corresponding origin (B) shadow, claiming
6386 no-origin, as appropriate for a defined value.
6387 */
6388 for (j = 0; j < i; j++) {
6389 if (sb_in->stmts[j]->tag == Ist_WrTmp) {
6390 /* findShadowTmpV checks its arg is an original tmp;
6391 no need to assert that here. */
6392 IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
6393 IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
6394 IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v);
6395 assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
6396 if (MC_(clo_mc_level) == 3) {
6397 IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
6398 tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
6399 assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
6400 }
6401 if (0) {
6402 VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
6403 ppIRType( ty_v );
6404 VG_(printf)("\n");
6405 }
6406 }
6407 }
6408
6409 /* Iterate over the remaining stmts to generate instrumentation. */
6410
6411 tl_assert(sb_in->stmts_used > 0);
6412 tl_assert(i >= 0);
6413 tl_assert(i < sb_in->stmts_used);
6414 tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
6415
6416 for (/* use current i*/; i < sb_in->stmts_used; i++) {
6417
6418 st = sb_in->stmts[i];
6419 first_stmt = sb_out->stmts_used;
6420
6421 if (verboze) {
6422 VG_(printf)("\n");
6423 ppIRStmt(st);
6424 VG_(printf)("\n");
6425 }
6426
6427 if (MC_(clo_mc_level) == 3) {
6428 /* See comments on case Ist_CAS below. */
6429 if (st->tag != Ist_CAS)
6430 schemeS( &mce, st );
6431 }
6432
6433 /* Generate instrumentation code for each stmt ... */
6434
6435 switch (st->tag) {
6436
6437 case Ist_WrTmp:
6438 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
6439 expr2vbits( &mce, st->Ist.WrTmp.data) );
6440 break;
6441
6442 case Ist_Put:
6443 do_shadow_PUT( &mce,
6444 st->Ist.Put.offset,
6445 st->Ist.Put.data,
6446 NULL /* shadow atom */, NULL /* guard */ );
6447 break;
6448
6449 case Ist_PutI:
6450 do_shadow_PUTI( &mce, st->Ist.PutI.details);
6451 break;
6452
6453 case Ist_Store:
6454 do_shadow_Store( &mce, st->Ist.Store.end,
6455 st->Ist.Store.addr, 0/* addr bias */,
6456 st->Ist.Store.data,
6457 NULL /* shadow data */,
6458 NULL/*guard*/ );
6459 break;
6460
6461 case Ist_StoreG:
6462 do_shadow_StoreG( &mce, st->Ist.StoreG.details );
6463 break;
6464
6465 case Ist_LoadG:
6466 do_shadow_LoadG( &mce, st->Ist.LoadG.details );
6467 break;
6468
6469 case Ist_Exit:
6470 complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
6471 break;
6472
6473 case Ist_IMark:
6474 break;
6475
6476 case Ist_NoOp:
6477 case Ist_MBE:
6478 break;
6479
6480 case Ist_Dirty:
6481 do_shadow_Dirty( &mce, st->Ist.Dirty.details );
6482 break;
6483
6484 case Ist_AbiHint:
6485 do_AbiHint( &mce, st->Ist.AbiHint.base,
6486 st->Ist.AbiHint.len,
6487 st->Ist.AbiHint.nia );
6488 break;
6489
6490 case Ist_CAS:
6491 do_shadow_CAS( &mce, st->Ist.CAS.details );
6492 /* Note, do_shadow_CAS copies the CAS itself to the output
6493 block, because it needs to add instrumentation both
6494 before and after it. Hence skip the copy below. Also
6495 skip the origin-tracking stuff (call to schemeS) above,
6496 since that's all tangled up with it too; do_shadow_CAS
6497 does it all. */
6498 break;
6499
6500 case Ist_LLSC:
6501 do_shadow_LLSC( &mce,
6502 st->Ist.LLSC.end,
6503 st->Ist.LLSC.result,
6504 st->Ist.LLSC.addr,
6505 st->Ist.LLSC.storedata );
6506 break;
6507
6508 default:
6509 VG_(printf)("\n");
6510 ppIRStmt(st);
6511 VG_(printf)("\n");
6512 VG_(tool_panic)("memcheck: unhandled IRStmt");
6513
6514 } /* switch (st->tag) */
6515
6516 if (0 && verboze) {
6517 for (j = first_stmt; j < sb_out->stmts_used; j++) {
6518 VG_(printf)(" ");
6519 ppIRStmt(sb_out->stmts[j]);
6520 VG_(printf)("\n");
6521 }
6522 VG_(printf)("\n");
6523 }
6524
6525 /* ... and finally copy the stmt itself to the output. Except,
6526 skip the copy of IRCASs; see comments on case Ist_CAS
6527 above. */
6528 if (st->tag != Ist_CAS)
6529 stmt('C', &mce, st);
6530 }
6531
6532 /* Now we need to complain if the jump target is undefined. */
6533 first_stmt = sb_out->stmts_used;
6534
6535 if (verboze) {
6536 VG_(printf)("sb_in->next = ");
6537 ppIRExpr(sb_in->next);
6538 VG_(printf)("\n\n");
6539 }
6540
6541 complainIfUndefined( &mce, sb_in->next, NULL );
6542
6543 if (0 && verboze) {
6544 for (j = first_stmt; j < sb_out->stmts_used; j++) {
6545 VG_(printf)(" ");
6546 ppIRStmt(sb_out->stmts[j]);
6547 VG_(printf)("\n");
6548 }
6549 VG_(printf)("\n");
6550 }
6551
6552 /* If this fails, there's been some serious snafu with tmp management
6553 that should be investigated. */
6554 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
6555 VG_(deleteXA)( mce.tmpMap );
6556
6557 tl_assert(mce.sb == sb_out);
6558 return sb_out;
6559 }
6560
6561 /*------------------------------------------------------------*/
6562 /*--- Post-tree-build final tidying ---*/
6563 /*------------------------------------------------------------*/
6564
6565 /* This exploits the observation that Memcheck often produces
6566 repeated conditional calls of the form
6567
6568 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
6569
6570 with the same guard expression G guarding the same helper call.
6571 The second and subsequent calls are redundant. This usually
6572 results from instrumentation of guest code containing multiple
6573 memory references at different constant offsets from the same base
6574 register. After optimisation of the instrumentation, you get a
6575 test for the definedness of the base register for each memory
6576 reference, which is kinda pointless. MC_(final_tidy) therefore
6577 looks for such repeated calls and removes all but the first. */
6578
6579 /* A struct for recording which (helper, guard) pairs we have already
6580 seen. */
6581 typedef
6582 struct { void* entry; IRExpr* guard; }
6583 Pair;
6584
6585 /* Return True if e1 and e2 definitely denote the same value (used to
6586 compare guards). Return False if unknown; False is the safe
6587 answer. Since guest registers and guest memory do not have the
6588 SSA property we must return False if any Gets or Loads appear in
6589 the expression. */
6590
6591 static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
6592 {
6593 if (e1->tag != e2->tag)
6594 return False;
6595 switch (e1->tag) {
6596 case Iex_Const:
6597 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
6598 case Iex_Binop:
6599 return e1->Iex.Binop.op == e2->Iex.Binop.op
6600 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
6601 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
6602 case Iex_Unop:
6603 return e1->Iex.Unop.op == e2->Iex.Unop.op
6604 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
6605 case Iex_RdTmp:
6606 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
6607 case Iex_ITE:
6608 return sameIRValue( e1->Iex.ITE.cond, e2->Iex.ITE.cond )
6609 && sameIRValue( e1->Iex.ITE.iftrue, e2->Iex.ITE.iftrue )
6610 && sameIRValue( e1->Iex.ITE.iffalse, e2->Iex.ITE.iffalse );
6611 case Iex_Qop:
6612 case Iex_Triop:
6613 case Iex_CCall:
6614 /* be lazy. Could define equality for these, but they never
6615 appear to be used. */
6616 return False;
6617 case Iex_Get:
6618 case Iex_GetI:
6619 case Iex_Load:
6620 /* be conservative - these may not give the same value each
6621 time */
6622 return False;
6623 case Iex_Binder:
6624 /* should never see this */
6625 /* fallthrough */
6626 default:
6627 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
6628 ppIRExpr(e1);
6629 VG_(tool_panic)("memcheck:sameIRValue");
6630 return False;
6631 }
6632 }
6633
6634 /* See if 'pairs' already has an entry for (entry, guard). Return
6635 True if so. If not, add an entry. */
6636
6637 static
6638 Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
6639 {
6640 Pair p;
6641 Pair* pp;
6642 Int i, n = VG_(sizeXA)( pairs );
6643 for (i = 0; i < n; i++) {
6644 pp = VG_(indexXA)( pairs, i );
6645 if (pp->entry == entry && sameIRValue(pp->guard, guard))
6646 return True;
6647 }
6648 p.guard = guard;
6649 p.entry = entry;
6650 VG_(addToXA)( pairs, &p );
6651 return False;
6652 }
6653
6654 static Bool is_helperc_value_checkN_fail ( const HChar* name )
6655 {
6656 return
6657 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
6658 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
6659 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
6660 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
6661 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
6662 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
6663 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
6664 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
6665 }
6666
6667 IRSB* MC_(final_tidy) ( IRSB* sb_in )
6668 {
6669 Int i;
6670 IRStmt* st;
6671 IRDirty* di;
6672 IRExpr* guard;
6673 IRCallee* cee;
6674 Bool alreadyPresent;
6675 XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
6676 VG_(free), sizeof(Pair) );
6677 /* Scan forwards through the statements. Each time a call to one
6678 of the relevant helpers is seen, check if we have made a
6679 previous call to the same helper using the same guard
6680 expression, and if so, delete the call. */
6681 for (i = 0; i < sb_in->stmts_used; i++) {
6682 st = sb_in->stmts[i];
6683 tl_assert(st);
6684 if (st->tag != Ist_Dirty)
6685 continue;
6686 di = st->Ist.Dirty.details;
6687 guard = di->guard;
6688 tl_assert(guard);
6689 if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
6690 cee = di->cee;
6691 if (!is_helperc_value_checkN_fail( cee->name ))
6692 continue;
6693 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
6694 guard 'guard'. Check if we have already seen a call to this
6695 function with the same guard. If so, delete it. If not,
6696 add it to the set of calls we do know about. */
6697 alreadyPresent = check_or_add( pairs, guard, cee->addr );
6698 if (alreadyPresent) {
6699 sb_in->stmts[i] = IRStmt_NoOp();
6700 if (0) VG_(printf)("XX\n");
6701 }
6702 }
6703 VG_(deleteXA)( pairs );
6704 return sb_in;
6705 }
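
/* Illustrative sketch of the effect (hypothetical IR, shown roughly in
   the IR printer's notation): if the block contains two statements of
   the form

      DIRTY t5 ::: MC_(helperc_value_check4_fail_no_o)()
      ...
      DIRTY t5 ::: MC_(helperc_value_check4_fail_no_o)()

   where both calls are guarded by the same expression bound to t5,
   the second one is overwritten with an IR no-op: one complaint per
   (helper, guard) pair is enough. */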


/*------------------------------------------------------------*/
/*--- Origin tracking stuff                                ---*/
/*------------------------------------------------------------*/

/* Almost identical to findShadowTmpV. */
static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowB == IRTemp_INVALID) {
      IRTemp tmpB
         = newTemp( mce, Ity_I32, BSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowB == IRTemp_INVALID);
      ent->shadowB = tmpB;
   }
   return ent->shadowB;
}

static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
{
   return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
}
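
/* Reminder: origin tags (otags) are 32-bit values, with zero meaning
   "no origin information".  gen_maxU32 combines two otags with
   Iop_Max32U, so the numerically larger tag wins and a zero tag never
   displaces a real one; e.g. combining 0x0 with a (hypothetical) tag
   0x40001 yields 0x40001. */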


/* Make a guarded origin load, with no special handling in the
   didn't-happen case.  A GUARD of NULL is assumed to mean "always
   True".

   Generate IR to do a shadow origins load from BASEADDR+OFFSET and
   return the otag.  The loaded size is SZB.  If GUARD evaluates to
   False at run time then the returned otag is zero.
*/
static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB,
                                    IRAtom* baseaddr,
                                    Int offset, IRExpr* guard )
{
   void*    hFun;
   const HChar* hName;
   IRTemp   bTmp;
   IRDirty* di;
   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*  ea    = baseaddr;
   if (offset != 0) {
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   bTmp = newTemp(mce, mce->hWordTy, BSh);

   switch (szB) {
      case 1: hFun  = (void*)&MC_(helperc_b_load1);
              hName = "MC_(helperc_b_load1)";
              break;
      case 2: hFun  = (void*)&MC_(helperc_b_load2);
              hName = "MC_(helperc_b_load2)";
              break;
      case 4: hFun  = (void*)&MC_(helperc_b_load4);
              hName = "MC_(helperc_b_load4)";
              break;
      case 8: hFun  = (void*)&MC_(helperc_b_load8);
              hName = "MC_(helperc_b_load8)";
              break;
      case 16: hFun  = (void*)&MC_(helperc_b_load16);
               hName = "MC_(helperc_b_load16)";
               break;
      case 32: hFun  = (void*)&MC_(helperc_b_load32);
               hName = "MC_(helperc_b_load32)";
               break;
      default:
         VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
         tl_assert(0);
   }
   di = unsafeIRDirty_1_N(
           bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
           mkIRExprVec_1( ea )
        );
   if (guard) {
      di->guard = guard;
      /* Ideally the didn't-happen return value here would be
         all-zeroes (unknown-origin), so it'd be harmless if it got
         used inadvertently.  We slum it out with the IR-mandated
         default value (0b01 repeating, 0x55 etc) as that'll probably
         trump all legitimate otags via Max32, and it's pretty
         obviously bogus. */
   }
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   stmt( 'B', mce, IRStmt_Dirty(di) );
   if (mce->hWordTy == Ity_I64) {
      /* 64-bit host */
      IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
      assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
      return mkexpr(bTmp32);
   } else {
      /* 32-bit host */
      return mkexpr(bTmp);
   }
}
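
/* Illustrative sketch (hypothetical temporaries): on a 64-bit host,
   gen_guarded_load_b(mce, 4, t20, 8, NULL) emits roughly

      t21 = Add64(t20, 0x8:I64)
      t22 = DIRTY ... ::: MC_(helperc_b_load4)(t21)   -- t22 :: I64
      t23 = 64to32(t22)

   and returns t23 as the 32-bit otag. */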


/* Generate IR to do a shadow origins load from BASEADDR+OFFSET.  The
   loaded size is SZB.  The load is regarded as unconditional (always
   happens).
*/
static IRAtom* gen_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
                            Int offset )
{
   return gen_guarded_load_b(mce, szB, baseaddr, offset, NULL/*guard*/);
}


/* The most general handler for guarded origin loads.  A GUARD of NULL
   is assumed to mean "always True".

   Generate IR to do a shadow origin load from ADDR+BIAS and return
   the B bits.  The loaded type is TY.  If GUARD evaluates to False at
   run time then the returned B bits are simply BALT instead.
*/
static
IRAtom* expr2ori_Load_guarded_General ( MCEnv* mce,
                                        IRType ty,
                                        IRAtom* addr, UInt bias,
                                        IRAtom* guard, IRAtom* balt )
{
   /* If the guard evaluates to True, this will hold the loaded
      origin.  If the guard evaluates to False, this will hold the
      IR-mandated didn't-happen value (see gen_guarded_load_b), which
      is not a meaningful origin, so we replace it using an ITE
      below. */
   IRAtom* iftrue
      = assignNew('B', mce, Ity_I32,
                  gen_guarded_load_b(mce, sizeofIRType(ty),
                                     addr, bias, guard));
   /* These are the bits we will return if the load doesn't take
      place. */
   IRAtom* iffalse
      = balt;
   /* Prepare the cond for the ITE.  Convert a NULL cond into
      something that iropt knows how to fold out later. */
   IRAtom* cond
      = guard == NULL  ? mkU1(1)  : guard;
   /* And assemble the final result. */
   return assignNew('B', mce, Ity_I32, IRExpr_ITE(cond, iftrue, iffalse));
}
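
/* Illustrative note (made-up temporaries): for a guarded 4-byte load
   whose guard is bound to t5 and whose alternative origin is balt,
   the net effect is roughly

      t30 = <otag produced by MC_(helperc_b_load4), under guard t5>
      t31 = ITE(t5, t30, balt)

   so a failed guard yields exactly the caller-supplied BALT. */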


/* Generate a shadow origins store.  guard :: Ity_I1 controls whether
   the store really happens; NULL means it unconditionally does. */
static void gen_store_b ( MCEnv* mce, Int szB,
                          IRAtom* baseaddr, Int offset, IRAtom* dataB,
                          IRAtom* guard )
{
   void*    hFun;
   const HChar* hName;
   IRDirty* di;
   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*  ea    = baseaddr;
   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }
   if (offset != 0) {
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   if (mce->hWordTy == Ity_I64)
      dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));

   switch (szB) {
      case 1: hFun  = (void*)&MC_(helperc_b_store1);
              hName = "MC_(helperc_b_store1)";
              break;
      case 2: hFun  = (void*)&MC_(helperc_b_store2);
              hName = "MC_(helperc_b_store2)";
              break;
      case 4: hFun  = (void*)&MC_(helperc_b_store4);
              hName = "MC_(helperc_b_store4)";
              break;
      case 8: hFun  = (void*)&MC_(helperc_b_store8);
              hName = "MC_(helperc_b_store8)";
              break;
      case 16: hFun  = (void*)&MC_(helperc_b_store16);
               hName = "MC_(helperc_b_store16)";
               break;
      case 32: hFun  = (void*)&MC_(helperc_b_store32);
               hName = "MC_(helperc_b_store32)";
               break;
      default:
         tl_assert(0);
   }
   di = unsafeIRDirty_0_N( 2/*regparms*/,
                           hName, VG_(fnptr_to_fnentry)( hFun ),
                           mkIRExprVec_2( ea, dataB )
                         );
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   if (guard) di->guard = guard;
   stmt( 'B', mce, IRStmt_Dirty(di) );
}
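
/* Illustrative sketch (hypothetical temporaries): on a 64-bit host,
   gen_store_b(mce, 8, t20, 0, t25, NULL) widens the 32-bit otag and
   emits roughly

      t26 = 32Uto64(t25)
      DIRTY ... ::: MC_(helperc_b_store8)(t20, t26)

   i.e. the otag is handed to the store helper as a host word. */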

static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
   IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
   if (eTy == Ity_I64)
      return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
   if (eTy == Ity_I32)
      return e;
   tl_assert(0);
}

static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
   IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
   tl_assert(eTy == Ity_I32);
   if (dstTy == Ity_I64)
      return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
   tl_assert(0);
}


static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (e->tag) {

      case Iex_GetI: {
         IRRegArray* descr_b;
         IRAtom   *t1, *t2, *t3, *t4;
         IRRegArray* descr = e->Iex.GetI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason, use the
            usual approximation. */
         if (equivIntTy == Ity_INVALID)
            return mkU32(0);
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                                 equivIntTy, descr->nElems );
         /* Do a shadow indexed get of the same size, giving t1.  Take
            the bottom 32 bits of it, giving t2.  Compute into t3 the
            origin for the index (almost certainly zero, but there's
            no harm in being completely general here, since iropt will
            remove any useless code), and fold it in, giving a final
            value t4. */
         t1 = assignNew( 'B', mce, equivIntTy,
                         IRExpr_GetI( descr_b, e->Iex.GetI.ix,
                                      e->Iex.GetI.bias ));
         t2 = narrowTo32( mce, t1 );
         t3 = schemeE( mce, e->Iex.GetI.ix );
         t4 = gen_maxU32( mce, t2, t3 );
         return t4;
      }
      case Iex_CCall: {
         Int i;
         IRAtom*  here;
         IRExpr** args = e->Iex.CCall.args;
         IRAtom*  curr = mkU32(0);
         for (i = 0; args[i]; i++) {
            tl_assert(i < 32);
            tl_assert(isOriginalAtom(mce, args[i]));
            /* Only take notice of this arg if the callee's
               mc-exclusion mask does not say it is to be excluded. */
            if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
               /* the arg is to be excluded from definedness checking.
                  Do nothing. */
               if (0) VG_(printf)("excluding %s(%d)\n",
                                  e->Iex.CCall.cee->name, i);
            } else {
               /* calculate the arg's definedness, and pessimistically
                  merge it in. */
               here = schemeE( mce, args[i] );
               curr = gen_maxU32( mce, curr, here );
            }
         }
         return curr;
      }
      case Iex_Load: {
         Int dszB;
         dszB = sizeofIRType(e->Iex.Load.ty);
         /* assert that the B value for the address is already
            available (somewhere) */
         tl_assert(isIRAtom(e->Iex.Load.addr));
         tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
         return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
      }
      case Iex_ITE: {
         IRAtom* b1 = schemeE( mce, e->Iex.ITE.cond );
         IRAtom* b3 = schemeE( mce, e->Iex.ITE.iftrue );
         IRAtom* b2 = schemeE( mce, e->Iex.ITE.iffalse );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
      }
      case Iex_Qop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
         IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
         return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
                                 gen_maxU32( mce, b3, b4 ) );
      }
      case Iex_Triop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
      }
      case Iex_Binop: {
         switch (e->Iex.Binop.op) {
            case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
            case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
            case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
            case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
               /* Just say these all produce a defined result,
                  regardless of their arguments.  See
                  COMMENT_ON_CasCmpEQ in this file. */
               return mkU32(0);
            default: {
               IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
               IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
               return gen_maxU32( mce, b1, b2 );
            }
         }
         tl_assert(0);
         /*NOTREACHED*/
      }
      case Iex_Unop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
         return b1;
      }
      case Iex_Const:
         return mkU32(0);
      case Iex_RdTmp:
         return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
      case Iex_Get: {
         Int b_offset = MC_(get_otrack_shadow_offset)(
                           e->Iex.Get.offset,
                           sizeofIRType(e->Iex.Get.ty)
                        );
         tl_assert(b_offset >= -1
                   && b_offset <= mce->layout->total_sizeB - 4);
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
                               Ity_I32 );
         }
         return mkU32(0);
      }
      default:
         VG_(printf)("mc_translate.c: schemeE: unhandled: ");
         ppIRExpr(e);
         VG_(tool_panic)("memcheck:schemeE");
   }
}
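
/* Worked example (informal, temporaries made up): for the expression
   Add32(t7, LDle:I32(t9)), schemeE produces roughly

      b1 = <B shadow of t7>
      b2 = MC_(helperc_b_load4)(t9)      -- via gen_load_b
      result = Max32U(b1, b2)

   i.e. the origin of a computed value is the max of the origins of
   everything that fed into it, with constants contributing 0. */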


static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
{
   // This is a hacked version of do_shadow_Dirty
   Int       i, k, n, toDo, gSz, gOff;
   IRAtom    *here, *curr;
   IRTemp    dst;

   /* First check the guard. */
   curr = schemeE( mce, d->guard );

   /* Now round up all inputs and maxU32 over them. */

   /* Inputs: unmasked args
      Note: arguments are evaluated REGARDLESS of the guard expression */
   for (i = 0; d->args[i]; i++) {
      IRAtom* arg = d->args[i];
      if ( (d->cee->mcx_mask & (1<<i))
           || UNLIKELY(is_IRExpr_VECRET_or_BBPTR(arg)) ) {
         /* ignore this arg */
      } else {
         here = schemeE( mce, arg );
         curr = gen_maxU32( mce, curr, here );
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz)) {
            if (0)
               VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                           gOff, gSz);
            continue;
         }

         /* This state element is read or modified.  So we need to
            consider it.  If larger than 4 bytes, deal with it in
            4-byte chunks. */
         while (True) {
            Int b_offset;
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 4 ? gSz : 4;
            /* update 'curr' with maxU32 of the state slice
               gOff .. gOff+n-1 */
            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
            if (b_offset != -1) {
               /* Observe the guard expression.  If it is false use 0,
                  i.e. nothing is known about the origin */
               IRAtom *cond, *iffalse, *iftrue;

               cond    = assignNew( 'B', mce, Ity_I1, d->guard);
               iffalse = mkU32(0);
               iftrue  = assignNew( 'B', mce, Ity_I32,
                                    IRExpr_Get(b_offset
                                                  + 2*mce->layout->total_sizeB,
                                               Ity_I32));
               here = assignNew( 'B', mce, Ity_I32,
                                 IRExpr_ITE(cond, iftrue, iffalse));
               curr = gen_maxU32( mce, curr, here );
            }
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Inputs: memory */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      here = schemeE( mce, d->mAddr );
      curr = gen_maxU32( mce, curr, here );
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single otag
         via Max32U, but nevertheless choose an endianness which is
         hopefully native to the platform. */
      while (toDo >= 4) {
         here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

   /* Whew!  So curr is a 32-bit B-value which should give an origin
      of some use if any of the inputs to the helper are undefined.
      Now we need to re-distribute the results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst = findShadowTmpB(mce, d->tmp);
      assign( 'V', mce, dst, curr );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz))
            continue;

         /* This state element is written or modified.  So we need to
            consider it.  If larger than 4 bytes, deal with it in
            4-byte chunks. */
         while (True) {
            Int b_offset;
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 4 ? gSz : 4;
            /* Write 'curr' to the state slice gOff .. gOff+n-1 */
            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
            if (b_offset != -1) {

               /* If the guard expression evaluates to false we simply Put
                  the value that is already stored in the guest state slot */
               IRAtom *cond, *iffalse;

               cond    = assignNew('B', mce, Ity_I1,
                                   d->guard);
               iffalse = assignNew('B', mce, Ity_I32,
                                   IRExpr_Get(b_offset +
                                              2*mce->layout->total_sizeB,
                                              Ity_I32));
               curr = assignNew('V', mce, Ity_I32,
                                IRExpr_ITE(cond, curr, iffalse));

               stmt( 'B', mce, IRStmt_Put(b_offset
                                             + 2*mce->layout->total_sizeB,
                                          curr ));
            }
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }
}
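
/* Note on the chunking above: for, say, d->mSize == 7 the loops issue
   a 4-byte access at offset 0, a 2-byte access at offset 4 and a
   1-byte access at offset 6, so every byte of the described memory
   region is covered exactly once. */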


/* Generate IR for origin shadowing for a general guarded store. */
static void do_origins_Store_guarded ( MCEnv* mce,
                                       IREndness stEnd,
                                       IRExpr* stAddr,
                                       IRExpr* stData,
                                       IRExpr* guard )
{
   Int     dszB;
   IRAtom* dataB;
   /* assert that the B value for the address is already available
      (somewhere), since the call to schemeE will want to see it.
      XXXX how does this actually ensure that?? */
   tl_assert(isIRAtom(stAddr));
   tl_assert(isIRAtom(stData));
   dszB  = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
   dataB = schemeE( mce, stData );
   gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, guard );
}


/* Generate IR for origin shadowing for a plain store. */
static void do_origins_Store_plain ( MCEnv* mce,
                                     IREndness stEnd,
                                     IRExpr* stAddr,
                                     IRExpr* stData )
{
   do_origins_Store_guarded ( mce, stEnd, stAddr, stData,
                              NULL/*guard*/ );
}


/* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */

static void do_origins_StoreG ( MCEnv* mce, IRStoreG* sg )
{
   do_origins_Store_guarded( mce, sg->end, sg->addr,
                             sg->data, sg->guard );
}

static void do_origins_LoadG ( MCEnv* mce, IRLoadG* lg )
{
   IRType loadedTy = Ity_INVALID;
   switch (lg->cvt) {
      case ILGop_IdentV128: loadedTy = Ity_V128; break;
      case ILGop_Ident64:   loadedTy = Ity_I64;  break;
      case ILGop_Ident32:   loadedTy = Ity_I32;  break;
      case ILGop_16Uto32:   loadedTy = Ity_I16;  break;
      case ILGop_16Sto32:   loadedTy = Ity_I16;  break;
      case ILGop_8Uto32:    loadedTy = Ity_I8;   break;
      case ILGop_8Sto32:    loadedTy = Ity_I8;   break;
      default: VG_(tool_panic)("schemeS.IRLoadG");
   }
   IRAtom* ori_alt
      = schemeE( mce, lg->alt );
   IRAtom* ori_final
      = expr2ori_Load_guarded_General(mce, loadedTy,
                                      lg->addr, 0/*addr bias*/,
                                      lg->guard, ori_alt );
   /* And finally, bind the origin to the destination temporary. */
   assign( 'B', mce, findShadowTmpB(mce, lg->dst), ori_final );
}
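
/* Informal example: for a guarded load with cvt == ILGop_16Uto32,
   loadedTy ends up as Ity_I16, so the origin helper fetches a
   2-byte-wide otag, while the origin of lg->alt (via schemeE) is what
   the destination temporary gets if the guard turns out to be False
   at run time. */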


static void schemeS ( MCEnv* mce, IRStmt* st )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (st->tag) {

      case Ist_AbiHint:
         /* The value-check instrumenter handles this - by arranging
            to pass the address of the next instruction to
            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
            happen for origin tracking w.r.t. AbiHints.  So there is
            nothing to do here. */
         break;

      case Ist_PutI: {
         IRPutI *puti = st->Ist.PutI.details;
         IRRegArray* descr_b;
         IRAtom   *t1, *t2, *t3, *t4;
         IRRegArray* descr = puti->descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason,
            generate no code. */
         if (equivIntTy == Ity_INVALID)
            break;
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         descr_b
            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                            equivIntTy, descr->nElems );
         /* Compute a value to Put - the conjoinment of the origin for
            the data to be Put-ted (obviously) and of the index value
            (not so obviously). */
         t1 = schemeE( mce, puti->data );
         t2 = schemeE( mce, puti->ix );
         t3 = gen_maxU32( mce, t1, t2 );
         t4 = zWidenFrom32( mce, equivIntTy, t3 );
         stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
                                               puti->bias, t4) ));
         break;
      }

      case Ist_Dirty:
         do_origins_Dirty( mce, st->Ist.Dirty.details );
         break;

      case Ist_Store:
         do_origins_Store_plain( mce, st->Ist.Store.end,
                                 st->Ist.Store.addr,
                                 st->Ist.Store.data );
         break;

      case Ist_StoreG:
         do_origins_StoreG( mce, st->Ist.StoreG.details );
         break;

      case Ist_LoadG:
         do_origins_LoadG( mce, st->Ist.LoadG.details );
         break;

      case Ist_LLSC: {
         /* In short: treat a load-linked like a normal load followed
            by an assignment of the loaded (shadow) data to the result
            temporary.  Treat a store-conditional like a normal store,
            and mark the result temporary as defined. */
         if (st->Ist.LLSC.storedata == NULL) {
            /* Load Linked */
            IRType resTy
               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
            IRExpr* vanillaLoad
               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                      || resTy == Ity_I16 || resTy == Ity_I8);
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              schemeE(mce, vanillaLoad));
         } else {
            /* Store conditional */
            do_origins_Store_plain( mce, st->Ist.LLSC.end,
                                    st->Ist.LLSC.addr,
                                    st->Ist.LLSC.storedata );
            /* For the rationale behind this, see comments at the
               place where the V-shadow for .result is constructed, in
               do_shadow_LLSC.  In short, we regard .result as
               always-defined. */
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              mkU32(0) );
         }
         break;
      }

      case Ist_Put: {
         Int b_offset
            = MC_(get_otrack_shadow_offset)(
                 st->Ist.Put.offset,
                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
              );
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       schemeE( mce, st->Ist.Put.data )) );
         }
         break;
      }

      case Ist_WrTmp:
         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
                           schemeE(mce, st->Ist.WrTmp.data) );
         break;

      case Ist_MBE:
      case Ist_NoOp:
      case Ist_Exit:
      case Ist_IMark:
         break;

      default:
         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
         ppIRStmt(st);
         VG_(tool_panic)("memcheck:schemeS");
   }
}
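
/* Informal example (temporaries made up): for the statement
   t7 = Add32(GET:I32(16), t5), schemeS takes the Ist_WrTmp case and
   binds the B shadow of t7 to roughly

      Max32U( GET:I32(<B-shadow offset for guest offset 16>),
              <B shadow of t5> )

   assuming guest offset 16 has an origin-trackable shadow slot; if it
   does not, its contribution is simply 0:I32. */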


/*--------------------------------------------------------------------*/
/*--- end                                             mc_translate.c ---*/
/*--------------------------------------------------------------------*/