1 
2 /*--------------------------------------------------------------------*/
3 /*--- Instrument IR to perform memory checking operations.         ---*/
4 /*---                                               mc_translate.c ---*/
5 /*--------------------------------------------------------------------*/
6 
7 /*
8    This file is part of MemCheck, a heavyweight Valgrind tool for
9    detecting memory errors.
10 
11    Copyright (C) 2000-2015 Julian Seward
12       jseward@acm.org
13 
14    This program is free software; you can redistribute it and/or
15    modify it under the terms of the GNU General Public License as
16    published by the Free Software Foundation; either version 2 of the
17    License, or (at your option) any later version.
18 
19    This program is distributed in the hope that it will be useful, but
20    WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22    General Public License for more details.
23 
24    You should have received a copy of the GNU General Public License
25    along with this program; if not, write to the Free Software
26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27    02111-1307, USA.
28 
29    The GNU General Public License is contained in the file COPYING.
30 */
31 
32 #include "pub_tool_basics.h"
33 #include "pub_tool_poolalloc.h"     // For mc_include.h
34 #include "pub_tool_hashtable.h"     // For mc_include.h
35 #include "pub_tool_libcassert.h"
36 #include "pub_tool_libcprint.h"
37 #include "pub_tool_tooliface.h"
38 #include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
39 #include "pub_tool_xarray.h"
40 #include "pub_tool_mallocfree.h"
41 #include "pub_tool_libcbase.h"
42 
43 #include "mc_include.h"
44 
45 
46 /* FIXMEs JRS 2011-June-16.
47 
48    Check the interpretation for vector narrowing and widening ops,
49    particularly the saturating ones.  I suspect they are either overly
50    pessimistic and/or wrong.
51 
52    Iop_QandSQsh64x2 and friends (vector-by-vector bidirectional
53    saturating shifts): the interpretation is overly pessimistic.
54    See comments on the relevant cases below for details.
55 
56    Iop_Sh64Sx2 and friends (vector-by-vector bidirectional shifts,
57    both rounding and non-rounding variants): ditto
58 */
59 
60 /* This file implements the Memcheck instrumentation, and in
61    particular contains the core of its undefined value detection
62    machinery.  For a comprehensive background of the terminology,
63    algorithms and rationale used herein, read:
64 
65      Using Valgrind to detect undefined value errors with
66      bit-precision
67 
68      Julian Seward and Nicholas Nethercote
69 
70      2005 USENIX Annual Technical Conference (General Track),
71      Anaheim, CA, USA, April 10-15, 2005.
72 
73    ----
74 
75    Here is as good a place as any to record exactly when V bits are and
76    should be checked, why, and what function is responsible.
77 
78 
79    Memcheck complains when an undefined value is used:
80 
81    1. In the condition of a conditional branch.  Because it could cause
82       incorrect control flow, and thus cause incorrect externally-visible
83       behaviour.  [mc_translate.c:complainIfUndefined]
84 
85    2. As an argument to a system call, or as the value that specifies
86       the system call number.  Because it could cause an incorrect
87       externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]
88 
89    3. As the address in a load or store.  Because it could cause an
90       incorrect value to be used later, which could cause externally-visible
91       behaviour (eg. via incorrect control flow or an incorrect system call
92       argument)  [complainIfUndefined]
93 
94    4. As the target address of a branch.  Because it could cause incorrect
95       control flow.  [complainIfUndefined]
96 
97    5. As an argument to setenv, unsetenv, or putenv.  Because it could put
98       an incorrect value into the external environment.
99       [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
100 
101    6. As the index in a GETI or PUTI operation.  I'm not sure why... (njn).
102       [complainIfUndefined]
103 
104    7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
105       VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
106       requested it.  [in memcheck.h]
107 
108 
109    Memcheck also complains, but should not, when an undefined value is used:
110 
111    8. As the shift value in certain SIMD shift operations (but not in the
112       standard integer shift operations).  This inconsistency is due to
113       historical reasons.)  [complainIfUndefined]
114 
115 
116    Memcheck does not complain, but should, when an undefined value is used:
117 
118    9. As an input to a client request.  Because the client request may
119       affect the visible behaviour -- see bug #144362 for an example
120       involving the malloc replacements in vg_replace_malloc.c and
121       VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
122       isn't identified.  That bug report also has some info on how to solve
123       the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
124 
125 
126    In practice, 1 and 2 account for the vast majority of cases.
127 */
128 
129 /* Generation of addr-definedness, addr-validity and
130    guard-definedness checks pertaining to loads and stores (Iex_Load,
131    Ist_Store, IRLoadG, IRStoreG, LLSC, CAS and Dirty memory
132    loads/stores) was re-checked 11 May 2013. */
133 
134 /*------------------------------------------------------------*/
135 /*--- Forward decls                                        ---*/
136 /*------------------------------------------------------------*/
137 
138 struct _MCEnv;
139 
140 static IRType  shadowTypeV ( IRType ty );
141 static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
142 static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
143 
144 static IRExpr *i128_const_zero(void);
145 
146 /*------------------------------------------------------------*/
147 /*--- Memcheck running state, and tmp management.          ---*/
148 /*------------------------------------------------------------*/
149 
150 /* Carries info about a particular tmp.  The tmp's number is not
151    recorded, as this is implied by (equal to) its index in the tmpMap
152    in MCEnv.  The tmp's type is also not recorded, as this is present
153    in MCEnv.sb->tyenv.
154 
155    When .kind is Orig, .shadowV and .shadowB may give the identities
156    of the temps currently holding the associated definedness (shadowV)
157    and origin (shadowB) values, or these may be IRTemp_INVALID if code
158    to compute such values has not yet been emitted.
159 
160    When .kind is VSh or BSh then the tmp is holds a V- or B- value,
161    and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
162    illogical for a shadow tmp itself to be shadowed.
163 */
164 typedef
165    enum { Orig=1, VSh=2, BSh=3 }
166    TempKind;
167 
168 typedef
169    struct {
170       TempKind kind;
171       IRTemp   shadowV;
172       IRTemp   shadowB;
173    }
174    TempMapEnt;
175 
176 
177 /* Carries around state during memcheck instrumentation. */
178 typedef
179    struct _MCEnv {
180       /* MODIFIED: the superblock being constructed.  IRStmts are
181          added. */
182       IRSB* sb;
183       Bool  trace;
184 
185       /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
186          current kind and possibly shadow temps for each temp in the
187          IRSB being constructed.  Note that it does not contain the
188          type of each tmp.  If you want to know the type, look at the
189          relevant entry in sb->tyenv.  It follows that at all times
190          during the instrumentation process, the valid indices for
191          tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
192          total number of Orig, V- and B- temps allocated so far.
193 
194          The reason for this strange split (types in one place, all
195          other info in another) is that we need the types to be
196          attached to sb so as to make it possible to do
197          "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
198          instrumentation process. */
199       XArray* /* of TempMapEnt */ tmpMap;
200 
201       /* MODIFIED: indicates whether "bogus" literals have so far been
202          found.  Starts off False, and may change to True. */
203       Bool bogusLiterals;
204 
205       /* READONLY: indicates whether we should use expensive
206          interpretations of integer adds, since unfortunately LLVM
207          uses them to do ORs in some circumstances.  Defaulted to True
208          on MacOS and False everywhere else. */
209       Bool useLLVMworkarounds;
210 
211       /* READONLY: the guest layout.  This indicates which parts of
212          the guest state should be regarded as 'always defined'. */
213       const VexGuestLayout* layout;
214 
215       /* READONLY: the host word type.  Needed for constructing
216          arguments of type 'HWord' to be passed to helper functions.
217          Ity_I32 or Ity_I64 only. */
218       IRType hWordTy;
219    }
220    MCEnv;
221 
222 /* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
223    demand), as they are encountered.  This is for two reasons.
224 
225    (1) (less important reason): Many original tmps are unused due to
226    initial IR optimisation, and we do not want to spaces in tables
227    tracking them.
228 
229    Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
230    table indexed [0 .. n_types-1], which gives the current shadow for
231    each original tmp, or INVALID_IRTEMP if none is so far assigned.
232    It is necessary to support making multiple assignments to a shadow
233    -- specifically, after testing a shadow for definedness, it needs
234    to be made defined.  But IR's SSA property disallows this.
235 
236    (2) (more important reason): Therefore, when a shadow needs to get
237    a new value, a new temporary is created, the value is assigned to
238    that, and the tmpMap is updated to reflect the new binding.
239 
240    A corollary is that if the tmpMap maps a given tmp to
241    IRTemp_INVALID and we are hoping to read that shadow tmp, it means
242    there's a read-before-write error in the original tmps.  The IR
243    sanity checker should catch all such anomalies, however.
244 */
245 
246 /* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
247    both the table in mce->sb and to our auxiliary mapping.  Note that
248    newTemp may cause mce->tmpMap to resize, hence previous results
249    from VG_(indexXA)(mce->tmpMap) are invalidated. */
newTemp(MCEnv * mce,IRType ty,TempKind kind)250 static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
251 {
252    Word       newIx;
253    TempMapEnt ent;
254    IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
255    ent.kind    = kind;
256    ent.shadowV = IRTemp_INVALID;
257    ent.shadowB = IRTemp_INVALID;
258    newIx = VG_(addToXA)( mce->tmpMap, &ent );
259    tl_assert(newIx == (Word)tmp);
260    return tmp;
261 }
262 
263 
264 /* Find the tmp currently shadowing the given original tmp.  If none
265    so far exists, allocate one.  */
findShadowTmpV(MCEnv * mce,IRTemp orig)266 static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
267 {
268    TempMapEnt* ent;
269    /* VG_(indexXA) range-checks 'orig', hence no need to check
270       here. */
271    ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
272    tl_assert(ent->kind == Orig);
273    if (ent->shadowV == IRTemp_INVALID) {
274       IRTemp tmpV
275         = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
276       /* newTemp may cause mce->tmpMap to resize, hence previous results
277          from VG_(indexXA) are invalid. */
278       ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
279       tl_assert(ent->kind == Orig);
280       tl_assert(ent->shadowV == IRTemp_INVALID);
281       ent->shadowV = tmpV;
282    }
283    return ent->shadowV;
284 }
285 
286 /* Allocate a new shadow for the given original tmp.  This means any
287    previous shadow is abandoned.  This is needed because it is
288    necessary to give a new value to a shadow once it has been tested
289    for undefinedness, but unfortunately IR's SSA property disallows
290    this.  Instead we must abandon the old shadow, allocate a new one
291    and use that instead.
292 
293    This is the same as findShadowTmpV, except we don't bother to see
294    if a shadow temp already existed -- we simply allocate a new one
295    regardless. */
newShadowTmpV(MCEnv * mce,IRTemp orig)296 static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
297 {
298    TempMapEnt* ent;
299    /* VG_(indexXA) range-checks 'orig', hence no need to check
300       here. */
301    ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
302    tl_assert(ent->kind == Orig);
303    if (1) {
304       IRTemp tmpV
305         = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
306       /* newTemp may cause mce->tmpMap to resize, hence previous results
307          from VG_(indexXA) are invalid. */
308       ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
309       tl_assert(ent->kind == Orig);
310       ent->shadowV = tmpV;
311    }
312 }
313 
314 
315 /*------------------------------------------------------------*/
316 /*--- IRAtoms -- a subset of IRExprs                       ---*/
317 /*------------------------------------------------------------*/
318 
319 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
320    isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
321    input, most of this code deals in atoms.  Usefully, a value atom
322    always has a V-value which is also an atom: constants are shadowed
323    by constants, and temps are shadowed by the corresponding shadow
324    temporary. */
325 
326 typedef  IRExpr  IRAtom;
327 
328 /* (used for sanity checks only): is this an atom which looks
329    like it's from original code? */
isOriginalAtom(MCEnv * mce,IRAtom * a1)330 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
331 {
332    if (a1->tag == Iex_Const)
333       return True;
334    if (a1->tag == Iex_RdTmp) {
335       TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
336       return ent->kind == Orig;
337    }
338    return False;
339 }
340 
341 /* (used for sanity checks only): is this an atom which looks
342    like it's from shadow code? */
isShadowAtom(MCEnv * mce,IRAtom * a1)343 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
344 {
345    if (a1->tag == Iex_Const)
346       return True;
347    if (a1->tag == Iex_RdTmp) {
348       TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
349       return ent->kind == VSh || ent->kind == BSh;
350    }
351    return False;
352 }
353 
354 /* (used for sanity checks only): check that both args are atoms and
355    are identically-kinded. */
sameKindedAtoms(IRAtom * a1,IRAtom * a2)356 static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
357 {
358    if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
359       return True;
360    if (a1->tag == Iex_Const && a2->tag == Iex_Const)
361       return True;
362    return False;
363 }
364 
365 
366 /*------------------------------------------------------------*/
367 /*--- Type management                                      ---*/
368 /*------------------------------------------------------------*/
369 
370 /* Shadow state is always accessed using integer types.  This returns
371    an integer type with the same size (as per sizeofIRType) as the
372    given type.  The only valid shadow types are Bit, I8, I16, I32,
373    I64, I128, V128, V256. */
374 
shadowTypeV(IRType ty)375 static IRType shadowTypeV ( IRType ty )
376 {
377    switch (ty) {
378       case Ity_I1:
379       case Ity_I8:
380       case Ity_I16:
381       case Ity_I32:
382       case Ity_I64:
383       case Ity_I128: return ty;
384       case Ity_F16:  return Ity_I16;
385       case Ity_F32:  return Ity_I32;
386       case Ity_D32:  return Ity_I32;
387       case Ity_F64:  return Ity_I64;
388       case Ity_D64:  return Ity_I64;
389       case Ity_F128: return Ity_I128;
390       case Ity_D128: return Ity_I128;
391       case Ity_V128: return Ity_V128;
392       case Ity_V256: return Ity_V256;
393       default: ppIRType(ty);
394                VG_(tool_panic)("memcheck:shadowTypeV");
395    }
396 }
397 
398 /* Produce a 'defined' value of the given shadow type.  Should only be
399    supplied shadow types (Bit/I8/I16/I32/UI64). */
definedOfType(IRType ty)400 static IRExpr* definedOfType ( IRType ty ) {
401    switch (ty) {
402       case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
403       case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
404       case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
405       case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
406       case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
407       case Ity_I128: return i128_const_zero();
408       case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
409       case Ity_V256: return IRExpr_Const(IRConst_V256(0x00000000));
410       default:       VG_(tool_panic)("memcheck:definedOfType");
411    }
412 }
413 
414 
415 /*------------------------------------------------------------*/
416 /*--- Constructing IR fragments                            ---*/
417 /*------------------------------------------------------------*/
418 
419 /* add stmt to a bb */
stmt(HChar cat,MCEnv * mce,IRStmt * st)420 static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
421    if (mce->trace) {
422       VG_(printf)("  %c: ", cat);
423       ppIRStmt(st);
424       VG_(printf)("\n");
425    }
426    addStmtToIRSB(mce->sb, st);
427 }
428 
429 /* assign value to tmp */
430 static inline
assign(HChar cat,MCEnv * mce,IRTemp tmp,IRExpr * expr)431 void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
432    stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
433 }
434 
435 /* build various kinds of expressions */
436 #define triop(_op, _arg1, _arg2, _arg3) \
437                                  IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
438 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
439 #define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
440 #define mkU1(_n)                 IRExpr_Const(IRConst_U1(_n))
441 #define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
442 #define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
443 #define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
444 #define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
445 #define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
446 #define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
447 
448 /* Bind the given expression to a new temporary, and return the
449    temporary.  This effectively converts an arbitrary expression into
450    an atom.
451 
452    'ty' is the type of 'e' and hence the type that the new temporary
453    needs to be.  But passing it in is redundant, since we can deduce
454    the type merely by inspecting 'e'.  So at least use that fact to
455    assert that the two types agree. */
assignNew(HChar cat,MCEnv * mce,IRType ty,IRExpr * e)456 static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
457 {
458    TempKind k;
459    IRTemp   t;
460    IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);
461 
462    tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
463    switch (cat) {
464       case 'V': k = VSh;  break;
465       case 'B': k = BSh;  break;
466       case 'C': k = Orig; break;
467                 /* happens when we are making up new "orig"
468                    expressions, for IRCAS handling */
469       default: tl_assert(0);
470    }
471    t = newTemp(mce, ty, k);
472    assign(cat, mce, t, e);
473    return mkexpr(t);
474 }
475 
476 
477 /*------------------------------------------------------------*/
478 /*--- Helper functions for 128-bit ops                     ---*/
479 /*------------------------------------------------------------*/
480 
i128_const_zero(void)481 static IRExpr *i128_const_zero(void)
482 {
483    IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
484    return binop(Iop_64HLto128, z64, z64);
485 }
486 
487 /* There are no I128-bit loads and/or stores [as generated by any
488    current front ends].  So we do not need to worry about that in
489    expr2vbits_Load */
490 
491 
492 /*------------------------------------------------------------*/
493 /*--- Constructing definedness primitive ops               ---*/
494 /*------------------------------------------------------------*/
495 
496 /* --------- Defined-if-either-defined --------- */
497 
mkDifD8(MCEnv * mce,IRAtom * a1,IRAtom * a2)498 static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
499    tl_assert(isShadowAtom(mce,a1));
500    tl_assert(isShadowAtom(mce,a2));
501    return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
502 }
503 
mkDifD16(MCEnv * mce,IRAtom * a1,IRAtom * a2)504 static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
505    tl_assert(isShadowAtom(mce,a1));
506    tl_assert(isShadowAtom(mce,a2));
507    return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
508 }
509 
mkDifD32(MCEnv * mce,IRAtom * a1,IRAtom * a2)510 static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
511    tl_assert(isShadowAtom(mce,a1));
512    tl_assert(isShadowAtom(mce,a2));
513    return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
514 }
515 
mkDifD64(MCEnv * mce,IRAtom * a1,IRAtom * a2)516 static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
517    tl_assert(isShadowAtom(mce,a1));
518    tl_assert(isShadowAtom(mce,a2));
519    return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
520 }
521 
mkDifDV128(MCEnv * mce,IRAtom * a1,IRAtom * a2)522 static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
523    tl_assert(isShadowAtom(mce,a1));
524    tl_assert(isShadowAtom(mce,a2));
525    return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
526 }
527 
mkDifDV256(MCEnv * mce,IRAtom * a1,IRAtom * a2)528 static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
529    tl_assert(isShadowAtom(mce,a1));
530    tl_assert(isShadowAtom(mce,a2));
531    return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
532 }
533 
534 /* --------- Undefined-if-either-undefined --------- */
535 
mkUifU8(MCEnv * mce,IRAtom * a1,IRAtom * a2)536 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
537    tl_assert(isShadowAtom(mce,a1));
538    tl_assert(isShadowAtom(mce,a2));
539    return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
540 }
541 
mkUifU16(MCEnv * mce,IRAtom * a1,IRAtom * a2)542 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
543    tl_assert(isShadowAtom(mce,a1));
544    tl_assert(isShadowAtom(mce,a2));
545    return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
546 }
547 
mkUifU32(MCEnv * mce,IRAtom * a1,IRAtom * a2)548 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
549    tl_assert(isShadowAtom(mce,a1));
550    tl_assert(isShadowAtom(mce,a2));
551    return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
552 }
553 
mkUifU64(MCEnv * mce,IRAtom * a1,IRAtom * a2)554 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
555    tl_assert(isShadowAtom(mce,a1));
556    tl_assert(isShadowAtom(mce,a2));
557    return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
558 }
559 
mkUifU128(MCEnv * mce,IRAtom * a1,IRAtom * a2)560 static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
561    IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
562    tl_assert(isShadowAtom(mce,a1));
563    tl_assert(isShadowAtom(mce,a2));
564    tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
565    tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
566    tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
567    tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
568    tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
569    tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
570 
571    return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
572 }
573 
mkUifUV128(MCEnv * mce,IRAtom * a1,IRAtom * a2)574 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
575    tl_assert(isShadowAtom(mce,a1));
576    tl_assert(isShadowAtom(mce,a2));
577    return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
578 }
579 
mkUifUV256(MCEnv * mce,IRAtom * a1,IRAtom * a2)580 static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
581    tl_assert(isShadowAtom(mce,a1));
582    tl_assert(isShadowAtom(mce,a2));
583    return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
584 }
585 
mkUifU(MCEnv * mce,IRType vty,IRAtom * a1,IRAtom * a2)586 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
587    switch (vty) {
588       case Ity_I8:   return mkUifU8(mce, a1, a2);
589       case Ity_I16:  return mkUifU16(mce, a1, a2);
590       case Ity_I32:  return mkUifU32(mce, a1, a2);
591       case Ity_I64:  return mkUifU64(mce, a1, a2);
592       case Ity_I128: return mkUifU128(mce, a1, a2);
593       case Ity_V128: return mkUifUV128(mce, a1, a2);
594       case Ity_V256: return mkUifUV256(mce, a1, a2);
595       default:
596          VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
597          VG_(tool_panic)("memcheck:mkUifU");
598    }
599 }
600 
601 /* --------- The Left-family of operations. --------- */
602 
mkLeft8(MCEnv * mce,IRAtom * a1)603 static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
604    tl_assert(isShadowAtom(mce,a1));
605    return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
606 }
607 
mkLeft16(MCEnv * mce,IRAtom * a1)608 static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
609    tl_assert(isShadowAtom(mce,a1));
610    return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
611 }
612 
mkLeft32(MCEnv * mce,IRAtom * a1)613 static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
614    tl_assert(isShadowAtom(mce,a1));
615    return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
616 }
617 
mkLeft64(MCEnv * mce,IRAtom * a1)618 static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
619    tl_assert(isShadowAtom(mce,a1));
620    return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
621 }
622 
623 /* --------- 'Improvement' functions for AND/OR. --------- */
624 
625 /* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
626    defined (0); all other -> undefined (1).
627 */
mkImproveAND8(MCEnv * mce,IRAtom * data,IRAtom * vbits)628 static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
629 {
630    tl_assert(isOriginalAtom(mce, data));
631    tl_assert(isShadowAtom(mce, vbits));
632    tl_assert(sameKindedAtoms(data, vbits));
633    return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
634 }
635 
mkImproveAND16(MCEnv * mce,IRAtom * data,IRAtom * vbits)636 static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
637 {
638    tl_assert(isOriginalAtom(mce, data));
639    tl_assert(isShadowAtom(mce, vbits));
640    tl_assert(sameKindedAtoms(data, vbits));
641    return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
642 }
643 
mkImproveAND32(MCEnv * mce,IRAtom * data,IRAtom * vbits)644 static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
645 {
646    tl_assert(isOriginalAtom(mce, data));
647    tl_assert(isShadowAtom(mce, vbits));
648    tl_assert(sameKindedAtoms(data, vbits));
649    return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
650 }
651 
mkImproveAND64(MCEnv * mce,IRAtom * data,IRAtom * vbits)652 static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
653 {
654    tl_assert(isOriginalAtom(mce, data));
655    tl_assert(isShadowAtom(mce, vbits));
656    tl_assert(sameKindedAtoms(data, vbits));
657    return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
658 }
659 
mkImproveANDV128(MCEnv * mce,IRAtom * data,IRAtom * vbits)660 static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
661 {
662    tl_assert(isOriginalAtom(mce, data));
663    tl_assert(isShadowAtom(mce, vbits));
664    tl_assert(sameKindedAtoms(data, vbits));
665    return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
666 }
667 
mkImproveANDV256(MCEnv * mce,IRAtom * data,IRAtom * vbits)668 static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
669 {
670    tl_assert(isOriginalAtom(mce, data));
671    tl_assert(isShadowAtom(mce, vbits));
672    tl_assert(sameKindedAtoms(data, vbits));
673    return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
674 }
675 
676 /* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
677    defined (0); all other -> undefined (1).
678 */
mkImproveOR8(MCEnv * mce,IRAtom * data,IRAtom * vbits)679 static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
680 {
681    tl_assert(isOriginalAtom(mce, data));
682    tl_assert(isShadowAtom(mce, vbits));
683    tl_assert(sameKindedAtoms(data, vbits));
684    return assignNew(
685              'V', mce, Ity_I8,
686              binop(Iop_Or8,
687                    assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
688                    vbits) );
689 }
690 
mkImproveOR16(MCEnv * mce,IRAtom * data,IRAtom * vbits)691 static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
692 {
693    tl_assert(isOriginalAtom(mce, data));
694    tl_assert(isShadowAtom(mce, vbits));
695    tl_assert(sameKindedAtoms(data, vbits));
696    return assignNew(
697              'V', mce, Ity_I16,
698              binop(Iop_Or16,
699                    assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
700                    vbits) );
701 }
702 
mkImproveOR32(MCEnv * mce,IRAtom * data,IRAtom * vbits)703 static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
704 {
705    tl_assert(isOriginalAtom(mce, data));
706    tl_assert(isShadowAtom(mce, vbits));
707    tl_assert(sameKindedAtoms(data, vbits));
708    return assignNew(
709              'V', mce, Ity_I32,
710              binop(Iop_Or32,
711                    assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
712                    vbits) );
713 }
714 
mkImproveOR64(MCEnv * mce,IRAtom * data,IRAtom * vbits)715 static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
716 {
717    tl_assert(isOriginalAtom(mce, data));
718    tl_assert(isShadowAtom(mce, vbits));
719    tl_assert(sameKindedAtoms(data, vbits));
720    return assignNew(
721              'V', mce, Ity_I64,
722              binop(Iop_Or64,
723                    assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
724                    vbits) );
725 }
726 
mkImproveORV128(MCEnv * mce,IRAtom * data,IRAtom * vbits)727 static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
728 {
729    tl_assert(isOriginalAtom(mce, data));
730    tl_assert(isShadowAtom(mce, vbits));
731    tl_assert(sameKindedAtoms(data, vbits));
732    return assignNew(
733              'V', mce, Ity_V128,
734              binop(Iop_OrV128,
735                    assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
736                    vbits) );
737 }
738 
mkImproveORV256(MCEnv * mce,IRAtom * data,IRAtom * vbits)739 static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
740 {
741    tl_assert(isOriginalAtom(mce, data));
742    tl_assert(isShadowAtom(mce, vbits));
743    tl_assert(sameKindedAtoms(data, vbits));
744    return assignNew(
745              'V', mce, Ity_V256,
746              binop(Iop_OrV256,
747                    assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
748                    vbits) );
749 }
750 
751 /* --------- Pessimising casts. --------- */
752 
753 /* The function returns an expression of type DST_TY. If any of the VBITS
754    is undefined (value == 1) the resulting expression has all bits set to
755    1. Otherwise, all bits are 0. */
756 
mkPCastTo(MCEnv * mce,IRType dst_ty,IRAtom * vbits)757 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
758 {
759    IRType  src_ty;
760    IRAtom* tmp1;
761 
762    /* Note, dst_ty is a shadow type, not an original type. */
763    tl_assert(isShadowAtom(mce,vbits));
764    src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);
765 
766    /* Fast-track some common cases */
767    if (src_ty == Ity_I32 && dst_ty == Ity_I32)
768       return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
769 
770    if (src_ty == Ity_I64 && dst_ty == Ity_I64)
771       return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
772 
773    if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
774       /* PCast the arg, then clone it. */
775       IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
776       return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
777    }
778 
779    if (src_ty == Ity_I32 && dst_ty == Ity_V128) {
780       /* PCast the arg, then clone it 4 times. */
781       IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
782       tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
783       return assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
784    }
785 
786    if (src_ty == Ity_I32 && dst_ty == Ity_V256) {
787       /* PCast the arg, then clone it 8 times. */
788       IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
789       tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
790       tmp = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
791       return assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256, tmp, tmp));
792    }
793 
794    if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
795       /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
796          the top half. */
797       IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
798       return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
799    }
800 
801    if (src_ty == Ity_V128 && dst_ty == Ity_I64) {
802       /* Use InterleaveHI64x2 to copy the top half of the vector into
803          the bottom half.  Then we can UifU it with the original, throw
804          away the upper half of the result, and PCast-I64-to-I64
805          the lower half. */
806       // Generates vbits[127:64] : vbits[127:64]
807       IRAtom* hi64hi64
808          = assignNew('V', mce, Ity_V128,
809                      binop(Iop_InterleaveHI64x2, vbits, vbits));
810       // Generates
811       //   UifU(vbits[127:64],vbits[127:64]) : UifU(vbits[127:64],vbits[63:0])
812       //   == vbits[127:64] : UifU(vbits[127:64],vbits[63:0])
813       IRAtom* lohi64
814          = mkUifUV128(mce, hi64hi64, vbits);
815       // Generates UifU(vbits[127:64],vbits[63:0])
816       IRAtom* lo64
817          = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, lohi64));
818       // Generates
819       //   PCast-to-I64( UifU(vbits[127:64], vbits[63:0] )
820       //   == PCast-to-I64( vbits[127:0] )
821       IRAtom* res
822          = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, lo64));
823       return res;
824    }
825 
826    /* Else do it the slow way .. */
827    /* First of all, collapse vbits down to a single bit. */
828    tmp1   = NULL;
829    switch (src_ty) {
830       case Ity_I1:
831          tmp1 = vbits;
832          break;
833       case Ity_I8:
834          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
835          break;
836       case Ity_I16:
837          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
838          break;
839       case Ity_I32:
840          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
841          break;
842       case Ity_I64:
843          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
844          break;
845       case Ity_I128: {
846          /* Gah.  Chop it in half, OR the halves together, and compare
847             that with zero. */
848          IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
849          IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
850          IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
851          tmp1         = assignNew('V', mce, Ity_I1,
852                                        unop(Iop_CmpNEZ64, tmp4));
853          break;
854       }
855       default:
856          ppIRType(src_ty);
857          VG_(tool_panic)("mkPCastTo(1)");
858    }
859    tl_assert(tmp1);
860    /* Now widen up to the dst type. */
861    switch (dst_ty) {
862       case Ity_I1:
863          return tmp1;
864       case Ity_I8:
865          return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
866       case Ity_I16:
867          return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
868       case Ity_I32:
869          return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
870       case Ity_I64:
871          return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
872       case Ity_V128:
873          tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
874          tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
875          return tmp1;
876       case Ity_I128:
877          tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
878          tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
879          return tmp1;
880       case Ity_V256:
881          tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
882          tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128,
883                                                     tmp1, tmp1));
884          tmp1 = assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256,
885                                                     tmp1, tmp1));
886          return tmp1;
887       default:
888          ppIRType(dst_ty);
889          VG_(tool_panic)("mkPCastTo(2)");
890    }
891 }
892 
893 /* This is a minor variant.  It takes an arg of some type and returns
894    a value of the same type.  The result consists entirely of Defined
895    (zero) bits except its least significant bit, which is a PCast of
896    the entire argument down to a single bit. */
mkPCastXXtoXXlsb(MCEnv * mce,IRAtom * varg,IRType ty)897 static IRAtom* mkPCastXXtoXXlsb ( MCEnv* mce, IRAtom* varg, IRType ty )
898 {
899    if (ty == Ity_V128) {
900       /* --- Case for V128 --- */
901       IRAtom* varg128 = varg;
902       // generates: PCast-to-I64(varg128)
903       IRAtom* pcdTo64 = mkPCastTo(mce, Ity_I64, varg128);
904       // Now introduce zeros (defined bits) in the top 63 places
905       // generates: Def--(63)--Def PCast-to-I1(varg128)
906       IRAtom* d63pc
907          = assignNew('V', mce, Ity_I64, binop(Iop_And64, pcdTo64, mkU64(1)));
908       // generates: Def--(64)--Def
909       IRAtom* d64
910          = definedOfType(Ity_I64);
911       // generates: Def--(127)--Def PCast-to-I1(varg128)
912       IRAtom* res
913          = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, d64, d63pc));
914       return res;
915    }
916    if (ty == Ity_I64) {
917       /* --- Case for I64 --- */
918       // PCast to 64
919       IRAtom* pcd = mkPCastTo(mce, Ity_I64, varg);
920       // Zero (Def) out the top 63 bits
921       IRAtom* res
922          = assignNew('V', mce, Ity_I64, binop(Iop_And64, pcd, mkU64(1)));
923       return res;
924    }
925    /*NOTREACHED*/
926    tl_assert(0);
927 }
928 
929 /* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
930 /*
931    Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
932    PCasting to Ity_U1.  However, sometimes it is necessary to be more
933    accurate.  The insight is that the result is defined if two
934    corresponding bits can be found, one from each argument, so that
935    both bits are defined but are different -- that makes EQ say "No"
936    and NE say "Yes".  Hence, we compute an improvement term and DifD
937    it onto the "normal" (UifU) result.
938 
939    The result is:
940 
941    PCastTo<1> (
942       -- naive version
943       PCastTo<sz>( UifU<sz>(vxx, vyy) )
944 
945       `DifD<sz>`
946 
947       -- improvement term
948       PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
949    )
950 
951    where
952      vec contains 0 (defined) bits where the corresponding arg bits
953      are defined but different, and 1 bits otherwise.
954 
955      vec = Or<sz>( vxx,   // 0 iff bit defined
956                    vyy,   // 0 iff bit defined
957                    Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
958                  )
959 
960      If any bit of vec is 0, the result is defined and so the
961      improvement term should produce 0...0, else it should produce
962      1...1.
963 
964      Hence require for the improvement term:
965 
966         if vec == 1...1 then 1...1 else 0...0
967      ->
968         PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
969 
970    This was extensively re-analysed and checked on 6 July 05.
971 */
expensiveCmpEQorNE(MCEnv * mce,IRType ty,IRAtom * vxx,IRAtom * vyy,IRAtom * xx,IRAtom * yy)972 static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
973                                     IRType  ty,
974                                     IRAtom* vxx, IRAtom* vyy,
975                                     IRAtom* xx,  IRAtom* yy )
976 {
977    IRAtom *naive, *vec, *improvement_term;
978    IRAtom *improved, *final_cast, *top;
979    IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;
980 
981    tl_assert(isShadowAtom(mce,vxx));
982    tl_assert(isShadowAtom(mce,vyy));
983    tl_assert(isOriginalAtom(mce,xx));
984    tl_assert(isOriginalAtom(mce,yy));
985    tl_assert(sameKindedAtoms(vxx,xx));
986    tl_assert(sameKindedAtoms(vyy,yy));
987 
988    switch (ty) {
989       case Ity_I16:
990          opOR   = Iop_Or16;
991          opDIFD = Iop_And16;
992          opUIFU = Iop_Or16;
993          opNOT  = Iop_Not16;
994          opXOR  = Iop_Xor16;
995          opCMP  = Iop_CmpEQ16;
996          top    = mkU16(0xFFFF);
997          break;
998       case Ity_I32:
999          opOR   = Iop_Or32;
1000          opDIFD = Iop_And32;
1001          opUIFU = Iop_Or32;
1002          opNOT  = Iop_Not32;
1003          opXOR  = Iop_Xor32;
1004          opCMP  = Iop_CmpEQ32;
1005          top    = mkU32(0xFFFFFFFF);
1006          break;
1007       case Ity_I64:
1008          opOR   = Iop_Or64;
1009          opDIFD = Iop_And64;
1010          opUIFU = Iop_Or64;
1011          opNOT  = Iop_Not64;
1012          opXOR  = Iop_Xor64;
1013          opCMP  = Iop_CmpEQ64;
1014          top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
1015          break;
1016       default:
1017          VG_(tool_panic)("expensiveCmpEQorNE");
1018    }
1019 
1020    naive
1021       = mkPCastTo(mce,ty,
1022                   assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));
1023 
1024    vec
1025       = assignNew(
1026            'V', mce,ty,
1027            binop( opOR,
1028                   assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
1029                   assignNew(
1030                      'V', mce,ty,
1031                      unop( opNOT,
1032                            assignNew('V', mce,ty, binop(opXOR, xx, yy))))));
1033 
1034    improvement_term
1035       = mkPCastTo( mce,ty,
1036                    assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));
1037 
1038    improved
1039       = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );
1040 
1041    final_cast
1042       = mkPCastTo( mce, Ity_I1, improved );
1043 
1044    return final_cast;
1045 }
1046 
1047 
1048 /* --------- Semi-accurate interpretation of CmpORD. --------- */
1049 
1050 /* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
1051 
1052       CmpORD32S(x,y) = 1<<3   if  x <s y
1053                      = 1<<2   if  x >s y
1054                      = 1<<1   if  x == y
1055 
1056    and similarly the unsigned variant.  The default interpretation is:
1057 
1058       CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
1059                                   & (7<<1)
1060 
1061    The "& (7<<1)" reflects the fact that all result bits except 3,2,1
1062    are zero and therefore defined (viz, zero).
1063 
1064    Also deal with a special case better:
1065 
1066       CmpORD32S(x,0)
1067 
1068    Here, bit 3 (LT) of the result is a copy of the top bit of x and
1069    will be defined even if the rest of x isn't.  In which case we do:
1070 
1071       CmpORD32S#(x,x#,0,{impliedly 0}#)
1072          = PCast(x#) & (3<<1)      -- standard interp for GT#,EQ#
1073            | (x# >>u 31) << 3      -- LT# = x#[31]
1074 
1075    Analogous handling for CmpORD64{S,U}.
1076 */
isZeroU32(IRAtom * e)1077 static Bool isZeroU32 ( IRAtom* e )
1078 {
1079    return
1080       toBool( e->tag == Iex_Const
1081               && e->Iex.Const.con->tag == Ico_U32
1082               && e->Iex.Const.con->Ico.U32 == 0 );
1083 }
1084 
isZeroU64(IRAtom * e)1085 static Bool isZeroU64 ( IRAtom* e )
1086 {
1087    return
1088       toBool( e->tag == Iex_Const
1089               && e->Iex.Const.con->tag == Ico_U64
1090               && e->Iex.Const.con->Ico.U64 == 0 );
1091 }
1092 
doCmpORD(MCEnv * mce,IROp cmp_op,IRAtom * xxhash,IRAtom * yyhash,IRAtom * xx,IRAtom * yy)1093 static IRAtom* doCmpORD ( MCEnv*  mce,
1094                           IROp    cmp_op,
1095                           IRAtom* xxhash, IRAtom* yyhash,
1096                           IRAtom* xx,     IRAtom* yy )
1097 {
1098    Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
1099    Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
1100    IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
1101    IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
1102    IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
1103    IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
1104    IRType ty     = m64 ? Ity_I64   : Ity_I32;
1105    Int    width  = m64 ? 64        : 32;
1106 
1107    Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
1108 
1109    IRAtom* threeLeft1 = NULL;
1110    IRAtom* sevenLeft1 = NULL;
1111 
1112    tl_assert(isShadowAtom(mce,xxhash));
1113    tl_assert(isShadowAtom(mce,yyhash));
1114    tl_assert(isOriginalAtom(mce,xx));
1115    tl_assert(isOriginalAtom(mce,yy));
1116    tl_assert(sameKindedAtoms(xxhash,xx));
1117    tl_assert(sameKindedAtoms(yyhash,yy));
1118    tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
1119              || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);
1120 
1121    if (0) {
1122       ppIROp(cmp_op); VG_(printf)(" ");
1123       ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
1124    }
1125 
1126    if (syned && isZero(yy)) {
1127       /* fancy interpretation */
1128       /* if yy is zero, then it must be fully defined (zero#). */
1129       tl_assert(isZero(yyhash));
1130       threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
1131       return
1132          binop(
1133             opOR,
1134             assignNew(
1135                'V', mce,ty,
1136                binop(
1137                   opAND,
1138                   mkPCastTo(mce,ty, xxhash),
1139                   threeLeft1
1140                )),
1141             assignNew(
1142                'V', mce,ty,
1143                binop(
1144                   opSHL,
1145                   assignNew(
1146                      'V', mce,ty,
1147                      binop(opSHR, xxhash, mkU8(width-1))),
1148                   mkU8(3)
1149                ))
1150 	 );
1151    } else {
1152       /* standard interpretation */
1153       sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
1154       return
1155          binop(
1156             opAND,
1157             mkPCastTo( mce,ty,
1158                        mkUifU(mce,ty, xxhash,yyhash)),
1159             sevenLeft1
1160          );
1161    }
1162 }
1163 
1164 
1165 /*------------------------------------------------------------*/
1166 /*--- Emit a test and complaint if something is undefined. ---*/
1167 /*------------------------------------------------------------*/
1168 
1169 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1170 
1171 
1172 /* Set the annotations on a dirty helper to indicate that the stack
1173    pointer and instruction pointers might be read.  This is the
1174    behaviour of all 'emit-a-complaint' style functions we might
1175    call. */
1176 
setHelperAnns(MCEnv * mce,IRDirty * di)1177 static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1178    di->nFxState = 2;
1179    di->fxState[0].fx        = Ifx_Read;
1180    di->fxState[0].offset    = mce->layout->offset_SP;
1181    di->fxState[0].size      = mce->layout->sizeof_SP;
1182    di->fxState[0].nRepeats  = 0;
1183    di->fxState[0].repeatLen = 0;
1184    di->fxState[1].fx        = Ifx_Read;
1185    di->fxState[1].offset    = mce->layout->offset_IP;
1186    di->fxState[1].size      = mce->layout->sizeof_IP;
1187    di->fxState[1].nRepeats  = 0;
1188    di->fxState[1].repeatLen = 0;
1189 }
1190 
1191 
1192 /* Check the supplied *original* |atom| for undefinedness, and emit a
1193    complaint if so.  Once that happens, mark it as defined.  This is
1194    possible because the atom is either a tmp or literal.  If it's a
1195    tmp, it will be shadowed by a tmp, and so we can set the shadow to
1196    be defined.  In fact as mentioned above, we will have to allocate a
1197    new tmp to carry the new 'defined' shadow value, and update the
1198    original->tmp mapping accordingly; we cannot simply assign a new
1199    value to an existing shadow tmp as this breaks SSAness.
1200 
1201    The checks are performed, any resulting complaint emitted, and
1202    |atom|'s shadow temp set to 'defined', ONLY in the case that
1203    |guard| evaluates to True at run-time.  If it evaluates to False
1204    then no action is performed.  If |guard| is NULL (the usual case)
1205    then it is assumed to be always-true, and hence these actions are
1206    performed unconditionally.
1207 
1208    This routine does not generate code to check the definedness of
1209    |guard|.  The caller is assumed to have taken care of that already.
1210 */
complainIfUndefined(MCEnv * mce,IRAtom * atom,IRExpr * guard)1211 static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
1212 {
1213    IRAtom*  vatom;
1214    IRType   ty;
1215    Int      sz;
1216    IRDirty* di;
1217    IRAtom*  cond;
1218    IRAtom*  origin;
1219    void*    fn;
1220    const HChar* nm;
1221    IRExpr** args;
1222    Int      nargs;
1223 
1224    // Don't do V bit tests if we're not reporting undefined value errors.
1225    if (MC_(clo_mc_level) == 1)
1226       return;
1227 
1228    if (guard)
1229       tl_assert(isOriginalAtom(mce, guard));
1230 
1231    /* Since the original expression is atomic, there's no duplicated
1232       work generated by making multiple V-expressions for it.  So we
1233       don't really care about the possibility that someone else may
1234       also create a V-interpretion for it. */
1235    tl_assert(isOriginalAtom(mce, atom));
1236    vatom = expr2vbits( mce, atom );
1237    tl_assert(isShadowAtom(mce, vatom));
1238    tl_assert(sameKindedAtoms(atom, vatom));
1239 
1240    ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1241 
1242    /* sz is only used for constructing the error message */
1243    sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
1244 
1245    cond = mkPCastTo( mce, Ity_I1, vatom );
1246    /* cond will be 0 if all defined, and 1 if any not defined. */
1247 
1248    /* Get the origin info for the value we are about to check.  At
1249       least, if we are doing origin tracking.  If not, use a dummy
1250       zero origin. */
1251    if (MC_(clo_mc_level) == 3) {
1252       origin = schemeE( mce, atom );
1253       if (mce->hWordTy == Ity_I64) {
1254          origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
1255       }
1256    } else {
1257       origin = NULL;
1258    }
1259 
1260    fn    = NULL;
1261    nm    = NULL;
1262    args  = NULL;
1263    nargs = -1;
1264 
1265    switch (sz) {
1266       case 0:
1267          if (origin) {
1268             fn    = &MC_(helperc_value_check0_fail_w_o);
1269             nm    = "MC_(helperc_value_check0_fail_w_o)";
1270             args  = mkIRExprVec_1(origin);
1271             nargs = 1;
1272          } else {
1273             fn    = &MC_(helperc_value_check0_fail_no_o);
1274             nm    = "MC_(helperc_value_check0_fail_no_o)";
1275             args  = mkIRExprVec_0();
1276             nargs = 0;
1277          }
1278          break;
1279       case 1:
1280          if (origin) {
1281             fn    = &MC_(helperc_value_check1_fail_w_o);
1282             nm    = "MC_(helperc_value_check1_fail_w_o)";
1283             args  = mkIRExprVec_1(origin);
1284             nargs = 1;
1285          } else {
1286             fn    = &MC_(helperc_value_check1_fail_no_o);
1287             nm    = "MC_(helperc_value_check1_fail_no_o)";
1288             args  = mkIRExprVec_0();
1289             nargs = 0;
1290          }
1291          break;
1292       case 4:
1293          if (origin) {
1294             fn    = &MC_(helperc_value_check4_fail_w_o);
1295             nm    = "MC_(helperc_value_check4_fail_w_o)";
1296             args  = mkIRExprVec_1(origin);
1297             nargs = 1;
1298          } else {
1299             fn    = &MC_(helperc_value_check4_fail_no_o);
1300             nm    = "MC_(helperc_value_check4_fail_no_o)";
1301             args  = mkIRExprVec_0();
1302             nargs = 0;
1303          }
1304          break;
1305       case 8:
1306          if (origin) {
1307             fn    = &MC_(helperc_value_check8_fail_w_o);
1308             nm    = "MC_(helperc_value_check8_fail_w_o)";
1309             args  = mkIRExprVec_1(origin);
1310             nargs = 1;
1311          } else {
1312             fn    = &MC_(helperc_value_check8_fail_no_o);
1313             nm    = "MC_(helperc_value_check8_fail_no_o)";
1314             args  = mkIRExprVec_0();
1315             nargs = 0;
1316          }
1317          break;
1318       case 2:
1319       case 16:
1320          if (origin) {
1321             fn    = &MC_(helperc_value_checkN_fail_w_o);
1322             nm    = "MC_(helperc_value_checkN_fail_w_o)";
1323             args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
1324             nargs = 2;
1325          } else {
1326             fn    = &MC_(helperc_value_checkN_fail_no_o);
1327             nm    = "MC_(helperc_value_checkN_fail_no_o)";
1328             args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
1329             nargs = 1;
1330          }
1331          break;
1332       default:
1333          VG_(tool_panic)("unexpected szB");
1334    }
1335 
1336    tl_assert(fn);
1337    tl_assert(nm);
1338    tl_assert(args);
1339    tl_assert(nargs >= 0 && nargs <= 2);
1340    tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
1341               || (MC_(clo_mc_level) == 2 && origin == NULL) );
1342 
1343    di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
1344                            VG_(fnptr_to_fnentry)( fn ), args );
1345    di->guard = cond; // and cond is PCast-to-1(atom#)
1346 
1347    /* If the complaint is to be issued under a guard condition, AND
1348       that into the guard condition for the helper call. */
1349    if (guard) {
1350       IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
1351       IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
1352       IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
1353       di->guard  = assignNew('V', mce, Ity_I1,  unop(Iop_32to1, e));
1354    }
1355 
1356    setHelperAnns( mce, di );
1357    stmt( 'V', mce, IRStmt_Dirty(di));
1358 
1359    /* If |atom| is shadowed by an IRTemp, set the shadow tmp to be
1360       defined -- but only in the case where the guard evaluates to
1361       True at run-time.  Do the update by setting the orig->shadow
1362       mapping for tmp to reflect the fact that this shadow is getting
1363       a new value. */
1364    tl_assert(isIRAtom(vatom));
1365    /* sameKindedAtoms ... */
1366    if (vatom->tag == Iex_RdTmp) {
1367       tl_assert(atom->tag == Iex_RdTmp);
1368       if (guard == NULL) {
1369          // guard is 'always True', hence update unconditionally
1370          newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1371          assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
1372                           definedOfType(ty));
1373       } else {
1374          // update the temp only conditionally.  Do this by copying
1375          // its old value when the guard is False.
1376          // The old value ..
1377          IRTemp old_tmpV = findShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1378          newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1379          IRAtom* new_tmpV
1380             = assignNew('V', mce, shadowTypeV(ty),
1381                         IRExpr_ITE(guard, definedOfType(ty),
1382                                           mkexpr(old_tmpV)));
1383          assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp), new_tmpV);
1384       }
1385    }
1386 }
1387 
1388 
1389 /*------------------------------------------------------------*/
1390 /*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
1391 /*------------------------------------------------------------*/
1392 
1393 /* Examine the always-defined sections declared in layout to see if
1394    the (offset,size) section is within one.  Note, it is an error to
1395    partially fall into such a region: (offset,size) should either be
1396    completely in such a region or completely not-in such a region.
1397 */
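/* For example (illustrative, not part of the original comment): with an
   always-defined entry covering guest offsets [16 .. 23], a query for
   (offset=16, size=8) returns True, (offset=0, size=4) finds no overlap
   and returns False, and (offset=20, size=8) straddles the boundary and
   hits the partial-overlap panic below. */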
1398 static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1399 {
1400    Int minoffD, maxoffD, i;
1401    Int minoff = offset;
1402    Int maxoff = minoff + size - 1;
1403    tl_assert((minoff & ~0xFFFF) == 0);
1404    tl_assert((maxoff & ~0xFFFF) == 0);
1405 
1406    for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1407       minoffD = mce->layout->alwaysDefd[i].offset;
1408       maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1409       tl_assert((minoffD & ~0xFFFF) == 0);
1410       tl_assert((maxoffD & ~0xFFFF) == 0);
1411 
1412       if (maxoff < minoffD || maxoffD < minoff)
1413          continue; /* no overlap */
1414       if (minoff >= minoffD && maxoff <= maxoffD)
1415          return True; /* completely contained in an always-defd section */
1416 
1417       VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1418    }
1419    return False; /* could not find any containing section */
1420 }
1421 
1422 
1423 /* Generate into bb suitable actions to shadow this Put.  If the state
1424    slice is marked 'always defined', do nothing.  Otherwise, write the
1425    supplied V bits to the shadow state.  We can pass in either an
1426    original atom or a V-atom, but not both.  In the former case the
1427    relevant V-bits are then generated from the original.
1428    We assume here that the definedness of GUARD has already been checked.
1429 */
1430 static
1431 void do_shadow_PUT ( MCEnv* mce,  Int offset,
1432                      IRAtom* atom, IRAtom* vatom, IRExpr *guard )
1433 {
1434    IRType ty;
1435 
1436    // Don't do shadow PUTs if we're not doing undefined value checking.
1437    // Their absence lets Vex's optimiser remove all the shadow computation
1438    // that they depend on, which includes GETs of the shadow registers.
1439    if (MC_(clo_mc_level) == 1)
1440       return;
1441 
1442    if (atom) {
1443       tl_assert(!vatom);
1444       tl_assert(isOriginalAtom(mce, atom));
1445       vatom = expr2vbits( mce, atom );
1446    } else {
1447       tl_assert(vatom);
1448       tl_assert(isShadowAtom(mce, vatom));
1449    }
1450 
1451    ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1452    tl_assert(ty != Ity_I1);
1453    tl_assert(ty != Ity_I128);
1454    if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1455       /* later: no ... */
1456       /* emit code to emit a complaint if any of the vbits are 1. */
1457       /* complainIfUndefined(mce, atom); */
1458    } else {
1459       /* Do a plain shadow Put. */
1460       if (guard) {
1461          /* If the guard expression evaluates to false we simply Put the value
1462             that is already stored in the guest state slot */
1463          IRAtom *cond, *iffalse;
1464 
1465          cond    = assignNew('V', mce, Ity_I1, guard);
1466          iffalse = assignNew('V', mce, ty,
1467                              IRExpr_Get(offset + mce->layout->total_sizeB, ty));
1468          vatom   = assignNew('V', mce, ty, IRExpr_ITE(cond, vatom, iffalse));
1469       }
1470       stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
1471    }
1472 }
1473 
1474 
1475 /* Generate into bb suitable actions to shadow this PutI (passed in
1476    in pieces).
1477 */
1478 static
1479 void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
1480 {
1481    IRAtom* vatom;
1482    IRType  ty, tyS;
1483    Int     arrSize;
1484    IRRegArray* descr = puti->descr;
1485    IRAtom*     ix    = puti->ix;
1486    Int         bias  = puti->bias;
1487    IRAtom*     atom  = puti->data;
1488 
1489    // Don't do shadow PUTIs if we're not doing undefined value checking.
1490    // Their absence lets Vex's optimiser remove all the shadow computation
1491    // that they depend on, which includes GETIs of the shadow registers.
1492    if (MC_(clo_mc_level) == 1)
1493       return;
1494 
1495    tl_assert(isOriginalAtom(mce,atom));
1496    vatom = expr2vbits( mce, atom );
1497    tl_assert(sameKindedAtoms(atom, vatom));
1498    ty   = descr->elemTy;
1499    tyS  = shadowTypeV(ty);
1500    arrSize = descr->nElems * sizeofIRType(ty);
1501    tl_assert(ty != Ity_I1);
1502    tl_assert(isOriginalAtom(mce,ix));
1503    complainIfUndefined(mce, ix, NULL);
1504    if (isAlwaysDefd(mce, descr->base, arrSize)) {
1505       /* later: no ... */
1506       /* emit code to emit a complaint if any of the vbits are 1. */
1507       /* complainIfUndefined(mce, atom); */
1508    } else {
1509       /* Do a cloned version of the Put that refers to the shadow
1510          area. */
1511       IRRegArray* new_descr
1512          = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1513                          tyS, descr->nElems);
1514       stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
1515    }
1516 }
1517 
1518 
1519 /* Return an expression which contains the V bits corresponding to the
1520    given GET (passed in in pieces).
1521 */
1522 static
1523 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1524 {
1525    IRType tyS = shadowTypeV(ty);
1526    tl_assert(ty != Ity_I1);
1527    tl_assert(ty != Ity_I128);
1528    if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1529       /* Always defined, return all zeroes of the relevant type */
1530       return definedOfType(tyS);
1531    } else {
1532       /* return a cloned version of the Get that refers to the shadow
1533          area. */
1534       /* FIXME: this isn't an atom! */
1535       return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1536    }
1537 }
1538 
1539 
1540 /* Return an expression which contains the V bits corresponding to the
1541    given GETI (passed in in pieces).
1542 */
1543 static
1544 IRExpr* shadow_GETI ( MCEnv* mce,
1545                       IRRegArray* descr, IRAtom* ix, Int bias )
1546 {
1547    IRType ty   = descr->elemTy;
1548    IRType tyS  = shadowTypeV(ty);
1549    Int arrSize = descr->nElems * sizeofIRType(ty);
1550    tl_assert(ty != Ity_I1);
1551    tl_assert(isOriginalAtom(mce,ix));
1552    complainIfUndefined(mce, ix, NULL);
1553    if (isAlwaysDefd(mce, descr->base, arrSize)) {
1554       /* Always defined, return all zeroes of the relevant type */
1555       return definedOfType(tyS);
1556    } else {
1557       /* return a cloned version of the Get that refers to the shadow
1558          area. */
1559       IRRegArray* new_descr
1560          = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1561                          tyS, descr->nElems);
1562       return IRExpr_GetI( new_descr, ix, bias );
1563    }
1564 }
1565 
1566 
1567 /*------------------------------------------------------------*/
1568 /*--- Generating approximations for unknown operations,    ---*/
1569 /*--- using lazy-propagate semantics                       ---*/
1570 /*------------------------------------------------------------*/
1571 
1572 /* Lazy propagation of undefinedness from two values, resulting in the
1573    specified shadow type.
1574 */
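/* In other words (a summary of the scheme below, not part of the
   original comment): the result's V bits are all-undefined if any bit
   of va1 or va2 is undefined, and all-defined otherwise; the special
   cases below only reduce the number of PCasts needed to express
   that. */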
1575 static
1576 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1577 {
1578    IRAtom* at;
1579    IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1580    IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1581    tl_assert(isShadowAtom(mce,va1));
1582    tl_assert(isShadowAtom(mce,va2));
1583 
1584    /* The general case is inefficient because PCast is an expensive
1585       operation.  Here are some special cases which use PCast only
1586       once rather than twice. */
1587 
1588    /* I64 x I64 -> I64 */
1589    if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1590       if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1591       at = mkUifU(mce, Ity_I64, va1, va2);
1592       at = mkPCastTo(mce, Ity_I64, at);
1593       return at;
1594    }
1595 
1596    /* I64 x I64 -> I32 */
1597    if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1598       if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1599       at = mkUifU(mce, Ity_I64, va1, va2);
1600       at = mkPCastTo(mce, Ity_I32, at);
1601       return at;
1602    }
1603 
1604    if (0) {
1605       VG_(printf)("mkLazy2 ");
1606       ppIRType(t1);
1607       VG_(printf)("_");
1608       ppIRType(t2);
1609       VG_(printf)("_");
1610       ppIRType(finalVty);
1611       VG_(printf)("\n");
1612    }
1613 
1614    /* General case: force everything via 32-bit intermediaries. */
1615    at = mkPCastTo(mce, Ity_I32, va1);
1616    at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1617    at = mkPCastTo(mce, finalVty, at);
1618    return at;
1619 }
1620 
1621 
1622 /* 3-arg version of the above. */
1623 static
1624 IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
1625                   IRAtom* va1, IRAtom* va2, IRAtom* va3 )
1626 {
1627    IRAtom* at;
1628    IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1629    IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1630    IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1631    tl_assert(isShadowAtom(mce,va1));
1632    tl_assert(isShadowAtom(mce,va2));
1633    tl_assert(isShadowAtom(mce,va3));
1634 
1635    /* The general case is inefficient because PCast is an expensive
1636       operation.  Here are some special cases which use PCast only
1637       twice rather than three times. */
1638 
1639    /* I32 x I64 x I64 -> I64 */
1640    /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1641    if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1642        && finalVty == Ity_I64) {
1643       if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
1644       /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
1645          mode indication which is fully defined, this should get
1646          folded out later. */
1647       at = mkPCastTo(mce, Ity_I64, va1);
1648       /* Now fold in 2nd and 3rd args. */
1649       at = mkUifU(mce, Ity_I64, at, va2);
1650       at = mkUifU(mce, Ity_I64, at, va3);
1651       /* and PCast once again. */
1652       at = mkPCastTo(mce, Ity_I64, at);
1653       return at;
1654    }
1655 
1656    /* I32 x I8 x I64 -> I64 */
1657    if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I64
1658        && finalVty == Ity_I64) {
1659       if (0) VG_(printf)("mkLazy3: I32 x I8 x I64 -> I64\n");
1660       /* Widen 1st and 2nd args to I64.  Since 1st arg is typically a
1661        * rounding mode indication which is fully defined, this should
1662        * get folded out later.
1663       */
1664       IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
1665       IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
1666       at = mkUifU(mce, Ity_I64, at1, at2);  // UifU(PCast(va1), PCast(va2))
1667       at = mkUifU(mce, Ity_I64, at, va3);
1668       /* and PCast once again. */
1669       at = mkPCastTo(mce, Ity_I64, at);
1670       return at;
1671    }
1672 
1673    /* I32 x I64 x I64 -> I32 */
1674    if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1675        && finalVty == Ity_I32) {
1676       if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
1677       at = mkPCastTo(mce, Ity_I64, va1);
1678       at = mkUifU(mce, Ity_I64, at, va2);
1679       at = mkUifU(mce, Ity_I64, at, va3);
1680       at = mkPCastTo(mce, Ity_I32, at);
1681       return at;
1682    }
1683 
1684    /* I32 x I32 x I32 -> I32 */
1685    /* 32-bit FP idiom, as (eg) happens on ARM */
1686    if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
1687        && finalVty == Ity_I32) {
1688       if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
1689       at = va1;
1690       at = mkUifU(mce, Ity_I32, at, va2);
1691       at = mkUifU(mce, Ity_I32, at, va3);
1692       at = mkPCastTo(mce, Ity_I32, at);
1693       return at;
1694    }
1695 
1696    /* I32 x I128 x I128 -> I128 */
1697    /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1698    if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
1699        && finalVty == Ity_I128) {
1700       if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
1701       /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
1702          mode indication which is fully defined, this should get
1703          folded out later. */
1704       at = mkPCastTo(mce, Ity_I128, va1);
1705       /* Now fold in 2nd and 3rd args. */
1706       at = mkUifU(mce, Ity_I128, at, va2);
1707       at = mkUifU(mce, Ity_I128, at, va3);
1708       /* and PCast once again. */
1709       at = mkPCastTo(mce, Ity_I128, at);
1710       return at;
1711    }
1712 
1713    /* I32 x I8 x I128 -> I128 */
1714    /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1715    if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I128
1716        && finalVty == Ity_I128) {
1717       if (0) VG_(printf)("mkLazy3: I32 x I8 x I128 -> I128\n");
1718       /* Use I64 as an intermediate type, which means PCasting all 3
1719          args to I64 to start with. 1st arg is typically a rounding
1720          mode indication which is fully defined, so we hope that it
1721          will get folded out later. */
1722       IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
1723       IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
1724       IRAtom* at3 = mkPCastTo(mce, Ity_I64, va3);
1725       /* Now UifU all three together. */
1726       at = mkUifU(mce, Ity_I64, at1, at2);  // UifU(PCast(va1), PCast(va2))
1727       at = mkUifU(mce, Ity_I64, at, at3);   // ... `UifU` PCast(va3)
1728       /* and PCast once again. */
1729       at = mkPCastTo(mce, Ity_I128, at);
1730       return at;
1731    }
1732    if (1) {
1733       VG_(printf)("mkLazy3: ");
1734       ppIRType(t1);
1735       VG_(printf)(" x ");
1736       ppIRType(t2);
1737       VG_(printf)(" x ");
1738       ppIRType(t3);
1739       VG_(printf)(" -> ");
1740       ppIRType(finalVty);
1741       VG_(printf)("\n");
1742    }
1743 
1744    tl_assert(0);
1745    /* General case: force everything via 32-bit intermediaries. */
1746    /*
1747    at = mkPCastTo(mce, Ity_I32, va1);
1748    at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1749    at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
1750    at = mkPCastTo(mce, finalVty, at);
1751    return at;
1752    */
1753 }
1754 
1755 
1756 /* 4-arg version of the above. */
1757 static
1758 IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
1759                   IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
1760 {
1761    IRAtom* at;
1762    IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1763    IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1764    IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1765    IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
1766    tl_assert(isShadowAtom(mce,va1));
1767    tl_assert(isShadowAtom(mce,va2));
1768    tl_assert(isShadowAtom(mce,va3));
1769    tl_assert(isShadowAtom(mce,va4));
1770 
1771    /* The general case is inefficient because PCast is an expensive
1772       operation.  Here are some special cases which use PCast only
1773       twice rather than three times. */
1774 
1775    /* I32 x I64 x I64 x I64 -> I64 */
1776    /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1777    if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
1778        && finalVty == Ity_I64) {
1779       if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
1780       /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
1781          mode indication which is fully defined, this should get
1782          folded out later. */
1783       at = mkPCastTo(mce, Ity_I64, va1);
1784       /* Now fold in 2nd, 3rd, 4th args. */
1785       at = mkUifU(mce, Ity_I64, at, va2);
1786       at = mkUifU(mce, Ity_I64, at, va3);
1787       at = mkUifU(mce, Ity_I64, at, va4);
1788       /* and PCast once again. */
1789       at = mkPCastTo(mce, Ity_I64, at);
1790       return at;
1791    }
1792    /* I32 x I32 x I32 x I32 -> I32 */
1793    /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1794    if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
1795        && finalVty == Ity_I32) {
1796       if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
1797       at = va1;
1798       /* Now fold in 2nd, 3rd, 4th args. */
1799       at = mkUifU(mce, Ity_I32, at, va2);
1800       at = mkUifU(mce, Ity_I32, at, va3);
1801       at = mkUifU(mce, Ity_I32, at, va4);
1802       at = mkPCastTo(mce, Ity_I32, at);
1803       return at;
1804    }
1805 
1806    if (1) {
1807       VG_(printf)("mkLazy4: ");
1808       ppIRType(t1);
1809       VG_(printf)(" x ");
1810       ppIRType(t2);
1811       VG_(printf)(" x ");
1812       ppIRType(t3);
1813       VG_(printf)(" x ");
1814       ppIRType(t4);
1815       VG_(printf)(" -> ");
1816       ppIRType(finalVty);
1817       VG_(printf)("\n");
1818    }
1819 
1820    tl_assert(0);
1821 }
1822 
1823 
1824 /* Do the lazy propagation game from a null-terminated vector of
1825    atoms.  This is presumably the arguments to a helper call, so the
1826    IRCallee info is also supplied in order that we can know which
1827    arguments should be ignored (via the .mcx_mask field).
1828 */
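/* For example (illustrative): a callee with mcx_mask == 0x1 asks for
   argument 0 to be excluded, so its V bits are ignored when computing
   the helper result's definedness below. */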
1829 static
1830 IRAtom* mkLazyN ( MCEnv* mce,
1831                   IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1832 {
1833    Int     i;
1834    IRAtom* here;
1835    IRAtom* curr;
1836    IRType  mergeTy;
1837    Bool    mergeTy64 = True;
1838 
1839    /* Decide on the type of the merge intermediary.  If all relevant
1840       args are I64, then it's I64.  In all other circumstances, use
1841       I32. */
1842    for (i = 0; exprvec[i]; i++) {
1843       tl_assert(i < 32);
1844       tl_assert(isOriginalAtom(mce, exprvec[i]));
1845       if (cee->mcx_mask & (1<<i))
1846          continue;
1847       if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
1848          mergeTy64 = False;
1849    }
1850 
1851    mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
1852    curr    = definedOfType(mergeTy);
1853 
1854    for (i = 0; exprvec[i]; i++) {
1855       tl_assert(i < 32);
1856       tl_assert(isOriginalAtom(mce, exprvec[i]));
1857       /* Only take notice of this arg if the callee's mc-exclusion
1858          mask does not say it is to be excluded. */
1859       if (cee->mcx_mask & (1<<i)) {
1860          /* the arg is to be excluded from definedness checking.  Do
1861             nothing. */
1862          if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1863       } else {
1864          /* calculate the arg's definedness, and pessimistically merge
1865             it in. */
1866          here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1867          curr = mergeTy64
1868                    ? mkUifU64(mce, here, curr)
1869                    : mkUifU32(mce, here, curr);
1870       }
1871    }
1872    return mkPCastTo(mce, finalVtype, curr );
1873 }
1874 
1875 
1876 /*------------------------------------------------------------*/
1877 /*--- Generating expensive sequences for exact carry-chain ---*/
1878 /*--- propagation in add/sub and related operations.       ---*/
1879 /*------------------------------------------------------------*/
1880 
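/* A small worked example of the scheme below (illustrative): take
   Ity_I32 with aa = 0b0110 and qaa = 0b0010 (bit 1 undefined), and a
   fully-defined bb = 1.  Then a_min = 0b0100, a_max = 0b0110, and
   (a_min + 1) ^ (a_max + 1) = 0b0101 ^ 0b0111 = 0b0010, so exactly the
   result bits that can change with the undefined input are marked
   undefined, in addition to qaa | qbb. */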
1881 static
1882 IRAtom* expensiveAddSub ( MCEnv*  mce,
1883                           Bool    add,
1884                           IRType  ty,
1885                           IRAtom* qaa, IRAtom* qbb,
1886                           IRAtom* aa,  IRAtom* bb )
1887 {
1888    IRAtom *a_min, *b_min, *a_max, *b_max;
1889    IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;
1890 
1891    tl_assert(isShadowAtom(mce,qaa));
1892    tl_assert(isShadowAtom(mce,qbb));
1893    tl_assert(isOriginalAtom(mce,aa));
1894    tl_assert(isOriginalAtom(mce,bb));
1895    tl_assert(sameKindedAtoms(qaa,aa));
1896    tl_assert(sameKindedAtoms(qbb,bb));
1897 
1898    switch (ty) {
1899       case Ity_I32:
1900          opAND = Iop_And32;
1901          opOR  = Iop_Or32;
1902          opXOR = Iop_Xor32;
1903          opNOT = Iop_Not32;
1904          opADD = Iop_Add32;
1905          opSUB = Iop_Sub32;
1906          break;
1907       case Ity_I64:
1908          opAND = Iop_And64;
1909          opOR  = Iop_Or64;
1910          opXOR = Iop_Xor64;
1911          opNOT = Iop_Not64;
1912          opADD = Iop_Add64;
1913          opSUB = Iop_Sub64;
1914          break;
1915       default:
1916          VG_(tool_panic)("expensiveAddSub");
1917    }
1918 
1919    // a_min = aa & ~qaa
1920    a_min = assignNew('V', mce,ty,
1921                      binop(opAND, aa,
1922                                   assignNew('V', mce,ty, unop(opNOT, qaa))));
1923 
1924    // b_min = bb & ~qbb
1925    b_min = assignNew('V', mce,ty,
1926                      binop(opAND, bb,
1927                                   assignNew('V', mce,ty, unop(opNOT, qbb))));
1928 
1929    // a_max = aa | qaa
1930    a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));
1931 
1932    // b_max = bb | qbb
1933    b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));
1934 
1935    if (add) {
1936       // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1937       return
1938       assignNew('V', mce,ty,
1939          binop( opOR,
1940                 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1941                 assignNew('V', mce,ty,
1942                    binop( opXOR,
1943                           assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
1944                           assignNew('V', mce,ty, binop(opADD, a_max, b_max))
1945                    )
1946                 )
1947          )
1948       );
1949    } else {
1950       // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
1951       return
1952       assignNew('V', mce,ty,
1953          binop( opOR,
1954                 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1955                 assignNew('V', mce,ty,
1956                    binop( opXOR,
1957                           assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
1958                           assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
1959                    )
1960                 )
1961          )
1962       );
1963    }
1964 
1965 }
1966 
1967 
1968 static
1969 IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
1970                                        IRAtom* atom, IRAtom* vatom )
1971 {
1972    IRType ty;
1973    IROp xorOp, subOp, andOp;
1974    IRExpr *one;
1975    IRAtom *improver, *improved;
1976    tl_assert(isShadowAtom(mce,vatom));
1977    tl_assert(isOriginalAtom(mce,atom));
1978    tl_assert(sameKindedAtoms(atom,vatom));
1979 
1980    switch (czop) {
1981       case Iop_Ctz32:
1982          ty = Ity_I32;
1983          xorOp = Iop_Xor32;
1984          subOp = Iop_Sub32;
1985          andOp = Iop_And32;
1986          one = mkU32(1);
1987          break;
1988       case Iop_Ctz64:
1989          ty = Ity_I64;
1990          xorOp = Iop_Xor64;
1991          subOp = Iop_Sub64;
1992          andOp = Iop_And64;
1993          one = mkU64(1);
1994          break;
1995       default:
1996          ppIROp(czop);
1997          VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes");
1998    }
1999 
2000    // improver = atom ^ (atom - 1)
2001    //
2002    // That is, improver has its low ctz(atom)+1 bits equal to one;
2003    // higher bits (if any) equal to zero.
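   // For example (illustrative): atom = 0b101000 gives
   // atom - 1 = 0b100111 and improver = 0b001111, i.e. the low
   // ctz(atom)+1 = 4 bits are set and everything above is clear.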
2004    improver = assignNew('V', mce,ty,
2005                         binop(xorOp,
2006                               atom,
2007                               assignNew('V', mce, ty,
2008                                         binop(subOp, atom, one))));
2009 
2010    // improved = vatom & improver
2011    //
2012    // That is, treat any V bits above the low ctz(atom)+1 bits as
2013    // "defined".
2014    improved = assignNew('V', mce, ty,
2015                         binop(andOp, vatom, improver));
2016 
2017    // Return pessimizing cast of improved.
2018    return mkPCastTo(mce, ty, improved);
2019 }
2020 
2021 
2022 /*------------------------------------------------------------*/
2023 /*--- Scalar shifts.                                       ---*/
2024 /*------------------------------------------------------------*/
2025 
2026 /* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
2027    idea is to shift the definedness bits by the original shift amount.
2028    This introduces 0s ("defined") in new positions for left shifts and
2029    unsigned right shifts, and copies the top definedness bit for
2030    signed right shifts.  So, conveniently, applying the original shift
2031    operator to the definedness bits for the left arg is exactly the
2032    right thing to do:
2033 
2034       (qaa << bb)
2035 
2036    However if the shift amount is undefined then the whole result
2037    is undefined.  Hence need:
2038 
2039       (qaa << bb) `UifU` PCast(qbb)
2040 
2041    If the shift amount bb is a literal then qbb will say 'all defined'
2042    and the UifU and PCast will get folded out by post-instrumentation
2043    optimisation.
2044 */
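/* For example (illustrative): if qaa = 0x000000FF (low byte of aa
   undefined) and bb is the constant 4 with qbb all-defined, then
   (qaa << 4) = 0x00000FF0 marks exactly the shifted-to positions as
   undefined, and PCast(qbb) is all zeroes, so the UifU term drops out
   after optimisation. */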
2045 static IRAtom* scalarShift ( MCEnv*  mce,
2046                              IRType  ty,
2047                              IROp    original_op,
2048                              IRAtom* qaa, IRAtom* qbb,
2049                              IRAtom* aa,  IRAtom* bb )
2050 {
2051    tl_assert(isShadowAtom(mce,qaa));
2052    tl_assert(isShadowAtom(mce,qbb));
2053    tl_assert(isOriginalAtom(mce,aa));
2054    tl_assert(isOriginalAtom(mce,bb));
2055    tl_assert(sameKindedAtoms(qaa,aa));
2056    tl_assert(sameKindedAtoms(qbb,bb));
2057    return
2058       assignNew(
2059          'V', mce, ty,
2060          mkUifU( mce, ty,
2061                  assignNew('V', mce, ty, binop(original_op, qaa, bb)),
2062                  mkPCastTo(mce, ty, qbb)
2063          )
2064    );
2065 }
2066 
2067 
2068 /*------------------------------------------------------------*/
2069 /*--- Helpers for dealing with vector primops.             ---*/
2070 /*------------------------------------------------------------*/
2071 
2072 /* Vector pessimisation -- pessimise within each lane individually. */
2073 
2074 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
2075 {
2076    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
2077 }
2078 
2079 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
2080 {
2081    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
2082 }
2083 
2084 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
2085 {
2086    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
2087 }
2088 
2089 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
2090 {
2091    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
2092 }
2093 
2094 static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
2095 {
2096    return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
2097 }
2098 
2099 static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
2100 {
2101    return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
2102 }
2103 
2104 static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
2105 {
2106    return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
2107 }
2108 
2109 static IRAtom* mkPCast16x16 ( MCEnv* mce, IRAtom* at )
2110 {
2111    return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ16x16, at));
2112 }
2113 
2114 static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
2115 {
2116    return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
2117 }
2118 
2119 static IRAtom* mkPCast8x32 ( MCEnv* mce, IRAtom* at )
2120 {
2121    return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ8x32, at));
2122 }
2123 
2124 static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
2125 {
2126    return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
2127 }
2128 
2129 static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
2130 {
2131    return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
2132 }
2133 
2134 static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
2135 {
2136    return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
2137 }
2138 
2139 
2140 /* Here's a simple scheme capable of handling ops derived from SSE1
2141    code, while only generating ops that can be efficiently
2142    implemented in SSE1. */
2143 
2144 /* All-lanes versions are straightforward:
2145 
2146    binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))
2147 
2148    unary32Fx4(x)      ==> PCast32x4(x#)
2149 
2150    Lowest-lane-only versions are more complex:
2151 
2152    binary32F0x4(x,y)  ==> SetV128lo32(
2153                              x#,
2154                              PCast32(V128to32(UifUV128(x#,y#)))
2155                           )
2156 
2157    This is perhaps not so obvious.  In particular, it's faster to
2158    do a V128-bit UifU and then take the bottom 32 bits than the more
2159    obvious scheme of taking the bottom 32 bits of each operand
2160    and doing a 32-bit UifU.  Basically since UifU is fast and
2161    chopping lanes off vector values is slow.
2162 
2163    Finally:
2164 
2165    unary32F0x4(x)     ==> SetV128lo32(
2166                              x#,
2167                              PCast32(V128to32(x#))
2168                           )
2169 
2170    Where:
2171 
2172    PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
2173    PCast32x4(v#) = CmpNEZ32x4(v#)
2174 */
2175 
2176 static
2177 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2178 {
2179    IRAtom* at;
2180    tl_assert(isShadowAtom(mce, vatomX));
2181    tl_assert(isShadowAtom(mce, vatomY));
2182    at = mkUifUV128(mce, vatomX, vatomY);
2183    at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
2184    return at;
2185 }
2186 
2187 static
2188 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
2189 {
2190    IRAtom* at;
2191    tl_assert(isShadowAtom(mce, vatomX));
2192    at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
2193    return at;
2194 }
2195 
2196 static
2197 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2198 {
2199    IRAtom* at;
2200    tl_assert(isShadowAtom(mce, vatomX));
2201    tl_assert(isShadowAtom(mce, vatomY));
2202    at = mkUifUV128(mce, vatomX, vatomY);
2203    at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
2204    at = mkPCastTo(mce, Ity_I32, at);
2205    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
2206    return at;
2207 }
2208 
2209 static
2210 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
2211 {
2212    IRAtom* at;
2213    tl_assert(isShadowAtom(mce, vatomX));
2214    at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
2215    at = mkPCastTo(mce, Ity_I32, at);
2216    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
2217    return at;
2218 }
2219 
2220 /* --- ... and ... 64Fx2 versions of the same ... --- */
2221 
2222 static
2223 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2224 {
2225    IRAtom* at;
2226    tl_assert(isShadowAtom(mce, vatomX));
2227    tl_assert(isShadowAtom(mce, vatomY));
2228    at = mkUifUV128(mce, vatomX, vatomY);
2229    at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
2230    return at;
2231 }
2232 
2233 static
2234 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
2235 {
2236    IRAtom* at;
2237    tl_assert(isShadowAtom(mce, vatomX));
2238    at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
2239    return at;
2240 }
2241 
2242 static
2243 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2244 {
2245    IRAtom* at;
2246    tl_assert(isShadowAtom(mce, vatomX));
2247    tl_assert(isShadowAtom(mce, vatomY));
2248    at = mkUifUV128(mce, vatomX, vatomY);
2249    at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
2250    at = mkPCastTo(mce, Ity_I64, at);
2251    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2252    return at;
2253 }
2254 
2255 static
2256 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
2257 {
2258    IRAtom* at;
2259    tl_assert(isShadowAtom(mce, vatomX));
2260    at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
2261    at = mkPCastTo(mce, Ity_I64, at);
2262    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2263    return at;
2264 }
2265 
2266 /* --- --- ... and ... 32Fx2 versions of the same --- --- */
2267 
2268 static
2269 IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2270 {
2271    IRAtom* at;
2272    tl_assert(isShadowAtom(mce, vatomX));
2273    tl_assert(isShadowAtom(mce, vatomY));
2274    at = mkUifU64(mce, vatomX, vatomY);
2275    at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
2276    return at;
2277 }
2278 
2279 static
2280 IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
2281 {
2282    IRAtom* at;
2283    tl_assert(isShadowAtom(mce, vatomX));
2284    at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
2285    return at;
2286 }
2287 
2288 /* --- ... and ... 64Fx4 versions of the same ... --- */
2289 
2290 static
2291 IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2292 {
2293    IRAtom* at;
2294    tl_assert(isShadowAtom(mce, vatomX));
2295    tl_assert(isShadowAtom(mce, vatomY));
2296    at = mkUifUV256(mce, vatomX, vatomY);
2297    at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
2298    return at;
2299 }
2300 
2301 static
2302 IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
2303 {
2304    IRAtom* at;
2305    tl_assert(isShadowAtom(mce, vatomX));
2306    at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
2307    return at;
2308 }
2309 
2310 /* --- ... and ... 32Fx8 versions of the same ... --- */
2311 
2312 static
2313 IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2314 {
2315    IRAtom* at;
2316    tl_assert(isShadowAtom(mce, vatomX));
2317    tl_assert(isShadowAtom(mce, vatomY));
2318    at = mkUifUV256(mce, vatomX, vatomY);
2319    at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
2320    return at;
2321 }
2322 
2323 static
2324 IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
2325 {
2326    IRAtom* at;
2327    tl_assert(isShadowAtom(mce, vatomX));
2328    at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
2329    return at;
2330 }
2331 
2332 /* --- 64Fx2 binary FP ops, with rounding mode --- */
2333 
2334 static
2335 IRAtom* binary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM,
2336                                        IRAtom* vatomX, IRAtom* vatomY )
2337 {
2338    /* This is the same as binary64Fx2, except that we subsequently
2339       pessimise vRM (definedness of the rounding mode), widen to 128
2340       bits and UifU it into the result.  As with the scalar cases, if
2341       the RM is a constant then it is defined and so this extra bit
2342       will get constant-folded out later. */
2343    // "do" the vector args
2344    IRAtom* t1 = binary64Fx2(mce, vatomX, vatomY);
2345    // PCast the RM, and widen it to 128 bits
2346    IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2347    // Roll it into the result
2348    t1 = mkUifUV128(mce, t1, t2);
2349    return t1;
2350 }
2351 
2352 /* --- ... and ... 32Fx4 versions of the same --- */
2353 
2354 static
2355 IRAtom* binary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
2356                                        IRAtom* vatomX, IRAtom* vatomY )
2357 {
2358    IRAtom* t1 = binary32Fx4(mce, vatomX, vatomY);
2359    // PCast the RM, and widen it to 128 bits
2360    IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2361    // Roll it into the result
2362    t1 = mkUifUV128(mce, t1, t2);
2363    return t1;
2364 }
2365 
2366 /* --- ... and ... 64Fx4 versions of the same --- */
2367 
2368 static
2369 IRAtom* binary64Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
2370                                        IRAtom* vatomX, IRAtom* vatomY )
2371 {
2372    IRAtom* t1 = binary64Fx4(mce, vatomX, vatomY);
2373    // PCast the RM, and widen it to 256 bits
2374    IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2375    // Roll it into the result
2376    t1 = mkUifUV256(mce, t1, t2);
2377    return t1;
2378 }
2379 
2380 /* --- ... and ... 32Fx8 versions of the same --- */
2381 
2382 static
2383 IRAtom* binary32Fx8_w_rm ( MCEnv* mce, IRAtom* vRM,
2384                                        IRAtom* vatomX, IRAtom* vatomY )
2385 {
2386    IRAtom* t1 = binary32Fx8(mce, vatomX, vatomY);
2387    // PCast the RM, and widen it to 256 bits
2388    IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2389    // Roll it into the result
2390    t1 = mkUifUV256(mce, t1, t2);
2391    return t1;
2392 }
2393 
2394 /* --- 64Fx2 unary FP ops, with rounding mode --- */
2395 
2396 static
2397 IRAtom* unary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
2398 {
2399    /* Same scheme as binary64Fx2_w_rm. */
2400    // "do" the vector arg
2401    IRAtom* t1 = unary64Fx2(mce, vatomX);
2402    // PCast the RM, and widen it to 128 bits
2403    IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2404    // Roll it into the result
2405    t1 = mkUifUV128(mce, t1, t2);
2406    return t1;
2407 }
2408 
2409 /* --- ... and ... 32Fx4 versions of the same --- */
2410 
2411 static
2412 IRAtom* unary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
2413 {
2414    /* Same scheme as unary64Fx2_w_rm. */
2415    IRAtom* t1 = unary32Fx4(mce, vatomX);
2416    // PCast the RM, and widen it to 128 bits
2417    IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2418    // Roll it into the result
2419    t1 = mkUifUV128(mce, t1, t2);
2420    return t1;
2421 }
2422 
2423 
2424 /* --- --- Vector saturated narrowing --- --- */
2425 
2426 /* We used to do something very clever here, but on closer inspection
2427    (2011-Jun-15), and in particular bug #279698, it turns out to be
2428    wrong.  Part of the problem came from the fact that for a long
2429    time, the IR primops to do with saturated narrowing were
2430    underspecified and managed to confuse multiple cases which needed
2431    to be separate: the op names had a signedness qualifier, but in
2432    fact the source and destination signednesses needed to be specified
2433    independently, so the op names really need two independent
2434    signedness specifiers.
2435 
2436    As of 2011-Jun-15 (ish) the underspecification was sorted out
2437    properly.  The incorrect instrumentation remained, though.  That
2438    has now (2011-Oct-22) been fixed.
2439 
2440    What we now do is simple:
2441 
2442    Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2443    number of lanes, X is the source lane width and signedness, and Y
2444    is the destination lane width and signedness.  In all cases the
2445    destination lane width is half the source lane width, so the names
2446    have a bit of redundancy, but are at least easy to read.
2447 
2448    For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2449    to unsigned 16s.
2450 
2451    Let Vanilla(OP) be a function that takes OP, one of these
2452    saturating narrowing ops, and produces the same "shaped" narrowing
2453    op which is not saturating, but merely dumps the most significant
2454    bits.  "same shape" means that the lane numbers and widths are the
2455    same as with OP.
2456 
2457    For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2458                   = Iop_NarrowBin32to16x8,
2459    that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2460    dumping the top half of each lane.
2461 
2462    So, with that in place, the scheme is simple, and it is simple to
2463    pessimise each lane individually and then apply Vanilla(OP) so as
2464    to get the result in the right "shape".  If the original OP is
2465    QNarrowBinXtoYxZ then we produce
2466 
2467    Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
2468 
2469    or for the case when OP is unary (Iop_QNarrowUn*)
2470 
2471    Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
2472 */
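/* For example (following the scheme above): the V bits for
   Iop_QNarrowBin32Sto16Ux8 applied to x and y are computed as
   NarrowBin32to16x8( CmpNEZ32x4(x#), CmpNEZ32x4(y#) ), where x# and
   y# are the shadow (V-bit) values of the operands. */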
2473 static
2474 IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
2475 {
2476    switch (qnarrowOp) {
2477       /* Binary: (128, 128) -> 128 */
2478       case Iop_QNarrowBin16Sto8Ux16:
2479       case Iop_QNarrowBin16Sto8Sx16:
2480       case Iop_QNarrowBin16Uto8Ux16:
2481       case Iop_QNarrowBin64Sto32Sx4:
2482       case Iop_QNarrowBin64Uto32Ux4:
2483          return Iop_NarrowBin16to8x16;
2484       case Iop_QNarrowBin32Sto16Ux8:
2485       case Iop_QNarrowBin32Sto16Sx8:
2486       case Iop_QNarrowBin32Uto16Ux8:
2487          return Iop_NarrowBin32to16x8;
2488       /* Binary: (64, 64) -> 64 */
2489       case Iop_QNarrowBin32Sto16Sx4:
2490          return Iop_NarrowBin32to16x4;
2491       case Iop_QNarrowBin16Sto8Ux8:
2492       case Iop_QNarrowBin16Sto8Sx8:
2493          return Iop_NarrowBin16to8x8;
2494       /* Unary: 128 -> 64 */
2495       case Iop_QNarrowUn64Uto32Ux2:
2496       case Iop_QNarrowUn64Sto32Sx2:
2497       case Iop_QNarrowUn64Sto32Ux2:
2498          return Iop_NarrowUn64to32x2;
2499       case Iop_QNarrowUn32Uto16Ux4:
2500       case Iop_QNarrowUn32Sto16Sx4:
2501       case Iop_QNarrowUn32Sto16Ux4:
2502          return Iop_NarrowUn32to16x4;
2503       case Iop_QNarrowUn16Uto8Ux8:
2504       case Iop_QNarrowUn16Sto8Sx8:
2505       case Iop_QNarrowUn16Sto8Ux8:
2506          return Iop_NarrowUn16to8x8;
2507       default:
2508          ppIROp(qnarrowOp);
2509          VG_(tool_panic)("vanillaNarrowOpOfShape");
2510    }
2511 }
2512 
2513 static
2514 IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
2515                               IRAtom* vatom1, IRAtom* vatom2)
2516 {
2517    IRAtom *at1, *at2, *at3;
2518    IRAtom* (*pcast)( MCEnv*, IRAtom* );
2519    switch (narrow_op) {
2520       case Iop_QNarrowBin64Sto32Sx4: pcast = mkPCast32x4; break;
2521       case Iop_QNarrowBin64Uto32Ux4: pcast = mkPCast32x4; break;
2522       case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
2523       case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
2524       case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2525       case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2526       case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2527       case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2528       default: VG_(tool_panic)("vectorNarrowBinV128");
2529    }
2530    IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2531    tl_assert(isShadowAtom(mce,vatom1));
2532    tl_assert(isShadowAtom(mce,vatom2));
2533    at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2534    at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
2535    at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
2536    return at3;
2537 }
2538 
2539 static
2540 IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2541                             IRAtom* vatom1, IRAtom* vatom2)
2542 {
2543    IRAtom *at1, *at2, *at3;
2544    IRAtom* (*pcast)( MCEnv*, IRAtom* );
2545    switch (narrow_op) {
2546       case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2547       case Iop_QNarrowBin16Sto8Sx8:  pcast = mkPCast16x4; break;
2548       case Iop_QNarrowBin16Sto8Ux8:  pcast = mkPCast16x4; break;
2549       default: VG_(tool_panic)("vectorNarrowBin64");
2550    }
2551    IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2552    tl_assert(isShadowAtom(mce,vatom1));
2553    tl_assert(isShadowAtom(mce,vatom2));
2554    at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2555    at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
2556    at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
2557    return at3;
2558 }
2559 
2560 static
2561 IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
2562                              IRAtom* vatom1)
2563 {
2564    IRAtom *at1, *at2;
2565    IRAtom* (*pcast)( MCEnv*, IRAtom* );
2566    tl_assert(isShadowAtom(mce,vatom1));
2567    /* For vanilla narrowing (non-saturating), we can just apply
2568       the op directly to the V bits. */
2569    switch (narrow_op) {
2570       case Iop_NarrowUn16to8x8:
2571       case Iop_NarrowUn32to16x4:
2572       case Iop_NarrowUn64to32x2:
2573          at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
2574          return at1;
2575       default:
2576          break; /* Do Plan B */
2577    }
2578    /* Plan B: for ops that involve a saturation operation on the args,
2579       we must PCast before the vanilla narrow. */
2580    switch (narrow_op) {
2581       case Iop_QNarrowUn16Sto8Sx8:  pcast = mkPCast16x8; break;
2582       case Iop_QNarrowUn16Sto8Ux8:  pcast = mkPCast16x8; break;
2583       case Iop_QNarrowUn16Uto8Ux8:  pcast = mkPCast16x8; break;
2584       case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
2585       case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
2586       case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
2587       case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
2588       case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
2589       case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
2590       default: VG_(tool_panic)("vectorNarrowUnV128");
2591    }
2592    IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2593    at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2594    at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
2595    return at2;
2596 }
2597 
2598 static
2599 IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
2600                          IRAtom* vatom1)
2601 {
2602    IRAtom *at1, *at2;
2603    IRAtom* (*pcast)( MCEnv*, IRAtom* );
2604    switch (longen_op) {
2605       case Iop_Widen8Uto16x8:  pcast = mkPCast16x8; break;
2606       case Iop_Widen8Sto16x8:  pcast = mkPCast16x8; break;
2607       case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
2608       case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
2609       case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
2610       case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
2611       default: VG_(tool_panic)("vectorWidenI64");
2612    }
2613    tl_assert(isShadowAtom(mce,vatom1));
2614    at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
2615    at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2616    return at2;
2617 }
2618 
2619 
2620 /* --- --- Vector integer arithmetic --- --- */
2621 
2622 /* Simple ... UifU the args and per-lane pessimise the results. */
2623 
2624 /* --- V256-bit versions --- */
2625 
2626 static
2627 IRAtom* binary8Ix32 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2628 {
2629    IRAtom* at;
2630    at = mkUifUV256(mce, vatom1, vatom2);
2631    at = mkPCast8x32(mce, at);
2632    return at;
2633 }
2634 
2635 static
2636 IRAtom* binary16Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2637 {
2638    IRAtom* at;
2639    at = mkUifUV256(mce, vatom1, vatom2);
2640    at = mkPCast16x16(mce, at);
2641    return at;
2642 }
2643 
2644 static
2645 IRAtom* binary32Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2646 {
2647    IRAtom* at;
2648    at = mkUifUV256(mce, vatom1, vatom2);
2649    at = mkPCast32x8(mce, at);
2650    return at;
2651 }
2652 
2653 static
2654 IRAtom* binary64Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2655 {
2656    IRAtom* at;
2657    at = mkUifUV256(mce, vatom1, vatom2);
2658    at = mkPCast64x4(mce, at);
2659    return at;
2660 }
2661 
2662 /* --- V128-bit versions --- */
2663 
2664 static
2665 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2666 {
2667    IRAtom* at;
2668    at = mkUifUV128(mce, vatom1, vatom2);
2669    at = mkPCast8x16(mce, at);
2670    return at;
2671 }
2672 
2673 static
2674 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2675 {
2676    IRAtom* at;
2677    at = mkUifUV128(mce, vatom1, vatom2);
2678    at = mkPCast16x8(mce, at);
2679    return at;
2680 }
2681 
2682 static
2683 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2684 {
2685    IRAtom* at;
2686    at = mkUifUV128(mce, vatom1, vatom2);
2687    at = mkPCast32x4(mce, at);
2688    return at;
2689 }
2690 
2691 static
2692 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2693 {
2694    IRAtom* at;
2695    at = mkUifUV128(mce, vatom1, vatom2);
2696    at = mkPCast64x2(mce, at);
2697    return at;
2698 }
2699 
2700 /* --- 64-bit versions --- */
2701 
2702 static
2703 IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2704 {
2705    IRAtom* at;
2706    at = mkUifU64(mce, vatom1, vatom2);
2707    at = mkPCast8x8(mce, at);
2708    return at;
2709 }
2710 
2711 static
2712 IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2713 {
2714    IRAtom* at;
2715    at = mkUifU64(mce, vatom1, vatom2);
2716    at = mkPCast16x4(mce, at);
2717    return at;
2718 }
2719 
2720 static
2721 IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2722 {
2723    IRAtom* at;
2724    at = mkUifU64(mce, vatom1, vatom2);
2725    at = mkPCast32x2(mce, at);
2726    return at;
2727 }
2728 
2729 static
2730 IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2731 {
2732    IRAtom* at;
2733    at = mkUifU64(mce, vatom1, vatom2);
2734    at = mkPCastTo(mce, Ity_I64, at);
2735    return at;
2736 }
2737 
2738 /* --- 32-bit versions --- */
2739 
2740 static
2741 IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2742 {
2743    IRAtom* at;
2744    at = mkUifU32(mce, vatom1, vatom2);
2745    at = mkPCast8x4(mce, at);
2746    return at;
2747 }
2748 
2749 static
2750 IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2751 {
2752    IRAtom* at;
2753    at = mkUifU32(mce, vatom1, vatom2);
2754    at = mkPCast16x2(mce, at);
2755    return at;
2756 }
2757 
2758 
2759 /*------------------------------------------------------------*/
2760 /*--- Generate shadow values from all kinds of IRExprs.    ---*/
2761 /*------------------------------------------------------------*/
2762 
2763 static
2764 IRAtom* expr2vbits_Qop ( MCEnv* mce,
2765                          IROp op,
2766                          IRAtom* atom1, IRAtom* atom2,
2767                          IRAtom* atom3, IRAtom* atom4 )
2768 {
2769    IRAtom* vatom1 = expr2vbits( mce, atom1 );
2770    IRAtom* vatom2 = expr2vbits( mce, atom2 );
2771    IRAtom* vatom3 = expr2vbits( mce, atom3 );
2772    IRAtom* vatom4 = expr2vbits( mce, atom4 );
2773 
2774    tl_assert(isOriginalAtom(mce,atom1));
2775    tl_assert(isOriginalAtom(mce,atom2));
2776    tl_assert(isOriginalAtom(mce,atom3));
2777    tl_assert(isOriginalAtom(mce,atom4));
2778    tl_assert(isShadowAtom(mce,vatom1));
2779    tl_assert(isShadowAtom(mce,vatom2));
2780    tl_assert(isShadowAtom(mce,vatom3));
2781    tl_assert(isShadowAtom(mce,vatom4));
2782    tl_assert(sameKindedAtoms(atom1,vatom1));
2783    tl_assert(sameKindedAtoms(atom2,vatom2));
2784    tl_assert(sameKindedAtoms(atom3,vatom3));
2785    tl_assert(sameKindedAtoms(atom4,vatom4));
2786    switch (op) {
2787       case Iop_MAddF64:
2788       case Iop_MAddF64r32:
2789       case Iop_MSubF64:
2790       case Iop_MSubF64r32:
2791          /* I32(rm) x F64 x F64 x F64 -> F64 */
2792          return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
2793 
2794       case Iop_MAddF32:
2795       case Iop_MSubF32:
2796          /* I32(rm) x F32 x F32 x F32 -> F32 */
2797          return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
2798 
2799       /* V256-bit data-steering */
2800       case Iop_64x4toV256:
2801          return assignNew('V', mce, Ity_V256,
2802                           IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));
2803 
2804       default:
2805          ppIROp(op);
2806          VG_(tool_panic)("memcheck:expr2vbits_Qop");
2807    }
2808 }
2809 
2810 
2811 static
2812 IRAtom* expr2vbits_Triop ( MCEnv* mce,
2813                            IROp op,
2814                            IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
2815 {
2816    IRAtom* vatom1 = expr2vbits( mce, atom1 );
2817    IRAtom* vatom2 = expr2vbits( mce, atom2 );
2818    IRAtom* vatom3 = expr2vbits( mce, atom3 );
2819 
2820    tl_assert(isOriginalAtom(mce,atom1));
2821    tl_assert(isOriginalAtom(mce,atom2));
2822    tl_assert(isOriginalAtom(mce,atom3));
2823    tl_assert(isShadowAtom(mce,vatom1));
2824    tl_assert(isShadowAtom(mce,vatom2));
2825    tl_assert(isShadowAtom(mce,vatom3));
2826    tl_assert(sameKindedAtoms(atom1,vatom1));
2827    tl_assert(sameKindedAtoms(atom2,vatom2));
2828    tl_assert(sameKindedAtoms(atom3,vatom3));
2829    switch (op) {
2830       case Iop_AddF128:
2831       case Iop_AddD128:
2832       case Iop_SubF128:
2833       case Iop_SubD128:
2834       case Iop_MulF128:
2835       case Iop_MulD128:
2836       case Iop_DivF128:
2837       case Iop_DivD128:
2838       case Iop_QuantizeD128:
2839          /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
2840          return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
2841       case Iop_AddF64:
2842       case Iop_AddD64:
2843       case Iop_AddF64r32:
2844       case Iop_SubF64:
2845       case Iop_SubD64:
2846       case Iop_SubF64r32:
2847       case Iop_MulF64:
2848       case Iop_MulD64:
2849       case Iop_MulF64r32:
2850       case Iop_DivF64:
2851       case Iop_DivD64:
2852       case Iop_DivF64r32:
2853       case Iop_ScaleF64:
2854       case Iop_Yl2xF64:
2855       case Iop_Yl2xp1F64:
2856       case Iop_AtanF64:
2857       case Iop_PRemF64:
2858       case Iop_PRem1F64:
2859       case Iop_QuantizeD64:
2860          /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
2861          return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
2862       case Iop_PRemC3210F64:
2863       case Iop_PRem1C3210F64:
2864          /* I32(rm) x F64 x F64 -> I32 */
2865          return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2866       case Iop_AddF32:
2867       case Iop_SubF32:
2868       case Iop_MulF32:
2869       case Iop_DivF32:
2870          /* I32(rm) x F32 x F32 -> F32 */
2871          return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2872       case Iop_SignificanceRoundD64:
2873          /* IRRoundingMode(I32) x I8 x D64 -> D64 */
2874          return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
2875       case Iop_SignificanceRoundD128:
2876          /* IRRoundingMode(I32) x I8 x D128 -> D128 */
2877          return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
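      /* For the ops below that take a concrete index or shift amount,
         complain if that operand is undefined, then apply the same op
         to the V bits of the data operands, passing the concrete
         operand through unchanged. */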
2878       case Iop_SliceV128:
2879          /* (V128, V128, I8) -> V128 */
2880          complainIfUndefined(mce, atom3, NULL);
2881          return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
2882       case Iop_Slice64:
2883          /* (I64, I64, I8) -> I64 */
2884          complainIfUndefined(mce, atom3, NULL);
2885          return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
2886       case Iop_SetElem8x8:
2887       case Iop_SetElem16x4:
2888       case Iop_SetElem32x2:
2889          complainIfUndefined(mce, atom2, NULL);
2890          return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
2891       /* BCDIops */
2892       case Iop_BCDAdd:
2893       case Iop_BCDSub:
2894          complainIfUndefined(mce, atom3, NULL);
2895          return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
2896 
2897       /* Vector FP with rounding mode as the first arg */
2898       case Iop_Add64Fx2:
2899       case Iop_Sub64Fx2:
2900       case Iop_Mul64Fx2:
2901       case Iop_Div64Fx2:
2902          return binary64Fx2_w_rm(mce, vatom1, vatom2, vatom3);
2903 
2904       case Iop_Add32Fx4:
2905       case Iop_Sub32Fx4:
2906       case Iop_Mul32Fx4:
2907       case Iop_Div32Fx4:
2908         return binary32Fx4_w_rm(mce, vatom1, vatom2, vatom3);
2909 
2910       case Iop_Add64Fx4:
2911       case Iop_Sub64Fx4:
2912       case Iop_Mul64Fx4:
2913       case Iop_Div64Fx4:
2914          return binary64Fx4_w_rm(mce, vatom1, vatom2, vatom3);
2915 
2916       case Iop_Add32Fx8:
2917       case Iop_Sub32Fx8:
2918       case Iop_Mul32Fx8:
2919       case Iop_Div32Fx8:
2920          return binary32Fx8_w_rm(mce, vatom1, vatom2, vatom3);
2921 
2922       default:
2923          ppIROp(op);
2924          VG_(tool_panic)("memcheck:expr2vbits_Triop");
2925    }
2926 }
2927 
2928 
2929 static
2930 IRAtom* expr2vbits_Binop ( MCEnv* mce,
2931                            IROp op,
2932                            IRAtom* atom1, IRAtom* atom2 )
2933 {
2934    IRType  and_or_ty;
2935    IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
2936    IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
2937    IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2938 
2939    IRAtom* vatom1 = expr2vbits( mce, atom1 );
2940    IRAtom* vatom2 = expr2vbits( mce, atom2 );
2941 
2942    tl_assert(isOriginalAtom(mce,atom1));
2943    tl_assert(isOriginalAtom(mce,atom2));
2944    tl_assert(isShadowAtom(mce,vatom1));
2945    tl_assert(isShadowAtom(mce,vatom2));
2946    tl_assert(sameKindedAtoms(atom1,vatom1));
2947    tl_assert(sameKindedAtoms(atom2,vatom2));
2948    switch (op) {
2949 
2950       /* 32-bit SIMD */
2951 
2952       case Iop_Add16x2:
2953       case Iop_HAdd16Ux2:
2954       case Iop_HAdd16Sx2:
2955       case Iop_Sub16x2:
2956       case Iop_HSub16Ux2:
2957       case Iop_HSub16Sx2:
2958       case Iop_QAdd16Sx2:
2959       case Iop_QSub16Sx2:
2960       case Iop_QSub16Ux2:
2961       case Iop_QAdd16Ux2:
2962          return binary16Ix2(mce, vatom1, vatom2);
2963 
2964       case Iop_Add8x4:
2965       case Iop_HAdd8Ux4:
2966       case Iop_HAdd8Sx4:
2967       case Iop_Sub8x4:
2968       case Iop_HSub8Ux4:
2969       case Iop_HSub8Sx4:
2970       case Iop_QSub8Ux4:
2971       case Iop_QAdd8Ux4:
2972       case Iop_QSub8Sx4:
2973       case Iop_QAdd8Sx4:
2974          return binary8Ix4(mce, vatom1, vatom2);
2975 
2976       /* 64-bit SIMD */
2977 
2978       case Iop_ShrN8x8:
2979       case Iop_ShrN16x4:
2980       case Iop_ShrN32x2:
2981       case Iop_SarN8x8:
2982       case Iop_SarN16x4:
2983       case Iop_SarN32x2:
2984       case Iop_ShlN16x4:
2985       case Iop_ShlN32x2:
2986       case Iop_ShlN8x8:
2987          /* Same scheme as with all other shifts. */
2988          complainIfUndefined(mce, atom2, NULL);
2989          return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
2990 
2991       case Iop_QNarrowBin32Sto16Sx4:
2992       case Iop_QNarrowBin16Sto8Sx8:
2993       case Iop_QNarrowBin16Sto8Ux8:
2994          return vectorNarrowBin64(mce, op, vatom1, vatom2);
2995 
2996       case Iop_Min8Ux8:
2997       case Iop_Min8Sx8:
2998       case Iop_Max8Ux8:
2999       case Iop_Max8Sx8:
3000       case Iop_Avg8Ux8:
3001       case Iop_QSub8Sx8:
3002       case Iop_QSub8Ux8:
3003       case Iop_Sub8x8:
3004       case Iop_CmpGT8Sx8:
3005       case Iop_CmpGT8Ux8:
3006       case Iop_CmpEQ8x8:
3007       case Iop_QAdd8Sx8:
3008       case Iop_QAdd8Ux8:
3009       case Iop_QSal8x8:
3010       case Iop_QShl8x8:
3011       case Iop_Add8x8:
3012       case Iop_Mul8x8:
3013       case Iop_PolynomialMul8x8:
3014          return binary8Ix8(mce, vatom1, vatom2);
3015 
3016       case Iop_Min16Sx4:
3017       case Iop_Min16Ux4:
3018       case Iop_Max16Sx4:
3019       case Iop_Max16Ux4:
3020       case Iop_Avg16Ux4:
3021       case Iop_QSub16Ux4:
3022       case Iop_QSub16Sx4:
3023       case Iop_Sub16x4:
3024       case Iop_Mul16x4:
3025       case Iop_MulHi16Sx4:
3026       case Iop_MulHi16Ux4:
3027       case Iop_CmpGT16Sx4:
3028       case Iop_CmpGT16Ux4:
3029       case Iop_CmpEQ16x4:
3030       case Iop_QAdd16Sx4:
3031       case Iop_QAdd16Ux4:
3032       case Iop_QSal16x4:
3033       case Iop_QShl16x4:
3034       case Iop_Add16x4:
3035       case Iop_QDMulHi16Sx4:
3036       case Iop_QRDMulHi16Sx4:
3037          return binary16Ix4(mce, vatom1, vatom2);
3038 
3039       case Iop_Sub32x2:
3040       case Iop_Mul32x2:
3041       case Iop_Max32Sx2:
3042       case Iop_Max32Ux2:
3043       case Iop_Min32Sx2:
3044       case Iop_Min32Ux2:
3045       case Iop_CmpGT32Sx2:
3046       case Iop_CmpGT32Ux2:
3047       case Iop_CmpEQ32x2:
3048       case Iop_Add32x2:
3049       case Iop_QAdd32Ux2:
3050       case Iop_QAdd32Sx2:
3051       case Iop_QSub32Ux2:
3052       case Iop_QSub32Sx2:
3053       case Iop_QSal32x2:
3054       case Iop_QShl32x2:
3055       case Iop_QDMulHi32Sx2:
3056       case Iop_QRDMulHi32Sx2:
3057          return binary32Ix2(mce, vatom1, vatom2);
3058 
3059       case Iop_QSub64Ux1:
3060       case Iop_QSub64Sx1:
3061       case Iop_QAdd64Ux1:
3062       case Iop_QAdd64Sx1:
3063       case Iop_QSal64x1:
3064       case Iop_QShl64x1:
3065       case Iop_Sal64x1:
3066          return binary64Ix1(mce, vatom1, vatom2);
3067 
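      /* Saturating shifts by an immediate: the shift amount must be
         defined (complain otherwise), and each lane of the shiftee is
         pessimised, since saturation lets any undefined bit in a lane
         affect the whole lane. */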
3068       case Iop_QShlNsatSU8x8:
3069       case Iop_QShlNsatUU8x8:
3070       case Iop_QShlNsatSS8x8:
3071          complainIfUndefined(mce, atom2, NULL);
3072          return mkPCast8x8(mce, vatom1);
3073 
3074       case Iop_QShlNsatSU16x4:
3075       case Iop_QShlNsatUU16x4:
3076       case Iop_QShlNsatSS16x4:
3077          complainIfUndefined(mce, atom2, NULL);
3078          return mkPCast16x4(mce, vatom1);
3079 
3080       case Iop_QShlNsatSU32x2:
3081       case Iop_QShlNsatUU32x2:
3082       case Iop_QShlNsatSS32x2:
3083          complainIfUndefined(mce, atom2, NULL);
3084          return mkPCast32x2(mce, vatom1);
3085 
3086       case Iop_QShlNsatSU64x1:
3087       case Iop_QShlNsatUU64x1:
3088       case Iop_QShlNsatSS64x1:
3089          complainIfUndefined(mce, atom2, NULL);
3090          return mkPCast32x2(mce, vatom1);
3091 
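      /* Pairwise ops: pessimise each lane of both args, then apply a
         pairwise op of the same geometry so each argument lane's
         (pessimised) V bits land in the correct result lane; the adds
         additionally pessimise the result lanes. */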
3092       case Iop_PwMax32Sx2:
3093       case Iop_PwMax32Ux2:
3094       case Iop_PwMin32Sx2:
3095       case Iop_PwMin32Ux2:
3096       case Iop_PwMax32Fx2:
3097       case Iop_PwMin32Fx2:
3098          return assignNew('V', mce, Ity_I64,
3099                           binop(Iop_PwMax32Ux2,
3100                                 mkPCast32x2(mce, vatom1),
3101                                 mkPCast32x2(mce, vatom2)));
3102 
3103       case Iop_PwMax16Sx4:
3104       case Iop_PwMax16Ux4:
3105       case Iop_PwMin16Sx4:
3106       case Iop_PwMin16Ux4:
3107          return assignNew('V', mce, Ity_I64,
3108                           binop(Iop_PwMax16Ux4,
3109                                 mkPCast16x4(mce, vatom1),
3110                                 mkPCast16x4(mce, vatom2)));
3111 
3112       case Iop_PwMax8Sx8:
3113       case Iop_PwMax8Ux8:
3114       case Iop_PwMin8Sx8:
3115       case Iop_PwMin8Ux8:
3116          return assignNew('V', mce, Ity_I64,
3117                           binop(Iop_PwMax8Ux8,
3118                                 mkPCast8x8(mce, vatom1),
3119                                 mkPCast8x8(mce, vatom2)));
3120 
3121       case Iop_PwAdd32x2:
3122       case Iop_PwAdd32Fx2:
3123          return mkPCast32x2(mce,
3124                assignNew('V', mce, Ity_I64,
3125                          binop(Iop_PwAdd32x2,
3126                                mkPCast32x2(mce, vatom1),
3127                                mkPCast32x2(mce, vatom2))));
3128 
3129       case Iop_PwAdd16x4:
3130          return mkPCast16x4(mce,
3131                assignNew('V', mce, Ity_I64,
3132                          binop(op, mkPCast16x4(mce, vatom1),
3133                                    mkPCast16x4(mce, vatom2))));
3134 
3135       case Iop_PwAdd8x8:
3136          return mkPCast8x8(mce,
3137                assignNew('V', mce, Ity_I64,
3138                          binop(op, mkPCast8x8(mce, vatom1),
3139                                    mkPCast8x8(mce, vatom2))));
3140 
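      /* Vector shifts by a per-lane amount: shift the V bits of the
         first arg by the concrete amounts, then UifU in the pessimised
         per-lane V bits of the second arg, so an undefined shift
         amount makes its whole result lane undefined. */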
3141       case Iop_Shl8x8:
3142       case Iop_Shr8x8:
3143       case Iop_Sar8x8:
3144       case Iop_Sal8x8:
3145          return mkUifU64(mce,
3146                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3147                    mkPCast8x8(mce,vatom2)
3148                 );
3149 
3150       case Iop_Shl16x4:
3151       case Iop_Shr16x4:
3152       case Iop_Sar16x4:
3153       case Iop_Sal16x4:
3154          return mkUifU64(mce,
3155                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3156                    mkPCast16x4(mce,vatom2)
3157                 );
3158 
3159       case Iop_Shl32x2:
3160       case Iop_Shr32x2:
3161       case Iop_Sar32x2:
3162       case Iop_Sal32x2:
3163          return mkUifU64(mce,
3164                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3165                    mkPCast32x2(mce,vatom2)
3166                 );
3167 
3168       /* 64-bit data-steering */
3169       case Iop_InterleaveLO32x2:
3170       case Iop_InterleaveLO16x4:
3171       case Iop_InterleaveLO8x8:
3172       case Iop_InterleaveHI32x2:
3173       case Iop_InterleaveHI16x4:
3174       case Iop_InterleaveHI8x8:
3175       case Iop_CatOddLanes8x8:
3176       case Iop_CatEvenLanes8x8:
3177       case Iop_CatOddLanes16x4:
3178       case Iop_CatEvenLanes16x4:
3179       case Iop_InterleaveOddLanes8x8:
3180       case Iop_InterleaveEvenLanes8x8:
3181       case Iop_InterleaveOddLanes16x4:
3182       case Iop_InterleaveEvenLanes16x4:
3183          return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
3184 
3185       case Iop_GetElem8x8:
3186          complainIfUndefined(mce, atom2, NULL);
3187          return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3188       case Iop_GetElem16x4:
3189          complainIfUndefined(mce, atom2, NULL);
3190          return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3191       case Iop_GetElem32x2:
3192          complainIfUndefined(mce, atom2, NULL);
3193          return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3194 
3195       /* Perm8x8: rearrange values in left arg using steering values
3196         from right arg.  So rearrange the vbits in the same way but
3197         pessimise wrt steering values. */
3198       case Iop_Perm8x8:
3199          return mkUifU64(
3200                    mce,
3201                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3202                    mkPCast8x8(mce, vatom2)
3203                 );
3204 
3205       /* V128-bit SIMD */
3206 
3207       case Iop_Sqrt32Fx4:
3208          return unary32Fx4_w_rm(mce, vatom1, vatom2);
3209       case Iop_Sqrt64Fx2:
3210          return unary64Fx2_w_rm(mce, vatom1, vatom2);
3211 
3212       case Iop_ShrN8x16:
3213       case Iop_ShrN16x8:
3214       case Iop_ShrN32x4:
3215       case Iop_ShrN64x2:
3216       case Iop_SarN8x16:
3217       case Iop_SarN16x8:
3218       case Iop_SarN32x4:
3219       case Iop_SarN64x2:
3220       case Iop_ShlN8x16:
3221       case Iop_ShlN16x8:
3222       case Iop_ShlN32x4:
3223       case Iop_ShlN64x2:
3224          /* Same scheme as with all other shifts.  Note: 22 Oct 05:
3225             this is wrong now, scalar shifts are done properly lazily.
3226             Vector shifts should be fixed too. */
3227          complainIfUndefined(mce, atom2, NULL);
3228          return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3229 
3230       /* V x V shifts/rotates are done using the standard lazy scheme. */
3231       /* For the non-rounding variants of bi-di vector x vector
3232          shifts (the Iop_Sh.. ops, that is) we use the lazy scheme.
3233          But note that this is overly pessimistic, because in fact only
3234          the bottom 8 bits of each lane of the second argument are taken
3235          into account when shifting.  So really we ought to ignore
3236          undefinedness in bits 8 and above of each lane in the
3237          second argument. */
3238       case Iop_Shl8x16:
3239       case Iop_Shr8x16:
3240       case Iop_Sar8x16:
3241       case Iop_Sal8x16:
3242       case Iop_Rol8x16:
3243       case Iop_Sh8Sx16:
3244       case Iop_Sh8Ux16:
3245          return mkUifUV128(mce,
3246                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3247                    mkPCast8x16(mce,vatom2)
3248                 );
3249 
3250       case Iop_Shl16x8:
3251       case Iop_Shr16x8:
3252       case Iop_Sar16x8:
3253       case Iop_Sal16x8:
3254       case Iop_Rol16x8:
3255       case Iop_Sh16Sx8:
3256       case Iop_Sh16Ux8:
3257          return mkUifUV128(mce,
3258                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3259                    mkPCast16x8(mce,vatom2)
3260                 );
3261 
3262       case Iop_Shl32x4:
3263       case Iop_Shr32x4:
3264       case Iop_Sar32x4:
3265       case Iop_Sal32x4:
3266       case Iop_Rol32x4:
3267       case Iop_Sh32Sx4:
3268       case Iop_Sh32Ux4:
3269          return mkUifUV128(mce,
3270                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3271                    mkPCast32x4(mce,vatom2)
3272                 );
3273 
3274       case Iop_Shl64x2:
3275       case Iop_Shr64x2:
3276       case Iop_Sar64x2:
3277       case Iop_Sal64x2:
3278       case Iop_Rol64x2:
3279       case Iop_Sh64Sx2:
3280       case Iop_Sh64Ux2:
3281          return mkUifUV128(mce,
3282                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3283                    mkPCast64x2(mce,vatom2)
3284                 );
3285 
3286       /* For the rounding variants of bi-di vector x vector shifts, the
3287          rounding adjustment can cause undefinedness to propagate through
3288          the entire lane, in the worst case.  Too complex to handle
3289          properly .. just UifU the arguments and then PCast them.
3290          Suboptimal but safe. */
3291       case Iop_Rsh8Sx16:
3292       case Iop_Rsh8Ux16:
3293          return binary8Ix16(mce, vatom1, vatom2);
3294       case Iop_Rsh16Sx8:
3295       case Iop_Rsh16Ux8:
3296          return binary16Ix8(mce, vatom1, vatom2);
3297       case Iop_Rsh32Sx4:
3298       case Iop_Rsh32Ux4:
3299          return binary32Ix4(mce, vatom1, vatom2);
3300       case Iop_Rsh64Sx2:
3301       case Iop_Rsh64Ux2:
3302          return binary64Ix2(mce, vatom1, vatom2);
3303 
3304       case Iop_F32ToFixed32Ux4_RZ:
3305       case Iop_F32ToFixed32Sx4_RZ:
3306       case Iop_Fixed32UToF32x4_RN:
3307       case Iop_Fixed32SToF32x4_RN:
3308          complainIfUndefined(mce, atom2, NULL);
3309          return mkPCast32x4(mce, vatom1);
3310 
3311       case Iop_F32ToFixed32Ux2_RZ:
3312       case Iop_F32ToFixed32Sx2_RZ:
3313       case Iop_Fixed32UToF32x2_RN:
3314       case Iop_Fixed32SToF32x2_RN:
3315          complainIfUndefined(mce, atom2, NULL);
3316          return mkPCast32x2(mce, vatom1);
3317 
3318       case Iop_QSub8Ux16:
3319       case Iop_QSub8Sx16:
3320       case Iop_Sub8x16:
3321       case Iop_Min8Ux16:
3322       case Iop_Min8Sx16:
3323       case Iop_Max8Ux16:
3324       case Iop_Max8Sx16:
3325       case Iop_CmpGT8Sx16:
3326       case Iop_CmpGT8Ux16:
3327       case Iop_CmpEQ8x16:
3328       case Iop_Avg8Ux16:
3329       case Iop_Avg8Sx16:
3330       case Iop_QAdd8Ux16:
3331       case Iop_QAdd8Sx16:
3332       case Iop_QAddExtUSsatSS8x16:
3333       case Iop_QAddExtSUsatUU8x16:
3334       case Iop_QSal8x16:
3335       case Iop_QShl8x16:
3336       case Iop_Add8x16:
3337       case Iop_Mul8x16:
3338       case Iop_PolynomialMul8x16:
3339       case Iop_PolynomialMulAdd8x16:
3340          return binary8Ix16(mce, vatom1, vatom2);
3341 
3342       case Iop_QSub16Ux8:
3343       case Iop_QSub16Sx8:
3344       case Iop_Sub16x8:
3345       case Iop_Mul16x8:
3346       case Iop_MulHi16Sx8:
3347       case Iop_MulHi16Ux8:
3348       case Iop_Min16Sx8:
3349       case Iop_Min16Ux8:
3350       case Iop_Max16Sx8:
3351       case Iop_Max16Ux8:
3352       case Iop_CmpGT16Sx8:
3353       case Iop_CmpGT16Ux8:
3354       case Iop_CmpEQ16x8:
3355       case Iop_Avg16Ux8:
3356       case Iop_Avg16Sx8:
3357       case Iop_QAdd16Ux8:
3358       case Iop_QAdd16Sx8:
3359       case Iop_QAddExtUSsatSS16x8:
3360       case Iop_QAddExtSUsatUU16x8:
3361       case Iop_QSal16x8:
3362       case Iop_QShl16x8:
3363       case Iop_Add16x8:
3364       case Iop_QDMulHi16Sx8:
3365       case Iop_QRDMulHi16Sx8:
3366       case Iop_PolynomialMulAdd16x8:
3367          return binary16Ix8(mce, vatom1, vatom2);
3368 
3369       case Iop_Sub32x4:
3370       case Iop_CmpGT32Sx4:
3371       case Iop_CmpGT32Ux4:
3372       case Iop_CmpEQ32x4:
3373       case Iop_QAdd32Sx4:
3374       case Iop_QAdd32Ux4:
3375       case Iop_QSub32Sx4:
3376       case Iop_QSub32Ux4:
3377       case Iop_QAddExtUSsatSS32x4:
3378       case Iop_QAddExtSUsatUU32x4:
3379       case Iop_QSal32x4:
3380       case Iop_QShl32x4:
3381       case Iop_Avg32Ux4:
3382       case Iop_Avg32Sx4:
3383       case Iop_Add32x4:
3384       case Iop_Max32Ux4:
3385       case Iop_Max32Sx4:
3386       case Iop_Min32Ux4:
3387       case Iop_Min32Sx4:
3388       case Iop_Mul32x4:
3389       case Iop_QDMulHi32Sx4:
3390       case Iop_QRDMulHi32Sx4:
3391       case Iop_PolynomialMulAdd32x4:
3392          return binary32Ix4(mce, vatom1, vatom2);
3393 
3394       case Iop_Sub64x2:
3395       case Iop_Add64x2:
3396       case Iop_Max64Sx2:
3397       case Iop_Max64Ux2:
3398       case Iop_Min64Sx2:
3399       case Iop_Min64Ux2:
3400       case Iop_CmpEQ64x2:
3401       case Iop_CmpGT64Sx2:
3402       case Iop_CmpGT64Ux2:
3403       case Iop_QSal64x2:
3404       case Iop_QShl64x2:
3405       case Iop_QAdd64Ux2:
3406       case Iop_QAdd64Sx2:
3407       case Iop_QSub64Ux2:
3408       case Iop_QSub64Sx2:
3409       case Iop_QAddExtUSsatSS64x2:
3410       case Iop_QAddExtSUsatUU64x2:
3411       case Iop_PolynomialMulAdd64x2:
3412       case Iop_CipherV128:
3413       case Iop_CipherLV128:
3414       case Iop_NCipherV128:
3415       case Iop_NCipherLV128:
3416         return binary64Ix2(mce, vatom1, vatom2);
3417 
3418       case Iop_QNarrowBin64Sto32Sx4:
3419       case Iop_QNarrowBin64Uto32Ux4:
3420       case Iop_QNarrowBin32Sto16Sx8:
3421       case Iop_QNarrowBin32Uto16Ux8:
3422       case Iop_QNarrowBin32Sto16Ux8:
3423       case Iop_QNarrowBin16Sto8Sx16:
3424       case Iop_QNarrowBin16Uto8Ux16:
3425       case Iop_QNarrowBin16Sto8Ux16:
3426          return vectorNarrowBinV128(mce, op, vatom1, vatom2);
3427 
3428       case Iop_Min64Fx2:
3429       case Iop_Max64Fx2:
3430       case Iop_CmpLT64Fx2:
3431       case Iop_CmpLE64Fx2:
3432       case Iop_CmpEQ64Fx2:
3433       case Iop_CmpUN64Fx2:
3434       case Iop_RecipStep64Fx2:
3435       case Iop_RSqrtStep64Fx2:
3436          return binary64Fx2(mce, vatom1, vatom2);
3437 
3438       case Iop_Sub64F0x2:
3439       case Iop_Mul64F0x2:
3440       case Iop_Min64F0x2:
3441       case Iop_Max64F0x2:
3442       case Iop_Div64F0x2:
3443       case Iop_CmpLT64F0x2:
3444       case Iop_CmpLE64F0x2:
3445       case Iop_CmpEQ64F0x2:
3446       case Iop_CmpUN64F0x2:
3447       case Iop_Add64F0x2:
3448          return binary64F0x2(mce, vatom1, vatom2);
3449 
3450       case Iop_Min32Fx4:
3451       case Iop_Max32Fx4:
3452       case Iop_CmpLT32Fx4:
3453       case Iop_CmpLE32Fx4:
3454       case Iop_CmpEQ32Fx4:
3455       case Iop_CmpUN32Fx4:
3456       case Iop_CmpGT32Fx4:
3457       case Iop_CmpGE32Fx4:
3458       case Iop_RecipStep32Fx4:
3459       case Iop_RSqrtStep32Fx4:
3460          return binary32Fx4(mce, vatom1, vatom2);
3461 
3462       case Iop_Sub32Fx2:
3463       case Iop_Mul32Fx2:
3464       case Iop_Min32Fx2:
3465       case Iop_Max32Fx2:
3466       case Iop_CmpEQ32Fx2:
3467       case Iop_CmpGT32Fx2:
3468       case Iop_CmpGE32Fx2:
3469       case Iop_Add32Fx2:
3470       case Iop_RecipStep32Fx2:
3471       case Iop_RSqrtStep32Fx2:
3472          return binary32Fx2(mce, vatom1, vatom2);
3473 
3474       case Iop_Sub32F0x4:
3475       case Iop_Mul32F0x4:
3476       case Iop_Min32F0x4:
3477       case Iop_Max32F0x4:
3478       case Iop_Div32F0x4:
3479       case Iop_CmpLT32F0x4:
3480       case Iop_CmpLE32F0x4:
3481       case Iop_CmpEQ32F0x4:
3482       case Iop_CmpUN32F0x4:
3483       case Iop_Add32F0x4:
3484          return binary32F0x4(mce, vatom1, vatom2);
3485 
3486       case Iop_QShlNsatSU8x16:
3487       case Iop_QShlNsatUU8x16:
3488       case Iop_QShlNsatSS8x16:
3489          complainIfUndefined(mce, atom2, NULL);
3490          return mkPCast8x16(mce, vatom1);
3491 
3492       case Iop_QShlNsatSU16x8:
3493       case Iop_QShlNsatUU16x8:
3494       case Iop_QShlNsatSS16x8:
3495          complainIfUndefined(mce, atom2, NULL);
3496          return mkPCast16x8(mce, vatom1);
3497 
3498       case Iop_QShlNsatSU32x4:
3499       case Iop_QShlNsatUU32x4:
3500       case Iop_QShlNsatSS32x4:
3501          complainIfUndefined(mce, atom2, NULL);
3502          return mkPCast32x4(mce, vatom1);
3503 
3504       case Iop_QShlNsatSU64x2:
3505       case Iop_QShlNsatUU64x2:
3506       case Iop_QShlNsatSS64x2:
3507          complainIfUndefined(mce, atom2, NULL);
3508          return mkPCast32x4(mce, vatom1);
3509 
3510       /* Q-and-Qshift-by-imm-and-narrow of the form (V128, I8) -> V128.
3511          To make this simpler, do the following:
3512          * complain if the shift amount (the I8) is undefined
3513          * pcast each lane at the wide width
3514          * truncate each lane to half width
3515          * pcast the resulting 64-bit value to a single bit and use
3516            that as the least significant bit of the upper half of the
3517            result. */
3518       case Iop_QandQShrNnarrow64Uto32Ux2:
3519       case Iop_QandQSarNnarrow64Sto32Sx2:
3520       case Iop_QandQSarNnarrow64Sto32Ux2:
3521       case Iop_QandQRShrNnarrow64Uto32Ux2:
3522       case Iop_QandQRSarNnarrow64Sto32Sx2:
3523       case Iop_QandQRSarNnarrow64Sto32Ux2:
3524       case Iop_QandQShrNnarrow32Uto16Ux4:
3525       case Iop_QandQSarNnarrow32Sto16Sx4:
3526       case Iop_QandQSarNnarrow32Sto16Ux4:
3527       case Iop_QandQRShrNnarrow32Uto16Ux4:
3528       case Iop_QandQRSarNnarrow32Sto16Sx4:
3529       case Iop_QandQRSarNnarrow32Sto16Ux4:
3530       case Iop_QandQShrNnarrow16Uto8Ux8:
3531       case Iop_QandQSarNnarrow16Sto8Sx8:
3532       case Iop_QandQSarNnarrow16Sto8Ux8:
3533       case Iop_QandQRShrNnarrow16Uto8Ux8:
3534       case Iop_QandQRSarNnarrow16Sto8Sx8:
3535       case Iop_QandQRSarNnarrow16Sto8Ux8:
3536       {
3537          IRAtom* (*fnPessim) (MCEnv*, IRAtom*) = NULL;
3538          IROp opNarrow = Iop_INVALID;
3539          switch (op) {
3540             case Iop_QandQShrNnarrow64Uto32Ux2:
3541             case Iop_QandQSarNnarrow64Sto32Sx2:
3542             case Iop_QandQSarNnarrow64Sto32Ux2:
3543             case Iop_QandQRShrNnarrow64Uto32Ux2:
3544             case Iop_QandQRSarNnarrow64Sto32Sx2:
3545             case Iop_QandQRSarNnarrow64Sto32Ux2:
3546                fnPessim = mkPCast64x2;
3547                opNarrow = Iop_NarrowUn64to32x2;
3548                break;
3549             case Iop_QandQShrNnarrow32Uto16Ux4:
3550             case Iop_QandQSarNnarrow32Sto16Sx4:
3551             case Iop_QandQSarNnarrow32Sto16Ux4:
3552             case Iop_QandQRShrNnarrow32Uto16Ux4:
3553             case Iop_QandQRSarNnarrow32Sto16Sx4:
3554             case Iop_QandQRSarNnarrow32Sto16Ux4:
3555                fnPessim = mkPCast32x4;
3556                opNarrow = Iop_NarrowUn32to16x4;
3557                break;
3558             case Iop_QandQShrNnarrow16Uto8Ux8:
3559             case Iop_QandQSarNnarrow16Sto8Sx8:
3560             case Iop_QandQSarNnarrow16Sto8Ux8:
3561             case Iop_QandQRShrNnarrow16Uto8Ux8:
3562             case Iop_QandQRSarNnarrow16Sto8Sx8:
3563             case Iop_QandQRSarNnarrow16Sto8Ux8:
3564                fnPessim = mkPCast16x8;
3565                opNarrow = Iop_NarrowUn16to8x8;
3566                break;
3567             default:
3568                tl_assert(0);
3569          }
3570          complainIfUndefined(mce, atom2, NULL);
3571          // Pessimised shift result
3572          IRAtom* shV
3573             = fnPessim(mce, vatom1);
3574          // Narrowed, pessimised shift result
3575          IRAtom* shVnarrowed
3576             = assignNew('V', mce, Ity_I64, unop(opNarrow, shV));
3577          // Generates: Def--(63)--Def PCast-to-I1(narrowed)
3578          IRAtom* qV = mkPCastXXtoXXlsb(mce, shVnarrowed, Ity_I64);
3579          // and assemble the result
3580          return assignNew('V', mce, Ity_V128,
3581                           binop(Iop_64HLtoV128, qV, shVnarrowed));
3582       }
3583 
3584       case Iop_Mull32Sx2:
3585       case Iop_Mull32Ux2:
3586       case Iop_QDMull32Sx2:
3587          return vectorWidenI64(mce, Iop_Widen32Sto64x2,
3588                                     mkUifU64(mce, vatom1, vatom2));
3589 
3590       case Iop_Mull16Sx4:
3591       case Iop_Mull16Ux4:
3592       case Iop_QDMull16Sx4:
3593          return vectorWidenI64(mce, Iop_Widen16Sto32x4,
3594                                     mkUifU64(mce, vatom1, vatom2));
3595 
3596       case Iop_Mull8Sx8:
3597       case Iop_Mull8Ux8:
3598       case Iop_PolynomialMull8x8:
3599          return vectorWidenI64(mce, Iop_Widen8Sto16x8,
3600                                     mkUifU64(mce, vatom1, vatom2));
3601 
3602       case Iop_PwAdd32x4:
3603          return mkPCast32x4(mce,
3604                assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
3605                      mkPCast32x4(mce, vatom2))));
3606 
3607       case Iop_PwAdd16x8:
3608          return mkPCast16x8(mce,
3609                assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
3610                      mkPCast16x8(mce, vatom2))));
3611 
3612       case Iop_PwAdd8x16:
3613          return mkPCast8x16(mce,
3614                assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
3615                      mkPCast8x16(mce, vatom2))));
3616 
3617       /* V128-bit data-steering */
3618       case Iop_SetV128lo32:
3619       case Iop_SetV128lo64:
3620       case Iop_64HLtoV128:
3621       case Iop_InterleaveLO64x2:
3622       case Iop_InterleaveLO32x4:
3623       case Iop_InterleaveLO16x8:
3624       case Iop_InterleaveLO8x16:
3625       case Iop_InterleaveHI64x2:
3626       case Iop_InterleaveHI32x4:
3627       case Iop_InterleaveHI16x8:
3628       case Iop_InterleaveHI8x16:
3629       case Iop_CatOddLanes8x16:
3630       case Iop_CatOddLanes16x8:
3631       case Iop_CatOddLanes32x4:
3632       case Iop_CatEvenLanes8x16:
3633       case Iop_CatEvenLanes16x8:
3634       case Iop_CatEvenLanes32x4:
3635       case Iop_InterleaveOddLanes8x16:
3636       case Iop_InterleaveOddLanes16x8:
3637       case Iop_InterleaveOddLanes32x4:
3638       case Iop_InterleaveEvenLanes8x16:
3639       case Iop_InterleaveEvenLanes16x8:
3640       case Iop_InterleaveEvenLanes32x4:
3641          return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
3642 
3643       case Iop_GetElem8x16:
3644          complainIfUndefined(mce, atom2, NULL);
3645          return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3646       case Iop_GetElem16x8:
3647          complainIfUndefined(mce, atom2, NULL);
3648          return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3649       case Iop_GetElem32x4:
3650          complainIfUndefined(mce, atom2, NULL);
3651          return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3652       case Iop_GetElem64x2:
3653          complainIfUndefined(mce, atom2, NULL);
3654          return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3655 
3656      /* Perm8x16: rearrange values in left arg using steering values
3657         from right arg.  So rearrange the vbits in the same way but
3658         pessimise wrt steering values.  Perm32x4 ditto. */
3659       case Iop_Perm8x16:
3660          return mkUifUV128(
3661                    mce,
3662                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3663                    mkPCast8x16(mce, vatom2)
3664                 );
3665       case Iop_Perm32x4:
3666          return mkUifUV128(
3667                    mce,
3668                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3669                    mkPCast32x4(mce, vatom2)
3670                 );
3671 
3672      /* These two take the lower half of each 16-bit lane, sign/zero
3673         extend it to 32, and multiply together, producing a 32x4
3674         result (and implicitly ignoring half the operand bits).  So
3675         treat it as a bunch of independent 16x8 operations, but then
3676         do 32-bit shifts left-right to copy the lower half results
3677         (which are all 0s or all 1s due to PCasting in binary16Ix8)
3678         into the upper half of each result lane. */
3679       case Iop_MullEven16Ux8:
3680       case Iop_MullEven16Sx8: {
3681          IRAtom* at;
3682          at = binary16Ix8(mce,vatom1,vatom2);
3683          at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
3684          at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
3685          return at;
3686       }
3687 
3688       /* Same deal as Iop_MullEven16{S,U}x8 */
3689       case Iop_MullEven8Ux16:
3690       case Iop_MullEven8Sx16: {
3691          IRAtom* at;
3692          at = binary8Ix16(mce,vatom1,vatom2);
3693          at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
3694          at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
3695          return at;
3696       }
3697 
3698       /* Same deal as Iop_MullEven16{S,U}x8 */
3699       case Iop_MullEven32Ux4:
3700       case Iop_MullEven32Sx4: {
3701          IRAtom* at;
3702          at = binary32Ix4(mce,vatom1,vatom2);
3703          at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN64x2, at, mkU8(32)));
3704          at = assignNew('V', mce, Ity_V128, binop(Iop_SarN64x2, at, mkU8(32)));
3705          return at;
3706       }
3707 
3708       /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
3709          32x4 -> 16x8 laneage, discarding the upper half of each lane.
3710          Simply apply the same op to the V bits, since this really is no more
3711          than a data steering operation. */
3712       case Iop_NarrowBin32to16x8:
3713       case Iop_NarrowBin16to8x16:
3714       case Iop_NarrowBin64to32x4:
3715          return assignNew('V', mce, Ity_V128,
3716                                     binop(op, vatom1, vatom2));
3717 
3718       case Iop_ShrV128:
3719       case Iop_ShlV128:
3720          /* Same scheme as with all other shifts.  Note: 10 Nov 05:
3721             this is wrong now, scalar shifts are done properly lazily.
3722             Vector shifts should be fixed too. */
3723          complainIfUndefined(mce, atom2, NULL);
3724          return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3725 
3726       /* SHA Iops */
3727       case Iop_SHA256:
3728       case Iop_SHA512:
3729          complainIfUndefined(mce, atom2, NULL);
3730          return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3731 
3732       /* I128-bit data-steering */
3733       case Iop_64HLto128:
3734          return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
3735 
3736       /* V256-bit SIMD */
3737 
3738       case Iop_Max64Fx4:
3739       case Iop_Min64Fx4:
3740          return binary64Fx4(mce, vatom1, vatom2);
3741 
3742       case Iop_Max32Fx8:
3743       case Iop_Min32Fx8:
3744          return binary32Fx8(mce, vatom1, vatom2);
3745 
3746       /* V256-bit data-steering */
3747       case Iop_V128HLtoV256:
3748          return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
3749 
3750       /* Scalar floating point */
3751 
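      /* The scalar FP/DFP cases below all use the lazy scheme: the
         result shadow (at the stated size) is fully defined only if
         every operand is fully defined, and all-undefined otherwise. */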
3752       case Iop_F32toI64S:
3753       case Iop_F32toI64U:
3754          /* I32(rm) x F32 -> I64 */
3755          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3756 
3757       case Iop_I64StoF32:
3758          /* I32(rm) x I64 -> F32 */
3759          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3760 
3761       case Iop_RoundF64toInt:
3762       case Iop_RoundF64toF32:
3763       case Iop_F64toI64S:
3764       case Iop_F64toI64U:
3765       case Iop_I64StoF64:
3766       case Iop_I64UtoF64:
3767       case Iop_SinF64:
3768       case Iop_CosF64:
3769       case Iop_TanF64:
3770       case Iop_2xm1F64:
3771       case Iop_SqrtF64:
3772       case Iop_RecpExpF64:
3773          /* I32(rm) x I64/F64 -> I64/F64 */
3774          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3775 
3776       case Iop_ShlD64:
3777       case Iop_ShrD64:
3778       case Iop_RoundD64toInt:
3779          /* I32(rm) x D64 -> D64 */
3780          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3781 
3782       case Iop_ShlD128:
3783       case Iop_ShrD128:
3784       case Iop_RoundD128toInt:
3785          /* I32(rm) x D128 -> D128 */
3786          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3787 
3788       case Iop_RoundF128toInt:
3789          /* I32(rm) x F128 -> F128 */
3790          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3791 
3792       case Iop_D64toI64S:
3793       case Iop_D64toI64U:
3794       case Iop_I64StoD64:
3795       case Iop_I64UtoD64:
3796          /* I32(rm) x I64/D64 -> D64/I64 */
3797          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3798 
3799       case Iop_F32toD32:
3800       case Iop_F64toD32:
3801       case Iop_F128toD32:
3802       case Iop_D32toF32:
3803       case Iop_D64toF32:
3804       case Iop_D128toF32:
3805          /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D32/F32 */
3806          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3807 
3808       case Iop_F32toD64:
3809       case Iop_F64toD64:
3810       case Iop_F128toD64:
3811       case Iop_D32toF64:
3812       case Iop_D64toF64:
3813       case Iop_D128toF64:
3814          /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D64/F64 */
3815          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3816 
3817       case Iop_F32toD128:
3818       case Iop_F64toD128:
3819       case Iop_F128toD128:
3820       case Iop_D32toF128:
3821       case Iop_D64toF128:
3822       case Iop_D128toF128:
3823          /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D128/F128 */
3824          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3825 
3826       case Iop_RoundF32toInt:
3827       case Iop_SqrtF32:
3828       case Iop_RecpExpF32:
3829          /* I32(rm) x I32/F32 -> I32/F32 */
3830          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3831 
3832       case Iop_SqrtF128:
3833          /* I32(rm) x F128 -> F128 */
3834          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3835 
3836       case Iop_I32StoF32:
3837       case Iop_I32UtoF32:
3838       case Iop_F32toI32S:
3839       case Iop_F32toI32U:
3840          /* First arg is I32 (rounding mode), second is F32/I32 (data). */
3841          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3842 
3843       case Iop_F64toF16:
3844       case Iop_F32toF16:
3845          /* First arg is I32 (rounding mode), second is F64/F32 (data). */
3846          return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3847 
3848       case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32  */
3849       case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32  */
3850       case Iop_F128toF32:  /* IRRoundingMode(I32) x F128 -> F32         */
3851       case Iop_D128toI32S: /* IRRoundingMode(I32) x D128 -> signed I32  */
3852       case Iop_D128toI32U: /* IRRoundingMode(I32) x D128 -> unsigned I32  */
3853          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3854 
3855       case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64  */
3856       case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64  */
3857       case Iop_F128toF64:  /* IRRoundingMode(I32) x F128 -> F64         */
3858       case Iop_D128toD64:  /* IRRoundingMode(I64) x D128 -> D64 */
3859       case Iop_D128toI64S: /* IRRoundingMode(I64) x D128 -> signed I64  */
3860       case Iop_D128toI64U: /* IRRoundingMode(I32) x D128 -> unsigned I64  */
3861          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3862 
3863       case Iop_F64HLtoF128:
3864       case Iop_D64HLtoD128:
3865          return assignNew('V', mce, Ity_I128,
3866                           binop(Iop_64HLto128, vatom1, vatom2));
3867 
3868       case Iop_F64toI32U:
3869       case Iop_F64toI32S:
3870       case Iop_F64toF32:
3871       case Iop_I64UtoF32:
3872       case Iop_D64toI32U:
3873       case Iop_D64toI32S:
3874          /* First arg is I32 (rounding mode), second is F64/D64 (data). */
3875          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3876 
3877       case Iop_D64toD32:
3878          /* First arg is I32 (rounding mode), second is D64 (data). */
3879          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3880 
3881       case Iop_F64toI16S:
3882          /* First arg is I32 (rounding mode), second is F64 (data). */
3883          return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3884 
3885       case Iop_InsertExpD64:
3886          /*  I64 x I64 -> D64 */
3887          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3888 
3889       case Iop_InsertExpD128:
3890          /*  I64 x I128 -> D128 */
3891          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3892 
3893       case Iop_CmpF32:
3894       case Iop_CmpF64:
3895       case Iop_CmpF128:
3896       case Iop_CmpD64:
3897       case Iop_CmpD128:
3898       case Iop_CmpExpD64:
3899       case Iop_CmpExpD128:
3900          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3901 
3902       /* non-FP after here */
3903 
3904       case Iop_DivModU64to32:
3905       case Iop_DivModS64to32:
3906          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3907 
3908       case Iop_DivModU128to64:
3909       case Iop_DivModS128to64:
3910          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3911 
3912       case Iop_8HLto16:
3913          return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2));
3914       case Iop_16HLto32:
3915          return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
3916       case Iop_32HLto64:
3917          return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
3918 
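      /* Widening multiplies (and 64-bit divmod): compute the low-half
         V bits with the left-propagating rule, then PCast those to
         form the high half, and glue the two halves together. */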
3919       case Iop_DivModS64to64:
3920       case Iop_MullS64:
3921       case Iop_MullU64: {
3922          IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3923          IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
3924          return assignNew('V', mce, Ity_I128,
3925                           binop(Iop_64HLto128, vHi64, vLo64));
3926       }
3927 
3928       case Iop_MullS32:
3929       case Iop_MullU32: {
3930          IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3931          IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
3932          return assignNew('V', mce, Ity_I64,
3933                           binop(Iop_32HLto64, vHi32, vLo32));
3934       }
3935 
3936       case Iop_MullS16:
3937       case Iop_MullU16: {
3938          IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3939          IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
3940          return assignNew('V', mce, Ity_I32,
3941                           binop(Iop_16HLto32, vHi16, vLo16));
3942       }
3943 
3944       case Iop_MullS8:
3945       case Iop_MullU8: {
3946          IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3947          IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
3948          return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
3949       }
3950 
3951       case Iop_Sad8Ux4: /* maybe we could do better?  ftm, do mkLazy2. */
3952       case Iop_DivS32:
3953       case Iop_DivU32:
3954       case Iop_DivU32E:
3955       case Iop_DivS32E:
3956       case Iop_QAdd32S: /* could probably do better */
3957       case Iop_QSub32S: /* could probably do better */
3958          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3959 
3960       case Iop_DivS64:
3961       case Iop_DivU64:
3962       case Iop_DivS64E:
3963       case Iop_DivU64E:
3964          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3965 
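      /* Add/Sub: use the expensive, bit-precise interpretation when
         the superblock has been flagged for bogus literals
         (mce->bogusLiterals), or -- for adds -- when LLVM workarounds
         are enabled; otherwise fall back to the cheap left-propagating
         scheme. */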
3966       case Iop_Add32:
3967          if (mce->bogusLiterals || mce->useLLVMworkarounds)
3968             return expensiveAddSub(mce,True,Ity_I32,
3969                                    vatom1,vatom2, atom1,atom2);
3970          else
3971             goto cheap_AddSub32;
3972       case Iop_Sub32:
3973          if (mce->bogusLiterals)
3974             return expensiveAddSub(mce,False,Ity_I32,
3975                                    vatom1,vatom2, atom1,atom2);
3976          else
3977             goto cheap_AddSub32;
3978 
3979       cheap_AddSub32:
3980       case Iop_Mul32:
3981          return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3982 
3983       case Iop_CmpORD32S:
3984       case Iop_CmpORD32U:
3985       case Iop_CmpORD64S:
3986       case Iop_CmpORD64U:
3987          return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
3988 
3989       case Iop_Add64:
3990          if (mce->bogusLiterals || mce->useLLVMworkarounds)
3991             return expensiveAddSub(mce,True,Ity_I64,
3992                                    vatom1,vatom2, atom1,atom2);
3993          else
3994             goto cheap_AddSub64;
3995       case Iop_Sub64:
3996          if (mce->bogusLiterals)
3997             return expensiveAddSub(mce,False,Ity_I64,
3998                                    vatom1,vatom2, atom1,atom2);
3999          else
4000             goto cheap_AddSub64;
4001 
4002       cheap_AddSub64:
4003       case Iop_Mul64:
4004          return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
4005 
4006       case Iop_Mul16:
4007       case Iop_Add16:
4008       case Iop_Sub16:
4009          return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
4010 
4011       case Iop_Mul8:
4012       case Iop_Sub8:
4013       case Iop_Add8:
4014          return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
4015 
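      /* Equality comparisons: with bogusLiterals, use the expensive
         bit-precise version (a defined, differing bit pair makes the
         result defined); otherwise just UifU the args and PCast to
         I1. */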
4016       case Iop_CmpEQ64:
4017       case Iop_CmpNE64:
4018          if (mce->bogusLiterals)
4019             goto expensive_cmp64;
4020          else
4021             goto cheap_cmp64;
4022 
4023       expensive_cmp64:
4024       case Iop_ExpCmpNE64:
4025          return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
4026 
4027       cheap_cmp64:
4028       case Iop_CmpLE64S: case Iop_CmpLE64U:
4029       case Iop_CmpLT64U: case Iop_CmpLT64S:
4030          return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
4031 
4032       case Iop_CmpEQ32:
4033       case Iop_CmpNE32:
4034          if (mce->bogusLiterals)
4035             goto expensive_cmp32;
4036          else
4037             goto cheap_cmp32;
4038 
4039       expensive_cmp32:
4040       case Iop_ExpCmpNE32:
4041          return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
4042 
4043       cheap_cmp32:
4044       case Iop_CmpLE32S: case Iop_CmpLE32U:
4045       case Iop_CmpLT32U: case Iop_CmpLT32S:
4046          return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
4047 
4048       case Iop_CmpEQ16: case Iop_CmpNE16:
4049          return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
4050 
4051       case Iop_ExpCmpNE16:
4052          return expensiveCmpEQorNE(mce,Ity_I16, vatom1,vatom2, atom1,atom2 );
4053 
4054       case Iop_CmpEQ8: case Iop_CmpNE8:
4055          return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
4056 
4057       case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
4058       case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
4059       case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
4060       case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
4061          /* Just say these all produce a defined result, regardless
4062             of their arguments.  See COMMENT_ON_CasCmpEQ in this file. */
4063          return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
4064 
4065       case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
4066          return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
4067 
4068       case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
4069          return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
4070 
4071       case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
4072          return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
4073 
4074       case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
4075          return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
4076 
4077       case Iop_AndV256:
4078          uifu = mkUifUV256; difd = mkDifDV256;
4079          and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
4080       case Iop_AndV128:
4081          uifu = mkUifUV128; difd = mkDifDV128;
4082          and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
4083       case Iop_And64:
4084          uifu = mkUifU64; difd = mkDifD64;
4085          and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
4086       case Iop_And32:
4087          uifu = mkUifU32; difd = mkDifD32;
4088          and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
4089       case Iop_And16:
4090          uifu = mkUifU16; difd = mkDifD16;
4091          and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
4092       case Iop_And8:
4093          uifu = mkUifU8; difd = mkDifD8;
4094          and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
4095 
4096       case Iop_OrV256:
4097          uifu = mkUifUV256; difd = mkDifDV256;
4098          and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
4099       case Iop_OrV128:
4100          uifu = mkUifUV128; difd = mkDifDV128;
4101          and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
4102       case Iop_Or64:
4103          uifu = mkUifU64; difd = mkDifD64;
4104          and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
4105       case Iop_Or32:
4106          uifu = mkUifU32; difd = mkDifD32;
4107          and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
4108       case Iop_Or16:
4109          uifu = mkUifU16; difd = mkDifD16;
4110          and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
4111       case Iop_Or8:
4112          uifu = mkUifU8; difd = mkDifD8;
4113          and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
4114 
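      /* And/Or: start from UifU(v1,v2) and then make the result more
         defined (via DifD) using the 'improvement' terms: for AND, a
         defined 0 bit in either operand forces that result bit to be
         defined; for OR, a defined 1 bit does. */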
4115       do_And_Or:
4116          return
4117          assignNew(
4118             'V', mce,
4119             and_or_ty,
4120             difd(mce, uifu(mce, vatom1, vatom2),
4121                       difd(mce, improve(mce, atom1, vatom1),
4122                                 improve(mce, atom2, vatom2) ) ) );
4123 
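      /* Xor: each result bit depends only on the corresponding operand
         bits, so plain UifU is exact here. */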
4124       case Iop_Xor8:
4125          return mkUifU8(mce, vatom1, vatom2);
4126       case Iop_Xor16:
4127          return mkUifU16(mce, vatom1, vatom2);
4128       case Iop_Xor32:
4129          return mkUifU32(mce, vatom1, vatom2);
4130       case Iop_Xor64:
4131          return mkUifU64(mce, vatom1, vatom2);
4132       case Iop_XorV128:
4133          return mkUifUV128(mce, vatom1, vatom2);
4134       case Iop_XorV256:
4135          return mkUifUV256(mce, vatom1, vatom2);
4136 
4137       /* V256-bit SIMD */
4138 
4139       case Iop_ShrN16x16:
4140       case Iop_ShrN32x8:
4141       case Iop_ShrN64x4:
4142       case Iop_SarN16x16:
4143       case Iop_SarN32x8:
4144       case Iop_ShlN16x16:
4145       case Iop_ShlN32x8:
4146       case Iop_ShlN64x4:
4147          /* Same scheme as with all other shifts.  Note: 22 Oct 05:
4148             this is wrong now, scalar shifts are done properly lazily.
4149             Vector shifts should be fixed too. */
4150          complainIfUndefined(mce, atom2, NULL);
4151          return assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2));
4152 
4153       case Iop_QSub8Ux32:
4154       case Iop_QSub8Sx32:
4155       case Iop_Sub8x32:
4156       case Iop_Min8Ux32:
4157       case Iop_Min8Sx32:
4158       case Iop_Max8Ux32:
4159       case Iop_Max8Sx32:
4160       case Iop_CmpGT8Sx32:
4161       case Iop_CmpEQ8x32:
4162       case Iop_Avg8Ux32:
4163       case Iop_QAdd8Ux32:
4164       case Iop_QAdd8Sx32:
4165       case Iop_Add8x32:
4166          return binary8Ix32(mce, vatom1, vatom2);
4167 
4168       case Iop_QSub16Ux16:
4169       case Iop_QSub16Sx16:
4170       case Iop_Sub16x16:
4171       case Iop_Mul16x16:
4172       case Iop_MulHi16Sx16:
4173       case Iop_MulHi16Ux16:
4174       case Iop_Min16Sx16:
4175       case Iop_Min16Ux16:
4176       case Iop_Max16Sx16:
4177       case Iop_Max16Ux16:
4178       case Iop_CmpGT16Sx16:
4179       case Iop_CmpEQ16x16:
4180       case Iop_Avg16Ux16:
4181       case Iop_QAdd16Ux16:
4182       case Iop_QAdd16Sx16:
4183       case Iop_Add16x16:
4184          return binary16Ix16(mce, vatom1, vatom2);
4185 
4186       case Iop_Sub32x8:
4187       case Iop_CmpGT32Sx8:
4188       case Iop_CmpEQ32x8:
4189       case Iop_Add32x8:
4190       case Iop_Max32Ux8:
4191       case Iop_Max32Sx8:
4192       case Iop_Min32Ux8:
4193       case Iop_Min32Sx8:
4194       case Iop_Mul32x8:
4195          return binary32Ix8(mce, vatom1, vatom2);
4196 
4197       case Iop_Sub64x4:
4198       case Iop_Add64x4:
4199       case Iop_CmpEQ64x4:
4200       case Iop_CmpGT64Sx4:
4201          return binary64Ix4(mce, vatom1, vatom2);
4202 
4203      /* Perm32x8: rearrange values in left arg using steering values
4204         from right arg.  So rearrange the vbits in the same way but
4205         pessimise wrt steering values. */
4206       case Iop_Perm32x8:
4207          return mkUifUV256(
4208                    mce,
4209                    assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2)),
4210                    mkPCast32x8(mce, vatom2)
4211                 );
4212 
4213       /* Q-and-Qshift-by-vector of the form (V128, V128) -> V256.
4214          Handle the shifted results in the same way that other
4215          binary Q ops are handled, eg QSub: UifU the two args,
4216          then pessimise -- which is binaryNIxM.  But for the upper
4217          V128, we require to generate just 1 bit which is the
4218          pessimised shift result, with 127 defined zeroes above it.
4219 
4220          Note that this is overly pessimistic in that in fact only the
4221          bottom 8 bits of each lane of the second arg determine the shift
4222          amount.  Really we ought to ignore any undefinedness in the
4223          rest of the lanes of the second arg. */
4224       case Iop_QandSQsh64x2:  case Iop_QandUQsh64x2:
4225       case Iop_QandSQRsh64x2: case Iop_QandUQRsh64x2:
4226       case Iop_QandSQsh32x4:  case Iop_QandUQsh32x4:
4227       case Iop_QandSQRsh32x4: case Iop_QandUQRsh32x4:
4228       case Iop_QandSQsh16x8:  case Iop_QandUQsh16x8:
4229       case Iop_QandSQRsh16x8: case Iop_QandUQRsh16x8:
4230       case Iop_QandSQsh8x16:  case Iop_QandUQsh8x16:
4231       case Iop_QandSQRsh8x16: case Iop_QandUQRsh8x16:
4232       {
4233          // The function to generate the pessimised shift result
4234          IRAtom* (*binaryNIxM)(MCEnv*,IRAtom*,IRAtom*) = NULL;
4235          switch (op) {
4236             case Iop_QandSQsh64x2:
4237             case Iop_QandUQsh64x2:
4238             case Iop_QandSQRsh64x2:
4239             case Iop_QandUQRsh64x2:
4240                binaryNIxM = binary64Ix2;
4241                break;
4242             case Iop_QandSQsh32x4:
4243             case Iop_QandUQsh32x4:
4244             case Iop_QandSQRsh32x4:
4245             case Iop_QandUQRsh32x4:
4246                binaryNIxM = binary32Ix4;
4247                break;
4248             case Iop_QandSQsh16x8:
4249             case Iop_QandUQsh16x8:
4250             case Iop_QandSQRsh16x8:
4251             case Iop_QandUQRsh16x8:
4252                binaryNIxM = binary16Ix8;
4253                break;
4254             case Iop_QandSQsh8x16:
4255             case Iop_QandUQsh8x16:
4256             case Iop_QandSQRsh8x16:
4257             case Iop_QandUQRsh8x16:
4258                binaryNIxM = binary8Ix16;
4259                break;
4260             default:
4261                tl_assert(0);
4262          }
4263          tl_assert(binaryNIxM);
4264          // Pessimised shift result, shV[127:0]
4265          IRAtom* shV = binaryNIxM(mce, vatom1, vatom2);
4266          // Generates: Def--(127)--Def PCast-to-I1(shV)
4267          IRAtom* qV = mkPCastXXtoXXlsb(mce, shV, Ity_V128);
4268          // and assemble the result
4269          return assignNew('V', mce, Ity_V256,
4270                           binop(Iop_V128HLtoV256, qV, shV));
4271       }
4272 
4273       default:
4274          ppIROp(op);
4275          VG_(tool_panic)("memcheck:expr2vbits_Binop");
4276    }
4277 }
4278 
4279 
4280 static
4281 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
4282 {
4283    /* For the widening operations {8,16,32}{U,S}to{16,32,64}, the
4284       selection of shadow operation implicitly duplicates the logic in
4285       do_shadow_LoadG and should be kept in sync (in the very unlikely
4286       event that the interpretation of such widening ops changes in
4287       future).  See comment in do_shadow_LoadG. */
4288    IRAtom* vatom = expr2vbits( mce, atom );
4289    tl_assert(isOriginalAtom(mce,atom));
4290    switch (op) {
4291 
4292       case Iop_Abs64Fx2:
4293       case Iop_Neg64Fx2:
4294       case Iop_RSqrtEst64Fx2:
4295       case Iop_RecipEst64Fx2:
4296          return unary64Fx2(mce, vatom);
4297 
4298       case Iop_Sqrt64F0x2:
4299          return unary64F0x2(mce, vatom);
4300 
4301       case Iop_Sqrt32Fx8:
4302       case Iop_RSqrtEst32Fx8:
4303       case Iop_RecipEst32Fx8:
4304          return unary32Fx8(mce, vatom);
4305 
4306       case Iop_Sqrt64Fx4:
4307          return unary64Fx4(mce, vatom);
4308 
4309       case Iop_RecipEst32Fx4:
4310       case Iop_I32UtoFx4:
4311       case Iop_I32StoFx4:
4312       case Iop_QFtoI32Ux4_RZ:
4313       case Iop_QFtoI32Sx4_RZ:
4314       case Iop_RoundF32x4_RM:
4315       case Iop_RoundF32x4_RP:
4316       case Iop_RoundF32x4_RN:
4317       case Iop_RoundF32x4_RZ:
4318       case Iop_RecipEst32Ux4:
4319       case Iop_Abs32Fx4:
4320       case Iop_Neg32Fx4:
4321       case Iop_RSqrtEst32Fx4:
4322          return unary32Fx4(mce, vatom);
4323 
4324       case Iop_I32UtoFx2:
4325       case Iop_I32StoFx2:
4326       case Iop_RecipEst32Fx2:
4327       case Iop_RecipEst32Ux2:
4328       case Iop_Abs32Fx2:
4329       case Iop_Neg32Fx2:
4330       case Iop_RSqrtEst32Fx2:
4331          return unary32Fx2(mce, vatom);
4332 
4333       case Iop_Sqrt32F0x4:
4334       case Iop_RSqrtEst32F0x4:
4335       case Iop_RecipEst32F0x4:
4336          return unary32F0x4(mce, vatom);
4337 
4338       case Iop_32UtoV128:
4339       case Iop_64UtoV128:
4340       case Iop_Dup8x16:
4341       case Iop_Dup16x8:
4342       case Iop_Dup32x4:
4343       case Iop_Reverse1sIn8_x16:
4344       case Iop_Reverse8sIn16_x8:
4345       case Iop_Reverse8sIn32_x4:
4346       case Iop_Reverse16sIn32_x4:
4347       case Iop_Reverse8sIn64_x2:
4348       case Iop_Reverse16sIn64_x2:
4349       case Iop_Reverse32sIn64_x2:
4350       case Iop_V256toV128_1: case Iop_V256toV128_0:
4351       case Iop_ZeroHI64ofV128:
4352       case Iop_ZeroHI96ofV128:
4353       case Iop_ZeroHI112ofV128:
4354       case Iop_ZeroHI120ofV128:
4355          return assignNew('V', mce, Ity_V128, unop(op, vatom));
4356 
4357       case Iop_F128HItoF64:  /* F128 -> high half of F128 */
4358       case Iop_D128HItoD64:  /* D128 -> high half of D128 */
4359          return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
4360       case Iop_F128LOtoF64:  /* F128 -> low  half of F128 */
4361       case Iop_D128LOtoD64:  /* D128 -> low  half of D128 */
4362          return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));
4363 
4364       case Iop_NegF128:
4365       case Iop_AbsF128:
4366          return mkPCastTo(mce, Ity_I128, vatom);
4367 
4368       case Iop_I32StoF128: /* signed I32 -> F128 */
4369       case Iop_I64StoF128: /* signed I64 -> F128 */
4370       case Iop_I32UtoF128: /* unsigned I32 -> F128 */
4371       case Iop_I64UtoF128: /* unsigned I64 -> F128 */
4372       case Iop_F32toF128:  /* F32 -> F128 */
4373       case Iop_F64toF128:  /* F64 -> F128 */
4374       case Iop_I32StoD128: /* signed I32 -> D128 */
4375       case Iop_I64StoD128: /* signed I64 -> D128 */
4376       case Iop_I32UtoD128: /* unsigned I32 -> D128 */
4377       case Iop_I64UtoD128: /* unsigned I64 -> D128 */
4378          return mkPCastTo(mce, Ity_I128, vatom);
4379 
4380       case Iop_F16toF64:
4381       case Iop_F32toF64:
4382       case Iop_I32StoF64:
4383       case Iop_I32UtoF64:
4384       case Iop_NegF64:
4385       case Iop_AbsF64:
4386       case Iop_RSqrtEst5GoodF64:
4387       case Iop_RoundF64toF64_NEAREST:
4388       case Iop_RoundF64toF64_NegINF:
4389       case Iop_RoundF64toF64_PosINF:
4390       case Iop_RoundF64toF64_ZERO:
4391       case Iop_Clz64:
4392       case Iop_D32toD64:
4393       case Iop_I32StoD64:
4394       case Iop_I32UtoD64:
4395       case Iop_ExtractExpD64:    /* D64  -> I64 */
4396       case Iop_ExtractExpD128:   /* D128 -> I64 */
4397       case Iop_ExtractSigD64:    /* D64  -> I64 */
4398       case Iop_ExtractSigD128:   /* D128 -> I64 */
4399       case Iop_DPBtoBCD:
4400       case Iop_BCDtoDPB:
4401          return mkPCastTo(mce, Ity_I64, vatom);
4402 
4403       case Iop_D64toD128:
4404          return mkPCastTo(mce, Ity_I128, vatom);
4405 
4406       case Iop_Clz32:
4407       case Iop_TruncF64asF32:
4408       case Iop_NegF32:
4409       case Iop_AbsF32:
4410       case Iop_F16toF32:
4411          return mkPCastTo(mce, Ity_I32, vatom);
4412 
4413       case Iop_Ctz32:
4414       case Iop_Ctz64:
4415          return expensiveCountTrailingZeroes(mce, op, atom, vatom);
4416 
4417       case Iop_1Uto64:
4418       case Iop_1Sto64:
4419       case Iop_8Uto64:
4420       case Iop_8Sto64:
4421       case Iop_16Uto64:
4422       case Iop_16Sto64:
4423       case Iop_32Sto64:
4424       case Iop_32Uto64:
4425       case Iop_V128to64:
4426       case Iop_V128HIto64:
4427       case Iop_128HIto64:
4428       case Iop_128to64:
4429       case Iop_Dup8x8:
4430       case Iop_Dup16x4:
4431       case Iop_Dup32x2:
4432       case Iop_Reverse8sIn16_x4:
4433       case Iop_Reverse8sIn32_x2:
4434       case Iop_Reverse16sIn32_x2:
4435       case Iop_Reverse8sIn64_x1:
4436       case Iop_Reverse16sIn64_x1:
4437       case Iop_Reverse32sIn64_x1:
4438       case Iop_V256to64_0: case Iop_V256to64_1:
4439       case Iop_V256to64_2: case Iop_V256to64_3:
4440          return assignNew('V', mce, Ity_I64, unop(op, vatom));
4441 
4442       case Iop_64to32:
4443       case Iop_64HIto32:
4444       case Iop_1Uto32:
4445       case Iop_1Sto32:
4446       case Iop_8Uto32:
4447       case Iop_16Uto32:
4448       case Iop_16Sto32:
4449       case Iop_8Sto32:
4450       case Iop_V128to32:
4451          return assignNew('V', mce, Ity_I32, unop(op, vatom));
4452 
4453       case Iop_8Sto16:
4454       case Iop_8Uto16:
4455       case Iop_32to16:
4456       case Iop_32HIto16:
4457       case Iop_64to16:
4458       case Iop_GetMSBs8x16:
4459          return assignNew('V', mce, Ity_I16, unop(op, vatom));
4460 
4461       case Iop_1Uto8:
4462       case Iop_1Sto8:
4463       case Iop_16to8:
4464       case Iop_16HIto8:
4465       case Iop_32to8:
4466       case Iop_64to8:
4467       case Iop_GetMSBs8x8:
4468          return assignNew('V', mce, Ity_I8, unop(op, vatom));
4469 
4470       case Iop_32to1:
4471          return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
4472 
4473       case Iop_64to1:
4474          return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
4475 
4476       case Iop_ReinterpF64asI64:
4477       case Iop_ReinterpI64asF64:
4478       case Iop_ReinterpI32asF32:
4479       case Iop_ReinterpF32asI32:
4480       case Iop_ReinterpI64asD64:
4481       case Iop_ReinterpD64asI64:
4482       case Iop_NotV256:
4483       case Iop_NotV128:
4484       case Iop_Not64:
4485       case Iop_Not32:
4486       case Iop_Not16:
4487       case Iop_Not8:
4488       case Iop_Not1:
4489          return vatom;
4490 
4491       case Iop_CmpNEZ8x8:
4492       case Iop_Cnt8x8:
4493       case Iop_Clz8x8:
4494       case Iop_Cls8x8:
4495       case Iop_Abs8x8:
4496          return mkPCast8x8(mce, vatom);
4497 
4498       case Iop_CmpNEZ8x16:
4499       case Iop_Cnt8x16:
4500       case Iop_Clz8x16:
4501       case Iop_Cls8x16:
4502       case Iop_Abs8x16:
4503          return mkPCast8x16(mce, vatom);
4504 
4505       case Iop_CmpNEZ16x4:
4506       case Iop_Clz16x4:
4507       case Iop_Cls16x4:
4508       case Iop_Abs16x4:
4509          return mkPCast16x4(mce, vatom);
4510 
4511       case Iop_CmpNEZ16x8:
4512       case Iop_Clz16x8:
4513       case Iop_Cls16x8:
4514       case Iop_Abs16x8:
4515          return mkPCast16x8(mce, vatom);
4516 
4517       case Iop_CmpNEZ32x2:
4518       case Iop_Clz32x2:
4519       case Iop_Cls32x2:
4520       case Iop_FtoI32Ux2_RZ:
4521       case Iop_FtoI32Sx2_RZ:
4522       case Iop_Abs32x2:
4523          return mkPCast32x2(mce, vatom);
4524 
4525       case Iop_CmpNEZ32x4:
4526       case Iop_Clz32x4:
4527       case Iop_Cls32x4:
4528       case Iop_FtoI32Ux4_RZ:
4529       case Iop_FtoI32Sx4_RZ:
4530       case Iop_Abs32x4:
4531       case Iop_RSqrtEst32Ux4:
4532          return mkPCast32x4(mce, vatom);
4533 
4534       case Iop_CmpwNEZ32:
4535          return mkPCastTo(mce, Ity_I32, vatom);
4536 
4537       case Iop_CmpwNEZ64:
4538          return mkPCastTo(mce, Ity_I64, vatom);
4539 
4540       case Iop_CmpNEZ64x2:
4541       case Iop_CipherSV128:
4542       case Iop_Clz64x2:
4543       case Iop_Abs64x2:
4544          return mkPCast64x2(mce, vatom);
4545 
4546       case Iop_PwBitMtxXpose64x2:
4547          return assignNew('V', mce, Ity_V128, unop(op, vatom));
4548 
4549       case Iop_NarrowUn16to8x8:
4550       case Iop_NarrowUn32to16x4:
4551       case Iop_NarrowUn64to32x2:
4552       case Iop_QNarrowUn16Sto8Sx8:
4553       case Iop_QNarrowUn16Sto8Ux8:
4554       case Iop_QNarrowUn16Uto8Ux8:
4555       case Iop_QNarrowUn32Sto16Sx4:
4556       case Iop_QNarrowUn32Sto16Ux4:
4557       case Iop_QNarrowUn32Uto16Ux4:
4558       case Iop_QNarrowUn64Sto32Sx2:
4559       case Iop_QNarrowUn64Sto32Ux2:
4560       case Iop_QNarrowUn64Uto32Ux2:
4561          return vectorNarrowUnV128(mce, op, vatom);
4562 
4563       case Iop_Widen8Sto16x8:
4564       case Iop_Widen8Uto16x8:
4565       case Iop_Widen16Sto32x4:
4566       case Iop_Widen16Uto32x4:
4567       case Iop_Widen32Sto64x2:
4568       case Iop_Widen32Uto64x2:
4569          return vectorWidenI64(mce, op, vatom);
4570 
4571       case Iop_PwAddL32Ux2:
4572       case Iop_PwAddL32Sx2:
4573          return mkPCastTo(mce, Ity_I64,
4574                assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
4575 
4576       case Iop_PwAddL16Ux4:
4577       case Iop_PwAddL16Sx4:
4578          return mkPCast32x2(mce,
4579                assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
4580 
4581       case Iop_PwAddL8Ux8:
4582       case Iop_PwAddL8Sx8:
4583          return mkPCast16x4(mce,
4584                assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
4585 
4586       case Iop_PwAddL32Ux4:
4587       case Iop_PwAddL32Sx4:
4588          return mkPCast64x2(mce,
4589                assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
4590 
4591       case Iop_PwAddL16Ux8:
4592       case Iop_PwAddL16Sx8:
4593          return mkPCast32x4(mce,
4594                assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
4595 
4596       case Iop_PwAddL8Ux16:
4597       case Iop_PwAddL8Sx16:
4598          return mkPCast16x8(mce,
4599                assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
4600 
4601       case Iop_I64UtoF32:
4602       default:
4603          ppIROp(op);
4604          VG_(tool_panic)("memcheck:expr2vbits_Unop");
4605    }
4606 }
4607 
4608 
4609 /* Worker function -- do not call directly.  See comments on
4610    expr2vbits_Load for the meaning of |guard|.
4611 
4612    Generates IR to (1) perform a definedness test of |addr|, (2)
4613    perform a validity test of |addr|, and (3) return the Vbits for the
4614    location indicated by |addr|.  All of this only happens when
4615    |guard| is NULL or |guard| evaluates to True at run time.
4616 
4617    If |guard| evaluates to False at run time, the returned value is
4618    the IR-mandated 0x55..55 value, and no checks nor shadow loads are
4619    performed.
4620 
4621    The definedness of |guard| itself is not checked.  That is assumed
4622    to have been done before this point, by the caller. */
4623 static
4624 IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
4625                               IREndness end, IRType ty,
4626                               IRAtom* addr, UInt bias, IRAtom* guard )
4627 {
4628    tl_assert(isOriginalAtom(mce,addr));
4629    tl_assert(end == Iend_LE || end == Iend_BE);
4630 
4631    /* First, emit a definedness test for the address.  This also sets
4632       the address (shadow) to 'defined' following the test. */
4633    complainIfUndefined( mce, addr, guard );
4634 
4635    /* Now cook up a call to the relevant helper function, to read the
4636       data V bits from shadow memory. */
4637    ty = shadowTypeV(ty);
4638 
4639    void*        helper           = NULL;
4640    const HChar* hname            = NULL;
4641    Bool         ret_via_outparam = False;
4642 
4643    if (end == Iend_LE) {
4644       switch (ty) {
4645          case Ity_V256: helper = &MC_(helperc_LOADV256le);
4646                         hname = "MC_(helperc_LOADV256le)";
4647                         ret_via_outparam = True;
4648                         break;
4649          case Ity_V128: helper = &MC_(helperc_LOADV128le);
4650                         hname = "MC_(helperc_LOADV128le)";
4651                         ret_via_outparam = True;
4652                         break;
4653          case Ity_I64:  helper = &MC_(helperc_LOADV64le);
4654                         hname = "MC_(helperc_LOADV64le)";
4655                         break;
4656          case Ity_I32:  helper = &MC_(helperc_LOADV32le);
4657                         hname = "MC_(helperc_LOADV32le)";
4658                         break;
4659          case Ity_I16:  helper = &MC_(helperc_LOADV16le);
4660                         hname = "MC_(helperc_LOADV16le)";
4661                         break;
4662          case Ity_I8:   helper = &MC_(helperc_LOADV8);
4663                         hname = "MC_(helperc_LOADV8)";
4664                         break;
4665          default:       ppIRType(ty);
4666                         VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(LE)");
4667       }
4668    } else {
4669       switch (ty) {
4670          case Ity_V256: helper = &MC_(helperc_LOADV256be);
4671                         hname = "MC_(helperc_LOADV256be)";
4672                         ret_via_outparam = True;
4673                         break;
4674          case Ity_V128: helper = &MC_(helperc_LOADV128be);
4675                         hname = "MC_(helperc_LOADV128be)";
4676                         ret_via_outparam = True;
4677                         break;
4678          case Ity_I64:  helper = &MC_(helperc_LOADV64be);
4679                         hname = "MC_(helperc_LOADV64be)";
4680                         break;
4681          case Ity_I32:  helper = &MC_(helperc_LOADV32be);
4682                         hname = "MC_(helperc_LOADV32be)";
4683                         break;
4684          case Ity_I16:  helper = &MC_(helperc_LOADV16be);
4685                         hname = "MC_(helperc_LOADV16be)";
4686                         break;
4687          case Ity_I8:   helper = &MC_(helperc_LOADV8);
4688                         hname = "MC_(helperc_LOADV8)";
4689                         break;
4690          default:       ppIRType(ty);
4691                         VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(BE)");
4692       }
4693    }
4694 
4695    tl_assert(helper);
4696    tl_assert(hname);
4697 
4698    /* Generate the actual address into addrAct. */
4699    IRAtom* addrAct;
4700    if (bias == 0) {
4701       addrAct = addr;
4702    } else {
4703       IROp    mkAdd;
4704       IRAtom* eBias;
4705       IRType  tyAddr  = mce->hWordTy;
4706       tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
4707       mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
4708       eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
4709       addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
4710    }
4711 
4712    /* We need to have a place to park the V bits we're just about to
4713       read. */
4714    IRTemp datavbits = newTemp(mce, ty, VSh);
4715 
4716    /* Here's the call. */
4717    IRDirty* di;
4718    if (ret_via_outparam) {
4719       di = unsafeIRDirty_1_N( datavbits,
4720                               2/*regparms*/,
4721                               hname, VG_(fnptr_to_fnentry)( helper ),
4722                               mkIRExprVec_2( IRExpr_VECRET(), addrAct ) );
4723    } else {
4724       di = unsafeIRDirty_1_N( datavbits,
4725                               1/*regparms*/,
4726                               hname, VG_(fnptr_to_fnentry)( helper ),
4727                               mkIRExprVec_1( addrAct ) );
4728    }
4729 
4730    setHelperAnns( mce, di );
4731    if (guard) {
4732       di->guard = guard;
4733       /* Ideally the didn't-happen return value here would be all-ones
4734          (all-undefined), so it'd be obvious if it got used
4735          inadvertently.  We can get by with the IR-mandated default
4736          value (0b01 repeating, 0x55 etc) as that'll still look pretty
4737          undefined if it ever leaks out. */
4738    }
4739    stmt( 'V', mce, IRStmt_Dirty(di) );
4740 
4741    return mkexpr(datavbits);
4742 }
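/* A rough sketch of what the above generates: for an unguarded
   little-endian 32-bit load, approximately

      t_vbits:I32 = DIRTY ::: MC_(helperc_LOADV32le) ( addrAct )

   that is, a single dirty helper call whose return value is the shadow
   (V bit) word for the loaded location.  For V128/V256 loads the helper
   instead returns its result through a VECRET out-parameter, which is
   why ret_via_outparam is set and 2 regparms are used in those cases. */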
4743 
4744 
4745 /* Generate IR to do a shadow load.  The helper is expected to check
4746    the validity of the address and return the V bits for that address.
4747    This can optionally be controlled by a guard, which is assumed to
4748    be True if NULL.  In the case where the guard is False at runtime,
4749    the helper will return the didn't-do-the-call value of 0x55..55.
4750    Since that means "completely undefined result", the caller of
4751    this function will need to fix up the result somehow in that
4752    case.
4753 
4754    Caller of this function is also expected to have checked the
4755    definedness of |guard| before this point.
4756 */
4757 static
4758 IRAtom* expr2vbits_Load ( MCEnv* mce,
4759                           IREndness end, IRType ty,
4760                           IRAtom* addr, UInt bias,
4761                           IRAtom* guard )
4762 {
4763    tl_assert(end == Iend_LE || end == Iend_BE);
4764    switch (shadowTypeV(ty)) {
4765       case Ity_I8:
4766       case Ity_I16:
4767       case Ity_I32:
4768       case Ity_I64:
4769       case Ity_V128:
4770       case Ity_V256:
4771          return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard);
4772       default:
4773          VG_(tool_panic)("expr2vbits_Load");
4774    }
4775 }
4776 
4777 
4778 /* The most general handler for guarded loads.  Assumes the
4779    definedness of GUARD has already been checked by the caller.  A
4780    GUARD of NULL is assumed to mean "always True".  Generates code to
4781    check the definedness and validity of ADDR.
4782 
4783    Generate IR to do a shadow load from ADDR and return the V bits.
4784    The loaded type is TY.  The loaded data is then (shadow) widened by
4785    using VWIDEN, which can be Iop_INVALID to denote a no-op.  If GUARD
4786    evaluates to False at run time then the returned Vbits are simply
4787    VALT instead.  Note therefore that the argument type of VWIDEN must
4788    be TY and the result type of VWIDEN must equal the type of VALT.
4789 */
4790 static
4791 IRAtom* expr2vbits_Load_guarded_General ( MCEnv* mce,
4792                                           IREndness end, IRType ty,
4793                                           IRAtom* addr, UInt bias,
4794                                           IRAtom* guard,
4795                                           IROp vwiden, IRAtom* valt )
4796 {
4797    /* Sanity check the conversion operation, and also set TYWIDE. */
4798    IRType tyWide = Ity_INVALID;
4799    switch (vwiden) {
4800       case Iop_INVALID:
4801          tyWide = ty;
4802          break;
4803       case Iop_16Uto32: case Iop_16Sto32: case Iop_8Uto32: case Iop_8Sto32:
4804          tyWide = Ity_I32;
4805          break;
4806       default:
4807          VG_(tool_panic)("memcheck:expr2vbits_Load_guarded_General");
4808    }
4809 
4810    /* If the guard evaluates to True, this will hold the loaded V bits
4811       at TY.  If the guard evaluates to False, this will be the
4812       IR-mandated default value (0x55..55), which still reads as mostly
4813       undefined, and which we will in any case replace using an ITE below. */
4814    IRAtom* iftrue1
4815       = assignNew('V', mce, ty,
4816                   expr2vbits_Load(mce, end, ty, addr, bias, guard));
4817    /* Now (shadow-) widen the loaded V bits to the desired width.  In
4818       the guard-is-False case, the allowable widening operators will
4819       in the worst case (unsigned widening) at least leave the
4820       pre-widened part as being marked all-undefined, and in the best
4821       case (signed widening) mark the whole widened result as
4822       undefined.  Anyway, it doesn't matter really, since in this case
4823       we will replace said value with the default value |valt| using an
4824       ITE. */
4825    IRAtom* iftrue2
4826       = vwiden == Iop_INVALID
4827            ? iftrue1
4828            : assignNew('V', mce, tyWide, unop(vwiden, iftrue1));
4829    /* These are the V bits we will return if the load doesn't take
4830       place. */
4831    IRAtom* iffalse
4832       = valt;
4833    /* Prepare the cond for the ITE.  Convert a NULL cond into
4834       something that iropt knows how to fold out later. */
4835    IRAtom* cond
4836       = guard == NULL  ? mkU1(1)  : guard;
4837    /* And assemble the final result. */
4838    return assignNew('V', mce, tyWide, IRExpr_ITE(cond, iftrue2, iffalse));
4839 }
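/* A rough worked example of the above: for a guarded load expressed as
   "load I16, sign-widen to I32" (i.e. vwiden == Iop_16Sto32, valt being
   the I32 vbits to use when the guard fails), the generated shadow IR is
   approximately

      iftrue1 = <shadow load of I16 at addr, gated by guard>
      iftrue2 = 16Sto32(iftrue1)
      result  = ITE(guard, iftrue2, valt)

   so when the guard is false the shadow result is exactly valt. */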
4840 
4841 
4842 /* A simpler handler for guarded loads, in which there is no
4843    conversion operation, and the default V bit return (when the guard
4844    evaluates to False at runtime) is "all defined".  If there is no
4845    guard expression or the guard is always TRUE this function behaves
4846    like expr2vbits_Load.  It is assumed that definedness of GUARD has
4847    already been checked at the call site. */
4848 static
4849 IRAtom* expr2vbits_Load_guarded_Simple ( MCEnv* mce,
4850                                          IREndness end, IRType ty,
4851                                          IRAtom* addr, UInt bias,
4852                                          IRAtom *guard )
4853 {
4854    return expr2vbits_Load_guarded_General(
4855              mce, end, ty, addr, bias, guard, Iop_INVALID, definedOfType(ty)
4856           );
4857 }
4858 
4859 
4860 static
4861 IRAtom* expr2vbits_ITE ( MCEnv* mce,
4862                          IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
4863 {
4864    IRAtom *vbitsC, *vbits0, *vbits1;
4865    IRType ty;
4866    /* Given ITE(cond, iftrue,  iffalse),  generate
4867             ITE(cond, iftrue#, iffalse#) `UifU` PCast(cond#)
4868       That is, steer the V bits like the originals, but trash the
4869       result if the steering value is undefined.  This gives
4870       lazy propagation. */
4871    tl_assert(isOriginalAtom(mce, cond));
4872    tl_assert(isOriginalAtom(mce, iftrue));
4873    tl_assert(isOriginalAtom(mce, iffalse));
4874 
4875    vbitsC = expr2vbits(mce, cond);
4876    vbits1 = expr2vbits(mce, iftrue);
4877    vbits0 = expr2vbits(mce, iffalse);
4878    ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
4879 
4880    return
4881       mkUifU(mce, ty, assignNew('V', mce, ty,
4882                                      IRExpr_ITE(cond, vbits1, vbits0)),
4883                       mkPCastTo(mce, ty, vbitsC) );
4884 }
4885 
4886 /* --------- This is the main expression-handling function. --------- */
4887 
4888 static
4889 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
4890 {
4891    switch (e->tag) {
4892 
4893       case Iex_Get:
4894          return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
4895 
4896       case Iex_GetI:
4897          return shadow_GETI( mce, e->Iex.GetI.descr,
4898                                   e->Iex.GetI.ix, e->Iex.GetI.bias );
4899 
4900       case Iex_RdTmp:
4901          return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
4902 
4903       case Iex_Const:
4904          return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
4905 
4906       case Iex_Qop:
4907          return expr2vbits_Qop(
4908                    mce,
4909                    e->Iex.Qop.details->op,
4910                    e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
4911                    e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
4912                 );
4913 
4914       case Iex_Triop:
4915          return expr2vbits_Triop(
4916                    mce,
4917                    e->Iex.Triop.details->op,
4918                    e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
4919                    e->Iex.Triop.details->arg3
4920                 );
4921 
4922       case Iex_Binop:
4923          return expr2vbits_Binop(
4924                    mce,
4925                    e->Iex.Binop.op,
4926                    e->Iex.Binop.arg1, e->Iex.Binop.arg2
4927                 );
4928 
4929       case Iex_Unop:
4930          return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
4931 
4932       case Iex_Load:
4933          return expr2vbits_Load( mce, e->Iex.Load.end,
4934                                       e->Iex.Load.ty,
4935                                       e->Iex.Load.addr, 0/*addr bias*/,
4936                                       NULL/* guard == "always True"*/ );
4937 
4938       case Iex_CCall:
4939          return mkLazyN( mce, e->Iex.CCall.args,
4940                               e->Iex.CCall.retty,
4941                               e->Iex.CCall.cee );
4942 
4943       case Iex_ITE:
4944          return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
4945                                      e->Iex.ITE.iffalse);
4946 
4947       default:
4948          VG_(printf)("\n");
4949          ppIRExpr(e);
4950          VG_(printf)("\n");
4951          VG_(tool_panic)("memcheck: expr2vbits");
4952    }
4953 }
4954 
4955 /*------------------------------------------------------------*/
4956 /*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
4957 /*------------------------------------------------------------*/
4958 
4959 /* Widen a value to the host word size. */
4960 
4961 static
4962 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
4963 {
4964    IRType ty, tyH;
4965 
4966    /* vatom is vbits-value and as such can only have a shadow type. */
4967    tl_assert(isShadowAtom(mce,vatom));
4968 
4969    ty  = typeOfIRExpr(mce->sb->tyenv, vatom);
4970    tyH = mce->hWordTy;
4971 
4972    if (tyH == Ity_I32) {
4973       switch (ty) {
4974          case Ity_I32:
4975             return vatom;
4976          case Ity_I16:
4977             return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
4978          case Ity_I8:
4979             return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
4980          default:
4981             goto unhandled;
4982       }
4983    } else
4984    if (tyH == Ity_I64) {
4985       switch (ty) {
4986          case Ity_I32:
4987             return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
4988          case Ity_I16:
4989             return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4990                    assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
4991          case Ity_I8:
4992             return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4993                    assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
4994          default:
4995             goto unhandled;
4996       }
4997    } else {
4998       goto unhandled;
4999    }
5000   unhandled:
5001    VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
5002    VG_(tool_panic)("zwidenToHostWord");
5003 }
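/* For example, on a 64-bit host an Ity_I16 shadow value v is widened as
   32Uto64(16Uto32(v)), whereas on a 32-bit host it becomes just
   16Uto32(v).  Zero-extension suffices here since the STOREV helpers
   which receive the widened word presumably only inspect its low-order
   bytes. */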
5004 
5005 
5006 /* Generate a shadow store.  |addr| is always the original address
5007    atom.  You can pass in either originals or V-bits for the data
5008    atom, but obviously not both.  This function generates a check for
5009    the definedness and (indirectly) the validity of |addr|, but only
5010    when |guard| evaluates to True at run time (or is NULL).
5011 
5012    |guard| :: Ity_I1 controls whether the store really happens; NULL
5013    means it unconditionally does.  Note that |guard| itself is not
5014    checked for definedness; the caller of this function must do that
5015    if necessary.
5016 */
5017 static
5018 void do_shadow_Store ( MCEnv* mce,
5019                        IREndness end,
5020                        IRAtom* addr, UInt bias,
5021                        IRAtom* data, IRAtom* vdata,
5022                        IRAtom* guard )
5023 {
5024    IROp     mkAdd;
5025    IRType   ty, tyAddr;
5026    void*    helper = NULL;
5027    const HChar* hname = NULL;
5028    IRConst* c;
5029 
5030    tyAddr = mce->hWordTy;
5031    mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
5032    tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
5033    tl_assert( end == Iend_LE || end == Iend_BE );
5034 
5035    if (data) {
5036       tl_assert(!vdata);
5037       tl_assert(isOriginalAtom(mce, data));
5038       tl_assert(bias == 0);
5039       vdata = expr2vbits( mce, data );
5040    } else {
5041       tl_assert(vdata);
5042    }
5043 
5044    tl_assert(isOriginalAtom(mce,addr));
5045    tl_assert(isShadowAtom(mce,vdata));
5046 
5047    if (guard) {
5048       tl_assert(isOriginalAtom(mce, guard));
5049       tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5050    }
5051 
5052    ty = typeOfIRExpr(mce->sb->tyenv, vdata);
5053 
5054    // If we're not doing undefined value checking, pretend that this value
5055    // is "all valid".  That lets Vex's optimiser remove some of the V bit
5056    // shadow computation ops that precede it.
5057    if (MC_(clo_mc_level) == 1) {
5058       switch (ty) {
5059          case Ity_V256: // V256 weirdness -- used four times
5060                         c = IRConst_V256(V_BITS32_DEFINED); break;
5061          case Ity_V128: // V128 weirdness -- used twice
5062                         c = IRConst_V128(V_BITS16_DEFINED); break;
5063          case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
5064          case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
5065          case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
5066          case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
5067          default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
5068       }
5069       vdata = IRExpr_Const( c );
5070    }
5071 
5072    /* First, emit a definedness test for the address.  This also sets
5073       the address (shadow) to 'defined' following the test.  Both of
5074       those actions are gated on |guard|. */
5075    complainIfUndefined( mce, addr, guard );
5076 
5077    /* Now decide which helper function to call to write the data V
5078       bits into shadow memory. */
5079    if (end == Iend_LE) {
5080       switch (ty) {
5081          case Ity_V256: /* we'll use the helper four times */
5082          case Ity_V128: /* we'll use the helper twice */
5083          case Ity_I64: helper = &MC_(helperc_STOREV64le);
5084                        hname = "MC_(helperc_STOREV64le)";
5085                        break;
5086          case Ity_I32: helper = &MC_(helperc_STOREV32le);
5087                        hname = "MC_(helperc_STOREV32le)";
5088                        break;
5089          case Ity_I16: helper = &MC_(helperc_STOREV16le);
5090                        hname = "MC_(helperc_STOREV16le)";
5091                        break;
5092          case Ity_I8:  helper = &MC_(helperc_STOREV8);
5093                        hname = "MC_(helperc_STOREV8)";
5094                        break;
5095          default:      VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
5096       }
5097    } else {
5098       switch (ty) {
5099          case Ity_V128: /* we'll use the helper twice */
5100          case Ity_I64: helper = &MC_(helperc_STOREV64be);
5101                        hname = "MC_(helperc_STOREV64be)";
5102                        break;
5103          case Ity_I32: helper = &MC_(helperc_STOREV32be);
5104                        hname = "MC_(helperc_STOREV32be)";
5105                        break;
5106          case Ity_I16: helper = &MC_(helperc_STOREV16be);
5107                        hname = "MC_(helperc_STOREV16be)";
5108                        break;
5109          case Ity_I8:  helper = &MC_(helperc_STOREV8);
5110                        hname = "MC_(helperc_STOREV8)";
5111                        break;
5112          /* Note, no V256 case here, because no big-endian target that
5113             we support has 256-bit vectors. */
5114          default:      VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
5115       }
5116    }
5117 
5118    if (UNLIKELY(ty == Ity_V256)) {
5119 
5120       /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
5121          Q3 being the most significant lane. */
5122       /* These are the offsets of the Qs in memory. */
5123       Int     offQ0, offQ1, offQ2, offQ3;
5124 
5125       /* Various bits for constructing the 4 lane helper calls */
5126       IRDirty *diQ0,    *diQ1,    *diQ2,    *diQ3;
5127       IRAtom  *addrQ0,  *addrQ1,  *addrQ2,  *addrQ3;
5128       IRAtom  *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
5129       IRAtom  *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;
5130 
5131       if (end == Iend_LE) {
5132          offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
5133       } else {
5134          offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
5135       }
5136 
5137       eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
5138       addrQ0  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
5139       vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
5140       diQ0    = unsafeIRDirty_0_N(
5141                    1/*regparms*/,
5142                    hname, VG_(fnptr_to_fnentry)( helper ),
5143                    mkIRExprVec_2( addrQ0, vdataQ0 )
5144                 );
5145 
5146       eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
5147       addrQ1  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
5148       vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
5149       diQ1    = unsafeIRDirty_0_N(
5150                    1/*regparms*/,
5151                    hname, VG_(fnptr_to_fnentry)( helper ),
5152                    mkIRExprVec_2( addrQ1, vdataQ1 )
5153                 );
5154 
5155       eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
5156       addrQ2  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
5157       vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
5158       diQ2    = unsafeIRDirty_0_N(
5159                    1/*regparms*/,
5160                    hname, VG_(fnptr_to_fnentry)( helper ),
5161                    mkIRExprVec_2( addrQ2, vdataQ2 )
5162                 );
5163 
5164       eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
5165       addrQ3  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
5166       vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
5167       diQ3    = unsafeIRDirty_0_N(
5168                    1/*regparms*/,
5169                    hname, VG_(fnptr_to_fnentry)( helper ),
5170                    mkIRExprVec_2( addrQ3, vdataQ3 )
5171                 );
5172 
5173       if (guard)
5174          diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;
5175 
5176       setHelperAnns( mce, diQ0 );
5177       setHelperAnns( mce, diQ1 );
5178       setHelperAnns( mce, diQ2 );
5179       setHelperAnns( mce, diQ3 );
5180       stmt( 'V', mce, IRStmt_Dirty(diQ0) );
5181       stmt( 'V', mce, IRStmt_Dirty(diQ1) );
5182       stmt( 'V', mce, IRStmt_Dirty(diQ2) );
5183       stmt( 'V', mce, IRStmt_Dirty(diQ3) );
5184 
5185    }
5186    else if (UNLIKELY(ty == Ity_V128)) {
5187 
5188       /* V128-bit case */
5189       /* See comment in next clause re 64-bit regparms */
5190       /* also, need to be careful about endianness */
5191 
5192       Int     offLo64, offHi64;
5193       IRDirty *diLo64, *diHi64;
5194       IRAtom  *addrLo64, *addrHi64;
5195       IRAtom  *vdataLo64, *vdataHi64;
5196       IRAtom  *eBiasLo64, *eBiasHi64;
5197 
5198       if (end == Iend_LE) {
5199          offLo64 = 0;
5200          offHi64 = 8;
5201       } else {
5202          offLo64 = 8;
5203          offHi64 = 0;
5204       }
5205 
5206       eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
5207       addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
5208       vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
5209       diLo64    = unsafeIRDirty_0_N(
5210                      1/*regparms*/,
5211                      hname, VG_(fnptr_to_fnentry)( helper ),
5212                      mkIRExprVec_2( addrLo64, vdataLo64 )
5213                   );
5214       eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
5215       addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
5216       vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
5217       diHi64    = unsafeIRDirty_0_N(
5218                      1/*regparms*/,
5219                      hname, VG_(fnptr_to_fnentry)( helper ),
5220                      mkIRExprVec_2( addrHi64, vdataHi64 )
5221                   );
5222       if (guard) diLo64->guard = guard;
5223       if (guard) diHi64->guard = guard;
5224       setHelperAnns( mce, diLo64 );
5225       setHelperAnns( mce, diHi64 );
5226       stmt( 'V', mce, IRStmt_Dirty(diLo64) );
5227       stmt( 'V', mce, IRStmt_Dirty(diHi64) );
5228 
5229    } else {
5230 
5231       IRDirty *di;
5232       IRAtom  *addrAct;
5233 
5234       /* 8/16/32/64-bit cases */
5235       /* Generate the actual address into addrAct. */
5236       if (bias == 0) {
5237          addrAct = addr;
5238       } else {
5239          IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
5240          addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
5241       }
5242 
5243       if (ty == Ity_I64) {
5244          /* We can't do this with regparm 2 on 32-bit platforms, since
5245             the back ends aren't clever enough to handle 64-bit
5246             regparm args.  Therefore be different. */
5247          di = unsafeIRDirty_0_N(
5248                  1/*regparms*/,
5249                  hname, VG_(fnptr_to_fnentry)( helper ),
5250                  mkIRExprVec_2( addrAct, vdata )
5251               );
5252       } else {
5253          di = unsafeIRDirty_0_N(
5254                  2/*regparms*/,
5255                  hname, VG_(fnptr_to_fnentry)( helper ),
5256                  mkIRExprVec_2( addrAct,
5257                                 zwidenToHostWord( mce, vdata ))
5258               );
5259       }
5260       if (guard) di->guard = guard;
5261       setHelperAnns( mce, di );
5262       stmt( 'V', mce, IRStmt_Dirty(di) );
5263    }
5264 
5265 }
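/* A rough sketch of what the above generates in the simple case: for an
   unguarded little-endian 32-bit store of |data| at |addr|, approximately

      complainIfUndefined(addr)            -- definedness check on addr
      DIRTY ::: MC_(helperc_STOREV32le) ( addr, zwidenToHostWord(vdata) )

   i.e. the address is checked for definedness (and, indirectly via the
   helper, for validity), and the V bits of the data are written to
   shadow memory.  V128 and V256 stores are simply split into two or
   four 64-bit helper calls, as coded above. */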
5266 
5267 
5268 /* Do lazy pessimistic propagation through a dirty helper call, by
5269    looking at the annotations on it.  This is the most complex part of
5270    Memcheck. */
5271 
5272 static IRType szToITy ( Int n )
5273 {
5274    switch (n) {
5275       case 1: return Ity_I8;
5276       case 2: return Ity_I16;
5277       case 4: return Ity_I32;
5278       case 8: return Ity_I64;
5279       default: VG_(tool_panic)("szToITy(memcheck)");
5280    }
5281 }
5282 
5283 static
5284 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
5285 {
5286    Int       i, k, n, toDo, gSz, gOff;
5287    IRAtom    *src, *here, *curr;
5288    IRType    tySrc, tyDst;
5289    IRTemp    dst;
5290    IREndness end;
5291 
5292    /* What's the native endianness?  We need to know this. */
5293 #  if defined(VG_BIGENDIAN)
5294    end = Iend_BE;
5295 #  elif defined(VG_LITTLEENDIAN)
5296    end = Iend_LE;
5297 #  else
5298 #    error "Unknown endianness"
5299 #  endif
5300 
5301    /* First check the guard. */
5302    complainIfUndefined(mce, d->guard, NULL);
5303 
5304    /* Now round up all inputs and PCast over them. */
5305    curr = definedOfType(Ity_I32);
5306 
5307    /* Inputs: unmasked args
5308       Note: arguments are evaluated REGARDLESS of the guard expression */
5309    for (i = 0; d->args[i]; i++) {
5310       IRAtom* arg = d->args[i];
5311       if ( (d->cee->mcx_mask & (1<<i))
5312            || UNLIKELY(is_IRExpr_VECRET_or_BBPTR(arg)) ) {
5313          /* ignore this arg */
5314       } else {
5315          here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, arg) );
5316          curr = mkUifU32(mce, here, curr);
5317       }
5318    }
5319 
5320    /* Inputs: guest state that we read. */
5321    for (i = 0; i < d->nFxState; i++) {
5322       tl_assert(d->fxState[i].fx != Ifx_None);
5323       if (d->fxState[i].fx == Ifx_Write)
5324          continue;
5325 
5326       /* Enumerate the described state segments */
5327       for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
5328          gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
5329          gSz  = d->fxState[i].size;
5330 
5331          /* Ignore any sections marked as 'always defined'. */
5332          if (isAlwaysDefd(mce, gOff, gSz)) {
5333             if (0)
5334             VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
5335                         gOff, gSz);
5336             continue;
5337          }
5338 
5339          /* This state element is read or modified.  So we need to
5340             consider it.  If larger than 8 bytes, deal with it in
5341             8-byte chunks. */
5342          while (True) {
5343             tl_assert(gSz >= 0);
5344             if (gSz == 0) break;
5345             n = gSz <= 8 ? gSz : 8;
5346             /* update 'curr' with UifU of the state slice
5347                gOff .. gOff+n-1 */
5348             tySrc = szToITy( n );
5349 
5350             /* Observe the guard expression. If it is false use an
5351                all-bits-defined bit pattern */
5352             IRAtom *cond, *iffalse, *iftrue;
5353 
5354             cond    = assignNew('V', mce, Ity_I1, d->guard);
5355             iftrue  = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
5356             iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
5357             src     = assignNew('V', mce, tySrc,
5358                                 IRExpr_ITE(cond, iftrue, iffalse));
5359 
5360             here = mkPCastTo( mce, Ity_I32, src );
5361             curr = mkUifU32(mce, here, curr);
5362             gSz -= n;
5363             gOff += n;
5364          }
5365       }
5366    }
5367 
5368    /* Inputs: memory.  First set up some info needed regardless of
5369       whether we're doing reads or writes. */
5370 
5371    if (d->mFx != Ifx_None) {
5372       /* Because we may do multiple shadow loads/stores from the same
5373          base address, it's best to do a single test of its
5374          definedness right now.  Post-instrumentation optimisation
5375          should remove all but this test. */
5376       IRType tyAddr;
5377       tl_assert(d->mAddr);
5378       complainIfUndefined(mce, d->mAddr, d->guard);
5379 
5380       tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
5381       tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
5382       tl_assert(tyAddr == mce->hWordTy); /* not really right */
5383    }
5384 
5385    /* Deal with memory inputs (reads or modifies) */
5386    if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
5387       toDo   = d->mSize;
5388       /* chew off 32-bit chunks.  We don't care about the endianness
5389          since it's all going to be condensed down to a single bit,
5390          but nevertheless choose an endianness which is hopefully
5391          native to the platform. */
5392       while (toDo >= 4) {
5393          here = mkPCastTo(
5394                    mce, Ity_I32,
5395                    expr2vbits_Load_guarded_Simple(
5396                       mce, end, Ity_I32, d->mAddr, d->mSize - toDo, d->guard )
5397                 );
5398          curr = mkUifU32(mce, here, curr);
5399          toDo -= 4;
5400       }
5401       /* chew off 16-bit chunks */
5402       while (toDo >= 2) {
5403          here = mkPCastTo(
5404                    mce, Ity_I32,
5405                    expr2vbits_Load_guarded_Simple(
5406                       mce, end, Ity_I16, d->mAddr, d->mSize - toDo, d->guard )
5407                 );
5408          curr = mkUifU32(mce, here, curr);
5409          toDo -= 2;
5410       }
5411       /* chew off the remaining 8-bit chunk, if any */
5412       if (toDo == 1) {
5413          here = mkPCastTo(
5414                    mce, Ity_I32,
5415                    expr2vbits_Load_guarded_Simple(
5416                       mce, end, Ity_I8, d->mAddr, d->mSize - toDo, d->guard )
5417                 );
5418          curr = mkUifU32(mce, here, curr);
5419          toDo -= 1;
5420       }
5421       tl_assert(toDo == 0);
5422    }
5423 
5424    /* Whew!  So curr is a 32-bit V-value summarising pessimistically
5425       all the inputs to the helper.  Now we need to re-distribute the
5426       results to all destinations. */
5427 
5428    /* Outputs: the destination temporary, if there is one. */
5429    if (d->tmp != IRTemp_INVALID) {
5430       dst   = findShadowTmpV(mce, d->tmp);
5431       tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
5432       assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
5433    }
5434 
5435    /* Outputs: guest state that we write or modify. */
5436    for (i = 0; i < d->nFxState; i++) {
5437       tl_assert(d->fxState[i].fx != Ifx_None);
5438       if (d->fxState[i].fx == Ifx_Read)
5439          continue;
5440 
5441       /* Enumerate the described state segments */
5442       for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
5443          gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
5444          gSz  = d->fxState[i].size;
5445 
5446          /* Ignore any sections marked as 'always defined'. */
5447          if (isAlwaysDefd(mce, gOff, gSz))
5448             continue;
5449 
5450          /* This state element is written or modified.  So we need to
5451             consider it.  If larger than 8 bytes, deal with it in
5452             8-byte chunks. */
5453          while (True) {
5454             tl_assert(gSz >= 0);
5455             if (gSz == 0) break;
5456             n = gSz <= 8 ? gSz : 8;
5457             /* Write suitably-casted 'curr' to the state slice
5458                gOff .. gOff+n-1 */
5459             tyDst = szToITy( n );
5460             do_shadow_PUT( mce, gOff,
5461                                 NULL, /* original atom */
5462                                 mkPCastTo( mce, tyDst, curr ), d->guard );
5463             gSz -= n;
5464             gOff += n;
5465          }
5466       }
5467    }
5468 
5469    /* Outputs: memory that we write or modify.  Same comments about
5470       endianness as above apply. */
5471    if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
5472       toDo   = d->mSize;
5473       /* chew off 32-bit chunks */
5474       while (toDo >= 4) {
5475          do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
5476                           NULL, /* original data */
5477                           mkPCastTo( mce, Ity_I32, curr ),
5478                           d->guard );
5479          toDo -= 4;
5480       }
5481       /* chew off 16-bit chunks */
5482       while (toDo >= 2) {
5483          do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
5484                           NULL, /* original data */
5485                           mkPCastTo( mce, Ity_I16, curr ),
5486                           d->guard );
5487          toDo -= 2;
5488       }
5489       /* chew off the remaining 8-bit chunk, if any */
5490       if (toDo == 1) {
5491          do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
5492                           NULL, /* original data */
5493                           mkPCastTo( mce, Ity_I8, curr ),
5494                           d->guard );
5495          toDo -= 1;
5496       }
5497       tl_assert(toDo == 0);
5498    }
5499 
5500 }
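/* A summary example of the scheme above: for a dirty call which reads a
   6-byte memory area and writes a 32-bit result temporary, the
   instrumentation roughly does this: PCast the vbits of each unmasked
   argument to I32 and UifU them into |curr|; do one guarded I32 shadow
   load at offset 0 and one I16 shadow load at offset 4, PCast each to
   I32 and UifU into |curr|; finally PCast |curr| to I32 and assign it
   to the shadow of the result temporary.  In other words, everything
   the helper might read pessimistically taints everything it might
   write. */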
5501 
5502 
5503 /* We have an ABI hint telling us that [base .. base+len-1] is to
5504    become undefined ("writable").  Generate code to call a helper to
5505    notify the A/V bit machinery of this fact.
5506 
5507    We call
5508    void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
5509                                                     Addr nia );
5510 */
5511 static
5512 void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
5513 {
5514    IRDirty* di;
5515    /* Minor optimisation: if not doing origin tracking, ignore the
5516       supplied nia and pass zero instead.  This is on the basis that
5517       MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
5518       almost always generate a shorter instruction to put zero into a
5519       register than any other value. */
5520    if (MC_(clo_mc_level) < 3)
5521       nia = mkIRExpr_HWord(0);
5522 
5523    di = unsafeIRDirty_0_N(
5524            0/*regparms*/,
5525            "MC_(helperc_MAKE_STACK_UNINIT)",
5526            VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
5527            mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
5528         );
5529    stmt( 'V', mce, IRStmt_Dirty(di) );
5530 }
5531 
5532 
5533 /* ------ Dealing with IRCAS (big and complex) ------ */
5534 
5535 /* FWDS */
5536 static IRAtom* gen_load_b  ( MCEnv* mce, Int szB,
5537                              IRAtom* baseaddr, Int offset );
5538 static IRAtom* gen_maxU32  ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
5539 static void    gen_store_b ( MCEnv* mce, Int szB,
5540                              IRAtom* baseaddr, Int offset, IRAtom* dataB,
5541                              IRAtom* guard );
5542 
5543 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
5544 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
5545 
5546 
5547 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
5548    IRExpr.Consts, else this asserts.  If they are both Consts, it
5549    doesn't do anything.  So that just leaves the RdTmp case.
5550 
5551    In which case: this assigns the shadow value SHADOW to the IR
5552    shadow temporary associated with ORIG.  That is, ORIG, being an
5553    original temporary, will have a shadow temporary associated with
5554    it.  However, in the case envisaged here, there will so far have
5555    been no IR emitted to actually write a shadow value into that
5556    temporary.  What this routine does is to (emit IR to) copy the
5557    value in SHADOW into said temporary, so that after this call,
5558    IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
5559    value in SHADOW.
5560 
5561    Point is to allow callers to compute "by hand" a shadow value for
5562    ORIG, and force it to be associated with ORIG.
5563 
5564    How do we know that the shadow associated with ORIG has not so far
5565    been assigned to?  Well, we don't per se know that, but supposing
5566    it had.  Then this routine would create a second assignment to it,
5567    and later the IR sanity checker would barf.  But that never
5568    happens.  QED.
5569 */
5570 static void bind_shadow_tmp_to_orig ( UChar how,
5571                                       MCEnv* mce,
5572                                       IRAtom* orig, IRAtom* shadow )
5573 {
5574    tl_assert(isOriginalAtom(mce, orig));
5575    tl_assert(isShadowAtom(mce, shadow));
5576    switch (orig->tag) {
5577       case Iex_Const:
5578          tl_assert(shadow->tag == Iex_Const);
5579          break;
5580       case Iex_RdTmp:
5581          tl_assert(shadow->tag == Iex_RdTmp);
5582          if (how == 'V') {
5583             assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
5584                    shadow);
5585          } else {
5586             tl_assert(how == 'B');
5587             assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
5588                    shadow);
5589          }
5590          break;
5591       default:
5592          tl_assert(0);
5593    }
5594 }
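/* The expected use of this is in the CAS handling below: the shadow of
   the 'old' value observed by the CAS is computed by hand (from the
   shadow load of the location) and is then bound to cas->oldLo (and
   cas->oldHi in the double case) via this routine, rather than by the
   normal per-statement instrumentation. */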
5595 
5596 
5597 static
5598 void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
5599 {
5600    /* Scheme is (both single- and double- cases):
5601 
5602       1. fetch data#,dataB (the proposed new value)
5603 
5604       2. fetch expd#,expdB (what we expect to see at the address)
5605 
5606       3. check definedness of address
5607 
5608       4. load old#,oldB from shadow memory; this also checks
5609          addressability of the address
5610 
5611       5. the CAS itself
5612 
5613       6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.
5614 
5615       7. if "expected == old" (as computed by (6))
5616             store data#,dataB to shadow memory
5617 
5618       Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
5619       'data' but 7 stores 'data#'.  Hence it is possible for the
5620       shadow data to be incorrectly checked and/or updated:
5621 
5622       * 7 is at least gated correctly, since the 'expected == old'
5623         condition is derived from outputs of 5.  However, the shadow
5624         write could happen too late: imagine after 5 we are
5625         descheduled, a different thread runs, writes a different
5626         (shadow) value at the address, and then we resume, hence
5627         overwriting the shadow value written by the other thread.
5628 
5629       Because the original memory access is atomic, there's no way to
5630       make both the original and shadow accesses into a single atomic
5631       thing, hence this is unavoidable.
5632 
5633       At least as Valgrind stands, I don't think it's a problem, since
5634       we're single threaded *and* we guarantee that there are no
5635       context switches during the execution of any specific superblock
5636       -- context switches can only happen at superblock boundaries.
5637 
5638       If Valgrind ever becomes MT in the future, then it might be more
5639       of a problem.  A possible kludge would be to artificially
5640       associate with the location, a lock, which we must acquire and
5641       release around the transaction as a whole.  Hmm, that probably
5642       wouldn't work properly since it only guards us against other
5643       threads doing CASs on the same location, not against other
5644       threads doing normal reads and writes.
5645 
5646       ------------------------------------------------------------
5647 
5648       COMMENT_ON_CasCmpEQ:
5649 
5650       Note two things.  Firstly, in the sequence above, we compute
5651       "expected == old", but we don't check definedness of it.  Why
5652       not?  Also, the x86 and amd64 front ends use
5653       Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
5654       determination (expected == old ?) for themselves, and we also
5655       don't check definedness for those primops; we just say that the
5656       result is defined.  Why?  Details follow.
5657 
5658       x86/amd64 contains various forms of locked insns:
5659       * lock prefix before all basic arithmetic insn;
5660         eg lock xorl %reg1,(%reg2)
5661       * atomic exchange reg-mem
5662       * compare-and-swaps
5663 
5664       Rather than attempt to represent them all, which would be a
5665       royal PITA, I used a result from Maurice Herlihy
5666       (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
5667       demonstrates that compare-and-swap is a primitive more general
5668       than the other two, and so can be used to represent all of them.
5669       So the translation scheme for (eg) lock incl (%reg) is as
5670       follows:
5671 
5672         again:
5673          old = * %reg
5674          new = old + 1
5675          atomically { if (* %reg == old) { * %reg = new } else { goto again } }
5676 
5677       The "atomically" is the CAS bit.  The scheme is always the same:
5678       get old value from memory, compute new value, atomically stuff
5679       new value back in memory iff the old value has not changed (iow,
5680       no other thread modified it in the meantime).  If it has changed
5681       then we've been out-raced and we have to start over.
5682 
5683       Now that's all very neat, but it has the bad side effect of
5684       introducing an explicit equality test into the translation.
5685       Consider the behaviour of said code on a memory location which
5686       is uninitialised.  We will wind up doing a comparison on
5687       uninitialised data, and mc duly complains.
5688 
5689       The difficulty is that, in the common case, the location is
5690       uncontended, and so we're usually comparing the same value
5691       (* %reg) with itself.  So we shouldn't complain even if it
5692       is undefined.  But mc doesn't know that.
5693 
5694       My solution is to mark the == in the IR specially, so as to tell
5695       mc that it almost certainly compares a value with itself, and we
5696       should just regard the result as always defined.  Rather than
5697       add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
5698       Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
5699 
5700       So there's always the question: can this give a false
5701       negative?  eg, imagine that initially, * %reg is defined; and we
5702       read that; but then in the gap between the read and the CAS, a
5703       different thread writes an undefined (and different) value at
5704       the location.  Then the CAS in this thread will fail and we will
5705       go back to "again:", but without knowing that the trip back
5706       there was based on an undefined comparison.  No matter; at least
5707       the other thread won the race and the location is correctly
5708       marked as undefined.  What if it wrote an uninitialised version
5709       of the same value that was there originally, though?
5710 
5711       etc etc.  Seems like there's a small corner case in which we
5712       might lose the fact that something's defined -- we're out-raced
5713       in between the "old = * reg" and the "atomically {", _and_ the
5714       other thread is writing in an undefined version of what's
5715       already there.  Well, that seems pretty unlikely.
5716 
5717       ---
5718 
5719       If we ever need to reinstate it .. code which generates a
5720       definedness test for "expected == old" was removed at r10432 of
5721       this file.
5722    */
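   /* Illustrative sketch (informal, not the literal IR we emit): for a
      32-bit single CAS "old = CASle(addr, expd -> data)", the scheme
      described above produces shadow IR roughly like this, where a
      trailing # denotes the V-bit shadow of a value:

         data# = vbits-of(data)                        -- step 1
         expd# = vbits-of(expd)                        -- step 2
         old#  = shadow-load:32(addr)                  -- steps 3/4
         old   = CASle(addr, expd -> data)             -- step 5
         eq    = CasCmpEQ32(expd, old)                 -- step 6
         if (eq) shadow-store:32(addr, data#)          -- step 7
   */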
5723    if (cas->oldHi == IRTemp_INVALID) {
5724       do_shadow_CAS_single( mce, cas );
5725    } else {
5726       do_shadow_CAS_double( mce, cas );
5727    }
5728 }
5729 
5730 
5731 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
5732 {
5733    IRAtom *vdataLo = NULL, *bdataLo = NULL;
5734    IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
5735    IRAtom *voldLo  = NULL, *boldLo  = NULL;
5736    IRAtom *expd_eq_old = NULL;
5737    IROp   opCasCmpEQ;
5738    Int    elemSzB;
5739    IRType elemTy;
5740    Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
5741 
5742    /* single CAS */
5743    tl_assert(cas->oldHi == IRTemp_INVALID);
5744    tl_assert(cas->expdHi == NULL);
5745    tl_assert(cas->dataHi == NULL);
5746 
5747    elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
5748    switch (elemTy) {
5749       case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
5750       case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
5751       case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
5752       case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
5753       default: tl_assert(0); /* IR defn disallows any other types */
5754    }
5755 
5756    /* 1. fetch data# (the proposed new value) */
5757    tl_assert(isOriginalAtom(mce, cas->dataLo));
5758    vdataLo
5759       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
5760    tl_assert(isShadowAtom(mce, vdataLo));
5761    if (otrak) {
5762       bdataLo
5763          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
5764       tl_assert(isShadowAtom(mce, bdataLo));
5765    }
5766 
5767    /* 2. fetch expected# (what we expect to see at the address) */
5768    tl_assert(isOriginalAtom(mce, cas->expdLo));
5769    vexpdLo
5770       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
5771    tl_assert(isShadowAtom(mce, vexpdLo));
5772    if (otrak) {
5773       bexpdLo
5774          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
5775       tl_assert(isShadowAtom(mce, bexpdLo));
5776    }
5777 
5778    /* 3. check definedness of address */
5779    /* 4. fetch old# from shadow memory; this also checks
5780          addressability of the address */
5781    voldLo
5782       = assignNew(
5783            'V', mce, elemTy,
5784            expr2vbits_Load(
5785               mce,
5786               cas->end, elemTy, cas->addr, 0/*Addr bias*/,
5787               NULL/*always happens*/
5788         ));
5789    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
5790    if (otrak) {
5791       boldLo
5792          = assignNew('B', mce, Ity_I32,
5793                      gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
5794       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
5795    }
5796 
5797    /* 5. the CAS itself */
5798    stmt( 'C', mce, IRStmt_CAS(cas) );
5799 
5800    /* 6. compute "expected == old" */
5801    /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
5802    /* Note that 'C' is kinda faking it; it is indeed a non-shadow
5803       tree, but it's not copied from the input block. */
5804    expd_eq_old
5805       = assignNew('C', mce, Ity_I1,
5806                   binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
5807 
5808    /* 7. if "expected == old"
5809             store data# to shadow memory */
5810    do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
5811                     NULL/*data*/, vdataLo/*vdata*/,
5812                     expd_eq_old/*guard for store*/ );
5813    if (otrak) {
5814       gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
5815                    bdataLo/*bdata*/,
5816                    expd_eq_old/*guard for store*/ );
5817    }
5818 }
5819 
5820 
5821 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
5822 {
5823    IRAtom *vdataHi = NULL, *bdataHi = NULL;
5824    IRAtom *vdataLo = NULL, *bdataLo = NULL;
5825    IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
5826    IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
5827    IRAtom *voldHi  = NULL, *boldHi  = NULL;
5828    IRAtom *voldLo  = NULL, *boldLo  = NULL;
5829    IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
5830    IRAtom *expd_eq_old = NULL, *zero = NULL;
5831    IROp   opCasCmpEQ, opOr, opXor;
5832    Int    elemSzB, memOffsLo, memOffsHi;
5833    IRType elemTy;
5834    Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
5835 
5836    /* double CAS */
5837    tl_assert(cas->oldHi != IRTemp_INVALID);
5838    tl_assert(cas->expdHi != NULL);
5839    tl_assert(cas->dataHi != NULL);
5840 
5841    elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
5842    switch (elemTy) {
5843       case Ity_I8:
5844          opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
5845          elemSzB = 1; zero = mkU8(0);
5846          break;
5847       case Ity_I16:
5848          opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
5849          elemSzB = 2; zero = mkU16(0);
5850          break;
5851       case Ity_I32:
5852          opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
5853          elemSzB = 4; zero = mkU32(0);
5854          break;
5855       case Ity_I64:
5856          opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
5857          elemSzB = 8; zero = mkU64(0);
5858          break;
5859       default:
5860          tl_assert(0); /* IR defn disallows any other types */
5861    }
5862 
5863    /* 1. fetch data# (the proposed new value) */
5864    tl_assert(isOriginalAtom(mce, cas->dataHi));
5865    tl_assert(isOriginalAtom(mce, cas->dataLo));
5866    vdataHi
5867       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
5868    vdataLo
5869       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
5870    tl_assert(isShadowAtom(mce, vdataHi));
5871    tl_assert(isShadowAtom(mce, vdataLo));
5872    if (otrak) {
5873       bdataHi
5874          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
5875       bdataLo
5876          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
5877       tl_assert(isShadowAtom(mce, bdataHi));
5878       tl_assert(isShadowAtom(mce, bdataLo));
5879    }
5880 
5881    /* 2. fetch expected# (what we expect to see at the address) */
5882    tl_assert(isOriginalAtom(mce, cas->expdHi));
5883    tl_assert(isOriginalAtom(mce, cas->expdLo));
5884    vexpdHi
5885       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
5886    vexpdLo
5887       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
5888    tl_assert(isShadowAtom(mce, vexpdHi));
5889    tl_assert(isShadowAtom(mce, vexpdLo));
5890    if (otrak) {
5891       bexpdHi
5892          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
5893       bexpdLo
5894          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
5895       tl_assert(isShadowAtom(mce, bexpdHi));
5896       tl_assert(isShadowAtom(mce, bexpdLo));
5897    }
5898 
5899    /* 3. check definedness of address */
5900    /* 4. fetch old# from shadow memory; this also checks
5901          addressability of the address */
5902    if (cas->end == Iend_LE) {
5903       memOffsLo = 0;
5904       memOffsHi = elemSzB;
5905    } else {
5906       tl_assert(cas->end == Iend_BE);
5907       memOffsLo = elemSzB;
5908       memOffsHi = 0;
5909    }
5910    voldHi
5911       = assignNew(
5912            'V', mce, elemTy,
5913            expr2vbits_Load(
5914               mce,
5915               cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/,
5916               NULL/*always happens*/
5917         ));
5918    voldLo
5919       = assignNew(
5920            'V', mce, elemTy,
5921            expr2vbits_Load(
5922               mce,
5923               cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/,
5924               NULL/*always happens*/
5925         ));
5926    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
5927    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
5928    if (otrak) {
5929       boldHi
5930          = assignNew('B', mce, Ity_I32,
5931                      gen_load_b(mce, elemSzB, cas->addr,
5932                                 memOffsHi/*addr bias*/));
5933       boldLo
5934          = assignNew('B', mce, Ity_I32,
5935                      gen_load_b(mce, elemSzB, cas->addr,
5936                                 memOffsLo/*addr bias*/));
5937       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
5938       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
5939    }
5940 
5941    /* 5. the CAS itself */
5942    stmt( 'C', mce, IRStmt_CAS(cas) );
5943 
5944    /* 6. compute "expected == old" */
5945    /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
5946    /* Note that 'C' is kinda faking it; it is indeed a non-shadow
5947       tree, but it's not copied from the input block. */
5948    /*
5949       xHi = oldHi ^ expdHi;
5950       xLo = oldLo ^ expdLo;
5951       xHL = xHi | xLo;
5952       expd_eq_old = xHL == 0;
5953    */
5954    xHi = assignNew('C', mce, elemTy,
5955                    binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
5956    xLo = assignNew('C', mce, elemTy,
5957                    binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
5958    xHL = assignNew('C', mce, elemTy,
5959                    binop(opOr, xHi, xLo));
5960    expd_eq_old
5961       = assignNew('C', mce, Ity_I1,
5962                   binop(opCasCmpEQ, xHL, zero));
5963 
5964    /* 7. if "expected == old"
5965             store data# to shadow memory */
5966    do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
5967                     NULL/*data*/, vdataHi/*vdata*/,
5968                     expd_eq_old/*guard for store*/ );
5969    do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
5970                     NULL/*data*/, vdataLo/*vdata*/,
5971                     expd_eq_old/*guard for store*/ );
5972    if (otrak) {
5973       gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
5974                    bdataHi/*bdata*/,
5975                    expd_eq_old/*guard for store*/ );
5976       gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
5977                    bdataLo/*bdata*/,
5978                    expd_eq_old/*guard for store*/ );
5979    }
5980 }
5981 
5982 
5983 /* ------ Dealing with LL/SC (not difficult) ------ */
5984 
5985 static void do_shadow_LLSC ( MCEnv*    mce,
5986                              IREndness stEnd,
5987                              IRTemp    stResult,
5988                              IRExpr*   stAddr,
5989                              IRExpr*   stStoredata )
5990 {
5991    /* In short: treat a load-linked like a normal load followed by an
5992       assignment of the loaded (shadow) data to the result temporary.
5993       Treat a store-conditional like a normal store, and mark the
5994       result temporary as defined. */
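   /* Illustrative sketch (informal, not the literal IR we emit): for a
      load-linked "result = LD-Linked(addr)" and a store-conditional
      "result = ( ST-Cond(addr) = data )", the two cases below amount to
      roughly:

         LL:  result# = shadow-load(addr)      -- just like a plain load
         SC:  shadow-store(addr, data#)        -- just like a plain store
              result# = all-defined            -- success flag claimed defined
   */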
5995    IRType resTy  = typeOfIRTemp(mce->sb->tyenv, stResult);
5996    IRTemp resTmp = findShadowTmpV(mce, stResult);
5997 
5998    tl_assert(isIRAtom(stAddr));
5999    if (stStoredata)
6000       tl_assert(isIRAtom(stStoredata));
6001 
6002    if (stStoredata == NULL) {
6003       /* Load Linked */
6004       /* Just treat this as a normal load, followed by an assignment of
6005          the value to .result. */
6006       /* Stay sane */
6007       tl_assert(resTy == Ity_I64 || resTy == Ity_I32
6008                 || resTy == Ity_I16 || resTy == Ity_I8);
6009       assign( 'V', mce, resTmp,
6010                    expr2vbits_Load(
6011                       mce, stEnd, resTy, stAddr, 0/*addr bias*/,
6012                       NULL/*always happens*/) );
6013    } else {
6014       /* Store Conditional */
6015       /* Stay sane */
6016       IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
6017                                    stStoredata);
6018       tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
6019                 || dataTy == Ity_I16 || dataTy == Ity_I8);
6020       do_shadow_Store( mce, stEnd,
6021                             stAddr, 0/* addr bias */,
6022                             stStoredata,
6023                             NULL /* shadow data */,
6024                             NULL/*guard*/ );
6025       /* This is a store conditional, so it writes to .result a value
6026          indicating whether or not the store succeeded.  Just claim
6027          this value is always defined.  In the PowerPC interpretation
6028          of store-conditional, definedness of the success indication
6029          depends on whether the address of the store matches the
6030          reservation address.  But we can't tell that here (and
6031          anyway, we're not being PowerPC-specific).  At least we are
6032          guaranteed that the definedness of the store address, and its
6033          addressability, will be checked as per normal.  So it seems
6034          pretty safe to just say that the success indication is always
6035          defined.
6036 
6037          In schemeS, for origin tracking, we must correspondingly set
6038          a no-origin value for the origin shadow of .result.
6039       */
6040       tl_assert(resTy == Ity_I1);
6041       assign( 'V', mce, resTmp, definedOfType(resTy) );
6042    }
6043 }
6044 
6045 
6046 /* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
6047 
6048 static void do_shadow_StoreG ( MCEnv* mce, IRStoreG* sg )
6049 {
6050    complainIfUndefined(mce, sg->guard, NULL);
6051    /* do_shadow_Store will generate code to check the definedness and
6052       validity of sg->addr, in the case where sg->guard evaluates to
6053       True at run-time. */
6054    do_shadow_Store( mce, sg->end,
6055                     sg->addr, 0/* addr bias */,
6056                     sg->data,
6057                     NULL /* shadow data */,
6058                     sg->guard );
6059 }
6060 
6061 static void do_shadow_LoadG ( MCEnv* mce, IRLoadG* lg )
6062 {
6063    complainIfUndefined(mce, lg->guard, NULL);
6064    /* expr2vbits_Load_guarded_General will generate code to check the
6065       definedness and validity of lg->addr, in the case where
6066       lg->guard evaluates to True at run-time. */
6067 
6068    /* Look at the LoadG's built-in conversion operation, to determine
6069       the source (actual loaded data) type, and the equivalent IROp.
6070       NOTE that implicitly we are taking a widening operation to be
6071       applied to original atoms and producing one that applies to V
6072       bits.  Since signed and unsigned widening are self-shadowing,
6073       this is a straight copy of the op (modulo swapping from the
6074       IRLoadGOp form to the IROp form).  Note also therefore that this
6075       implicitly duplicates the logic to do with said widening ops in
6076       expr2vbits_Unop.  See comment at the start of expr2vbits_Unop. */
6077    IROp   vwiden   = Iop_INVALID;
6078    IRType loadedTy = Ity_INVALID;
6079    switch (lg->cvt) {
6080       case ILGop_IdentV128: loadedTy = Ity_V128; vwiden = Iop_INVALID; break;
6081       case ILGop_Ident64:   loadedTy = Ity_I64;  vwiden = Iop_INVALID; break;
6082       case ILGop_Ident32:   loadedTy = Ity_I32;  vwiden = Iop_INVALID; break;
6083       case ILGop_16Uto32:   loadedTy = Ity_I16;  vwiden = Iop_16Uto32; break;
6084       case ILGop_16Sto32:   loadedTy = Ity_I16;  vwiden = Iop_16Sto32; break;
6085       case ILGop_8Uto32:    loadedTy = Ity_I8;   vwiden = Iop_8Uto32;  break;
6086       case ILGop_8Sto32:    loadedTy = Ity_I8;   vwiden = Iop_8Sto32;  break;
6087       default: VG_(tool_panic)("do_shadow_LoadG");
6088    }
6089 
6090    IRAtom* vbits_alt
6091       = expr2vbits( mce, lg->alt );
6092    IRAtom* vbits_final
6093       = expr2vbits_Load_guarded_General(mce, lg->end, loadedTy,
6094                                         lg->addr, 0/*addr bias*/,
6095                                         lg->guard, vwiden, vbits_alt );
6096    /* And finally, bind the V bits to the destination temporary. */
6097    assign( 'V', mce, findShadowTmpV(mce, lg->dst), vbits_final );
6098 }
6099 
6100 
6101 /*------------------------------------------------------------*/
6102 /*--- Memcheck main                                        ---*/
6103 /*------------------------------------------------------------*/
6104 
6105 static void schemeS ( MCEnv* mce, IRStmt* st );
6106 
6107 static Bool isBogusAtom ( IRAtom* at )
6108 {
6109    ULong n = 0;
6110    IRConst* con;
6111    tl_assert(isIRAtom(at));
6112    if (at->tag == Iex_RdTmp)
6113       return False;
6114    tl_assert(at->tag == Iex_Const);
6115    con = at->Iex.Const.con;
6116    switch (con->tag) {
6117       case Ico_U1:   return False;
6118       case Ico_U8:   n = (ULong)con->Ico.U8; break;
6119       case Ico_U16:  n = (ULong)con->Ico.U16; break;
6120       case Ico_U32:  n = (ULong)con->Ico.U32; break;
6121       case Ico_U64:  n = (ULong)con->Ico.U64; break;
6122       case Ico_F32:  return False;
6123       case Ico_F64:  return False;
6124       case Ico_F32i: return False;
6125       case Ico_F64i: return False;
6126       case Ico_V128: return False;
6127       case Ico_V256: return False;
6128       default: ppIRExpr(at); tl_assert(0);
6129    }
6130    /* VG_(printf)("%llx\n", n); */
6131    return (/*32*/    n == 0xFEFEFEFFULL
6132            /*32*/ || n == 0x80808080ULL
6133            /*32*/ || n == 0x7F7F7F7FULL
6134            /*32*/ || n == 0x7EFEFEFFULL
6135            /*32*/ || n == 0x81010100ULL
6136            /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
6137            /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
6138            /*64*/ || n == 0x0000000000008080ULL
6139            /*64*/ || n == 0x8080808080808080ULL
6140            /*64*/ || n == 0x0101010101010101ULL
6141           );
6142 }
6143 
6144 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
6145 {
6146    Int      i;
6147    IRExpr*  e;
6148    IRDirty* d;
6149    IRCAS*   cas;
6150    switch (st->tag) {
6151       case Ist_WrTmp:
6152          e = st->Ist.WrTmp.data;
6153          switch (e->tag) {
6154             case Iex_Get:
6155             case Iex_RdTmp:
6156                return False;
6157             case Iex_Const:
6158                return isBogusAtom(e);
6159             case Iex_Unop:
6160                return isBogusAtom(e->Iex.Unop.arg)
6161                       || e->Iex.Unop.op == Iop_GetMSBs8x16;
6162             case Iex_GetI:
6163                return isBogusAtom(e->Iex.GetI.ix);
6164             case Iex_Binop:
6165                return isBogusAtom(e->Iex.Binop.arg1)
6166                       || isBogusAtom(e->Iex.Binop.arg2);
6167             case Iex_Triop:
6168                return isBogusAtom(e->Iex.Triop.details->arg1)
6169                       || isBogusAtom(e->Iex.Triop.details->arg2)
6170                       || isBogusAtom(e->Iex.Triop.details->arg3);
6171             case Iex_Qop:
6172                return isBogusAtom(e->Iex.Qop.details->arg1)
6173                       || isBogusAtom(e->Iex.Qop.details->arg2)
6174                       || isBogusAtom(e->Iex.Qop.details->arg3)
6175                       || isBogusAtom(e->Iex.Qop.details->arg4);
6176             case Iex_ITE:
6177                return isBogusAtom(e->Iex.ITE.cond)
6178                       || isBogusAtom(e->Iex.ITE.iftrue)
6179                       || isBogusAtom(e->Iex.ITE.iffalse);
6180             case Iex_Load:
6181                return isBogusAtom(e->Iex.Load.addr);
6182             case Iex_CCall:
6183                for (i = 0; e->Iex.CCall.args[i]; i++)
6184                   if (isBogusAtom(e->Iex.CCall.args[i]))
6185                      return True;
6186                return False;
6187             default:
6188                goto unhandled;
6189          }
6190       case Ist_Dirty:
6191          d = st->Ist.Dirty.details;
6192          for (i = 0; d->args[i]; i++) {
6193             IRAtom* atom = d->args[i];
6194             if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(atom))) {
6195                if (isBogusAtom(atom))
6196                   return True;
6197             }
6198          }
6199          if (isBogusAtom(d->guard))
6200             return True;
6201          if (d->mAddr && isBogusAtom(d->mAddr))
6202             return True;
6203          return False;
6204       case Ist_Put:
6205          return isBogusAtom(st->Ist.Put.data);
6206       case Ist_PutI:
6207          return isBogusAtom(st->Ist.PutI.details->ix)
6208                 || isBogusAtom(st->Ist.PutI.details->data);
6209       case Ist_Store:
6210          return isBogusAtom(st->Ist.Store.addr)
6211                 || isBogusAtom(st->Ist.Store.data);
6212       case Ist_StoreG: {
6213          IRStoreG* sg = st->Ist.StoreG.details;
6214          return isBogusAtom(sg->addr) || isBogusAtom(sg->data)
6215                 || isBogusAtom(sg->guard);
6216       }
6217       case Ist_LoadG: {
6218          IRLoadG* lg = st->Ist.LoadG.details;
6219          return isBogusAtom(lg->addr) || isBogusAtom(lg->alt)
6220                 || isBogusAtom(lg->guard);
6221       }
6222       case Ist_Exit:
6223          return isBogusAtom(st->Ist.Exit.guard);
6224       case Ist_AbiHint:
6225          return isBogusAtom(st->Ist.AbiHint.base)
6226                 || isBogusAtom(st->Ist.AbiHint.nia);
6227       case Ist_NoOp:
6228       case Ist_IMark:
6229       case Ist_MBE:
6230          return False;
6231       case Ist_CAS:
6232          cas = st->Ist.CAS.details;
6233          return isBogusAtom(cas->addr)
6234                 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
6235                 || isBogusAtom(cas->expdLo)
6236                 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
6237                 || isBogusAtom(cas->dataLo);
6238       case Ist_LLSC:
6239          return isBogusAtom(st->Ist.LLSC.addr)
6240                 || (st->Ist.LLSC.storedata
6241                        ? isBogusAtom(st->Ist.LLSC.storedata)
6242                        : False);
6243       default:
6244       unhandled:
6245          ppIRStmt(st);
6246          VG_(tool_panic)("checkForBogusLiterals");
6247    }
6248 }
6249 
6250 
6251 IRSB* MC_(instrument) ( VgCallbackClosure* closure,
6252                         IRSB* sb_in,
6253                         const VexGuestLayout* layout,
6254                         const VexGuestExtents* vge,
6255                         const VexArchInfo* archinfo_host,
6256                         IRType gWordTy, IRType hWordTy )
6257 {
6258    Bool    verboze = 0||False;
6259    Int     i, j, first_stmt;
6260    IRStmt* st;
6261    MCEnv   mce;
6262    IRSB*   sb_out;
6263 
6264    if (gWordTy != hWordTy) {
6265       /* We don't currently support this case. */
6266       VG_(tool_panic)("host/guest word size mismatch");
6267    }
6268 
6269    /* Check we're not completely nuts */
6270    tl_assert(sizeof(UWord)  == sizeof(void*));
6271    tl_assert(sizeof(Word)   == sizeof(void*));
6272    tl_assert(sizeof(Addr)   == sizeof(void*));
6273    tl_assert(sizeof(ULong)  == 8);
6274    tl_assert(sizeof(Long)   == 8);
6275    tl_assert(sizeof(UInt)   == 4);
6276    tl_assert(sizeof(Int)    == 4);
6277 
6278    tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
6279 
6280    /* Set up SB */
6281    sb_out = deepCopyIRSBExceptStmts(sb_in);
6282 
6283    /* Set up the running environment.  Both .sb and .tmpMap are
6284       modified as we go along.  Note that tmps are added to both
6285       .sb->tyenv and .tmpMap together, so the valid index-set for
6286       those two arrays should always be identical. */
6287    VG_(memset)(&mce, 0, sizeof(mce));
6288    mce.sb             = sb_out;
6289    mce.trace          = verboze;
6290    mce.layout         = layout;
6291    mce.hWordTy        = hWordTy;
6292    mce.bogusLiterals  = False;
6293 
6294    /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on
6295       Darwin.  10.7 is mostly built with LLVM, which uses these for
6296       bitfield inserts, and we get a lot of false errors if the cheap
6297       interpretation is used, alas.  Could solve this much better if
6298       we knew which of such adds came from x86/amd64 LEA instructions,
6299       since these are the only ones really needing the expensive
6300       interpretation, but that would require some way to tag them in
6301       the _toIR.c front ends, which is a lot of faffing around.  So
6302       for now just use the slow and blunt-instrument solution. */
6303    mce.useLLVMworkarounds = False;
6304 #  if defined(VGO_darwin)
6305    mce.useLLVMworkarounds = True;
6306 #  endif
6307 
6308    mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
6309                             sizeof(TempMapEnt));
6310    VG_(hintSizeXA) (mce.tmpMap, sb_in->tyenv->types_used);
6311    for (i = 0; i < sb_in->tyenv->types_used; i++) {
6312       TempMapEnt ent;
6313       ent.kind    = Orig;
6314       ent.shadowV = IRTemp_INVALID;
6315       ent.shadowB = IRTemp_INVALID;
6316       VG_(addToXA)( mce.tmpMap, &ent );
6317    }
6318    tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
6319 
6320    if (MC_(clo_expensive_definedness_checks)) {
6321       /* For expensive definedness checking skip looking for bogus
6322          literals. */
6323       mce.bogusLiterals = True;
6324    } else {
6325       /* Make a preliminary inspection of the statements, to see if there
6326          are any dodgy-looking literals.  If there are, we generate
6327          extra-detailed (hence extra-expensive) instrumentation in
6328          places.  The scan stops as soon as dodginess is found, since
6329          one dodgy literal is enough to enable the expensive scheme. */
6330       Bool bogus = False;
6331 
6332       for (i = 0; i < sb_in->stmts_used; i++) {
6333          st = sb_in->stmts[i];
6334          tl_assert(st);
6335          tl_assert(isFlatIRStmt(st));
6336 
6337          if (!bogus) {
6338             bogus = checkForBogusLiterals(st);
6339             if (0 && bogus) {
6340                VG_(printf)("bogus: ");
6341                ppIRStmt(st);
6342                VG_(printf)("\n");
6343             }
6344             if (bogus) break;
6345          }
6346       }
6347       mce.bogusLiterals = bogus;
6348    }
6349 
6350    /* Copy verbatim any IR preamble preceding the first IMark */
6351 
6352    tl_assert(mce.sb == sb_out);
6353    tl_assert(mce.sb != sb_in);
6354 
6355    i = 0;
6356    while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
6357 
6358       st = sb_in->stmts[i];
6359       tl_assert(st);
6360       tl_assert(isFlatIRStmt(st));
6361 
6362       stmt( 'C', &mce, sb_in->stmts[i] );
6363       i++;
6364    }
6365 
6366    /* Nasty problem.  IR optimisation of the pre-instrumented IR may
6367       cause the IR following the preamble to contain references to IR
6368       temporaries defined in the preamble.  Because the preamble isn't
6369       instrumented, these temporaries don't have any shadows.
6370       Nevertheless uses of them following the preamble will cause
6371       memcheck to generate references to their shadows.  End effect is
6372       to cause IR sanity check failures, due to references to
6373       non-existent shadows.  This is only evident for the complex
6374       preambles used for function wrapping on TOC-afflicted platforms
6375       (ppc64-linux).
6376 
6377       The following loop therefore scans the preamble looking for
6378       assignments to temporaries.  For each one found it creates an
6379       assignment to the corresponding (V) shadow temp, marking it as
6380       'defined'.  This is the same resulting IR as if the main
6381       instrumentation loop before had been applied to the statement
6382       'tmp = CONSTANT'.
6383 
6384       Similarly, if origin tracking is enabled, we must generate an
6385       assignment for the corresponding origin (B) shadow, claiming
6386       no-origin, as appropriate for a defined value.
6387    */
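   /* Illustrative sketch (hypothetical preamble statement): if the
      preamble contained "t5 = GET:I64(152)", the loop below would in
      effect emit

         t5.V = all-defined:I64      -- V shadow marked 'defined'
         t5.B = 0x0:I32              -- B shadow marked 'no origin'
                                        (only when --track-origins=yes)
   */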
6388    for (j = 0; j < i; j++) {
6389       if (sb_in->stmts[j]->tag == Ist_WrTmp) {
6390          /* findShadowTmpV checks its arg is an original tmp;
6391             no need to assert that here. */
6392          IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
6393          IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
6394          IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
6395          assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
6396          if (MC_(clo_mc_level) == 3) {
6397             IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
6398             tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
6399             assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
6400          }
6401          if (0) {
6402             VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
6403             ppIRType( ty_v );
6404             VG_(printf)("\n");
6405          }
6406       }
6407    }
6408 
6409    /* Iterate over the remaining stmts to generate instrumentation. */
6410 
6411    tl_assert(sb_in->stmts_used > 0);
6412    tl_assert(i >= 0);
6413    tl_assert(i < sb_in->stmts_used);
6414    tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
6415 
6416    for (/* use current i*/; i < sb_in->stmts_used; i++) {
6417 
6418       st = sb_in->stmts[i];
6419       first_stmt = sb_out->stmts_used;
6420 
6421       if (verboze) {
6422          VG_(printf)("\n");
6423          ppIRStmt(st);
6424          VG_(printf)("\n");
6425       }
6426 
6427       if (MC_(clo_mc_level) == 3) {
6428          /* See comments on case Ist_CAS below. */
6429          if (st->tag != Ist_CAS)
6430             schemeS( &mce, st );
6431       }
6432 
6433       /* Generate instrumentation code for each stmt ... */
6434 
6435       switch (st->tag) {
6436 
6437          case Ist_WrTmp:
6438             assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
6439                                expr2vbits( &mce, st->Ist.WrTmp.data) );
6440             break;
6441 
6442          case Ist_Put:
6443             do_shadow_PUT( &mce,
6444                            st->Ist.Put.offset,
6445                            st->Ist.Put.data,
6446                            NULL /* shadow atom */, NULL /* guard */ );
6447             break;
6448 
6449          case Ist_PutI:
6450             do_shadow_PUTI( &mce, st->Ist.PutI.details);
6451             break;
6452 
6453          case Ist_Store:
6454             do_shadow_Store( &mce, st->Ist.Store.end,
6455                                    st->Ist.Store.addr, 0/* addr bias */,
6456                                    st->Ist.Store.data,
6457                                    NULL /* shadow data */,
6458                                    NULL/*guard*/ );
6459             break;
6460 
6461          case Ist_StoreG:
6462             do_shadow_StoreG( &mce, st->Ist.StoreG.details );
6463             break;
6464 
6465          case Ist_LoadG:
6466             do_shadow_LoadG( &mce, st->Ist.LoadG.details );
6467             break;
6468 
6469          case Ist_Exit:
6470             complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
6471             break;
6472 
6473          case Ist_IMark:
6474             break;
6475 
6476          case Ist_NoOp:
6477          case Ist_MBE:
6478             break;
6479 
6480          case Ist_Dirty:
6481             do_shadow_Dirty( &mce, st->Ist.Dirty.details );
6482             break;
6483 
6484          case Ist_AbiHint:
6485             do_AbiHint( &mce, st->Ist.AbiHint.base,
6486                               st->Ist.AbiHint.len,
6487                               st->Ist.AbiHint.nia );
6488             break;
6489 
6490          case Ist_CAS:
6491             do_shadow_CAS( &mce, st->Ist.CAS.details );
6492             /* Note, do_shadow_CAS copies the CAS itself to the output
6493                block, because it needs to add instrumentation both
6494                before and after it.  Hence skip the copy below.  Also
6495                skip the origin-tracking stuff (call to schemeS) above,
6496                since that's all tangled up with it too; do_shadow_CAS
6497                does it all. */
6498             break;
6499 
6500          case Ist_LLSC:
6501             do_shadow_LLSC( &mce,
6502                             st->Ist.LLSC.end,
6503                             st->Ist.LLSC.result,
6504                             st->Ist.LLSC.addr,
6505                             st->Ist.LLSC.storedata );
6506             break;
6507 
6508          default:
6509             VG_(printf)("\n");
6510             ppIRStmt(st);
6511             VG_(printf)("\n");
6512             VG_(tool_panic)("memcheck: unhandled IRStmt");
6513 
6514       } /* switch (st->tag) */
6515 
6516       if (0 && verboze) {
6517          for (j = first_stmt; j < sb_out->stmts_used; j++) {
6518             VG_(printf)("   ");
6519             ppIRStmt(sb_out->stmts[j]);
6520             VG_(printf)("\n");
6521          }
6522          VG_(printf)("\n");
6523       }
6524 
6525       /* ... and finally copy the stmt itself to the output.  Except,
6526          skip the copy of IRCASs; see comments on case Ist_CAS
6527          above. */
6528       if (st->tag != Ist_CAS)
6529          stmt('C', &mce, st);
6530    }
6531 
6532    /* Now we need to complain if the jump target is undefined. */
6533    first_stmt = sb_out->stmts_used;
6534 
6535    if (verboze) {
6536       VG_(printf)("sb_in->next = ");
6537       ppIRExpr(sb_in->next);
6538       VG_(printf)("\n\n");
6539    }
6540 
6541    complainIfUndefined( &mce, sb_in->next, NULL );
6542 
6543    if (0 && verboze) {
6544       for (j = first_stmt; j < sb_out->stmts_used; j++) {
6545          VG_(printf)("   ");
6546          ppIRStmt(sb_out->stmts[j]);
6547          VG_(printf)("\n");
6548       }
6549       VG_(printf)("\n");
6550    }
6551 
6552    /* If this fails, there's been some serious snafu with tmp management
6553       that should be investigated. */
6554    tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
6555    VG_(deleteXA)( mce.tmpMap );
6556 
6557    tl_assert(mce.sb == sb_out);
6558    return sb_out;
6559 }
6560 
6561 /*------------------------------------------------------------*/
6562 /*--- Post-tree-build final tidying                        ---*/
6563 /*------------------------------------------------------------*/
6564 
6565 /* This exploits the observation that Memcheck often produces
6566    repeated conditional calls of the form
6567 
6568    Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
6569 
6570    with the same guard expression G guarding the same helper call.
6571    The second and subsequent calls are redundant.  This usually
6572    results from instrumentation of guest code containing multiple
6573    memory references at different constant offsets from the same base
6574    register.  After optimisation of the instrumentation, you get a
6575    test for the definedness of the base register for each memory
6576    reference, which is kinda pointless.  MC_(final_tidy) therefore
6577    looks for such repeated calls and removes all but the first. */
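/* Illustrative sketch (hypothetical IR, not the output of any particular
   build): after instrumentation and iropt we might see

      DIRTY t10 ::: MC_(helperc_value_check8_fail_no_o)()
      ...
      DIRTY t10 ::: MC_(helperc_value_check8_fail_no_o)()

   The second (and any later) call with the same helper and the same
   guard t10 is replaced below by an IRStmt_NoOp. */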
6578 
6579 /* A struct for recording which (helper, guard) pairs we have already
6580    seen. */
6581 typedef
6582    struct { void* entry; IRExpr* guard; }
6583    Pair;
6584 
6585 /* Return True if e1 and e2 definitely denote the same value (used to
6586    compare guards).  Return False if unknown; False is the safe
6587    answer.  Since guest registers and guest memory do not have the
6588    SSA property we must return False if any Gets or Loads appear in
6589    the expression. */
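/* For example (illustrative only): two guards both of the form
   CmpNE32(t7,0x0:I32) are considered the same value, since t7 is an
   SSA temporary; but two syntactically identical GET:I32(0)
   expressions are not, since the guest state may change between
   them. */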
6590 
6591 static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
6592 {
6593    if (e1->tag != e2->tag)
6594       return False;
6595    switch (e1->tag) {
6596       case Iex_Const:
6597          return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
6598       case Iex_Binop:
6599          return e1->Iex.Binop.op == e2->Iex.Binop.op
6600                 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
6601                 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
6602       case Iex_Unop:
6603          return e1->Iex.Unop.op == e2->Iex.Unop.op
6604                 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
6605       case Iex_RdTmp:
6606          return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
6607       case Iex_ITE:
6608          return sameIRValue( e1->Iex.ITE.cond, e2->Iex.ITE.cond )
6609                 && sameIRValue( e1->Iex.ITE.iftrue,  e2->Iex.ITE.iftrue )
6610                 && sameIRValue( e1->Iex.ITE.iffalse, e2->Iex.ITE.iffalse );
6611       case Iex_Qop:
6612       case Iex_Triop:
6613       case Iex_CCall:
6614          /* be lazy.  Could define equality for these, but they never
6615             appear to be used. */
6616          return False;
6617       case Iex_Get:
6618       case Iex_GetI:
6619       case Iex_Load:
6620          /* be conservative - these may not give the same value each
6621             time */
6622          return False;
6623       case Iex_Binder:
6624          /* should never see this */
6625          /* fallthrough */
6626       default:
6627          VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
6628          ppIRExpr(e1);
6629          VG_(tool_panic)("memcheck:sameIRValue");
6630          return False;
6631    }
6632 }
6633 
6634 /* See if 'pairs' already has an entry for (entry, guard).  Return
6635    True if so.  If not, add an entry. */
6636 
6637 static
6638 Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
6639 {
6640    Pair  p;
6641    Pair* pp;
6642    Int   i, n = VG_(sizeXA)( pairs );
6643    for (i = 0; i < n; i++) {
6644       pp = VG_(indexXA)( pairs, i );
6645       if (pp->entry == entry && sameIRValue(pp->guard, guard))
6646          return True;
6647    }
6648    p.guard = guard;
6649    p.entry = entry;
6650    VG_(addToXA)( pairs, &p );
6651    return False;
6652 }
6653 
6654 static Bool is_helperc_value_checkN_fail ( const HChar* name )
6655 {
6656    return
6657       0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
6658       || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
6659       || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
6660       || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
6661       || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
6662       || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
6663       || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
6664       || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
6665 }
6666 
6667 IRSB* MC_(final_tidy) ( IRSB* sb_in )
6668 {
6669    Int i;
6670    IRStmt*   st;
6671    IRDirty*  di;
6672    IRExpr*   guard;
6673    IRCallee* cee;
6674    Bool      alreadyPresent;
6675    XArray*   pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
6676                                  VG_(free), sizeof(Pair) );
6677    /* Scan forwards through the statements.  Each time a call to one
6678       of the relevant helpers is seen, check if we have made a
6679       previous call to the same helper using the same guard
6680       expression, and if so, delete the call. */
6681    for (i = 0; i < sb_in->stmts_used; i++) {
6682       st = sb_in->stmts[i];
6683       tl_assert(st);
6684       if (st->tag != Ist_Dirty)
6685          continue;
6686       di = st->Ist.Dirty.details;
6687       guard = di->guard;
6688       tl_assert(guard);
6689       if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
6690       cee = di->cee;
6691       if (!is_helperc_value_checkN_fail( cee->name ))
6692          continue;
6693       /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
6694          guard 'guard'.  Check if we have already seen a call to this
6695          function with the same guard.  If so, delete it.  If not,
6696          add it to the set of calls we do know about. */
6697       alreadyPresent = check_or_add( pairs, guard, cee->addr );
6698       if (alreadyPresent) {
6699          sb_in->stmts[i] = IRStmt_NoOp();
6700          if (0) VG_(printf)("XX\n");
6701       }
6702    }
6703    VG_(deleteXA)( pairs );
6704    return sb_in;
6705 }
6706 
6707 
6708 /*------------------------------------------------------------*/
6709 /*--- Origin tracking stuff                                ---*/
6710 /*------------------------------------------------------------*/
6711 
6712 /* Almost identical to findShadowTmpV. */
6713 static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
6714 {
6715    TempMapEnt* ent;
6716    /* VG_(indexXA) range-checks 'orig', hence no need to check
6717       here. */
6718    ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
6719    tl_assert(ent->kind == Orig);
6720    if (ent->shadowB == IRTemp_INVALID) {
6721       IRTemp tmpB
6722         = newTemp( mce, Ity_I32, BSh );
6723       /* newTemp may cause mce->tmpMap to resize, hence previous results
6724          from VG_(indexXA) are invalid. */
6725       ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
6726       tl_assert(ent->kind == Orig);
6727       tl_assert(ent->shadowB == IRTemp_INVALID);
6728       ent->shadowB = tmpB;
6729    }
6730    return ent->shadowB;
6731 }
6732 
6733 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
6734 {
6735    return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
6736 }
6737 
6738 
6739 /* Make a guarded origin load, with no special handling in the
6740    didn't-happen case.  A GUARD of NULL is assumed to mean "always
6741    True".
6742 
6743    Generate IR to do a shadow origins load from BASEADDR+OFFSET and
6744    return the otag.  The loaded size is SZB.  If GUARD evaluates to
6745    False at run time then the returned otag is zero.
6746 */
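/* For example (hypothetical call): gen_guarded_load_b(mce, 8, addr, 0,
   guard) emits a guarded dirty call to MC_(helperc_b_load8)(addr) and
   yields the resulting otag as an Ity_I32 atom, narrowing from the
   host word width if necessary. */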
6747 static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB,
6748                                     IRAtom* baseaddr,
6749                                     Int offset, IRExpr* guard )
6750 {
6751    void*    hFun;
6752    const HChar* hName;
6753    IRTemp   bTmp;
6754    IRDirty* di;
6755    IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
6756    IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
6757    IRAtom*  ea    = baseaddr;
6758    if (offset != 0) {
6759       IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
6760                                    : mkU64( (Long)(Int)offset );
6761       ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
6762    }
6763    bTmp = newTemp(mce, mce->hWordTy, BSh);
6764 
6765    switch (szB) {
6766       case 1: hFun  = (void*)&MC_(helperc_b_load1);
6767               hName = "MC_(helperc_b_load1)";
6768               break;
6769       case 2: hFun  = (void*)&MC_(helperc_b_load2);
6770               hName = "MC_(helperc_b_load2)";
6771               break;
6772       case 4: hFun  = (void*)&MC_(helperc_b_load4);
6773               hName = "MC_(helperc_b_load4)";
6774               break;
6775       case 8: hFun  = (void*)&MC_(helperc_b_load8);
6776               hName = "MC_(helperc_b_load8)";
6777               break;
6778       case 16: hFun  = (void*)&MC_(helperc_b_load16);
6779                hName = "MC_(helperc_b_load16)";
6780                break;
6781       case 32: hFun  = (void*)&MC_(helperc_b_load32);
6782                hName = "MC_(helperc_b_load32)";
6783                break;
6784       default:
6785          VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
6786          tl_assert(0);
6787    }
6788    di = unsafeIRDirty_1_N(
6789            bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
6790            mkIRExprVec_1( ea )
6791         );
6792    if (guard) {
6793       di->guard = guard;
6794       /* Ideally the didn't-happen return value here would be
6795          all-zeroes (unknown-origin), so it'd be harmless if it got
6796          used inadvertently.  We slum it out with the IR-mandated
6797          default value (0b01 repeating, 0x55 etc) as that'll probably
6798          trump all legitimate otags via Max32, and it's pretty
6799          obviously bogus. */
6800    }
6801    /* no need to mess with any annotations.  This call accesses
6802       neither guest state nor guest memory. */
6803    stmt( 'B', mce, IRStmt_Dirty(di) );
6804    if (mce->hWordTy == Ity_I64) {
6805       /* 64-bit host */
6806       IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
6807       assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
6808       return mkexpr(bTmp32);
6809    } else {
6810       /* 32-bit host */
6811       return mkexpr(bTmp);
6812    }
6813 }
6814 
6815 
6816 /* Generate IR to do a shadow origins load from BASEADDR+OFFSET.  The
6817    loaded size is SZB.  The load is regarded as unconditional (always
6818    happens).
6819 */
6820 static IRAtom* gen_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
6821                             Int offset )
6822 {
6823    return gen_guarded_load_b(mce, szB, baseaddr, offset, NULL/*guard*/);
6824 }
6825 
6826 
6827 /* The most general handler for guarded origin loads.  A GUARD of NULL
6828    is assumed to mean "always True".
6829 
6830    Generate IR to do a shadow origin load from ADDR+BIAS and return
6831    the B bits.  The loaded type is TY.  If GUARD evaluates to False at
6832    run time then the returned B bits are simply BALT instead.
6833 */
6834 static
6835 IRAtom* expr2ori_Load_guarded_General ( MCEnv* mce,
6836                                         IRType ty,
6837                                         IRAtom* addr, UInt bias,
6838                                         IRAtom* guard, IRAtom* balt )
6839 {
6840    /* If the guard evaluates to True, this will hold the loaded
6841       origin.  If the guard evaluates to False, this will be zero,
6842       meaning "unknown origin", in which case we will have to replace
6843       it using an ITE below. */
6844    IRAtom* iftrue
6845       = assignNew('B', mce, Ity_I32,
6846                   gen_guarded_load_b(mce, sizeofIRType(ty),
6847                                      addr, bias, guard));
6848    /* These are the bits we will return if the load doesn't take
6849       place. */
6850    IRAtom* iffalse
6851       = balt;
6852    /* Prepare the cond for the ITE.  Convert a NULL cond into
6853       something that iropt knows how to fold out later. */
6854    IRAtom* cond
6855       = guard == NULL  ? mkU1(1)  : guard;
6856    /* And assemble the final result. */
6857    return assignNew('B', mce, Ity_I32, IRExpr_ITE(cond, iftrue, iffalse));
6858 }
6859 
6860 
6861 /* Generate a shadow origins store.  guard :: Ity_I1 controls whether
6862    the store really happens; NULL means it unconditionally does. */
6863 static void gen_store_b ( MCEnv* mce, Int szB,
6864                           IRAtom* baseaddr, Int offset, IRAtom* dataB,
6865                           IRAtom* guard )
6866 {
6867    void*    hFun;
6868    const HChar* hName;
6869    IRDirty* di;
6870    IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
6871    IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
6872    IRAtom*  ea    = baseaddr;
6873    if (guard) {
6874       tl_assert(isOriginalAtom(mce, guard));
6875       tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
6876    }
6877    if (offset != 0) {
6878       IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
6879                                    : mkU64( (Long)(Int)offset );
6880       ea = assignNew(  'B', mce, aTy, binop(opAdd, ea, off));
6881    }
6882    if (mce->hWordTy == Ity_I64)
6883       dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
6884 
6885    switch (szB) {
6886       case 1: hFun  = (void*)&MC_(helperc_b_store1);
6887               hName = "MC_(helperc_b_store1)";
6888               break;
6889       case 2: hFun  = (void*)&MC_(helperc_b_store2);
6890               hName = "MC_(helperc_b_store2)";
6891               break;
6892       case 4: hFun  = (void*)&MC_(helperc_b_store4);
6893               hName = "MC_(helperc_b_store4)";
6894               break;
6895       case 8: hFun  = (void*)&MC_(helperc_b_store8);
6896               hName = "MC_(helperc_b_store8)";
6897               break;
6898       case 16: hFun  = (void*)&MC_(helperc_b_store16);
6899                hName = "MC_(helperc_b_store16)";
6900                break;
6901       case 32: hFun  = (void*)&MC_(helperc_b_store32);
6902                hName = "MC_(helperc_b_store32)";
6903                break;
6904       default:
6905          tl_assert(0);
6906    }
6907    di = unsafeIRDirty_0_N( 2/*regparms*/,
6908            hName, VG_(fnptr_to_fnentry)( hFun ),
6909            mkIRExprVec_2( ea, dataB )
6910         );
6911    /* no need to mess with any annotations.  This call accesses
6912       neither guest state nor guest memory. */
6913    if (guard) di->guard = guard;
6914    stmt( 'B', mce, IRStmt_Dirty(di) );
6915 }
6916 
6917 static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
6918    IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
6919    if (eTy == Ity_I64)
6920       return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
6921    if (eTy == Ity_I32)
6922       return e;
6923    tl_assert(0);
6924 }
6925 
6926 static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
6927    IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
6928    tl_assert(eTy == Ity_I32);
6929    if (dstTy == Ity_I64)
6930       return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
6931    tl_assert(0);
6932 }
6933 
6934 
6935 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
6936 {
6937    tl_assert(MC_(clo_mc_level) == 3);
6938 
6939    switch (e->tag) {
6940 
6941       case Iex_GetI: {
6942          IRRegArray* descr_b;
6943          IRAtom      *t1, *t2, *t3, *t4;
6944          IRRegArray* descr      = e->Iex.GetI.descr;
6945          IRType equivIntTy
6946             = MC_(get_otrack_reg_array_equiv_int_type)(descr);
6947          /* If this array is unshadowable for whatever reason, use the
6948             usual approximation. */
6949          if (equivIntTy == Ity_INVALID)
6950             return mkU32(0);
6951          tl_assert(sizeofIRType(equivIntTy) >= 4);
6952          tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
6953          descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
6954                                  equivIntTy, descr->nElems );
6955          /* Do a shadow indexed get of the same size, giving t1.  Take
6956             the bottom 32 bits of it, giving t2.  Compute into t3 the
6957             origin for the index (almost certainly zero, but there's
6958             no harm in being completely general here, since iropt will
6959             remove any useless code), and fold it in, giving a final
6960             value t4. */
6961          t1 = assignNew( 'B', mce, equivIntTy,
6962                           IRExpr_GetI( descr_b, e->Iex.GetI.ix,
6963                                                 e->Iex.GetI.bias ));
6964          t2 = narrowTo32( mce, t1 );
6965          t3 = schemeE( mce, e->Iex.GetI.ix );
6966          t4 = gen_maxU32( mce, t2, t3 );
6967          return t4;
6968       }
6969       case Iex_CCall: {
6970          Int i;
6971          IRAtom*  here;
6972          IRExpr** args = e->Iex.CCall.args;
6973          IRAtom*  curr = mkU32(0);
6974          for (i = 0; args[i]; i++) {
6975             tl_assert(i < 32);
6976             tl_assert(isOriginalAtom(mce, args[i]));
6977             /* Only take notice of this arg if the callee's
6978                mc-exclusion mask does not say it is to be excluded. */
6979             if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
6980                /* the arg is to be excluded from definedness checking.
6981                   Do nothing. */
6982                if (0) VG_(printf)("excluding %s(%d)\n",
6983                                   e->Iex.CCall.cee->name, i);
6984             } else {
6985                /* calculate the arg's definedness, and pessimistically
6986                   merge it in. */
6987                here = schemeE( mce, args[i] );
6988                curr = gen_maxU32( mce, curr, here );
6989             }
6990          }
6991          return curr;
6992       }
6993       case Iex_Load: {
6994          Int dszB;
6995          dszB = sizeofIRType(e->Iex.Load.ty);
6996          /* assert that the B value for the address is already
6997             available (somewhere) */
6998          tl_assert(isIRAtom(e->Iex.Load.addr));
6999          tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
7000          return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
7001       }
7002       case Iex_ITE: {
7003          IRAtom* b1 = schemeE( mce, e->Iex.ITE.cond );
7004          IRAtom* b3 = schemeE( mce, e->Iex.ITE.iftrue );
7005          IRAtom* b2 = schemeE( mce, e->Iex.ITE.iffalse );
7006          return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
7007       }
7008       case Iex_Qop: {
7009          IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
7010          IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
7011          IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
7012          IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
7013          return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
7014                                  gen_maxU32( mce, b3, b4 ) );
7015       }
7016       case Iex_Triop: {
7017          IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
7018          IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
7019          IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
7020          return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
7021       }
7022       case Iex_Binop: {
7023          switch (e->Iex.Binop.op) {
7024             case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
7025             case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
7026             case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
7027             case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
7028                /* Just say these all produce a defined result,
7029                   regardless of their arguments.  See
7030                   COMMENT_ON_CasCmpEQ in this file. */
7031                return mkU32(0);
7032             default: {
7033                IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
7034                IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
7035                return gen_maxU32( mce, b1, b2 );
7036             }
7037          }
7038          tl_assert(0);
7039          /*NOTREACHED*/
7040       }
7041       case Iex_Unop: {
7042          IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
7043          return b1;
7044       }
7045       case Iex_Const:
7046          return mkU32(0);
7047       case Iex_RdTmp:
7048          return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
7049       case Iex_Get: {
7050          Int b_offset = MC_(get_otrack_shadow_offset)(
7051                            e->Iex.Get.offset,
7052                            sizeofIRType(e->Iex.Get.ty)
7053                         );
7054          tl_assert(b_offset >= -1
7055                    && b_offset <= mce->layout->total_sizeB -4);
7056          if (b_offset >= 0) {
7057             /* FIXME: this isn't an atom! */
7058             return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
7059                                Ity_I32 );
7060          }
7061          return mkU32(0);
7062       }
7063       default:
7064          VG_(printf)("mc_translate.c: schemeE: unhandled: ");
7065          ppIRExpr(e);
7066          VG_(tool_panic)("memcheck:schemeE");
7067    }
7068 }
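/* To illustrate the combining rule used throughout schemeE: for a
   binary operation, say

      t5 = Add32(t1,t2)

   the B (origin) value computed for t5 is, in effect,

      B(t5) = maxU32( B(t1), B(t2) )

   that is, the origins of the inputs are pessimistically merged by
   taking the unsigned 32-bit maximum.  This is only a sketch; the
   real IR goes through gen_maxU32/assignNew, and the temp numbering
   shown is arbitrary. */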
7069 
7070 
7071 static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
7072 {
7073    // This is a hacked version of do_shadow_Dirty
7074    Int       i, k, n, toDo, gSz, gOff;
7075    IRAtom    *here, *curr;
7076    IRTemp    dst;
7077 
7078    /* First check the guard. */
7079    curr = schemeE( mce, d->guard );
7080 
7081    /* Now round up all inputs and maxU32 over them. */
7082 
7083    /* Inputs: unmasked args
7084       Note: arguments are evaluated REGARDLESS of the guard expression */
7085    for (i = 0; d->args[i]; i++) {
7086       IRAtom* arg = d->args[i];
7087       if ( (d->cee->mcx_mask & (1<<i))
7088            || UNLIKELY(is_IRExpr_VECRET_or_BBPTR(arg)) ) {
7089          /* ignore this arg */
7090       } else {
7091          here = schemeE( mce, arg );
7092          curr = gen_maxU32( mce, curr, here );
7093       }
7094    }
7095 
7096    /* Inputs: guest state that we read. */
7097    for (i = 0; i < d->nFxState; i++) {
7098       tl_assert(d->fxState[i].fx != Ifx_None);
7099       if (d->fxState[i].fx == Ifx_Write)
7100          continue;
7101 
7102       /* Enumerate the described state segments */
7103       for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
7104          gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
7105          gSz  = d->fxState[i].size;
7106 
7107          /* Ignore any sections marked as 'always defined'. */
7108          if (isAlwaysDefd(mce, gOff, gSz)) {
7109             if (0)
7110             VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
7111                         gOff, gSz);
7112             continue;
7113          }
7114 
7115          /* This state element is read or modified.  So we need to
7116             consider it.  If larger than 4 bytes, deal with it in
7117             4-byte chunks. */
7118          while (True) {
7119             Int b_offset;
7120             tl_assert(gSz >= 0);
7121             if (gSz == 0) break;
7122             n = gSz <= 4 ? gSz : 4;
7123             /* update 'curr' with maxU32 of the state slice
7124                gOff .. gOff+n-1 */
7125             b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
7126             if (b_offset != -1) {
7127                /* Observe the guard expression. If it is false use 0, i.e.
7128                   nothing is known about the origin */
7129                IRAtom *cond, *iffalse, *iftrue;
7130 
7131                cond = assignNew( 'B', mce, Ity_I1, d->guard);
7132                iffalse = mkU32(0);
7133                iftrue  = assignNew( 'B', mce, Ity_I32,
7134                                     IRExpr_Get(b_offset
7135                                                  + 2*mce->layout->total_sizeB,
7136                                                Ity_I32));
7137                here = assignNew( 'B', mce, Ity_I32,
7138                                  IRExpr_ITE(cond, iftrue, iffalse));
7139                curr = gen_maxU32( mce, curr, here );
7140             }
7141             gSz -= n;
7142             gOff += n;
7143          }
7144       }
7145    }
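   /* Illustrative sketch (temp names arbitrary, not the verbatim IR)
      of what the loop above emits for one tracked 4-byte slice of
      guest state that the helper reads:

         cond = <d->guard>
         got  = GET:I32( b_offset + 2*total_sizeB )   (its B/origin slot)
         here = ITE( cond, got, 0x0:I32 )
         curr = maxU32( curr, here )

      so a slice whose guard evaluates to false contributes no origin. */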
7146 
7147    /* Inputs: memory */
7148 
7149    if (d->mFx != Ifx_None) {
7150       /* Because we may do multiple shadow loads/stores from the same
7151          base address, it's best to do a single test of its
7152          definedness right now.  Post-instrumentation optimisation
7153          should remove all but this test. */
7154       tl_assert(d->mAddr);
7155       here = schemeE( mce, d->mAddr );
7156       curr = gen_maxU32( mce, curr, here );
7157    }
7158 
7159    /* Deal with memory inputs (reads or modifies) */
7160    if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
7161       toDo   = d->mSize;
7162       /* chew off 32-bit chunks.  We don't care about the endianness
7163          since it's all going to be condensed down to a single bit,
7164          but nevertheless choose an endianness which is hopefully
7165          native to the platform. */
7166       while (toDo >= 4) {
7167          here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
7168                                     d->guard );
7169          curr = gen_maxU32( mce, curr, here );
7170          toDo -= 4;
7171       }
7172       /* handle possible 16-bit excess */
7173       while (toDo >= 2) {
7174          here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
7175                                     d->guard );
7176          curr = gen_maxU32( mce, curr, here );
7177          toDo -= 2;
7178       }
7179       /* chew off the remaining 8-bit chunk, if any */
7180       if (toDo == 1) {
7181          here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
7182                                     d->guard );
7183          curr = gen_maxU32( mce, curr, here );
7184          toDo -= 1;
7185       }
7186       tl_assert(toDo == 0);
7187    }
7188 
7189    /* Whew!  So curr is a 32-bit B-value which should give an origin
7190       of some use if any of the inputs to the helper are undefined.
7191       Now we need to re-distribute the results to all destinations. */
7192 
7193    /* Outputs: the destination temporary, if there is one. */
7194    if (d->tmp != IRTemp_INVALID) {
7195       dst   = findShadowTmpB(mce, d->tmp);
7196       assign( 'V', mce, dst, curr );
7197    }
7198 
7199    /* Outputs: guest state that we write or modify. */
7200    for (i = 0; i < d->nFxState; i++) {
7201       tl_assert(d->fxState[i].fx != Ifx_None);
7202       if (d->fxState[i].fx == Ifx_Read)
7203          continue;
7204 
7205       /* Enumerate the described state segments */
7206       for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
7207          gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
7208          gSz  = d->fxState[i].size;
7209 
7210          /* Ignore any sections marked as 'always defined'. */
7211          if (isAlwaysDefd(mce, gOff, gSz))
7212             continue;
7213 
7214          /* This state element is written or modified.  So we need to
7215             consider it.  If larger than 4 bytes, deal with it in
7216             4-byte chunks. */
7217          while (True) {
7218             Int b_offset;
7219             tl_assert(gSz >= 0);
7220             if (gSz == 0) break;
7221             n = gSz <= 4 ? gSz : 4;
7222             /* Write 'curr' to the state slice gOff .. gOff+n-1 */
7223             b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
7224             if (b_offset != -1) {
7225 
7226                /* If the guard expression evaluates to false we simply Put
7227                   the value that is already stored in the guest state slot */
7228                IRAtom *cond, *iffalse;
7229 
7230                cond    = assignNew('B', mce, Ity_I1,
7231                                    d->guard);
7232                iffalse = assignNew('B', mce, Ity_I32,
7233                                    IRExpr_Get(b_offset +
7234                                               2*mce->layout->total_sizeB,
7235                                               Ity_I32));
7236                curr = assignNew('V', mce, Ity_I32,
7237                                 IRExpr_ITE(cond, curr, iffalse));
7238 
7239                stmt( 'B', mce, IRStmt_Put(b_offset
7240                                           + 2*mce->layout->total_sizeB,
7241                                           curr ));
7242             }
7243             gSz -= n;
7244             gOff += n;
7245          }
7246       }
7247    }
7248 
7249    /* Outputs: memory that we write or modify.  Same comments about
7250       endianness as above apply. */
7251    if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
7252       toDo   = d->mSize;
7253       /* chew off 32-bit chunks */
7254       while (toDo >= 4) {
7255          gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
7256                       d->guard );
7257          toDo -= 4;
7258       }
7259       /* handle possible 16-bit excess */
7260       while (toDo >= 2) {
7261          gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
7262                       d->guard );
7263          toDo -= 2;
7264       }
7265       /* chew off the remaining 8-bit chunk, if any */
7266       if (toDo == 1) {
7267          gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
7268                       d->guard );
7269          toDo -= 1;
7270       }
7271       tl_assert(toDo == 0);
7272    }
7273 }
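/* Worked example of the memory chunking above: for a helper with
   mFx == Ifx_Read and mSize == 7, the inputs part emits guarded
   origin loads of sizes 4, 2 and 1 at offsets 0, 4 and 6 from mAddr
   and maxU32s each result into 'curr'; a written area of the same
   size is chunked identically on the output side via gen_store_b.
   Sketch only -- the exact statements depend on gen_guarded_load_b,
   gen_store_b and the guard. */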
7274 
7275 
7276 /* Generate IR for origin shadowing for a general guarded store. */
7277 static void do_origins_Store_guarded ( MCEnv* mce,
7278                                        IREndness stEnd,
7279                                        IRExpr* stAddr,
7280                                        IRExpr* stData,
7281                                        IRExpr* guard )
7282 {
7283    Int     dszB;
7284    IRAtom* dataB;
7285    /* assert that the B value for the address is already available
7286       (somewhere), since the call to schemeE will want to see it.
7287       XXXX how does this actually ensure that?? */
7288    tl_assert(isIRAtom(stAddr));
7289    tl_assert(isIRAtom(stData));
7290    dszB  = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
7291    dataB = schemeE( mce, stData );
7292    gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, guard );
7293 }
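/* For instance, for a guarded store of I32 data, roughly
   "if (t9) STle(t2) = t8", this computes dataB = schemeE(t8) and asks
   gen_store_b to write dataB to the origin shadow of the 4 bytes at
   t2, conditional on t9.  Sketch only; gen_store_b chooses the actual
   helper call(s). */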
7294 
7295 
7296 /* Generate IR for origin shadowing for a plain store. */
7297 static void do_origins_Store_plain ( MCEnv* mce,
7298                                      IREndness stEnd,
7299                                      IRExpr* stAddr,
7300                                      IRExpr* stData )
7301 {
7302    do_origins_Store_guarded ( mce, stEnd, stAddr, stData,
7303                               NULL/*guard*/ );
7304 }
7305 
7306 
7307 /* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
7308 
7309 static void do_origins_StoreG ( MCEnv* mce, IRStoreG* sg )
7310 {
7311    do_origins_Store_guarded( mce, sg->end, sg->addr,
7312                              sg->data, sg->guard );
7313 }
7314 
7315 static void do_origins_LoadG ( MCEnv* mce, IRLoadG* lg )
7316 {
7317    IRType loadedTy = Ity_INVALID;
7318    switch (lg->cvt) {
7319       case ILGop_IdentV128: loadedTy = Ity_V128; break;
7320       case ILGop_Ident64:   loadedTy = Ity_I64;  break;
7321       case ILGop_Ident32:   loadedTy = Ity_I32;  break;
7322       case ILGop_16Uto32:   loadedTy = Ity_I16;  break;
7323       case ILGop_16Sto32:   loadedTy = Ity_I16;  break;
7324       case ILGop_8Uto32:    loadedTy = Ity_I8;   break;
7325       case ILGop_8Sto32:    loadedTy = Ity_I8;   break;
7326       default: VG_(tool_panic)("schemeS.IRLoadG");
7327    }
7328    IRAtom* ori_alt
7329             = schemeE( mce, lg->alt );
7330    IRAtom* ori_final
7331       = expr2ori_Load_guarded_General(mce, loadedTy,
7332                                       lg->addr, 0/*addr bias*/,
7333                                       lg->guard, ori_alt );
7334    /* And finally, bind the origin to the destination temporary. */
7335    assign( 'B', mce, findShadowTmpB(mce, lg->dst), ori_final );
7336 }
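/* For example, for a guarded widening load, roughly
   "t5 = if (t7) 16Uto32(LDle:I16(t3)) else t4", loadedTy is Ity_I16,
   ori_alt is the origin of t4, and ori_final behaves like
   ITE(t7, <origin of the 2 bytes at t3>, ori_alt), which then becomes
   the B value of t5.  Illustrative only; the real IR is produced by
   expr2ori_Load_guarded_General. */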
7337 
7338 
7339 static void schemeS ( MCEnv* mce, IRStmt* st )
7340 {
7341    tl_assert(MC_(clo_mc_level) == 3);
7342 
7343    switch (st->tag) {
7344 
7345       case Ist_AbiHint:
7346          /* The value-check instrumenter handles this - by arranging
7347             to pass the address of the next instruction to
7348             MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
7349             happen for origin tracking w.r.t. AbiHints.  So there is
7350             nothing to do here. */
7351          break;
7352 
7353       case Ist_PutI: {
7354          IRPutI *puti = st->Ist.PutI.details;
7355          IRRegArray* descr_b;
7356          IRAtom      *t1, *t2, *t3, *t4;
7357          IRRegArray* descr = puti->descr;
7358          IRType equivIntTy
7359             = MC_(get_otrack_reg_array_equiv_int_type)(descr);
7360          /* If this array is unshadowable for whatever reason,
7361             generate no code. */
7362          if (equivIntTy == Ity_INVALID)
7363             break;
7364          tl_assert(sizeofIRType(equivIntTy) >= 4);
7365          tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
7366          descr_b
7367             = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
7368                             equivIntTy, descr->nElems );
7369          /* Compute a value to Put - the conjoinment of the origin for
7370             the data to be Put-ted (obviously) and of the index value
7371             (not so obviously). */
7372          t1 = schemeE( mce, puti->data );
7373          t2 = schemeE( mce, puti->ix );
7374          t3 = gen_maxU32( mce, t1, t2 );
7375          t4 = zWidenFrom32( mce, equivIntTy, t3 );
7376          stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
7377                                                puti->bias, t4) ));
7378          break;
7379       }
7380 
7381       case Ist_Dirty:
7382          do_origins_Dirty( mce, st->Ist.Dirty.details );
7383          break;
7384 
7385       case Ist_Store:
7386          do_origins_Store_plain( mce, st->Ist.Store.end,
7387                                       st->Ist.Store.addr,
7388                                       st->Ist.Store.data );
7389          break;
7390 
7391       case Ist_StoreG:
7392          do_origins_StoreG( mce, st->Ist.StoreG.details );
7393          break;
7394 
7395       case Ist_LoadG:
7396          do_origins_LoadG( mce, st->Ist.LoadG.details );
7397          break;
7398 
7399       case Ist_LLSC: {
7400          /* In short: treat a load-linked like a normal load followed
7401             by an assignment of the loaded (shadow) data to the result
7402             temporary.  Treat a store-conditional like a normal store,
7403             and mark the result temporary as defined. */
7404          if (st->Ist.LLSC.storedata == NULL) {
7405             /* Load Linked */
7406             IRType resTy
7407                = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
7408             IRExpr* vanillaLoad
7409                = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
7410             tl_assert(resTy == Ity_I64 || resTy == Ity_I32
7411                       || resTy == Ity_I16 || resTy == Ity_I8);
7412             assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
7413                               schemeE(mce, vanillaLoad));
7414          } else {
7415             /* Store conditional */
7416             do_origins_Store_plain( mce, st->Ist.LLSC.end,
7417                                     st->Ist.LLSC.addr,
7418                                     st->Ist.LLSC.storedata );
7419             /* For the rationale behind this, see comments at the
7420                place where the V-shadow for .result is constructed, in
7421                do_shadow_LLSC.  In short, we regard .result as
7422                always-defined. */
7423             assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
7424                               mkU32(0) );
7425          }
7426          break;
7427       }
7428 
7429       case Ist_Put: {
7430          Int b_offset
7431             = MC_(get_otrack_shadow_offset)(
7432                  st->Ist.Put.offset,
7433                  sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
7434               );
7435          if (b_offset >= 0) {
7436             /* FIXME: this isn't an atom! */
7437             stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
7438                                        schemeE( mce, st->Ist.Put.data )) );
7439          }
7440          break;
7441       }
7442 
7443       case Ist_WrTmp:
7444          assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
7445                            schemeE(mce, st->Ist.WrTmp.data) );
7446          break;
7447 
7448       case Ist_MBE:
7449       case Ist_NoOp:
7450       case Ist_Exit:
7451       case Ist_IMark:
7452          break;
7453 
7454       default:
7455          VG_(printf)("mc_translate.c: schemeS: unhandled: ");
7456          ppIRStmt(st);
7457          VG_(tool_panic)("memcheck:schemeS");
7458    }
7459 }
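/* A small end-to-end example of schemeS, assuming the offset is one
   that MC_(get_otrack_shadow_offset) tracks (b_offset >= 0): a guest
   state write

      PUT(<offset>) = t3

   is shadowed, via the Ist_Put case, by

      PUT(b_offset + 2*total_sizeB) = <B value of t3, from schemeE>

   i.e. the origin tag lands in the second shadow area of the guest
   state.  Sketch only; the offsets shown are symbolic. */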
7460 
7461 
7462 /*--------------------------------------------------------------------*/
7463 /*--- end                                           mc_translate.c ---*/
7464 /*--------------------------------------------------------------------*/
7465