1 /* -*- mode: C; c-basic-offset: 3; -*- */
2 
3 /*---------------------------------------------------------------*/
4 /*--- begin                                          ir_opt.c ---*/
5 /*---------------------------------------------------------------*/
6 
7 /*
8    This file is part of Valgrind, a dynamic binary instrumentation
9    framework.
10 
11    Copyright (C) 2004-2015 OpenWorks LLP
12       info@open-works.net
13 
14    This program is free software; you can redistribute it and/or
15    modify it under the terms of the GNU General Public License as
16    published by the Free Software Foundation; either version 2 of the
17    License, or (at your option) any later version.
18 
19    This program is distributed in the hope that it will be useful, but
20    WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22    General Public License for more details.
23 
24    You should have received a copy of the GNU General Public License
25    along with this program; if not, write to the Free Software
26    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27    02110-1301, USA.
28 
29    The GNU General Public License is contained in the file COPYING.
30 
31    Neither the names of the U.S. Department of Energy nor the
32    University of California nor the names of its contributors may be
33    used to endorse or promote products derived from this software
34    without prior written permission.
35 */
36 
37 #include "libvex_basictypes.h"
38 #include "libvex_ir.h"
39 #include "libvex.h"
40 
41 #include "main_util.h"
42 #include "main_globals.h"
43 #include "ir_opt.h"
44 
45 
46 /* Set to 1 for lots of debugging output. */
47 #define DEBUG_IROPT 0
48 
49 /* Set to 1 to gather some statistics. Currently only for sameIRExprs. */
50 #define STATS_IROPT 0
51 
52 
53 /* What iropt does, 29 Dec 04.
54 
55    It takes an IRSB and produces a new one with the same meaning,
56    defined thus:
57 
58    After execution of the new BB, all guest state and guest memory is
59    the same as after execution of the original.  This is true
60    regardless of how the block was exited (at the end vs side exit).
61 
62    In addition, parts of the guest state will be identical to that
63    created by execution of the original at the following observation
64    points:
65 
66    * In a dirty helper call, any parts of the guest state that the
67      helper states that it reads or modifies will be up to date.
68      Also, guest memory will be up to date.  Parts of the guest state
69      not marked as being read or modified by the helper cannot be
70      assumed to be up-to-date at the point where the helper is called.
71 
72    * If iropt_register_updates == VexRegUpdSpAtMemAccess :
     The guest state is only up to date as explained above
74      (i.e. at SB exits and as specified by dirty helper call).
75      Also, the stack pointer register is up to date at memory
76      exception points (as this is needed for the stack extension
77      logic in m_signals.c).
78 
79    * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
80      Immediately prior to any load or store, those parts of the guest
81      state marked as requiring precise exceptions will be up to date.
82      Also, guest memory will be up to date.  Parts of the guest state
83      not marked as requiring precise exceptions cannot be assumed to
84      be up-to-date at the point of the load/store.
85 
86    * If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
87      Same as minimal, but all the guest state is up to date at memory
88      exception points.
89 
90    * If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
91      Guest state is up to date at each instruction.
92 
93    The relative order of loads and stores (including loads/stores of
94    guest memory done by dirty helpers annotated as such) is not
95    changed.  However, the relative order of loads with no intervening
96    stores/modifies may be changed.
97 
98    Transformation order
99    ~~~~~~~~~~~~~~~~~~~~
100 
101    There are three levels of optimisation, controlled by
102    vex_control.iropt_level.  Define first:
103 
104    "Cheap transformations" are the following sequence:
105       * Redundant-Get removal
106       * Redundant-Put removal
107       * Constant propagation/folding
108       * Dead code removal
109       * Specialisation of clean helper functions
110       * Dead code removal
111 
112    "Expensive transformations" are the following sequence:
113       * CSE
114       * Folding of add/sub chains
115       * Redundant-GetI removal
116       * Redundant-PutI removal
117       * Dead code removal
118 
119    Then the transformations are as follows, as defined by
120    vex_control.iropt_level:
121 
122    Level 0:
123       * Flatten into atomic form.
124 
125    Level 1: the following sequence:
126       * Flatten into atomic form.
127       * Cheap transformations.
128 
129    Level 2: the following sequence
130       * Flatten into atomic form.
131       * Cheap transformations.
132       * If block contains any floating or vector types, CSE.
133       * If block contains GetI or PutI, Expensive transformations.
134       * Try unrolling loops.  Three possible outcomes:
135         - No effect: do nothing more.
136         - Unrolled a loop, and block does not contain GetI or PutI:
137           Do: * CSE
138               * Dead code removal
139         - Unrolled a loop, and block contains GetI or PutI:
140           Do: * Expensive transformations
141               * Cheap transformations
142 */
143 
144 /* Implementation notes, 29 Dec 04.
145 
146    TODO (important): I think rPutI removal ignores precise exceptions
147    and is therefore in a sense, wrong.  In the sense that PutIs are
148    assumed not to write parts of the guest state that we need to have
149    up-to-date at loads/stores.  So far on x86 guest that has not
150    mattered since indeed only the x87 FP registers and tags are
151    accessed using GetI/PutI, and there is no need so far for them to
152    be up to date at mem exception points.  The rPutI pass should be
153    fixed.
154 
155    TODO: improve pessimistic handling of precise exceptions
156      in the tree builder.
157 
158    TODO: check interaction of rGetI and dirty helpers.
159 
160    F64i constants are treated differently from other constants.
161    They are not regarded as atoms, and instead lifted off and
162    bound to temps.  This allows them to participate in CSE, which
163    is important for getting good performance for x86 guest code.
164 
165    CSE up F64 literals (already doing F64is)
166 
167    CSE: consider carefully the requirement for precise exns
168         prior to making CSE any more aggressive.  */
169 
170 
171 /*---------------------------------------------------------------*/
172 /*--- Finite mappery, of a sort                               ---*/
173 /*---------------------------------------------------------------*/
174 
/* General map from HWord-sized thing to HWord-sized thing.  Could be by
176    hashing, but it's not clear whether or not this would really be any
177    faster. */
178 
typedef
   struct {
      Bool*  inuse;   /* per-slot liveness; False marks a deleted entry */
      HWord* key;
      HWord* val;
      Int    size;    /* allocated capacity of the three parallel arrays */
      Int    used;    /* slots handed out so far (including dead ones) */
   }
   HashHW;
188 
newHHW(void)189 static HashHW* newHHW ( void )
190 {
191    HashHW* h = LibVEX_Alloc_inline(sizeof(HashHW));
192    h->size   = 8;
193    h->used   = 0;
194    h->inuse  = LibVEX_Alloc_inline(h->size * sizeof(Bool));
195    h->key    = LibVEX_Alloc_inline(h->size * sizeof(HWord));
196    h->val    = LibVEX_Alloc_inline(h->size * sizeof(HWord));
197    return h;
198 }
199 
200 
201 /* Look up key in the map. */
202 
lookupHHW(HashHW * h,HWord * val,HWord key)203 static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
204 {
205    Int i;
206    /* vex_printf("lookupHHW(%llx)\n", key ); */
207    for (i = 0; i < h->used; i++) {
208       if (h->inuse[i] && h->key[i] == key) {
209          if (val)
210             *val = h->val[i];
211          return True;
212       }
213    }
214    return False;
215 }
216 
217 
218 /* Add key->val to the map.  Replaces any existing binding for key. */
219 
static void addToHHW ( HashHW* h, HWord key, HWord val )
{
   Int rd, wr;
   /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */

   /* If the key is already bound, just overwrite the value. */
   for (rd = 0; rd < h->used; rd++) {
      if (h->inuse[rd] && h->key[rd] == key) {
         h->val[rd] = val;
         return;
      }
   }

   /* Out of slots?  Double the capacity, compacting away dead
      entries while copying across. */
   if (h->used == h->size) {
      Bool*  inuse2 = LibVEX_Alloc_inline(2 * h->size * sizeof(Bool));
      HWord* key2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
      HWord* val2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
      wr = 0;
      for (rd = 0; rd < h->size; rd++) {
         if (!h->inuse[rd])
            continue;
         inuse2[wr] = True;
         key2[wr]   = h->key[rd];
         val2[wr]   = h->val[rd];
         wr++;
      }
      h->used  = wr;
      h->size *= 2;
      h->inuse = inuse2;
      h->key   = key2;
      h->val   = val2;
   }

   /* Append the new binding in the first unused slot. */
   vassert(h->used < h->size);
   h->inuse[h->used] = True;
   h->key[h->used]   = key;
   h->val[h->used]   = val;
   h->used++;
}
260 
261 
262 /*---------------------------------------------------------------*/
263 /*--- Flattening out a BB into atomic SSA form                ---*/
264 /*---------------------------------------------------------------*/
265 
266 /* Non-critical helper, heuristic for reducing the number of tmp-tmp
267    copies made by flattening.  If in doubt return False. */
268 
isFlat(IRExpr * e)269 static Bool isFlat ( IRExpr* e )
270 {
271    if (e->tag == Iex_Get)
272       return True;
273    if (e->tag == Iex_Binop)
274       return toBool( isIRAtom(e->Iex.Binop.arg1)
275                      && isIRAtom(e->Iex.Binop.arg2) );
276    if (e->tag == Iex_Load)
277       return isIRAtom(e->Iex.Load.addr);
278    return False;
279 }
280 
281 /* Flatten out 'ex' so it is atomic, returning a new expression with
282    the same value, after having appended extra IRTemp assignments to
283    the end of 'bb'. */
284 
/* Recursively flatten 'ex' to an atom (RdTmp, or a non-F64i Const),
   appending any intermediate IRTemp bindings to the end of 'bb'. */
static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         /* Flatten the index; descr and bias are not expressions and
            are carried over unchanged. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         /* Get has no subexpressions; just lift it onto a temp. */
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop: {
         IRQop* qop = ex->Iex.Qop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(qop->op,
                         flatten_Expr(bb, qop->arg1),
                         flatten_Expr(bb, qop->arg2),
                         flatten_Expr(bb, qop->arg3),
                         flatten_Expr(bb, qop->arg4))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(triop->op,
                         flatten_Expr(bb, triop->arg1),
                         flatten_Expr(bb, triop->arg2),
                         flatten_Expr(bb, triop->arg3))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         /* Copy the NULL-terminated arg vector first so the original
            expression is not mutated, then flatten each arg in place
            in the copy. */
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_ITE:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
                       flatten_Expr(bb, ex->Iex.ITE.iftrue),
                       flatten_Expr(bb, ex->Iex.ITE.iffalse))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         /* Already atomic; nothing to do. */
         return ex;

      default:
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}
396 
397 
398 /* Append a completely flattened form of 'st' to the end of 'bb'. */
399 
/* Append a completely flattened form of 'st' (all subexpressions made
   atomic via flatten_Expr) to the end of 'bb'. */
static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr   *e1, *e2, *e3, *e4, *e5;
   IRDirty  *d,  *d2;
   IRCAS    *cas, *cas2;
   IRPutI   *puti, *puti2;
   IRLoadG  *lg;
   IRStoreG *sg;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         /* Flatten both the index and the data; rebuild the PutI. */
         puti = st->Ist.PutI.details;
         e1 = flatten_Expr(bb, puti->ix);
         e2 = flatten_Expr(bb, puti->data);
         puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
         addStmtToIRSB(bb, IRStmt_PutI(puti2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
         break;
      case Ist_StoreG:
         sg = st->Ist.StoreG.details;
         e1 = flatten_Expr(bb, sg->addr);
         e2 = flatten_Expr(bb, sg->data);
         e3 = flatten_Expr(bb, sg->guard);
         addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3));
         break;
      case Ist_LoadG:
         lg = st->Ist.LoadG.details;
         e1 = flatten_Expr(bb, lg->addr);
         e2 = flatten_Expr(bb, lg->alt);
         e3 = flatten_Expr(bb, lg->guard);
         addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                                        e1, e2, e3));
         break;
      case Ist_CAS:
         /* expdHi/dataHi are optional (NULL for single-width CAS);
            only flatten them when present. */
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         /* storedata is NULL for a load-linked. */
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         /* Shallow-copy the dirty call so the original is untouched,
            then flatten mAddr (if any), the guard, and each real arg
            (VECRET/BBPTR markers are left as-is). */
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               d2->args[i] = flatten_Expr(bb, arg);
         }
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         /* No subexpressions; copy through unchanged. */
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst,
                                       st->Ist.Exit.offsIP));
         break;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}
519 
520 
/* Produce a fully flattened copy of 'in'; 'in' itself is not
   modified. */
static IRSB* flatten_BB ( IRSB* in )
{
   Int   i;
   IRSB* out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   for (i = 0; i < in->stmts_used; i++) {
      if (in->stmts[i])
         flatten_Stmt( out, in->stmts[i] );
   }
   out->next     = flatten_Expr( out, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;
   return out;
}
535 
536 
537 /*---------------------------------------------------------------*/
538 /*--- In-place removal of redundant GETs                      ---*/
539 /*---------------------------------------------------------------*/
540 
541 /* Scan forwards, building up an environment binding (min offset, max
542    offset) pairs to values, which will either be temps or constants.
543 
544    On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
545    env and if it matches, replace the Get with the stored value.  If
546    there is no match, add a (minoff,maxoff) :-> t binding.
547 
548    On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
549    any binding which fully or partially overlaps with (minoff,maxoff).
550    Then add a new (minoff,maxoff) :-> t or c binding.  */
551 
552 /* Extract the min/max offsets from a guest state array descriptor. */
553 
554 inline
getArrayBounds(IRRegArray * descr,UInt * minoff,UInt * maxoff)555 static void getArrayBounds ( IRRegArray* descr,
556                              UInt* minoff, UInt* maxoff )
557 {
558    *minoff = descr->base;
559    *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
560    vassert((*minoff & ~0xFFFF) == 0);
561    vassert((*maxoff & ~0xFFFF) == 0);
562    vassert(*minoff <= *maxoff);
563 }
564 
565 /* Create keys, of the form ((minoffset << 16) | maxoffset). */
566 
mk_key_GetPut(Int offset,IRType ty)567 static UInt mk_key_GetPut ( Int offset, IRType ty )
568 {
569    /* offset should fit in 16 bits. */
570    UInt minoff = offset;
571    UInt maxoff = minoff + sizeofIRType(ty) - 1;
572    vassert((minoff & ~0xFFFF) == 0);
573    vassert((maxoff & ~0xFFFF) == 0);
574    return (minoff << 16) | maxoff;
575 }
576 
mk_key_GetIPutI(IRRegArray * descr)577 static UInt mk_key_GetIPutI ( IRRegArray* descr )
578 {
579    UInt minoff, maxoff;
580    getArrayBounds( descr, &minoff, &maxoff );
581    vassert((minoff & ~0xFFFF) == 0);
582    vassert((maxoff & ~0xFFFF) == 0);
583    return (minoff << 16) | maxoff;
584 }
585 
586 /* Supposing h has keys of the form generated by mk_key_GetPut and
587    mk_key_GetIPutI, invalidate any key which overlaps (k_lo
588    .. k_hi).
589 */
static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
{
   Int ix;
   vassert(k_lo <= k_hi);
   /* Kill every live entry whose (min..max) range touches
      (k_lo..k_hi) at all. */
   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */

   for (ix = 0; ix < h->used; ix++) {
      UInt e_lo, e_hi;
      if (!h->inuse[ix])
         continue;
      /* Unpack the ((min << 16) | max) key. */
      e_lo = (((UInt)h->key[ix]) >> 16) & 0xFFFF;
      e_hi = ((UInt)h->key[ix]) & 0xFFFF;
      vassert(e_lo <= e_hi);
      /* Ranges overlap unless one ends before the other begins. */
      if (!(e_hi < k_lo || k_hi < e_lo))
         h->inuse[ix] = False;
   }
}
612 
613 
/* Forward pass over 'bb', replacing Gets whose value is already known
   (from a prior Get or Put of the same offset range) with that value.
   The env maps packed (minoff,maxoff) keys to IRExpr* values. */
static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
            writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate.  Note: 'key' was set
         by the Ist_Put branch above in this same iteration, so the
         binding recorded here is for this Put's own offset range. */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}
706 
707 
708 /*---------------------------------------------------------------*/
709 /*--- In-place removal of redundant PUTs                      ---*/
710 /*---------------------------------------------------------------*/
711 
712 /* Find any Get uses in st and invalidate any partially or fully
713    overlapping ranges listed in env.  Due to the flattening phase, the
714    only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
715 
static void handle_gets_Stmt (
               HashHW* env,
               IRStmt* st,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   Int     j;
   UInt    key = 0; /* keep gcc -O happy */
   Bool    isGet;
   Bool    memRW = False;   /* set if st reads/writes memory */
   IRExpr* e;

   switch (st->tag) {

      /* This is the only interesting case.  Deal with Gets in the RHS
         expression. */
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
               isGet = True;
               key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
               break;
            case Iex_GetI:
               isGet = True;
               key = mk_key_GetIPutI ( e->Iex.GetI.descr );
               break;
            case Iex_Load:
               isGet = False;
               memRW = True;
               break;
            default:
               isGet = False;
         }
         if (isGet) {
            /* A read of guest state: any pending-write entry that
               overlaps it must be dropped (the Put it came from can
               no longer be considered redundant). */
            UInt k_lo, k_hi;
            k_lo = (key >> 16) & 0xFFFF;
            k_hi = key & 0xFFFF;
            invalidateOverlaps(env, k_lo, k_hi);
         }
         break;

      /* Be very conservative for dirty helper calls; dump the entire
         environment.  The helper might read guest state, in which
         case it needs to be flushed first.  Also, the helper might
         access guest memory, in which case all parts of the guest
         state requiring precise exceptions needs to be flushed.  The
         crude solution is just to flush everything; we could easily
         enough do a lot better if needed. */
      /* Probably also overly-conservative, but also dump everything
         if we hit a memory bus event (fence, lock, unlock).  Ditto
         AbiHints, CASs, LLs and SCs. */
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         /* fall through */
      case Ist_MBE:
      case Ist_Dirty:
      case Ist_CAS:
      case Ist_LLSC:
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         break;

      /* all other cases are boring. */
      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         memRW = True;
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         memRW = True;
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         memRW = True;
         break;
      }
      case Ist_Exit:
         vassert(isIRAtom(st->Ist.Exit.guard));
         break;

      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         break;

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.details->ix));
         vassert(isIRAtom(st->Ist.PutI.details->data));
         break;

      case Ist_NoOp:
      case Ist_IMark:
         break;

      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("handle_gets_Stmt");
   }

   if (memRW) {
      /* This statement accesses memory.  So we might need to dump all parts
         of the environment corresponding to guest state that may not
         be reordered with respect to memory references.  That means
         at least the stack pointer. */
      switch (pxControl) {
         case VexRegUpdAllregsAtMemAccess:
            /* Precise exceptions required at mem access.
               Flush all guest state. */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            break;
         case VexRegUpdSpAtMemAccess:
            /* We need to dump the stack pointer
               (needed for stack extension in m_signals.c).
               preciseMemExnsFn will use vex_control.iropt_register_updates
               to verify only the sp is to be checked. */
            /* fallthrough */
         case VexRegUpdUnwindregsAtMemAccess:
            for (j = 0; j < env->used; j++) {
               if (!env->inuse[j])
                  continue;
               /* Just flush the minimal amount required, as computed by
                  preciseMemExnsFn. */
               HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
               HWord k_hi = env->key[j] & 0xFFFF;
               if (preciseMemExnsFn( k_lo, k_hi, pxControl ))
                  env->inuse[j] = False;
            }
            break;
         case VexRegUpdAllregsAtEachInsn:
            // VexRegUpdAllregsAtEachInsn cannot happen here.
            // fall through
         case VexRegUpd_INVALID:
         default:
            vassert(0);
      }
   } /* if (memRW) */

}
867 
868 
869 /* Scan backwards, building up a set of (min offset, max
870    offset) pairs, indicating those parts of the guest state
871    for which the next event is a write.
872 
873    On seeing a conditional exit, empty the set.
874 
875    On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
876    completely within the set, remove the Put.  Otherwise, add
877    (minoff,maxoff) to the set.
878 
879    On seeing 'Get (minoff,maxoff)', remove any part of the set
880    overlapping (minoff,maxoff).  The same has to happen for any events
881    which implicitly read parts of the guest state: dirty helper calls
882    and loads/stores.
883 */
884 
static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   /* Remove Puts that are certainly overwritten by a later Put before
      any intervening read of that guest-state slice (a Get, a
      conditional exit, or a memory access that requires precise
      guest state).  |preciseMemExnsFn| and |pxControl| say which
      state must be kept up to date at memory accesses. */
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   /* Under VexRegUpdAllregsAtEachInsn every Put is live, so this
      pass must not be run in that mode. */
   vassert(pxControl < VexRegUpdAllregsAtEachInsn);

   /* env maps (offset,size) keys to "next event for this slice is a
      write"; inuse[] flags mark live entries. */
   HashHW* env = newHHW();

   /* Initialise the running env with the fact that the final exit
      writes the IP (or, whatever it claims to write.  We don't
      care.) */
   key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
   addToHHW(env, (HWord)key, 0);

   /* And now scan backwards through the statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         //Bool re_add;
         /* Need to throw out from the env, any part of it which
            doesn't overlap with the guest state written by this exit.
            Since the exit only writes one section, it's simplest to
            do this: (1) check whether env contains a write that
            completely overlaps the write done by this exit; (2) empty
            out env; and (3) if (1) was true, add the write done by
            this exit.

            To make (1) a bit simpler, merely search for a write that
            exactly matches the one done by this exit.  That's safe
            because it will fail as often or more often than a full
            overlap check, and failure to find an overlapping write in
            env is the safe case (we just nuke env if that
            happens). */
         //vassert(isIRAtom(st->Ist.Exit.guard));
         /* (1) */
         //key = mk_key_GetPut(st->Ist.Exit.offsIP,
         //                    typeOfIRConst(st->Ist.Exit.dst));
         //re_add = lookupHHW(env, NULL, key);
         /* (2) */
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         /* (3) */
         //if (0 && re_add)
         //   addToHHW(env, (HWord)key, 0);
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
            vassert(isIRAtom(st->Ist.PutI.details->ix));
            vassert(isIRAtom(st->Ist.PutI.details->data));
            break;
         default:
            isPut = False;
      }
      /* NB: PutI is deliberately excluded from removal below -- its
         exact write location is unknown at compile time, so it is
         never deleted; it instead falls through to handle_gets_Stmt,
         which treats its implicit reads conservatively. */
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add it
               to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn, pxControl );
   }
}
991 
992 
993 /*---------------------------------------------------------------*/
994 /*--- Constant propagation and folding                        ---*/
995 /*---------------------------------------------------------------*/
996 
997 #if STATS_IROPT
998 /* How often sameIRExprs was invoked */
999 static UInt invocation_count;
1000 /* How often sameIRExprs recursed through IRTemp assignments */
1001 static UInt recursion_count;
1002 /* How often sameIRExprs found identical IRExprs */
1003 static UInt success_count;
1004 /* How often recursing through assignments to IRTemps helped
1005    establishing equality. */
1006 static UInt recursion_success_count;
1007 /* Whether or not recursing through an IRTemp assignment helped
1008    establishing IRExpr equality for a given sameIRExprs invocation. */
1009 static Bool recursion_helped;
1010 /* Whether or not a given sameIRExprs invocation recursed through an
1011    IRTemp assignment */
1012 static Bool recursed;
1013 /* Maximum number of nodes ever visited when comparing two IRExprs. */
1014 static UInt max_nodes_visited;
1015 #endif /* STATS_IROPT */
1016 
1017 /* Count the number of nodes visited for a given sameIRExprs invocation. */
1018 static UInt num_nodes_visited;
1019 
1020 /* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
1021    This is to guard against performance degradation by visiting large
1022    trees without success. */
1023 #define NODE_LIMIT 30
1024 
1025 
1026 /* The env in this section is a map from IRTemp to IRExpr*,
1027    that is, an array indexed by IRTemp. */
1028 
1029 /* Do both expressions compute the same value? The answer is generally
1030    conservative, i.e. it will report that the expressions do not compute
1031    the same value when in fact they do. The reason is that we do not
1032    keep track of changes in the guest state and memory. Thusly, two
1033    Get's, GetI's or Load's, even when accessing the same location, will be
1034    assumed to compute different values. After all the accesses may happen
1035    at different times and the guest state / memory can have changed in
1036    the meantime.
1037 
1038    XXX IMPORTANT XXX the two expressions must have the same IR type.
1039    DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */
1040 
1041 /* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
1042    wrapper that deals with the common tags-don't-match case, and a
1043    slower out of line general case.  Saves a few insns. */
1044 
1045 __attribute__((noinline))
1046 static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );
1047 
1048 inline
sameIRExprs_aux(IRExpr ** env,IRExpr * e1,IRExpr * e2)1049 static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
1050 {
1051    if (e1->tag != e2->tag) return False;
1052    return sameIRExprs_aux2(env, e1, e2);
1053 }
1054 
__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   /* General-case comparator; the wrapper has already established
      e1->tag == e2->tag.  Conservative: may answer False for equal
      expressions, but never True for unequal ones. */

   /* Give up once the node budget is exhausted, to bound the cost of
      comparing large trees. */
   if (num_nodes_visited++ > NODE_LIMIT) return False;

   switch (e1->tag) {
      case Iex_RdTmp:
         if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;

         /* Different tmps: compare their definitions instead, if both
            are bound in env. */
         if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
            Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
                                        env[e2->Iex.RdTmp.tmp]);
#if STATS_IROPT
            recursed = True;
            if (same) recursion_helped = True;
#endif
            return same;
         }
         return False;

      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* Guest state / memory could have changed in the meantime. */
         return False;

      case Iex_Binop:
         return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
                                                 e2->Iex.Binop.arg1 )
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
                                                 e2->Iex.Binop.arg2 ));

      case Iex_Unop:
         return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
                        && sameIRExprs_aux( env, e1->Iex.Unop.arg,
                                                 e2->Iex.Unop.arg ));

      case Iex_Const: {
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         /* Caller contract: both expressions have the same IR type,
            hence the constants must share a tag. */
         vassert(c1->tag == c2->tag);
         switch (c1->tag) {
            case Ico_U1:   return toBool( c1->Ico.U1  == c2->Ico.U1 );
            case Ico_U8:   return toBool( c1->Ico.U8  == c2->Ico.U8 );
            case Ico_U16:  return toBool( c1->Ico.U16 == c2->Ico.U16 );
            case Ico_U32:  return toBool( c1->Ico.U32 == c2->Ico.U32 );
            case Ico_U64:  return toBool( c1->Ico.U64 == c2->Ico.U64 );
            default: break;
         }
         return False;
      }

      case Iex_Triop: {
         IRTriop *tri1 = e1->Iex.Triop.details;
         IRTriop *tri2 = e2->Iex.Triop.details;
         return toBool( tri1->op == tri2->op
                        && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
                        && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
                        && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
      }

      case Iex_ITE:
         return toBool(    sameIRExprs_aux( env, e1->Iex.ITE.cond,
                                                 e2->Iex.ITE.cond )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iftrue,
                                                 e2->Iex.ITE.iftrue )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iffalse,
                                                 e2->Iex.ITE.iffalse ));

      default:
         /* Not very likely to be "same". */
         break;
   }

   return False;
}
1132 
1133 inline
sameIRExprs(IRExpr ** env,IRExpr * e1,IRExpr * e2)1134 static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
1135 {
1136    Bool same;
1137 
1138    num_nodes_visited = 0;
1139    same = sameIRExprs_aux(env, e1, e2);
1140 
1141 #if STATS_IROPT
1142    ++invocation_count;
1143    if (recursed) ++recursion_count;
1144    success_count += same;
1145    if (same && recursion_helped)
1146       ++recursion_success_count;
1147    if (num_nodes_visited > max_nodes_visited)
1148       max_nodes_visited = num_nodes_visited;
1149    recursed = False; /* reset */
1150    recursion_helped = False;  /* reset */
1151 #endif /* STATS_IROPT */
1152 
1153    return same;
1154 }
1155 
1156 
1157 /* Debugging-only hack (not used in production runs): make a guess
1158    whether sameIRExprs might assert due to the two args being of
1159    different types.  If in doubt return False.  Is only used when
1160    --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0.
1161    Bad because it duplicates functionality from typeOfIRExpr.  See
1162    comment on the single use point below for rationale. */
1163 static
debug_only_hack_sameIRExprs_might_assert(IRExpr * e1,IRExpr * e2)1164 Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
1165 {
1166    if (e1->tag != e2->tag) return False;
1167    switch (e1->tag) {
1168       case Iex_Const: {
1169          /* The only interesting case */
1170          IRConst *c1 = e1->Iex.Const.con;
1171          IRConst *c2 = e2->Iex.Const.con;
1172          return c1->tag != c2->tag;
1173       }
1174       default:
1175          break;
1176    }
1177    return False;
1178 }
1179 
1180 
1181 /* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
isZeroU32(IRExpr * e)1182 static Bool isZeroU32 ( IRExpr* e )
1183 {
1184    return toBool( e->tag == Iex_Const
1185                   && e->Iex.Const.con->tag == Ico_U32
1186                   && e->Iex.Const.con->Ico.U32 == 0);
1187 }
1188 
1189 /* Is this literally IRExpr_Const(IRConst_U64(0)) ?
1190    Currently unused; commented out to avoid compiler warning */
1191 #if 0
1192 static Bool isZeroU64 ( IRExpr* e )
1193 {
1194    return toBool( e->tag == Iex_Const
1195                   && e->Iex.Const.con->tag == Ico_U64
1196                   && e->Iex.Const.con->Ico.U64 == 0);
1197 }
1198 #endif
1199 
1200 /* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
isZeroV128(IRExpr * e)1201 static Bool isZeroV128 ( IRExpr* e )
1202 {
1203    return toBool( e->tag == Iex_Const
1204                   && e->Iex.Const.con->tag == Ico_V128
1205                   && e->Iex.Const.con->Ico.V128 == 0x0000);
1206 }
1207 
1208 /* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
isZeroV256(IRExpr * e)1209 static Bool isZeroV256 ( IRExpr* e )
1210 {
1211    return toBool( e->tag == Iex_Const
1212                   && e->Iex.Const.con->tag == Ico_V256
1213                   && e->Iex.Const.con->Ico.V256 == 0x00000000);
1214 }
1215 
1216 /* Is this an integer constant with value 0 ? */
isZeroU(IRExpr * e)1217 static Bool isZeroU ( IRExpr* e )
1218 {
1219    if (e->tag != Iex_Const) return False;
1220    switch (e->Iex.Const.con->tag) {
1221       case Ico_U1:    return toBool( e->Iex.Const.con->Ico.U1  == 0);
1222       case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0);
1223       case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0);
1224       case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32 == 0);
1225       case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64 == 0);
1226       default: vpanic("isZeroU");
1227    }
1228 }
1229 
1230 /* Is this an integer constant with value 1---1b ? */
isOnesU(IRExpr * e)1231 static Bool isOnesU ( IRExpr* e )
1232 {
1233    if (e->tag != Iex_Const) return False;
1234    switch (e->Iex.Const.con->tag) {
1235       case Ico_U8:    return toBool( e->Iex.Const.con->Ico.U8  == 0xFF);
1236       case Ico_U16:   return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
1237       case Ico_U32:   return toBool( e->Iex.Const.con->Ico.U32
1238                                      == 0xFFFFFFFF);
1239       case Ico_U64:   return toBool( e->Iex.Const.con->Ico.U64
1240                                      == 0xFFFFFFFFFFFFFFFFULL);
1241       default: ppIRExpr(e); vpanic("isOnesU");
1242    }
1243 }
1244 
notBool(Bool b)1245 static Bool notBool ( Bool b )
1246 {
1247    if (b == True) return False;
1248    if (b == False) return True;
1249    vpanic("notBool");
1250 }
1251 
1252 /* Make a zero which has the same type as the result of the given
1253    primop. */
mkZeroOfPrimopResultType(IROp op)1254 static IRExpr* mkZeroOfPrimopResultType ( IROp op )
1255 {
1256    switch (op) {
1257       case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
1258       case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
1259       case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
1260       case Iop_Sub32:
1261       case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
1262       case Iop_And64:
1263       case Iop_Sub64:
1264       case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
1265       case Iop_XorV128:
1266       case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
1267       case Iop_XorV256:
1268       case Iop_AndV256: return IRExpr_Const(IRConst_V256(0));
1269       default: vpanic("mkZeroOfPrimopResultType: bad primop");
1270    }
1271 }
1272 
1273 /* Make a value containing all 1-bits, which has the same type as the
1274    result of the given primop. */
mkOnesOfPrimopResultType(IROp op)1275 static IRExpr* mkOnesOfPrimopResultType ( IROp op )
1276 {
1277    switch (op) {
1278       case Iop_CmpEQ32:
1279       case Iop_CmpEQ64:
1280          return IRExpr_Const(IRConst_U1(toBool(1)));
1281       case Iop_Or8:
1282          return IRExpr_Const(IRConst_U8(0xFF));
1283       case Iop_Or16:
1284          return IRExpr_Const(IRConst_U16(0xFFFF));
1285       case Iop_Or32:
1286          return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
1287       case Iop_CmpEQ8x8:
1288       case Iop_Or64:
1289          return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
1290       case Iop_CmpEQ8x16:
1291       case Iop_CmpEQ16x8:
1292       case Iop_CmpEQ32x4:
1293          return IRExpr_Const(IRConst_V128(0xFFFF));
1294       default:
1295          ppIROp(op);
1296          vpanic("mkOnesOfPrimopResultType: bad primop");
1297    }
1298 }
1299 
1300 /* Helpers for folding Clz32/64. */
fold_Clz64(ULong value)1301 static UInt fold_Clz64 ( ULong value )
1302 {
1303    UInt i;
1304    vassert(value != 0ULL); /* no defined semantics for arg==0 */
1305    for (i = 0; i < 64; ++i) {
1306       if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
1307    }
1308    vassert(0);
1309    /*NOTREACHED*/
1310    return 0;
1311 }
1312 
/* Count leading zero bits of a nonzero 32-bit value (folds Clz32). */
static UInt fold_Clz32 ( UInt value )
{
   vassert(value != 0); /* no defined semantics for arg==0 */
   UInt n = 0;
   /* Shift left until the top bit appears; nonzero arg guarantees
      termination within 32 steps. */
   while (0 == (value & (((UInt)1) << 31))) {
      value <<= 1;
      n++;
   }
   return n;
}
1324 
1325 /* V64 holds 8 summary-constant bits in V128/V256 style.  Convert to
1326    the corresponding real constant. */
1327 //XXX re-check this before use
1328 //static ULong de_summarise_V64 ( UChar v64 )
1329 //{
1330 //   ULong r = 0;
1331 //   if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
1332 //   if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
1333 //   if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
1334 //   if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
1335 //   if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
1336 //   if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
1337 //   if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
1338 //   if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
1339 //   return r;
1340 //}
1341 
1342 /* Helper for arbitrary expression pattern matching in flat IR.  If
1343    'e' is a reference to a tmp, look it up in env -- repeatedly, if
1344    necessary -- until it resolves to a non-tmp.  Note that this can
1345    return NULL if it can't resolve 'e' to a new expression, which will
1346    be the case if 'e' is instead defined by an IRStmt (IRDirty or
1347    LLSC). */
static IRExpr* chase ( IRExpr** env, IRExpr* e )
{
   /* Follow RdTmp indirections until we hit a non-tmp expression, or
      an unbound tmp (NULL in env: defined by IRDirty/LLSC).
      Terminates because tmps must be defined before use, so no tmp
      can be bound, directly or indirectly, to itself. */
   while (e != NULL && e->tag == Iex_RdTmp) {
      if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); }
      e = env[(Int)e->Iex.RdTmp.tmp];
   }
   return e;
}
1360 
1361 /* Similar to |chase|, but follows at most one level of tmp reference. */
/* Like |chase|, but resolves at most one level of tmp reference. */
static IRExpr* chase1 ( IRExpr** env, IRExpr* e )
{
   if (e != NULL && e->tag == Iex_RdTmp)
      return env[(Int)e->Iex.RdTmp.tmp];
   return e;
}
1369 
fold_Expr(IRExpr ** env,IRExpr * e)1370 static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
1371 {
1372    Int     shift;
1373    IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
1374 
1375    switch (e->tag) {
1376    case Iex_Unop:
1377       /* UNARY ops */
1378       if (e->Iex.Unop.arg->tag == Iex_Const) {
1379          switch (e->Iex.Unop.op) {
1380          case Iop_1Uto8:
1381             e2 = IRExpr_Const(IRConst_U8(toUChar(
1382                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1383                     ? 1 : 0)));
1384             break;
1385          case Iop_1Uto32:
1386             e2 = IRExpr_Const(IRConst_U32(
1387                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1388                     ? 1 : 0));
1389             break;
1390          case Iop_1Uto64:
1391             e2 = IRExpr_Const(IRConst_U64(
1392                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1393                     ? 1 : 0));
1394             break;
1395 
1396          case Iop_1Sto8:
1397             e2 = IRExpr_Const(IRConst_U8(toUChar(
1398                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1399                     ? 0xFF : 0)));
1400             break;
1401          case Iop_1Sto16:
1402             e2 = IRExpr_Const(IRConst_U16(toUShort(
1403                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1404                     ? 0xFFFF : 0)));
1405             break;
1406          case Iop_1Sto32:
1407             e2 = IRExpr_Const(IRConst_U32(
1408                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1409                     ? 0xFFFFFFFF : 0));
1410             break;
1411          case Iop_1Sto64:
1412             e2 = IRExpr_Const(IRConst_U64(
1413                     e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1414                     ? 0xFFFFFFFFFFFFFFFFULL : 0));
1415             break;
1416 
1417          case Iop_8Sto32: {
1418             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1419             u32 <<= 24;
1420             u32 = (Int)u32 >> 24;   /* signed shift */
1421             e2 = IRExpr_Const(IRConst_U32(u32));
1422             break;
1423          }
1424          case Iop_16Sto32: {
1425             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1426             u32 <<= 16;
1427             u32 = (Int)u32 >> 16;   /* signed shift */
1428             e2 = IRExpr_Const(IRConst_U32(u32));
1429             break;
1430          }
1431          case Iop_8Uto64:
1432             e2 = IRExpr_Const(IRConst_U64(
1433                     0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1434             break;
1435          case Iop_16Uto64:
1436             e2 = IRExpr_Const(IRConst_U64(
1437                     0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1438             break;
1439          case Iop_8Uto32:
1440             e2 = IRExpr_Const(IRConst_U32(
1441                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1442             break;
1443          case Iop_8Sto16: {
1444             UShort u16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1445             u16 <<= 8;
1446             u16 = (Short)u16 >> 8;  /* signed shift */
1447             e2 = IRExpr_Const(IRConst_U16(u16));
1448             break;
1449          }
1450          case Iop_8Uto16:
1451             e2 = IRExpr_Const(IRConst_U16(
1452                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1453             break;
1454          case Iop_16Uto32:
1455             e2 = IRExpr_Const(IRConst_U32(
1456                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1457             break;
1458          case Iop_32to16:
1459             e2 = IRExpr_Const(IRConst_U16(toUShort(
1460                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1461             break;
1462          case Iop_32to8:
1463             e2 = IRExpr_Const(IRConst_U8(toUChar(
1464                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1465             break;
1466          case Iop_32to1:
1467             e2 = IRExpr_Const(IRConst_U1(toBool(
1468                     1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1469                  )));
1470             break;
1471          case Iop_64to1:
1472             e2 = IRExpr_Const(IRConst_U1(toBool(
1473                     1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
1474                  )));
1475             break;
1476 
1477          case Iop_NotV128:
1478             e2 = IRExpr_Const(IRConst_V128(
1479                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
1480             break;
1481          case Iop_Not64:
1482             e2 = IRExpr_Const(IRConst_U64(
1483                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
1484             break;
1485          case Iop_Not32:
1486             e2 = IRExpr_Const(IRConst_U32(
1487                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1488             break;
1489          case Iop_Not16:
1490             e2 = IRExpr_Const(IRConst_U16(toUShort(
1491                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
1492             break;
1493          case Iop_Not8:
1494             e2 = IRExpr_Const(IRConst_U8(toUChar(
1495                     ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
1496             break;
1497 
1498          case Iop_Not1:
1499             e2 = IRExpr_Const(IRConst_U1(
1500                     notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
1501             break;
1502 
1503          case Iop_64to8: {
1504             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1505             w64 &= 0xFFULL;
1506             e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
1507             break;
1508          }
1509          case Iop_64to16: {
1510             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1511             w64 &= 0xFFFFULL;
1512             e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
1513             break;
1514          }
1515          case Iop_64to32: {
1516             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1517             w64 &= 0x00000000FFFFFFFFULL;
1518             e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1519             break;
1520          }
1521          case Iop_64HIto32: {
1522             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1523             w64 >>= 32;
1524             e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1525             break;
1526          }
1527          case Iop_32Uto64:
1528             e2 = IRExpr_Const(IRConst_U64(
1529                     0xFFFFFFFFULL
1530                     & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
1531             break;
1532          case Iop_16Sto64: {
1533             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1534             u64 <<= 48;
1535             u64 = (Long)u64 >> 48;   /* signed shift */
1536             e2 = IRExpr_Const(IRConst_U64(u64));
1537             break;
1538          }
1539          case Iop_32Sto64: {
1540             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1541             u64 <<= 32;
1542             u64 = (Long)u64 >> 32;   /* signed shift */
1543             e2 = IRExpr_Const(IRConst_U64(u64));
1544             break;
1545          }
1546 
1547          case Iop_16to8: {
1548             UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1549             w16 &= 0xFF;
1550             e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
1551             break;
1552          }
1553          case Iop_16HIto8: {
1554             UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1555             w16 >>= 8;
1556             w16 &= 0xFF;
1557             e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
1558             break;
1559          }
1560 
1561          case Iop_CmpNEZ8:
1562             e2 = IRExpr_Const(IRConst_U1(toBool(
1563                     0 !=
1564                     (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
1565                  )));
1566             break;
1567          case Iop_CmpNEZ32:
1568             e2 = IRExpr_Const(IRConst_U1(toBool(
1569                     0 !=
1570                     (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1571                  )));
1572             break;
1573          case Iop_CmpNEZ64:
1574             e2 = IRExpr_Const(IRConst_U1(toBool(
1575                     0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
1576                  )));
1577             break;
1578 
1579          case Iop_CmpwNEZ32: {
1580             UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1581             if (w32 == 0)
1582                e2 = IRExpr_Const(IRConst_U32( 0 ));
1583             else
1584                e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
1585             break;
1586          }
1587          case Iop_CmpwNEZ64: {
1588             ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1589             if (w64 == 0)
1590                e2 = IRExpr_Const(IRConst_U64( 0 ));
1591             else
1592                e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
1593             break;
1594          }
1595 
1596          case Iop_Left32: {
1597             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1598             Int  s32 = (Int)(u32 & 0xFFFFFFFF);
1599             s32 = (s32 | (-s32));
1600             e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
1601             break;
1602          }
1603 
1604          case Iop_Left64: {
1605             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1606             Long  s64 = (Long)u64;
1607             s64 = (s64 | (-s64));
1608             e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
1609             break;
1610          }
1611 
1612          case Iop_Clz32: {
1613             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1614             if (u32 != 0)
1615                e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
1616             break;
1617          }
1618          case Iop_Clz64: {
1619             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1620             if (u64 != 0ULL)
1621                e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
1622             break;
1623          }
1624 
1625          /* For these vector ones, can't fold all cases, but at least
1626             do the most obvious one.  Could do better here using
1627             summarise/desummarise of vector constants, but too
1628             difficult to verify; hence just handle the zero cases. */
1629          case Iop_32UtoV128: {
1630             UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1631             if (0 == u32) {
1632                e2 = IRExpr_Const(IRConst_V128(0x0000));
1633             } else {
1634                goto unhandled;
1635             }
1636             break;
1637          }
1638          case Iop_V128to64: {
1639             UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1640             if (0 == ((v128 >> 0) & 0xFF)) {
1641                e2 = IRExpr_Const(IRConst_U64(0));
1642             } else {
1643                goto unhandled;
1644             }
1645             break;
1646          }
1647          case Iop_V128HIto64: {
1648             UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1649             if (0 == ((v128 >> 8) & 0xFF)) {
1650                e2 = IRExpr_Const(IRConst_U64(0));
1651             } else {
1652                goto unhandled;
1653             }
1654             break;
1655          }
1656          case Iop_64UtoV128: {
1657             ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1658             if (0 == u64) {
1659                e2 = IRExpr_Const(IRConst_V128(0x0000));
1660             } else {
1661                goto unhandled;
1662             }
1663             break;
1664          }
1665 
1666          /* Even stupider (although still correct ..) */
1667          case Iop_V256to64_0: case Iop_V256to64_1:
1668          case Iop_V256to64_2: case Iop_V256to64_3: {
1669             UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
1670             if (v256 == 0x00000000) {
1671                e2 = IRExpr_Const(IRConst_U64(0));
1672             } else {
1673                goto unhandled;
1674             }
1675             break;
1676          }
1677 
1678          case Iop_ZeroHI64ofV128: {
1679             /* Could do better here -- only need to look at the bottom 64 bits
1680                of the argument, really. */
1681             UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1682             if (v128 == 0x0000) {
1683                e2 = IRExpr_Const(IRConst_V128(0x0000));
1684             } else {
1685                goto unhandled;
1686             }
1687             break;
1688          }
1689 
1690          default:
1691             goto unhandled;
1692       }
1693       }
1694       break;
1695 
1696    case Iex_Binop:
1697       /* BINARY ops */
1698       if (e->Iex.Binop.arg1->tag == Iex_Const
1699           && e->Iex.Binop.arg2->tag == Iex_Const) {
1700          /* cases where both args are consts */
1701          switch (e->Iex.Binop.op) {
1702 
1703             /* -- Or -- */
1704             case Iop_Or8:
1705                e2 = IRExpr_Const(IRConst_U8(toUChar(
1706                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1707                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1708                break;
1709             case Iop_Or16:
1710                e2 = IRExpr_Const(IRConst_U16(toUShort(
1711                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1712                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1713                break;
1714             case Iop_Or32:
1715                e2 = IRExpr_Const(IRConst_U32(
1716                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1717                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1718                break;
1719             case Iop_Or64:
1720                e2 = IRExpr_Const(IRConst_U64(
1721                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1722                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1723                break;
1724             case Iop_OrV128:
1725                e2 = IRExpr_Const(IRConst_V128(
1726                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1727                         | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1728                break;
1729 
1730             /* -- Xor -- */
1731             case Iop_Xor8:
1732                e2 = IRExpr_Const(IRConst_U8(toUChar(
1733                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1734                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1735                break;
1736             case Iop_Xor16:
1737                e2 = IRExpr_Const(IRConst_U16(toUShort(
1738                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1739                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1740                break;
1741             case Iop_Xor32:
1742                e2 = IRExpr_Const(IRConst_U32(
1743                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1744                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1745                break;
1746             case Iop_Xor64:
1747                e2 = IRExpr_Const(IRConst_U64(
1748                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1749                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1750                break;
1751             case Iop_XorV128:
1752                e2 = IRExpr_Const(IRConst_V128(
1753                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1754                         ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1755                break;
1756 
1757             /* -- And -- */
1758             case Iop_And8:
1759                e2 = IRExpr_Const(IRConst_U8(toUChar(
1760                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1761                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1762                break;
1763             case Iop_And16:
1764                e2 = IRExpr_Const(IRConst_U16(toUShort(
1765                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1766                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1767                break;
1768             case Iop_And32:
1769                e2 = IRExpr_Const(IRConst_U32(
1770                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1771                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1772                break;
1773             case Iop_And64:
1774                e2 = IRExpr_Const(IRConst_U64(
1775                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1776                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1777                break;
1778             case Iop_AndV128:
1779                e2 = IRExpr_Const(IRConst_V128(
1780                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1781                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1782                break;
1783 
1784             /* -- Add -- */
1785             case Iop_Add8:
1786                e2 = IRExpr_Const(IRConst_U8(toUChar(
1787                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1788                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1789                break;
1790             case Iop_Add32:
1791                e2 = IRExpr_Const(IRConst_U32(
1792                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1793                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1794                break;
1795             case Iop_Add64:
1796                e2 = IRExpr_Const(IRConst_U64(
1797                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1798                         + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1799                break;
1800 
1801             /* -- Sub -- */
1802             case Iop_Sub8:
1803                e2 = IRExpr_Const(IRConst_U8(toUChar(
1804                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1805                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1806                break;
1807             case Iop_Sub32:
1808                e2 = IRExpr_Const(IRConst_U32(
1809                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1810                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1811                break;
1812             case Iop_Sub64:
1813                e2 = IRExpr_Const(IRConst_U64(
1814                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1815                         - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1816                break;
1817 
1818             /* -- Max32U -- */
1819             case Iop_Max32U: {
1820                UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1821                UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1822                UInt res  = u32a > u32b ? u32a : u32b;
1823                e2 = IRExpr_Const(IRConst_U32(res));
1824                break;
1825             }
1826 
1827             /* -- Mul -- */
1828             case Iop_Mul32:
1829                e2 = IRExpr_Const(IRConst_U32(
1830                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1831                         * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1832                break;
1833             case Iop_Mul64:
1834                e2 = IRExpr_Const(IRConst_U64(
1835                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1836                         * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1837                break;
1838 
1839             case Iop_MullS32: {
1840                /* very paranoid */
1841                UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1842                UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1843                Int   s32a = (Int)u32a;
1844                Int   s32b = (Int)u32b;
1845                Long  s64a = (Long)s32a;
1846                Long  s64b = (Long)s32b;
1847                Long  sres = s64a * s64b;
1848                ULong ures = (ULong)sres;
1849                e2 = IRExpr_Const(IRConst_U64(ures));
1850                break;
1851             }
1852 
1853             /* -- Shl -- */
1854             case Iop_Shl32:
1855                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1856                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1857                if (shift >= 0 && shift <= 31)
1858                   e2 = IRExpr_Const(IRConst_U32(
1859                           (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1860                            << shift)));
1861                break;
1862             case Iop_Shl64:
1863                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1864                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1865                if (shift >= 0 && shift <= 63)
1866                   e2 = IRExpr_Const(IRConst_U64(
1867                           (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1868                            << shift)));
1869                break;
1870 
1871             /* -- Sar -- */
1872             case Iop_Sar32: {
1873                /* paranoid ... */
1874                /*signed*/ Int s32;
1875                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1876                s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1877                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1878                if (shift >= 0 && shift <= 31) {
1879                   s32 >>=/*signed*/ shift;
1880                   e2 = IRExpr_Const(IRConst_U32((UInt)s32));
1881                }
1882                break;
1883             }
1884             case Iop_Sar64: {
1885                /* paranoid ... */
1886                /*signed*/ Long s64;
1887                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1888                s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1889                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1890                if (shift >= 0 && shift <= 63) {
1891                   s64 >>=/*signed*/ shift;
1892                   e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1893                }
1894                break;
1895             }
1896 
1897             /* -- Shr -- */
1898             case Iop_Shr32: {
1899                /* paranoid ... */
1900                /*unsigned*/ UInt u32;
1901                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1902                u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1903                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1904                if (shift >= 0 && shift <= 31) {
1905                   u32 >>=/*unsigned*/ shift;
1906                   e2 = IRExpr_Const(IRConst_U32(u32));
1907                }
1908                break;
1909             }
1910             case Iop_Shr64: {
1911                /* paranoid ... */
1912                /*unsigned*/ ULong u64;
1913                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1914                u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1915                shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1916                if (shift >= 0 && shift <= 63) {
1917                   u64 >>=/*unsigned*/ shift;
1918                   e2 = IRExpr_Const(IRConst_U64(u64));
1919                }
1920                break;
1921             }
1922 
1923             /* -- CmpEQ -- */
1924             case Iop_CmpEQ32:
1925                e2 = IRExpr_Const(IRConst_U1(toBool(
1926                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1927                         == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1928                break;
1929             case Iop_CmpEQ64:
1930                e2 = IRExpr_Const(IRConst_U1(toBool(
1931                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1932                         == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1933                break;
1934 
1935             /* -- CmpNE -- */
1936             case Iop_CmpNE8:
1937             case Iop_CasCmpNE8:
1938             case Iop_ExpCmpNE8:
1939                e2 = IRExpr_Const(IRConst_U1(toBool(
1940                        ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
1941                         != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
1942                break;
1943             case Iop_CmpNE32:
1944             case Iop_CasCmpNE32:
1945             case Iop_ExpCmpNE32:
1946                e2 = IRExpr_Const(IRConst_U1(toBool(
1947                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1948                         != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1949                break;
1950             case Iop_CmpNE64:
1951             case Iop_CasCmpNE64:
1952             case Iop_ExpCmpNE64:
1953                e2 = IRExpr_Const(IRConst_U1(toBool(
1954                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1955                         != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1956                break;
1957 
1958             /* -- CmpLEU -- */
1959             case Iop_CmpLE32U:
1960                e2 = IRExpr_Const(IRConst_U1(toBool(
1961                        ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1962                         <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1963                break;
1964             case Iop_CmpLE64U:
1965                e2 = IRExpr_Const(IRConst_U1(toBool(
1966                        ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1967                         <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1968                break;
1969 
1970             /* -- CmpLES -- */
1971             case Iop_CmpLE32S:
1972                e2 = IRExpr_Const(IRConst_U1(toBool(
1973                        ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1974                         <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1975                break;
1976             case Iop_CmpLE64S:
1977                e2 = IRExpr_Const(IRConst_U1(toBool(
1978                        ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1979                         <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1980                break;
1981 
1982             /* -- CmpLTS -- */
1983             case Iop_CmpLT32S:
1984                e2 = IRExpr_Const(IRConst_U1(toBool(
1985                        ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1986                         < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1987                break;
1988             case Iop_CmpLT64S:
1989                e2 = IRExpr_Const(IRConst_U1(toBool(
1990                        ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1991                         < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1992                break;
1993 
1994             /* -- CmpLTU -- */
1995             case Iop_CmpLT32U:
1996                e2 = IRExpr_Const(IRConst_U1(toBool(
1997                        ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1998                         < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1999                break;
2000             case Iop_CmpLT64U:
2001                e2 = IRExpr_Const(IRConst_U1(toBool(
2002                        ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
2003                         < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
2004                break;
2005 
2006             /* -- CmpORD -- */
2007             case Iop_CmpORD32S: {
2008                /* very paranoid */
2009                UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
2010                UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
2011                Int   s32a = (Int)u32a;
2012                Int   s32b = (Int)u32b;
2013                Int   r = 0x2; /* EQ */
2014                if (s32a < s32b) {
2015                   r = 0x8; /* LT */
2016                }
2017                else if (s32a > s32b) {
2018                   r = 0x4; /* GT */
2019                }
2020                e2 = IRExpr_Const(IRConst_U32(r));
2021                break;
2022             }
2023 
2024             /* -- nHLto2n -- */
2025             case Iop_32HLto64:
2026                e2 = IRExpr_Const(IRConst_U64(
2027                        (((ULong)(e->Iex.Binop.arg1
2028                                   ->Iex.Const.con->Ico.U32)) << 32)
2029                        | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
2030                     ));
2031                break;
2032             case Iop_64HLto128:
2033                /* We can't fold this, because there is no way to
                  express the result in IR, but at least pretend to
2035                   handle it, so as to stop getting blasted with
2036                   no-rule-for-this-primop messages. */
2037                break;
2038             /* For this vector one, can't fold all cases, but at
2039                least do the most obvious one.  Could do better here
2040                using summarise/desummarise of vector constants, but
2041                too difficult to verify; hence just handle the zero
2042                cases. */
2043             case Iop_64HLtoV128: {
2044                ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
2045                ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2046                if (0 == argHi && 0 == argLo) {
2047                   e2 = IRExpr_Const(IRConst_V128(0));
2048                } else {
2049                   goto unhandled;
2050                }
2051                break;
2052             }
2053             /* Same reasoning for the 256-bit version. */
2054             case Iop_V128HLtoV256: {
2055                IRExpr* argHi = e->Iex.Binop.arg1;
2056                IRExpr* argLo = e->Iex.Binop.arg2;
2057                if (isZeroV128(argHi) && isZeroV128(argLo)) {
2058                   e2 = IRExpr_Const(IRConst_V256(0));
2059                } else {
2060                   goto unhandled;
2061                }
2062                break;
2063             }
2064 
2065             /* -- V128 stuff -- */
2066             case Iop_InterleaveLO8x16: {
2067                /* This turns up a lot in Memcheck instrumentation of
2068                   Icc generated code.  I don't know why. */
2069                UShort arg1 =  e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
2070                UShort arg2 =  e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
2071                if (0 == arg1 && 0 == arg2) {
2072                   e2 = IRExpr_Const(IRConst_V128(0));
2073                } else {
2074                   goto unhandled;
2075                }
2076                break;
2077             }
2078 
2079             default:
2080                goto unhandled;
2081          }
2082 
2083       } else {
2084 
2085          /* other cases (identities, etc) */
2086          switch (e->Iex.Binop.op) {
2087 
2088             case Iop_Shl32:
2089             case Iop_Shl64:
2090             case Iop_Shr64:
2091             case Iop_Sar64:
2092                /* Shl32/Shl64/Shr64/Sar64(x,0) ==> x */
2093                if (isZeroU(e->Iex.Binop.arg2)) {
2094                   e2 = e->Iex.Binop.arg1;
2095                   break;
2096                }
2097                /* Shl32/Shl64/Shr64(0,x) ==> 0 */
2098                if (isZeroU(e->Iex.Binop.arg1)) {
2099                   e2 = e->Iex.Binop.arg1;
2100                   break;
2101                }
2102                break;
2103 
2104             case Iop_Sar32:
2105             case Iop_Shr32:
2106                /* Shr32/Sar32(x,0) ==> x */
2107                if (isZeroU(e->Iex.Binop.arg2)) {
2108                   e2 = e->Iex.Binop.arg1;
2109                   break;
2110                }
2111                break;
2112 
2113             case Iop_Or8:
2114             case Iop_Or16:
2115             case Iop_Or32:
2116             case Iop_Or64:
2117             case Iop_Max32U:
2118                /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
2119                if (isZeroU(e->Iex.Binop.arg2)) {
2120                   e2 = e->Iex.Binop.arg1;
2121                   break;
2122                }
2123                /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
2124                if (isZeroU(e->Iex.Binop.arg1)) {
2125                   e2 = e->Iex.Binop.arg2;
2126                   break;
2127                }
2128                /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
2129                /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
2130                if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) {
2131                   e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2132                   break;
2133                }
2134                /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
2135                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2136                   e2 = e->Iex.Binop.arg1;
2137                   break;
2138                }
2139                break;
2140 
2141             case Iop_Add8:
2142                /* Add8(t,t) ==> t << 1.
2143                   Memcheck doesn't understand that
2144                   x+x produces a defined least significant bit, and it seems
2145                   simplest just to get rid of the problem by rewriting it
2146                   out, since the opportunity to do so exists. */
2147                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2148                   e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
2149                                     IRExpr_Const(IRConst_U8(1)));
2150                   break;
2151                }
2152                break;
2153 
2154                /* NB no Add16(t,t) case yet as no known test case exists */
2155 
2156             case Iop_Add32:
2157             case Iop_Add64:
2158                /* Add32/Add64(x,0) ==> x */
2159                if (isZeroU(e->Iex.Binop.arg2)) {
2160                   e2 = e->Iex.Binop.arg1;
2161                   break;
2162                }
2163                /* Add32/Add64(0,x) ==> x */
2164                if (isZeroU(e->Iex.Binop.arg1)) {
2165                   e2 = e->Iex.Binop.arg2;
2166                   break;
2167                }
2168                /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
2169                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2170                   e2 = IRExpr_Binop(
2171                           e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
2172                           e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
2173                   break;
2174                }
2175                break;
2176 
2177             case Iop_Sub32:
2178             case Iop_Sub64:
2179                /* Sub32/Sub64(x,0) ==> x */
2180                if (isZeroU(e->Iex.Binop.arg2)) {
2181                   e2 = e->Iex.Binop.arg1;
2182                   break;
2183                }
2184                /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */
2185                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2186                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2187                   break;
2188                }
2189                break;
2190             case Iop_Sub8x16:
2191                /* Sub8x16(x,0) ==> x */
2192                if (isZeroV128(e->Iex.Binop.arg2)) {
2193                   e2 = e->Iex.Binop.arg1;
2194                   break;
2195                }
2196                break;
2197 
2198             case Iop_And8:
2199             case Iop_And16:
2200             case Iop_And32:
2201             case Iop_And64:
2202                /* And8/And16/And32/And64(x,1---1b) ==> x */
2203                if (isOnesU(e->Iex.Binop.arg2)) {
2204                   e2 = e->Iex.Binop.arg1;
2205                   break;
2206                }
2207                /* And8/And16/And32/And64(1---1b,x) ==> x */
2208                if (isOnesU(e->Iex.Binop.arg1)) {
2209                   e2 = e->Iex.Binop.arg2;
2210                   break;
2211                }
2212                /* And8/And16/And32/And64(x,0) ==> 0 */
2213                if (isZeroU(e->Iex.Binop.arg2)) {
2214                   e2 = e->Iex.Binop.arg2;
2215                   break;
2216                }
2217                /* And8/And16/And32/And64(0,x) ==> 0 */
2218                if (isZeroU(e->Iex.Binop.arg1)) {
2219                   e2 = e->Iex.Binop.arg1;
2220                   break;
2221                }
2222                /* And8/And16/And32/And64(t,t) ==> t, for some IRTemp t */
2223                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2224                   e2 = e->Iex.Binop.arg1;
2225                   break;
2226                }
2227                break;
2228 
2229             case Iop_AndV128:
2230             case Iop_AndV256:
               /* AndV128/AndV256(t,t)
                  ==> t, for some IRTemp t */
2233                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2234                   e2 = e->Iex.Binop.arg1;
2235                   break;
2236                }
2237                /* Deal with either arg zero.  Could handle other And
2238                   cases here too. */
2239                if (e->Iex.Binop.op == Iop_AndV256
2240                    && (isZeroV256(e->Iex.Binop.arg1)
2241                        || isZeroV256(e->Iex.Binop.arg2))) {
2242                   e2 =  mkZeroOfPrimopResultType(e->Iex.Binop.op);
2243                   break;
2244                } else if (e->Iex.Binop.op == Iop_AndV128
2245                           && (isZeroV128(e->Iex.Binop.arg1)
2246                               || isZeroV128(e->Iex.Binop.arg2))) {
2247                   e2 =  mkZeroOfPrimopResultType(e->Iex.Binop.op);
2248                   break;
2249                }
2250                break;
2251 
2252             case Iop_OrV128:
2253             case Iop_OrV256:
               /* OrV128/OrV256(t,t) ==> t, for some IRTemp t */
2255                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2256                   e2 = e->Iex.Binop.arg1;
2257                   break;
2258                }
2259                /* OrV128(t,0) ==> t */
2260                if (e->Iex.Binop.op == Iop_OrV128) {
2261                   if (isZeroV128(e->Iex.Binop.arg2)) {
2262                      e2 = e->Iex.Binop.arg1;
2263                      break;
2264                   }
2265                   if (isZeroV128(e->Iex.Binop.arg1)) {
2266                      e2 = e->Iex.Binop.arg2;
2267                      break;
2268                   }
2269                }
2270                /* OrV256(t,0) ==> t */
2271                if (e->Iex.Binop.op == Iop_OrV256) {
2272                   if (isZeroV256(e->Iex.Binop.arg2)) {
2273                      e2 = e->Iex.Binop.arg1;
2274                      break;
2275                   }
2276                   //Disabled because there's no known test case right now.
2277                   //if (isZeroV256(e->Iex.Binop.arg1)) {
2278                   //   e2 = e->Iex.Binop.arg2;
2279                   //   break;
2280                   //}
2281                }
2282                break;
2283 
2284             case Iop_Xor8:
2285             case Iop_Xor16:
2286             case Iop_Xor32:
2287             case Iop_Xor64:
2288             case Iop_XorV128:
2289             case Iop_XorV256:
2290                /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
2291                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2292                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2293                   break;
2294                }
2295                /* XorV128(t,0) ==> t */
2296                if (e->Iex.Binop.op == Iop_XorV128) {
2297                   if (isZeroV128(e->Iex.Binop.arg2)) {
2298                      e2 = e->Iex.Binop.arg1;
2299                      break;
2300                   }
2301                   //Disabled because there's no known test case right now.
2302                   //if (isZeroV128(e->Iex.Binop.arg1)) {
2303                   //   e2 = e->Iex.Binop.arg2;
2304                   //   break;
2305                   //}
2306                } else {
2307                   /* Xor8/16/32/64(0,t) ==> t */
2308                   if (isZeroU(e->Iex.Binop.arg1)) {
2309                      e2 = e->Iex.Binop.arg2;
2310                      break;
2311                   }
2312                   /* Xor8/16/32/64(t,0) ==> t */
2313                   if (isZeroU(e->Iex.Binop.arg2)) {
2314                      e2 = e->Iex.Binop.arg1;
2315                      break;
2316                   }
2317                }
2318                break;
2319 
2320             case Iop_CmpNE32:
2321                /* CmpNE32(t,t) ==> 0, for some IRTemp t */
2322                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2323                   e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2324                   break;
2325                }
2326                /* CmpNE32(1Uto32(b), 0) ==> b */
2327                if (isZeroU32(e->Iex.Binop.arg2)) {
2328                   IRExpr* a1 = chase(env, e->Iex.Binop.arg1);
2329                   if (a1 && a1->tag == Iex_Unop
2330                          && a1->Iex.Unop.op == Iop_1Uto32) {
2331                      e2 = a1->Iex.Unop.arg;
2332                      break;
2333                   }
2334                }
2335                break;
2336 
2337             case Iop_CmpEQ32:
2338             case Iop_CmpEQ64:
2339             case Iop_CmpEQ8x8:
2340             case Iop_CmpEQ8x16:
2341             case Iop_CmpEQ16x8:
2342             case Iop_CmpEQ32x4:
2343                if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2344                   e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2345                   break;
2346                }
2347                break;
2348 
2349             default:
2350                break;
2351          }
2352       }
2353       break;
2354 
2355    case Iex_ITE:
2356       /* ITE */
2357       /* is the discriminant is a constant? */
2358       if (e->Iex.ITE.cond->tag == Iex_Const) {
2359          /* assured us by the IR type rules */
2360          vassert(e->Iex.ITE.cond->Iex.Const.con->tag == Ico_U1);
2361          e2 = e->Iex.ITE.cond->Iex.Const.con->Ico.U1
2362                  ? e->Iex.ITE.iftrue : e->Iex.ITE.iffalse;
2363       }
2364       else
2365       /* are the arms identical? (pretty weedy test) */
2366       if (sameIRExprs(env, e->Iex.ITE.iftrue,
2367                            e->Iex.ITE.iffalse)) {
2368          e2 = e->Iex.ITE.iffalse;
2369       }
2370       break;
2371 
2372    default:
2373       /* not considered */
2374       break;
2375    }
2376 
2377    /* Show cases where we've found but not folded 'op(t,t)'.  Be
2378       careful not to call sameIRExprs with values of different types,
2379       though, else it will assert (and so it should!).  We can't
2380       conveniently call typeOfIRExpr on the two args without a whole
2381       bunch of extra plumbing to pass in a type env, so just use a
2382       hacky test to check the arguments are not anything that might
2383       sameIRExprs to assert.  This is only OK because this kludge is
2384       only used for debug printing, not for "real" operation.  For
2385       "real" operation (ie, all other calls to sameIRExprs), it is
2386       essential that the to args have the same type.
2387 
2388       The "right" solution is to plumb the containing block's
2389       IRTypeEnv through to here and use typeOfIRExpr to be sure.  But
2390       that's a bunch of extra parameter passing which will just slow
2391       down the normal case, for no purpose. */
2392    if (vex_control.iropt_verbosity > 0
2393        && e == e2
2394        && e->tag == Iex_Binop
2395        && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1,
2396                                                     e->Iex.Binop.arg2)
2397        && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2398       vex_printf("vex iropt: fold_Expr: no ident rule for: ");
2399       ppIRExpr(e);
2400       vex_printf("\n");
2401    }
2402 
2403    /* Show the overall results of folding. */
2404    if (DEBUG_IROPT && e2 != e) {
2405       vex_printf("FOLD: ");
2406       ppIRExpr(e); vex_printf("  ->  ");
2407       ppIRExpr(e2); vex_printf("\n");
2408    }
2409 
2410    return e2;
2411 
2412  unhandled:
2413 #  if 0
2414    vex_printf("\n\n");
2415    ppIRExpr(e);
2416    vpanic("fold_Expr: no rule for the above");
2417 #  else
2418    if (vex_control.iropt_verbosity > 0) {
2419       vex_printf("vex iropt: fold_Expr: no const rule for: ");
2420       ppIRExpr(e);
2421       vex_printf("\n");
2422    }
2423    return e2;
2424 #  endif
2425 }
2426 
2427 
2428 /* Apply the subst to a simple 1-level expression -- guaranteed to be
2429    1-level due to previous flattening pass. */
2430 
subst_Expr(IRExpr ** env,IRExpr * ex)2431 static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
2432 {
2433    switch (ex->tag) {
2434       case Iex_RdTmp:
2435          if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
2436             IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp];
2437             if (rhs->tag == Iex_RdTmp)
2438                return rhs;
2439             if (rhs->tag == Iex_Const
2440                 && rhs->Iex.Const.con->tag != Ico_F64i)
2441                return rhs;
2442          }
2443          /* not bound in env */
2444          return ex;
2445 
2446       case Iex_Const:
2447       case Iex_Get:
2448          return ex;
2449 
2450       case Iex_GetI:
2451          vassert(isIRAtom(ex->Iex.GetI.ix));
2452          return IRExpr_GetI(
2453             ex->Iex.GetI.descr,
2454             subst_Expr(env, ex->Iex.GetI.ix),
2455             ex->Iex.GetI.bias
2456          );
2457 
2458       case Iex_Qop: {
2459          IRQop* qop = ex->Iex.Qop.details;
2460          vassert(isIRAtom(qop->arg1));
2461          vassert(isIRAtom(qop->arg2));
2462          vassert(isIRAtom(qop->arg3));
2463          vassert(isIRAtom(qop->arg4));
2464          return IRExpr_Qop(
2465                    qop->op,
2466                    subst_Expr(env, qop->arg1),
2467                    subst_Expr(env, qop->arg2),
2468                    subst_Expr(env, qop->arg3),
2469                    subst_Expr(env, qop->arg4)
2470                 );
2471       }
2472 
2473       case Iex_Triop: {
2474          IRTriop* triop = ex->Iex.Triop.details;
2475          vassert(isIRAtom(triop->arg1));
2476          vassert(isIRAtom(triop->arg2));
2477          vassert(isIRAtom(triop->arg3));
2478          return IRExpr_Triop(
2479                    triop->op,
2480                    subst_Expr(env, triop->arg1),
2481                    subst_Expr(env, triop->arg2),
2482                    subst_Expr(env, triop->arg3)
2483                 );
2484       }
2485 
2486       case Iex_Binop:
2487          vassert(isIRAtom(ex->Iex.Binop.arg1));
2488          vassert(isIRAtom(ex->Iex.Binop.arg2));
2489          return IRExpr_Binop(
2490                    ex->Iex.Binop.op,
2491                    subst_Expr(env, ex->Iex.Binop.arg1),
2492                    subst_Expr(env, ex->Iex.Binop.arg2)
2493                 );
2494 
2495       case Iex_Unop:
2496          vassert(isIRAtom(ex->Iex.Unop.arg));
2497          return IRExpr_Unop(
2498                    ex->Iex.Unop.op,
2499                    subst_Expr(env, ex->Iex.Unop.arg)
2500                 );
2501 
2502       case Iex_Load:
2503          vassert(isIRAtom(ex->Iex.Load.addr));
2504          return IRExpr_Load(
2505                    ex->Iex.Load.end,
2506                    ex->Iex.Load.ty,
2507                    subst_Expr(env, ex->Iex.Load.addr)
2508                 );
2509 
2510       case Iex_CCall: {
2511          Int      i;
2512          IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
2513          for (i = 0; args2[i]; i++) {
2514             vassert(isIRAtom(args2[i]));
2515             args2[i] = subst_Expr(env, args2[i]);
2516          }
2517          return IRExpr_CCall(
2518                    ex->Iex.CCall.cee,
2519                    ex->Iex.CCall.retty,
2520                    args2
2521                 );
2522       }
2523 
2524       case Iex_ITE:
2525          vassert(isIRAtom(ex->Iex.ITE.cond));
2526          vassert(isIRAtom(ex->Iex.ITE.iftrue));
2527          vassert(isIRAtom(ex->Iex.ITE.iffalse));
2528          return IRExpr_ITE(
2529                    subst_Expr(env, ex->Iex.ITE.cond),
2530                    subst_Expr(env, ex->Iex.ITE.iftrue),
2531                    subst_Expr(env, ex->Iex.ITE.iffalse)
2532                 );
2533 
2534       default:
2535          vex_printf("\n\n"); ppIRExpr(ex);
2536          vpanic("subst_Expr");
2537 
2538    }
2539 }
2540 
2541 
/* Apply the subst to stmt, then fold the result as much as possible.
   Much simplified due to stmt being previously flattened.  As a
   result of this, the stmt may wind up being turned into a no-op.

   Returns a new statement; the argument is not modified.  Guarded
   statements (StoreG, LoadG, Exit) whose guards fold to constants are
   simplified specially, per the comments in each case below.
*/
static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
{
#  if 0
   vex_printf("\nsubst and fold stmt\n");
   ppIRStmt(st);
   vex_printf("\n");
#  endif

   switch (st->tag) {
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         return IRStmt_AbiHint(
                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)),
                   st->Ist.AbiHint.len,
                   fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia))
                );
      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   fold_Expr(env, subst_Expr(env, st->Ist.Put.data))
                );

      case Ist_PutI: {
         IRPutI *puti, *puti2;
         puti = st->Ist.PutI.details;
         vassert(isIRAtom(puti->ix));
         vassert(isIRAtom(puti->data));
         puti2 = mkIRPutI(puti->descr,
                          fold_Expr(env, subst_Expr(env, puti->ix)),
                          puti->bias,
                          fold_Expr(env, subst_Expr(env, puti->data)));
         return IRStmt_PutI(puti2);
      }

      case Ist_WrTmp:
         /* This is the one place where an expr (st->Ist.WrTmp.data) is
            allowed to be more than just a constant or a tmp. */
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data))
                );

      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         return IRStmt_Store(
                   st->Ist.Store.end,
                   fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)),
                   fold_Expr(env, subst_Expr(env, st->Ist.Store.data))
                );

      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         IRExpr* faddr  = fold_Expr(env, subst_Expr(env, sg->addr));
         IRExpr* fdata  = fold_Expr(env, subst_Expr(env, sg->data));
         IRExpr* fguard = fold_Expr(env, subst_Expr(env, sg->guard));
         if (fguard->tag == Iex_Const) {
            /* The condition on this store has folded down to a constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               /* Guard is always false: the store never happens. */
               return IRStmt_NoOp();
            } else {
               vassert(fguard->Iex.Const.con->Ico.U1 == True);
               /* Guard is always true: demote to an unconditional store. */
               return IRStmt_Store(sg->end, faddr, fdata);
            }
         }
         /* Guard did not fold; rebuild with folded components. */
         return IRStmt_StoreG(sg->end, faddr, fdata, fguard);
      }

      case Ist_LoadG: {
         /* This is complicated.  If the guard folds down to 'false',
            we can replace it with an assignment 'dst := alt', but if
            the guard folds down to 'true', we can't conveniently
            replace it with an unconditional load, because doing so
            requires generating a new temporary, and that is not easy
            to do at this point. */
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         IRExpr* faddr  = fold_Expr(env, subst_Expr(env, lg->addr));
         IRExpr* falt   = fold_Expr(env, subst_Expr(env, lg->alt));
         IRExpr* fguard = fold_Expr(env, subst_Expr(env, lg->guard));
         if (fguard->tag == Iex_Const) {
            /* The condition on this load has folded down to a constant. */
            vassert(fguard->Iex.Const.con->tag == Ico_U1);
            if (fguard->Iex.Const.con->Ico.U1 == False) {
               /* The load is not going to happen -- instead 'alt' is
                  assigned to 'dst'.  */
               return IRStmt_WrTmp(lg->dst, falt);
            } else {
               vassert(fguard->Iex.Const.con->Ico.U1 == True);
               /* The load is always going to happen.  We want to
                  convert to an unconditional load and assign to 'dst'
                  (IRStmt_WrTmp).  Problem is we need an extra temp to
                  hold the loaded value, but none is available.
                  Instead, reconstitute the conditional load (with
                  folded args, of course) and let the caller of this
                  routine deal with the problem.  (cprop_BB notices
                  this case and fixes it up in a post-pass.) */
            }
         }
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst, faddr, falt, fguard);
      }

      case Ist_CAS: {
         IRCAS *cas, *cas2;
         cas = st->Ist.CAS.details;
         vassert(isIRAtom(cas->addr));
         /* The Hi halves are only present for double-word CAS. */
         vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
         vassert(isIRAtom(cas->expdLo));
         vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
         vassert(isIRAtom(cas->dataLo));
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   fold_Expr(env, subst_Expr(env, cas->addr)),
                   cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi))
                               : NULL,
                   fold_Expr(env, subst_Expr(env, cas->expdLo)),
                   cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi))
                               : NULL,
                   fold_Expr(env, subst_Expr(env, cas->dataLo))
                );
         return IRStmt_CAS(cas2);
      }

      case Ist_LLSC:
         vassert(isIRAtom(st->Ist.LLSC.addr));
         /* storedata is NULL for a load-linked, non-NULL for a
            store-conditional. */
         if (st->Ist.LLSC.storedata)
            vassert(isIRAtom(st->Ist.LLSC.storedata));
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)),
                   st->Ist.LLSC.storedata
                      ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata))
                      : NULL
                );

      case Ist_Dirty: {
         Int     i;
         IRDirty *d, *d2;
         d = st->Ist.Dirty.details;
         /* Duplicate the dirty-call descriptor, then substitute/fold
            into the copy's address, guard and arguments in place. */
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            vassert(isIRAtom(d2->mAddr));
            d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr));
         }
         vassert(isIRAtom(d2->guard));
         d2->guard = fold_Expr(env, subst_Expr(env, d2->guard));
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            /* VECRET/BBPTR markers are not real expressions; skip. */
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
               vassert(isIRAtom(arg));
               d2->args[i] = fold_Expr(env, subst_Expr(env, arg));
            }
         }
         return IRStmt_Dirty(d2);
      }

      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);

      case Ist_NoOp:
         return IRStmt_NoOp();

      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);

      case Ist_Exit: {
         IRExpr* fcond;
         vassert(isIRAtom(st->Ist.Exit.guard));
         fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard));
         if (fcond->tag == Iex_Const) {
            /* Interesting.  The condition on this exit has folded down to
               a constant. */
            vassert(fcond->Iex.Const.con->tag == Ico_U1);
            if (fcond->Iex.Const.con->Ico.U1 == False) {
               /* exit is never going to happen, so dump the statement. */
               return IRStmt_NoOp();
            } else {
               vassert(fcond->Iex.Const.con->Ico.U1 == True);
               /* Hmmm.  The exit has become unconditional.  Leave it
                  as it is for now, since we'd have to truncate the BB
                  at this point, which is tricky.  Such truncation is
                  done later by the dead-code elimination pass. */
               /* fall out into the reconstruct-the-exit code. */
               if (vex_control.iropt_verbosity > 0)
                  /* really a misuse of vex_control.iropt_verbosity */
                  vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
            }
         }
         return IRStmt_Exit(fcond, st->Ist.Exit.jk,
                                   st->Ist.Exit.dst, st->Ist.Exit.offsIP);
      }

   default:
      vex_printf("\n"); ppIRStmt(st);
      vpanic("subst_and_fold_Stmt");
   }
}
2755 
2756 
/* Constant- and copy-propagate through 'in', folding as we go, and
   return a new IRSB.  'in' is not modified.  Also turns any LoadG
   whose guard folds to the constant True into an unconditional
   load-plus-conversion pair, via a placeholder/fixup scheme described
   below. */
IRSB* cprop_BB ( IRSB* in )
{
   Int      i;
   IRSB*    out;
   IRStmt*  st2;
   Int      n_tmps = in->tyenv->types_used;
   /* Substitution environment, indexed by IRTemp: NULL means
      "no binding", otherwise the bound expression. */
   IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   /* Keep track of IRStmt_LoadGs that we need to revisit after
      processing all the other statements. */
   const Int N_FIXUPS = 16;
   Int fixups[N_FIXUPS]; /* indices in the stmt array of 'out' */
   Int n_fixups = 0;

   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );

   /* Set up the env with which travels forward.  This holds a
      substitution, mapping IRTemps to IRExprs. The environment
      is to be applied as we move along.  Keys are IRTemps.
      Values are IRExpr*s.
   */
   for (i = 0; i < n_tmps; i++)
      env[i] = NULL;

   /* For each original SSA-form stmt ... */
   for (i = 0; i < in->stmts_used; i++) {

      /* First apply the substitution to the current stmt.  This
         propagates in any constants and tmp-tmp assignments
         accumulated prior to this point.  As part of the subst_Stmt
         call, also then fold any constant expressions resulting. */

      st2 = in->stmts[i];

      /* perhaps st2 is already a no-op? */
      if (st2->tag == Ist_NoOp) continue;

      st2 = subst_and_fold_Stmt( env, st2 );

      /* Deal with some post-folding special cases. */
      switch (st2->tag) {

         /* If the statement has been folded into a no-op, forget
            it. */
         case Ist_NoOp:
            continue;

         /* If the statement assigns to an IRTemp add it to the
            running environment. This is for the benefit of copy
            propagation and to allow sameIRExpr look through
            IRTemps. */
         case Ist_WrTmp: {
            /* SSA form guarantees each tmp is assigned only once. */
            vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
            env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;

            /* 't1 = t2' -- don't add to BB; will be optimized out */
            if (st2->Ist.WrTmp.data->tag == Iex_RdTmp)
               continue;

            /* 't = const' && 'const != F64i' -- don't add to BB
               Note, we choose not to propagate const when const is an
               F64i, so that F64i literals can be CSE'd later.  This
               helps x86 floating point code generation. */
            if (st2->Ist.WrTmp.data->tag == Iex_Const
                && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
               continue;
            }
            /* else add it to the output, as normal */
            break;
         }

         case Ist_LoadG: {
            IRLoadG* lg    = st2->Ist.LoadG.details;
            IRExpr*  guard = lg->guard;
            if (guard->tag == Iex_Const) {
               /* The guard has folded to a constant, and that
                  constant must be 1:I1, since subst_and_fold_Stmt
                  folds out the case 0:I1 by itself. */
               vassert(guard->Iex.Const.con->tag == Ico_U1);
               vassert(guard->Iex.Const.con->Ico.U1 == True);
               /* Add a NoOp here as a placeholder, and make a note of
                  where it is in the output block.  Afterwards we'll
                  come back here and transform the NoOp and the LoadG
                  into a load-convert pair.  The fixups[] entry
                  refers to the inserted NoOp, and we expect to find
                  the relevant LoadG immediately after it. */
               vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
               if (n_fixups < N_FIXUPS) {
                  /* If the table is full, we simply leave the LoadG
                     conditional -- correct, just less optimal. */
                  fixups[n_fixups++] = out->stmts_used;
                  addStmtToIRSB( out, IRStmt_NoOp() );
               }
            }
            /* And always add the LoadG to the output, regardless. */
            break;
         }

      default:
         break;
      }

      /* Not interesting, copy st2 into the output block. */
      addStmtToIRSB( out, st2 );
   }

#  if STATS_IROPT
   vex_printf("sameIRExpr: invoked = %u/%u  equal = %u/%u max_nodes = %u\n",
              invocation_count, recursion_count, success_count,
              recursion_success_count, max_nodes_visited);
#  endif

   /* The block-end expression gets substitution but no folding. */
   out->next     = subst_Expr( env, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;

   /* Process any leftover unconditional LoadGs that we noticed
      in the main pass. */
   vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
   for (i = 0; i < n_fixups; i++) {
      Int ix = fixups[i];
      /* Carefully verify that the LoadG has the expected form:
         a NoOp placeholder at 'ix' followed immediately by the
         guarded load whose guard is the constant True. */
      vassert(ix >= 0 && ix+1 < out->stmts_used);
      IRStmt* nop = out->stmts[ix];
      IRStmt* lgu = out->stmts[ix+1];
      vassert(nop->tag == Ist_NoOp);
      vassert(lgu->tag == Ist_LoadG);
      IRLoadG* lg    = lgu->Ist.LoadG.details;
      IRExpr*  guard = lg->guard;
      vassert(guard->Iex.Const.con->tag == Ico_U1);
      vassert(guard->Iex.Const.con->Ico.U1 == True);
      /* Figure out the load and result types, and the implied
         conversion operation. */
      IRType cvtRes = Ity_INVALID, cvtArg = Ity_INVALID;
      typeOfIRLoadGOp(lg->cvt, &cvtRes, &cvtArg);
      IROp cvtOp = Iop_INVALID;
      switch (lg->cvt) {
         case ILGop_IdentV128:
         case ILGop_Ident64:
         case ILGop_Ident32: break;
         case ILGop_8Uto32:  cvtOp = Iop_8Uto32;  break;
         case ILGop_8Sto32:  cvtOp = Iop_8Sto32;  break;
         case ILGop_16Uto32: cvtOp = Iop_16Uto32; break;
         case ILGop_16Sto32: cvtOp = Iop_16Sto32; break;
         default: vpanic("cprop_BB: unhandled ILGOp");
      }
      /* Replace the placeholder NoOp by the required unconditional
         load. */
      IRTemp tLoaded = newIRTemp(out->tyenv, cvtArg);
      out->stmts[ix]
         = IRStmt_WrTmp(tLoaded,
                        IRExpr_Load(lg->end, cvtArg, lg->addr));
      /* Replace the LoadG by a conversion from the loaded value's
         type to the required result type. */
      out->stmts[ix+1]
         = IRStmt_WrTmp(
              lg->dst, cvtOp == Iop_INVALID
                          ? IRExpr_RdTmp(tLoaded)
                          : IRExpr_Unop(cvtOp, IRExpr_RdTmp(tLoaded)));
   }

   return out;
}
2918 
2919 
2920 /*---------------------------------------------------------------*/
2921 /*--- Dead code (t = E) removal                               ---*/
2922 /*---------------------------------------------------------------*/
2923 
2924 /* As a side effect, also removes all code following an unconditional
2925    side exit. */
2926 
2927 /* The type of the HashHW map is: a map from IRTemp to nothing
2928    -- really just operating a set or IRTemps.
2929 */
2930 
2931 inline
addUses_Temp(Bool * set,IRTemp tmp)2932 static void addUses_Temp ( Bool* set, IRTemp tmp )
2933 {
2934    set[(Int)tmp] = True;
2935 }
2936 
/* Record, in 'set', every IRTemp read anywhere inside 'e'. */
static void addUses_Expr ( Bool* set, IRExpr* e )
{
   Int i;
   switch (e->tag) {
      case Iex_RdTmp:
         addUses_Temp(set, e->Iex.RdTmp.tmp);
         return;
      case Iex_Const:
      case Iex_Get:
         /* No temps inside these. */
         return;
      case Iex_GetI:
         addUses_Expr(set, e->Iex.GetI.ix);
         return;
      case Iex_Unop:
         addUses_Expr(set, e->Iex.Unop.arg);
         return;
      case Iex_Binop:
         addUses_Expr(set, e->Iex.Binop.arg1);
         addUses_Expr(set, e->Iex.Binop.arg2);
         return;
      case Iex_Triop: {
         IRTriop* t = e->Iex.Triop.details;
         addUses_Expr(set, t->arg1);
         addUses_Expr(set, t->arg2);
         addUses_Expr(set, t->arg3);
         return;
      }
      case Iex_Qop: {
         IRQop* q = e->Iex.Qop.details;
         addUses_Expr(set, q->arg1);
         addUses_Expr(set, q->arg2);
         addUses_Expr(set, q->arg3);
         addUses_Expr(set, q->arg4);
         return;
      }
      case Iex_Load:
         addUses_Expr(set, e->Iex.Load.addr);
         return;
      case Iex_CCall:
         /* Arg vector is NULL-terminated. */
         for (i = 0; e->Iex.CCall.args[i] != NULL; i++)
            addUses_Expr(set, e->Iex.CCall.args[i]);
         return;
      case Iex_ITE:
         addUses_Expr(set, e->Iex.ITE.cond);
         addUses_Expr(set, e->Iex.ITE.iftrue);
         addUses_Expr(set, e->Iex.ITE.iffalse);
         return;
      default:
         vex_printf("\n");
         ppIRExpr(e);
         vpanic("addUses_Expr");
   }
}
2986 
/* Record, in 'set', every IRTemp read by statement 'st'. */
static void addUses_Stmt ( Bool* set, IRStmt* st )
{
   Int i;
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* These read no temps. */
         return;
      case Ist_AbiHint:
         addUses_Expr(set, st->Ist.AbiHint.base);
         addUses_Expr(set, st->Ist.AbiHint.nia);
         return;
      case Ist_Put:
         addUses_Expr(set, st->Ist.Put.data);
         return;
      case Ist_PutI: {
         IRPutI* puti = st->Ist.PutI.details;
         addUses_Expr(set, puti->ix);
         addUses_Expr(set, puti->data);
         return;
      }
      case Ist_WrTmp:
         addUses_Expr(set, st->Ist.WrTmp.data);
         return;
      case Ist_Store:
         addUses_Expr(set, st->Ist.Store.addr);
         addUses_Expr(set, st->Ist.Store.data);
         return;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         addUses_Expr(set, sg->addr);
         addUses_Expr(set, sg->data);
         addUses_Expr(set, sg->guard);
         return;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         addUses_Expr(set, lg->addr);
         addUses_Expr(set, lg->alt);
         addUses_Expr(set, lg->guard);
         return;
      }
      case Ist_CAS: {
         IRCAS* cas = st->Ist.CAS.details;
         addUses_Expr(set, cas->addr);
         /* Hi halves are only present for double-word CAS. */
         if (cas->expdHi)
            addUses_Expr(set, cas->expdHi);
         addUses_Expr(set, cas->expdLo);
         if (cas->dataHi)
            addUses_Expr(set, cas->dataHi);
         addUses_Expr(set, cas->dataLo);
         return;
      }
      case Ist_LLSC:
         addUses_Expr(set, st->Ist.LLSC.addr);
         /* storedata is NULL for load-linked. */
         if (st->Ist.LLSC.storedata)
            addUses_Expr(set, st->Ist.LLSC.storedata);
         return;
      case Ist_Dirty: {
         IRDirty* d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None)
            addUses_Expr(set, d->mAddr);
         addUses_Expr(set, d->guard);
         for (i = 0; d->args[i] != NULL; i++) {
            IRExpr* arg = d->args[i];
            /* VECRET/BBPTR markers are not real expressions; skip. */
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               addUses_Expr(set, arg);
         }
         return;
      }
      case Ist_Exit:
         addUses_Expr(set, st->Ist.Exit.guard);
         return;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vpanic("addUses_Stmt");
   }
}
3064 
3065 
3066 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
isZeroU1(IRExpr * e)3067 static Bool isZeroU1 ( IRExpr* e )
3068 {
3069    return toBool( e->tag == Iex_Const
3070                   && e->Iex.Const.con->tag == Ico_U1
3071                   && e->Iex.Const.con->Ico.U1 == False );
3072 }
3073 
3074 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
isOneU1(IRExpr * e)3075 static Bool isOneU1 ( IRExpr* e )
3076 {
3077    return toBool( e->tag == Iex_Const
3078                   && e->Iex.Const.con->tag == Ico_U1
3079                   && e->Iex.Const.con->Ico.U1 == True );
3080 }
3081 
3082 
3083 /* Note, this destructively modifies the given IRSB. */
3084 
3085 /* Scan backwards through statements, carrying a set of IRTemps which
3086    are known to be used after the current point.  On encountering 't =
3087    E', delete the binding if it is not used.  Otherwise, add any temp
3088    uses to the set and keep on moving backwards.
3089 
3090    As an enhancement, the first (backwards) pass searches for IR exits
3091    with always-taken conditions and notes the location of the earliest
3092    one in the block.  If any such are found, a second pass copies the
3093    exit destination and jump kind to the bb-end.  Then, the exit and
3094    all statements following it are turned into no-ops.
3095 */
3096 
/* notstatic */ void do_deadcode_BB ( IRSB* bb )
{
   /* Dead-code removal over one IRSB.  Kills (a) assignments to
      IRTemps that are never read, (b) dirty helper calls whose guard
      is a constant zero, and (c) -- if an always-taken Exit is
      present -- that exit and everything after it, once its target
      has been copied to the block end. */
   Int   n_tmps = bb->tyenv->types_used;
   Bool* used   = LibVEX_Alloc_inline(n_tmps * sizeof(Bool));
   Int   j;

   for (j = 0; j < n_tmps; j++)
      used[j] = False;

   /* The block's next-field can read temps too; note those first. */
   addUses_Expr(used, bb->next);

   /* Pass 1: walk backwards, so every use of a temp is seen before
      the assignment which defines it. */
   Int firstUncondExit = -1;
   for (j = bb->stmts_used - 1; j >= 0; j--) {
      IRStmt* st = bb->stmts[j];
      if (st->tag == Ist_NoOp)
         continue;
      /* Remember the earliest exit whose guard is constant-true. */
      if (st->tag == Ist_Exit && isOneU1(st->Ist.Exit.guard))
         firstUncondExit = j;
      if (st->tag == Ist_WrTmp && !used[(Int)(st->Ist.WrTmp.tmp)]) {
         /* An assignment to a temp nobody reads: delete it. */
         if (DEBUG_IROPT) {
            vex_printf("DEAD: ");
            ppIRStmt(st);
            vex_printf("\n");
         }
         bb->stmts[j] = IRStmt_NoOp();
      }
      else if (st->tag == Ist_Dirty
               && st->Ist.Dirty.details->guard
               && isZeroU1(st->Ist.Dirty.details->guard)) {
         /* Constant-false guard: this helper can never run. */
         bb->stmts[j] = IRStmt_NoOp();
      }
      else {
         /* The statement survives; record the temps it reads. */
         addUses_Stmt(used, st);
      }
   }

   /* Pass 2: if an always-taken exit was seen, promote its target to
      the block end and no-op it plus all following statements. */
   if (firstUncondExit != -1) {
      if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
                        firstUncondExit);
      vassert(firstUncondExit >= 0
              && firstUncondExit < bb->stmts_used);
      IRStmt* ex = bb->stmts[firstUncondExit];
      bb->next     = IRExpr_Const( ex->Ist.Exit.dst );
      bb->jumpkind = ex->Ist.Exit.jk;
      bb->offsIP   = ex->Ist.Exit.offsIP;
      for (j = firstUncondExit; j < bb->stmts_used; j++)
         bb->stmts[j] = IRStmt_NoOp();
   }
}
3164 
3165 
3166 /*---------------------------------------------------------------*/
3167 /*--- Specialisation of helper function calls, in             ---*/
3168 /*--- collaboration with the front end                        ---*/
3169 /*---------------------------------------------------------------*/
3170 
3171 static
spec_helpers_BB(IRSB * bb,IRExpr * (* specHelper)(const HChar *,IRExpr **,IRStmt **,Int))3172 IRSB* spec_helpers_BB(
3173          IRSB* bb,
3174          IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int)
3175       )
3176 {
3177    Int     i;
3178    IRStmt* st;
3179    IRExpr* ex;
3180    Bool    any = False;
3181 
3182    for (i = bb->stmts_used-1; i >= 0; i--) {
3183       st = bb->stmts[i];
3184 
3185       if (st->tag != Ist_WrTmp
3186           || st->Ist.WrTmp.data->tag != Iex_CCall)
3187          continue;
3188 
3189       ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
3190                           st->Ist.WrTmp.data->Iex.CCall.args,
3191                           &bb->stmts[0], i );
3192       if (!ex)
3193         /* the front end can't think of a suitable replacement */
3194         continue;
3195 
3196       /* We got something better.  Install it in the bb. */
3197       any = True;
3198       bb->stmts[i]
3199          = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);
3200 
3201       if (0) {
3202          vex_printf("SPEC: ");
3203          ppIRExpr(st->Ist.WrTmp.data);
3204          vex_printf("  -->  ");
3205          ppIRExpr(ex);
3206          vex_printf("\n");
3207       }
3208    }
3209 
3210    if (any)
3211       bb = flatten_BB(bb);
3212    return bb;
3213 }
3214 
3215 
3216 /*---------------------------------------------------------------*/
3217 /*--- Determination of guest state aliasing relationships     ---*/
3218 /*---------------------------------------------------------------*/
3219 
3220 /* These are helper functions for CSE and GetI/PutI transformations.
3221 
3222    Determine, to the extent possible, the relationship between two
3223    guest state accesses.  The possible outcomes are:
3224 
3225    * Exact alias.  These two accesses denote precisely the same
3226      piece of the guest state.
3227 
3228    * Definitely no alias.  These two accesses are guaranteed not to
3229      overlap any part of the guest state.
3230 
3231    * Unknown -- if neither of the above can be established.
3232 
3233    If in doubt, return Unknown.  */
3234 
/* Outcome of comparing two guest state accesses:
   ExactAlias   - provably the very same piece of state
   NoAlias      - provably disjoint
   UnknownAlias - neither could be established (the safe default) */
typedef
   enum { ExactAlias, NoAlias, UnknownAlias }
   GSAliasing;
3238 
3239 
3240 /* Produces the alias relation between an indexed guest
3241    state access and a non-indexed access. */
3242 
3243 static
getAliasingRelation_IC(IRRegArray * descr1,IRExpr * ix1,Int offset2,IRType ty2)3244 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
3245                                     Int offset2, IRType ty2 )
3246 {
3247    UInt minoff1, maxoff1, minoff2, maxoff2;
3248 
3249    getArrayBounds( descr1, &minoff1, &maxoff1 );
3250    minoff2 = offset2;
3251    maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
3252 
3253    if (maxoff1 < minoff2 || maxoff2 < minoff1)
3254       return NoAlias;
3255 
3256    /* Could probably do better here if required.  For the moment
3257       however just claim not to know anything more. */
3258    return UnknownAlias;
3259 }
3260 
3261 
3262 /* Produces the alias relation between two indexed guest state
3263    accesses. */
3264 
3265 static
getAliasingRelation_II(IRRegArray * descr1,IRExpr * ix1,Int bias1,IRRegArray * descr2,IRExpr * ix2,Int bias2)3266 GSAliasing getAliasingRelation_II (
3267               IRRegArray* descr1, IRExpr* ix1, Int bias1,
3268               IRRegArray* descr2, IRExpr* ix2, Int bias2
3269            )
3270 {
3271    UInt minoff1, maxoff1, minoff2, maxoff2;
3272    Int  iters;
3273 
3274    /* First try hard to show they don't alias. */
3275    getArrayBounds( descr1, &minoff1, &maxoff1 );
3276    getArrayBounds( descr2, &minoff2, &maxoff2 );
3277    if (maxoff1 < minoff2 || maxoff2 < minoff1)
3278       return NoAlias;
3279 
3280    /* So the two arrays at least partially overlap.  To get any
3281       further we'll have to be sure that the descriptors are
3282       identical. */
3283    if (!eqIRRegArray(descr1, descr2))
3284       return UnknownAlias;
3285 
3286    /* The descriptors are identical.  Now the only difference can be
3287       in the index expressions.  If they cannot be shown to be
3288       identical, we have to say we don't know what the aliasing
3289       relation will be.  Now, since the IR is flattened, the index
3290       expressions should be atoms -- either consts or tmps.  So that
3291       makes the comparison simple. */
3292    vassert(isIRAtom(ix1));
3293    vassert(isIRAtom(ix2));
3294    if (!eqIRAtom(ix1,ix2))
3295       return UnknownAlias;
3296 
3297    /* Ok, the index expressions are identical.  So now the only way
3298       they can be different is in the bias.  Normalise this
3299       paranoidly, to reliably establish equality/non-equality. */
3300 
3301    /* So now we know that the GetI and PutI index the same array
3302       with the same base.  Are the offsets the same, modulo the
3303       array size?  Do this paranoidly. */
3304    vassert(descr1->nElems == descr2->nElems);
3305    vassert(descr1->elemTy == descr2->elemTy);
3306    vassert(descr1->base   == descr2->base);
3307    iters = 0;
3308    while (bias1 < 0 || bias2 < 0) {
3309       bias1 += descr1->nElems;
3310       bias2 += descr1->nElems;
3311       iters++;
3312       if (iters > 10)
3313          vpanic("getAliasingRelation: iters");
3314    }
3315    vassert(bias1 >= 0 && bias2 >= 0);
3316    bias1 %= descr1->nElems;
3317    bias2 %= descr1->nElems;
3318    vassert(bias1 >= 0 && bias1 < descr1->nElems);
3319    vassert(bias2 >= 0 && bias2 < descr1->nElems);
3320 
3321    /* Finally, biasP and biasG are normalised into the range
3322       0 .. descrP/G->nElems - 1.  And so we can establish
3323       equality/non-equality. */
3324 
3325    return bias1==bias2 ? ExactAlias : NoAlias;
3326 }
3327 
3328 
3329 /*---------------------------------------------------------------*/
3330 /*--- Common Subexpression Elimination                        ---*/
3331 /*---------------------------------------------------------------*/
3332 
3333 /* Expensive in time and space. */
3334 
3335 /* Uses two environments:
3336    a IRTemp -> IRTemp mapping
3337    a mapping from AvailExpr* to IRTemp
3338 */
3339 
/* A flat-IR atom in compact form: either a temporary (tag TCt, field
   u.tmp) or a constant (tag TCc, field u.con). */
typedef
   struct {
      enum { TCc, TCt } tag;
      union { IRTemp tmp; IRConst* con; } u;
   }
   TmpOrConst;
3346 
eqTmpOrConst(TmpOrConst * tc1,TmpOrConst * tc2)3347 static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
3348 {
3349    if (tc1->tag != tc2->tag)
3350       return False;
3351    switch (tc1->tag) {
3352       case TCc:
3353          return eqIRConst(tc1->u.con, tc2->u.con);
3354       case TCt:
3355          return tc1->u.tmp == tc2->u.tmp;
3356       default:
3357          vpanic("eqTmpOrConst");
3358    }
3359 }
3360 
eqIRCallee(IRCallee * cee1,IRCallee * cee2)3361 static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 )
3362 {
3363    Bool eq = cee1->addr == cee2->addr;
3364    if (eq) {
3365       vassert(cee1->regparms == cee2->regparms);
3366       vassert(cee1->mcx_mask == cee2->mcx_mask);
3367       /* Names should be the same too, but we don't bother to
3368          check. */
3369    }
3370    return eq;
3371 }
3372 
3373 /* Convert an atomic IRExpr* to a TmpOrConst. */
irExpr_to_TmpOrConst(TmpOrConst * tc,IRExpr * e)3374 static void irExpr_to_TmpOrConst ( /*OUT*/TmpOrConst* tc, IRExpr* e )
3375 {
3376    switch (e->tag) {
3377       case Iex_RdTmp:
3378          tc->tag   = TCt;
3379          tc->u.tmp = e->Iex.RdTmp.tmp;
3380          break;
3381       case Iex_Const:
3382          tc->tag   = TCc;
3383          tc->u.con = e->Iex.Const.con;
3384          break;
3385       default:
3386          /* Getting here is a serious error.  It means that the
3387             presented arg isn't an IR atom, as it should be. */
3388          vpanic("irExpr_to_TmpOrConst");
3389    }
3390 }
3391 
3392 /* Convert a TmpOrConst to an atomic IRExpr*. */
tmpOrConst_to_IRExpr(TmpOrConst * tc)3393 static IRExpr* tmpOrConst_to_IRExpr ( TmpOrConst* tc )
3394 {
3395    switch (tc->tag) {
3396       case TCc: return IRExpr_Const(tc->u.con);
3397       case TCt: return IRExpr_RdTmp(tc->u.tmp);
3398       default:  vpanic("tmpOrConst_to_IRExpr");
3399    }
3400 }
3401 
3402 /* Convert a NULL terminated IRExpr* vector to an array of
3403    TmpOrConsts, and a length. */
irExprVec_to_TmpOrConsts(TmpOrConst ** outs,Int * nOuts,IRExpr ** ins)3404 static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs,
3405                                        /*OUT*/Int* nOuts,
3406                                        IRExpr** ins )
3407 {
3408    Int i, n;
3409    /* We have to make two passes, one to count, one to copy. */
3410    for (n = 0; ins[n]; n++)
3411       ;
3412    *outs  = LibVEX_Alloc_inline(n * sizeof(TmpOrConst));
3413    *nOuts = n;
3414    /* and now copy .. */
3415    for (i = 0; i < n; i++) {
3416       IRExpr*     arg = ins[i];
3417       TmpOrConst* dst = &(*outs)[i];
3418       irExpr_to_TmpOrConst(dst, arg);
3419    }
3420 }
3421 
/* An "available expression": the key type for the CSE environment.
   Each variant encodes one CSE-able IRExpr shape; the tag spells out
   the operand kinds (t = tmp, c = const), e.g. Btc = binop(tmp,const).
   Constant operands are stored by value so the record is
   self-contained. */
typedef
   struct {
      enum { Ut, Btt, Btc, Bct, Cf64i, Ittt, Itct, Ittc, Itcc, GetIt,
             CCall, Load
      } tag;
      union {
         /* unop(tmp) */
         struct {
            IROp   op;
            IRTemp arg;
         } Ut;
         /* binop(tmp,tmp) */
         struct {
            IROp   op;
            IRTemp arg1;
            IRTemp arg2;
         } Btt;
         /* binop(tmp,const) */
         struct {
            IROp    op;
            IRTemp  arg1;
            IRConst con2;
         } Btc;
         /* binop(const,tmp) */
         struct {
            IROp    op;
            IRConst con1;
            IRTemp  arg2;
         } Bct;
         /* F64i-style const */
         struct {
            ULong f64i;
         } Cf64i;
         /* ITE(tmp,tmp,tmp) */
         struct {
            IRTemp co;
            IRTemp e1;
            IRTemp e0;
         } Ittt;
         /* ITE(tmp,tmp,const) */
         struct {
            IRTemp  co;
            IRTemp  e1;
            IRConst con0;
         } Ittc;
         /* ITE(tmp,const,tmp) */
         struct {
            IRTemp  co;
            IRConst con1;
            IRTemp  e0;
         } Itct;
         /* ITE(tmp,const,const) */
         struct {
            IRTemp  co;
            IRConst con1;
            IRConst con0;
         } Itcc;
         /* GetI(descr,tmp,bias)*/
         struct {
            IRRegArray* descr;
            IRTemp      ix;
            Int         bias;
         } GetIt;
         /* Clean helper call */
         struct {
            IRCallee*   cee;
            TmpOrConst* args;
            Int         nArgs;
            IRType      retty;
         } CCall;
         /* Load(end,ty,addr) */
         struct {
            IREndness  end;
            IRType     ty;
            TmpOrConst addr;
         } Load;
      } u;
   }
   AvailExpr;
3501 
eq_AvailExpr(AvailExpr * a1,AvailExpr * a2)3502 static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
3503 {
3504    if (LIKELY(a1->tag != a2->tag))
3505       return False;
3506    switch (a1->tag) {
3507       case Ut:
3508          return toBool(
3509                 a1->u.Ut.op == a2->u.Ut.op
3510                 && a1->u.Ut.arg == a2->u.Ut.arg);
3511       case Btt:
3512          return toBool(
3513                 a1->u.Btt.op == a2->u.Btt.op
3514                 && a1->u.Btt.arg1 == a2->u.Btt.arg1
3515                 && a1->u.Btt.arg2 == a2->u.Btt.arg2);
3516       case Btc:
3517          return toBool(
3518                 a1->u.Btc.op == a2->u.Btc.op
3519                 && a1->u.Btc.arg1 == a2->u.Btc.arg1
3520                 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
3521       case Bct:
3522          return toBool(
3523                 a1->u.Bct.op == a2->u.Bct.op
3524                 && a1->u.Bct.arg2 == a2->u.Bct.arg2
3525                 && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
3526       case Cf64i:
3527          return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
3528       case Ittt:
3529          return toBool(a1->u.Ittt.co == a2->u.Ittt.co
3530                        && a1->u.Ittt.e1 == a2->u.Ittt.e1
3531                        && a1->u.Ittt.e0 == a2->u.Ittt.e0);
3532       case Ittc:
3533          return toBool(a1->u.Ittc.co == a2->u.Ittc.co
3534                        && a1->u.Ittc.e1 == a2->u.Ittc.e1
3535                        && eqIRConst(&a1->u.Ittc.con0, &a2->u.Ittc.con0));
3536       case Itct:
3537          return toBool(a1->u.Itct.co == a2->u.Itct.co
3538                        && eqIRConst(&a1->u.Itct.con1, &a2->u.Itct.con1)
3539                        && a1->u.Itct.e0 == a2->u.Itct.e0);
3540       case Itcc:
3541          return toBool(a1->u.Itcc.co == a2->u.Itcc.co
3542                        && eqIRConst(&a1->u.Itcc.con1, &a2->u.Itcc.con1)
3543                        && eqIRConst(&a1->u.Itcc.con0, &a2->u.Itcc.con0));
3544       case GetIt:
3545          return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
3546                        && a1->u.GetIt.ix == a2->u.GetIt.ix
3547                        && a1->u.GetIt.bias == a2->u.GetIt.bias);
3548       case CCall: {
3549          Int  i, n;
3550          Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs
3551                    && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee);
3552          if (eq) {
3553             n = a1->u.CCall.nArgs;
3554             for (i = 0; i < n; i++) {
3555                if (!eqTmpOrConst( &a1->u.CCall.args[i],
3556                                   &a2->u.CCall.args[i] )) {
3557                   eq = False;
3558                   break;
3559                }
3560             }
3561          }
3562          if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty);
3563          return eq;
3564       }
3565       case Load: {
3566          Bool eq = toBool(a1->u.Load.end == a2->u.Load.end
3567                           && a1->u.Load.ty == a2->u.Load.ty
3568                           && eqTmpOrConst(&a1->u.Load.addr, &a2->u.Load.addr));
3569          return eq;
3570       }
3571       default:
3572          vpanic("eq_AvailExpr");
3573    }
3574 }
3575 
availExpr_to_IRExpr(AvailExpr * ae)3576 static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
3577 {
3578    IRConst *con, *con0, *con1;
3579    switch (ae->tag) {
3580       case Ut:
3581          return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
3582       case Btt:
3583          return IRExpr_Binop( ae->u.Btt.op,
3584                               IRExpr_RdTmp(ae->u.Btt.arg1),
3585                               IRExpr_RdTmp(ae->u.Btt.arg2) );
3586       case Btc:
3587          con = LibVEX_Alloc_inline(sizeof(IRConst));
3588          *con = ae->u.Btc.con2;
3589          return IRExpr_Binop( ae->u.Btc.op,
3590                               IRExpr_RdTmp(ae->u.Btc.arg1),
3591                               IRExpr_Const(con) );
3592       case Bct:
3593          con = LibVEX_Alloc_inline(sizeof(IRConst));
3594          *con = ae->u.Bct.con1;
3595          return IRExpr_Binop( ae->u.Bct.op,
3596                               IRExpr_Const(con),
3597                               IRExpr_RdTmp(ae->u.Bct.arg2) );
3598       case Cf64i:
3599          return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
3600       case Ittt:
3601          return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittt.co),
3602                            IRExpr_RdTmp(ae->u.Ittt.e1),
3603                            IRExpr_RdTmp(ae->u.Ittt.e0));
3604       case Ittc:
3605          con0 = LibVEX_Alloc_inline(sizeof(IRConst));
3606          *con0 = ae->u.Ittc.con0;
3607          return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittc.co),
3608                            IRExpr_RdTmp(ae->u.Ittc.e1),
3609                            IRExpr_Const(con0));
3610       case Itct:
3611          con1 = LibVEX_Alloc_inline(sizeof(IRConst));
3612          *con1 = ae->u.Itct.con1;
3613          return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itct.co),
3614                            IRExpr_Const(con1),
3615                            IRExpr_RdTmp(ae->u.Itct.e0));
3616 
3617       case Itcc:
3618          con0 = LibVEX_Alloc_inline(sizeof(IRConst));
3619          con1 = LibVEX_Alloc_inline(sizeof(IRConst));
3620          *con0 = ae->u.Itcc.con0;
3621          *con1 = ae->u.Itcc.con1;
3622          return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itcc.co),
3623                            IRExpr_Const(con1),
3624                            IRExpr_Const(con0));
3625       case GetIt:
3626          return IRExpr_GetI(ae->u.GetIt.descr,
3627                             IRExpr_RdTmp(ae->u.GetIt.ix),
3628                             ae->u.GetIt.bias);
3629       case CCall: {
3630          Int i, n = ae->u.CCall.nArgs;
3631          vassert(n >= 0);
3632          IRExpr** vec = LibVEX_Alloc_inline((n+1) * sizeof(IRExpr*));
3633          vec[n] = NULL;
3634          for (i = 0; i < n; i++) {
3635             vec[i] = tmpOrConst_to_IRExpr(&ae->u.CCall.args[i]);
3636          }
3637          return IRExpr_CCall(ae->u.CCall.cee,
3638                              ae->u.CCall.retty,
3639                              vec);
3640       }
3641       case Load:
3642          return IRExpr_Load(ae->u.Load.end, ae->u.Load.ty,
3643                             tmpOrConst_to_IRExpr(&ae->u.Load.addr));
3644       default:
3645          vpanic("availExpr_to_IRExpr");
3646    }
3647 }
3648 
3649 inline
subst_AvailExpr_Temp(HashHW * env,IRTemp tmp)3650 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
3651 {
3652    HWord res;
3653    /* env :: IRTemp -> IRTemp */
3654    if (lookupHHW( env, &res, (HWord)tmp ))
3655       return (IRTemp)res;
3656    else
3657       return tmp;
3658 }
3659 
3660 inline
subst_AvailExpr_TmpOrConst(TmpOrConst * tc,HashHW * env)3661 static void subst_AvailExpr_TmpOrConst ( /*MB_MOD*/TmpOrConst* tc,
3662                                           HashHW* env )
3663 {
3664    /* env :: IRTemp -> IRTemp */
3665    if (tc->tag == TCt) {
3666       tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp );
3667    }
3668 }
3669 
static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
{
   /* env :: IRTemp -> IRTemp.  Rewrite, in place, every temporary
      mentioned by |ae| through the substitution in |env|. */
   switch (ae->tag) {
      case Cf64i:
         break;   /* contains no temporaries */
      case Ut:
         ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
         break;
      case Btt:
         ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
         ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
         break;
      case Btc:
         ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
         break;
      case Bct:
         ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
         break;
      case Ittt:
         ae->u.Ittt.co = subst_AvailExpr_Temp( env, ae->u.Ittt.co );
         ae->u.Ittt.e1 = subst_AvailExpr_Temp( env, ae->u.Ittt.e1 );
         ae->u.Ittt.e0 = subst_AvailExpr_Temp( env, ae->u.Ittt.e0 );
         break;
      case Ittc:
         ae->u.Ittc.co = subst_AvailExpr_Temp( env, ae->u.Ittc.co );
         ae->u.Ittc.e1 = subst_AvailExpr_Temp( env, ae->u.Ittc.e1 );
         break;
      case Itct:
         ae->u.Itct.co = subst_AvailExpr_Temp( env, ae->u.Itct.co );
         ae->u.Itct.e0 = subst_AvailExpr_Temp( env, ae->u.Itct.e0 );
         break;
      case Itcc:
         ae->u.Itcc.co = subst_AvailExpr_Temp( env, ae->u.Itcc.co );
         break;
      case GetIt:
         ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
         break;
      case CCall: {
         Int i;
         for (i = 0; i < ae->u.CCall.nArgs; i++)
            subst_AvailExpr_TmpOrConst(&ae->u.CCall.args[i], env);
         break;
      }
      case Load:
         subst_AvailExpr_TmpOrConst(&ae->u.Load.addr, env);
         break;
      default:
         vpanic("subst_AvailExpr");
   }
}
3722 
irExpr_to_AvailExpr(IRExpr * e,Bool allowLoadsToBeCSEd)3723 static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e, Bool allowLoadsToBeCSEd )
3724 {
3725    AvailExpr* ae;
3726 
3727    switch (e->tag) {
3728       case Iex_Unop:
3729          if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
3730             ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3731             ae->tag      = Ut;
3732             ae->u.Ut.op  = e->Iex.Unop.op;
3733             ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
3734             return ae;
3735          }
3736          break;
3737 
3738       case Iex_Binop:
3739          if (e->Iex.Binop.arg1->tag == Iex_RdTmp) {
3740             if (e->Iex.Binop.arg2->tag == Iex_RdTmp) {
3741                ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3742                ae->tag        = Btt;
3743                ae->u.Btt.op   = e->Iex.Binop.op;
3744                ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
3745                ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
3746                return ae;
3747             }
3748             if (e->Iex.Binop.arg2->tag == Iex_Const) {
3749                ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3750                ae->tag        = Btc;
3751                ae->u.Btc.op   = e->Iex.Binop.op;
3752                ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
3753                ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
3754                return ae;
3755             }
3756          } else if (e->Iex.Binop.arg1->tag == Iex_Const
3757                     && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
3758             ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3759             ae->tag        = Bct;
3760             ae->u.Bct.op   = e->Iex.Binop.op;
3761             ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
3762             ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
3763             return ae;
3764          }
3765          break;
3766 
3767       case Iex_Const:
3768          if (e->Iex.Const.con->tag == Ico_F64i) {
3769             ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3770             ae->tag          = Cf64i;
3771             ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
3772             return ae;
3773          }
3774          break;
3775 
3776       case Iex_ITE:
3777          if (e->Iex.ITE.cond->tag == Iex_RdTmp) {
3778             if (e->Iex.ITE.iffalse->tag == Iex_RdTmp) {
3779                if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
3780                   ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3781                   ae->tag       = Ittt;
3782                   ae->u.Ittt.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3783                   ae->u.Ittt.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
3784                   ae->u.Ittt.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
3785                   return ae;
3786                }
3787                if (e->Iex.ITE.iftrue->tag == Iex_Const) {
3788                   ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3789                   ae->tag       = Itct;
3790                   ae->u.Itct.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3791                   ae->u.Itct.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
3792                   ae->u.Itct.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
3793                   return ae;
3794                }
3795             } else if (e->Iex.ITE.iffalse->tag == Iex_Const) {
3796                if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
3797                   ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3798                   ae->tag       = Ittc;
3799                   ae->u.Ittc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3800                   ae->u.Ittc.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
3801                   ae->u.Ittc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
3802                   return ae;
3803                }
3804                if (e->Iex.ITE.iftrue->tag == Iex_Const) {
3805                   ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3806                   ae->tag       = Itcc;
3807                   ae->u.Itcc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3808                   ae->u.Itcc.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
3809                   ae->u.Itcc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
3810                   return ae;
3811                }
3812             }
3813          }
3814          break;
3815 
3816       case Iex_GetI:
3817          if (e->Iex.GetI.ix->tag == Iex_RdTmp) {
3818             ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3819             ae->tag           = GetIt;
3820             ae->u.GetIt.descr = e->Iex.GetI.descr;
3821             ae->u.GetIt.ix    = e->Iex.GetI.ix->Iex.RdTmp.tmp;
3822             ae->u.GetIt.bias  = e->Iex.GetI.bias;
3823             return ae;
3824          }
3825          break;
3826 
3827       case Iex_CCall:
3828          ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3829          ae->tag = CCall;
3830          /* Ok to share only the cee, since it is immutable. */
3831          ae->u.CCall.cee   = e->Iex.CCall.cee;
3832          ae->u.CCall.retty = e->Iex.CCall.retty;
3833          /* irExprVec_to_TmpOrConsts will assert if the args are
3834             neither tmps nor constants, but that's ok .. that's all they
3835             should be. */
3836          irExprVec_to_TmpOrConsts(
3837                                   &ae->u.CCall.args, &ae->u.CCall.nArgs,
3838                                   e->Iex.CCall.args
3839                                  );
3840          return ae;
3841 
3842       case Iex_Load:
3843          /* If the caller of do_cse_BB has requested that loads also
3844             be CSEd, convert them into AvailExprs.  If not, we'll just
3845             return NULL here, and the load never becomes considered
3846             "available", which effectively disables CSEing of them, as
3847             desired. */
3848          if (allowLoadsToBeCSEd) {
3849             ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3850             ae->tag        = Load;
3851             ae->u.Load.end = e->Iex.Load.end;
3852             ae->u.Load.ty  = e->Iex.Load.ty;
3853             irExpr_to_TmpOrConst(&ae->u.Load.addr, e->Iex.Load.addr);
3854             return ae;
3855          }
3856          break;
3857 
3858       default:
3859          break;
3860    }
3861 
3862    return NULL;
3863 }
3864 
3865 
3866 /* The BB is modified in-place.  Returns True if any changes were
3867    made.  The caller can choose whether or not loads should be CSEd.
3868    In the normal course of things we don't do that, since CSEing loads
3869    is something of a dodgy proposition if the guest program is doing
3870    some screwy stuff to do with races and spinloops. */
3871 
do_cse_BB(IRSB * bb,Bool allowLoadsToBeCSEd)3872 static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd )
3873 {
3874    Int        i, j, paranoia;
3875    IRTemp     t, q;
3876    IRStmt*    st;
3877    AvailExpr* eprime;
3878    AvailExpr* ae;
3879    Bool       invalidate;
3880    Bool       anyDone = False;
3881 
3882    HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
3883    HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */
3884 
3885    vassert(sizeof(IRTemp) <= sizeof(HWord));
3886 
3887    if (0) { ppIRSB(bb); vex_printf("\n\n"); }
3888 
3889    /* Iterate forwards over the stmts.
3890       On seeing "t = E", where E is one of the AvailExpr forms:
3891          let E' = apply tenv substitution to E
3892          search aenv for E'
3893             if a mapping E' -> q is found,
3894                replace this stmt by "t = q"
3895                and add binding t -> q to tenv
3896             else
3897                add binding E' -> t to aenv
3898                replace this stmt by "t = E'"
3899 
3900       Other statements are only interesting to the extent that they
3901       might invalidate some of the expressions in aenv.  So there is
3902       an invalidate-bindings check for each statement seen.
3903    */
3904    for (i = 0; i < bb->stmts_used; i++) {
3905       st = bb->stmts[i];
3906 
3907       /* ------ BEGIN invalidate aenv bindings ------ */
3908       /* This is critical: remove from aenv any E' -> .. bindings
3909          which might be invalidated by this statement.  The only
3910          vulnerable kind of bindings are the GetI and Load kinds.
3911             Dirty call - dump (paranoia level -> 2)
3912             Store      - dump (ditto)
3913             Put, PutI  - dump unless no-overlap is proven (.. -> 1)
3914          Uses getAliasingRelation_IC and getAliasingRelation_II
3915          to do the no-overlap assessments needed for Put/PutI.
3916       */
3917       switch (st->tag) {
3918          case Ist_Dirty: case Ist_Store: case Ist_MBE:
3919          case Ist_CAS: case Ist_LLSC:
3920          case Ist_StoreG:
3921             paranoia = 2; break;
3922          case Ist_Put: case Ist_PutI:
3923             paranoia = 1; break;
3924          case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
3925          case Ist_WrTmp: case Ist_Exit: case Ist_LoadG:
3926             paranoia = 0; break;
3927          default:
3928             vpanic("do_cse_BB(1)");
3929       }
3930 
3931       if (paranoia > 0) {
3932          for (j = 0; j < aenv->used; j++) {
3933             if (!aenv->inuse[j])
3934                continue;
3935             ae = (AvailExpr*)aenv->key[j];
3936             if (ae->tag != GetIt && ae->tag != Load)
3937                continue;
3938             invalidate = False;
3939             if (paranoia >= 2) {
3940                invalidate = True;
3941             } else {
3942                vassert(paranoia == 1);
3943                if (ae->tag == Load) {
3944                   /* Loads can be invalidated by anything that could
3945                      possibly touch memory.  But in that case we
3946                      should have |paranoia| == 2 and we won't get
3947                      here.  So there's nothing to do; we don't have to
3948                      invalidate the load. */
3949                }
3950                else
3951                if (st->tag == Ist_Put) {
3952                   if (getAliasingRelation_IC(
3953                          ae->u.GetIt.descr,
3954                          IRExpr_RdTmp(ae->u.GetIt.ix),
3955                          st->Ist.Put.offset,
3956                          typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
3957                       ) != NoAlias)
3958                      invalidate = True;
3959                }
3960                else
3961                if (st->tag == Ist_PutI) {
3962                   IRPutI *puti = st->Ist.PutI.details;
3963                   if (getAliasingRelation_II(
3964                          ae->u.GetIt.descr,
3965                          IRExpr_RdTmp(ae->u.GetIt.ix),
3966                          ae->u.GetIt.bias,
3967                          puti->descr,
3968                          puti->ix,
3969                          puti->bias
3970                       ) != NoAlias)
3971                      invalidate = True;
3972                }
3973                else
3974                   vpanic("do_cse_BB(2)");
3975             }
3976 
3977             if (invalidate) {
3978                aenv->inuse[j] = False;
3979                aenv->key[j]   = (HWord)NULL;  /* be sure */
3980             }
3981          } /* for j */
3982       } /* paranoia > 0 */
3983 
3984       /* ------ ENV invalidate aenv bindings ------ */
3985 
3986       /* ignore not-interestings */
3987       if (st->tag != Ist_WrTmp)
3988          continue;
3989 
3990       t = st->Ist.WrTmp.tmp;
3991       eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data, allowLoadsToBeCSEd);
3992       /* ignore if not of AvailExpr form */
3993       if (!eprime)
3994          continue;
3995 
3996       /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
3997 
3998       /* apply tenv */
3999       subst_AvailExpr( tenv, eprime );
4000 
4001       /* search aenv for eprime, unfortunately the hard way */
4002       for (j = 0; j < aenv->used; j++)
4003          if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
4004             break;
4005 
4006       if (j < aenv->used) {
4007          /* A binding E' -> q was found.  Replace stmt by "t = q" and
4008             note the t->q binding in tenv. */
4009          /* (this is the core of the CSE action) */
4010          q = (IRTemp)aenv->val[j];
4011          bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
4012          addToHHW( tenv, (HWord)t, (HWord)q );
4013          anyDone = True;
4014       } else {
4015          /* No binding was found, so instead we add E' -> t to our
4016             collection of available expressions, replace this stmt
4017             with "t = E'", and move on. */
4018          bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
4019          addToHHW( aenv, (HWord)eprime, (HWord)t );
4020       }
4021    }
4022 
4023    /*
4024    ppIRSB(bb);
4025    sanityCheckIRSB(bb, Ity_I32);
4026    vex_printf("\n\n");
4027    */
4028    return anyDone;
4029 }
4030 
4031 
4032 /*---------------------------------------------------------------*/
4033 /*--- Add32/Sub32 chain collapsing                            ---*/
4034 /*---------------------------------------------------------------*/
4035 
4036 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
4037 
4038 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
4039    yes, set *tmp and *i32 appropriately.  *i32 is set as if the
4040    root node is Add32, not Sub32. */
4041 
isAdd32OrSub32(IRExpr * e,IRTemp * tmp,Int * i32)4042 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
4043 {
4044    if (e->tag != Iex_Binop)
4045       return False;
4046    if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
4047       return False;
4048    if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
4049       return False;
4050    if (e->Iex.Binop.arg2->tag != Iex_Const)
4051       return False;
4052    *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
4053    *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
4054    if (e->Iex.Binop.op == Iop_Sub32)
4055       *i32 = -*i32;
4056    return True;
4057 }
4058 
4059 
4060 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
4061    other tmp2.  Scan backwards from the specified start point -- an
4062    optimisation. */
4063 
collapseChain(IRSB * bb,Int startHere,IRTemp tmp,IRTemp * tmp2,Int * i32)4064 static Bool collapseChain ( IRSB* bb, Int startHere,
4065                             IRTemp tmp,
4066                             IRTemp* tmp2, Int* i32 )
4067 {
4068    Int     j, ii;
4069    IRTemp  vv;
4070    IRStmt* st;
4071    IRExpr* e;
4072 
4073    /* the (var, con) pair contain the current 'representation' for
4074       'tmp'.  We start with 'tmp + 0'.  */
4075    IRTemp var = tmp;
4076    Int    con = 0;
4077 
4078    /* Scan backwards to see if tmp can be replaced by some other tmp
4079      +/- a constant. */
4080    for (j = startHere; j >= 0; j--) {
4081       st = bb->stmts[j];
4082       if (st->tag != Ist_WrTmp)
4083          continue;
4084       if (st->Ist.WrTmp.tmp != var)
4085          continue;
4086       e = st->Ist.WrTmp.data;
4087       if (!isAdd32OrSub32(e, &vv, &ii))
4088          break;
4089       var = vv;
4090       con += ii;
4091    }
4092    if (j == -1)
4093       /* no earlier binding for var .. ill-formed IR */
4094       vpanic("collapseChain");
4095 
4096    /* so, did we find anything interesting? */
4097    if (var == tmp)
4098       return False; /* no .. */
4099 
4100    *tmp2 = var;
4101    *i32  = con;
4102    return True;
4103 }
4104 
4105 
4106 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
4107 
static void collapse_AddSub_chains_BB ( IRSB* bb )
{
   IRStmt *st;
   IRTemp var, var2;
   Int    i, con, con2;

   /* Walk the block backwards, rewriting each Add32/Sub32, GetI and
      PutI whose index temp can be re-expressed as some earlier temp
      plus a constant (see collapseChain). */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */

      if (st->tag == Ist_WrTmp
          && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {

         /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
            Find out if var can be expressed as var2 + con2. */
         if (collapseChain(bb, i-1, var, &var2, &con2)) {
            if (DEBUG_IROPT) {
               vex_printf("replacing1 ");
               ppIRStmt(st);
               vex_printf(" with ");
            }
            con2 += con;
            /* Emit Add32 for a non-negative net constant, else Sub32
               of the negated constant, keeping the IR canonical. */
            bb->stmts[i]
               = IRStmt_WrTmp(
                    st->Ist.WrTmp.tmp,
                    (con2 >= 0)
                      ? IRExpr_Binop(Iop_Add32,
                                     IRExpr_RdTmp(var2),
                                     IRExpr_Const(IRConst_U32(con2)))
                      : IRExpr_Binop(Iop_Sub32,
                                     IRExpr_RdTmp(var2),
                                     IRExpr_Const(IRConst_U32(-con2)))
                 );
            if (DEBUG_IROPT) {
               ppIRStmt(bb->stmts[i]);
               vex_printf("\n");
            }
         }

         continue;
      }

      /* Try to collapse 't1 = GetI[t2, con]'. */

      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_GetI
          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
          && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
                                      ->Iex.RdTmp.tmp, &var2, &con2)) {
         if (DEBUG_IROPT) {
            vex_printf("replacing3 ");
            ppIRStmt(st);
            vex_printf(" with ");
         }
         /* Fold the collapsed constant into the GetI bias. */
         con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
         bb->stmts[i]
            = IRStmt_WrTmp(
                 st->Ist.WrTmp.tmp,
                 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
                             IRExpr_RdTmp(var2),
                             con2));
         if (DEBUG_IROPT) {
            ppIRStmt(bb->stmts[i]);
            vex_printf("\n");
         }
         continue;
      }

      /* Perhaps st is PutI[t, con] ?  Only read the PutI union member
         after the tag has been checked; the old code fetched
         st->Ist.PutI.details unconditionally, reinterpreting the
         union for every non-PutI statement reaching this point. */
      if (st->tag == Ist_PutI) {
         IRPutI *puti = st->Ist.PutI.details;
         if (puti->ix->tag == Iex_RdTmp
             && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp,
                              &var2, &con2)) {
            if (DEBUG_IROPT) {
               vex_printf("replacing2 ");
               ppIRStmt(st);
               vex_printf(" with ");
            }
            /* Fold the collapsed constant into the PutI bias. */
            con2 += puti->bias;
            bb->stmts[i]
               = IRStmt_PutI(mkIRPutI(puti->descr,
                                      IRExpr_RdTmp(var2),
                                      con2,
                                      puti->data));
            if (DEBUG_IROPT) {
               ppIRStmt(bb->stmts[i]);
               vex_printf("\n");
            }
         }
         continue;
      }

   } /* for */
}
4205 
4206 
4207 /*---------------------------------------------------------------*/
4208 /*--- PutI/GetI transformations                               ---*/
4209 /*---------------------------------------------------------------*/
4210 
4211 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
4212    the given starting point to find, if any, a PutI which writes
4213    exactly the same piece of guest state, and so return the expression
4214    that the PutI writes.  This is the core of PutI-GetI forwarding. */
4215 
static
IRExpr* findPutI ( IRSB* bb, Int startHere,
                   IRRegArray* descrG, IRExpr* ixG, Int biasG )
{
   Int        j;
   IRStmt*    st;
   GSAliasing relation;

   if (0) {
      /* Disabled debug dump of the query (descr, ix, bias). */
      vex_printf("\nfindPutI ");
      ppIRRegArray(descrG);
      vex_printf(" ");
      ppIRExpr(ixG);
      vex_printf(" %d\n", biasG);
   }

   /* Scan backwards in bb from startHere to find a suitable PutI
      binding for (descrG, ixG, biasG), if any.  The scan must stop
      (returning NULL) as soon as any statement is seen which *might*
      write the guest state the GetI reads, since a later binding
      would then be stale. */

   for (j = startHere; j >= 0; j--) {
      st = bb->stmts[j];
      if (st->tag == Ist_NoOp)
         continue;

      if (st->tag == Ist_Put) {
         /* Non-indexed Put.  This can't give a binding, but we do
            need to check it doesn't invalidate the search by
            overlapping any part of the indexed guest state. */

         relation
            = getAliasingRelation_IC(
                 descrG, ixG,
                 st->Ist.Put.offset,
                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );

         if (relation == NoAlias) {
            /* we're OK; keep going */
            continue;
         } else {
            /* relation == UnknownAlias || relation == ExactAlias */
            /* If this assertion fails, we've found a Put which writes
               an area of guest state which is read by a GetI.  Which
               is unlikely (although not per se wrong). */
            vassert(relation != ExactAlias);
            /* This Put potentially writes guest state that the GetI
               reads; we must fail. */
            return NULL;
         }
      }

      if (st->tag == Ist_PutI) {
         IRPutI *puti = st->Ist.PutI.details;

         /* Indexed Put: could be the binding we want, could overlap
            unknowably, or could be provably disjoint. */
         relation = getAliasingRelation_II(
                       descrG, ixG, biasG,
                       puti->descr,
                       puti->ix,
                       puti->bias
                    );

         if (relation == NoAlias) {
            /* This PutI definitely doesn't overlap.  Ignore it and
               keep going. */
            continue; /* the for j loop */
         }

         if (relation == UnknownAlias) {
            /* We don't know if this PutI writes to the same guest
               state that the GetI, or not.  So we have to give up. */
            return NULL;
         }

         /* Otherwise, we've found what we're looking for.  */
         vassert(relation == ExactAlias);
         return puti->data;

      } /* if (st->tag == Ist_PutI) */

      if (st->tag == Ist_Dirty) {
         /* Be conservative.  If the dirty call has any guest effects at
            all, give up.  We could do better -- only give up if there
            are any guest writes/modifies. */
         if (st->Ist.Dirty.details->nFxState > 0)
            return NULL;
      }

   } /* for */

   /* No valid replacement was found. */
   return NULL;
}
4307 
4308 
4309 
4310 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
4311    that it writes exactly the same piece of guest state) ?  Safe
4312    answer: False. */
4313 
identicalPutIs(IRStmt * pi,IRStmt * s2)4314 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
4315 {
4316    vassert(pi->tag == Ist_PutI);
4317    if (s2->tag != Ist_PutI)
4318       return False;
4319 
4320    IRPutI *p1 = pi->Ist.PutI.details;
4321    IRPutI *p2 = s2->Ist.PutI.details;
4322 
4323    return toBool(
4324           getAliasingRelation_II(
4325              p1->descr, p1->ix, p1->bias,
4326              p2->descr, p2->ix, p2->bias
4327           )
4328           == ExactAlias
4329           );
4330 }
4331 
4332 
4333 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
4334    overlap it?  Safe answer: True.  Note, we could do a lot better
4335    than this if needed. */
4336 
static
Bool guestAccessWhichMightOverlapPutI (
        IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
     )
{
   GSAliasing relation;
   UInt       minoffP, maxoffP;

   vassert(pi->tag == Ist_PutI);

   IRPutI *p1 = pi->Ist.PutI.details;

   getArrayBounds(p1->descr, &minoffP, &maxoffP);
   /* NOTE(review): minoffP/maxoffP are computed here but never
      consulted below -- looks like leftover code.  Confirm that
      getArrayBounds has no required side effects (e.g. asserting a
      sane descriptor) before removing the call. */
   switch (s2->tag) {

      case Ist_NoOp:
      case Ist_IMark:
         /* Touch no guest state at all. */
         return False;

      case Ist_MBE:
      case Ist_AbiHint:
         /* just be paranoid ... these should be rare. */
         return True;

      case Ist_CAS:
         /* This is unbelievably lame, but it's probably not
            significant from a performance point of view.  Really, a
            CAS is a load-store op, so it should be safe to say False.
            However .. */
         return True;

      case Ist_Dirty:
         /* If the dirty call has any guest effects at all, give up.
            Probably could do better. */
         if (s2->Ist.Dirty.details->nFxState > 0)
            return True;
         return False;

      case Ist_Put:
         vassert(isIRAtom(s2->Ist.Put.data));
         /* Fixed-offset Put vs indexed PutI: indexed-vs-constant
            aliasing query. */
         relation
            = getAliasingRelation_IC(
                 p1->descr, p1->ix,
                 s2->Ist.Put.offset,
                 typeOfIRExpr(tyenv,s2->Ist.Put.data)
              );
         goto have_relation;

      case Ist_PutI: {
         IRPutI *p2 = s2->Ist.PutI.details;

         vassert(isIRAtom(p2->ix));
         vassert(isIRAtom(p2->data));
         /* Indexed vs indexed aliasing query. */
         relation
            = getAliasingRelation_II(
                 p1->descr, p1->ix, p1->bias,
                 p2->descr, p2->ix, p2->bias
              );
         goto have_relation;
      }

      case Ist_WrTmp:
         /* Only Get/GetI on the rhs can read guest state; any other
            rhs is harmless. */
         if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
            relation
               = getAliasingRelation_II(
                    p1->descr, p1->ix, p1->bias,
                    s2->Ist.WrTmp.data->Iex.GetI.descr,
                    s2->Ist.WrTmp.data->Iex.GetI.ix,
                    s2->Ist.WrTmp.data->Iex.GetI.bias
                 );
            goto have_relation;
         }
         if (s2->Ist.WrTmp.data->tag == Iex_Get) {
            relation
               = getAliasingRelation_IC(
                    p1->descr, p1->ix,
                    s2->Ist.WrTmp.data->Iex.Get.offset,
                    s2->Ist.WrTmp.data->Iex.Get.ty
                 );
            goto have_relation;
         }
         return False;

      case Ist_Store:
         /* Memory stores don't touch guest state. */
         vassert(isIRAtom(s2->Ist.Store.addr));
         vassert(isIRAtom(s2->Ist.Store.data));
         return False;

      default:
         vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
         vpanic("guestAccessWhichMightOverlapPutI");
   }

  have_relation:
   if (relation == NoAlias)
      return False;
   else
      return True; /* ExactAlias or UnknownAlias */
}
4436 
4437 
4438 
4439 /* ---------- PutI/GetI transformations main functions --------- */
4440 
4441 /* Remove redundant GetIs, to the extent that they can be detected.
4442    bb is modified in-place. */
4443 
4444 static
do_redundant_GetI_elimination(IRSB * bb)4445 void do_redundant_GetI_elimination ( IRSB* bb )
4446 {
4447    Int     i;
4448    IRStmt* st;
4449 
4450    for (i = bb->stmts_used-1; i >= 0; i--) {
4451       st = bb->stmts[i];
4452       if (st->tag == Ist_NoOp)
4453          continue;
4454 
4455       if (st->tag == Ist_WrTmp
4456           && st->Ist.WrTmp.data->tag == Iex_GetI
4457           && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
4458          IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
4459          IRExpr*     ix    = st->Ist.WrTmp.data->Iex.GetI.ix;
4460          Int         bias  = st->Ist.WrTmp.data->Iex.GetI.bias;
4461          IRExpr*     replacement = findPutI(bb, i-1, descr, ix, bias);
4462          if (replacement
4463              && isIRAtom(replacement)
4464              /* Make sure we're doing a type-safe transformation! */
4465              && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
4466             if (DEBUG_IROPT) {
4467                vex_printf("rGI:  ");
4468                ppIRExpr(st->Ist.WrTmp.data);
4469                vex_printf(" -> ");
4470                ppIRExpr(replacement);
4471                vex_printf("\n");
4472             }
4473             bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
4474          }
4475       }
4476    }
4477 
4478 }
4479 
4480 
4481 /* Remove redundant PutIs, to the extent which they can be detected.
4482    bb is modified in-place. */
4483 
4484 static
do_redundant_PutI_elimination(IRSB * bb,VexRegisterUpdates pxControl)4485 void do_redundant_PutI_elimination ( IRSB* bb, VexRegisterUpdates pxControl )
4486 {
4487    Int    i, j;
4488    Bool   delete;
4489    IRStmt *st, *stj;
4490 
4491    vassert(pxControl < VexRegUpdAllregsAtEachInsn);
4492 
4493    for (i = 0; i < bb->stmts_used; i++) {
4494       st = bb->stmts[i];
4495       if (st->tag != Ist_PutI)
4496          continue;
4497       /* Ok, search forwards from here to see if we can find another
4498          PutI which makes this one redundant, and dodging various
4499          hazards.  Search forwards:
4500          * If conditional exit, give up (because anything after that
4501            does not postdominate this put).
4502          * If a Get which might overlap, give up (because this PutI
4503            not necessarily dead).
4504          * If a Put which is identical, stop with success.
4505          * If a Put which might overlap, but is not identical, give up.
4506          * If a dirty helper call which might write guest state, give up.
4507          * If a Put which definitely doesn't overlap, or any other
4508            kind of stmt, continue.
4509       */
4510       delete = False;
4511       for (j = i+1; j < bb->stmts_used; j++) {
4512          stj = bb->stmts[j];
4513          if (stj->tag == Ist_NoOp)
4514             continue;
4515          if (identicalPutIs(st, stj)) {
4516             /* success! */
4517             delete = True;
4518             break;
4519          }
4520          if (stj->tag == Ist_Exit)
4521             /* give up */
4522             break;
4523          if (st->tag == Ist_Dirty)
4524             /* give up; could do better here */
4525             break;
4526          if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
4527             /* give up */
4528            break;
4529       }
4530 
4531       if (delete) {
4532          if (DEBUG_IROPT) {
4533             vex_printf("rPI:  ");
4534             ppIRStmt(st);
4535             vex_printf("\n");
4536          }
4537          bb->stmts[i] = IRStmt_NoOp();
4538       }
4539 
4540    }
4541 }
4542 
4543 
4544 /*---------------------------------------------------------------*/
4545 /*--- Loop unrolling                                          ---*/
4546 /*---------------------------------------------------------------*/
4547 
4548 /* Adjust all tmp values (names) in e by delta.  e is destructively
4549    modified. */
4550 
static void deltaIRExpr ( IRExpr* e, Int delta )
{
   Int i;
   switch (e->tag) {
      case Iex_RdTmp:
         /* The only expression leaf that names a tmp directly. */
         e->Iex.RdTmp.tmp += delta;
         break;
      case Iex_Get:
      case Iex_Const:
         /* Contain no tmps; nothing to adjust. */
         break;
      case Iex_GetI:
         deltaIRExpr(e->Iex.GetI.ix, delta);
         break;
      case Iex_Qop:
         /* Recurse into every operand of the compound forms. */
         deltaIRExpr(e->Iex.Qop.details->arg1, delta);
         deltaIRExpr(e->Iex.Qop.details->arg2, delta);
         deltaIRExpr(e->Iex.Qop.details->arg3, delta);
         deltaIRExpr(e->Iex.Qop.details->arg4, delta);
         break;
      case Iex_Triop:
         deltaIRExpr(e->Iex.Triop.details->arg1, delta);
         deltaIRExpr(e->Iex.Triop.details->arg2, delta);
         deltaIRExpr(e->Iex.Triop.details->arg3, delta);
         break;
      case Iex_Binop:
         deltaIRExpr(e->Iex.Binop.arg1, delta);
         deltaIRExpr(e->Iex.Binop.arg2, delta);
         break;
      case Iex_Unop:
         deltaIRExpr(e->Iex.Unop.arg, delta);
         break;
      case Iex_Load:
         deltaIRExpr(e->Iex.Load.addr, delta);
         break;
      case Iex_CCall:
         /* args is a NULL-terminated vector. */
         for (i = 0; e->Iex.CCall.args[i]; i++)
            deltaIRExpr(e->Iex.CCall.args[i], delta);
         break;
      case Iex_ITE:
         deltaIRExpr(e->Iex.ITE.cond, delta);
         deltaIRExpr(e->Iex.ITE.iftrue, delta);
         deltaIRExpr(e->Iex.ITE.iffalse, delta);
         break;
      default:
         /* Unhandled expression tag: dump it and die. */
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("deltaIRExpr");
   }
}
4599 
4600 /* Adjust all tmp values (names) in st by delta.  st is destructively
4601    modified. */
4602 
static void deltaIRStmt ( IRStmt* st, Int delta )
{
   Int      i;
   IRDirty* d;
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* Contain no tmps; nothing to adjust. */
         break;
      case Ist_AbiHint:
         deltaIRExpr(st->Ist.AbiHint.base, delta);
         deltaIRExpr(st->Ist.AbiHint.nia, delta);
         break;
      case Ist_Put:
         deltaIRExpr(st->Ist.Put.data, delta);
         break;
      case Ist_PutI:
         deltaIRExpr(st->Ist.PutI.details->ix, delta);
         deltaIRExpr(st->Ist.PutI.details->data, delta);
         break;
      case Ist_WrTmp:
         /* Renumber the written tmp itself, then the rhs. */
         st->Ist.WrTmp.tmp += delta;
         deltaIRExpr(st->Ist.WrTmp.data, delta);
         break;
      case Ist_Exit:
         deltaIRExpr(st->Ist.Exit.guard, delta);
         break;
      case Ist_Store:
         deltaIRExpr(st->Ist.Store.addr, delta);
         deltaIRExpr(st->Ist.Store.data, delta);
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         deltaIRExpr(sg->addr, delta);
         deltaIRExpr(sg->data, delta);
         deltaIRExpr(sg->guard, delta);
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         /* dst is a tmp written by the statement; shift it too. */
         lg->dst += delta;
         deltaIRExpr(lg->addr, delta);
         deltaIRExpr(lg->alt, delta);
         deltaIRExpr(lg->guard, delta);
         break;
      }
      case Ist_CAS:
         /* oldHi is optional (IRTemp_INVALID for single CAS); the
            expdHi/dataHi expressions are likewise optional. */
         if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
            st->Ist.CAS.details->oldHi += delta;
         st->Ist.CAS.details->oldLo += delta;
         deltaIRExpr(st->Ist.CAS.details->addr, delta);
         if (st->Ist.CAS.details->expdHi)
            deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
         deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
         if (st->Ist.CAS.details->dataHi)
            deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
         deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
         break;
      case Ist_LLSC:
         st->Ist.LLSC.result += delta;
         deltaIRExpr(st->Ist.LLSC.addr, delta);
         if (st->Ist.LLSC.storedata)
            deltaIRExpr(st->Ist.LLSC.storedata, delta);
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         deltaIRExpr(d->guard, delta);
         /* args is NULL-terminated; the VECRET/BBPTR markers are not
            real expressions and must not be recursed into. */
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               deltaIRExpr(arg, delta);
         }
         if (d->tmp != IRTemp_INVALID)
            d->tmp += delta;
         if (d->mAddr)
            deltaIRExpr(d->mAddr, delta);
         break;
      default:
         /* Unhandled statement tag: dump it and die. */
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("deltaIRStmt");
   }
}
4685 
4686 
4687 /* If possible, return a loop-unrolled version of bb0.  The original
4688    is changed.  If not possible, return NULL.  */
4689 
4690 /* The two schemas considered are:
4691 
4692      X: BODY; goto X
4693 
4694      which unrolls to (eg)  X: BODY;BODY; goto X
4695 
4696    and
4697 
4698        X: BODY; if (c) goto X; goto Y
4699    which trivially transforms to
4700        X: BODY; if (!c) goto Y; goto X;
4701    so it falls in the scope of the first case.
4702 
4703    X and Y must be literal (guest) addresses.
4704 */
4705 
calc_unroll_factor(IRSB * bb)4706 static Int calc_unroll_factor( IRSB* bb )
4707 {
4708    Int n_stmts, i;
4709 
4710    n_stmts = 0;
4711    for (i = 0; i < bb->stmts_used; i++) {
4712       if (bb->stmts[i]->tag != Ist_NoOp)
4713          n_stmts++;
4714    }
4715 
4716    if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
4717       if (vex_control.iropt_verbosity > 0)
4718          vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
4719                     n_stmts, 8* n_stmts);
4720       return 8;
4721    }
4722    if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
4723       if (vex_control.iropt_verbosity > 0)
4724          vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
4725                     n_stmts, 4* n_stmts);
4726       return 4;
4727    }
4728 
4729    if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
4730       if (vex_control.iropt_verbosity > 0)
4731          vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
4732                     n_stmts, 2* n_stmts);
4733       return 2;
4734    }
4735 
4736    if (vex_control.iropt_verbosity > 0)
4737       vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
4738 
4739    return 1;
4740 }
4741 
4742 
maybe_loop_unroll_BB(IRSB * bb0,Addr my_addr)4743 static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr my_addr )
4744 {
4745    Int      i, j, jmax, n_vars;
4746    Bool     xxx_known;
4747    Addr64   xxx_value, yyy_value;
4748    IRExpr*  udst;
4749    IRStmt*  st;
4750    IRConst* con;
4751    IRSB     *bb1, *bb2;
4752    Int      unroll_factor;
4753 
4754    if (vex_control.iropt_unroll_thresh <= 0)
4755       return NULL;
4756 
4757    /* First off, figure out if we can unroll this loop.  Do this
4758       without modifying bb0. */
4759 
4760    if (bb0->jumpkind != Ijk_Boring)
4761       return NULL;
4762 
4763    xxx_known = False;
4764    xxx_value = 0;
4765 
4766    /* Extract the next-guest address.  If it isn't a literal, we
4767       have to give up. */
4768 
4769    udst = bb0->next;
4770    if (udst->tag == Iex_Const
4771        && (udst->Iex.Const.con->tag == Ico_U32
4772            || udst->Iex.Const.con->tag == Ico_U64)) {
4773       /* The BB ends in a jump to a literal location. */
4774       xxx_known = True;
4775       xxx_value = udst->Iex.Const.con->tag == Ico_U64
4776                     ?  udst->Iex.Const.con->Ico.U64
4777                     : (Addr64)(udst->Iex.Const.con->Ico.U32);
4778    }
4779 
4780    if (!xxx_known)
4781       return NULL;
4782 
4783    /* Now we know the BB ends to a jump to a literal location.  If
4784       it's a jump to itself (viz, idiom #1), move directly to the
4785       unrolling stage, first cloning the bb so the original isn't
4786       modified. */
4787    if (xxx_value == my_addr) {
4788       unroll_factor = calc_unroll_factor( bb0 );
4789       if (unroll_factor < 2)
4790          return NULL;
4791       bb1 = deepCopyIRSB( bb0 );
4792       bb0 = NULL;
4793       udst = NULL; /* is now invalid */
4794       goto do_unroll;
4795    }
4796 
4797    /* Search for the second idiomatic form:
4798         X: BODY; if (c) goto X; goto Y
4799       We know Y, but need to establish that the last stmt
4800       is 'if (c) goto X'.
4801    */
4802    yyy_value = xxx_value;
4803    for (i = bb0->stmts_used-1; i >= 0; i--)
4804       if (bb0->stmts[i])
4805          break;
4806 
4807    if (i < 0)
4808       return NULL; /* block with no stmts.  Strange. */
4809 
4810    st = bb0->stmts[i];
4811    if (st->tag != Ist_Exit)
4812       return NULL;
4813    if (st->Ist.Exit.jk != Ijk_Boring)
4814       return NULL;
4815 
4816    con = st->Ist.Exit.dst;
4817    vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
4818 
4819    xxx_value = con->tag == Ico_U64
4820                   ? st->Ist.Exit.dst->Ico.U64
4821                   : (Addr64)(st->Ist.Exit.dst->Ico.U32);
4822 
4823    /* If this assertion fails, we have some kind of type error. */
4824    vassert(con->tag == udst->Iex.Const.con->tag);
4825 
4826    if (xxx_value != my_addr)
4827       /* We didn't find either idiom.  Give up. */
4828       return NULL;
4829 
4830    /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
4831       yyy values (which makes it look like idiom #1), and go into
4832       unrolling proper.  This means finding (again) the last stmt, in
4833       the copied BB. */
4834 
4835    unroll_factor = calc_unroll_factor( bb0 );
4836    if (unroll_factor < 2)
4837       return NULL;
4838 
4839    bb1 = deepCopyIRSB( bb0 );
4840    bb0 = NULL;
4841    udst = NULL; /* is now invalid */
4842    for (i = bb1->stmts_used-1; i >= 0; i--)
4843       if (bb1->stmts[i])
4844          break;
4845 
4846    /* The next bunch of assertions should be true since we already
4847       found and checked the last stmt in the original bb. */
4848 
4849    vassert(i >= 0);
4850 
4851    st = bb1->stmts[i];
4852    vassert(st->tag == Ist_Exit);
4853 
4854    con = st->Ist.Exit.dst;
4855    vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
4856 
4857    udst = bb1->next;
4858    vassert(udst->tag == Iex_Const);
4859    vassert(udst->Iex.Const.con->tag == Ico_U32
4860           || udst->Iex.Const.con->tag == Ico_U64);
4861    vassert(con->tag == udst->Iex.Const.con->tag);
4862 
4863    /* switch the xxx and yyy fields around */
4864    if (con->tag == Ico_U64) {
4865       udst->Iex.Const.con->Ico.U64 = xxx_value;
4866       con->Ico.U64 = yyy_value;
4867    } else {
4868       udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
4869       con->Ico.U32 = (UInt)yyy_value;
4870    }
4871 
4872    /* negate the test condition */
4873    st->Ist.Exit.guard
4874       = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));
4875 
4876    /* --- The unroller proper.  Both idioms are by now --- */
4877    /* --- now converted to idiom 1. --- */
4878 
4879   do_unroll:
4880 
4881    vassert(unroll_factor == 2
4882            || unroll_factor == 4
4883            || unroll_factor == 8);
4884 
4885    jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
4886    for (j = 1; j <= jmax; j++) {
4887 
4888       n_vars = bb1->tyenv->types_used;
4889 
4890       bb2 = deepCopyIRSB(bb1);
4891       for (i = 0; i < n_vars; i++)
4892          (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);
4893 
4894       for (i = 0; i < bb2->stmts_used; i++) {
4895          /* deltaIRStmt destructively modifies the stmt, but
4896             that's OK since bb2 is a complete fresh copy of bb1. */
4897          deltaIRStmt(bb2->stmts[i], n_vars);
4898          addStmtToIRSB(bb1, bb2->stmts[i]);
4899       }
4900    }
4901 
4902    if (DEBUG_IROPT) {
4903       vex_printf("\nUNROLLED (%lx)\n", my_addr);
4904       ppIRSB(bb1);
4905       vex_printf("\n");
4906    }
4907 
4908    /* Flattening; sigh.  The unroller succeeds in breaking flatness
4909       by negating the test condition.  This should be fixed properly.
4910       For the moment use this shotgun approach.  */
4911    return flatten_BB(bb1);
4912 }
4913 
4914 
4915 /*---------------------------------------------------------------*/
4916 /*--- The tree builder                                        ---*/
4917 /*---------------------------------------------------------------*/
4918 
4919 /* This isn't part of IR optimisation.  Really it's a pass done prior
4920    to instruction selection, which improves the code that the
4921    instruction selector can produce. */
4922 
4923 /* --- The 'tmp' environment is the central data structure here --- */
4924 
/* The number of outstanding bindings we're prepared to track.
   The number of times the env becomes full and we have to dump
   the oldest binding (hence reducing code quality) falls very
   rapidly as the env size increases.  A value in the region of
   8..10 (see A_NENV below for the value actually used) gives
   reasonable performance under most circumstances. */
4930 #define A_NENV 10
4931 
4932 /* An interval. Used to record the bytes in the guest state accessed
4933    by a Put[I] statement or by (one or more) Get[I] expression(s). In
4934    case of several Get[I] expressions, the lower/upper bounds are recorded.
4935    This is conservative but cheap.
4936    E.g. a Put of 8 bytes at address 100 would be recorded as [100,107].
4937    E.g. an expression that reads 8 bytes at offset 100 and 4 bytes at
4938    offset 200 would be recorded as [100,203] */
typedef
   struct {
      Bool present;   /* True iff low/high below are meaningful */
      Int  low;       /* lowest guest state byte offset covered */
      Int  high;      /* highest guest state byte offset covered */
   }
   Interval;
4946 
4947 static inline Bool
intervals_overlap(Interval i1,Interval i2)4948 intervals_overlap(Interval i1, Interval i2)
4949 {
4950    return (i1.low >= i2.low && i1.low <= i2.high) ||
4951           (i2.low >= i1.low && i2.low <= i1.high);
4952 }
4953 
4954 static inline void
update_interval(Interval * i,Int low,Int high)4955 update_interval(Interval *i, Int low, Int high)
4956 {
4957    vassert(low <= high);
4958 
4959    if (i->present) {
4960       if (low  < i->low)  i->low  = low;
4961       if (high > i->high) i->high = high;
4962    } else {
4963       i->present = True;
4964       i->low  = low;
4965       i->high = high;
4966    }
4967 }
4968 
4969 
4970 /* bindee == NULL   ===  slot is not in use
4971    bindee != NULL   ===  slot is in use
4972 */
typedef
   struct {
      IRTemp  binder;   /* the temp this slot holds a binding for */
      IRExpr* bindee;   /* bound expression; NULL == slot not in use */
      Bool    doesLoad; /* True iff BINDEE contains a memory load (Iex_Load) */
      /* Record the bytes of the guest state BINDEE reads from. */
      Interval getInterval;
   }
   ATmpInfo;
4982 
4983 __attribute__((unused))
ppAEnv(ATmpInfo * env)4984 static void ppAEnv ( ATmpInfo* env )
4985 {
4986    Int i;
4987    for (i = 0; i < A_NENV; i++) {
4988       vex_printf("%d  tmp %d  val ", i, (Int)env[i].binder);
4989       if (env[i].bindee)
4990          ppIRExpr(env[i].bindee);
4991       else
4992          vex_printf("(null)");
4993       vex_printf("\n");
4994    }
4995 }
4996 
4997 /* --- Tree-traversal fns --- */
4998 
4999 /* Traverse an expr, and detect if any part of it reads memory or does
5000    a Get.  Be careful ... this really controls how much the
5001    tree-builder can reorder the code, so getting it right is critical.
5002 */
static void setHints_Expr (Bool* doesLoad, Interval* getInterval, IRExpr* e )
{
   Int i;
   switch (e->tag) {
      case Iex_CCall:
         /* args is a NULL-terminated vector. */
         for (i = 0; e->Iex.CCall.args[i]; i++)
            setHints_Expr(doesLoad, getInterval, e->Iex.CCall.args[i]);
         return;
      case Iex_ITE:
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.cond);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iftrue);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iffalse);
         return;
      case Iex_Qop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg3);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg4);
         return;
      case Iex_Triop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg3);
         return;
      case Iex_Binop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg2);
         return;
      case Iex_Unop:
         setHints_Expr(doesLoad, getInterval, e->Iex.Unop.arg);
         return;
      case Iex_Load:
         /* A load anywhere in the tree sets the flag ... */
         *doesLoad = True;
         setHints_Expr(doesLoad, getInterval, e->Iex.Load.addr);
         return;
      case Iex_Get: {
         /* ... and any Get widens the guest-state read interval. */
         Int low = e->Iex.Get.offset;
         Int high = low + sizeofIRType(e->Iex.Get.ty) - 1;
         update_interval(getInterval, low, high);
         return;
      }
      case Iex_GetI: {
         /* The index isn't statically known, so conservatively record
            the whole array as read. */
         IRRegArray *descr = e->Iex.GetI.descr;
         Int size = sizeofIRType(descr->elemTy);
         Int low  = descr->base;
         Int high = low + descr->nElems * size - 1;
         update_interval(getInterval, low, high);
         setHints_Expr(doesLoad, getInterval, e->Iex.GetI.ix);
         return;
      }
      case Iex_RdTmp:
      case Iex_Const:
         /* Leaves: no memory or guest-state access. */
         return;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("setHints_Expr");
   }
}
5061 
5062 
5063 /* Add a binding to the front of the env and slide all the rest
5064    backwards.  It should be the case that the last slot is free. */
static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
{
   Int j;
   /* The last slot must be vacant, otherwise we'd lose a binding. */
   vassert(env[A_NENV-1].bindee == NULL);
   /* Slide every slot one place towards the back ... */
   for (j = A_NENV-1; j >= 1; j--)
      env[j] = env[j-1];
   /* ... and install the new binding at the front.  The load/Get
      hints are filled in later by the caller. */
   env[0].binder   = binder;
   env[0].bindee   = bindee;
   env[0].doesLoad = False; /* filled in later */
   env[0].getInterval.present = False; /* filled in later */
   env[0].getInterval.low     = -1;    /* filled in later */
   env[0].getInterval.high    = -1;    /* filled in later */
}
5078 
5079 /* Given uses :: array of UShort, indexed by IRTemp
5080    Add the use-occurrences of temps in this expression
5081    to the env.
5082 */
static void aoccCount_Expr ( UShort* uses, IRExpr* e )
{
   /* Bump uses[t] once for each occurrence of RdTmp(t) in E. */
   Int n;

   switch (e->tag) {

      case Iex_RdTmp: /* the only interesting case */
         uses[e->Iex.RdTmp.tmp]++;
         return;

      case Iex_Const:
      case Iex_Get:
         /* Leaves: nothing to count. */
         return;

      case Iex_Unop:
         aoccCount_Expr(uses, e->Iex.Unop.arg);
         return;

      case Iex_Binop:
         aoccCount_Expr(uses, e->Iex.Binop.arg1);
         aoccCount_Expr(uses, e->Iex.Binop.arg2);
         return;

      case Iex_Triop:
         aoccCount_Expr(uses, e->Iex.Triop.details->arg1);
         aoccCount_Expr(uses, e->Iex.Triop.details->arg2);
         aoccCount_Expr(uses, e->Iex.Triop.details->arg3);
         return;

      case Iex_Qop:
         aoccCount_Expr(uses, e->Iex.Qop.details->arg1);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg2);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg3);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg4);
         return;

      case Iex_ITE:
         aoccCount_Expr(uses, e->Iex.ITE.cond);
         aoccCount_Expr(uses, e->Iex.ITE.iftrue);
         aoccCount_Expr(uses, e->Iex.ITE.iffalse);
         return;

      case Iex_Load:
         aoccCount_Expr(uses, e->Iex.Load.addr);
         return;

      case Iex_GetI:
         aoccCount_Expr(uses, e->Iex.GetI.ix);
         return;

      case Iex_CCall:
         /* args is a NULL-terminated vector. */
         for (n = 0; e->Iex.CCall.args[n]; n++)
            aoccCount_Expr(uses, e->Iex.CCall.args[n]);
         return;

      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("aoccCount_Expr");
   }
}
5143 
5144 
5145 /* Given uses :: array of UShort, indexed by IRTemp
5146    Add the use-occurrences of temps in this statement
5147    to the env.
5148 */
static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
{
   Int      i;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_AbiHint:
         aoccCount_Expr(uses, st->Ist.AbiHint.base);
         aoccCount_Expr(uses, st->Ist.AbiHint.nia);
         return;
      case Ist_WrTmp:
         aoccCount_Expr(uses, st->Ist.WrTmp.data);
         return;
      case Ist_Put:
         aoccCount_Expr(uses, st->Ist.Put.data);
         return;
      case Ist_PutI:
         aoccCount_Expr(uses, st->Ist.PutI.details->ix);
         aoccCount_Expr(uses, st->Ist.PutI.details->data);
         return;
      case Ist_Store:
         aoccCount_Expr(uses, st->Ist.Store.addr);
         aoccCount_Expr(uses, st->Ist.Store.data);
         return;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         aoccCount_Expr(uses, sg->addr);
         aoccCount_Expr(uses, sg->data);
         aoccCount_Expr(uses, sg->guard);
         return;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         aoccCount_Expr(uses, lg->addr);
         aoccCount_Expr(uses, lg->alt);
         aoccCount_Expr(uses, lg->guard);
         return;
      }
      case Ist_CAS:
         /* The Hi halves may be absent (NULL). */
         cas = st->Ist.CAS.details;
         aoccCount_Expr(uses, cas->addr);
         if (cas->expdHi)
            aoccCount_Expr(uses, cas->expdHi);
         aoccCount_Expr(uses, cas->expdLo);
         if (cas->dataHi)
            aoccCount_Expr(uses, cas->dataHi);
         aoccCount_Expr(uses, cas->dataLo);
         return;
      case Ist_LLSC:
         /* storedata == NULL means a load-linked, not a store-cond. */
         aoccCount_Expr(uses, st->Ist.LLSC.addr);
         if (st->Ist.LLSC.storedata)
            aoccCount_Expr(uses, st->Ist.LLSC.storedata);
         return;
      case Ist_Dirty:
         /* mAddr is only meaningful if the helper touches memory. */
         d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None)
            aoccCount_Expr(uses, d->mAddr);
         aoccCount_Expr(uses, d->guard);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               aoccCount_Expr(uses, arg);
         }
         return;
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* No expression operands at all. */
         return;
      case Ist_Exit:
         aoccCount_Expr(uses, st->Ist.Exit.guard);
         return;
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("aoccCount_Stmt");
   }
}
5225 
5226 /* Look up a binding for tmp in the env.  If found, return the bound
5227    expression, and set the env's binding to NULL so it is marked as
5228    used.  If not found, return NULL. */
5229 
atbSubst_Temp(ATmpInfo * env,IRTemp tmp)5230 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
5231 {
5232    Int i;
5233    for (i = 0; i < A_NENV; i++) {
5234       if (env[i].binder == tmp && env[i].bindee != NULL) {
5235          IRExpr* bindee = env[i].bindee;
5236          env[i].bindee = NULL;
5237          return bindee;
5238       }
5239    }
5240    return NULL;
5241 }
5242 
5243 /* Traverse e, looking for temps.  For each observed temp, see if env
5244    contains a binding for the temp, and if so return the bound value.
5245    The env has the property that any binding it holds is
5246    'single-shot', so once a binding is used, it is marked as no longer
5247    available, by setting its .bindee field to NULL. */
5248 
is_Unop(IRExpr * e,IROp op)5249 static inline Bool is_Unop ( IRExpr* e, IROp op ) {
5250    return e->tag == Iex_Unop && e->Iex.Unop.op == op;
5251 }
is_Binop(IRExpr * e,IROp op)5252 static inline Bool is_Binop ( IRExpr* e, IROp op ) {
5253    return e->tag == Iex_Binop && e->Iex.Binop.op == op;
5254 }
5255 
/* Build (op a1 a2), applying a couple of local simplification rules
   first; falls back to a plain IRExpr_Binop when none applies. */
static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   if (op == Iop_Or32) {
      /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) )  */
      if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpwNEZ32,
                             IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
                                                     a2->Iex.Unop.arg ) );
   }
   else if (op == Iop_CmpNE32) {
      /* Since X has type Ity_I1 we can simplify:
         CmpNE32(1Uto32(X),0)) ==> X */
      if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
         return a1->Iex.Unop.arg;
   }

   /* no reduction rule applies */
   return IRExpr_Binop( op, a1, a2 );
}
5280 
/* Build (op aa), first trying an ordered list of local rewrite rules;
   the first rule that matches wins.  Falls back to a plain
   IRExpr_Unop when none applies. */
static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
{
   switch (op) {
   case Iop_CmpwNEZ64:
      /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpwNEZ64, aa->Iex.Unop.arg );
      /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1->Iex.Unop.arg,
                                aa->Iex.Binop.arg2));
      /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   Iop_CmpwNEZ64,
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1,
                                aa->Iex.Binop.arg2->Iex.Unop.arg));
      break;
   case Iop_CmpNEZ64:
      /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
      /* CmpNEZ64( 1Uto64(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      break;
   case Iop_CmpwNEZ32:
      /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_CmpNEZ32:
      /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
      /* CmpNEZ32( 1Uto32(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */
      if (is_Unop(aa, Iop_64to32) && is_Unop(aa->Iex.Unop.arg, Iop_CmpwNEZ64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg->Iex.Unop.arg);
      break;
   case Iop_CmpNEZ8:
      /* CmpNEZ8( 1Uto8(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto8))
         return aa->Iex.Unop.arg;
      break;
   case Iop_Left32:
      /* Left32( Left32(x) ) --> Left32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
      break;
   case Iop_Left64:
      /* Left64( Left64(x) ) --> Left64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop( Iop_Left64, aa->Iex.Unop.arg );
      break;
   case Iop_ZeroHI64ofV128:
      /* ZeroHI64ofV128( ZeroHI64ofV128(x) ) --> ZeroHI64ofV128(x) */
      if (is_Unop(aa, Iop_ZeroHI64ofV128))
         return IRExpr_Unop( Iop_ZeroHI64ofV128, aa->Iex.Unop.arg );
      break;
   case Iop_32to1:
      /* 32to1( 1Uto32 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
      break;
   case Iop_64to1:
      /* 64to1( 1Uto64 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
      break;
   case Iop_64to32:
      /* 64to32( 32Uto64 ( x )) --> x */
      if (is_Unop(aa, Iop_32Uto64))
         return aa->Iex.Unop.arg;
      /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
      if (is_Unop(aa, Iop_8Uto64))
         return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
      break;

   case Iop_32Uto64:
      /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
      if (is_Unop(aa, Iop_8Uto32))
         return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
      /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
      if (is_Unop(aa, Iop_16Uto32))
         return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
      /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh ))
                     --> Shr64( 32Uto64(64to32(x)), sh )) */
      if (is_Unop(aa, Iop_64to32)
          && is_Binop(aa->Iex.Unop.arg, Iop_Shr64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                     Iop_64to32)) {
         return aa->Iex.Unop.arg;
      }
      /*     32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh ))
         --> 32Uto64(64to32( Shl64(                x,   sh )) */
      if (is_Unop(aa, Iop_64to32)
          && is_Binop(aa->Iex.Unop.arg, Iop_Shl64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                     Iop_64to32)) {
         return
            IRExpr_Unop(
               Iop_32Uto64,
               IRExpr_Unop(
                  Iop_64to32,
                  IRExpr_Binop(
                     Iop_Shl64,
                     aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg,
                     aa->Iex.Unop.arg->Iex.Binop.arg2
            )));
      }
      break;

   case Iop_1Sto32:
      /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
      if (is_Unop(aa, Iop_CmpNEZ8)
          && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
                     Iop_CmpNEZ32)) {
         return IRExpr_Unop( Iop_CmpwNEZ32,
                             aa->Iex.Unop.arg->Iex.Unop.arg
                               ->Iex.Unop.arg->Iex.Unop.arg);
      }
      break;

   default:
      break;
   }
   /* no reduction rule applies */
   return IRExpr_Unop( op, aa );
}
5429 
/* Rebuild E with any bound temps substituted in (each binding is
   consumed on first use, via atbSubst_Temp), folding unops and binops
   with fold_IRExpr_* as the tree is reconstructed. */
static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
{
   IRExpr*  e2;
   IRExpr** args2;
   Int      i;

   switch (e->tag) {

      case Iex_CCall:
         /* Copy the arg vector before mutating it in place. */
         args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
         for (i = 0; args2[i]; i++)
            args2[i] = atbSubst_Expr(env,args2[i]);
         return IRExpr_CCall(
                   e->Iex.CCall.cee,
                   e->Iex.CCall.retty,
                   args2
                );
      case Iex_RdTmp:
         /* The interesting case: replace the temp by its binding,
            if the env still holds one. */
         e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
         return e2 ? e2 : e;
      case Iex_ITE:
         return IRExpr_ITE(
                   atbSubst_Expr(env, e->Iex.ITE.cond),
                   atbSubst_Expr(env, e->Iex.ITE.iftrue),
                   atbSubst_Expr(env, e->Iex.ITE.iffalse)
                );
      case Iex_Qop:
         return IRExpr_Qop(
                   e->Iex.Qop.details->op,
                   atbSubst_Expr(env, e->Iex.Qop.details->arg1),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg2),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg3),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg4)
                );
      case Iex_Triop:
         return IRExpr_Triop(
                   e->Iex.Triop.details->op,
                   atbSubst_Expr(env, e->Iex.Triop.details->arg1),
                   atbSubst_Expr(env, e->Iex.Triop.details->arg2),
                   atbSubst_Expr(env, e->Iex.Triop.details->arg3)
                );
      case Iex_Binop:
         /* Substitution may expose new folding opportunities. */
         return fold_IRExpr_Binop(
                   e->Iex.Binop.op,
                   atbSubst_Expr(env, e->Iex.Binop.arg1),
                   atbSubst_Expr(env, e->Iex.Binop.arg2)
                );
      case Iex_Unop:
         return fold_IRExpr_Unop(
                   e->Iex.Unop.op,
                   atbSubst_Expr(env, e->Iex.Unop.arg)
                );
      case Iex_Load:
         return IRExpr_Load(
                   e->Iex.Load.end,
                   e->Iex.Load.ty,
                   atbSubst_Expr(env, e->Iex.Load.addr)
                );
      case Iex_GetI:
         return IRExpr_GetI(
                   e->Iex.GetI.descr,
                   atbSubst_Expr(env, e->Iex.GetI.ix),
                   e->Iex.GetI.bias
                );
      case Iex_Const:
      case Iex_Get:
         /* Leaves: nothing to substitute. */
         return e;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("atbSubst_Expr");
   }
}
5502 
5503 /* Same deal as atbSubst_Expr, except for stmts. */
5504 
static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
{
   Int     i;
   IRDirty *d, *d2;
   IRCAS   *cas, *cas2;
   IRPutI  *puti, *puti2;

   switch (st->tag) {
      case Ist_AbiHint:
         return IRStmt_AbiHint(
                   atbSubst_Expr(env, st->Ist.AbiHint.base),
                   st->Ist.AbiHint.len,
                   atbSubst_Expr(env, st->Ist.AbiHint.nia)
                );
      case Ist_Store:
         return IRStmt_Store(
                   st->Ist.Store.end,
                   atbSubst_Expr(env, st->Ist.Store.addr),
                   atbSubst_Expr(env, st->Ist.Store.data)
                );
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         return IRStmt_StoreG(sg->end,
                              atbSubst_Expr(env, sg->addr),
                              atbSubst_Expr(env, sg->data),
                              atbSubst_Expr(env, sg->guard));
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                             atbSubst_Expr(env, lg->addr),
                             atbSubst_Expr(env, lg->alt),
                             atbSubst_Expr(env, lg->guard));
      }
      case Ist_WrTmp:
         return IRStmt_WrTmp(
                   st->Ist.WrTmp.tmp,
                   atbSubst_Expr(env, st->Ist.WrTmp.data)
                );
      case Ist_Put:
         return IRStmt_Put(
                   st->Ist.Put.offset,
                   atbSubst_Expr(env, st->Ist.Put.data)
                );
      case Ist_PutI:
         puti  = st->Ist.PutI.details;
         puti2 = mkIRPutI(puti->descr,
                          atbSubst_Expr(env, puti->ix),
                          puti->bias,
                          atbSubst_Expr(env, puti->data));
         return IRStmt_PutI(puti2);

      case Ist_Exit:
         return IRStmt_Exit(
                   atbSubst_Expr(env, st->Ist.Exit.guard),
                   st->Ist.Exit.jk,
                   st->Ist.Exit.dst,
                   st->Ist.Exit.offsIP
                );
      case Ist_IMark:
         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.len,
                             st->Ist.IMark.delta);
      case Ist_NoOp:
         return IRStmt_NoOp();
      case Ist_MBE:
         return IRStmt_MBE(st->Ist.MBE.event);
      case Ist_CAS:
         /* The Hi halves may be absent (NULL). */
         cas  = st->Ist.CAS.details;
         cas2 = mkIRCAS(
                   cas->oldHi, cas->oldLo, cas->end,
                   atbSubst_Expr(env, cas->addr),
                   cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
                   atbSubst_Expr(env, cas->expdLo),
                   cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
                   atbSubst_Expr(env, cas->dataLo)
                );
         return IRStmt_CAS(cas2);
      case Ist_LLSC:
         /* storedata == NULL means a load-linked, not a store-cond. */
         return IRStmt_LLSC(
                   st->Ist.LLSC.end,
                   st->Ist.LLSC.result,
                   atbSubst_Expr(env, st->Ist.LLSC.addr),
                   st->Ist.LLSC.storedata
                      ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
                );
      case Ist_Dirty:
         /* Shallow-copy the dirty call and rewrite its fields. */
         d  = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         if (d2->mFx != Ifx_None)
            d2->mAddr = atbSubst_Expr(env, d2->mAddr);
         d2->guard = atbSubst_Expr(env, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               d2->args[i] = atbSubst_Expr(env, arg);
         }
         return IRStmt_Dirty(d2);
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("atbSubst_Stmt");
   }
}
5609 
5610 inline
dirty_helper_stores(const IRDirty * d)5611 static Bool dirty_helper_stores ( const IRDirty *d )
5612 {
5613    return d->mFx == Ifx_Write || d->mFx == Ifx_Modify;
5614 }
5615 
5616 inline
dirty_helper_puts(const IRDirty * d,Bool (* preciseMemExnsFn)(Int,Int,VexRegisterUpdates),VexRegisterUpdates pxControl,Bool * requiresPreciseMemExns)5617 static Interval dirty_helper_puts (
5618                    const IRDirty *d,
5619                    Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
5620                    VexRegisterUpdates pxControl,
5621                    /*OUT*/Bool *requiresPreciseMemExns
5622                 )
5623 {
5624    Int i;
5625    Interval interval;
5626 
5627    /* Passing the guest state pointer opens the door to modifying the
5628       guest state under the covers.  It's not allowed, but let's be
5629       extra conservative and assume the worst. */
5630    for (i = 0; d->args[i]; i++) {
5631       if (UNLIKELY(d->args[i]->tag == Iex_BBPTR)) {
5632          *requiresPreciseMemExns = True;
5633          /* Assume all guest state is written. */
5634          interval.present = True;
5635          interval.low  = 0;
5636          interval.high = 0x7FFFFFFF;
5637          return interval;
5638       }
5639    }
5640 
5641    /* Check the side effects on the guest state */
5642    interval.present = False;
5643    interval.low = interval.high = -1;
5644    *requiresPreciseMemExns = False;
5645 
5646    for (i = 0; i < d->nFxState; ++i) {
5647       if (d->fxState[i].fx != Ifx_Read) {
5648          Int offset = d->fxState[i].offset;
5649          Int size = d->fxState[i].size;
5650          Int nRepeats = d->fxState[i].nRepeats;
5651          Int repeatLen = d->fxState[i].repeatLen;
5652 
5653          if (preciseMemExnsFn(offset,
5654                               offset + nRepeats * repeatLen + size - 1,
5655                               pxControl)) {
5656             *requiresPreciseMemExns = True;
5657          }
5658          update_interval(&interval, offset,
5659                          offset + nRepeats * repeatLen + size - 1);
5660       }
5661    }
5662 
5663    return interval;
5664 }
5665 
5666 /* Return an interval if st modifies the guest state.  Via
5667    requiresPreciseMemExns return whether or not that modification
5668    requires precise exceptions. */
stmt_modifies_guest_state(IRSB * bb,const IRStmt * st,Bool (* preciseMemExnsFn)(Int,Int,VexRegisterUpdates),VexRegisterUpdates pxControl,Bool * requiresPreciseMemExns)5669 static Interval stmt_modifies_guest_state (
5670                    IRSB *bb, const IRStmt *st,
5671                    Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
5672                    VexRegisterUpdates pxControl,
5673                    /*OUT*/Bool *requiresPreciseMemExns
5674                 )
5675 {
5676    Interval interval;
5677 
5678    switch (st->tag) {
5679    case Ist_Put: {
5680       Int offset = st->Ist.Put.offset;
5681       Int size = sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Put.data));
5682 
5683       *requiresPreciseMemExns
5684          = preciseMemExnsFn(offset, offset + size - 1, pxControl);
5685       interval.present = True;
5686       interval.low  = offset;
5687       interval.high = offset + size - 1;
5688       return interval;
5689    }
5690 
5691    case Ist_PutI: {
5692       IRRegArray *descr = st->Ist.PutI.details->descr;
5693       Int offset = descr->base;
5694       Int size = sizeofIRType(descr->elemTy);
5695 
5696       /* We quietly assume here that all segments are contiguous and there
5697          are no holes. This is to avoid a loop. The assumption is conservative
5698          in the sense that we might report that precise memory exceptions are
5699          needed when in fact they are not. */
5700       *requiresPreciseMemExns
5701          = preciseMemExnsFn(offset, offset + descr->nElems * size - 1,
5702                             pxControl);
5703       interval.present = True;
5704       interval.low  = offset;
5705       interval.high = offset + descr->nElems * size - 1;
5706       return interval;
5707    }
5708 
5709    case Ist_Dirty:
5710       return dirty_helper_puts(st->Ist.Dirty.details,
5711                                preciseMemExnsFn, pxControl,
5712                                requiresPreciseMemExns);
5713 
5714    default:
5715       *requiresPreciseMemExns = False;
5716       interval.present = False;
5717       interval.low  = -1;
5718       interval.high = -1;
5719       return interval;
5720    }
5721 }
5722 
/* Tree builder.  Fold chains of singly-used temporaries back into the
   expressions that use them, so that downstream consumers see larger
   expression trees.  bb is modified in place.  Returns the maximum
   guest address covered by any IMark in the block, or ~(Addr)0 if the
   block contains no IMark (the caller uses this when deciding whether
   event checks can safely be elided). */
/* notstatic */ Addr ado_treebuild_BB (
                        IRSB* bb,
                        Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
                        VexRegisterUpdates pxControl
                     )
{
   Int      i, j, k, m;
   Bool     stmtStores, invalidateMe;
   Interval putInterval;
   IRStmt*  st;
   IRStmt*  st2;
   ATmpInfo env[A_NENV];

   Bool   max_ga_known = False;
   Addr   max_ga       = 0;

   /* One use-count slot per temporary declared in the block. */
   Int       n_tmps = bb->tyenv->types_used;
   UShort*   uses   = LibVEX_Alloc_inline(n_tmps * sizeof(UShort));

   /* Phase 1.  Scan forwards in bb, counting use occurrences of each
      temp.  Also count occurrences in the bb->next field.  Take the
      opportunity to also find the maximum guest address in the block,
      since that will be needed later for deciding when we can safely
      elide event checks. */

   for (i = 0; i < n_tmps; i++)
      uses[i] = 0;

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_NoOp:
            continue;
         case Ist_IMark: {
            /* Address of the last byte of this insn; treat a zero
               length as one byte so mga is always >= addr. */
            UInt len = st->Ist.IMark.len;
            Addr mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
            max_ga_known = True;
            if (mga > max_ga)
               max_ga = mga;
            break;
         }
         default:
            break;
      }
      aoccCount_Stmt( uses, st );
   }
   aoccCount_Expr(uses, bb->next );

#  if 0
   /* Debug: dump the use count of every live temporary. */
   for (i = 0; i < n_tmps; i++) {
      if (uses[i] == 0)
        continue;
      ppIRTemp( (IRTemp)i );
      vex_printf("  used %d\n", (Int)uses[i] );
   }
#  endif

   /* Phase 2.  Scan forwards in bb.  For each statement in turn:

         If the env is full, emit the end element.  This guarantees
         there is at least one free slot in the following.

         On seeing 't = E', occ(t)==1,
            let E'=env(E)
            delete this stmt
            add t -> E' to the front of the env
            Examine E' and set the hints for E' appropriately
              (doesLoad? doesGet?)

         On seeing any other stmt,
            let stmt' = env(stmt)
            remove from env any 't=E' binds invalidated by stmt
                emit the invalidated stmts
            emit stmt'
            compact any holes in env
              by sliding entries towards the front

      Finally, apply env to bb->next.
   */

   for (i = 0; i < A_NENV; i++) {
      env[i].bindee = NULL;
      env[i].binder = IRTemp_INVALID;
   }

   /* The stmts in bb are being reordered, and we are guaranteed to
      end up with no more than the number we started with.  Use i to
      be the cursor of the current stmt examined and j <= i to be that
      for the current stmt being written.
   */
   j = 0;
   for (i = 0; i < bb->stmts_used; i++) {

      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      /* Ensure there's at least one space in the env, by emitting
         the oldest binding if necessary. */
      if (env[A_NENV-1].bindee != NULL) {
         bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
                                      env[A_NENV-1].bindee );
         j++;
         vassert(j <= i);
         env[A_NENV-1].bindee = NULL;
      }

      /* Consider current stmt. */
      if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
         IRExpr *e, *e2;

         /* optional extra: dump dead bindings as we find them.
            Removes the need for a prior dead-code removal pass. */
         if (uses[st->Ist.WrTmp.tmp] == 0) {
            if (0) vex_printf("DEAD binding\n");
            continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
         }
         vassert(uses[st->Ist.WrTmp.tmp] == 1);

         /* ok, we have 't = E', occ(t)==1.  Do the abovementioned
            actions. */
         e  = st->Ist.WrTmp.data;
         e2 = atbSubst_Expr(env, e);
         addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
         setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2);
         /* don't advance j, as we are deleting this stmt and instead
            holding it temporarily in the env. */
         continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
      }

      /* we get here for any other kind of statement. */
      /* 'use up' any bindings required by the current statement. */
      st2 = atbSubst_Stmt(env, st);

      /* Now, before this stmt, dump any bindings in env that it
         invalidates.  These need to be dumped in the order in which
         they originally entered env -- that means from oldest to
         youngest. */

      /* putInterval/stmtStores characterise what the stmt under
         consideration does, or might do (sidely safe @ True). */

      Bool putRequiresPreciseMemExns;
      putInterval = stmt_modifies_guest_state(
                       bb, st, preciseMemExnsFn, pxControl,
                       &putRequiresPreciseMemExns
                    );

      /* be True if this stmt writes memory or might do (==> we don't
         want to reorder other loads or stores relative to it).  Also,
         both LL and SC fall under this classification, since we
         really ought to be conservative and not reorder any other
         memory transactions relative to them. */
      stmtStores
         = toBool( st->tag == Ist_Store
                   || (st->tag == Ist_Dirty
                       && dirty_helper_stores(st->Ist.Dirty.details))
                   || st->tag == Ist_LLSC
                   || st->tag == Ist_CAS );

      /* Walk from the oldest slot (A_NENV-1) towards the youngest so
         invalidated bindings are emitted in original entry order. */
      for (k = A_NENV-1; k >= 0; k--) {
         if (env[k].bindee == NULL)
            continue;
         /* Compare the actions of this stmt with the actions of
            binding 'k', to see if they invalidate the binding. */
         invalidateMe
            = toBool(
              /* a store invalidates loaded data */
              (env[k].doesLoad && stmtStores)
              /* a put invalidates get'd data, if they overlap */
              || ((env[k].getInterval.present && putInterval.present) &&
                  intervals_overlap(env[k].getInterval, putInterval))
              /* a put invalidates loaded data. That means, in essense, that
                 a load expression cannot be substituted into a statement
                 that follows the put. But there is nothing wrong doing so
                 except when the put statement requries precise exceptions.
                 Think of a load that is moved past a put where the put
                 updates the IP in the guest state. If the load generates
                 a segfault, the wrong address (line number) would be
                 reported. */
              || (env[k].doesLoad && putInterval.present &&
                  putRequiresPreciseMemExns)
              /* probably overly conservative: a memory bus event
                 invalidates absolutely everything, so that all
                 computation prior to it is forced to complete before
                 proceeding with the event (fence,lock,unlock). */
              || st->tag == Ist_MBE
              /* also be (probably overly) paranoid re AbiHints */
              || st->tag == Ist_AbiHint
              );
         if (invalidateMe) {
            bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
            j++;
            vassert(j <= i);
            env[k].bindee = NULL;
         }
      }

      /* Slide in-use entries in env up to the front */
      m = 0;
      for (k = 0; k < A_NENV; k++) {
         if (env[k].bindee != NULL) {
            env[m] = env[k];
            m++;
         }
      }
      /* ... and mark the now-unused tail slots as free. */
      for (m = m; m < A_NENV; m++) {
         env[m].bindee = NULL;
      }

      /* finally, emit the substituted statement */
      bb->stmts[j] = st2;
      /* vex_printf("**2  "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
      j++;

      vassert(j <= i+1);
   } /* for each stmt in the original bb ... */

   /* Finally ... substitute the ->next field as much as possible, and
      dump any left-over bindings.  Hmm.  Perhaps there should be no
      left over bindings?  Or any left-over bindings are
      by definition dead? */
   bb->next = atbSubst_Expr(env, bb->next);
   bb->stmts_used = j;

   return max_ga_known ? max_ga : ~(Addr)0;
}
5950 
5951 
5952 /*---------------------------------------------------------------*/
5953 /*--- MSVC specific transformation hacks                      ---*/
5954 /*---------------------------------------------------------------*/
5955 
5956 /* The purpose of all this is to find MSVC's idiom for non-constant
5957    bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into
5958    gcc's idiom "(a & ~c) | (b & c)".  Motivation is that Memcheck has
5959    generates a lot of false positives from the MSVC version because it
5960    doesn't understand that XORing an undefined bit with itself gives a
5961    defined result.
5962 
5963    This isn't a problem for the simple case "x ^ x", because iropt
5964    folds it to a constant zero before Memcheck ever sees it.  But in
5965    this case we have an intervening "& c" which defeats the simple
5966    case.  So we have to carefully inspect all expressions rooted at an
5967    XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of the
5968    7 other variants resulting from swapping the order of arguments to
5969    the three binary operations.  If we get a match, we then replace
5970    the tree with "(a & ~c) | (b & c)", and Memcheck is happy.
5971 
5972    The key difficulty is to spot the two uses of "a".  To normalise
5973    the IR to maximise the chances of success, we first do a CSE pass,
5974    with CSEing of loads enabled, since the two "a" expressions may be
5975    loads, which need to be commoned up.  Then we do a constant folding
5976    pass, so as to remove any tmp-to-tmp assignment chains that would
5977    make matching the original expression more difficult.
5978 */
5979 
5980 
5981 /* Helper function for debug printing */
5982 __attribute__((unused))
print_flat_expr(IRExpr ** env,IRExpr * e)5983 static void print_flat_expr ( IRExpr** env, IRExpr* e )
5984 {
5985    if (e == NULL) {
5986       vex_printf("?");
5987       return;
5988    }
5989    switch (e->tag) {
5990       case Iex_Binop: {
5991          ppIROp(e->Iex.Binop.op);
5992          vex_printf("(");
5993          print_flat_expr(env, e->Iex.Binop.arg1);
5994          vex_printf(",");
5995          print_flat_expr(env, e->Iex.Binop.arg2);
5996          vex_printf(")");
5997          break;
5998       }
5999       case Iex_Unop: {
6000          ppIROp(e->Iex.Unop.op);
6001          vex_printf("(");
6002          print_flat_expr(env, e->Iex.Unop.arg);
6003          vex_printf(")");
6004          break;
6005       }
6006       case Iex_RdTmp:
6007          ppIRTemp(e->Iex.RdTmp.tmp);
6008          vex_printf("=");
6009          print_flat_expr(env, chase(env, e));
6010          break;
6011       case Iex_Const:
6012       case Iex_CCall:
6013       case Iex_Load:
6014       case Iex_ITE:
6015       case Iex_Get:
6016          ppIRExpr(e);
6017          break;
6018       default:
6019          vex_printf("FAIL: "); ppIRExpr(e); vex_printf("\n");
6020          vassert(0);
6021    }
6022 }
6023 
6024 /* Spot   a ^ ((a ^ b) & c)   for a,b and c tmp-or-const (atoms)
6025    or any of the other 7 variants generated by switching the order
6026    of arguments to the outer ^, the inner ^ and the &.
6027 */
/* Returns the variant number (1..8) on a match, with *aa, *bb, *cc set
   to the atoms a, b, c; returns 0 (and leaves the OUT params
   unspecified) if e does not match. */
static UInt spotBitfieldAssignment ( /*OUT*/IRExpr** aa, /*OUT*/IRExpr** bb,
                                     /*OUT*/IRExpr** cc,
                                     IRExpr** env, IRExpr* e,
                                     IROp opAND, IROp opXOR)
{
   /* ISBIN: is _e a binop with operator _op?  STEP: chase one level
      of tmp binding through env.  LL/RR: left/right binop args. */
#  define ISBIN(_e,_op) ((_e) && (_e)->tag == Iex_Binop \
                              && (_e)->Iex.Binop.op == (_op))
#  define ISATOM(_e)    isIRAtom(_e)
#  define STEP(_e)      chase1(env, (_e))
#  define LL(_e)        ((_e)->Iex.Binop.arg1)
#  define RR(_e)        ((_e)->Iex.Binop.arg2)

   IRExpr *a1, *and, *xor, *c, *a2bL, *a2bR;

   /* This is common to all 8 cases */
   if (!ISBIN(e, opXOR)) goto fail;

   /*                        -----and------ */
   /*                         --xor---      */
   /* find variant 1:   a1 ^ ((a2 ^ b) & c) */
   /* find variant 2:   a1 ^ ((b ^ a2) & c) */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1   = LL(e);
   and  = STEP(RR(e));
   if (!ISBIN(and, opAND)) goto v34;
   xor  = STEP(LL(and));
   c    = RR(and);
   if (!ISBIN(xor, opXOR)) goto v34;
   a2bL = LL(xor);
   a2bR = RR(xor);

   /* Require the two 'a's to match and 'b' to differ from 'a', so the
      roles of a and b are unambiguous. */
   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 1;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 2;
   }

  v34:
   /*                   -----and------      */
   /*                    --xor---           */
   /* find variant 3:   ((a2 ^ b) & c) ^ a1 */
   /* find variant 4:   ((b ^ a2) & c) ^ a1 */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1   = RR(e);
   and  = STEP(LL(e));
   if (!ISBIN(and, opAND)) goto v56;
   xor  = STEP(LL(and));
   c    = RR(and);
   if (!ISBIN(xor, opXOR)) goto v56;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 3;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 4;
   }

  v56:
   /*                        -----and------ */
   /*                         --xor---      */
   /* find variant 5:   a1 ^ (c & (a2 ^ b)) */
   /* find variant 6:   a1 ^ (c & (b ^ a2)) */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1   = LL(e);
   and  = STEP(RR(e));
   if (!ISBIN(and, opAND)) goto v78;
   xor  = STEP(RR(and));
   c    = LL(and);
   if (!ISBIN(xor, opXOR)) goto v78;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      /* vassert(5-5) is vassert(0): deliberately fires if this
         never-yet-observed variant actually matches ("ATC"). */
      vassert(5-5); // ATC
      return 5;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      /* Likewise an intentional trap for an untested variant. */
      vassert(6-6); // ATC
      return 6;
   }

 v78:
   /*                   -----and------      */
   /*                    --xor---           */
   /* find variant 7:   (c & (a2 ^ b)) ^ a1 */
   /* find variant 8:   (c & (b ^ a2)) ^ a1 */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1   = RR(e);
   and  = STEP(LL(e));
   if (!ISBIN(and, opAND)) goto fail;
   xor  = STEP(RR(and));
   c    = LL(and);
   if (!ISBIN(xor, opXOR)) goto fail;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 7;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 8;
   }

 fail:
   return 0;

#  undef ISBIN
#  undef ISATOM
#  undef STEP
#  undef LL
#  undef RR
}
6171 
6172 /* If |e| is of the form a ^ ((a ^ b) & c) (or any of the 7 other
6173    variants thereof generated by switching arguments around), return
6174    the IRExpr* for (a & ~c) | (b & c).  Else return NULL. */
static IRExpr* do_XOR_TRANSFORMS_IRExpr ( IRExpr** env, IRExpr* e )
{
   if (e->tag != Iex_Binop)
      return NULL;

   /* The root must be an integer XOR of width 8/16/32; pick the
      matching-width OR/AND/NOT/XOR ops at the same time. */
   const HChar* tyNm;
   IROp opOR, opAND, opNOT, opXOR;
   switch (e->Iex.Binop.op) {
      case Iop_Xor32:
         tyNm  = "I32";
         opOR  = Iop_Or32;  opAND = Iop_And32;
         opNOT = Iop_Not32; opXOR = Iop_Xor32;
         break;
      case Iop_Xor16:
         tyNm  = "I16";
         opOR  = Iop_Or16;  opAND = Iop_And16;
         opNOT = Iop_Not16; opXOR = Iop_Xor16;
         break;
      case Iop_Xor8:
         tyNm  = "I8";
         opOR  = Iop_Or8;  opAND = Iop_And8;
         opNOT = Iop_Not8; opXOR = Iop_Xor8;
         break;
      default:
         return NULL;
   }

   IRExpr *aa = NULL, *bb = NULL, *cc = NULL;
   UInt variant = spotBitfieldAssignment(&aa, &bb, &cc, env, e, opAND, opXOR);
   if (variant == 0)
      return NULL;

   /* Optional debug trace of each match found. */
   static UInt ctr = 0;
   if (0)
      vex_printf("XXXXXXXXXX Bitfield Assignment number %u, "
                 "type %s, variant %u\n",
                 ++ctr, tyNm, variant);

   /* It's vitally important that the returned aa, bb and cc are
      atoms -- either constants or tmps.  If it's anything else
      (eg, a GET) then incorporating them in a tree at this point
      in the SB may erroneously pull them forwards (eg of a PUT
      that originally was after the GET) and so transform the IR
      wrongly.  spotBitfieldAssignment should guarantee only to
      give us atoms, but we check here anyway. */
   vassert(aa && isIRAtom(aa));
   vassert(bb && isIRAtom(bb));
   vassert(cc && isIRAtom(cc));

   /* Build (a & ~c) | (b & c). */
   return IRExpr_Binop(
             opOR,
             IRExpr_Binop(opAND, aa, IRExpr_Unop(opNOT, cc)),
             IRExpr_Binop(opAND, bb, cc)
          );
}
6233 
6234 
6235 /* SB is modified in-place.  Visit all the IRExprs and, for those
6236    which are allowed to be non-atomic, perform the XOR transform if
6237    possible.  This makes |sb| be non-flat, but that's ok, the caller
6238    can re-flatten it.  Returns True iff any changes were made. */
static Bool do_XOR_TRANSFORM_IRSB ( IRSB* sb )
{
   Int  i;
   Bool changed = False;

   /* Make the tmp->expr environment, so we can use it for
      chasing expressions. */
   Int      n_tmps = sb->tyenv->types_used;
   IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   for (i = 0; i < n_tmps; i++)
      env[i] = NULL;

   /* First pass: record the expression bound to each tmp. */
   for (i = 0; i < sb->stmts_used; i++) {
      IRStmt* st = sb->stmts[i];
      if (st->tag != Ist_WrTmp)
         continue;
      IRTemp t = st->Ist.WrTmp.tmp;
      vassert(t >= 0 && t < n_tmps);
      env[t] = st->Ist.WrTmp.data;
   }

   /* Second pass: check flatness everywhere, and apply the XOR
      transform at the only place a non-atomic expression may sit
      (the RHS of a WrTmp). */
   for (i = 0; i < sb->stmts_used; i++) {
      IRStmt* st = sb->stmts[i];

      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI: {
            IRPutI* puti = st->Ist.PutI.details;
            vassert(isIRAtom(puti->ix));
            vassert(isIRAtom(puti->data));
            break;
         }
         case Ist_WrTmp: {
            /* This is the one place where an expr (st->Ist.WrTmp.data) is
               allowed to be more than just a constant or a tmp. */
            IRExpr* mb_new_data
               = do_XOR_TRANSFORMS_IRExpr(env, st->Ist.WrTmp.data);
            if (mb_new_data) {
               //ppIRSB(sb);
               st->Ist.WrTmp.data = mb_new_data;
               //ppIRSB(sb);
               changed = True;
            }
            break;
         }
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS: {
            IRCAS* cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            /* Hi halves are NULL for single-width CAS. */
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         }
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty: {
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               vassert(isIRAtom(d->mAddr));
            }
            vassert(isIRAtom(d->guard));
            for (Int j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
                  vassert(isIRAtom(arg));
               }
            }
            break;
         }
         case Ist_IMark:
         case Ist_NoOp:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
            vex_printf("\n"); ppIRStmt(st);
            /* NOTE(review): message says "do_XOR_TRANSFORMS_IRSB" but
               this function is do_XOR_TRANSFORM_IRSB -- confirm which
               name is intended. */
            vpanic("do_XOR_TRANSFORMS_IRSB");
      }
   }

   vassert(isIRAtom(sb->next));
   return changed;
}
6352 
6353 
static IRSB* do_MSVC_HACKS ( IRSB* sb )
{
   /* Normalise as much as we can.  This is the one-and-only place
      where do_cse_BB is called with allowLoadsToBeCSEd set to True. */
   if (do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ )) {
      /* CSEing might have created dead code.  Remove it. */
      sb = cprop_BB ( sb );
      do_deadcode_BB(sb);
   }

   /* Visit all atoms and apply the XOR transform in place.  The
      transform produces non-flat expressions, so re-flatten the
      block, but only if something actually changed. */
   if (do_XOR_TRANSFORM_IRSB(sb))
      sb = flatten_BB(sb);

   return sb;
}
6377 
6378 
6379 /*---------------------------------------------------------------*/
6380 /*--- iropt main                                              ---*/
6381 /*---------------------------------------------------------------*/
6382 
/* When True, cheap_transformations dumps the block after each of its
   phases.  Flip to True for debugging. */
static Bool iropt_verbose = False; /* True; */
6384 
6385 
6386 /* Do a simple cleanup pass on bb.  This is: redundant Get removal,
6387    redundant Put removal, constant propagation, dead code removal,
6388    clean helper specialisation, and dead code removal (again).
6389 */
6390 
6391 
6392 static
cheap_transformations(IRSB * bb,IRExpr * (* specHelper)(const HChar *,IRExpr **,IRStmt **,Int),Bool (* preciseMemExnsFn)(Int,Int,VexRegisterUpdates),VexRegisterUpdates pxControl)6393 IRSB* cheap_transformations (
6394          IRSB* bb,
6395          IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
6396          Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
6397          VexRegisterUpdates pxControl
6398       )
6399 {
6400    redundant_get_removal_BB ( bb );
6401    if (iropt_verbose) {
6402       vex_printf("\n========= REDUNDANT GET\n\n" );
6403       ppIRSB(bb);
6404    }
6405 
6406    if (pxControl < VexRegUpdAllregsAtEachInsn) {
6407       redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
6408    }
6409    if (iropt_verbose) {
6410       vex_printf("\n========= REDUNDANT PUT\n\n" );
6411       ppIRSB(bb);
6412    }
6413 
6414    bb = cprop_BB ( bb );
6415    if (iropt_verbose) {
6416       vex_printf("\n========= CPROPD\n\n" );
6417       ppIRSB(bb);
6418    }
6419 
6420    do_deadcode_BB ( bb );
6421    if (iropt_verbose) {
6422       vex_printf("\n========= DEAD\n\n" );
6423       ppIRSB(bb);
6424    }
6425 
6426    bb = spec_helpers_BB ( bb, specHelper );
6427    do_deadcode_BB ( bb );
6428    if (iropt_verbose) {
6429       vex_printf("\n========= SPECd \n\n" );
6430       ppIRSB(bb);
6431    }
6432 
6433    return bb;
6434 }
6435 
6436 
6437 /* Do some more expensive transformations on bb, which are aimed at
6438    optimising as much as possible in the presence of GetI and PutI.  */
6439 
6440 static
expensive_transformations(IRSB * bb,VexRegisterUpdates pxControl)6441 IRSB* expensive_transformations( IRSB* bb, VexRegisterUpdates pxControl )
6442 {
6443    (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
6444    collapse_AddSub_chains_BB( bb );
6445    do_redundant_GetI_elimination( bb );
6446    if (pxControl < VexRegUpdAllregsAtEachInsn) {
6447       do_redundant_PutI_elimination( bb, pxControl );
6448    }
6449    do_deadcode_BB( bb );
6450    return bb;
6451 }
6452 
6453 
6454 /* Scan a flattened BB to look for signs that more expensive
6455    optimisations might be useful:
6456    - find out if there are any GetIs and PutIs
6457    - find out if there are any floating or vector-typed temporaries
6458 */
6459 
static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
                                 /*OUT*/Bool* hasVorFtemps,
                                 IRSB* bb )
{
   Int      i, j;
   IRStmt*  st;
   IRDirty* d;
   IRCAS*   cas;

   *hasGetIorPutI = False;
   *hasVorFtemps  = False;

   /* Single forward scan.  The vasserts double as a flatness check:
      every position that must hold an atom is verified. */
   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_PutI:
            *hasGetIorPutI = True;
            break;
         case Ist_WrTmp:
            if (st->Ist.WrTmp.data->tag == Iex_GetI)
               *hasGetIorPutI = True;
            /* Classify the temp's type: plain integers are cheap;
               float, vector and decimal types flag hasVorFtemps. */
            switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
               case Ity_I1: case Ity_I8: case Ity_I16:
               case Ity_I32: case Ity_I64: case Ity_I128:
                  break;
               case Ity_F16: case Ity_F32: case Ity_F64: case Ity_F128:
               case Ity_V128: case Ity_V256:
                  *hasVorFtemps = True;
                  break;
               case Ity_D32: case Ity_D64: case Ity_D128:
                  /* Decimal floats count as "float-ish" too. */
                  *hasVorFtemps = True;
                  break;
               default:
                  goto bad;
            }
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS:
            cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            /* Hi halves are NULL for single-width CAS. */
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty:
            d = st->Ist.Dirty.details;
            vassert(isIRAtom(d->guard));
            for (j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
                  vassert(isIRAtom(arg));
            }
            if (d->mFx != Ifx_None)
               vassert(isIRAtom(d->mAddr));
            break;
         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
         bad:
            ppIRStmt(st);
            vpanic("considerExpensives");
      }
   }
}
6559 
6560 
/* ---------------- The main iropt entry point. ---------------- */

/* exported from this file */
/* Rules of the game:

   - IRExpr/IRStmt trees should be treated as immutable, as they
     may get shared.  So never change a field of such a tree node;
     instead construct and return a new one if needed.
*/
6571 
IRSB* do_iropt_BB(
         IRSB* bb0,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
         VexRegisterUpdates pxControl,
         Addr    guest_addr,
         VexArch guest_arch
      )
{
   /* Running counts, used only for the DEBUG_IROPT diagnostic below. */
   static Int n_total     = 0;
   static Int n_expensive = 0;

   Bool  seenGetIPutI, seenFpOrVec;
   IRSB* sb;
   IRSB* unrolled;

   n_total++;

   /* Every subsequent pass assumes flat (atomised) IR, so flatten
      before doing anything else. */
   sb = flatten_BB ( bb0 );

   if (iropt_verbose) {
      vex_printf("\n========= FLAT\n\n" );
      ppIRSB(sb);
   }

   /* Optimisation switched off entirely?  The flat block is the
      final result. */
   if (vex_control.iropt_level <= 0)
      return sb;

   /* Run one round of cheap cleanups first.  Whether the expensive
      passes are also worthwhile is decided afterwards by scanning
      the block (they are needed iff it contains GetIs or PutIs). */
   sb = cheap_transformations( sb, specHelper, preciseMemExnsFn, pxControl );

   if (guest_arch == VexArchARM) {
      /* Thumb2 translation generates a lot of redundant IR, so put
         in extra effort to clear it out. */
      sb = cprop_BB(sb);
      sb = spec_helpers_BB ( sb, specHelper );
      if (pxControl < VexRegUpdAllregsAtEachInsn) {
         redundant_put_removal_BB ( sb, preciseMemExnsFn, pxControl );
      }
      do_cse_BB( sb, False/*!allowLoadsToBeCSEd*/ );
      do_deadcode_BB( sb );
   }

   if (vex_control.iropt_level > 1) {

      /* Inspect what remains, to decide how much further effort
         to spend on it. */
      considerExpensives( &seenGetIPutI, &seenFpOrVec, sb );

      if (seenFpOrVec && !seenGetIPutI) {
         /* FP/vector code tends to drag in lardy rounding-mode
            setup which CSE mops up nicely.  Not needed when
            seenGetIPutI holds, since that path runs CSE anyway. */
         (void)do_cse_BB( sb, False/*!allowLoadsToBeCSEd*/ );
         do_deadcode_BB( sb );
      }

      if (seenGetIPutI) {
         Bool changed;
         n_expensive++;
         if (DEBUG_IROPT)
            vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
         sb = expensive_transformations( sb, pxControl );
         sb = cheap_transformations( sb, specHelper,
                                     preciseMemExnsFn, pxControl );
         /* Try to common up any GetIs the above just exposed; if
            that changed anything, clean up once more. */
         changed = do_cse_BB( sb, False/*!allowLoadsToBeCSEd*/ );
         if (changed)
            sb = cheap_transformations( sb, specHelper,
                                        preciseMemExnsFn, pxControl );
      }

      ///////////////////////////////////////////////////////////
      // BEGIN MSVC optimised code transformation hacks
      if (0)
         sb = do_MSVC_HACKS(sb);
      // END   MSVC optimised code transformation hacks
      ///////////////////////////////////////////////////////////

      /* Attempt to unroll simple (single-BB) loops; on success,
         clean up the enlarged block as far as possible. */
      unrolled = maybe_loop_unroll_BB( sb, guest_addr );
      if (unrolled) {
         sb = cheap_transformations( unrolled, specHelper,
                                     preciseMemExnsFn, pxControl );
         if (seenGetIPutI) {
            sb = expensive_transformations( sb, pxControl );
            sb = cheap_transformations( sb, specHelper,
                                        preciseMemExnsFn, pxControl );
         } else {
            /* at least do CSE and dead code removal */
            do_cse_BB( sb, False/*!allowLoadsToBeCSEd*/ );
            do_deadcode_BB( sb );
         }
         if (0) vex_printf("vex iropt: unrolled a loop\n");
      }

   }

   return sb;
}
6683 
6684 
6685 /*---------------------------------------------------------------*/
6686 /*--- end                                            ir_opt.c ---*/
6687 /*---------------------------------------------------------------*/
6688