/* -*- mode: C; c-basic-offset: 3; -*- */

/*---------------------------------------------------------------*/
/*--- begin                                          ir_opt.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2015 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "ir_opt.h"


/* Set to 1 for lots of debugging output. */
#define DEBUG_IROPT 0

/* Set to 1 to gather some statistics.  Currently only for sameIRExprs. */
#define STATS_IROPT 0

/* What iropt does, 29 Dec 04.

   It takes an IRSB and produces a new one with the same meaning,
   defined thus:

   After execution of the new BB, all guest state and guest memory is
   the same as after execution of the original.  This is true
   regardless of how the block was exited (at the end vs side exit).

   In addition, parts of the guest state will be identical to that
   created by execution of the original at the following observation
   points:

   * In a dirty helper call, any parts of the guest state that the
     helper states that it reads or modifies will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as being read or modified by the helper cannot be
     assumed to be up-to-date at the point where the helper is called.

   * If iropt_register_updates == VexRegUpdSpAtMemAccess :
     The guest state is only up to date as explained above
     (i.e. at SB exits and as specified by dirty helper calls).
     Also, the stack pointer register is up to date at memory
     exception points (as this is needed for the stack extension
     logic in m_signals.c).

   * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
     Immediately prior to any load or store, those parts of the guest
     state marked as requiring precise exceptions will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as requiring precise exceptions cannot be assumed to
     be up-to-date at the point of the load/store.

   * If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
     Same as minimal, but all the guest state is up to date at memory
     exception points.

   * If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
     Guest state is up to date at each instruction.

   The relative order of loads and stores (including loads/stores of
   guest memory done by dirty helpers annotated as such) is not
   changed.  However, the relative order of loads with no intervening
   stores/modifies may be changed.

   Transformation order
   ~~~~~~~~~~~~~~~~~~~~

   There are three levels of optimisation, controlled by
   vex_control.iropt_level.  Define first:

   "Cheap transformations" are the following sequence:
      * Redundant-Get removal
      * Redundant-Put removal
      * Constant propagation/folding
      * Dead code removal
      * Specialisation of clean helper functions
      * Dead code removal

   "Expensive transformations" are the following sequence:
      * CSE
      * Folding of add/sub chains
      * Redundant-GetI removal
      * Redundant-PutI removal
      * Dead code removal

   Then the transformations are as follows, as defined by
   vex_control.iropt_level:

   Level 0:
      * Flatten into atomic form.

   Level 1: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.

   Level 2: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.
      * If block contains any floating or vector types, CSE.
      * If block contains GetI or PutI, Expensive transformations.
      * Try unrolling loops.  Three possible outcomes:
        - No effect: do nothing more.
        - Unrolled a loop, and block does not contain GetI or PutI:
          Do: * CSE
              * Dead code removal
        - Unrolled a loop, and block contains GetI or PutI:
          Do: * Expensive transformations
              * Cheap transformations
*/
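
/* A condensed sketch of the level dispatch described above.  This is
   illustrative only and is not compiled; the helper names below
   (cheap_transformations, expensive_transformations, and the two
   contains_* predicates) are assumptions about how a driver could be
   factored, not a definitive interface. */
#if 0
   bb = flatten_BB(bb);
   if (vex_control.iropt_level > 0) {
      bb = cheap_transformations(bb);
      if (vex_control.iropt_level > 1) {
         if (contains_float_or_vector_types(bb))
            bb = do_cse(bb);
         if (contains_GetI_or_PutI(bb))
            bb = expensive_transformations(bb);
         /* ... then try loop unrolling and clean up with CSE and/or
            the cheap/expensive passes, depending on the outcome as
            listed above. */
      }
   }
#endif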

/* Implementation notes, 29 Dec 04.

   TODO (important): I think rPutI removal ignores precise exceptions
   and is therefore, in a sense, wrong.  In the sense that PutIs are
   assumed not to write parts of the guest state that we need to have
   up-to-date at loads/stores.  So far on x86 guest that has not
   mattered since indeed only the x87 FP registers and tags are
   accessed using GetI/PutI, and there is no need so far for them to
   be up to date at mem exception points.  The rPutI pass should be
   fixed.

   TODO: improve pessimistic handling of precise exceptions
     in the tree builder.

   TODO: check interaction of rGetI and dirty helpers.

   F64i constants are treated differently from other constants.
   They are not regarded as atoms, and instead lifted off and
   bound to temps.  This allows them to participate in CSE, which
   is important for getting good performance for x86 guest code.

   CSE up F64 literals (already doing F64is)

   CSE: consider carefully the requirement for precise exns
      prior to making CSE any more aggressive.  */


/*---------------------------------------------------------------*/
/*--- Finite mappery, of a sort                               ---*/
/*---------------------------------------------------------------*/

/* General map from HWord-sized thing to HWord-sized thing.  Could be
   done by hashing, but it's not clear whether or not this would
   really be any faster. */

typedef
   struct {
      Bool*  inuse;
      HWord* key;
      HWord* val;
      Int    size;
      Int    used;
   }
   HashHW;

static HashHW* newHHW ( void )
{
   HashHW* h = LibVEX_Alloc_inline(sizeof(HashHW));
   h->size  = 8;
   h->used  = 0;
   h->inuse = LibVEX_Alloc_inline(h->size * sizeof(Bool));
   h->key   = LibVEX_Alloc_inline(h->size * sizeof(HWord));
   h->val   = LibVEX_Alloc_inline(h->size * sizeof(HWord));
   return h;
}


/* Look up key in the map. */

static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
{
   Int i;
   /* vex_printf("lookupHHW(%llx)\n", key ); */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         if (val)
            *val = h->val[i];
         return True;
      }
   }
   return False;
}


/* Add key->val to the map.  Replaces any existing binding for key. */

static void addToHHW ( HashHW* h, HWord key, HWord val )
{
   Int i, j;
   /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */

   /* Find and replace existing binding, if any. */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         h->val[i] = val;
         return;
      }
   }

   /* Ensure a space is available. */
   if (h->used == h->size) {
      /* Copy into arrays twice the size. */
      Bool*  inuse2 = LibVEX_Alloc_inline(2 * h->size * sizeof(Bool));
      HWord* key2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
      HWord* val2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
      for (i = j = 0; i < h->size; i++) {
         if (!h->inuse[i]) continue;
         inuse2[j] = True;
         key2[j]   = h->key[i];
         val2[j]   = h->val[i];
         j++;
      }
      h->used  = j;
      h->size *= 2;
      h->inuse = inuse2;
      h->key   = key2;
      h->val   = val2;
   }

   /* Finally, add it. */
   vassert(h->used < h->size);
   h->inuse[h->used] = True;
   h->key[h->used]   = key;
   h->val[h->used]   = val;
   h->used++;
}
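
/* A minimal usage sketch (not part of the build; the function name
   exampleHHWUsage is hypothetical).  Keys and values are plain
   HWords, so pointers can be stored with a cast, as the Get/Put
   passes below do. */
#if 0
static void exampleHHWUsage ( void )
{
   HashHW* h = newHHW();
   HWord   v = 0;
   addToHHW(h, (HWord)0x10, (HWord)0x99);  /* bind 0x10 :-> 0x99 */
   addToHHW(h, (HWord)0x10, (HWord)0xAA);  /* rebind; replaces 0x99 */
   if (lookupHHW(h, &v, (HWord)0x10))
      vassert(v == 0xAA);
   vassert(!lookupHHW(h, NULL, (HWord)0x20)); /* absent key */
}
#endif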


/*---------------------------------------------------------------*/
/*--- Flattening out a BB into atomic SSA form                ---*/
/*---------------------------------------------------------------*/

/* Non-critical helper, heuristic for reducing the number of tmp-tmp
   copies made by flattening.  If in doubt return False. */

static Bool isFlat ( IRExpr* e )
{
   if (e->tag == Iex_Get)
      return True;
   if (e->tag == Iex_Binop)
      return toBool( isIRAtom(e->Iex.Binop.arg1)
                     && isIRAtom(e->Iex.Binop.arg2) );
   if (e->tag == Iex_Load)
      return isIRAtom(e->Iex.Load.addr);
   return False;
}

/* Flatten out 'ex' so it is atomic, returning a new expression with
   the same value, after having appended extra IRTemp assignments to
   the end of 'bb'. */

static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop: {
         IRQop* qop = ex->Iex.Qop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(qop->op,
                       flatten_Expr(bb, qop->arg1),
                       flatten_Expr(bb, qop->arg2),
                       flatten_Expr(bb, qop->arg3),
                       flatten_Expr(bb, qop->arg4))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(triop->op,
                         flatten_Expr(bb, triop->arg1),
                         flatten_Expr(bb, triop->arg2),
                         flatten_Expr(bb, triop->arg3))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_ITE:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
                       flatten_Expr(bb, ex->Iex.ITE.iftrue),
                       flatten_Expr(bb, ex->Iex.ITE.iffalse))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         return ex;

      default:
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}


/* Append a completely flattened form of 'st' to the end of 'bb'. */

static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr   *e1, *e2, *e3, *e4, *e5;
   IRDirty  *d,  *d2;
   IRCAS    *cas, *cas2;
   IRPutI   *puti, *puti2;
   IRLoadG  *lg;
   IRStoreG *sg;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         puti = st->Ist.PutI.details;
         e1 = flatten_Expr(bb, puti->ix);
         e2 = flatten_Expr(bb, puti->data);
         puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
         addStmtToIRSB(bb, IRStmt_PutI(puti2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
         break;
      case Ist_StoreG:
         sg = st->Ist.StoreG.details;
         e1 = flatten_Expr(bb, sg->addr);
         e2 = flatten_Expr(bb, sg->data);
         e3 = flatten_Expr(bb, sg->guard);
         addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3));
         break;
      case Ist_LoadG:
         lg = st->Ist.LoadG.details;
         e1 = flatten_Expr(bb, lg->addr);
         e2 = flatten_Expr(bb, lg->alt);
         e3 = flatten_Expr(bb, lg->guard);
         addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                                        e1, e2, e3));
         break;
      case Ist_CAS:
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               d2->args[i] = flatten_Expr(bb, arg);
         }
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst,
                                       st->Ist.Exit.offsIP));
         break;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}


static IRSB* flatten_BB ( IRSB* in )
{
   Int   i;
   IRSB* out;
   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   for (i = 0; i < in->stmts_used; i++)
      if (in->stmts[i])
         flatten_Stmt( out, in->stmts[i] );
   out->next     = flatten_Expr( out, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;
   return out;
}
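
/* Example of the net effect (a sketch; temp numbering is
   illustrative).  Flattening

      t1 = Add32(Add32(GET:I32(0),GET:I32(4)),0x1:I32)

   appends

      t2 = GET:I32(0)
      t3 = GET:I32(4)
      t4 = Add32(t2,t3)
      t5 = Add32(t4,0x1:I32)
      t1 = t5

   so that every operand is an atom (RdTmp or Const).  The trailing
   tmp-tmp copy is exactly the waste the isFlat heuristic above avoids
   when the RHS is already flat. */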


/*---------------------------------------------------------------*/
/*--- In-place removal of redundant GETs                      ---*/
/*---------------------------------------------------------------*/

/* Scan forwards, building up an environment binding (min offset, max
   offset) pairs to values, which will either be temps or constants.

   On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
   env and if it matches, replace the Get with the stored value.  If
   there is no match, add a (minoff,maxoff) :-> t binding.

   On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
   any binding which fully or partially overlaps with (minoff,maxoff).
   Then add a new (minoff,maxoff) :-> t or c binding. */
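
/* Example (a sketch, with illustrative offsets and temp names): in
   the flat IR

      t1 = GET:I32(0)
      PUT(16) = t9
      t2 = GET:I32(0)

   the Put covers bytes 16..19 and so does not overlap (0,3); the
   binding (0,3) :-> t1 survives, and the second Get is rewritten as
   t2 = t1.  A 'PUT(2) = t9', by contrast, would invalidate the
   binding, and the second Get would be left alone. */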

/* Extract the min/max offsets from a guest state array descriptor. */

inline
static void getArrayBounds ( IRRegArray* descr,
                             UInt* minoff, UInt* maxoff )
{
   *minoff = descr->base;
   *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
   vassert((*minoff & ~0xFFFF) == 0);
   vassert((*maxoff & ~0xFFFF) == 0);
   vassert(*minoff <= *maxoff);
}

/* Create keys, of the form ((minoffset << 16) | maxoffset). */

static UInt mk_key_GetPut ( Int offset, IRType ty )
{
   /* offset should fit in 16 bits. */
   UInt minoff = offset;
   UInt maxoff = minoff + sizeofIRType(ty) - 1;
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}
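
/* Worked example: a 32-bit access at offset 16 covers bytes 16..19
   inclusive, so mk_key_GetPut(16, Ity_I32) == (16 << 16) | 19,
   that is, 0x100013. */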

static UInt mk_key_GetIPutI ( IRRegArray* descr )
{
   UInt minoff, maxoff;
   getArrayBounds( descr, &minoff, &maxoff );
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}

/* Supposing h has keys of the form generated by mk_key_GetPut and
   mk_key_GetIPutI, invalidate any key which overlaps (k_lo
   .. k_hi).
*/
static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
{
   Int  j;
   UInt e_lo, e_hi;
   vassert(k_lo <= k_hi);
   /* invalidate any env entries which in any way overlap (k_lo
      .. k_hi) */
   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */

   for (j = 0; j < h->used; j++) {
      if (!h->inuse[j])
         continue;
      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
      e_hi = ((UInt)h->key[j]) & 0xFFFF;
      vassert(e_lo <= e_hi);
      if (e_hi < k_lo || k_hi < e_lo)
         continue; /* no overlap possible */
      else
         /* overlap; invalidate */
         h->inuse[j] = False;
   }
}
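
/* Example: invalidateOverlaps(h, 4, 7) kills an entry covering bytes
   6..9, since the ranges intersect, but leaves an entry covering
   bytes 8..11 alone, since 8 > 7 makes the ranges disjoint. */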


static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: ");  ppIRExpr(get);
               vex_printf("  ->  ");  ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
               writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}


/*---------------------------------------------------------------*/
/*--- In-place removal of redundant PUTs                      ---*/
/*---------------------------------------------------------------*/

/* Find any Get uses in st and invalidate any partially or fully
   overlapping ranges listed in env.  Due to the flattening phase, the
   only stmt kind we expect to find a Get on is IRStmt_WrTmp. */

715
static void handle_gets_Stmt (
               HashHW* env,
               IRStmt* st,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   Int     j;
   UInt    key = 0; /* keep gcc -O happy */
   Bool    isGet;
   Bool    memRW = False;
   IRExpr* e;

   switch (st->tag) {

      /* This is the only interesting case.  Deal with Gets in the RHS
         expression. */
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
               isGet = True;
               key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
               break;
            case Iex_GetI:
               isGet = True;
               key = mk_key_GetIPutI ( e->Iex.GetI.descr );
               break;
            case Iex_Load:
               isGet = False;
               memRW = True;
               break;
            default:
               isGet = False;
         }
         if (isGet) {
            UInt k_lo, k_hi;
            k_lo = (key >> 16) & 0xFFFF;
            k_hi = key & 0xFFFF;
            invalidateOverlaps(env, k_lo, k_hi);
         }
         break;

      /* Be very conservative for dirty helper calls; dump the entire
         environment.  The helper might read guest state, in which
         case it needs to be flushed first.  Also, the helper might
         access guest memory, in which case all parts of the guest
         state requiring precise exceptions need to be flushed.  The
         crude solution is just to flush everything; we could easily
         enough do a lot better if needed. */
      /* Probably also overly-conservative, but also dump everything
         if we hit a memory bus event (fence, lock, unlock).  Ditto
         AbiHints, CASs, LLs and SCs. */
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         /* fall through */
      case Ist_MBE:
      case Ist_Dirty:
      case Ist_CAS:
      case Ist_LLSC:
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         break;

      /* all other cases are boring. */
      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         memRW = True;
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         memRW = True;
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         memRW = True;
         break;
      }
      case Ist_Exit:
         vassert(isIRAtom(st->Ist.Exit.guard));
         break;

      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         break;

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.details->ix));
         vassert(isIRAtom(st->Ist.PutI.details->data));
         break;

      case Ist_NoOp:
      case Ist_IMark:
         break;

      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("handle_gets_Stmt");
   }

   if (memRW) {
      /* This statement accesses memory.  So we might need to dump all
         parts of the environment corresponding to guest state that may
         not be reordered with respect to memory references.  That means
         at least the stack pointer. */
      switch (pxControl) {
         case VexRegUpdAllregsAtMemAccess:
            /* Precise exceptions required at mem access.
               Flush all guest state. */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            break;
         case VexRegUpdSpAtMemAccess:
            /* We need to dump the stack pointer
               (needed for stack extension in m_signals.c).
               preciseMemExnsFn will use vex_control.iropt_register_updates
               to verify only the sp is to be checked. */
            /* fallthrough */
         case VexRegUpdUnwindregsAtMemAccess:
            for (j = 0; j < env->used; j++) {
               if (!env->inuse[j])
                  continue;
               /* Just flush the minimal amount required, as computed by
                  preciseMemExnsFn. */
               HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
               HWord k_hi = env->key[j] & 0xFFFF;
               if (preciseMemExnsFn( k_lo, k_hi, pxControl ))
                  env->inuse[j] = False;
            }
            break;
         case VexRegUpdAllregsAtEachInsn:
            // VexRegUpdAllregsAtEachInsn cannot happen here.
            // fall through
         case VexRegUpd_INVALID:
         default:
            vassert(0);
      }
   } /* if (memRW) */

}


/* Scan backwards, building up a set of (min offset, max
   offset) pairs, indicating those parts of the guest state
   for which the next event is a write.

   On seeing a conditional exit, empty the set.

   On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
   completely within the set, remove the Put.  Otherwise, add
   (minoff,maxoff) to the set.

   On seeing 'Get (minoff,maxoff)', remove any part of the set
   overlapping (minoff,maxoff).  The same has to happen for any events
   which implicitly read parts of the guest state: dirty helper calls
   and loads/stores.
*/
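
/* Example (a sketch, with illustrative offsets and temp names):
   scanning backwards through

      PUT(0) = t1
      t7 = Add32(t5,t6)
      PUT(0) = t2

   the final Put enters (0,3) into the set, the Add does not disturb
   it, and so when the first Put is reached its range is already
   scheduled to be overwritten: it is replaced by a NoOp.  An
   intervening 't8 = GET:I32(0)', conditional exit, or memory access
   for which preciseMemExnsFn demands (0,3) be flushed would have
   removed (0,3) from the set first, and the first Put would be
   kept. */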

static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   vassert(pxControl < VexRegUpdAllregsAtEachInsn);

   HashHW* env = newHHW();

   /* Initialise the running env with the fact that the final exit
      writes the IP (or, whatever it claims to write.  We don't
      care.) */
   key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
   addToHHW(env, (HWord)key, 0);

   /* And now scan backwards through the statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         //Bool re_add;
         /* Need to throw out from the env, any part of it which
            doesn't overlap with the guest state written by this exit.
            Since the exit only writes one section, it's simplest to
            do this: (1) check whether env contains a write that
            completely overlaps the write done by this exit; (2) empty
            out env; and (3) if (1) was true, add the write done by
            this exit.

            To make (1) a bit simpler, merely search for a write that
            exactly matches the one done by this exit.  That's safe
            because it will fail as often or more often than a full
            overlap check, and failure to find an overlapping write in
            env is the safe case (we just nuke env if that
            happens). */
         //vassert(isIRAtom(st->Ist.Exit.guard));
         /* (1) */
         //key = mk_key_GetPut(st->Ist.Exit.offsIP,
         //                    typeOfIRConst(st->Ist.Exit.dst));
         //re_add = lookupHHW(env, NULL, key);
         /* (2) */
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         /* (3) */
         //if (0 && re_add)
         //   addToHHW(env, (HWord)key, 0);
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
            vassert(isIRAtom(st->Ist.PutI.details->ix));
            vassert(isIRAtom(st->Ist.PutI.details->data));
            break;
         default:
            isPut = False;
      }
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add it
               to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn, pxControl );
   }
}


/*---------------------------------------------------------------*/
/*--- Constant propagation and folding                        ---*/
/*---------------------------------------------------------------*/

#if STATS_IROPT
/* How often sameIRExprs was invoked */
static UInt invocation_count;
/* How often sameIRExprs recursed through IRTemp assignments */
static UInt recursion_count;
/* How often sameIRExprs found identical IRExprs */
static UInt success_count;
/* How often recursing through assignments to IRTemps helped
   establishing equality. */
static UInt recursion_success_count;
/* Whether or not recursing through an IRTemp assignment helped
   establishing IRExpr equality for a given sameIRExprs invocation. */
static Bool recursion_helped;
/* Whether or not a given sameIRExprs invocation recursed through an
   IRTemp assignment */
static Bool recursed;
/* Maximum number of nodes ever visited when comparing two IRExprs. */
static UInt max_nodes_visited;
#endif /* STATS_IROPT */

/* Count the number of nodes visited for a given sameIRExprs invocation. */
static UInt num_nodes_visited;

/* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
   This is to guard against performance degradation by visiting large
   trees without success. */
#define NODE_LIMIT 30


/* The env in this section is a map from IRTemp to IRExpr*,
   that is, an array indexed by IRTemp. */

/* Do both expressions compute the same value? The answer is generally
   conservative, i.e. it will report that the expressions do not compute
   the same value when in fact they do.  The reason is that we do not
   keep track of changes in the guest state and memory.  Thus, two
   Get's, GetI's or Load's, even when accessing the same location, will be
   assumed to compute different values.  After all the accesses may happen
   at different times and the guest state / memory can have changed in
   the meantime.

   XXX IMPORTANT XXX the two expressions must have the same IR type.
   DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */

/* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
   wrapper that deals with the common tags-don't-match case, and a
   slower out of line general case.  Saves a few insns. */
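
/* Example of the intended conservatism: if env binds
   t1 :-> Add32(t0,0x1:I32) and t2 :-> Add32(t0,0x1:I32), then
   sameIRExprs(env, RdTmp(t1), RdTmp(t2)) is True, established by
   recursing through the two bindings.  But if t1 and t2 are each
   bound to LDle:I32(t3), the answer is False: the two loads may
   observe different memory states, so they cannot be assumed to
   compute the same value. */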

__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );

inline
static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   if (e1->tag != e2->tag) return False;
   return sameIRExprs_aux2(env, e1, e2);
}

__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   if (num_nodes_visited++ > NODE_LIMIT) return False;

   switch (e1->tag) {
      case Iex_RdTmp:
         if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;

         if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
            Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
                                        env[e2->Iex.RdTmp.tmp]);
#if STATS_IROPT
            recursed = True;
            if (same) recursion_helped = True;
#endif
            return same;
         }
         return False;

      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* Guest state / memory could have changed in the meantime. */
         return False;

      case Iex_Binop:
         return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
                                                 e2->Iex.Binop.arg1 )
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
                                                 e2->Iex.Binop.arg2 ));

      case Iex_Unop:
         return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
                        && sameIRExprs_aux( env, e1->Iex.Unop.arg,
                                                 e2->Iex.Unop.arg ));

      case Iex_Const: {
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         vassert(c1->tag == c2->tag);
         switch (c1->tag) {
            case Ico_U1:  return toBool( c1->Ico.U1  == c2->Ico.U1 );
            case Ico_U8:  return toBool( c1->Ico.U8  == c2->Ico.U8 );
            case Ico_U16: return toBool( c1->Ico.U16 == c2->Ico.U16 );
            case Ico_U32: return toBool( c1->Ico.U32 == c2->Ico.U32 );
            case Ico_U64: return toBool( c1->Ico.U64 == c2->Ico.U64 );
            default: break;
         }
         return False;
      }

      case Iex_Triop: {
         IRTriop *tri1 = e1->Iex.Triop.details;
         IRTriop *tri2 = e2->Iex.Triop.details;
         return toBool( tri1->op == tri2->op
                        && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
                        && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
                        && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
      }

      case Iex_ITE:
         return toBool(    sameIRExprs_aux( env, e1->Iex.ITE.cond,
                                                 e2->Iex.ITE.cond )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iftrue,
                                                 e2->Iex.ITE.iftrue )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iffalse,
                                                 e2->Iex.ITE.iffalse ));

      default:
         /* Not very likely to be "same". */
         break;
   }

   return False;
}

inline
static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   Bool same;

   num_nodes_visited = 0;
   same = sameIRExprs_aux(env, e1, e2);

#if STATS_IROPT
   ++invocation_count;
   if (recursed) ++recursion_count;
   success_count += same;
   if (same && recursion_helped)
      ++recursion_success_count;
   if (num_nodes_visited > max_nodes_visited)
      max_nodes_visited = num_nodes_visited;
   recursed = False;         /* reset */
   recursion_helped = False; /* reset */
#endif /* STATS_IROPT */

   return same;
}


/* Debugging-only hack (not used in production runs): make a guess
   whether sameIRExprs might assert due to the two args being of
   different types.  If in doubt return False.  Is only used when
   --vex-iropt-verbosity > 0, that is, vex_control.iropt_verbosity > 0.
   Bad because it duplicates functionality from typeOfIRExpr.  See
   comment on the single use point below for rationale. */
static
Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
{
   if (e1->tag != e2->tag) return False;
   switch (e1->tag) {
      case Iex_Const: {
         /* The only interesting case */
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         return c1->tag != c2->tag;
      }
      default:
         break;
   }
   return False;
}


/* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
static Bool isZeroU32 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U32
                  && e->Iex.Const.con->Ico.U32 == 0);
}

/* Is this literally IRExpr_Const(IRConst_U64(0)) ?
   Currently unused; commented out to avoid compiler warning */
#if 0
static Bool isZeroU64 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U64
                  && e->Iex.Const.con->Ico.U64 == 0);
}
#endif

/* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
static Bool isZeroV128 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_V128
                  && e->Iex.Const.con->Ico.V128 == 0x0000);
}

/* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
static Bool isZeroV256 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_V256
                  && e->Iex.Const.con->Ico.V256 == 0x00000000);
}

/* Is this an integer constant with value 0 ? */
static Bool isZeroU ( IRExpr* e )
{
   if (e->tag != Iex_Const) return False;
   switch (e->Iex.Const.con->tag) {
      case Ico_U1:  return toBool( e->Iex.Const.con->Ico.U1  == 0);
      case Ico_U8:  return toBool( e->Iex.Const.con->Ico.U8  == 0);
      case Ico_U16: return toBool( e->Iex.Const.con->Ico.U16 == 0);
      case Ico_U32: return toBool( e->Iex.Const.con->Ico.U32 == 0);
      case Ico_U64: return toBool( e->Iex.Const.con->Ico.U64 == 0);
      default: vpanic("isZeroU");
   }
}

/* Is this an integer constant with all bits set (1...1b) ? */
static Bool isOnesU ( IRExpr* e )
{
   if (e->tag != Iex_Const) return False;
   switch (e->Iex.Const.con->tag) {
      case Ico_U8:  return toBool( e->Iex.Const.con->Ico.U8  == 0xFF);
      case Ico_U16: return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
      case Ico_U32: return toBool( e->Iex.Const.con->Ico.U32
                                   == 0xFFFFFFFF);
      case Ico_U64: return toBool( e->Iex.Const.con->Ico.U64
                                   == 0xFFFFFFFFFFFFFFFFULL);
      default: ppIRExpr(e); vpanic("isOnesU");
   }
}

static Bool notBool ( Bool b )
{
   if (b == True) return False;
   if (b == False) return True;
   vpanic("notBool");
}

/* Make a zero which has the same type as the result of the given
   primop. */
static IRExpr* mkZeroOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
      case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
      case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
      case Iop_Sub32:
      case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
      case Iop_And64:
      case Iop_Sub64:
      case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
      case Iop_XorV128:
      case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
      case Iop_XorV256:
      case Iop_AndV256: return IRExpr_Const(IRConst_V256(0));
      default: vpanic("mkZeroOfPrimopResultType: bad primop");
   }
}

/* Make a value containing all 1-bits, which has the same type as the
   result of the given primop. */
static IRExpr* mkOnesOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpEQ32:
      case Iop_CmpEQ64:
         return IRExpr_Const(IRConst_U1(toBool(1)));
      case Iop_Or8:
         return IRExpr_Const(IRConst_U8(0xFF));
      case Iop_Or16:
         return IRExpr_Const(IRConst_U16(0xFFFF));
      case Iop_Or32:
         return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
      case Iop_CmpEQ8x8:
      case Iop_Or64:
         return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
      case Iop_CmpEQ8x16:
      case Iop_CmpEQ16x8:
      case Iop_CmpEQ32x4:
         return IRExpr_Const(IRConst_V128(0xFFFF));
      default:
         ppIROp(op);
         vpanic("mkOnesOfPrimopResultType: bad primop");
   }
}

/* Helpers for folding Clz32/64. */
static UInt fold_Clz64 ( ULong value )
{
   UInt i;
   vassert(value != 0ULL); /* no defined semantics for arg==0 */
   for (i = 0; i < 64; ++i) {
      if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
   }
   vassert(0);
   /*NOTREACHED*/
   return 0;
}

static UInt fold_Clz32 ( UInt value )
{
   UInt i;
   vassert(value != 0); /* no defined semantics for arg==0 */
   for (i = 0; i < 32; ++i) {
      if (0 != (value & (((UInt)1) << (31 - i)))) return i;
   }
   vassert(0);
   /*NOTREACHED*/
   return 0;
}
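
/* Quick worked examples (a sketch, not compiled): the highest set
   bit of 0x10000 is bit 16, giving 15 leading zeroes in 32 bits; a
   set top bit gives zero; and 1ULL has 63 leading zeroes. */
#if 0
   vassert(fold_Clz32(0x00010000) == 15);
   vassert(fold_Clz32(0x80000000) == 0);
   vassert(fold_Clz64(1ULL)       == 63);
#endif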

/* V64 holds 8 summary-constant bits in V128/V256 style.  Convert to
   the corresponding real constant. */
//XXX re-check this before use
//static ULong de_summarise_V64 ( UChar v64 )
//{
//   ULong r = 0;
//   if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
//   if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
//   if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
//   if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
//   if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
//   if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
//   if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
//   if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
//   return r;
//}

/* Helper for arbitrary expression pattern matching in flat IR.  If
   'e' is a reference to a tmp, look it up in env -- repeatedly, if
   necessary -- until it resolves to a non-tmp.  Note that this can
   return NULL if it can't resolve 'e' to a new expression, which will
   be the case if 'e' is instead defined by an IRStmt (IRDirty or
   LLSC). */
static IRExpr* chase ( IRExpr** env, IRExpr* e )
{
   /* Why is this loop guaranteed to terminate?  Because all tmps must
      have definitions before use, hence a tmp cannot be bound
      (directly or indirectly) to itself. */
   while (e->tag == Iex_RdTmp) {
      if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); }
      e = env[(Int)e->Iex.RdTmp.tmp];
      if (e == NULL) break;
   }
   return e;
}

/* Similar to |chase|, but follows at most one level of tmp reference. */
static IRExpr* chase1 ( IRExpr** env, IRExpr* e )
{
   if (e == NULL || e->tag != Iex_RdTmp)
      return e;
   else
      return env[(Int)e->Iex.RdTmp.tmp];
}
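
/* Example: with env[t5] = RdTmp(t4) and env[t4] = GET:I32(0), chase
   resolves RdTmp(t5) through both bindings to the Get expression,
   whereas chase1 stops after one step and yields RdTmp(t4). */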

static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
{
   Int     shift;
   IRExpr* e2 = e; /* e2 is the result of folding e, if possible */

   switch (e->tag) {
      case Iex_Unop:
         /* UNARY ops */
         if (e->Iex.Unop.arg->tag == Iex_Const) {
            switch (e->Iex.Unop.op) {
               case Iop_1Uto8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 1 : 0)));
                  break;
               case Iop_1Uto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 1 : 0));
                  break;
               case Iop_1Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 1 : 0));
                  break;

               case Iop_1Sto8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFF : 0)));
                  break;
               case Iop_1Sto16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFFFF : 0)));
                  break;
               case Iop_1Sto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFFFFFFFF : 0));
                  break;
               case Iop_1Sto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFFFFFFFFFFFFFFFFULL : 0));
                  break;

               case Iop_8Sto32: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
                  u32 <<= 24;
                  u32 = (Int)u32 >> 24;   /* signed shift */
                  e2 = IRExpr_Const(IRConst_U32(u32));
                  break;
               }
               case Iop_16Sto32: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  u32 <<= 16;
                  u32 = (Int)u32 >> 16;   /* signed shift */
                  e2 = IRExpr_Const(IRConst_U32(u32));
                  break;
               }
               case Iop_8Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
                  break;
               case Iop_16Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
                  break;
               case Iop_8Uto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
                  break;
               case Iop_8Sto16: {
                  UShort u16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
                  u16 <<= 8;
                  u16 = (Short)u16 >> 8;  /* signed shift */
                  e2 = IRExpr_Const(IRConst_U16(u16));
                  break;
               }
               case Iop_8Uto16:
                  e2 = IRExpr_Const(IRConst_U16(
                          0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
                  break;
               case Iop_16Uto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
                  break;
               case Iop_32to16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_32to8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_32to1:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                       )));
                  break;
               case Iop_64to1:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
                       )));
                  break;

               case Iop_NotV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
                  break;
               case Iop_Not64:
                  e2 = IRExpr_Const(IRConst_U64(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_Not32:
                  e2 = IRExpr_Const(IRConst_U32(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Not16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_Not8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
                  break;

               case Iop_Not1:
                  e2 = IRExpr_Const(IRConst_U1(
                          notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
                  break;

               case Iop_64to8: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 &= 0xFFULL;
                  e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
                  break;
               }
               case Iop_64to16: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 &= 0xFFFFULL;
                  e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
                  break;
               }
               case Iop_64to32: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 &= 0x00000000FFFFFFFFULL;
                  e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
                  break;
               }
               case Iop_64HIto32: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 >>= 32;
                  e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
                  break;
               }
               case Iop_32Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          0xFFFFFFFFULL
                          & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
                  break;
               case Iop_16Sto64: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  u64 <<= 48;
                  u64 = (Long)u64 >> 48;   /* signed shift */
                  e2 = IRExpr_Const(IRConst_U64(u64));
                  break;
               }
               case Iop_32Sto64: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  u64 <<= 32;
                  u64 = (Long)u64 >> 32;   /* signed shift */
                  e2 = IRExpr_Const(IRConst_U64(u64));
                  break;
               }

               case Iop_16to8: {
                  UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  w16 &= 0xFF;
                  e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
                  break;
               }
               case Iop_16HIto8: {
                  UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  w16 >>= 8;
                  w16 &= 0xFF;
                  e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
                  break;
               }

               case Iop_CmpNEZ8:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          0 !=
                          (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
                       )));
                  break;
               case Iop_CmpNEZ32:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          0 !=
                          (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                       )));
                  break;
               case Iop_CmpNEZ64:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
                       )));
                  break;

               case Iop_CmpwNEZ32: {
                  UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  if (w32 == 0)
                     e2 = IRExpr_Const(IRConst_U32( 0 ));
                  else
                     e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
                  break;
               }
               case Iop_CmpwNEZ64: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  if (w64 == 0)
                     e2 = IRExpr_Const(IRConst_U64( 0 ));
                  else
                     e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
                  break;
               }

               case Iop_Left32: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  Int  s32 = (Int)(u32 & 0xFFFFFFFF);
                  s32 = (s32 | (-s32));
                  e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
                  break;
               }

               case Iop_Left64: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  Long  s64 = (Long)u64;
                  s64 = (s64 | (-s64));
                  e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
                  break;
               }

               case Iop_Clz32: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  if (u32 != 0)
                     e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
                  break;
               }
               case Iop_Clz64: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  if (u64 != 0ULL)
                     e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
                  break;
               }

               /* For these vector ones, can't fold all cases, but at
                  least do the most obvious one.  Could do better here
                  using summarise/desummarise of vector constants, but
                  too difficult to verify; hence just handle the zero
                  cases. */
               case Iop_32UtoV128: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  if (0 == u32) {
                     e2 = IRExpr_Const(IRConst_V128(0x0000));
                  } else {
                     goto unhandled;
                  }
                  break;
               }
               case Iop_V128to64: {
                  UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
                  if (0 == ((v128 >> 0) & 0xFF)) {
                     e2 = IRExpr_Const(IRConst_U64(0));
                  } else {
                     goto unhandled;
                  }
                  break;
               }
               case Iop_V128HIto64: {
                  UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
                  if (0 == ((v128 >> 8) & 0xFF)) {
                     e2 = IRExpr_Const(IRConst_U64(0));
                  } else {
                     goto unhandled;
                  }
                  break;
               }
               case Iop_64UtoV128: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  if (0 == u64) {
                     e2 = IRExpr_Const(IRConst_V128(0x0000));
                  } else {
                     goto unhandled;
                  }
                  break;
               }

               /* Even stupider (although still correct ..) */
               case Iop_V256to64_0: case Iop_V256to64_1:
               case Iop_V256to64_2: case Iop_V256to64_3: {
                  UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
                  if (v256 == 0x00000000) {
                     e2 = IRExpr_Const(IRConst_U64(0));
                  } else {
                     goto unhandled;
                  }
                  break;
               }

               case Iop_ZeroHI64ofV128: {
                  /* Could do better here -- only need to look at the
                     bottom 64 bits of the argument, really. */
                  UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
                  if (v128 == 0x0000) {
                     e2 = IRExpr_Const(IRConst_V128(0x0000));
                  } else {
                     goto unhandled;
                  }
                  break;
               }

               default:
                  goto unhandled;
            }
         }
         break;

      case Iex_Binop:
         /* BINARY ops */
         if (e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg2->tag == Iex_Const) {
            /* cases where both args are consts */
            switch (e->Iex.Binop.op) {

               /* -- Or -- */
               case Iop_Or8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Or16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_Or32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Or64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_OrV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
                  break;

               /* -- Xor -- */
               case Iop_Xor8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Xor16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_Xor32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Xor64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_XorV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
                  break;

               /* -- And -- */
               case Iop_And8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_And16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_And32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_And64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_AndV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
                  break;

               /* -- Add -- */
               case Iop_Add8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Add32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Add64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;

               /* -- Sub -- */
               case Iop_Sub8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Sub32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Sub64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;

               /* -- Max32U -- */
               case Iop_Max32U: {
                  UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
                  UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
                  UInt res  = u32a > u32b ? u32a : u32b;
                  e2 = IRExpr_Const(IRConst_U32(res));
                  break;
               }

               /* -- Mul -- */
               case Iop_Mul32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Mul64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;

               case Iop_MullS32: {
                  /* very paranoid */
                  UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
                  UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
                  Int   s32a = (Int)u32a;
                  Int   s32b = (Int)u32b;
                  Long  s64a = (Long)s32a;
                  Long  s64b = (Long)s32b;
                  Long  sres = s64a * s64b;
                  ULong ures = (ULong)sres;
                  e2 = IRExpr_Const(IRConst_U64(ures));
                  break;
               }

               /* -- Shl -- */
               case Iop_Shl32:
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 31)
                     e2 = IRExpr_Const(IRConst_U32(
                             (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                              << shift)));
                  break;
               case Iop_Shl64:
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 63)
                     e2 = IRExpr_Const(IRConst_U64(
                             (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                              << shift)));
                  break;

               /* -- Sar -- */
               case Iop_Sar32: {
                  /* paranoid ... */
                  /*signed*/ Int s32;
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 31) {
                     s32 >>=/*signed*/ shift;
                     e2 = IRExpr_Const(IRConst_U32((UInt)s32));
                  }
                  break;
               }
               case Iop_Sar64: {
                  /* paranoid ... */
                  /*signed*/ Long s64;
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 63) {
                     s64 >>=/*signed*/ shift;
                     e2 = IRExpr_Const(IRConst_U64((ULong)s64));
                  }
                  break;
               }

               /* -- Shr -- */
               case Iop_Shr32: {
                  /* paranoid ... */
                  /*unsigned*/ UInt u32;
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1902 u32 = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1903 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1904 if (shift >= 0 && shift <= 31) {
1905 u32 >>=/*unsigned*/ shift;
1906 e2 = IRExpr_Const(IRConst_U32(u32));
1907 }
1908 break;
1909 }
1910 case Iop_Shr64: {
1911 /* paranoid ... */
1912 /*unsigned*/ ULong u64;
1913 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1914 u64 = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1915 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1916 if (shift >= 0 && shift <= 63) {
1917 u64 >>=/*unsigned*/ shift;
1918 e2 = IRExpr_Const(IRConst_U64(u64));
1919 }
1920 break;
1921 }
1922
1923 /* -- CmpEQ -- */
1924 case Iop_CmpEQ32:
1925 e2 = IRExpr_Const(IRConst_U1(toBool(
1926 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1927 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1928 break;
1929 case Iop_CmpEQ64:
1930 e2 = IRExpr_Const(IRConst_U1(toBool(
1931 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1932 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1933 break;
1934
1935 /* -- CmpNE -- */
1936 case Iop_CmpNE8:
1937 case Iop_CasCmpNE8:
1938 case Iop_ExpCmpNE8:
1939 e2 = IRExpr_Const(IRConst_U1(toBool(
1940 ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
1941 != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
1942 break;
1943 case Iop_CmpNE32:
1944 case Iop_CasCmpNE32:
1945 case Iop_ExpCmpNE32:
1946 e2 = IRExpr_Const(IRConst_U1(toBool(
1947 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1948 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1949 break;
1950 case Iop_CmpNE64:
1951 case Iop_CasCmpNE64:
1952 case Iop_ExpCmpNE64:
1953 e2 = IRExpr_Const(IRConst_U1(toBool(
1954 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1955 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1956 break;
1957
1958 /* -- CmpLEU -- */
1959 case Iop_CmpLE32U:
1960 e2 = IRExpr_Const(IRConst_U1(toBool(
1961 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1962 <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1963 break;
1964 case Iop_CmpLE64U:
1965 e2 = IRExpr_Const(IRConst_U1(toBool(
1966 ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1967 <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1968 break;
1969
1970 /* -- CmpLES -- */
1971 case Iop_CmpLE32S:
1972 e2 = IRExpr_Const(IRConst_U1(toBool(
1973 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1974 <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1975 break;
1976 case Iop_CmpLE64S:
1977 e2 = IRExpr_Const(IRConst_U1(toBool(
1978 ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1979 <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1980 break;
1981
1982 /* -- CmpLTS -- */
1983 case Iop_CmpLT32S:
1984 e2 = IRExpr_Const(IRConst_U1(toBool(
1985 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1986 < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1987 break;
1988 case Iop_CmpLT64S:
1989 e2 = IRExpr_Const(IRConst_U1(toBool(
1990 ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1991 < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1992 break;
1993
1994 /* -- CmpLTU -- */
1995 case Iop_CmpLT32U:
1996 e2 = IRExpr_Const(IRConst_U1(toBool(
1997 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1998 < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1999 break;
2000 case Iop_CmpLT64U:
2001 e2 = IRExpr_Const(IRConst_U1(toBool(
2002 ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
2003 < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
2004 break;
2005
2006 /* -- CmpORD -- */
2007 case Iop_CmpORD32S: {
2008 /* very paranoid */
2009 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
2010 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
2011 Int s32a = (Int)u32a;
2012 Int s32b = (Int)u32b;
2013 Int r = 0x2; /* EQ */
2014 if (s32a < s32b) {
2015 r = 0x8; /* LT */
2016 }
2017 else if (s32a > s32b) {
2018 r = 0x4; /* GT */
2019 }
2020 e2 = IRExpr_Const(IRConst_U32(r));
2021 break;
2022 }
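/* (Illustrative note, values invented: under this PPC-style
   8/4/2 encoding, CmpORD32S(1:I32,2:I32) folds to 0x8:I32 (LT)
   and CmpORD32S(7:I32,7:I32) folds to 0x2:I32 (EQ).) */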
2023
2024 /* -- nHLto2n -- */
2025 case Iop_32HLto64:
2026 e2 = IRExpr_Const(IRConst_U64(
2027 (((ULong)(e->Iex.Binop.arg1
2028 ->Iex.Const.con->Ico.U32)) << 32)
2029 | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
2030 ));
2031 break;
2032 case Iop_64HLto128:
2033 /* We can't fold this, because there is no way to
2034 express the result in IR, but at least pretend to
2035 handle it, so as to stop getting blasted with
2036 no-rule-for-this-primop messages. */
2037 break;
2038 /* For this vector one, can't fold all cases, but at
2039 least do the most obvious one. Could do better here
2040 using summarise/desummarise of vector constants, but
2041 too difficult to verify; hence just handle the zero
2042 cases. */
2043 case Iop_64HLtoV128: {
2044 ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
2045 ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2046 if (0 == argHi && 0 == argLo) {
2047 e2 = IRExpr_Const(IRConst_V128(0));
2048 } else {
2049 goto unhandled;
2050 }
2051 break;
2052 }
2053 /* Same reasoning for the 256-bit version. */
2054 case Iop_V128HLtoV256: {
2055 IRExpr* argHi = e->Iex.Binop.arg1;
2056 IRExpr* argLo = e->Iex.Binop.arg2;
2057 if (isZeroV128(argHi) && isZeroV128(argLo)) {
2058 e2 = IRExpr_Const(IRConst_V256(0));
2059 } else {
2060 goto unhandled;
2061 }
2062 break;
2063 }
2064
2065 /* -- V128 stuff -- */
2066 case Iop_InterleaveLO8x16: {
2067 /* This turns up a lot in Memcheck instrumentation of
2068 Icc generated code. I don't know why. */
2069 UShort arg1 = e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
2070 UShort arg2 = e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
2071 if (0 == arg1 && 0 == arg2) {
2072 e2 = IRExpr_Const(IRConst_V128(0));
2073 } else {
2074 goto unhandled;
2075 }
2076 break;
2077 }
2078
2079 default:
2080 goto unhandled;
2081 }
2082
2083 } else {
2084
2085 /* other cases (identities, etc) */
2086 switch (e->Iex.Binop.op) {
2087
2088 case Iop_Shl32:
2089 case Iop_Shl64:
2090 case Iop_Shr64:
2091 case Iop_Sar64:
2092 /* Shl32/Shl64/Shr64/Sar64(x,0) ==> x */
2093 if (isZeroU(e->Iex.Binop.arg2)) {
2094 e2 = e->Iex.Binop.arg1;
2095 break;
2096 }
2097 /* Shl32/Shl64/Shr64/Sar64(0,x) ==> 0 */
2098 if (isZeroU(e->Iex.Binop.arg1)) {
2099 e2 = e->Iex.Binop.arg1;
2100 break;
2101 }
2102 break;
2103
2104 case Iop_Sar32:
2105 case Iop_Shr32:
2106 /* Shr32/Sar32(x,0) ==> x */
2107 if (isZeroU(e->Iex.Binop.arg2)) {
2108 e2 = e->Iex.Binop.arg1;
2109 break;
2110 }
2111 break;
2112
2113 case Iop_Or8:
2114 case Iop_Or16:
2115 case Iop_Or32:
2116 case Iop_Or64:
2117 case Iop_Max32U:
2118 /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
2119 if (isZeroU(e->Iex.Binop.arg2)) {
2120 e2 = e->Iex.Binop.arg1;
2121 break;
2122 }
2123 /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
2124 if (isZeroU(e->Iex.Binop.arg1)) {
2125 e2 = e->Iex.Binop.arg2;
2126 break;
2127 }
2128 /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
2129 /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
2130 if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) {
2131 e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2132 break;
2133 }
2134 /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
2135 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2136 e2 = e->Iex.Binop.arg1;
2137 break;
2138 }
2139 break;
2140
2141 case Iop_Add8:
2142 /* Add8(t,t) ==> t << 1.
2143 Memcheck doesn't understand that
2144 x+x produces a defined least significant bit, and it seems
2145 simplest just to get rid of the problem by rewriting it
2146 out, since the opportunity to do so exists. */
2147 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2148 e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
2149 IRExpr_Const(IRConst_U8(1)));
2150 break;
2151 }
2152 break;
2153
2154 /* NB no Add16(t,t) case yet as no known test case exists */
2155
2156 case Iop_Add32:
2157 case Iop_Add64:
2158 /* Add32/Add64(x,0) ==> x */
2159 if (isZeroU(e->Iex.Binop.arg2)) {
2160 e2 = e->Iex.Binop.arg1;
2161 break;
2162 }
2163 /* Add32/Add64(0,x) ==> x */
2164 if (isZeroU(e->Iex.Binop.arg1)) {
2165 e2 = e->Iex.Binop.arg2;
2166 break;
2167 }
2168 /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
2169 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2170 e2 = IRExpr_Binop(
2171 e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
2172 e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
2173 break;
2174 }
2175 break;
2176
2177 case Iop_Sub32:
2178 case Iop_Sub64:
2179 /* Sub32/Sub64(x,0) ==> x */
2180 if (isZeroU(e->Iex.Binop.arg2)) {
2181 e2 = e->Iex.Binop.arg1;
2182 break;
2183 }
2184 /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */
2185 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2186 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2187 break;
2188 }
2189 break;
2190 case Iop_Sub8x16:
2191 /* Sub8x16(x,0) ==> x */
2192 if (isZeroV128(e->Iex.Binop.arg2)) {
2193 e2 = e->Iex.Binop.arg1;
2194 break;
2195 }
2196 break;
2197
2198 case Iop_And8:
2199 case Iop_And16:
2200 case Iop_And32:
2201 case Iop_And64:
2202 /* And8/And16/And32/And64(x,1---1b) ==> x */
2203 if (isOnesU(e->Iex.Binop.arg2)) {
2204 e2 = e->Iex.Binop.arg1;
2205 break;
2206 }
2207 /* And8/And16/And32/And64(1---1b,x) ==> x */
2208 if (isOnesU(e->Iex.Binop.arg1)) {
2209 e2 = e->Iex.Binop.arg2;
2210 break;
2211 }
2212 /* And8/And16/And32/And64(x,0) ==> 0 */
2213 if (isZeroU(e->Iex.Binop.arg2)) {
2214 e2 = e->Iex.Binop.arg2;
2215 break;
2216 }
2217 /* And8/And16/And32/And64(0,x) ==> 0 */
2218 if (isZeroU(e->Iex.Binop.arg1)) {
2219 e2 = e->Iex.Binop.arg1;
2220 break;
2221 }
2222 /* And8/And16/And32/And64(t,t) ==> t, for some IRTemp t */
2223 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2224 e2 = e->Iex.Binop.arg1;
2225 break;
2226 }
2227 break;
2228
2229 case Iop_AndV128:
2230 case Iop_AndV256:
2231 /* AndV128/AndV256(t,t)
2232 ==> t, for some IRTemp t */
2233 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2234 e2 = e->Iex.Binop.arg1;
2235 break;
2236 }
2237 /* Deal with either arg zero. Could handle other And
2238 cases here too. */
2239 if (e->Iex.Binop.op == Iop_AndV256
2240 && (isZeroV256(e->Iex.Binop.arg1)
2241 || isZeroV256(e->Iex.Binop.arg2))) {
2242 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2243 break;
2244 } else if (e->Iex.Binop.op == Iop_AndV128
2245 && (isZeroV128(e->Iex.Binop.arg1)
2246 || isZeroV128(e->Iex.Binop.arg2))) {
2247 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2248 break;
2249 }
2250 break;
2251
2252 case Iop_OrV128:
2253 case Iop_OrV256:
2254 /* OrV128/OrV256(t,t) ==> t, for some IRTemp t */
2255 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2256 e2 = e->Iex.Binop.arg1;
2257 break;
2258 }
2259 /* OrV128(t,0) ==> t */
2260 if (e->Iex.Binop.op == Iop_OrV128) {
2261 if (isZeroV128(e->Iex.Binop.arg2)) {
2262 e2 = e->Iex.Binop.arg1;
2263 break;
2264 }
2265 if (isZeroV128(e->Iex.Binop.arg1)) {
2266 e2 = e->Iex.Binop.arg2;
2267 break;
2268 }
2269 }
2270 /* OrV256(t,0) ==> t */
2271 if (e->Iex.Binop.op == Iop_OrV256) {
2272 if (isZeroV256(e->Iex.Binop.arg2)) {
2273 e2 = e->Iex.Binop.arg1;
2274 break;
2275 }
2276 //Disabled because there's no known test case right now.
2277 //if (isZeroV256(e->Iex.Binop.arg1)) {
2278 // e2 = e->Iex.Binop.arg2;
2279 // break;
2280 //}
2281 }
2282 break;
2283
2284 case Iop_Xor8:
2285 case Iop_Xor16:
2286 case Iop_Xor32:
2287 case Iop_Xor64:
2288 case Iop_XorV128:
2289 case Iop_XorV256:
2290 /* Xor8/16/32/64/V128/V256(t,t) ==> 0, for some IRTemp t */
2291 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2292 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2293 break;
2294 }
2295 /* XorV128(t,0) ==> t */
2296 if (e->Iex.Binop.op == Iop_XorV128) {
2297 if (isZeroV128(e->Iex.Binop.arg2)) {
2298 e2 = e->Iex.Binop.arg1;
2299 break;
2300 }
2301 //Disabled because there's no known test case right now.
2302 //if (isZeroV128(e->Iex.Binop.arg1)) {
2303 // e2 = e->Iex.Binop.arg2;
2304 // break;
2305 //}
2306 } else {
2307 /* Xor8/16/32/64(0,t) ==> t */
2308 if (isZeroU(e->Iex.Binop.arg1)) {
2309 e2 = e->Iex.Binop.arg2;
2310 break;
2311 }
2312 /* Xor8/16/32/64(t,0) ==> t */
2313 if (isZeroU(e->Iex.Binop.arg2)) {
2314 e2 = e->Iex.Binop.arg1;
2315 break;
2316 }
2317 }
2318 break;
2319
2320 case Iop_CmpNE32:
2321 /* CmpNE32(t,t) ==> 0, for some IRTemp t */
2322 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2323 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2324 break;
2325 }
2326 /* CmpNE32(1Uto32(b), 0) ==> b */
2327 if (isZeroU32(e->Iex.Binop.arg2)) {
2328 IRExpr* a1 = chase(env, e->Iex.Binop.arg1);
2329 if (a1 && a1->tag == Iex_Unop
2330 && a1->Iex.Unop.op == Iop_1Uto32) {
2331 e2 = a1->Iex.Unop.arg;
2332 break;
2333 }
2334 }
2335 break;
2336
2337 case Iop_CmpEQ32:
2338 case Iop_CmpEQ64:
2339 case Iop_CmpEQ8x8:
2340 case Iop_CmpEQ8x16:
2341 case Iop_CmpEQ16x8:
2342 case Iop_CmpEQ32x4:
2343 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2344 e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2345 break;
2346 }
2347 break;
2348
2349 default:
2350 break;
2351 }
2352 }
2353 break;
2354
2355 case Iex_ITE:
2356 /* ITE */
2357 /* is the discriminant a constant? */
2358 if (e->Iex.ITE.cond->tag == Iex_Const) {
2359 /* assured to us by the IR type rules */
2360 vassert(e->Iex.ITE.cond->Iex.Const.con->tag == Ico_U1);
2361 e2 = e->Iex.ITE.cond->Iex.Const.con->Ico.U1
2362 ? e->Iex.ITE.iftrue : e->Iex.ITE.iffalse;
2363 }
2364 else
2365 /* are the arms identical? (pretty weedy test) */
2366 if (sameIRExprs(env, e->Iex.ITE.iftrue,
2367 e->Iex.ITE.iffalse)) {
2368 e2 = e->Iex.ITE.iffalse;
2369 }
2370 break;
2371
2372 default:
2373 /* not considered */
2374 break;
2375 }
2376
2377 /* Show cases where we've found but not folded 'op(t,t)'. Be
2378 careful not to call sameIRExprs with values of different types,
2379 though, else it will assert (and so it should!). We can't
2380 conveniently call typeOfIRExpr on the two args without a whole
2381 bunch of extra plumbing to pass in a type env, so just use a
2382 hacky test to check the arguments are not anything that might
2383 cause sameIRExprs to assert. This is only OK because this kludge
2384 is only used for debug printing, not for "real" operation. For
2385 "real" operation (ie, all other calls to sameIRExprs), it is
2386 essential that the two args have the same type.
2387
2388 The "right" solution is to plumb the containing block's
2389 IRTypeEnv through to here and use typeOfIRExpr to be sure. But
2390 that's a bunch of extra parameter passing which will just slow
2391 down the normal case, for no purpose. */
2392 if (vex_control.iropt_verbosity > 0
2393 && e == e2
2394 && e->tag == Iex_Binop
2395 && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1,
2396 e->Iex.Binop.arg2)
2397 && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2398 vex_printf("vex iropt: fold_Expr: no ident rule for: ");
2399 ppIRExpr(e);
2400 vex_printf("\n");
2401 }
2402
2403 /* Show the overall results of folding. */
2404 if (DEBUG_IROPT && e2 != e) {
2405 vex_printf("FOLD: ");
2406 ppIRExpr(e); vex_printf(" -> ");
2407 ppIRExpr(e2); vex_printf("\n");
2408 }
2409
2410 return e2;
2411
2412 unhandled:
2413 # if 0
2414 vex_printf("\n\n");
2415 ppIRExpr(e);
2416 vpanic("fold_Expr: no rule for the above");
2417 # else
2418 if (vex_control.iropt_verbosity > 0) {
2419 vex_printf("vex iropt: fold_Expr: no const rule for: ");
2420 ppIRExpr(e);
2421 vex_printf("\n");
2422 }
2423 return e2;
2424 # endif
2425 }
2426
2427
2428 /* Apply the subst to a simple 1-level expression -- guaranteed to be
2429 1-level due to previous flattening pass. */
2430
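/* An illustrative sketch (temp numbers invented): if env maps
   t2 to the constant 0x8:I32, the 1-level expression
   Add32(t2,t7) is rewritten to Add32(0x8:I32,t7); t7 is left
   alone because it has no binding in env. */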
2431 static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
2432 {
2433 switch (ex->tag) {
2434 case Iex_RdTmp:
2435 if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
2436 IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp];
2437 if (rhs->tag == Iex_RdTmp)
2438 return rhs;
2439 if (rhs->tag == Iex_Const
2440 && rhs->Iex.Const.con->tag != Ico_F64i)
2441 return rhs;
2442 }
2443 /* not bound in env */
2444 return ex;
2445
2446 case Iex_Const:
2447 case Iex_Get:
2448 return ex;
2449
2450 case Iex_GetI:
2451 vassert(isIRAtom(ex->Iex.GetI.ix));
2452 return IRExpr_GetI(
2453 ex->Iex.GetI.descr,
2454 subst_Expr(env, ex->Iex.GetI.ix),
2455 ex->Iex.GetI.bias
2456 );
2457
2458 case Iex_Qop: {
2459 IRQop* qop = ex->Iex.Qop.details;
2460 vassert(isIRAtom(qop->arg1));
2461 vassert(isIRAtom(qop->arg2));
2462 vassert(isIRAtom(qop->arg3));
2463 vassert(isIRAtom(qop->arg4));
2464 return IRExpr_Qop(
2465 qop->op,
2466 subst_Expr(env, qop->arg1),
2467 subst_Expr(env, qop->arg2),
2468 subst_Expr(env, qop->arg3),
2469 subst_Expr(env, qop->arg4)
2470 );
2471 }
2472
2473 case Iex_Triop: {
2474 IRTriop* triop = ex->Iex.Triop.details;
2475 vassert(isIRAtom(triop->arg1));
2476 vassert(isIRAtom(triop->arg2));
2477 vassert(isIRAtom(triop->arg3));
2478 return IRExpr_Triop(
2479 triop->op,
2480 subst_Expr(env, triop->arg1),
2481 subst_Expr(env, triop->arg2),
2482 subst_Expr(env, triop->arg3)
2483 );
2484 }
2485
2486 case Iex_Binop:
2487 vassert(isIRAtom(ex->Iex.Binop.arg1));
2488 vassert(isIRAtom(ex->Iex.Binop.arg2));
2489 return IRExpr_Binop(
2490 ex->Iex.Binop.op,
2491 subst_Expr(env, ex->Iex.Binop.arg1),
2492 subst_Expr(env, ex->Iex.Binop.arg2)
2493 );
2494
2495 case Iex_Unop:
2496 vassert(isIRAtom(ex->Iex.Unop.arg));
2497 return IRExpr_Unop(
2498 ex->Iex.Unop.op,
2499 subst_Expr(env, ex->Iex.Unop.arg)
2500 );
2501
2502 case Iex_Load:
2503 vassert(isIRAtom(ex->Iex.Load.addr));
2504 return IRExpr_Load(
2505 ex->Iex.Load.end,
2506 ex->Iex.Load.ty,
2507 subst_Expr(env, ex->Iex.Load.addr)
2508 );
2509
2510 case Iex_CCall: {
2511 Int i;
2512 IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
2513 for (i = 0; args2[i]; i++) {
2514 vassert(isIRAtom(args2[i]));
2515 args2[i] = subst_Expr(env, args2[i]);
2516 }
2517 return IRExpr_CCall(
2518 ex->Iex.CCall.cee,
2519 ex->Iex.CCall.retty,
2520 args2
2521 );
2522 }
2523
2524 case Iex_ITE:
2525 vassert(isIRAtom(ex->Iex.ITE.cond));
2526 vassert(isIRAtom(ex->Iex.ITE.iftrue));
2527 vassert(isIRAtom(ex->Iex.ITE.iffalse));
2528 return IRExpr_ITE(
2529 subst_Expr(env, ex->Iex.ITE.cond),
2530 subst_Expr(env, ex->Iex.ITE.iftrue),
2531 subst_Expr(env, ex->Iex.ITE.iffalse)
2532 );
2533
2534 default:
2535 vex_printf("\n\n"); ppIRExpr(ex);
2536 vpanic("subst_Expr");
2537
2538 }
2539 }
2540
2541
2542 /* Apply the subst to stmt, then fold the result as much as possible.
2543 Much simplified due to stmt being previously flattened. As a
2544 result of this, the stmt may wind up being turned into a no-op.
2545 */
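/* For example (a sketch; temp numbers invented): with env
   mapping t3 to 0x10:I32, the statement
      t5 = Add32(t3,0x4:I32)
   substitutes to t5 = Add32(0x10:I32,0x4:I32), which then
   folds to t5 = 0x14:I32. */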
2546 static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
2547 {
2548 # if 0
2549 vex_printf("\nsubst and fold stmt\n");
2550 ppIRStmt(st);
2551 vex_printf("\n");
2552 # endif
2553
2554 switch (st->tag) {
2555 case Ist_AbiHint:
2556 vassert(isIRAtom(st->Ist.AbiHint.base));
2557 vassert(isIRAtom(st->Ist.AbiHint.nia));
2558 return IRStmt_AbiHint(
2559 fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)),
2560 st->Ist.AbiHint.len,
2561 fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia))
2562 );
2563 case Ist_Put:
2564 vassert(isIRAtom(st->Ist.Put.data));
2565 return IRStmt_Put(
2566 st->Ist.Put.offset,
2567 fold_Expr(env, subst_Expr(env, st->Ist.Put.data))
2568 );
2569
2570 case Ist_PutI: {
2571 IRPutI *puti, *puti2;
2572 puti = st->Ist.PutI.details;
2573 vassert(isIRAtom(puti->ix));
2574 vassert(isIRAtom(puti->data));
2575 puti2 = mkIRPutI(puti->descr,
2576 fold_Expr(env, subst_Expr(env, puti->ix)),
2577 puti->bias,
2578 fold_Expr(env, subst_Expr(env, puti->data)));
2579 return IRStmt_PutI(puti2);
2580 }
2581
2582 case Ist_WrTmp:
2583 /* This is the one place where an expr (st->Ist.WrTmp.data) is
2584 allowed to be more than just a constant or a tmp. */
2585 return IRStmt_WrTmp(
2586 st->Ist.WrTmp.tmp,
2587 fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data))
2588 );
2589
2590 case Ist_Store:
2591 vassert(isIRAtom(st->Ist.Store.addr));
2592 vassert(isIRAtom(st->Ist.Store.data));
2593 return IRStmt_Store(
2594 st->Ist.Store.end,
2595 fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)),
2596 fold_Expr(env, subst_Expr(env, st->Ist.Store.data))
2597 );
2598
2599 case Ist_StoreG: {
2600 IRStoreG* sg = st->Ist.StoreG.details;
2601 vassert(isIRAtom(sg->addr));
2602 vassert(isIRAtom(sg->data));
2603 vassert(isIRAtom(sg->guard));
2604 IRExpr* faddr = fold_Expr(env, subst_Expr(env, sg->addr));
2605 IRExpr* fdata = fold_Expr(env, subst_Expr(env, sg->data));
2606 IRExpr* fguard = fold_Expr(env, subst_Expr(env, sg->guard));
2607 if (fguard->tag == Iex_Const) {
2608 /* The condition on this store has folded down to a constant. */
2609 vassert(fguard->Iex.Const.con->tag == Ico_U1);
2610 if (fguard->Iex.Const.con->Ico.U1 == False) {
2611 return IRStmt_NoOp();
2612 } else {
2613 vassert(fguard->Iex.Const.con->Ico.U1 == True);
2614 return IRStmt_Store(sg->end, faddr, fdata);
2615 }
2616 }
2617 return IRStmt_StoreG(sg->end, faddr, fdata, fguard);
2618 }
2619
2620 case Ist_LoadG: {
2621 /* This is complicated. If the guard folds down to 'false',
2622 we can replace it with an assignment 'dst := alt', but if
2623 the guard folds down to 'true', we can't conveniently
2624 replace it with an unconditional load, because doing so
2625 requires generating a new temporary, and that is not easy
2626 to do at this point. */
2627 IRLoadG* lg = st->Ist.LoadG.details;
2628 vassert(isIRAtom(lg->addr));
2629 vassert(isIRAtom(lg->alt));
2630 vassert(isIRAtom(lg->guard));
2631 IRExpr* faddr = fold_Expr(env, subst_Expr(env, lg->addr));
2632 IRExpr* falt = fold_Expr(env, subst_Expr(env, lg->alt));
2633 IRExpr* fguard = fold_Expr(env, subst_Expr(env, lg->guard));
2634 if (fguard->tag == Iex_Const) {
2635 /* The condition on this load has folded down to a constant. */
2636 vassert(fguard->Iex.Const.con->tag == Ico_U1);
2637 if (fguard->Iex.Const.con->Ico.U1 == False) {
2638 /* The load is not going to happen -- instead 'alt' is
2639 assigned to 'dst'. */
2640 return IRStmt_WrTmp(lg->dst, falt);
2641 } else {
2642 vassert(fguard->Iex.Const.con->Ico.U1 == True);
2643 /* The load is always going to happen. We want to
2644 convert to an unconditional load and assign to 'dst'
2645 (IRStmt_WrTmp). Problem is we need an extra temp to
2646 hold the loaded value, but none is available.
2647 Instead, reconstitute the conditional load (with
2648 folded args, of course) and let the caller of this
2649 routine deal with the problem. */
2650 }
2651 }
2652 return IRStmt_LoadG(lg->end, lg->cvt, lg->dst, faddr, falt, fguard);
2653 }
2654
2655 case Ist_CAS: {
2656 IRCAS *cas, *cas2;
2657 cas = st->Ist.CAS.details;
2658 vassert(isIRAtom(cas->addr));
2659 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
2660 vassert(isIRAtom(cas->expdLo));
2661 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
2662 vassert(isIRAtom(cas->dataLo));
2663 cas2 = mkIRCAS(
2664 cas->oldHi, cas->oldLo, cas->end,
2665 fold_Expr(env, subst_Expr(env, cas->addr)),
2666 cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi))
2667 : NULL,
2668 fold_Expr(env, subst_Expr(env, cas->expdLo)),
2669 cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi))
2670 : NULL,
2671 fold_Expr(env, subst_Expr(env, cas->dataLo))
2672 );
2673 return IRStmt_CAS(cas2);
2674 }
2675
2676 case Ist_LLSC:
2677 vassert(isIRAtom(st->Ist.LLSC.addr));
2678 if (st->Ist.LLSC.storedata)
2679 vassert(isIRAtom(st->Ist.LLSC.storedata));
2680 return IRStmt_LLSC(
2681 st->Ist.LLSC.end,
2682 st->Ist.LLSC.result,
2683 fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)),
2684 st->Ist.LLSC.storedata
2685 ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata))
2686 : NULL
2687 );
2688
2689 case Ist_Dirty: {
2690 Int i;
2691 IRDirty *d, *d2;
2692 d = st->Ist.Dirty.details;
2693 d2 = emptyIRDirty();
2694 *d2 = *d;
2695 d2->args = shallowCopyIRExprVec(d2->args);
2696 if (d2->mFx != Ifx_None) {
2697 vassert(isIRAtom(d2->mAddr));
2698 d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr));
2699 }
2700 vassert(isIRAtom(d2->guard));
2701 d2->guard = fold_Expr(env, subst_Expr(env, d2->guard));
2702 for (i = 0; d2->args[i]; i++) {
2703 IRExpr* arg = d2->args[i];
2704 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
2705 vassert(isIRAtom(arg));
2706 d2->args[i] = fold_Expr(env, subst_Expr(env, arg));
2707 }
2708 }
2709 return IRStmt_Dirty(d2);
2710 }
2711
2712 case Ist_IMark:
2713 return IRStmt_IMark(st->Ist.IMark.addr,
2714 st->Ist.IMark.len,
2715 st->Ist.IMark.delta);
2716
2717 case Ist_NoOp:
2718 return IRStmt_NoOp();
2719
2720 case Ist_MBE:
2721 return IRStmt_MBE(st->Ist.MBE.event);
2722
2723 case Ist_Exit: {
2724 IRExpr* fcond;
2725 vassert(isIRAtom(st->Ist.Exit.guard));
2726 fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard));
2727 if (fcond->tag == Iex_Const) {
2728 /* Interesting. The condition on this exit has folded down to
2729 a constant. */
2730 vassert(fcond->Iex.Const.con->tag == Ico_U1);
2731 if (fcond->Iex.Const.con->Ico.U1 == False) {
2732 /* exit is never going to happen, so dump the statement. */
2733 return IRStmt_NoOp();
2734 } else {
2735 vassert(fcond->Iex.Const.con->Ico.U1 == True);
2736 /* Hmmm. The exit has become unconditional. Leave it
2737 as it is for now, since we'd have to truncate the BB
2738 at this point, which is tricky. Such truncation is
2739 done later by the dead-code elimination pass. */
2740 /* fall out into the reconstruct-the-exit code. */
2741 if (vex_control.iropt_verbosity > 0)
2742 /* really a misuse of vex_control.iropt_verbosity */
2743 vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
2744 }
2745 }
2746 return IRStmt_Exit(fcond, st->Ist.Exit.jk,
2747 st->Ist.Exit.dst, st->Ist.Exit.offsIP);
2748 }
2749
2750 default:
2751 vex_printf("\n"); ppIRStmt(st);
2752 vpanic("subst_and_fold_Stmt");
2753 }
2754 }
2755
2756
2757 IRSB* cprop_BB ( IRSB* in )
2758 {
2759 Int i;
2760 IRSB* out;
2761 IRStmt* st2;
2762 Int n_tmps = in->tyenv->types_used;
2763 IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
2764 /* Keep track of IRStmt_LoadGs that we need to revisit after
2765 processing all the other statements. */
2766 const Int N_FIXUPS = 16;
2767 Int fixups[N_FIXUPS]; /* indices in the stmt array of 'out' */
2768 Int n_fixups = 0;
2769
2770 out = emptyIRSB();
2771 out->tyenv = deepCopyIRTypeEnv( in->tyenv );
2772
2773 /* Set up the env which travels forward. This holds a
2774 substitution, mapping IRTemps to IRExprs. The environment
2775 is to be applied as we move along. Keys are IRTemps.
2776 Values are IRExpr*s.
2777 */
2778 for (i = 0; i < n_tmps; i++)
2779 env[i] = NULL;
2780
2781 /* For each original SSA-form stmt ... */
2782 for (i = 0; i < in->stmts_used; i++) {
2783
2784 /* First apply the substitution to the current stmt. This
2785 propagates in any constants and tmp-tmp assignments
2786 accumulated prior to this point. As part of the subst_and_fold_Stmt
2787 call, also fold any resulting constant expressions. */
2788
2789 st2 = in->stmts[i];
2790
2791 /* perhaps st2 is already a no-op? */
2792 if (st2->tag == Ist_NoOp) continue;
2793
2794 st2 = subst_and_fold_Stmt( env, st2 );
2795
2796 /* Deal with some post-folding special cases. */
2797 switch (st2->tag) {
2798
2799 /* If the statement has been folded into a no-op, forget
2800 it. */
2801 case Ist_NoOp:
2802 continue;
2803
2804 /* If the statement assigns to an IRTemp add it to the
2805 running environment. This is for the benefit of copy
2806 propagation and to allow sameIRExprs to look through
2807 IRTemps. */
2808 case Ist_WrTmp: {
2809 vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
2810 env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
2811
2812 /* 't1 = t2' -- don't add to BB; will be optimized out */
2813 if (st2->Ist.WrTmp.data->tag == Iex_RdTmp)
2814 continue;
2815
2816 /* 't = const' && 'const != F64i' -- don't add to BB
2817 Note, we choose not to propagate const when const is an
2818 F64i, so that F64i literals can be CSE'd later. This
2819 helps x86 floating point code generation. */
2820 if (st2->Ist.WrTmp.data->tag == Iex_Const
2821 && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
2822 continue;
2823 }
2824 /* else add it to the output, as normal */
2825 break;
2826 }
2827
2828 case Ist_LoadG: {
2829 IRLoadG* lg = st2->Ist.LoadG.details;
2830 IRExpr* guard = lg->guard;
2831 if (guard->tag == Iex_Const) {
2832 /* The guard has folded to a constant, and that
2833 constant must be 1:I1, since subst_and_fold_Stmt
2834 folds out the case 0:I1 by itself. */
2835 vassert(guard->Iex.Const.con->tag == Ico_U1);
2836 vassert(guard->Iex.Const.con->Ico.U1 == True);
2837 /* Add a NoOp here as a placeholder, and make a note of
2838 where it is in the output block. Afterwards we'll
2839 come back here and transform the NoOp and the LoadG
2840 into a load-convert pair. The fixups[] entry
2841 refers to the inserted NoOp, and we expect to find
2842 the relevant LoadG immediately after it. */
2843 vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
2844 if (n_fixups < N_FIXUPS) {
2845 fixups[n_fixups++] = out->stmts_used;
2846 addStmtToIRSB( out, IRStmt_NoOp() );
2847 }
2848 }
2849 /* And always add the LoadG to the output, regardless. */
2850 break;
2851 }
2852
2853 default:
2854 break;
2855 }
2856
2857 /* Not interesting, copy st2 into the output block. */
2858 addStmtToIRSB( out, st2 );
2859 }
2860
2861 # if STATS_IROPT
2862 vex_printf("sameIRExpr: invoked = %u/%u equal = %u/%u max_nodes = %u\n",
2863 invocation_count, recursion_count, success_count,
2864 recursion_success_count, max_nodes_visited);
2865 # endif
2866
2867 out->next = subst_Expr( env, in->next );
2868 out->jumpkind = in->jumpkind;
2869 out->offsIP = in->offsIP;
2870
2871 /* Process any leftover unconditional LoadGs that we noticed
2872 in the main pass. */
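/* For example (a sketch; temp numbers and pretty-printing only
   approximate): a placeholder/LoadG pair along the lines of
      IR-NoOp
      t2 = if (1:I1) ILGop_16Uto32(LDle:I16(t1)) else t9
   is rewritten into an unconditional load plus a conversion:
      t20 = LDle:I16(t1)   (t20 freshly made by newIRTemp)
      t2  = 16Uto32(t20)
*/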
2873 vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
2874 for (i = 0; i < n_fixups; i++) {
2875 Int ix = fixups[i];
2876 /* Carefully verify that the LoadG has the expected form. */
2877 vassert(ix >= 0 && ix+1 < out->stmts_used);
2878 IRStmt* nop = out->stmts[ix];
2879 IRStmt* lgu = out->stmts[ix+1];
2880 vassert(nop->tag == Ist_NoOp);
2881 vassert(lgu->tag == Ist_LoadG);
2882 IRLoadG* lg = lgu->Ist.LoadG.details;
2883 IRExpr* guard = lg->guard;
2884 vassert(guard->Iex.Const.con->tag == Ico_U1);
2885 vassert(guard->Iex.Const.con->Ico.U1 == True);
2886 /* Figure out the load and result types, and the implied
2887 conversion operation. */
2888 IRType cvtRes = Ity_INVALID, cvtArg = Ity_INVALID;
2889 typeOfIRLoadGOp(lg->cvt, &cvtRes, &cvtArg);
2890 IROp cvtOp = Iop_INVALID;
2891 switch (lg->cvt) {
2892 case ILGop_IdentV128:
2893 case ILGop_Ident64:
2894 case ILGop_Ident32: break;
2895 case ILGop_8Uto32: cvtOp = Iop_8Uto32; break;
2896 case ILGop_8Sto32: cvtOp = Iop_8Sto32; break;
2897 case ILGop_16Uto32: cvtOp = Iop_16Uto32; break;
2898 case ILGop_16Sto32: cvtOp = Iop_16Sto32; break;
2899 default: vpanic("cprop_BB: unhandled ILGOp");
2900 }
2901 /* Replace the placeholder NoOp by the required unconditional
2902 load. */
2903 IRTemp tLoaded = newIRTemp(out->tyenv, cvtArg);
2904 out->stmts[ix]
2905 = IRStmt_WrTmp(tLoaded,
2906 IRExpr_Load(lg->end, cvtArg, lg->addr));
2907 /* Replace the LoadG by a conversion from the loaded value's
2908 type to the required result type. */
2909 out->stmts[ix+1]
2910 = IRStmt_WrTmp(
2911 lg->dst, cvtOp == Iop_INVALID
2912 ? IRExpr_RdTmp(tLoaded)
2913 : IRExpr_Unop(cvtOp, IRExpr_RdTmp(tLoaded)));
2914 }
2915
2916 return out;
2917 }
2918
2919
2920 /*---------------------------------------------------------------*/
2921 /*--- Dead code (t = E) removal ---*/
2922 /*---------------------------------------------------------------*/
2923
2924 /* As a side effect, also removes all code following an unconditional
2925 side exit. */
2926
2927 /* The 'set' below is maintained as an array of Bools indexed
2928 by IRTemp -- really just representing a set of IRTemps.
2929 */
2930
2931 inline
2932 static void addUses_Temp ( Bool* set, IRTemp tmp )
2933 {
2934 set[(Int)tmp] = True;
2935 }
2936
2937 static void addUses_Expr ( Bool* set, IRExpr* e )
2938 {
2939 Int i;
2940 switch (e->tag) {
2941 case Iex_GetI:
2942 addUses_Expr(set, e->Iex.GetI.ix);
2943 return;
2944 case Iex_ITE:
2945 addUses_Expr(set, e->Iex.ITE.cond);
2946 addUses_Expr(set, e->Iex.ITE.iftrue);
2947 addUses_Expr(set, e->Iex.ITE.iffalse);
2948 return;
2949 case Iex_CCall:
2950 for (i = 0; e->Iex.CCall.args[i]; i++)
2951 addUses_Expr(set, e->Iex.CCall.args[i]);
2952 return;
2953 case Iex_Load:
2954 addUses_Expr(set, e->Iex.Load.addr);
2955 return;
2956 case Iex_Qop:
2957 addUses_Expr(set, e->Iex.Qop.details->arg1);
2958 addUses_Expr(set, e->Iex.Qop.details->arg2);
2959 addUses_Expr(set, e->Iex.Qop.details->arg3);
2960 addUses_Expr(set, e->Iex.Qop.details->arg4);
2961 return;
2962 case Iex_Triop:
2963 addUses_Expr(set, e->Iex.Triop.details->arg1);
2964 addUses_Expr(set, e->Iex.Triop.details->arg2);
2965 addUses_Expr(set, e->Iex.Triop.details->arg3);
2966 return;
2967 case Iex_Binop:
2968 addUses_Expr(set, e->Iex.Binop.arg1);
2969 addUses_Expr(set, e->Iex.Binop.arg2);
2970 return;
2971 case Iex_Unop:
2972 addUses_Expr(set, e->Iex.Unop.arg);
2973 return;
2974 case Iex_RdTmp:
2975 addUses_Temp(set, e->Iex.RdTmp.tmp);
2976 return;
2977 case Iex_Const:
2978 case Iex_Get:
2979 return;
2980 default:
2981 vex_printf("\n");
2982 ppIRExpr(e);
2983 vpanic("addUses_Expr");
2984 }
2985 }
2986
2987 static void addUses_Stmt ( Bool* set, IRStmt* st )
2988 {
2989 Int i;
2990 IRDirty* d;
2991 IRCAS* cas;
2992 switch (st->tag) {
2993 case Ist_AbiHint:
2994 addUses_Expr(set, st->Ist.AbiHint.base);
2995 addUses_Expr(set, st->Ist.AbiHint.nia);
2996 return;
2997 case Ist_PutI:
2998 addUses_Expr(set, st->Ist.PutI.details->ix);
2999 addUses_Expr(set, st->Ist.PutI.details->data);
3000 return;
3001 case Ist_WrTmp:
3002 addUses_Expr(set, st->Ist.WrTmp.data);
3003 return;
3004 case Ist_Put:
3005 addUses_Expr(set, st->Ist.Put.data);
3006 return;
3007 case Ist_Store:
3008 addUses_Expr(set, st->Ist.Store.addr);
3009 addUses_Expr(set, st->Ist.Store.data);
3010 return;
3011 case Ist_StoreG: {
3012 IRStoreG* sg = st->Ist.StoreG.details;
3013 addUses_Expr(set, sg->addr);
3014 addUses_Expr(set, sg->data);
3015 addUses_Expr(set, sg->guard);
3016 return;
3017 }
3018 case Ist_LoadG: {
3019 IRLoadG* lg = st->Ist.LoadG.details;
3020 addUses_Expr(set, lg->addr);
3021 addUses_Expr(set, lg->alt);
3022 addUses_Expr(set, lg->guard);
3023 return;
3024 }
3025 case Ist_CAS:
3026 cas = st->Ist.CAS.details;
3027 addUses_Expr(set, cas->addr);
3028 if (cas->expdHi)
3029 addUses_Expr(set, cas->expdHi);
3030 addUses_Expr(set, cas->expdLo);
3031 if (cas->dataHi)
3032 addUses_Expr(set, cas->dataHi);
3033 addUses_Expr(set, cas->dataLo);
3034 return;
3035 case Ist_LLSC:
3036 addUses_Expr(set, st->Ist.LLSC.addr);
3037 if (st->Ist.LLSC.storedata)
3038 addUses_Expr(set, st->Ist.LLSC.storedata);
3039 return;
3040 case Ist_Dirty:
3041 d = st->Ist.Dirty.details;
3042 if (d->mFx != Ifx_None)
3043 addUses_Expr(set, d->mAddr);
3044 addUses_Expr(set, d->guard);
3045 for (i = 0; d->args[i] != NULL; i++) {
3046 IRExpr* arg = d->args[i];
3047 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
3048 addUses_Expr(set, arg);
3049 }
3050 return;
3051 case Ist_NoOp:
3052 case Ist_IMark:
3053 case Ist_MBE:
3054 return;
3055 case Ist_Exit:
3056 addUses_Expr(set, st->Ist.Exit.guard);
3057 return;
3058 default:
3059 vex_printf("\n");
3060 ppIRStmt(st);
3061 vpanic("addUses_Stmt");
3062 }
3063 }
3064
3065
3066 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
3067 static Bool isZeroU1 ( IRExpr* e )
3068 {
3069 return toBool( e->tag == Iex_Const
3070 && e->Iex.Const.con->tag == Ico_U1
3071 && e->Iex.Const.con->Ico.U1 == False );
3072 }
3073
3074 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
3075 static Bool isOneU1 ( IRExpr* e )
3076 {
3077 return toBool( e->tag == Iex_Const
3078 && e->Iex.Const.con->tag == Ico_U1
3079 && e->Iex.Const.con->Ico.U1 == True );
3080 }
3081
3082
3083 /* Note, this destructively modifies the given IRSB. */
3084
3085 /* Scan backwards through statements, carrying a set of IRTemps which
3086 are known to be used after the current point. On encountering 't =
3087 E', delete the binding if it is not used. Otherwise, add any temp
3088 uses to the set and keep on moving backwards.
3089
3090 As an enhancement, the first (backwards) pass searches for IR exits
3091 with always-taken conditions and notes the location of the earliest
3092 one in the block. If any such are found, a second pass copies the
3093 exit destination and jump kind to the bb-end. Then, the exit and
3094 all statements following it are turned into no-ops.
3095 */
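/* A small example (temp numbers invented): in
      t1 = GET:I32(0)
      t2 = Add32(t1,0x1:I32)
      PUT(0) = t1
   t2 is never used after its definition, so the backwards pass
   replaces 't2 = Add32(t1,0x1:I32)' with an IR-NoOp; t1 is kept
   because the PUT still uses it. */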
3096
3097 /* notstatic */ void do_deadcode_BB ( IRSB* bb )
3098 {
3099 Int i, i_unconditional_exit;
3100 Int n_tmps = bb->tyenv->types_used;
3101 Bool* set = LibVEX_Alloc_inline(n_tmps * sizeof(Bool));
3102 IRStmt* st;
3103
3104 for (i = 0; i < n_tmps; i++)
3105 set[i] = False;
3106
3107 /* start off by recording IRTemp uses in the next field. */
3108 addUses_Expr(set, bb->next);
3109
3110 /* First pass */
3111
3112 /* Work backwards through the stmts */
3113 i_unconditional_exit = -1;
3114 for (i = bb->stmts_used-1; i >= 0; i--) {
3115 st = bb->stmts[i];
3116 if (st->tag == Ist_NoOp)
3117 continue;
3118 /* take note of any unconditional exits */
3119 if (st->tag == Ist_Exit
3120 && isOneU1(st->Ist.Exit.guard))
3121 i_unconditional_exit = i;
3122 if (st->tag == Ist_WrTmp
3123 && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
3124 /* it's an IRTemp which never got used. Delete it. */
3125 if (DEBUG_IROPT) {
3126 vex_printf("DEAD: ");
3127 ppIRStmt(st);
3128 vex_printf("\n");
3129 }
3130 bb->stmts[i] = IRStmt_NoOp();
3131 }
3132 else
3133 if (st->tag == Ist_Dirty
3134 && st->Ist.Dirty.details->guard
3135 && isZeroU1(st->Ist.Dirty.details->guard)) {
3136 /* This is a dirty helper which will never get called.
3137 Delete it. */
3138 bb->stmts[i] = IRStmt_NoOp();
3139 }
3140 else {
3141 /* Note any IRTemp uses made by the current statement. */
3142 addUses_Stmt(set, st);
3143 }
3144 }
3145
3146 /* Optional second pass: if any unconditional exits were found,
3147 delete them and all following statements. */
3148
3149 if (i_unconditional_exit != -1) {
3150 if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
3151 i_unconditional_exit);
3152 vassert(i_unconditional_exit >= 0
3153 && i_unconditional_exit < bb->stmts_used);
3154 bb->next
3155 = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
3156 bb->jumpkind
3157 = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
3158 bb->offsIP
3159 = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP;
3160 for (i = i_unconditional_exit; i < bb->stmts_used; i++)
3161 bb->stmts[i] = IRStmt_NoOp();
3162 }
3163 }
3164
3165
3166 /*---------------------------------------------------------------*/
3167 /*--- Specialisation of helper function calls, in ---*/
3168 /*--- collaboration with the front end ---*/
3169 /*---------------------------------------------------------------*/
3170
3171 static
3172 IRSB* spec_helpers_BB(
3173 IRSB* bb,
3174 IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int)
3175 )
3176 {
3177 Int i;
3178 IRStmt* st;
3179 IRExpr* ex;
3180 Bool any = False;
3181
3182 for (i = bb->stmts_used-1; i >= 0; i--) {
3183 st = bb->stmts[i];
3184
3185 if (st->tag != Ist_WrTmp
3186 || st->Ist.WrTmp.data->tag != Iex_CCall)
3187 continue;
3188
3189 ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
3190 st->Ist.WrTmp.data->Iex.CCall.args,
3191 &bb->stmts[0], i );
3192 if (!ex)
3193 /* the front end can't think of a suitable replacement */
3194 continue;
3195
3196 /* We got something better. Install it in the bb. */
3197 any = True;
3198 bb->stmts[i]
3199 = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);
3200
3201 if (0) {
3202 vex_printf("SPEC: ");
3203 ppIRExpr(st->Ist.WrTmp.data);
3204 vex_printf(" --> ");
3205 ppIRExpr(ex);
3206 vex_printf("\n");
3207 }
3208 }
3209
3210 if (any)
3211 bb = flatten_BB(bb);
3212 return bb;
3213 }
3214
3215
3216 /*---------------------------------------------------------------*/
3217 /*--- Determination of guest state aliasing relationships ---*/
3218 /*---------------------------------------------------------------*/
3219
3220 /* These are helper functions for CSE and GetI/PutI transformations.
3221
3222 Determine, to the extent possible, the relationship between two
3223 guest state accesses. The possible outcomes are:
3224
3225 * Exact alias. These two accesses denote precisely the same
3226 piece of the guest state.
3227
3228 * Definitely no alias. These two accesses are guaranteed not to
3229 overlap any part of the guest state.
3230
3231 * Unknown -- if neither of the above can be established.
3232
3233 If in doubt, return Unknown. */
3234
3235 typedef
3236 enum { ExactAlias, NoAlias, UnknownAlias }
3237 GSAliasing;
3238
3239
3240 /* Produces the alias relation between an indexed guest
3241 state access and a non-indexed access. */
3242
3243 static
3244 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
3245 Int offset2, IRType ty2 )
3246 {
3247 UInt minoff1, maxoff1, minoff2, maxoff2;
3248
3249 getArrayBounds( descr1, &minoff1, &maxoff1 );
3250 minoff2 = offset2;
3251 maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
3252
3253 if (maxoff1 < minoff2 || maxoff2 < minoff1)
3254 return NoAlias;
3255
3256 /* Could probably do better here if required. For the moment
3257 however just claim not to know anything more. */
3258 return UnknownAlias;
3259 }
3260
3261
3262 /* Produces the alias relation between two indexed guest state
3263 accesses. */
3264
3265 static
3266 GSAliasing getAliasingRelation_II (
3267 IRRegArray* descr1, IRExpr* ix1, Int bias1,
3268 IRRegArray* descr2, IRExpr* ix2, Int bias2
3269 )
3270 {
3271 UInt minoff1, maxoff1, minoff2, maxoff2;
3272 Int iters;
3273
3274 /* First try hard to show they don't alias. */
3275 getArrayBounds( descr1, &minoff1, &maxoff1 );
3276 getArrayBounds( descr2, &minoff2, &maxoff2 );
3277 if (maxoff1 < minoff2 || maxoff2 < minoff1)
3278 return NoAlias;
3279
3280 /* So the two arrays at least partially overlap. To get any
3281 further we'll have to be sure that the descriptors are
3282 identical. */
3283 if (!eqIRRegArray(descr1, descr2))
3284 return UnknownAlias;
3285
3286 /* The descriptors are identical. Now the only difference can be
3287 in the index expressions. If they cannot be shown to be
3288 identical, we have to say we don't know what the aliasing
3289 relation will be. Now, since the IR is flattened, the index
3290 expressions should be atoms -- either consts or tmps. So that
3291 makes the comparison simple. */
3292 vassert(isIRAtom(ix1));
3293 vassert(isIRAtom(ix2));
3294 if (!eqIRAtom(ix1,ix2))
3295 return UnknownAlias;
3296
3297 /* Ok, the index expressions are identical. So now the only way
3298 they can be different is in the bias. Normalise this
3299 paranoidly, to reliably establish equality/non-equality. */
3300
3301 /* So now we know that the GetI and PutI index the same array
3302 with the same base. Are the offsets the same, modulo the
3303 array size? Do this paranoidly. */
3304 vassert(descr1->nElems == descr2->nElems);
3305 vassert(descr1->elemTy == descr2->elemTy);
3306 vassert(descr1->base == descr2->base);
3307 iters = 0;
3308 while (bias1 < 0 || bias2 < 0) {
3309 bias1 += descr1->nElems;
3310 bias2 += descr1->nElems;
3311 iters++;
3312 if (iters > 10)
3313 vpanic("getAliasingRelation: iters");
3314 }
3315 vassert(bias1 >= 0 && bias2 >= 0);
3316 bias1 %= descr1->nElems;
3317 bias2 %= descr1->nElems;
3318 vassert(bias1 >= 0 && bias1 < descr1->nElems);
3319 vassert(bias2 >= 0 && bias2 < descr1->nElems);
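/* Worked example (values invented): with nElems == 8, biases of
   -3 and 13 become 5 and 21 after one pass round the loop, and
   then 5 and 5 after the mods, hence ExactAlias. */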
3320
3321 /* Finally, bias1 and bias2 are normalised into the range
3322 0 .. descr1->nElems - 1. And so we can establish
3323 equality/non-equality. */
3324
3325 return bias1==bias2 ? ExactAlias : NoAlias;
3326 }
3327
3328
3329 /*---------------------------------------------------------------*/
3330 /*--- Common Subexpression Elimination ---*/
3331 /*---------------------------------------------------------------*/
3332
3333 /* Expensive in time and space. */
3334
3335 /* Uses two environments:
3336 an IRTemp -> IRTemp mapping
3337 a mapping from AvailExpr* to IRTemp
3338 */
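/* The intended effect, as a sketch (temp numbers invented):
      t10 = Add32(t4,t5)
      ...
      t16 = Add32(t4,t5)
   On reaching the second Add32, the AvailExpr* -> IRTemp map
   already holds Add32(t4,t5) |-> t10, so t16 |-> t10 is added
   to the IRTemp -> IRTemp map, and later uses of t16 are
   rewritten to use t10 instead. */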
3339
3340 typedef
3341 struct {
3342 enum { TCc, TCt } tag;
3343 union { IRTemp tmp; IRConst* con; } u;
3344 }
3345 TmpOrConst;
3346
3347 static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
3348 {
3349 if (tc1->tag != tc2->tag)
3350 return False;
3351 switch (tc1->tag) {
3352 case TCc:
3353 return eqIRConst(tc1->u.con, tc2->u.con);
3354 case TCt:
3355 return tc1->u.tmp == tc2->u.tmp;
3356 default:
3357 vpanic("eqTmpOrConst");
3358 }
3359 }
3360
3361 static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 )
3362 {
3363 Bool eq = cee1->addr == cee2->addr;
3364 if (eq) {
3365 vassert(cee1->regparms == cee2->regparms);
3366 vassert(cee1->mcx_mask == cee2->mcx_mask);
3367 /* Names should be the same too, but we don't bother to
3368 check. */
3369 }
3370 return eq;
3371 }
3372
3373 /* Convert an atomic IRExpr* to a TmpOrConst. */
3374 static void irExpr_to_TmpOrConst ( /*OUT*/TmpOrConst* tc, IRExpr* e )
3375 {
3376 switch (e->tag) {
3377 case Iex_RdTmp:
3378 tc->tag = TCt;
3379 tc->u.tmp = e->Iex.RdTmp.tmp;
3380 break;
3381 case Iex_Const:
3382 tc->tag = TCc;
3383 tc->u.con = e->Iex.Const.con;
3384 break;
3385 default:
3386 /* Getting here is a serious error. It means that the
3387 presented arg isn't an IR atom, as it should be. */
3388 vpanic("irExpr_to_TmpOrConst");
3389 }
3390 }
3391
3392 /* Convert a TmpOrConst to an atomic IRExpr*. */
3393 static IRExpr* tmpOrConst_to_IRExpr ( TmpOrConst* tc )
3394 {
3395 switch (tc->tag) {
3396 case TCc: return IRExpr_Const(tc->u.con);
3397 case TCt: return IRExpr_RdTmp(tc->u.tmp);
3398 default: vpanic("tmpOrConst_to_IRExpr");
3399 }
3400 }
3401
3402 /* Convert a NULL terminated IRExpr* vector to an array of
3403 TmpOrConsts, and a length. */
3404 static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs,
3405 /*OUT*/Int* nOuts,
3406 IRExpr** ins )
3407 {
3408 Int i, n;
3409 /* We have to make two passes, one to count, one to copy. */
3410 for (n = 0; ins[n]; n++)
3411 ;
3412 *outs = LibVEX_Alloc_inline(n * sizeof(TmpOrConst));
3413 *nOuts = n;
3414 /* and now copy .. */
3415 for (i = 0; i < n; i++) {
3416 IRExpr* arg = ins[i];
3417 TmpOrConst* dst = &(*outs)[i];
3418 irExpr_to_TmpOrConst(dst, arg);
3419 }
3420 }
3421
3422 typedef
3423 struct {
3424 enum { Ut, Btt, Btc, Bct, Cf64i, Ittt, Itct, Ittc, Itcc, GetIt,
3425 CCall, Load
3426 } tag;
3427 union {
3428 /* unop(tmp) */
3429 struct {
3430 IROp op;
3431 IRTemp arg;
3432 } Ut;
3433 /* binop(tmp,tmp) */
3434 struct {
3435 IROp op;
3436 IRTemp arg1;
3437 IRTemp arg2;
3438 } Btt;
3439 /* binop(tmp,const) */
3440 struct {
3441 IROp op;
3442 IRTemp arg1;
3443 IRConst con2;
3444 } Btc;
3445 /* binop(const,tmp) */
3446 struct {
3447 IROp op;
3448 IRConst con1;
3449 IRTemp arg2;
3450 } Bct;
3451 /* F64i-style const */
3452 struct {
3453 ULong f64i;
3454 } Cf64i;
3455 /* ITE(tmp,tmp,tmp) */
3456 struct {
3457 IRTemp co;
3458 IRTemp e1;
3459 IRTemp e0;
3460 } Ittt;
3461 /* ITE(tmp,tmp,const) */
3462 struct {
3463 IRTemp co;
3464 IRTemp e1;
3465 IRConst con0;
3466 } Ittc;
3467 /* ITE(tmp,const,tmp) */
3468 struct {
3469 IRTemp co;
3470 IRConst con1;
3471 IRTemp e0;
3472 } Itct;
3473 /* ITE(tmp,const,const) */
3474 struct {
3475 IRTemp co;
3476 IRConst con1;
3477 IRConst con0;
3478 } Itcc;
3479 /* GetI(descr,tmp,bias)*/
3480 struct {
3481 IRRegArray* descr;
3482 IRTemp ix;
3483 Int bias;
3484 } GetIt;
3485 /* Clean helper call */
3486 struct {
3487 IRCallee* cee;
3488 TmpOrConst* args;
3489 Int nArgs;
3490 IRType retty;
3491 } CCall;
3492 /* Load(end,ty,addr) */
3493 struct {
3494 IREndness end;
3495 IRType ty;
3496 TmpOrConst addr;
3497 } Load;
3498 } u;
3499 }
3500 AvailExpr;
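/* Examples of the encoding (a sketch): Add32(t1,t2) is held
   with tag Btt as {op=Iop_Add32, arg1=t1, arg2=t2};
   Add32(t1,0x4:I32) with tag Btc as {op=Iop_Add32, arg1=t1,
   con2=0x4:I32}; and ITE(t0,t1,0x0:I32) with tag Ittc. */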
3501
3502 static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
3503 {
3504 if (LIKELY(a1->tag != a2->tag))
3505 return False;
3506 switch (a1->tag) {
3507 case Ut:
3508 return toBool(
3509 a1->u.Ut.op == a2->u.Ut.op
3510 && a1->u.Ut.arg == a2->u.Ut.arg);
3511 case Btt:
3512 return toBool(
3513 a1->u.Btt.op == a2->u.Btt.op
3514 && a1->u.Btt.arg1 == a2->u.Btt.arg1
3515 && a1->u.Btt.arg2 == a2->u.Btt.arg2);
3516 case Btc:
3517 return toBool(
3518 a1->u.Btc.op == a2->u.Btc.op
3519 && a1->u.Btc.arg1 == a2->u.Btc.arg1
3520 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
3521 case Bct:
3522 return toBool(
3523 a1->u.Bct.op == a2->u.Bct.op
3524 && a1->u.Bct.arg2 == a2->u.Bct.arg2
3525 && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
3526 case Cf64i:
3527 return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
3528 case Ittt:
3529 return toBool(a1->u.Ittt.co == a2->u.Ittt.co
3530 && a1->u.Ittt.e1 == a2->u.Ittt.e1
3531 && a1->u.Ittt.e0 == a2->u.Ittt.e0);
3532 case Ittc:
3533 return toBool(a1->u.Ittc.co == a2->u.Ittc.co
3534 && a1->u.Ittc.e1 == a2->u.Ittc.e1
3535 && eqIRConst(&a1->u.Ittc.con0, &a2->u.Ittc.con0));
3536 case Itct:
3537 return toBool(a1->u.Itct.co == a2->u.Itct.co
3538 && eqIRConst(&a1->u.Itct.con1, &a2->u.Itct.con1)
3539 && a1->u.Itct.e0 == a2->u.Itct.e0);
3540 case Itcc:
3541 return toBool(a1->u.Itcc.co == a2->u.Itcc.co
3542 && eqIRConst(&a1->u.Itcc.con1, &a2->u.Itcc.con1)
3543 && eqIRConst(&a1->u.Itcc.con0, &a2->u.Itcc.con0));
3544 case GetIt:
3545 return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
3546 && a1->u.GetIt.ix == a2->u.GetIt.ix
3547 && a1->u.GetIt.bias == a2->u.GetIt.bias);
3548 case CCall: {
3549 Int i, n;
3550 Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs
3551 && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee);
3552 if (eq) {
3553 n = a1->u.CCall.nArgs;
3554 for (i = 0; i < n; i++) {
3555 if (!eqTmpOrConst( &a1->u.CCall.args[i],
3556 &a2->u.CCall.args[i] )) {
3557 eq = False;
3558 break;
3559 }
3560 }
3561 }
3562 if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty);
3563 return eq;
3564 }
3565 case Load: {
3566 Bool eq = toBool(a1->u.Load.end == a2->u.Load.end
3567 && a1->u.Load.ty == a2->u.Load.ty
3568 && eqTmpOrConst(&a1->u.Load.addr, &a2->u.Load.addr));
3569 return eq;
3570 }
3571 default:
3572 vpanic("eq_AvailExpr");
3573 }
3574 }
3575
3576 static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
3577 {
3578 IRConst *con, *con0, *con1;
3579 switch (ae->tag) {
3580 case Ut:
3581 return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
3582 case Btt:
3583 return IRExpr_Binop( ae->u.Btt.op,
3584 IRExpr_RdTmp(ae->u.Btt.arg1),
3585 IRExpr_RdTmp(ae->u.Btt.arg2) );
3586 case Btc:
3587 con = LibVEX_Alloc_inline(sizeof(IRConst));
3588 *con = ae->u.Btc.con2;
3589 return IRExpr_Binop( ae->u.Btc.op,
3590 IRExpr_RdTmp(ae->u.Btc.arg1),
3591 IRExpr_Const(con) );
3592 case Bct:
3593 con = LibVEX_Alloc_inline(sizeof(IRConst));
3594 *con = ae->u.Bct.con1;
3595 return IRExpr_Binop( ae->u.Bct.op,
3596 IRExpr_Const(con),
3597 IRExpr_RdTmp(ae->u.Bct.arg2) );
3598 case Cf64i:
3599 return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
3600 case Ittt:
3601 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittt.co),
3602 IRExpr_RdTmp(ae->u.Ittt.e1),
3603 IRExpr_RdTmp(ae->u.Ittt.e0));
3604 case Ittc:
3605 con0 = LibVEX_Alloc_inline(sizeof(IRConst));
3606 *con0 = ae->u.Ittc.con0;
3607 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittc.co),
3608 IRExpr_RdTmp(ae->u.Ittc.e1),
3609 IRExpr_Const(con0));
3610 case Itct:
3611 con1 = LibVEX_Alloc_inline(sizeof(IRConst));
3612 *con1 = ae->u.Itct.con1;
3613 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itct.co),
3614 IRExpr_Const(con1),
3615 IRExpr_RdTmp(ae->u.Itct.e0));
3616
3617 case Itcc:
3618 con0 = LibVEX_Alloc_inline(sizeof(IRConst));
3619 con1 = LibVEX_Alloc_inline(sizeof(IRConst));
3620 *con0 = ae->u.Itcc.con0;
3621 *con1 = ae->u.Itcc.con1;
3622 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itcc.co),
3623 IRExpr_Const(con1),
3624 IRExpr_Const(con0));
3625 case GetIt:
3626 return IRExpr_GetI(ae->u.GetIt.descr,
3627 IRExpr_RdTmp(ae->u.GetIt.ix),
3628 ae->u.GetIt.bias);
3629 case CCall: {
3630 Int i, n = ae->u.CCall.nArgs;
3631 vassert(n >= 0);
3632 IRExpr** vec = LibVEX_Alloc_inline((n+1) * sizeof(IRExpr*));
3633 vec[n] = NULL;
3634 for (i = 0; i < n; i++) {
3635 vec[i] = tmpOrConst_to_IRExpr(&ae->u.CCall.args[i]);
3636 }
3637 return IRExpr_CCall(ae->u.CCall.cee,
3638 ae->u.CCall.retty,
3639 vec);
3640 }
3641 case Load:
3642 return IRExpr_Load(ae->u.Load.end, ae->u.Load.ty,
3643 tmpOrConst_to_IRExpr(&ae->u.Load.addr));
3644 default:
3645 vpanic("availExpr_to_IRExpr");
3646 }
3647 }
3648
3649 inline
3650 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
3651 {
3652 HWord res;
3653 /* env :: IRTemp -> IRTemp */
3654 if (lookupHHW( env, &res, (HWord)tmp ))
3655 return (IRTemp)res;
3656 else
3657 return tmp;
3658 }
3659
3660 inline
3661 static void subst_AvailExpr_TmpOrConst ( /*MB_MOD*/TmpOrConst* tc,
3662 HashHW* env )
3663 {
3664 /* env :: IRTemp -> IRTemp */
3665 if (tc->tag == TCt) {
3666 tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp );
3667 }
3668 }
3669
3670 static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
3671 {
3672 /* env :: IRTemp -> IRTemp */
3673 switch (ae->tag) {
3674 case Ut:
3675 ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
3676 break;
3677 case Btt:
3678 ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
3679 ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
3680 break;
3681 case Btc:
3682 ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
3683 break;
3684 case Bct:
3685 ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
3686 break;
3687 case Cf64i:
3688 break;
3689 case Ittt:
3690 ae->u.Ittt.co = subst_AvailExpr_Temp( env, ae->u.Ittt.co );
3691 ae->u.Ittt.e1 = subst_AvailExpr_Temp( env, ae->u.Ittt.e1 );
3692 ae->u.Ittt.e0 = subst_AvailExpr_Temp( env, ae->u.Ittt.e0 );
3693 break;
3694 case Ittc:
3695 ae->u.Ittc.co = subst_AvailExpr_Temp( env, ae->u.Ittc.co );
3696 ae->u.Ittc.e1 = subst_AvailExpr_Temp( env, ae->u.Ittc.e1 );
3697 break;
3698 case Itct:
3699 ae->u.Itct.co = subst_AvailExpr_Temp( env, ae->u.Itct.co );
3700 ae->u.Itct.e0 = subst_AvailExpr_Temp( env, ae->u.Itct.e0 );
3701 break;
3702 case Itcc:
3703 ae->u.Itcc.co = subst_AvailExpr_Temp( env, ae->u.Itcc.co );
3704 break;
3705 case GetIt:
3706 ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
3707 break;
3708 case CCall: {
3709          Int i, n = ae->u.CCall.nArgs;
3710 for (i = 0; i < n; i++) {
3711 subst_AvailExpr_TmpOrConst(&ae->u.CCall.args[i], env);
3712 }
3713 break;
3714 }
3715 case Load:
3716 subst_AvailExpr_TmpOrConst(&ae->u.Load.addr, env);
3717 break;
3718 default:
3719 vpanic("subst_AvailExpr");
3720 }
3721 }
3722
3723 static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e, Bool allowLoadsToBeCSEd )
3724 {
3725 AvailExpr* ae;
3726
3727 switch (e->tag) {
3728 case Iex_Unop:
3729 if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
3730 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3731 ae->tag = Ut;
3732 ae->u.Ut.op = e->Iex.Unop.op;
3733 ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
3734 return ae;
3735 }
3736 break;
3737
3738 case Iex_Binop:
3739 if (e->Iex.Binop.arg1->tag == Iex_RdTmp) {
3740 if (e->Iex.Binop.arg2->tag == Iex_RdTmp) {
3741 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3742 ae->tag = Btt;
3743 ae->u.Btt.op = e->Iex.Binop.op;
3744 ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
3745 ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
3746 return ae;
3747 }
3748 if (e->Iex.Binop.arg2->tag == Iex_Const) {
3749 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3750 ae->tag = Btc;
3751 ae->u.Btc.op = e->Iex.Binop.op;
3752 ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
3753 ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
3754 return ae;
3755 }
3756 } else if (e->Iex.Binop.arg1->tag == Iex_Const
3757 && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
3758 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3759 ae->tag = Bct;
3760 ae->u.Bct.op = e->Iex.Binop.op;
3761 ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
3762 ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
3763 return ae;
3764 }
3765 break;
3766
3767 case Iex_Const:
3768 if (e->Iex.Const.con->tag == Ico_F64i) {
3769 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3770 ae->tag = Cf64i;
3771 ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
3772 return ae;
3773 }
3774 break;
3775
3776 case Iex_ITE:
3777 if (e->Iex.ITE.cond->tag == Iex_RdTmp) {
3778 if (e->Iex.ITE.iffalse->tag == Iex_RdTmp) {
3779 if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
3780 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3781 ae->tag = Ittt;
3782 ae->u.Ittt.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3783 ae->u.Ittt.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
3784 ae->u.Ittt.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
3785 return ae;
3786 }
3787 if (e->Iex.ITE.iftrue->tag == Iex_Const) {
3788 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3789 ae->tag = Itct;
3790 ae->u.Itct.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3791 ae->u.Itct.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
3792 ae->u.Itct.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
3793 return ae;
3794 }
3795 } else if (e->Iex.ITE.iffalse->tag == Iex_Const) {
3796 if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
3797 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3798 ae->tag = Ittc;
3799 ae->u.Ittc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3800 ae->u.Ittc.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
3801 ae->u.Ittc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
3802 return ae;
3803 }
3804 if (e->Iex.ITE.iftrue->tag == Iex_Const) {
3805 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3806 ae->tag = Itcc;
3807 ae->u.Itcc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3808 ae->u.Itcc.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
3809 ae->u.Itcc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
3810 return ae;
3811 }
3812 }
3813 }
3814 break;
3815
3816 case Iex_GetI:
3817 if (e->Iex.GetI.ix->tag == Iex_RdTmp) {
3818 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3819 ae->tag = GetIt;
3820 ae->u.GetIt.descr = e->Iex.GetI.descr;
3821 ae->u.GetIt.ix = e->Iex.GetI.ix->Iex.RdTmp.tmp;
3822 ae->u.GetIt.bias = e->Iex.GetI.bias;
3823 return ae;
3824 }
3825 break;
3826
3827 case Iex_CCall:
3828 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3829 ae->tag = CCall;
3830 /* Ok to share only the cee, since it is immutable. */
3831 ae->u.CCall.cee = e->Iex.CCall.cee;
3832 ae->u.CCall.retty = e->Iex.CCall.retty;
3833 /* irExprVec_to_TmpOrConsts will assert if the args are
3834 neither tmps nor constants, but that's ok .. that's all they
3835 should be. */
3836 irExprVec_to_TmpOrConsts(
3837 &ae->u.CCall.args, &ae->u.CCall.nArgs,
3838 e->Iex.CCall.args
3839 );
3840 return ae;
3841
3842 case Iex_Load:
3843 /* If the caller of do_cse_BB has requested that loads also
3844 be CSEd, convert them into AvailExprs. If not, we'll just
3845 return NULL here, and the load never becomes considered
3846 "available", which effectively disables CSEing of them, as
3847 desired. */
3848 if (allowLoadsToBeCSEd) {
3849 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3850 ae->tag = Load;
3851 ae->u.Load.end = e->Iex.Load.end;
3852 ae->u.Load.ty = e->Iex.Load.ty;
3853 irExpr_to_TmpOrConst(&ae->u.Load.addr, e->Iex.Load.addr);
3854 return ae;
3855 }
3856 break;
3857
3858 default:
3859 break;
3860 }
3861
3862 return NULL;
3863 }
3864
3865
3866 /* The BB is modified in-place. Returns True if any changes were
3867 made. The caller can choose whether or not loads should be CSEd.
3868 In the normal course of things we don't do that, since CSEing loads
3869 is something of a dodgy proposition if the guest program is doing
3870 some screwy stuff to do with races and spinloops. */
3871
3872 static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd )
3873 {
3874 Int i, j, paranoia;
3875 IRTemp t, q;
3876 IRStmt* st;
3877 AvailExpr* eprime;
3878 AvailExpr* ae;
3879 Bool invalidate;
3880 Bool anyDone = False;
3881
3882 HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
3883 HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */
3884
3885 vassert(sizeof(IRTemp) <= sizeof(HWord));
3886
3887 if (0) { ppIRSB(bb); vex_printf("\n\n"); }
3888
3889 /* Iterate forwards over the stmts.
3890 On seeing "t = E", where E is one of the AvailExpr forms:
3891 let E' = apply tenv substitution to E
3892 search aenv for E'
3893 if a mapping E' -> q is found,
3894 replace this stmt by "t = q"
3895 and add binding t -> q to tenv
3896 else
3897 add binding E' -> t to aenv
3898 replace this stmt by "t = E'"
3899
3900 Other statements are only interesting to the extent that they
3901 might invalidate some of the expressions in aenv. So there is
3902 an invalidate-bindings check for each statement seen.
3903 */
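   /* A sketch of the intended effect, with made-up temp names: given

         t10 = Add32(t4,t5)
         t11 = Add32(t4,t5)
         t12 = Add32(t11,0x1:I32)

      the Add32(t4,t5) is already available when t11's stmt is seen,
      so the block becomes

         t10 = Add32(t4,t5)
         t11 = t10
         t12 = Add32(t10,0x1:I32)

      with t11 -> t10 recorded in tenv so later uses get redirected. */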
3904 for (i = 0; i < bb->stmts_used; i++) {
3905 st = bb->stmts[i];
3906
3907 /* ------ BEGIN invalidate aenv bindings ------ */
3908 /* This is critical: remove from aenv any E' -> .. bindings
3909 which might be invalidated by this statement. The only
3910 vulnerable kind of bindings are the GetI and Load kinds.
3911 Dirty call - dump (paranoia level -> 2)
3912 Store - dump (ditto)
3913 Put, PutI - dump unless no-overlap is proven (.. -> 1)
3914 Uses getAliasingRelation_IC and getAliasingRelation_II
3915 to do the no-overlap assessments needed for Put/PutI.
3916 */
3917 switch (st->tag) {
3918 case Ist_Dirty: case Ist_Store: case Ist_MBE:
3919 case Ist_CAS: case Ist_LLSC:
3920 case Ist_StoreG:
3921 paranoia = 2; break;
3922 case Ist_Put: case Ist_PutI:
3923 paranoia = 1; break;
3924 case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
3925 case Ist_WrTmp: case Ist_Exit: case Ist_LoadG:
3926 paranoia = 0; break;
3927 default:
3928 vpanic("do_cse_BB(1)");
3929 }
3930
3931 if (paranoia > 0) {
3932 for (j = 0; j < aenv->used; j++) {
3933 if (!aenv->inuse[j])
3934 continue;
3935 ae = (AvailExpr*)aenv->key[j];
3936 if (ae->tag != GetIt && ae->tag != Load)
3937 continue;
3938 invalidate = False;
3939 if (paranoia >= 2) {
3940 invalidate = True;
3941 } else {
3942 vassert(paranoia == 1);
3943 if (ae->tag == Load) {
3944 /* Loads can be invalidated by anything that could
3945 possibly touch memory. But in that case we
3946 should have |paranoia| == 2 and we won't get
3947 here. So there's nothing to do; we don't have to
3948 invalidate the load. */
3949 }
3950 else
3951 if (st->tag == Ist_Put) {
3952 if (getAliasingRelation_IC(
3953 ae->u.GetIt.descr,
3954 IRExpr_RdTmp(ae->u.GetIt.ix),
3955 st->Ist.Put.offset,
3956 typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
3957 ) != NoAlias)
3958 invalidate = True;
3959 }
3960 else
3961 if (st->tag == Ist_PutI) {
3962 IRPutI *puti = st->Ist.PutI.details;
3963 if (getAliasingRelation_II(
3964 ae->u.GetIt.descr,
3965 IRExpr_RdTmp(ae->u.GetIt.ix),
3966 ae->u.GetIt.bias,
3967 puti->descr,
3968 puti->ix,
3969 puti->bias
3970 ) != NoAlias)
3971 invalidate = True;
3972 }
3973 else
3974 vpanic("do_cse_BB(2)");
3975 }
3976
3977 if (invalidate) {
3978 aenv->inuse[j] = False;
3979 aenv->key[j] = (HWord)NULL; /* be sure */
3980 }
3981 } /* for j */
3982 } /* paranoia > 0 */
3983
3984       /* ------ END invalidate aenv bindings ------ */
3985
3986 /* ignore not-interestings */
3987 if (st->tag != Ist_WrTmp)
3988 continue;
3989
3990 t = st->Ist.WrTmp.tmp;
3991 eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data, allowLoadsToBeCSEd);
3992 /* ignore if not of AvailExpr form */
3993 if (!eprime)
3994 continue;
3995
3996 /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
3997
3998 /* apply tenv */
3999 subst_AvailExpr( tenv, eprime );
4000
4001 /* search aenv for eprime, unfortunately the hard way */
4002 for (j = 0; j < aenv->used; j++)
4003 if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
4004 break;
4005
4006 if (j < aenv->used) {
4007 /* A binding E' -> q was found. Replace stmt by "t = q" and
4008 note the t->q binding in tenv. */
4009 /* (this is the core of the CSE action) */
4010 q = (IRTemp)aenv->val[j];
4011 bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
4012 addToHHW( tenv, (HWord)t, (HWord)q );
4013 anyDone = True;
4014 } else {
4015 /* No binding was found, so instead we add E' -> t to our
4016 collection of available expressions, replace this stmt
4017 with "t = E'", and move on. */
4018 bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
4019 addToHHW( aenv, (HWord)eprime, (HWord)t );
4020 }
4021 }
4022
4023 /*
4024 ppIRSB(bb);
4025 sanityCheckIRSB(bb, Ity_I32);
4026 vex_printf("\n\n");
4027 */
4028 return anyDone;
4029 }
4030
4031
4032 /*---------------------------------------------------------------*/
4033 /*--- Add32/Sub32 chain collapsing ---*/
4034 /*---------------------------------------------------------------*/
4035
4036 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
4037
4038 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ? If
4039 yes, set *tmp and *i32 appropriately. *i32 is set as if the
4040 root node is Add32, not Sub32. */
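/* E.g. (a sketch, with a made-up temp): "Sub32(t7,0x10:I32)" sets
   *tmp = t7 and *i32 = -16, exactly as "Add32(t7,-16)" would. */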
4041
4042 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
4043 {
4044 if (e->tag != Iex_Binop)
4045 return False;
4046 if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
4047 return False;
4048 if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
4049 return False;
4050 if (e->Iex.Binop.arg2->tag != Iex_Const)
4051 return False;
4052 *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
4053 *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
4054 if (e->Iex.Binop.op == Iop_Sub32)
4055 *i32 = -*i32;
4056 return True;
4057 }
4058
4059
4060 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
4061 other tmp2. Scan backwards from the specified start point -- an
4062 optimisation. */
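/* For example (a sketch, with made-up temps): given the earlier
   bindings

      t3 = Add32(t2,0x4:I32)
      t4 = Sub32(t3,0x8:I32)

   a query for t4 walks back through both stmts and yields
   *tmp2 = t2 and *i32 = -4. */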
4063
4064 static Bool collapseChain ( IRSB* bb, Int startHere,
4065 IRTemp tmp,
4066 IRTemp* tmp2, Int* i32 )
4067 {
4068 Int j, ii;
4069 IRTemp vv;
4070 IRStmt* st;
4071 IRExpr* e;
4072
4073    /* the (var, con) pair contains the current 'representation' for
4074 'tmp'. We start with 'tmp + 0'. */
4075 IRTemp var = tmp;
4076 Int con = 0;
4077
4078 /* Scan backwards to see if tmp can be replaced by some other tmp
4079 +/- a constant. */
4080 for (j = startHere; j >= 0; j--) {
4081 st = bb->stmts[j];
4082 if (st->tag != Ist_WrTmp)
4083 continue;
4084 if (st->Ist.WrTmp.tmp != var)
4085 continue;
4086 e = st->Ist.WrTmp.data;
4087 if (!isAdd32OrSub32(e, &vv, &ii))
4088 break;
4089 var = vv;
4090 con += ii;
4091 }
4092 if (j == -1)
4093 /* no earlier binding for var .. ill-formed IR */
4094 vpanic("collapseChain");
4095
4096 /* so, did we find anything interesting? */
4097 if (var == tmp)
4098 return False; /* no .. */
4099
4100 *tmp2 = var;
4101 *i32 = con;
4102 return True;
4103 }
4104
4105
4106 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
4107
4108 static void collapse_AddSub_chains_BB ( IRSB* bb )
4109 {
4110 IRStmt *st;
4111 IRTemp var, var2;
4112 Int i, con, con2;
4113
4114 for (i = bb->stmts_used-1; i >= 0; i--) {
4115 st = bb->stmts[i];
4116 if (st->tag == Ist_NoOp)
4117 continue;
4118
4119 /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
4120
4121 if (st->tag == Ist_WrTmp
4122 && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
4123
4124 /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
4125 Find out if var can be expressed as var2 + con2. */
4126 if (collapseChain(bb, i-1, var, &var2, &con2)) {
4127 if (DEBUG_IROPT) {
4128 vex_printf("replacing1 ");
4129 ppIRStmt(st);
4130 vex_printf(" with ");
4131 }
4132 con2 += con;
4133 bb->stmts[i]
4134 = IRStmt_WrTmp(
4135 st->Ist.WrTmp.tmp,
4136 (con2 >= 0)
4137 ? IRExpr_Binop(Iop_Add32,
4138 IRExpr_RdTmp(var2),
4139 IRExpr_Const(IRConst_U32(con2)))
4140 : IRExpr_Binop(Iop_Sub32,
4141 IRExpr_RdTmp(var2),
4142 IRExpr_Const(IRConst_U32(-con2)))
4143 );
4144 if (DEBUG_IROPT) {
4145 ppIRStmt(bb->stmts[i]);
4146 vex_printf("\n");
4147 }
4148 }
4149
4150 continue;
4151 }
4152
4153 /* Try to collapse 't1 = GetI[t2, con]'. */
4154
4155 if (st->tag == Ist_WrTmp
4156 && st->Ist.WrTmp.data->tag == Iex_GetI
4157 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
4158 && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
4159 ->Iex.RdTmp.tmp, &var2, &con2)) {
4160 if (DEBUG_IROPT) {
4161 vex_printf("replacing3 ");
4162 ppIRStmt(st);
4163 vex_printf(" with ");
4164 }
4165 con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
4166 bb->stmts[i]
4167 = IRStmt_WrTmp(
4168 st->Ist.WrTmp.tmp,
4169 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
4170 IRExpr_RdTmp(var2),
4171 con2));
4172 if (DEBUG_IROPT) {
4173 ppIRStmt(bb->stmts[i]);
4174 vex_printf("\n");
4175 }
4176 continue;
4177 }
4178
4179       /* Perhaps st is PutI[t, con] ?  Check the tag before reading
4180          the details, so the union is never read via the wrong member. */
4181       IRPutI *puti = st->tag == Ist_PutI ? st->Ist.PutI.details : NULL;
4182       if (puti != NULL
4183           && puti->ix->tag == Iex_RdTmp
4183           && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp,
4184 &var2, &con2)) {
4185 if (DEBUG_IROPT) {
4186 vex_printf("replacing2 ");
4187 ppIRStmt(st);
4188 vex_printf(" with ");
4189 }
4190 con2 += puti->bias;
4191 bb->stmts[i]
4192 = IRStmt_PutI(mkIRPutI(puti->descr,
4193 IRExpr_RdTmp(var2),
4194 con2,
4195 puti->data));
4196 if (DEBUG_IROPT) {
4197 ppIRStmt(bb->stmts[i]);
4198 vex_printf("\n");
4199 }
4200 continue;
4201 }
4202
4203 } /* for */
4204 }
4205
4206
4207 /*---------------------------------------------------------------*/
4208 /*--- PutI/GetI transformations ---*/
4209 /*---------------------------------------------------------------*/
4210
4211 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
4212 the given starting point to find, if any, a PutI which writes
4213 exactly the same piece of guest state, and so return the expression
4214 that the PutI writes. This is the core of PutI-GetI forwarding. */
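/* For instance (a sketch, with made-up operands): given

      PUTI(descr)[t1,0] = t2
      ...
      t3 = GETI(descr)[t1,0]

   and no intervening write that might alias that slice of guest
   state, the GetI's value is just t2, so the caller can rewrite the
   second stmt as "t3 = t2". */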
4215
4216 static
4217 IRExpr* findPutI ( IRSB* bb, Int startHere,
4218 IRRegArray* descrG, IRExpr* ixG, Int biasG )
4219 {
4220 Int j;
4221 IRStmt* st;
4222 GSAliasing relation;
4223
4224 if (0) {
4225 vex_printf("\nfindPutI ");
4226 ppIRRegArray(descrG);
4227 vex_printf(" ");
4228 ppIRExpr(ixG);
4229 vex_printf(" %d\n", biasG);
4230 }
4231
4232 /* Scan backwards in bb from startHere to find a suitable PutI
4233 binding for (descrG, ixG, biasG), if any. */
4234
4235 for (j = startHere; j >= 0; j--) {
4236 st = bb->stmts[j];
4237 if (st->tag == Ist_NoOp)
4238 continue;
4239
4240 if (st->tag == Ist_Put) {
4241 /* Non-indexed Put. This can't give a binding, but we do
4242 need to check it doesn't invalidate the search by
4243 overlapping any part of the indexed guest state. */
4244
4245 relation
4246 = getAliasingRelation_IC(
4247 descrG, ixG,
4248 st->Ist.Put.offset,
4249 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
4250
4251 if (relation == NoAlias) {
4252 /* we're OK; keep going */
4253 continue;
4254 } else {
4255 /* relation == UnknownAlias || relation == ExactAlias */
4256 /* If this assertion fails, we've found a Put which writes
4257 an area of guest state which is read by a GetI. Which
4258 is unlikely (although not per se wrong). */
4259 vassert(relation != ExactAlias);
4260 /* This Put potentially writes guest state that the GetI
4261 reads; we must fail. */
4262 return NULL;
4263 }
4264 }
4265
4266 if (st->tag == Ist_PutI) {
4267 IRPutI *puti = st->Ist.PutI.details;
4268
4269 relation = getAliasingRelation_II(
4270 descrG, ixG, biasG,
4271 puti->descr,
4272 puti->ix,
4273 puti->bias
4274 );
4275
4276 if (relation == NoAlias) {
4277 /* This PutI definitely doesn't overlap. Ignore it and
4278 keep going. */
4279 continue; /* the for j loop */
4280 }
4281
4282 if (relation == UnknownAlias) {
4283 /* We don't know if this PutI writes to the same guest
4284                   state that the GetI reads, or not.  So we have to give up. */
4285 return NULL;
4286 }
4287
4288 /* Otherwise, we've found what we're looking for. */
4289 vassert(relation == ExactAlias);
4290 return puti->data;
4291
4292 } /* if (st->tag == Ist_PutI) */
4293
4294 if (st->tag == Ist_Dirty) {
4295 /* Be conservative. If the dirty call has any guest effects at
4296 all, give up. We could do better -- only give up if there
4297 are any guest writes/modifies. */
4298 if (st->Ist.Dirty.details->nFxState > 0)
4299 return NULL;
4300 }
4301
4302 } /* for */
4303
4304 /* No valid replacement was found. */
4305 return NULL;
4306 }
4307
4308
4309
4310 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
4311 that it writes exactly the same piece of guest state) ? Safe
4312 answer: False. */
4313
4314 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
4315 {
4316 vassert(pi->tag == Ist_PutI);
4317 if (s2->tag != Ist_PutI)
4318 return False;
4319
4320 IRPutI *p1 = pi->Ist.PutI.details;
4321 IRPutI *p2 = s2->Ist.PutI.details;
4322
4323 return toBool(
4324 getAliasingRelation_II(
4325 p1->descr, p1->ix, p1->bias,
4326 p2->descr, p2->ix, p2->bias
4327 )
4328 == ExactAlias
4329 );
4330 }
4331
4332
4333 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
4334 overlap it? Safe answer: True. Note, we could do a lot better
4335 than this if needed. */
4336
4337 static
4338 Bool guestAccessWhichMightOverlapPutI (
4339 IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
4340 )
4341 {
4342 GSAliasing relation;
4343 UInt minoffP, maxoffP;
4344
4345 vassert(pi->tag == Ist_PutI);
4346
4347 IRPutI *p1 = pi->Ist.PutI.details;
4348
4349 getArrayBounds(p1->descr, &minoffP, &maxoffP);
4350 switch (s2->tag) {
4351
4352 case Ist_NoOp:
4353 case Ist_IMark:
4354 return False;
4355
4356 case Ist_MBE:
4357 case Ist_AbiHint:
4358 /* just be paranoid ... these should be rare. */
4359 return True;
4360
4361 case Ist_CAS:
4362 /* This is unbelievably lame, but it's probably not
4363 significant from a performance point of view. Really, a
4364 CAS is a load-store op, so it should be safe to say False.
4365 However .. */
4366 return True;
4367
4368 case Ist_Dirty:
4369 /* If the dirty call has any guest effects at all, give up.
4370 Probably could do better. */
4371 if (s2->Ist.Dirty.details->nFxState > 0)
4372 return True;
4373 return False;
4374
4375 case Ist_Put:
4376 vassert(isIRAtom(s2->Ist.Put.data));
4377 relation
4378 = getAliasingRelation_IC(
4379 p1->descr, p1->ix,
4380 s2->Ist.Put.offset,
4381 typeOfIRExpr(tyenv,s2->Ist.Put.data)
4382 );
4383 goto have_relation;
4384
4385 case Ist_PutI: {
4386 IRPutI *p2 = s2->Ist.PutI.details;
4387
4388 vassert(isIRAtom(p2->ix));
4389 vassert(isIRAtom(p2->data));
4390 relation
4391 = getAliasingRelation_II(
4392 p1->descr, p1->ix, p1->bias,
4393 p2->descr, p2->ix, p2->bias
4394 );
4395 goto have_relation;
4396 }
4397
4398 case Ist_WrTmp:
4399 if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
4400 relation
4401 = getAliasingRelation_II(
4402 p1->descr, p1->ix, p1->bias,
4403 s2->Ist.WrTmp.data->Iex.GetI.descr,
4404 s2->Ist.WrTmp.data->Iex.GetI.ix,
4405 s2->Ist.WrTmp.data->Iex.GetI.bias
4406 );
4407 goto have_relation;
4408 }
4409 if (s2->Ist.WrTmp.data->tag == Iex_Get) {
4410 relation
4411 = getAliasingRelation_IC(
4412 p1->descr, p1->ix,
4413 s2->Ist.WrTmp.data->Iex.Get.offset,
4414 s2->Ist.WrTmp.data->Iex.Get.ty
4415 );
4416 goto have_relation;
4417 }
4418 return False;
4419
4420 case Ist_Store:
4421 vassert(isIRAtom(s2->Ist.Store.addr));
4422 vassert(isIRAtom(s2->Ist.Store.data));
4423 return False;
4424
4425 default:
4426 vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
4427 vpanic("guestAccessWhichMightOverlapPutI");
4428 }
4429
4430 have_relation:
4431 if (relation == NoAlias)
4432 return False;
4433 else
4434 return True; /* ExactAlias or UnknownAlias */
4435 }
4436
4437
4438
4439 /* ---------- PutI/GetI transformations main functions --------- */
4440
4441 /* Remove redundant GetIs, to the extent that they can be detected.
4442 bb is modified in-place. */
4443
4444 static
4445 void do_redundant_GetI_elimination ( IRSB* bb )
4446 {
4447 Int i;
4448 IRStmt* st;
4449
4450 for (i = bb->stmts_used-1; i >= 0; i--) {
4451 st = bb->stmts[i];
4452 if (st->tag == Ist_NoOp)
4453 continue;
4454
4455 if (st->tag == Ist_WrTmp
4456 && st->Ist.WrTmp.data->tag == Iex_GetI
4457 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
4458 IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
4459 IRExpr* ix = st->Ist.WrTmp.data->Iex.GetI.ix;
4460 Int bias = st->Ist.WrTmp.data->Iex.GetI.bias;
4461 IRExpr* replacement = findPutI(bb, i-1, descr, ix, bias);
4462 if (replacement
4463 && isIRAtom(replacement)
4464 /* Make sure we're doing a type-safe transformation! */
4465 && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
4466 if (DEBUG_IROPT) {
4467 vex_printf("rGI: ");
4468 ppIRExpr(st->Ist.WrTmp.data);
4469 vex_printf(" -> ");
4470 ppIRExpr(replacement);
4471 vex_printf("\n");
4472 }
4473 bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
4474 }
4475 }
4476 }
4477
4478 }
4479
4480
4481 /* Remove redundant PutIs, to the extent that they can be detected.
4482 bb is modified in-place. */
4483
4484 static
4485 void do_redundant_PutI_elimination ( IRSB* bb, VexRegisterUpdates pxControl )
4486 {
4487 Int i, j;
4488 Bool delete;
4489 IRStmt *st, *stj;
4490
4491 vassert(pxControl < VexRegUpdAllregsAtEachInsn);
4492
4493 for (i = 0; i < bb->stmts_used; i++) {
4494 st = bb->stmts[i];
4495 if (st->tag != Ist_PutI)
4496 continue;
4497 /* Ok, search forwards from here to see if we can find another
4498 PutI which makes this one redundant, and dodging various
4499 hazards. Search forwards:
4500 * If conditional exit, give up (because anything after that
4501 does not postdominate this put).
4502 * If a Get which might overlap, give up (because this PutI
4503            is not necessarily dead).
4504 * If a Put which is identical, stop with success.
4505 * If a Put which might overlap, but is not identical, give up.
4506 * If a dirty helper call which might write guest state, give up.
4507 * If a Put which definitely doesn't overlap, or any other
4508 kind of stmt, continue.
4509 */
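      /* For example (a sketch): in

            PUTI(descr)[t1,0] = t2
            ...
            PUTI(descr)[t1,0] = t3

         the first PutI is dead provided nothing in between reads (or
         might read) that slot and no exit intervenes; it gets replaced
         by a NoOp below. */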
4510 delete = False;
4511 for (j = i+1; j < bb->stmts_used; j++) {
4512 stj = bb->stmts[j];
4513 if (stj->tag == Ist_NoOp)
4514 continue;
4515 if (identicalPutIs(st, stj)) {
4516 /* success! */
4517 delete = True;
4518 break;
4519 }
4520 if (stj->tag == Ist_Exit)
4521 /* give up */
4522 break;
4523          if (stj->tag == Ist_Dirty)
4524 /* give up; could do better here */
4525 break;
4526 if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
4527 /* give up */
4528 break;
4529 }
4530
4531 if (delete) {
4532 if (DEBUG_IROPT) {
4533 vex_printf("rPI: ");
4534 ppIRStmt(st);
4535 vex_printf("\n");
4536 }
4537 bb->stmts[i] = IRStmt_NoOp();
4538 }
4539
4540 }
4541 }
4542
4543
4544 /*---------------------------------------------------------------*/
4545 /*--- Loop unrolling ---*/
4546 /*---------------------------------------------------------------*/
4547
4548 /* Adjust all tmp values (names) in e by delta. e is destructively
4549 modified. */
4550
4551 static void deltaIRExpr ( IRExpr* e, Int delta )
4552 {
4553 Int i;
4554 switch (e->tag) {
4555 case Iex_RdTmp:
4556 e->Iex.RdTmp.tmp += delta;
4557 break;
4558 case Iex_Get:
4559 case Iex_Const:
4560 break;
4561 case Iex_GetI:
4562 deltaIRExpr(e->Iex.GetI.ix, delta);
4563 break;
4564 case Iex_Qop:
4565 deltaIRExpr(e->Iex.Qop.details->arg1, delta);
4566 deltaIRExpr(e->Iex.Qop.details->arg2, delta);
4567 deltaIRExpr(e->Iex.Qop.details->arg3, delta);
4568 deltaIRExpr(e->Iex.Qop.details->arg4, delta);
4569 break;
4570 case Iex_Triop:
4571 deltaIRExpr(e->Iex.Triop.details->arg1, delta);
4572 deltaIRExpr(e->Iex.Triop.details->arg2, delta);
4573 deltaIRExpr(e->Iex.Triop.details->arg3, delta);
4574 break;
4575 case Iex_Binop:
4576 deltaIRExpr(e->Iex.Binop.arg1, delta);
4577 deltaIRExpr(e->Iex.Binop.arg2, delta);
4578 break;
4579 case Iex_Unop:
4580 deltaIRExpr(e->Iex.Unop.arg, delta);
4581 break;
4582 case Iex_Load:
4583 deltaIRExpr(e->Iex.Load.addr, delta);
4584 break;
4585 case Iex_CCall:
4586 for (i = 0; e->Iex.CCall.args[i]; i++)
4587 deltaIRExpr(e->Iex.CCall.args[i], delta);
4588 break;
4589 case Iex_ITE:
4590 deltaIRExpr(e->Iex.ITE.cond, delta);
4591 deltaIRExpr(e->Iex.ITE.iftrue, delta);
4592 deltaIRExpr(e->Iex.ITE.iffalse, delta);
4593 break;
4594 default:
4595 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4596 vpanic("deltaIRExpr");
4597 }
4598 }
4599
4600 /* Adjust all tmp values (names) in st by delta. st is destructively
4601 modified. */
4602
4603 static void deltaIRStmt ( IRStmt* st, Int delta )
4604 {
4605 Int i;
4606 IRDirty* d;
4607 switch (st->tag) {
4608 case Ist_NoOp:
4609 case Ist_IMark:
4610 case Ist_MBE:
4611 break;
4612 case Ist_AbiHint:
4613 deltaIRExpr(st->Ist.AbiHint.base, delta);
4614 deltaIRExpr(st->Ist.AbiHint.nia, delta);
4615 break;
4616 case Ist_Put:
4617 deltaIRExpr(st->Ist.Put.data, delta);
4618 break;
4619 case Ist_PutI:
4620 deltaIRExpr(st->Ist.PutI.details->ix, delta);
4621 deltaIRExpr(st->Ist.PutI.details->data, delta);
4622 break;
4623 case Ist_WrTmp:
4624 st->Ist.WrTmp.tmp += delta;
4625 deltaIRExpr(st->Ist.WrTmp.data, delta);
4626 break;
4627 case Ist_Exit:
4628 deltaIRExpr(st->Ist.Exit.guard, delta);
4629 break;
4630 case Ist_Store:
4631 deltaIRExpr(st->Ist.Store.addr, delta);
4632 deltaIRExpr(st->Ist.Store.data, delta);
4633 break;
4634 case Ist_StoreG: {
4635 IRStoreG* sg = st->Ist.StoreG.details;
4636 deltaIRExpr(sg->addr, delta);
4637 deltaIRExpr(sg->data, delta);
4638 deltaIRExpr(sg->guard, delta);
4639 break;
4640 }
4641 case Ist_LoadG: {
4642 IRLoadG* lg = st->Ist.LoadG.details;
4643 lg->dst += delta;
4644 deltaIRExpr(lg->addr, delta);
4645 deltaIRExpr(lg->alt, delta);
4646 deltaIRExpr(lg->guard, delta);
4647 break;
4648 }
4649 case Ist_CAS:
4650 if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
4651 st->Ist.CAS.details->oldHi += delta;
4652 st->Ist.CAS.details->oldLo += delta;
4653 deltaIRExpr(st->Ist.CAS.details->addr, delta);
4654 if (st->Ist.CAS.details->expdHi)
4655 deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
4656 deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
4657 if (st->Ist.CAS.details->dataHi)
4658 deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
4659 deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
4660 break;
4661 case Ist_LLSC:
4662 st->Ist.LLSC.result += delta;
4663 deltaIRExpr(st->Ist.LLSC.addr, delta);
4664 if (st->Ist.LLSC.storedata)
4665 deltaIRExpr(st->Ist.LLSC.storedata, delta);
4666 break;
4667 case Ist_Dirty:
4668 d = st->Ist.Dirty.details;
4669 deltaIRExpr(d->guard, delta);
4670 for (i = 0; d->args[i]; i++) {
4671 IRExpr* arg = d->args[i];
4672 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
4673 deltaIRExpr(arg, delta);
4674 }
4675 if (d->tmp != IRTemp_INVALID)
4676 d->tmp += delta;
4677 if (d->mAddr)
4678 deltaIRExpr(d->mAddr, delta);
4679 break;
4680 default:
4681 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
4682 vpanic("deltaIRStmt");
4683 }
4684 }
4685
4686
4687 /* If possible, return a loop-unrolled version of bb0. The original
4688 is changed. If not possible, return NULL. */
4689
4690 /* The two schemas considered are:
4691
4692 X: BODY; goto X
4693
4694 which unrolls to (eg) X: BODY;BODY; goto X
4695
4696 and
4697
4698 X: BODY; if (c) goto X; goto Y
4699 which trivially transforms to
4700 X: BODY; if (!c) goto Y; goto X;
4701 so it falls in the scope of the first case.
4702
4703 X and Y must be literal (guest) addresses.
4704 */
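/* Concretely (a sketch): unrolling idiom #1 by a factor of 2 turns

      X: t0 = GET:I32(16) ; PUT(16) = t0 ; goto X

   into

      X: t0 = GET:I32(16) ; PUT(16) = t0
         t1 = GET:I32(16) ; PUT(16) = t1 ; goto X

   where the second copy's temps are renamed (via deltaIRStmt below)
   so as not to clash with the first copy's. */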
4705
4706 static Int calc_unroll_factor( IRSB* bb )
4707 {
4708 Int n_stmts, i;
4709
4710 n_stmts = 0;
4711 for (i = 0; i < bb->stmts_used; i++) {
4712 if (bb->stmts[i]->tag != Ist_NoOp)
4713 n_stmts++;
4714 }
4715
4716 if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
4717 if (vex_control.iropt_verbosity > 0)
4718 vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
4719 n_stmts, 8* n_stmts);
4720 return 8;
4721 }
4722 if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
4723 if (vex_control.iropt_verbosity > 0)
4724 vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
4725 n_stmts, 4* n_stmts);
4726 return 4;
4727 }
4728
4729 if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
4730 if (vex_control.iropt_verbosity > 0)
4731 vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
4732 n_stmts, 2* n_stmts);
4733 return 2;
4734 }
4735
4736 if (vex_control.iropt_verbosity > 0)
4737 vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
4738
4739 return 1;
4740 }
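
/* For instance, if iropt_unroll_thresh were 120 (purely for
   illustration), a loop body of 14 real stmts would be unrolled
   8 x (since 14 <= 120/8), one of 50 stmts 2 x (50 <= 120/2), and
   one of 70 stmts not at all. */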
4741
4742
4743 static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr my_addr )
4744 {
4745 Int i, j, jmax, n_vars;
4746 Bool xxx_known;
4747 Addr64 xxx_value, yyy_value;
4748 IRExpr* udst;
4749 IRStmt* st;
4750 IRConst* con;
4751 IRSB *bb1, *bb2;
4752 Int unroll_factor;
4753
4754 if (vex_control.iropt_unroll_thresh <= 0)
4755 return NULL;
4756
4757 /* First off, figure out if we can unroll this loop. Do this
4758 without modifying bb0. */
4759
4760 if (bb0->jumpkind != Ijk_Boring)
4761 return NULL;
4762
4763 xxx_known = False;
4764 xxx_value = 0;
4765
4766 /* Extract the next-guest address. If it isn't a literal, we
4767 have to give up. */
4768
4769 udst = bb0->next;
4770 if (udst->tag == Iex_Const
4771 && (udst->Iex.Const.con->tag == Ico_U32
4772 || udst->Iex.Const.con->tag == Ico_U64)) {
4773 /* The BB ends in a jump to a literal location. */
4774 xxx_known = True;
4775 xxx_value = udst->Iex.Const.con->tag == Ico_U64
4776 ? udst->Iex.Const.con->Ico.U64
4777 : (Addr64)(udst->Iex.Const.con->Ico.U32);
4778 }
4779
4780 if (!xxx_known)
4781 return NULL;
4782
4783    /* Now we know the BB ends in a jump to a literal location.  If
4784 it's a jump to itself (viz, idiom #1), move directly to the
4785 unrolling stage, first cloning the bb so the original isn't
4786 modified. */
4787 if (xxx_value == my_addr) {
4788 unroll_factor = calc_unroll_factor( bb0 );
4789 if (unroll_factor < 2)
4790 return NULL;
4791 bb1 = deepCopyIRSB( bb0 );
4792 bb0 = NULL;
4793 udst = NULL; /* is now invalid */
4794 goto do_unroll;
4795 }
4796
4797 /* Search for the second idiomatic form:
4798 X: BODY; if (c) goto X; goto Y
4799 We know Y, but need to establish that the last stmt
4800 is 'if (c) goto X'.
4801 */
4802 yyy_value = xxx_value;
4803 for (i = bb0->stmts_used-1; i >= 0; i--)
4804 if (bb0->stmts[i])
4805 break;
4806
4807 if (i < 0)
4808 return NULL; /* block with no stmts. Strange. */
4809
4810 st = bb0->stmts[i];
4811 if (st->tag != Ist_Exit)
4812 return NULL;
4813 if (st->Ist.Exit.jk != Ijk_Boring)
4814 return NULL;
4815
4816 con = st->Ist.Exit.dst;
4817 vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
4818
4819 xxx_value = con->tag == Ico_U64
4820 ? st->Ist.Exit.dst->Ico.U64
4821 : (Addr64)(st->Ist.Exit.dst->Ico.U32);
4822
4823 /* If this assertion fails, we have some kind of type error. */
4824 vassert(con->tag == udst->Iex.Const.con->tag);
4825
4826 if (xxx_value != my_addr)
4827 /* We didn't find either idiom. Give up. */
4828 return NULL;
4829
4830 /* Ok, we found idiom #2. Copy the BB, switch around the xxx and
4831 yyy values (which makes it look like idiom #1), and go into
4832 unrolling proper. This means finding (again) the last stmt, in
4833 the copied BB. */
4834
4835 unroll_factor = calc_unroll_factor( bb0 );
4836 if (unroll_factor < 2)
4837 return NULL;
4838
4839 bb1 = deepCopyIRSB( bb0 );
4840 bb0 = NULL;
4841 udst = NULL; /* is now invalid */
4842 for (i = bb1->stmts_used-1; i >= 0; i--)
4843 if (bb1->stmts[i])
4844 break;
4845
4846 /* The next bunch of assertions should be true since we already
4847 found and checked the last stmt in the original bb. */
4848
4849 vassert(i >= 0);
4850
4851 st = bb1->stmts[i];
4852 vassert(st->tag == Ist_Exit);
4853
4854 con = st->Ist.Exit.dst;
4855 vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
4856
4857 udst = bb1->next;
4858 vassert(udst->tag == Iex_Const);
4859 vassert(udst->Iex.Const.con->tag == Ico_U32
4860 || udst->Iex.Const.con->tag == Ico_U64);
4861 vassert(con->tag == udst->Iex.Const.con->tag);
4862
4863 /* switch the xxx and yyy fields around */
4864 if (con->tag == Ico_U64) {
4865 udst->Iex.Const.con->Ico.U64 = xxx_value;
4866 con->Ico.U64 = yyy_value;
4867 } else {
4868 udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
4869 con->Ico.U32 = (UInt)yyy_value;
4870 }
4871
4872 /* negate the test condition */
4873 st->Ist.Exit.guard
4874 = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));
4875
4876    /* --- The unroller proper.  Both idioms have by now --- */
4877    /* --- been converted to idiom 1. --- */
4878
4879 do_unroll:
4880
4881 vassert(unroll_factor == 2
4882 || unroll_factor == 4
4883 || unroll_factor == 8);
4884
4885 jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
4886 for (j = 1; j <= jmax; j++) {
4887
4888 n_vars = bb1->tyenv->types_used;
4889
4890 bb2 = deepCopyIRSB(bb1);
4891 for (i = 0; i < n_vars; i++)
4892 (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);
4893
4894 for (i = 0; i < bb2->stmts_used; i++) {
4895 /* deltaIRStmt destructively modifies the stmt, but
4896 that's OK since bb2 is a complete fresh copy of bb1. */
4897 deltaIRStmt(bb2->stmts[i], n_vars);
4898 addStmtToIRSB(bb1, bb2->stmts[i]);
4899 }
4900 }
4901
4902 if (DEBUG_IROPT) {
4903 vex_printf("\nUNROLLED (%lx)\n", my_addr);
4904 ppIRSB(bb1);
4905 vex_printf("\n");
4906 }
4907
4908 /* Flattening; sigh. The unroller succeeds in breaking flatness
4909 by negating the test condition. This should be fixed properly.
4910 For the moment use this shotgun approach. */
4911 return flatten_BB(bb1);
4912 }
4913
4914
4915 /*---------------------------------------------------------------*/
4916 /*--- The tree builder ---*/
4917 /*---------------------------------------------------------------*/
4918
4919 /* This isn't part of IR optimisation. Really it's a pass done prior
4920 to instruction selection, which improves the code that the
4921 instruction selector can produce. */
4922
4923 /* --- The 'tmp' environment is the central data structure here --- */
4924
4925 /* The number of outstanding bindings we're prepared to track.
4926 The number of times the env becomes full and we have to dump
4927 the oldest binding (hence reducing code quality) falls very
4928 rapidly as the env size increases. 8 gives reasonable performance
4929 under most circumstances. */
4930 #define A_NENV 10
4931
4932 /* An interval. Used to record the bytes in the guest state accessed
4933 by a Put[I] statement or by (one or more) Get[I] expression(s). In
4934 case of several Get[I] expressions, the lower/upper bounds are recorded.
4935 This is conservative but cheap.
4936 E.g. a Put of 8 bytes at address 100 would be recorded as [100,107].
4937 E.g. an expression that reads 8 bytes at offset 100 and 4 bytes at
4938 offset 200 would be recorded as [100,203] */
4939 typedef
4940 struct {
4941 Bool present;
4942 Int low;
4943 Int high;
4944 }
4945 Interval;
4946
4947 static inline Bool
4948 intervals_overlap(Interval i1, Interval i2)
4949 {
4950 return (i1.low >= i2.low && i1.low <= i2.high) ||
4951 (i2.low >= i1.low && i2.low <= i1.high);
4952 }
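
/* E.g. [100,107] and [104,203] overlap, whereas [100,107] and
   [108,203] do not. */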
4953
4954 static inline void
4955 update_interval(Interval *i, Int low, Int high)
4956 {
4957 vassert(low <= high);
4958
4959 if (i->present) {
4960 if (low < i->low) i->low = low;
4961 if (high > i->high) i->high = high;
4962 } else {
4963 i->present = True;
4964 i->low = low;
4965 i->high = high;
4966 }
4967 }
4968
4969
4970 /* bindee == NULL === slot is not in use
4971 bindee != NULL === slot is in use
4972 */
4973 typedef
4974 struct {
4975 IRTemp binder;
4976 IRExpr* bindee;
4977 Bool doesLoad;
4978 /* Record the bytes of the guest state BINDEE reads from. */
4979 Interval getInterval;
4980 }
4981 ATmpInfo;
4982
4983 __attribute__((unused))
4984 static void ppAEnv ( ATmpInfo* env )
4985 {
4986 Int i;
4987 for (i = 0; i < A_NENV; i++) {
4988 vex_printf("%d tmp %d val ", i, (Int)env[i].binder);
4989 if (env[i].bindee)
4990 ppIRExpr(env[i].bindee);
4991 else
4992 vex_printf("(null)");
4993 vex_printf("\n");
4994 }
4995 }
4996
4997 /* --- Tree-traversal fns --- */
4998
4999 /* Traverse an expr, and detect if any part of it reads memory or does
5000 a Get. Be careful ... this really controls how much the
5001 tree-builder can reorder the code, so getting it right is critical.
5002 */
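/* For instance (a sketch): traversing "Add32(GET:I32(8),LDle:I32(t1))"
   sets *doesLoad and widens *getInterval to include [8,11]. */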
5003 static void setHints_Expr (Bool* doesLoad, Interval* getInterval, IRExpr* e )
5004 {
5005 Int i;
5006 switch (e->tag) {
5007 case Iex_CCall:
5008 for (i = 0; e->Iex.CCall.args[i]; i++)
5009 setHints_Expr(doesLoad, getInterval, e->Iex.CCall.args[i]);
5010 return;
5011 case Iex_ITE:
5012 setHints_Expr(doesLoad, getInterval, e->Iex.ITE.cond);
5013 setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iftrue);
5014 setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iffalse);
5015 return;
5016 case Iex_Qop:
5017 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg1);
5018 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg2);
5019 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg3);
5020 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg4);
5021 return;
5022 case Iex_Triop:
5023 setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg1);
5024 setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg2);
5025 setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg3);
5026 return;
5027 case Iex_Binop:
5028 setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg1);
5029 setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg2);
5030 return;
5031 case Iex_Unop:
5032 setHints_Expr(doesLoad, getInterval, e->Iex.Unop.arg);
5033 return;
5034 case Iex_Load:
5035 *doesLoad = True;
5036 setHints_Expr(doesLoad, getInterval, e->Iex.Load.addr);
5037 return;
5038 case Iex_Get: {
5039 Int low = e->Iex.Get.offset;
5040 Int high = low + sizeofIRType(e->Iex.Get.ty) - 1;
5041 update_interval(getInterval, low, high);
5042 return;
5043 }
5044 case Iex_GetI: {
5045 IRRegArray *descr = e->Iex.GetI.descr;
5046 Int size = sizeofIRType(descr->elemTy);
5047 Int low = descr->base;
5048 Int high = low + descr->nElems * size - 1;
5049 update_interval(getInterval, low, high);
5050 setHints_Expr(doesLoad, getInterval, e->Iex.GetI.ix);
5051 return;
5052 }
5053 case Iex_RdTmp:
5054 case Iex_Const:
5055 return;
5056 default:
5057 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5058 vpanic("setHints_Expr");
5059 }
5060 }
5061
5062
5063 /* Add a binding to the front of the env and slide all the rest
5064 backwards. It should be the case that the last slot is free. */
5065 static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
5066 {
5067 Int i;
5068 vassert(env[A_NENV-1].bindee == NULL);
5069 for (i = A_NENV-1; i >= 1; i--)
5070 env[i] = env[i-1];
5071 env[0].binder = binder;
5072 env[0].bindee = bindee;
5073 env[0].doesLoad = False; /* filled in later */
5074 env[0].getInterval.present = False; /* filled in later */
5075 env[0].getInterval.low = -1; /* filled in later */
5076 env[0].getInterval.high = -1; /* filled in later */
5077 }
5078
5079 /* Given uses :: array of UShort, indexed by IRTemp
5080 Add the use-occurrences of temps in this expression
5081    to the counts in 'uses'.
5082 */
5083 static void aoccCount_Expr ( UShort* uses, IRExpr* e )
5084 {
5085 Int i;
5086
5087 switch (e->tag) {
5088
5089 case Iex_RdTmp: /* the only interesting case */
5090 uses[e->Iex.RdTmp.tmp]++;
5091 return;
5092
5093 case Iex_ITE:
5094 aoccCount_Expr(uses, e->Iex.ITE.cond);
5095 aoccCount_Expr(uses, e->Iex.ITE.iftrue);
5096 aoccCount_Expr(uses, e->Iex.ITE.iffalse);
5097 return;
5098
5099 case Iex_Qop:
5100 aoccCount_Expr(uses, e->Iex.Qop.details->arg1);
5101 aoccCount_Expr(uses, e->Iex.Qop.details->arg2);
5102 aoccCount_Expr(uses, e->Iex.Qop.details->arg3);
5103 aoccCount_Expr(uses, e->Iex.Qop.details->arg4);
5104 return;
5105
5106 case Iex_Triop:
5107 aoccCount_Expr(uses, e->Iex.Triop.details->arg1);
5108 aoccCount_Expr(uses, e->Iex.Triop.details->arg2);
5109 aoccCount_Expr(uses, e->Iex.Triop.details->arg3);
5110 return;
5111
5112 case Iex_Binop:
5113 aoccCount_Expr(uses, e->Iex.Binop.arg1);
5114 aoccCount_Expr(uses, e->Iex.Binop.arg2);
5115 return;
5116
5117 case Iex_Unop:
5118 aoccCount_Expr(uses, e->Iex.Unop.arg);
5119 return;
5120
5121 case Iex_Load:
5122 aoccCount_Expr(uses, e->Iex.Load.addr);
5123 return;
5124
5125 case Iex_CCall:
5126 for (i = 0; e->Iex.CCall.args[i]; i++)
5127 aoccCount_Expr(uses, e->Iex.CCall.args[i]);
5128 return;
5129
5130 case Iex_GetI:
5131 aoccCount_Expr(uses, e->Iex.GetI.ix);
5132 return;
5133
5134 case Iex_Const:
5135 case Iex_Get:
5136 return;
5137
5138 default:
5139 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5140 vpanic("aoccCount_Expr");
5141 }
5142 }
5143
5144
5145 /* Given uses :: array of UShort, indexed by IRTemp
5146 Add the use-occurrences of temps in this statement
5147    to the counts in 'uses'.
5148 */
5149 static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
5150 {
5151 Int i;
5152 IRDirty* d;
5153 IRCAS* cas;
5154 switch (st->tag) {
5155 case Ist_AbiHint:
5156 aoccCount_Expr(uses, st->Ist.AbiHint.base);
5157 aoccCount_Expr(uses, st->Ist.AbiHint.nia);
5158 return;
5159 case Ist_WrTmp:
5160 aoccCount_Expr(uses, st->Ist.WrTmp.data);
5161 return;
5162 case Ist_Put:
5163 aoccCount_Expr(uses, st->Ist.Put.data);
5164 return;
5165 case Ist_PutI:
5166 aoccCount_Expr(uses, st->Ist.PutI.details->ix);
5167 aoccCount_Expr(uses, st->Ist.PutI.details->data);
5168 return;
5169 case Ist_Store:
5170 aoccCount_Expr(uses, st->Ist.Store.addr);
5171 aoccCount_Expr(uses, st->Ist.Store.data);
5172 return;
5173 case Ist_StoreG: {
5174 IRStoreG* sg = st->Ist.StoreG.details;
5175 aoccCount_Expr(uses, sg->addr);
5176 aoccCount_Expr(uses, sg->data);
5177 aoccCount_Expr(uses, sg->guard);
5178 return;
5179 }
5180 case Ist_LoadG: {
5181 IRLoadG* lg = st->Ist.LoadG.details;
5182 aoccCount_Expr(uses, lg->addr);
5183 aoccCount_Expr(uses, lg->alt);
5184 aoccCount_Expr(uses, lg->guard);
5185 return;
5186 }
5187 case Ist_CAS:
5188 cas = st->Ist.CAS.details;
5189 aoccCount_Expr(uses, cas->addr);
5190 if (cas->expdHi)
5191 aoccCount_Expr(uses, cas->expdHi);
5192 aoccCount_Expr(uses, cas->expdLo);
5193 if (cas->dataHi)
5194 aoccCount_Expr(uses, cas->dataHi);
5195 aoccCount_Expr(uses, cas->dataLo);
5196 return;
5197 case Ist_LLSC:
5198 aoccCount_Expr(uses, st->Ist.LLSC.addr);
5199 if (st->Ist.LLSC.storedata)
5200 aoccCount_Expr(uses, st->Ist.LLSC.storedata);
5201 return;
5202 case Ist_Dirty:
5203 d = st->Ist.Dirty.details;
5204 if (d->mFx != Ifx_None)
5205 aoccCount_Expr(uses, d->mAddr);
5206 aoccCount_Expr(uses, d->guard);
5207 for (i = 0; d->args[i]; i++) {
5208 IRExpr* arg = d->args[i];
5209 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
5210 aoccCount_Expr(uses, arg);
5211 }
5212 return;
5213 case Ist_NoOp:
5214 case Ist_IMark:
5215 case Ist_MBE:
5216 return;
5217 case Ist_Exit:
5218 aoccCount_Expr(uses, st->Ist.Exit.guard);
5219 return;
5220 default:
5221 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
5222 vpanic("aoccCount_Stmt");
5223 }
5224 }
5225
5226 /* Look up a binding for tmp in the env. If found, return the bound
5227 expression, and set the env's binding to NULL so it is marked as
5228 used. If not found, return NULL. */
5229
5230 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
5231 {
5232 Int i;
5233 for (i = 0; i < A_NENV; i++) {
5234 if (env[i].binder == tmp && env[i].bindee != NULL) {
5235 IRExpr* bindee = env[i].bindee;
5236 env[i].bindee = NULL;
5237 return bindee;
5238 }
5239 }
5240 return NULL;
5241 }
5242
5243 /* Traverse e, looking for temps. For each observed temp, see if env
5244 contains a binding for the temp, and if so return the bound value.
5245 The env has the property that any binding it holds is
5246 'single-shot', so once a binding is used, it is marked as no longer
5247 available, by setting its .bindee field to NULL. */
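/* E.g. (a sketch, with made-up temps): if the env holds
   t7 -> Add32(t1,t2), then substituting into "Shl32(t7,0x2:I8)"
   yields "Shl32(Add32(t1,t2),0x2:I8)" and retires the binding, so a
   subsequent lookup of t7 returns NULL. */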
5248
5249 static inline Bool is_Unop ( IRExpr* e, IROp op ) {
5250 return e->tag == Iex_Unop && e->Iex.Unop.op == op;
5251 }
5252 static inline Bool is_Binop ( IRExpr* e, IROp op ) {
5253 return e->tag == Iex_Binop && e->Iex.Binop.op == op;
5254 }
5255
5256 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
5257 {
5258 switch (op) {
5259 case Iop_Or32:
5260 /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */
5261 if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
5262 return IRExpr_Unop( Iop_CmpwNEZ32,
5263 IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
5264 a2->Iex.Unop.arg ) );
5265 break;
5266
5267 case Iop_CmpNE32:
5268 /* Since X has type Ity_I1 we can simplify:
5269          CmpNE32(1Uto32(X),0) ==> X */
5270 if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
5271 return a1->Iex.Unop.arg;
5272 break;
5273
5274 default:
5275 break;
5276 }
5277 /* no reduction rule applies */
5278 return IRExpr_Binop( op, a1, a2 );
5279 }
5280
5281 static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
5282 {
5283 switch (op) {
5284 case Iop_CmpwNEZ64:
5285 /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */
5286 if (is_Unop(aa, Iop_CmpwNEZ64))
5287 return IRExpr_Unop( Iop_CmpwNEZ64, aa->Iex.Unop.arg );
5288 /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
5289 if (is_Binop(aa, Iop_Or64)
5290 && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
5291 return fold_IRExpr_Unop(
5292 Iop_CmpwNEZ64,
5293 IRExpr_Binop(Iop_Or64,
5294 aa->Iex.Binop.arg1->Iex.Unop.arg,
5295 aa->Iex.Binop.arg2));
5296 /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
5297 if (is_Binop(aa, Iop_Or64)
5298 && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
5299 return fold_IRExpr_Unop(
5300 Iop_CmpwNEZ64,
5301 IRExpr_Binop(Iop_Or64,
5302 aa->Iex.Binop.arg1,
5303 aa->Iex.Binop.arg2->Iex.Unop.arg));
5304 break;
5305 case Iop_CmpNEZ64:
5306 /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
5307 if (is_Unop(aa, Iop_Left64))
5308 return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
5309 /* CmpNEZ64( 1Uto64(X) ) --> X */
5310 if (is_Unop(aa, Iop_1Uto64))
5311 return aa->Iex.Unop.arg;
5312 break;
5313 case Iop_CmpwNEZ32:
5314 /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
5315 if (is_Unop(aa, Iop_CmpwNEZ32))
5316 return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
5317 break;
5318 case Iop_CmpNEZ32:
5319 /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
5320 if (is_Unop(aa, Iop_Left32))
5321 return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
5322 /* CmpNEZ32( 1Uto32(X) ) --> X */
5323 if (is_Unop(aa, Iop_1Uto32))
5324 return aa->Iex.Unop.arg;
5325 /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */
5326 if (is_Unop(aa, Iop_64to32) && is_Unop(aa->Iex.Unop.arg, Iop_CmpwNEZ64))
5327 return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg->Iex.Unop.arg);
5328 break;
5329 case Iop_CmpNEZ8:
5330 /* CmpNEZ8( 1Uto8(X) ) --> X */
5331 if (is_Unop(aa, Iop_1Uto8))
5332 return aa->Iex.Unop.arg;
5333 break;
5334 case Iop_Left32:
5335 /* Left32( Left32(x) ) --> Left32(x) */
5336 if (is_Unop(aa, Iop_Left32))
5337 return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
5338 break;
5339 case Iop_Left64:
5340 /* Left64( Left64(x) ) --> Left64(x) */
5341 if (is_Unop(aa, Iop_Left64))
5342 return IRExpr_Unop( Iop_Left64, aa->Iex.Unop.arg );
5343 break;
5344 case Iop_ZeroHI64ofV128:
5345 /* ZeroHI64ofV128( ZeroHI64ofV128(x) ) --> ZeroHI64ofV128(x) */
5346 if (is_Unop(aa, Iop_ZeroHI64ofV128))
5347 return IRExpr_Unop( Iop_ZeroHI64ofV128, aa->Iex.Unop.arg );
5348 break;
5349 case Iop_32to1:
5350 /* 32to1( 1Uto32 ( x ) ) --> x */
5351 if (is_Unop(aa, Iop_1Uto32))
5352 return aa->Iex.Unop.arg;
5353 /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
5354 if (is_Unop(aa, Iop_CmpwNEZ32))
5355 return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
5356 break;
5357 case Iop_64to1:
5358 /* 64to1( 1Uto64 ( x ) ) --> x */
5359 if (is_Unop(aa, Iop_1Uto64))
5360 return aa->Iex.Unop.arg;
5361 /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
5362 if (is_Unop(aa, Iop_CmpwNEZ64))
5363 return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
5364 break;
5365 case Iop_64to32:
5366 /* 64to32( 32Uto64 ( x )) --> x */
5367 if (is_Unop(aa, Iop_32Uto64))
5368 return aa->Iex.Unop.arg;
5369 /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
5370 if (is_Unop(aa, Iop_8Uto64))
5371 return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
5372 break;
5373
5374 case Iop_32Uto64:
5375 /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
5376 if (is_Unop(aa, Iop_8Uto32))
5377 return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
5378 /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
5379 if (is_Unop(aa, Iop_16Uto32))
5380 return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
5381       /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh )))
5382          --> Shr64( 32Uto64(64to32(x)), sh ) */
5383 if (is_Unop(aa, Iop_64to32)
5384 && is_Binop(aa->Iex.Unop.arg, Iop_Shr64)
5385 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
5386 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
5387 Iop_64to32)) {
5388 return aa->Iex.Unop.arg;
5389 }
5390       /* 32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh )))
5391          --> 32Uto64(64to32( Shl64( x, sh ))) */
5392 if (is_Unop(aa, Iop_64to32)
5393 && is_Binop(aa->Iex.Unop.arg, Iop_Shl64)
5394 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
5395 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
5396 Iop_64to32)) {
5397 return
5398 IRExpr_Unop(
5399 Iop_32Uto64,
5400 IRExpr_Unop(
5401 Iop_64to32,
5402 IRExpr_Binop(
5403 Iop_Shl64,
5404 aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg,
5405 aa->Iex.Unop.arg->Iex.Binop.arg2
5406 )));
5407 }
5408 break;
5409
5410 case Iop_1Sto32:
5411 /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
5412 if (is_Unop(aa, Iop_CmpNEZ8)
5413 && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
5414 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
5415 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
5416 Iop_CmpNEZ32)) {
5417 return IRExpr_Unop( Iop_CmpwNEZ32,
5418 aa->Iex.Unop.arg->Iex.Unop.arg
5419 ->Iex.Unop.arg->Iex.Unop.arg);
5420 }
5421 break;
5422
5423 default:
5424 break;
5425 }
5426 /* no reduction rule applies */
5427 return IRExpr_Unop( op, aa );
5428 }
5429
5430 static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
5431 {
5432 IRExpr* e2;
5433 IRExpr** args2;
5434 Int i;
5435
5436 switch (e->tag) {
5437
5438 case Iex_CCall:
5439 args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
5440 for (i = 0; args2[i]; i++)
5441 args2[i] = atbSubst_Expr(env,args2[i]);
5442 return IRExpr_CCall(
5443 e->Iex.CCall.cee,
5444 e->Iex.CCall.retty,
5445 args2
5446 );
5447 case Iex_RdTmp:
5448 e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
5449 return e2 ? e2 : e;
5450 case Iex_ITE:
5451 return IRExpr_ITE(
5452 atbSubst_Expr(env, e->Iex.ITE.cond),
5453 atbSubst_Expr(env, e->Iex.ITE.iftrue),
5454 atbSubst_Expr(env, e->Iex.ITE.iffalse)
5455 );
5456 case Iex_Qop:
5457 return IRExpr_Qop(
5458 e->Iex.Qop.details->op,
5459 atbSubst_Expr(env, e->Iex.Qop.details->arg1),
5460 atbSubst_Expr(env, e->Iex.Qop.details->arg2),
5461 atbSubst_Expr(env, e->Iex.Qop.details->arg3),
5462 atbSubst_Expr(env, e->Iex.Qop.details->arg4)
5463 );
5464 case Iex_Triop:
5465 return IRExpr_Triop(
5466 e->Iex.Triop.details->op,
5467 atbSubst_Expr(env, e->Iex.Triop.details->arg1),
5468 atbSubst_Expr(env, e->Iex.Triop.details->arg2),
5469 atbSubst_Expr(env, e->Iex.Triop.details->arg3)
5470 );
5471 case Iex_Binop:
5472 return fold_IRExpr_Binop(
5473 e->Iex.Binop.op,
5474 atbSubst_Expr(env, e->Iex.Binop.arg1),
5475 atbSubst_Expr(env, e->Iex.Binop.arg2)
5476 );
5477 case Iex_Unop:
5478 return fold_IRExpr_Unop(
5479 e->Iex.Unop.op,
5480 atbSubst_Expr(env, e->Iex.Unop.arg)
5481 );
5482 case Iex_Load:
5483 return IRExpr_Load(
5484 e->Iex.Load.end,
5485 e->Iex.Load.ty,
5486 atbSubst_Expr(env, e->Iex.Load.addr)
5487 );
5488 case Iex_GetI:
5489 return IRExpr_GetI(
5490 e->Iex.GetI.descr,
5491 atbSubst_Expr(env, e->Iex.GetI.ix),
5492 e->Iex.GetI.bias
5493 );
5494 case Iex_Const:
5495 case Iex_Get:
5496 return e;
5497 default:
5498 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5499 vpanic("atbSubst_Expr");
5500 }
5501 }
5502
5503 /* Same deal as atbSubst_Expr, except for stmts. */
5504
5505 static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
5506 {
5507 Int i;
5508 IRDirty *d, *d2;
5509 IRCAS *cas, *cas2;
5510 IRPutI *puti, *puti2;
5511
5512 switch (st->tag) {
5513 case Ist_AbiHint:
5514 return IRStmt_AbiHint(
5515 atbSubst_Expr(env, st->Ist.AbiHint.base),
5516 st->Ist.AbiHint.len,
5517 atbSubst_Expr(env, st->Ist.AbiHint.nia)
5518 );
5519 case Ist_Store:
5520 return IRStmt_Store(
5521 st->Ist.Store.end,
5522 atbSubst_Expr(env, st->Ist.Store.addr),
5523 atbSubst_Expr(env, st->Ist.Store.data)
5524 );
5525 case Ist_StoreG: {
5526 IRStoreG* sg = st->Ist.StoreG.details;
5527 return IRStmt_StoreG(sg->end,
5528 atbSubst_Expr(env, sg->addr),
5529 atbSubst_Expr(env, sg->data),
5530 atbSubst_Expr(env, sg->guard));
5531 }
5532 case Ist_LoadG: {
5533 IRLoadG* lg = st->Ist.LoadG.details;
5534 return IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
5535 atbSubst_Expr(env, lg->addr),
5536 atbSubst_Expr(env, lg->alt),
5537 atbSubst_Expr(env, lg->guard));
5538 }
5539 case Ist_WrTmp:
5540 return IRStmt_WrTmp(
5541 st->Ist.WrTmp.tmp,
5542 atbSubst_Expr(env, st->Ist.WrTmp.data)
5543 );
5544 case Ist_Put:
5545 return IRStmt_Put(
5546 st->Ist.Put.offset,
5547 atbSubst_Expr(env, st->Ist.Put.data)
5548 );
5549 case Ist_PutI:
5550 puti = st->Ist.PutI.details;
5551 puti2 = mkIRPutI(puti->descr,
5552 atbSubst_Expr(env, puti->ix),
5553 puti->bias,
5554 atbSubst_Expr(env, puti->data));
5555 return IRStmt_PutI(puti2);
5556
5557 case Ist_Exit:
5558 return IRStmt_Exit(
5559 atbSubst_Expr(env, st->Ist.Exit.guard),
5560 st->Ist.Exit.jk,
5561 st->Ist.Exit.dst,
5562 st->Ist.Exit.offsIP
5563 );
5564 case Ist_IMark:
5565 return IRStmt_IMark(st->Ist.IMark.addr,
5566 st->Ist.IMark.len,
5567 st->Ist.IMark.delta);
5568 case Ist_NoOp:
5569 return IRStmt_NoOp();
5570 case Ist_MBE:
5571 return IRStmt_MBE(st->Ist.MBE.event);
5572 case Ist_CAS:
5573 cas = st->Ist.CAS.details;
5574 cas2 = mkIRCAS(
5575 cas->oldHi, cas->oldLo, cas->end,
5576 atbSubst_Expr(env, cas->addr),
5577 cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
5578 atbSubst_Expr(env, cas->expdLo),
5579 cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
5580 atbSubst_Expr(env, cas->dataLo)
5581 );
5582 return IRStmt_CAS(cas2);
5583 case Ist_LLSC:
5584 return IRStmt_LLSC(
5585 st->Ist.LLSC.end,
5586 st->Ist.LLSC.result,
5587 atbSubst_Expr(env, st->Ist.LLSC.addr),
5588 st->Ist.LLSC.storedata
5589 ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
5590 );
5591 case Ist_Dirty:
5592 d = st->Ist.Dirty.details;
5593 d2 = emptyIRDirty();
5594 *d2 = *d;
5595 if (d2->mFx != Ifx_None)
5596 d2->mAddr = atbSubst_Expr(env, d2->mAddr);
5597 d2->guard = atbSubst_Expr(env, d2->guard);
5598 for (i = 0; d2->args[i]; i++) {
5599 IRExpr* arg = d2->args[i];
5600 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
5601 d2->args[i] = atbSubst_Expr(env, arg);
5602 }
5603 return IRStmt_Dirty(d2);
5604 default:
5605 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
5606 vpanic("atbSubst_Stmt");
5607 }
5608 }
5609
5610 inline
5611 static Bool dirty_helper_stores ( const IRDirty *d )
5612 {
5613 return d->mFx == Ifx_Write || d->mFx == Ifx_Modify;
5614 }
5615
5616 inline
5617 static Interval dirty_helper_puts (
5618 const IRDirty *d,
5619 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
5620 VexRegisterUpdates pxControl,
5621 /*OUT*/Bool *requiresPreciseMemExns
5622 )
5623 {
5624 Int i;
5625 Interval interval;
5626
5627 /* Passing the guest state pointer opens the door to modifying the
5628 guest state under the covers. It's not allowed, but let's be
5629 extra conservative and assume the worst. */
5630 for (i = 0; d->args[i]; i++) {
5631 if (UNLIKELY(d->args[i]->tag == Iex_BBPTR)) {
5632 *requiresPreciseMemExns = True;
5633 /* Assume all guest state is written. */
5634 interval.present = True;
5635 interval.low = 0;
5636 interval.high = 0x7FFFFFFF;
5637 return interval;
5638 }
5639 }
5640
5641 /* Check the side effects on the guest state */
5642 interval.present = False;
5643 interval.low = interval.high = -1;
5644 *requiresPreciseMemExns = False;
5645
5646 for (i = 0; i < d->nFxState; ++i) {
5647 if (d->fxState[i].fx != Ifx_Read) {
5648 Int offset = d->fxState[i].offset;
5649 Int size = d->fxState[i].size;
5650 Int nRepeats = d->fxState[i].nRepeats;
5651 Int repeatLen = d->fxState[i].repeatLen;
5652
5653 if (preciseMemExnsFn(offset,
5654 offset + nRepeats * repeatLen + size - 1,
5655 pxControl)) {
5656 *requiresPreciseMemExns = True;
5657 }
5658 update_interval(&interval, offset,
5659 offset + nRepeats * repeatLen + size - 1);
5660 }
5661 }
5662
5663 return interval;
5664 }
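
/* Illustrative example (the values are hypothetical, for exposition
   only): an fxState entry with offset 8, size 4, nRepeats 2 and
   repeatLen 16 describes the guest-state pieces [8,11], [24,27] and
   [40,43]. The code above summarises it, conservatively, as the
   single interval [8, 8 + 2*16 + 4 - 1] = [8,43]. */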
5665
5666 /* Return an interval if st modifies the guest state. Via
5667 requiresPreciseMemExns return whether or not that modification
5668 requires precise exceptions. */
5669 static Interval stmt_modifies_guest_state (
5670 IRSB *bb, const IRStmt *st,
5671 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
5672 VexRegisterUpdates pxControl,
5673 /*OUT*/Bool *requiresPreciseMemExns
5674 )
5675 {
5676 Interval interval;
5677
5678 switch (st->tag) {
5679 case Ist_Put: {
5680 Int offset = st->Ist.Put.offset;
5681 Int size = sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Put.data));
5682
5683 *requiresPreciseMemExns
5684 = preciseMemExnsFn(offset, offset + size - 1, pxControl);
5685 interval.present = True;
5686 interval.low = offset;
5687 interval.high = offset + size - 1;
5688 return interval;
5689 }
5690
5691 case Ist_PutI: {
5692 IRRegArray *descr = st->Ist.PutI.details->descr;
5693 Int offset = descr->base;
5694 Int size = sizeofIRType(descr->elemTy);
5695
5696 /* We quietly assume here that all segments are contiguous and there
5697 are no holes. This is to avoid a loop. The assumption is conservative
5698 in the sense that we might report that precise memory exceptions are
5699 needed when in fact they are not. */
5700 *requiresPreciseMemExns
5701 = preciseMemExnsFn(offset, offset + descr->nElems * size - 1,
5702 pxControl);
5703 interval.present = True;
5704 interval.low = offset;
5705 interval.high = offset + descr->nElems * size - 1;
5706 return interval;
5707 }
5708
5709 case Ist_Dirty:
5710 return dirty_helper_puts(st->Ist.Dirty.details,
5711 preciseMemExnsFn, pxControl,
5712 requiresPreciseMemExns);
5713
5714 default:
5715 *requiresPreciseMemExns = False;
5716 interval.present = False;
5717 interval.low = -1;
5718 interval.high = -1;
5719 return interval;
5720 }
5721 }
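
/* For example (hypothetical offsets, for illustration only): a
   PUT(16) of an I64 value yields the interval [16,23], and
   *requiresPreciseMemExns is then whatever preciseMemExnsFn(16, 23,
   pxControl) reports for that range. */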
5722
5723 /* notstatic */ Addr ado_treebuild_BB (
5724 IRSB* bb,
5725 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
5726 VexRegisterUpdates pxControl
5727 )
5728 {
5729 Int i, j, k, m;
5730 Bool stmtStores, invalidateMe;
5731 Interval putInterval;
5732 IRStmt* st;
5733 IRStmt* st2;
5734 ATmpInfo env[A_NENV];
5735
5736 Bool max_ga_known = False;
5737 Addr max_ga = 0;
5738
5739 Int n_tmps = bb->tyenv->types_used;
5740 UShort* uses = LibVEX_Alloc_inline(n_tmps * sizeof(UShort));
5741
5742 /* Phase 1. Scan forwards in bb, counting use occurrences of each
5743 temp. Also count occurrences in the bb->next field. Take the
5744 opportunity to also find the maximum guest address in the block,
5745 since that will be needed later for deciding when we can safely
5746 elide event checks. */
5747
5748 for (i = 0; i < n_tmps; i++)
5749 uses[i] = 0;
5750
5751 for (i = 0; i < bb->stmts_used; i++) {
5752 st = bb->stmts[i];
5753 switch (st->tag) {
5754 case Ist_NoOp:
5755 continue;
5756 case Ist_IMark: {
5757 UInt len = st->Ist.IMark.len;
5758 Addr mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
5759 max_ga_known = True;
5760 if (mga > max_ga)
5761 max_ga = mga;
5762 break;
5763 }
5764 default:
5765 break;
5766 }
5767 aoccCount_Stmt( uses, st );
5768 }
5769 aoccCount_Expr(uses, bb->next );
5770
5771 # if 0
5772 for (i = 0; i < n_tmps; i++) {
5773 if (uses[i] == 0)
5774 continue;
5775 ppIRTemp( (IRTemp)i );
5776 vex_printf(" used %d\n", (Int)uses[i] );
5777 }
5778 # endif
5779
5780 /* Phase 2. Scan forwards in bb. For each statement in turn:
5781
5782 If the env is full, emit the end element. This guarantees
5783 there is at least one free slot for the steps that follow.
5784
5785 On seeing 't = E', occ(t)==1,
5786 let E'=env(E)
5787 delete this stmt
5788 add t -> E' to the front of the env
5789 Examine E' and set the hints for E' appropriately
5790 (doesLoad? doesGet?)
5791
5792 On seeing any other stmt,
5793 let stmt' = env(stmt)
5794 remove from env any 't=E' binds invalidated by stmt
5795 emit the invalidated stmts
5796 emit stmt'
5797 compact any holes in env
5798 by sliding entries towards the front
5799
5800 Finally, apply env to bb->next.
5801 */
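
   /* A worked sketch of phase 2 (hypothetical IR, for exposition
      only): given

         t1 = GET:I32(0)
         t2 = Add32(t1,0x4:I32)
         PUT(0) = t2

      with occ(t1) == occ(t2) == 1, both WrTmps are deleted and held
      as bindings in env, and the PUT is eventually emitted as

         PUT(0) = Add32(GET:I32(0),0x4:I32)

      so that the instruction selector sees a single whole tree. */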
5802
5803 for (i = 0; i < A_NENV; i++) {
5804 env[i].bindee = NULL;
5805 env[i].binder = IRTemp_INVALID;
5806 }
5807
5808 /* The stmts in bb are being reordered, and we are guaranteed to
5809 end up with no more than the number we started with. Let i be
5810 the cursor for the current stmt being examined and j <= i the
5811 cursor for the current stmt being written.
5812 */
5813 j = 0;
5814 for (i = 0; i < bb->stmts_used; i++) {
5815
5816 st = bb->stmts[i];
5817 if (st->tag == Ist_NoOp)
5818 continue;
5819
5820 /* Ensure there's at least one space in the env, by emitting
5821 the oldest binding if necessary. */
5822 if (env[A_NENV-1].bindee != NULL) {
5823 bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
5824 env[A_NENV-1].bindee );
5825 j++;
5826 vassert(j <= i);
5827 env[A_NENV-1].bindee = NULL;
5828 }
5829
5830 /* Consider current stmt. */
5831 if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
5832 IRExpr *e, *e2;
5833
5834 /* optional extra: dump dead bindings as we find them.
5835 Removes the need for a prior dead-code removal pass. */
5836 if (uses[st->Ist.WrTmp.tmp] == 0) {
5837 if (0) vex_printf("DEAD binding\n");
5838 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
5839 }
5840 vassert(uses[st->Ist.WrTmp.tmp] == 1);
5841
5842 /* ok, we have 't = E', occ(t)==1. Do the abovementioned
5843 actions. */
5844 e = st->Ist.WrTmp.data;
5845 e2 = atbSubst_Expr(env, e);
5846 addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
5847 setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2);
5848 /* don't advance j, as we are deleting this stmt and instead
5849 holding it temporarily in the env. */
5850 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
5851 }
5852
5853 /* we get here for any other kind of statement. */
5854 /* 'use up' any bindings required by the current statement. */
5855 st2 = atbSubst_Stmt(env, st);
5856
5857 /* Now, before this stmt, dump any bindings in env that it
5858 invalidates. These need to be dumped in the order in which
5859 they originally entered env -- that means from oldest to
5860 youngest. */
5861
5862 /* putInterval/stmtStores characterise what the stmt under
5863 consideration does, or might do (overestimating is safe). */
5864
5865 Bool putRequiresPreciseMemExns;
5866 putInterval = stmt_modifies_guest_state(
5867 bb, st, preciseMemExnsFn, pxControl,
5868 &putRequiresPreciseMemExns
5869 );
5870
5871 /* stmtStores is True if this stmt writes, or might write, memory
5872 (==> we don't want to reorder other loads or stores relative to
5873 it). Also, both LL and SC fall under this classification, since
5874 we really ought to be conservative and not reorder any other
5875 memory transactions relative to them. */
5876 stmtStores
5877 = toBool( st->tag == Ist_Store
5878 || (st->tag == Ist_Dirty
5879 && dirty_helper_stores(st->Ist.Dirty.details))
5880 || st->tag == Ist_LLSC
5881 || st->tag == Ist_CAS );
5882
5883 for (k = A_NENV-1; k >= 0; k--) {
5884 if (env[k].bindee == NULL)
5885 continue;
5886 /* Compare the actions of this stmt with the actions of
5887 binding 'k', to see if they invalidate the binding. */
5888 invalidateMe
5889 = toBool(
5890 /* a store invalidates loaded data */
5891 (env[k].doesLoad && stmtStores)
5892 /* a put invalidates get'd data, if they overlap */
5893 || ((env[k].getInterval.present && putInterval.present) &&
5894 intervals_overlap(env[k].getInterval, putInterval))
5895 /* a put invalidates loaded data. That means, in essence, that
5896 a load expression cannot be substituted into a statement
5897 that follows the put. But there is nothing wrong with doing
5898 so except when the put statement requires precise exceptions.
5899 Think of a load that is moved past a put where the put
5900 updates the IP in the guest state. If the load generates
5901 a segfault, the wrong address (line number) would be
5902 reported. */
5903 || (env[k].doesLoad && putInterval.present &&
5904 putRequiresPreciseMemExns)
5905 /* probably overly conservative: a memory bus event
5906 invalidates absolutely everything, so that all
5907 computation prior to it is forced to complete before
5908 proceeding with the event (fence,lock,unlock). */
5909 || st->tag == Ist_MBE
5910 /* also be (probably overly) paranoid re AbiHints */
5911 || st->tag == Ist_AbiHint
5912 );
5913 if (invalidateMe) {
5914 bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
5915 j++;
5916 vassert(j <= i);
5917 env[k].bindee = NULL;
5918 }
5919 }
5920
5921 /* Slide in-use entries in env up to the front */
5922 m = 0;
5923 for (k = 0; k < A_NENV; k++) {
5924 if (env[k].bindee != NULL) {
5925 env[m] = env[k];
5926 m++;
5927 }
5928 }
5929 for (; m < A_NENV; m++) {
5930 env[m].bindee = NULL;
5931 }
5932
5933 /* finally, emit the substituted statement */
5934 bb->stmts[j] = st2;
5935 /* vex_printf("**2 "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
5936 j++;
5937
5938 vassert(j <= i+1);
5939 } /* for each stmt in the original bb ... */
5940
5941 /* Finally ... substitute the ->next field as much as possible, and
5942 dump any left-over bindings. Hmm. Perhaps there should be no
5943 left-over bindings? Or are any left-over bindings
5944 by definition dead? */
5945 bb->next = atbSubst_Expr(env, bb->next);
5946 bb->stmts_used = j;
5947
5948 return max_ga_known ? max_ga : ~(Addr)0;
5949 }
5950
5951
5952 /*---------------------------------------------------------------*/
5953 /*--- MSVC specific transformation hacks ---*/
5954 /*---------------------------------------------------------------*/
5955
5956 /* The purpose of all this is to find MSVC's idiom for non-constant
5957 bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into
5958 gcc's idiom "(a & ~c) | (b & c)". The motivation is that Memcheck
5959 generates a lot of false positives from the MSVC version because it
5960 doesn't understand that XORing an undefined bit with itself gives a
5961 defined result.
5962
5963 This isn't a problem for the simple case "x ^ x", because iropt
5964 folds it to a constant zero before Memcheck ever sees it. But in
5965 this case we have an intervening "& c" which defeats the simple
5966 case. So we have to carefully inspect all expressions rooted at an
5967 XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of the
5968 7 other variants resulting from swapping the order of arguments to
5969 the three binary operations. If we get a match, we then replace
5970 the tree with "(a & ~c) | (b & c)", and Memcheck is happy.
5971
5972 The key difficulty is to spot the two uses of "a". To normalise
5973 the IR to maximise the chances of success, we first do a CSE pass,
5974 with CSEing of loads enabled, since the two "a" expressions may be
5975 loads, which need to be commoned up. Then we do a constant folding
5976 pass, so as to remove any tmp-to-tmp assignment chains that would
5977 make matching the original expression more difficult.
5978 */
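
/* Why the two idioms are equal (a standalone sanity sketch; the
   function name check_bitfield_idiom is made up for illustration and
   is not part of VEX):

      static int check_bitfield_idiom ( unsigned a, unsigned b,
                                        unsigned c )
      {
         // a == (a & ~c) ^ (a & c), and XORing in (a ^ b) & c
         // == (a & c) ^ (b & c) cancels the two (a & c) terms,
         // leaving (a & ~c) ^ (b & c); those masks are disjoint,
         // so the final ^ can equally be written as |.
         return (a ^ ((a ^ b) & c)) == ((a & ~c) | (b & c));
      }
*/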
5979
5980
5981 /* Helper function for debug printing */
5982 __attribute__((unused))
5983 static void print_flat_expr ( IRExpr** env, IRExpr* e )
5984 {
5985 if (e == NULL) {
5986 vex_printf("?");
5987 return;
5988 }
5989 switch (e->tag) {
5990 case Iex_Binop: {
5991 ppIROp(e->Iex.Binop.op);
5992 vex_printf("(");
5993 print_flat_expr(env, e->Iex.Binop.arg1);
5994 vex_printf(",");
5995 print_flat_expr(env, e->Iex.Binop.arg2);
5996 vex_printf(")");
5997 break;
5998 }
5999 case Iex_Unop: {
6000 ppIROp(e->Iex.Unop.op);
6001 vex_printf("(");
6002 print_flat_expr(env, e->Iex.Unop.arg);
6003 vex_printf(")");
6004 break;
6005 }
6006 case Iex_RdTmp:
6007 ppIRTemp(e->Iex.RdTmp.tmp);
6008 vex_printf("=");
6009 print_flat_expr(env, chase(env, e));
6010 break;
6011 case Iex_Const:
6012 case Iex_CCall:
6013 case Iex_Load:
6014 case Iex_ITE:
6015 case Iex_Get:
6016 ppIRExpr(e);
6017 break;
6018 default:
6019 vex_printf("FAIL: "); ppIRExpr(e); vex_printf("\n");
6020 vassert(0);
6021 }
6022 }
6023
6024 /* Spot a ^ ((a ^ b) & c) for a,b and c tmp-or-const (atoms)
6025 or any of the other 7 variants generated by switching the order
6026 of arguments to the outer ^, the inner ^ and the &.
6027 */
6028 static UInt spotBitfieldAssignment ( /*OUT*/IRExpr** aa, /*OUT*/IRExpr** bb,
6029 /*OUT*/IRExpr** cc,
6030 IRExpr** env, IRExpr* e,
6031 IROp opAND, IROp opXOR)
6032 {
6033 # define ISBIN(_e,_op) ((_e) && (_e)->tag == Iex_Binop \
6034 && (_e)->Iex.Binop.op == (_op))
6035 # define ISATOM(_e) isIRAtom(_e)
6036 # define STEP(_e) chase1(env, (_e))
6037 # define LL(_e) ((_e)->Iex.Binop.arg1)
6038 # define RR(_e) ((_e)->Iex.Binop.arg2)
6039
6040 IRExpr *a1, *and, *xor, *c, *a2bL, *a2bR;
6041
6042 /* This is common to all 8 cases */
6043 if (!ISBIN(e, opXOR)) goto fail;
6044
6045 /* -----and------ */
6046 /* --xor--- */
6047 /* find variant 1: a1 ^ ((a2 ^ b) & c) */
6048 /* find variant 2: a1 ^ ((b ^ a2) & c) */
6049 a1 = and = xor = c = a2bL = a2bR = NULL;
6050
6051 a1 = LL(e);
6052 and = STEP(RR(e));
6053 if (!ISBIN(and, opAND)) goto v34;
6054 xor = STEP(LL(and));
6055 c = RR(and);
6056 if (!ISBIN(xor, opXOR)) goto v34;
6057 a2bL = LL(xor);
6058 a2bR = RR(xor);
6059
6060 if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
6061 *aa = a1;
6062 *bb = a2bR;
6063 *cc = c;
6064 return 1;
6065 }
6066 if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
6067 *aa = a1;
6068 *bb = a2bL;
6069 *cc = c;
6070 return 2;
6071 }
6072
6073 v34:
6074 /* -----and------ */
6075 /* --xor--- */
6076 /* find variant 3: ((a2 ^ b) & c) ^ a1 */
6077 /* find variant 4: ((b ^ a2) & c) ^ a1 */
6078 a1 = and = xor = c = a2bL = a2bR = NULL;
6079
6080 a1 = RR(e);
6081 and = STEP(LL(e));
6082 if (!ISBIN(and, opAND)) goto v56;
6083 xor = STEP(LL(and));
6084 c = RR(and);
6085 if (!ISBIN(xor, opXOR)) goto v56;
6086 a2bL = LL(xor);
6087 a2bR = RR(xor);
6088
6089 if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
6090 *aa = a1;
6091 *bb = a2bR;
6092 *cc = c;
6093 return 3;
6094 }
6095 if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
6096 *aa = a1;
6097 *bb = a2bL;
6098 *cc = c;
6099 return 4;
6100 }
6101
6102 v56:
6103 /* -----and------ */
6104 /* --xor--- */
6105 /* find variant 5: a1 ^ (c & (a2 ^ b)) */
6106 /* find variant 6: a1 ^ (c & (b ^ a2)) */
6107 a1 = and = xor = c = a2bL = a2bR = NULL;
6108
6109 a1 = LL(e);
6110 and = STEP(RR(e));
6111 if (!ISBIN(and, opAND)) goto v78;
6112 xor = STEP(RR(and));
6113 c = LL(and);
6114 if (!ISBIN(xor, opXOR)) goto v78;
6115 a2bL = LL(xor);
6116 a2bR = RR(xor);
6117
6118 if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
6119 *aa = a1;
6120 *bb = a2bR;
6121 *cc = c;
6122 vassert(5-5); // ATC
6123 return 5;
6124 }
6125 if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
6126 *aa = a1;
6127 *bb = a2bL;
6128 *cc = c;
6129 vassert(6-6); // ATC
6130 return 6;
6131 }
6132
6133 v78:
6134 /* -----and------ */
6135 /* --xor--- */
6136 /* find variant 7: (c & (a2 ^ b)) ^ a1 */
6137 /* find variant 8: (c & (b ^ a2)) ^ a1 */
6138 a1 = and = xor = c = a2bL = a2bR = NULL;
6139
6140 a1 = RR(e);
6141 and = STEP(LL(e));
6142 if (!ISBIN(and, opAND)) goto fail;
6143 xor = STEP(RR(and));
6144 c = LL(and);
6145 if (!ISBIN(xor, opXOR)) goto fail;
6146 a2bL = LL(xor);
6147 a2bR = RR(xor);
6148
6149 if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
6150 *aa = a1;
6151 *bb = a2bR;
6152 *cc = c;
6153 return 7;
6154 }
6155 if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
6156 *aa = a1;
6157 *bb = a2bL;
6158 *cc = c;
6159 return 8;
6160 }
6161
6162 fail:
6163 return 0;
6164
6165 # undef ISBIN
6166 # undef ISATOM
6167 # undef STEP
6168 # undef LL
6169 # undef RR
6170 }
6171
6172 /* If |e| is of the form a ^ ((a ^ b) & c) (or any of the 7 other
6173 variants thereof generated by switching arguments around), return
6174 the IRExpr* for (a & ~c) | (b & c). Else return NULL. */
6175 static IRExpr* do_XOR_TRANSFORMS_IRExpr ( IRExpr** env, IRExpr* e )
6176 {
6177 if (e->tag != Iex_Binop)
6178 return NULL;
6179
6180 const HChar* tyNm = NULL;
6181 IROp opOR = Iop_INVALID;
6182 IROp opAND = Iop_INVALID;
6183 IROp opNOT = Iop_INVALID;
6184 IROp opXOR = Iop_INVALID;
6185 switch (e->Iex.Binop.op) {
6186 case Iop_Xor32:
6187 tyNm = "I32";
6188 opOR = Iop_Or32; opAND = Iop_And32;
6189 opNOT = Iop_Not32; opXOR = Iop_Xor32;
6190 break;
6191 case Iop_Xor16:
6192 tyNm = "I16";
6193 opOR = Iop_Or16; opAND = Iop_And16;
6194 opNOT = Iop_Not16; opXOR = Iop_Xor16;
6195 break;
6196 case Iop_Xor8:
6197 tyNm = "I8";
6198 opOR = Iop_Or8; opAND = Iop_And8;
6199 opNOT = Iop_Not8; opXOR = Iop_Xor8;
6200 break;
6201 default:
6202 return NULL;
6203 }
6204
6205 IRExpr* aa = NULL;
6206 IRExpr* bb = NULL;
6207 IRExpr* cc = NULL;
6208 UInt variant = spotBitfieldAssignment(&aa, &bb, &cc, env, e, opAND, opXOR);
6209 if (variant > 0) {
6210 static UInt ctr = 0;
6211 if (0)
6212 vex_printf("XXXXXXXXXX Bitfield Assignment number %u, "
6213 "type %s, variant %u\n",
6214 ++ctr, tyNm, variant);
6215 /* It's vitally important that the returned aa, bb and cc are
6216 atoms -- either constants or tmps. If it's anything else
6217 (eg, a GET) then incorporating them in a tree at this point
6218 in the SB may erroneously pull them forwards (e.g. ahead of a PUT
6219 that originally came after the GET) and so transform the IR
6220 wrongly. spotBitfieldAssignment should guarantee only to
6221 give us atoms, but we check here anyway. */
6222 vassert(aa && isIRAtom(aa));
6223 vassert(bb && isIRAtom(bb));
6224 vassert(cc && isIRAtom(cc));
6225 return IRExpr_Binop(
6226 opOR,
6227 IRExpr_Binop(opAND, aa, IRExpr_Unop(opNOT, cc)),
6228 IRExpr_Binop(opAND, bb, cc)
6229 );
6230 }
6231 return NULL;
6232 }
6233
6234
6235 /* SB is modified in-place. Visit all the IRExprs and, for those
6236 which are allowed to be non-atomic, perform the XOR transform if
6237 possible. This makes |sb| non-flat, but that's OK; the caller
6238 can re-flatten it. Returns True iff any changes were made. */
6239 static Bool do_XOR_TRANSFORM_IRSB ( IRSB* sb )
6240 {
6241 Int i;
6242 Bool changed = False;
6243
6244 /* Make the tmp->expr environment, so we can use it for
6245 chasing expressions. */
6246 Int n_tmps = sb->tyenv->types_used;
6247 IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
6248 for (i = 0; i < n_tmps; i++)
6249 env[i] = NULL;
6250
6251 for (i = 0; i < sb->stmts_used; i++) {
6252 IRStmt* st = sb->stmts[i];
6253 if (st->tag != Ist_WrTmp)
6254 continue;
6255 IRTemp t = st->Ist.WrTmp.tmp;
6256 vassert(t >= 0 && t < n_tmps);
6257 env[t] = st->Ist.WrTmp.data;
6258 }
6259
6260 for (i = 0; i < sb->stmts_used; i++) {
6261 IRStmt* st = sb->stmts[i];
6262
6263 switch (st->tag) {
6264 case Ist_AbiHint:
6265 vassert(isIRAtom(st->Ist.AbiHint.base));
6266 vassert(isIRAtom(st->Ist.AbiHint.nia));
6267 break;
6268 case Ist_Put:
6269 vassert(isIRAtom(st->Ist.Put.data));
6270 break;
6271 case Ist_PutI: {
6272 IRPutI* puti = st->Ist.PutI.details;
6273 vassert(isIRAtom(puti->ix));
6274 vassert(isIRAtom(puti->data));
6275 break;
6276 }
6277 case Ist_WrTmp: {
6278 /* This is the one place where an expr (st->Ist.WrTmp.data) is
6279 allowed to be more than just a constant or a tmp. */
6280 IRExpr* mb_new_data
6281 = do_XOR_TRANSFORMS_IRExpr(env, st->Ist.WrTmp.data);
6282 if (mb_new_data) {
6283 //ppIRSB(sb);
6284 st->Ist.WrTmp.data = mb_new_data;
6285 //ppIRSB(sb);
6286 changed = True;
6287 }
6288 break;
6289 }
6290 case Ist_Store:
6291 vassert(isIRAtom(st->Ist.Store.addr));
6292 vassert(isIRAtom(st->Ist.Store.data));
6293 break;
6294 case Ist_StoreG: {
6295 IRStoreG* sg = st->Ist.StoreG.details;
6296 vassert(isIRAtom(sg->addr));
6297 vassert(isIRAtom(sg->data));
6298 vassert(isIRAtom(sg->guard));
6299 break;
6300 }
6301 case Ist_LoadG: {
6302 IRLoadG* lg = st->Ist.LoadG.details;
6303 vassert(isIRAtom(lg->addr));
6304 vassert(isIRAtom(lg->alt));
6305 vassert(isIRAtom(lg->guard));
6306 break;
6307 }
6308 case Ist_CAS: {
6309 IRCAS* cas = st->Ist.CAS.details;
6310 vassert(isIRAtom(cas->addr));
6311 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
6312 vassert(isIRAtom(cas->expdLo));
6313 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
6314 vassert(isIRAtom(cas->dataLo));
6315 break;
6316 }
6317 case Ist_LLSC:
6318 vassert(isIRAtom(st->Ist.LLSC.addr));
6319 if (st->Ist.LLSC.storedata)
6320 vassert(isIRAtom(st->Ist.LLSC.storedata));
6321 break;
6322 case Ist_Dirty: {
6323 IRDirty* d = st->Ist.Dirty.details;
6324 if (d->mFx != Ifx_None) {
6325 vassert(isIRAtom(d->mAddr));
6326 }
6327 vassert(isIRAtom(d->guard));
6328 for (Int j = 0; d->args[j]; j++) {
6329 IRExpr* arg = d->args[j];
6330 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
6331 vassert(isIRAtom(arg));
6332 }
6333 }
6334 break;
6335 }
6336 case Ist_IMark:
6337 case Ist_NoOp:
6338 case Ist_MBE:
6339 break;
6340 case Ist_Exit:
6341 vassert(isIRAtom(st->Ist.Exit.guard));
6342 break;
6343 default:
6344 vex_printf("\n"); ppIRStmt(st);
6345 vpanic("do_XOR_TRANSFORM_IRSB");
6346 }
6347 }
6348
6349 vassert(isIRAtom(sb->next));
6350 return changed;
6351 }
6352
6353
6354 static IRSB* do_MSVC_HACKS ( IRSB* sb )
6355 {
6356 // Normalise as much as we can. This is the one-and-only place
6357 // where we call do_cse_BB with allowLoadsToBeCSEd set to True.
6358 Bool any_cse_changes = do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ );
6359 if (any_cse_changes) {
6360 // CSEing might have created dead code. Remove it.
6361 sb = cprop_BB ( sb );
6362 do_deadcode_BB(sb);
6363 }
6364
6365 // Visit all atoms, do the transformation proper. bb is modified
6366 // in-place.
6367 Bool changed = do_XOR_TRANSFORM_IRSB(sb);
6368
6369 if (changed) {
6370 // The transformation generates non-flat expressions, so we now
6371 // need to re-flatten the block.
6372 sb = flatten_BB(sb);
6373 }
6374
6375 return sb;
6376 }
6377
6378
6379 /*---------------------------------------------------------------*/
6380 /*--- iropt main ---*/
6381 /*---------------------------------------------------------------*/
6382
6383 static Bool iropt_verbose = False; /* True; */
6384
6385
6386 /* Do a simple cleanup pass on bb. This is: redundant Get removal,
6387 redundant Put removal, constant propagation, dead code removal,
6388 clean helper specialisation, and dead code removal (again).
6389 */
6390
6391
6392 static
6393 IRSB* cheap_transformations (
6394 IRSB* bb,
6395 IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
6396 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
6397 VexRegisterUpdates pxControl
6398 )
6399 {
6400 redundant_get_removal_BB ( bb );
6401 if (iropt_verbose) {
6402 vex_printf("\n========= REDUNDANT GET\n\n" );
6403 ppIRSB(bb);
6404 }
6405
6406 if (pxControl < VexRegUpdAllregsAtEachInsn) {
6407 redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
6408 }
6409 if (iropt_verbose) {
6410 vex_printf("\n========= REDUNDANT PUT\n\n" );
6411 ppIRSB(bb);
6412 }
6413
6414 bb = cprop_BB ( bb );
6415 if (iropt_verbose) {
6416 vex_printf("\n========= CPROPD\n\n" );
6417 ppIRSB(bb);
6418 }
6419
6420 do_deadcode_BB ( bb );
6421 if (iropt_verbose) {
6422 vex_printf("\n========= DEAD\n\n" );
6423 ppIRSB(bb);
6424 }
6425
6426 bb = spec_helpers_BB ( bb, specHelper );
6427 do_deadcode_BB ( bb );
6428 if (iropt_verbose) {
6429 vex_printf("\n========= SPECd \n\n" );
6430 ppIRSB(bb);
6431 }
6432
6433 return bb;
6434 }
6435
6436
6437 /* Do some more expensive transformations on bb, which are aimed at
6438 optimising as much as possible in the presence of GetI and PutI. */
6439
6440 static
6441 IRSB* expensive_transformations( IRSB* bb, VexRegisterUpdates pxControl )
6442 {
6443 (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
6444 collapse_AddSub_chains_BB( bb );
6445 do_redundant_GetI_elimination( bb );
6446 if (pxControl < VexRegUpdAllregsAtEachInsn) {
6447 do_redundant_PutI_elimination( bb, pxControl );
6448 }
6449 do_deadcode_BB( bb );
6450 return bb;
6451 }
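
/* A sketch of why collapsing Add/Sub chains helps here (hypothetical
   IR, for illustration only): indexed accesses often compute their
   index as a chain such as Add32(Add32(t,0x4:I32),0x8:I32).
   Collapsing it to Add32(t,0xC:I32) makes two GetIs (or a GetI
   following a PutI) at the same element syntactically identical, so
   the redundant-GetI and redundant-PutI passes can spot them. */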
6452
6453
6454 /* Scan a flattened BB to look for signs that more expensive
6455 optimisations might be useful:
6456 - find out if there are any GetIs and PutIs
6457 - find out if there are any floating or vector-typed temporaries
6458 */
6459
6460 static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
6461 /*OUT*/Bool* hasVorFtemps,
6462 IRSB* bb )
6463 {
6464 Int i, j;
6465 IRStmt* st;
6466 IRDirty* d;
6467 IRCAS* cas;
6468
6469 *hasGetIorPutI = False;
6470 *hasVorFtemps = False;
6471
6472 for (i = 0; i < bb->stmts_used; i++) {
6473 st = bb->stmts[i];
6474 switch (st->tag) {
6475 case Ist_AbiHint:
6476 vassert(isIRAtom(st->Ist.AbiHint.base));
6477 vassert(isIRAtom(st->Ist.AbiHint.nia));
6478 break;
6479 case Ist_PutI:
6480 *hasGetIorPutI = True;
6481 break;
6482 case Ist_WrTmp:
6483 if (st->Ist.WrTmp.data->tag == Iex_GetI)
6484 *hasGetIorPutI = True;
6485 switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
6486 case Ity_I1: case Ity_I8: case Ity_I16:
6487 case Ity_I32: case Ity_I64: case Ity_I128:
6488 break;
6489 case Ity_F16: case Ity_F32: case Ity_F64: case Ity_F128:
6490 case Ity_V128: case Ity_V256:
6491 *hasVorFtemps = True;
6492 break;
6493 case Ity_D32: case Ity_D64: case Ity_D128:
6494 *hasVorFtemps = True;
6495 break;
6496 default:
6497 goto bad;
6498 }
6499 break;
6500 case Ist_Put:
6501 vassert(isIRAtom(st->Ist.Put.data));
6502 break;
6503 case Ist_Store:
6504 vassert(isIRAtom(st->Ist.Store.addr));
6505 vassert(isIRAtom(st->Ist.Store.data));
6506 break;
6507 case Ist_StoreG: {
6508 IRStoreG* sg = st->Ist.StoreG.details;
6509 vassert(isIRAtom(sg->addr));
6510 vassert(isIRAtom(sg->data));
6511 vassert(isIRAtom(sg->guard));
6512 break;
6513 }
6514 case Ist_LoadG: {
6515 IRLoadG* lg = st->Ist.LoadG.details;
6516 vassert(isIRAtom(lg->addr));
6517 vassert(isIRAtom(lg->alt));
6518 vassert(isIRAtom(lg->guard));
6519 break;
6520 }
6521 case Ist_CAS:
6522 cas = st->Ist.CAS.details;
6523 vassert(isIRAtom(cas->addr));
6524 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
6525 vassert(isIRAtom(cas->expdLo));
6526 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
6527 vassert(isIRAtom(cas->dataLo));
6528 break;
6529 case Ist_LLSC:
6530 vassert(isIRAtom(st->Ist.LLSC.addr));
6531 if (st->Ist.LLSC.storedata)
6532 vassert(isIRAtom(st->Ist.LLSC.storedata));
6533 break;
6534 case Ist_Dirty:
6535 d = st->Ist.Dirty.details;
6536 vassert(isIRAtom(d->guard));
6537 for (j = 0; d->args[j]; j++) {
6538 IRExpr* arg = d->args[j];
6539 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
6540 vassert(isIRAtom(arg));
6541 }
6542 if (d->mFx != Ifx_None)
6543 vassert(isIRAtom(d->mAddr));
6544 break;
6545 case Ist_NoOp:
6546 case Ist_IMark:
6547 case Ist_MBE:
6548 break;
6549 case Ist_Exit:
6550 vassert(isIRAtom(st->Ist.Exit.guard));
6551 break;
6552 default:
6553 bad:
6554 ppIRStmt(st);
6555 vpanic("considerExpensives");
6556 }
6557 }
6558 }
6559
6560
6561 /* ---------------- The main iropt entry point. ---------------- */
6562
6563 /* exported from this file */
6564 /* Rules of the game:
6565
6566 - IRExpr/IRStmt trees should be treated as immutable, as they
6567 may get shared. So never change a field of such a tree node;
6568 instead construct and return a new one if needed.
6569 */
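
/* For instance (an illustrative sketch only): to "change" one
   argument of a binop, construct a fresh node rather than assigning
   into the possibly-shared one:

      // wrong: mutates a tree that may be shared
      //   e->Iex.Binop.arg2 = newArg2;
      // right: build and return a replacement node
      //   IRExpr* e2 = IRExpr_Binop(e->Iex.Binop.op,
      //                             e->Iex.Binop.arg1, newArg2);
*/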
6570
6571
6572 IRSB* do_iropt_BB(
6573 IRSB* bb0,
6574 IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
6575 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
6576 VexRegisterUpdates pxControl,
6577 Addr guest_addr,
6578 VexArch guest_arch
6579 )
6580 {
6581 static Int n_total = 0;
6582 static Int n_expensive = 0;
6583
6584 Bool hasGetIorPutI, hasVorFtemps;
6585 IRSB *bb, *bb2;
6586
6587 n_total++;
6588
6589 /* First flatten the block out, since all other
6590 phases assume flat code. */
6591
6592 bb = flatten_BB ( bb0 );
6593
6594 if (iropt_verbose) {
6595 vex_printf("\n========= FLAT\n\n" );
6596 ppIRSB(bb);
6597 }
6598
6599 /* If at level 0, stop now. */
6600 if (vex_control.iropt_level <= 0) return bb;
6601
6602 /* Now do a preliminary cleanup pass, and figure out if we also
6603 need to do 'expensive' optimisations. Expensive optimisations
6604 are deemed necessary if the block contains any GetIs or PutIs.
6605 If needed, do expensive transformations and then another cheap
6606 cleanup pass. */
6607
6608 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn, pxControl );
6609
6610 if (guest_arch == VexArchARM) {
6611 /* Translating Thumb2 code produces a lot of chaff. We have to
6612 work extra hard to get rid of it. */
6613 bb = cprop_BB(bb);
6614 bb = spec_helpers_BB ( bb, specHelper );
6615 if (pxControl < VexRegUpdAllregsAtEachInsn) {
6616 redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
6617 }
6618 do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
6619 do_deadcode_BB( bb );
6620 }
6621
6622 if (vex_control.iropt_level > 1) {
6623
6624 /* Peer at what we have, to decide how much more effort to throw
6625 at it. */
6626 considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );
6627
6628 if (hasVorFtemps && !hasGetIorPutI) {
6629 /* If any evidence of FP or Vector activity, CSE, as that
6630 tends to mop up all manner of lardy code to do with
6631 rounding modes. Don't bother if hasGetIorPutI since that
6632 case leads into the expensive transformations, which do
6633 CSE anyway. */
6634 (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
6635 do_deadcode_BB( bb );
6636 }
6637
6638 if (hasGetIorPutI) {
6639 Bool cses;
6640 n_expensive++;
6641 if (DEBUG_IROPT)
6642 vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
6643 bb = expensive_transformations( bb, pxControl );
6644 bb = cheap_transformations( bb, specHelper,
6645 preciseMemExnsFn, pxControl );
6646 /* Potentially common up GetIs */
6647 cses = do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
6648 if (cses)
6649 bb = cheap_transformations( bb, specHelper,
6650 preciseMemExnsFn, pxControl );
6651 }
6652
6653 ///////////////////////////////////////////////////////////
6654 // BEGIN MSVC optimised code transformation hacks
6655 if (0)
6656 bb = do_MSVC_HACKS(bb);
6657 // END MSVC optimised code transformation hacks
6658 ///////////////////////////////////////////////////////////
6659
6660 /* Now have a go at unrolling simple (single-BB) loops. If
6661 successful, clean up the results as much as possible. */
6662
6663 bb2 = maybe_loop_unroll_BB( bb, guest_addr );
6664 if (bb2) {
6665 bb = cheap_transformations( bb2, specHelper,
6666 preciseMemExnsFn, pxControl );
6667 if (hasGetIorPutI) {
6668 bb = expensive_transformations( bb, pxControl );
6669 bb = cheap_transformations( bb, specHelper,
6670 preciseMemExnsFn, pxControl );
6671 } else {
6672 /* at least do CSE and dead code removal */
6673 do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
6674 do_deadcode_BB( bb );
6675 }
6676 if (0) vex_printf("vex iropt: unrolled a loop\n");
6677 }
6678
6679 }
6680
6681 return bb;
6682 }
6683
6684
6685 /*---------------------------------------------------------------*/
6686 /*--- end ir_opt.c ---*/
6687 /*---------------------------------------------------------------*/
6688