/* -*- mode: C; c-basic-offset: 3; -*- */

/*---------------------------------------------------------------*/
/*--- begin                                          ir_opt.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "ir_opt.h"


/* Set to 1 for lots of debugging output. */
#define DEBUG_IROPT 0

/* Set to 1 to gather some statistics.  Currently only for sameIRExprs. */
#define STATS_IROPT 0


/* What iropt does, 29 Dec 04.

   It takes an IRSB and produces a new one with the same meaning,
   defined thus:

   After execution of the new BB, all guest state and guest memory is
   the same as after execution of the original.  This is true
   regardless of how the block was exited (at the end vs side exit).

   In addition, parts of the guest state will be identical to that
   created by execution of the original at the following observation
   points:

   * In a dirty helper call, any parts of the guest state that the
     helper states that it reads or modifies will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as being read or modified by the helper cannot be
     assumed to be up-to-date at the point where the helper is called.
   * If iropt_register_updates == VexRegUpdSpAtMemAccess :
     The guest state is only up to date as explained above
     (i.e. at SB exits and as specified by dirty helper calls).
     Also, the stack pointer register is up to date at memory
     exception points (as this is needed for the stack extension
     logic in m_signals.c).

   * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
     Immediately prior to any load or store, those parts of the guest
     state marked as requiring precise exceptions will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as requiring precise exceptions cannot be assumed to
     be up-to-date at the point of the load/store.

   * If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
     Same as minimal, but all the guest state is up to date at memory
     exception points.

   * If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
     Guest state is up to date at each instruction.

   The relative order of loads and stores (including loads/stores of
   guest memory done by dirty helpers annotated as such) is not
   changed.  However, the relative order of loads with no intervening
   stores/modifies may be changed.

   Transformation order
   ~~~~~~~~~~~~~~~~~~~~

   There are three levels of optimisation, controlled by
   vex_control.iropt_level.  Define first:

   "Cheap transformations" are the following sequence:
      * Redundant-Get removal
      * Redundant-Put removal
      * Constant propagation/folding
      * Dead code removal
      * Specialisation of clean helper functions
      * Dead code removal

   "Expensive transformations" are the following sequence:
      * CSE
      * Folding of add/sub chains
      * Redundant-GetI removal
      * Redundant-PutI removal
      * Dead code removal

   Then the transformations are as follows, as defined by
   vex_control.iropt_level:

   Level 0:
      * Flatten into atomic form.

   Level 1: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.

   Level 2: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.
      * If block contains any floating or vector types, CSE.
      * If block contains GetI or PutI, Expensive transformations.
      * Try unrolling loops.  Three possible outcomes:
        - No effect: do nothing more.
        - Unrolled a loop, and block does not contain GetI or PutI:
          Do: * CSE
              * Dead code removal
        - Unrolled a loop, and block contains GetI or PutI:
          Do: * Expensive transformations
              * Cheap transformations
*/
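
/* Illustrative sketch only, not part of the build: the level-2
   sequence above, written out as straight-line control flow.  The
   helper names used here (cheap_transformations,
   expensive_transformations, do_cse, dead_code_removal,
   maybe_unroll_loop and the block_contains_* predicates) are
   hypothetical stand-ins for the passes implemented later in this
   file; only flatten_BB is defined in this excerpt. */
#if 0
static IRSB* iropt_level2_sketch ( IRSB* bb )
{
   bb = flatten_BB(bb);
   bb = cheap_transformations(bb);
   if (block_contains_float_or_vector(bb))
      bb = do_cse(bb);
   if (block_contains_GetI_or_PutI(bb))
      bb = expensive_transformations(bb);
   IRSB* unrolled = maybe_unroll_loop(bb);
   if (unrolled) {
      bb = unrolled;
      if (block_contains_GetI_or_PutI(bb)) {
         bb = expensive_transformations(bb);
         bb = cheap_transformations(bb);
      } else {
         bb = do_cse(bb);
         bb = dead_code_removal(bb);
      }
   }
   return bb;
}
#endif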

/* Implementation notes, 29 Dec 04.

   TODO (important): I think rPutI removal ignores precise exceptions
   and is therefore, in a sense, wrong.  Specifically, PutIs are
   assumed not to write parts of the guest state that we need to have
   up-to-date at loads/stores.  So far on the x86 guest that has not
   mattered, since indeed only the x87 FP registers and tags are
   accessed using GetI/PutI, and there has been no need so far for
   them to be up to date at mem exception points.  The rPutI pass
   should be fixed.

   TODO: improve pessimistic handling of precise exceptions
     in the tree builder.

   TODO: check interaction of rGetI and dirty helpers.

   F64i constants are treated differently from other constants.
   They are not regarded as atoms, and instead lifted off and
   bound to temps.  This allows them to participate in CSE, which
   is important for getting good performance for x86 guest code.

   TODO: CSE up F64 literals (already doing F64is).

   CSE: consider carefully the requirement for precise exns
     prior to making CSE any more aggressive. */


/*---------------------------------------------------------------*/
/*--- Finite mappery, of a sort                               ---*/
/*---------------------------------------------------------------*/

/* General map from HWord-sized thing to HWord-sized thing.  Could be
   done by hashing, but it's not clear whether or not this would
   really be any faster. */

typedef
   struct {
      Bool*  inuse;
      HWord* key;
      HWord* val;
      Int    size;
      Int    used;
   }
   HashHW;

static HashHW* newHHW ( void )
{
   HashHW* h = LibVEX_Alloc_inline(sizeof(HashHW));
   h->size  = 8;
   h->used  = 0;
   h->inuse = LibVEX_Alloc_inline(h->size * sizeof(Bool));
   h->key   = LibVEX_Alloc_inline(h->size * sizeof(HWord));
   h->val   = LibVEX_Alloc_inline(h->size * sizeof(HWord));
   return h;
}


/* Look up key in the map. */

static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
{
   Int i;
   /* vex_printf("lookupHHW(%llx)\n", key ); */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         if (val)
            *val = h->val[i];
         return True;
      }
   }
   return False;
}


/* Add key->val to the map.  Replaces any existing binding for key. */

static void addToHHW ( HashHW* h, HWord key, HWord val )
{
   Int i, j;
   /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */

   /* Find and replace existing binding, if any. */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         h->val[i] = val;
         return;
      }
   }

   /* Ensure a space is available. */
   if (h->used == h->size) {
      /* Copy into arrays twice the size. */
      Bool*  inuse2 = LibVEX_Alloc_inline(2 * h->size * sizeof(Bool));
      HWord* key2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
      HWord* val2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
      for (i = j = 0; i < h->size; i++) {
         if (!h->inuse[i]) continue;
         inuse2[j] = True;
         key2[j]   = h->key[i];
         val2[j]   = h->val[i];
         j++;
      }
      h->used  = j;
      h->size *= 2;
      h->inuse = inuse2;
      h->key   = key2;
      h->val   = val2;
   }

   /* Finally, add it. */
   vassert(h->used < h->size);
   h->inuse[h->used] = True;
   h->key[h->used]   = key;
   h->val[h->used]   = val;
   h->used++;
}
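
/* Illustrative sketch only, not part of the build: how the map is
   intended to be used.  exampleHHWUsage is a hypothetical name.
   Note the replace-on-re-add semantics, and that deletion elsewhere
   in this file is done by clearing h->inuse[i] directly rather than
   via any remove function. */
#if 0
static void exampleHHWUsage ( void )
{
   HashHW* h = newHHW();
   HWord   v = 0;
   addToHHW(h, /*key*/0x10, /*val*/0x1234);
   addToHHW(h, /*key*/0x10, /*val*/0x5678);   /* replaces the binding */
   if (lookupHHW(h, &v, 0x10))
      vassert(v == 0x5678);
   vassert(!lookupHHW(h, NULL, 0x20));        /* absent key */
}
#endif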


/*---------------------------------------------------------------*/
/*--- Flattening out a BB into atomic SSA form                ---*/
/*---------------------------------------------------------------*/

/* Non-critical helper, heuristic for reducing the number of tmp-tmp
   copies made by flattening.  If in doubt return False. */

static Bool isFlat ( IRExpr* e )
{
   if (e->tag == Iex_Get)
      return True;
   if (e->tag == Iex_Binop)
      return toBool( isIRAtom(e->Iex.Binop.arg1)
                     && isIRAtom(e->Iex.Binop.arg2) );
   if (e->tag == Iex_Load)
      return isIRAtom(e->Iex.Load.addr);
   return False;
}

/* Flatten out 'ex' so it is atomic, returning a new expression with
   the same value, after having appended extra IRTemp assignments to
   the end of 'bb'. */

static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop: {
         IRQop* qop = ex->Iex.Qop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(qop->op,
                       flatten_Expr(bb, qop->arg1),
                       flatten_Expr(bb, qop->arg2),
                       flatten_Expr(bb, qop->arg3),
                       flatten_Expr(bb, qop->arg4))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(triop->op,
                         flatten_Expr(bb, triop->arg1),
                         flatten_Expr(bb, triop->arg2),
                         flatten_Expr(bb, triop->arg3))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_ITE:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
                       flatten_Expr(bb, ex->Iex.ITE.iftrue),
                       flatten_Expr(bb, ex->Iex.ITE.iffalse))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         return ex;

      default:
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}


/* Append a completely flattened form of 'st' to the end of 'bb'. */

static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr   *e1, *e2, *e3, *e4, *e5;
   IRDirty  *d,  *d2;
   IRCAS    *cas, *cas2;
   IRPutI   *puti, *puti2;
   IRLoadG  *lg;
   IRStoreG *sg;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         puti = st->Ist.PutI.details;
         e1 = flatten_Expr(bb, puti->ix);
         e2 = flatten_Expr(bb, puti->data);
         puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
         addStmtToIRSB(bb, IRStmt_PutI(puti2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1, e2));
         break;
      case Ist_StoreG:
         sg = st->Ist.StoreG.details;
         e1 = flatten_Expr(bb, sg->addr);
         e2 = flatten_Expr(bb, sg->data);
         e3 = flatten_Expr(bb, sg->guard);
         addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3));
         break;
      case Ist_LoadG:
         lg = st->Ist.LoadG.details;
         e1 = flatten_Expr(bb, lg->addr);
         e2 = flatten_Expr(bb, lg->alt);
         e3 = flatten_Expr(bb, lg->guard);
         addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                                        e1, e2, e3));
         break;
      case Ist_CAS:
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
               d2->args[i] = flatten_Expr(bb, arg);
         }
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst,
                                       st->Ist.Exit.offsIP));
         break;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}


static IRSB* flatten_BB ( IRSB* in )
{
   Int   i;
   IRSB* out;
   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   for (i = 0; i < in->stmts_used; i++)
      if (in->stmts[i])
         flatten_Stmt( out, in->stmts[i] );
   out->next     = flatten_Expr( out, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;
   return out;
}


/*---------------------------------------------------------------*/
/*--- In-place removal of redundant GETs                      ---*/
/*---------------------------------------------------------------*/

/* Scan forwards, building up an environment binding (min offset, max
   offset) pairs to values, which will either be temps or constants.

   On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
   env and if it matches, replace the Get with the stored value.  If
   there is no match, add a (minoff,maxoff) :-> t binding.

   On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
   any binding which fully or partially overlaps with (minoff,maxoff).
   Then add a new (minoff,maxoff) :-> t or c binding. */
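
/* For concreteness, a sketch (in IR pseudo-notation, not real code)
   of the effect of this pass, assuming offsets 0..3 hold a 32-bit
   guest register and all the temps are I32-typed:

      t1 = GET:I32(0)              t1 = GET:I32(0)
      t2 = GET:I32(0)        ==>   t2 = t1
      PUT(0) = t3                  PUT(0) = t3
      t4 = GET:I32(0)              t4 = t3

   The second Get is replaced by the tmp bound to the first; the Put
   invalidates that binding but installs its own data in its place,
   so the third Get is replaced by t3. */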

/* Extract the min/max offsets from a guest state array descriptor. */

inline
static void getArrayBounds ( IRRegArray* descr,
                             UInt* minoff, UInt* maxoff )
{
   *minoff = descr->base;
   *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
   vassert((*minoff & ~0xFFFF) == 0);
   vassert((*maxoff & ~0xFFFF) == 0);
   vassert(*minoff <= *maxoff);
}

/* Create keys, of the form ((minoffset << 16) | maxoffset). */

static UInt mk_key_GetPut ( Int offset, IRType ty )
{
   /* offset should fit in 16 bits. */
   UInt minoff = offset;
   UInt maxoff = minoff + sizeofIRType(ty) - 1;
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}

static UInt mk_key_GetIPutI ( IRRegArray* descr )
{
   UInt minoff, maxoff;
   getArrayBounds( descr, &minoff, &maxoff );
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}

/* Supposing h has keys of the form generated by mk_key_GetPut and
   mk_key_GetIPutI, invalidate any key which overlaps (k_lo
   .. k_hi).
*/
static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
{
   Int  j;
   UInt e_lo, e_hi;
   vassert(k_lo <= k_hi);
   /* invalidate any env entries which in any way overlap (k_lo
      .. k_hi) */
   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */

   for (j = 0; j < h->used; j++) {
      if (!h->inuse[j])
         continue;
      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
      e_hi = ((UInt)h->key[j]) & 0xFFFF;
      vassert(e_lo <= e_hi);
      if (e_hi < k_lo || k_hi < e_lo)
         continue; /* no overlap possible */
      else
         /* overlap; invalidate */
         h->inuse[j] = False;
   }
}
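
/* Worked example, for clarity: a 4-byte access at offset 16 gives
   minoff=16, maxoff=19, so mk_key_GetPut(16, Ity_I32)
   == (16 << 16) | 19 == 0x00100013.  A later 8-byte Put at offset 12
   covers (12 .. 19); since e_hi (19) >= k_lo (12) and k_hi (19)
   >= e_lo (16), the ranges overlap and invalidateOverlaps clears the
   entry.  A Put at offset 20 (covering 20 .. 27) would leave it
   alone, because e_hi (19) < k_lo (20). */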


static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: ");    ppIRExpr(get);
               vex_printf("  ->  ");    ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
               writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}


/*---------------------------------------------------------------*/
/*--- In-place removal of redundant PUTs                      ---*/
/*---------------------------------------------------------------*/

/* Find any Get uses in st and invalidate any partially or fully
   overlapping ranges listed in env.  Due to the flattening phase, the
   only stmt kind we expect to find a Get on is IRStmt_WrTmp. */

static void handle_gets_Stmt (
               HashHW* env,
               IRStmt* st,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   Int     j;
   UInt    key = 0; /* keep gcc -O happy */
   Bool    isGet;
   Bool    memRW = False;
   IRExpr* e;

   switch (st->tag) {

      /* This is the only interesting case.  Deal with Gets in the RHS
         expression. */
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
               isGet = True;
               key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
               break;
            case Iex_GetI:
               isGet = True;
               key = mk_key_GetIPutI ( e->Iex.GetI.descr );
               break;
            case Iex_Load:
               isGet = False;
               memRW = True;
               break;
            default:
               isGet = False;
         }
         if (isGet) {
            UInt k_lo, k_hi;
            k_lo = (key >> 16) & 0xFFFF;
            k_hi = key & 0xFFFF;
            invalidateOverlaps(env, k_lo, k_hi);
         }
         break;

      /* Be very conservative for dirty helper calls; dump the entire
         environment.  The helper might read guest state, in which
         case it needs to be flushed first.  Also, the helper might
         access guest memory, in which case all parts of the guest
         state requiring precise exceptions need to be flushed.  The
         crude solution is just to flush everything; we could easily
         enough do a lot better if needed. */
      /* Probably also overly-conservative, but also dump everything
         if we hit a memory bus event (fence, lock, unlock).  Ditto
         AbiHints, CASs, LLs and SCs. */
      case Ist_AbiHint:
         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));
         /* fall through */
      case Ist_MBE:
      case Ist_Dirty:
      case Ist_CAS:
      case Ist_LLSC:
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         break;

      /* all other cases are boring. */
      case Ist_Store:
         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));
         memRW = True;
         break;
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));
         memRW = True;
         break;
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));
         memRW = True;
         break;
      }
      case Ist_Exit:
         vassert(isIRAtom(st->Ist.Exit.guard));
         break;

      case Ist_Put:
         vassert(isIRAtom(st->Ist.Put.data));
         break;

      case Ist_PutI:
         vassert(isIRAtom(st->Ist.PutI.details->ix));
         vassert(isIRAtom(st->Ist.PutI.details->data));
         break;

      case Ist_NoOp:
      case Ist_IMark:
         break;

      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("handle_gets_Stmt");
   }

   if (memRW) {
      /* This statement accesses memory.  So we might need to dump all
         parts of the environment corresponding to guest state that may
         not be reordered with respect to memory references.  That means
         at least the stack pointer. */
      switch (pxControl) {
         case VexRegUpdAllregsAtMemAccess:
            /* Precise exceptions required at mem access.
               Flush all guest state. */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            break;
         case VexRegUpdSpAtMemAccess:
            /* We need to dump the stack pointer
               (needed for stack extension in m_signals.c).
               preciseMemExnsFn will use vex_control.iropt_register_updates
               to verify only the sp is to be checked. */
            /* fallthrough */
         case VexRegUpdUnwindregsAtMemAccess:
            for (j = 0; j < env->used; j++) {
               if (!env->inuse[j])
                  continue;
               /* Just flush the minimal amount required, as computed by
                  preciseMemExnsFn. */
               HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
               HWord k_hi = env->key[j] & 0xFFFF;
               if (preciseMemExnsFn( k_lo, k_hi, pxControl ))
                  env->inuse[j] = False;
            }
            break;
         case VexRegUpdAllregsAtEachInsn:
            // VexRegUpdAllregsAtEachInsn cannot happen here.
            // fall through
         case VexRegUpd_INVALID:
         default:
            vassert(0);
      }
   } /* if (memRW) */

}


/* Scan backwards, building up a set of (min offset, max
   offset) pairs, indicating those parts of the guest state
   for which the next event is a write.

   On seeing a conditional exit, empty the set.

   On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
   completely within the set, remove the Put.  Otherwise, add
   (minoff,maxoff) to the set.

   On seeing 'Get (minoff,maxoff)', remove any part of the set
   overlapping (minoff,maxoff).  The same has to happen for any events
   which implicitly read parts of the guest state: dirty helper calls
   and loads/stores.
*/
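
/* For concreteness, a sketch (IR pseudo-notation, not real code) of
   the effect of this pass.  Scanning backwards, the second PUT(0)
   makes the first one dead, provided nothing in between reads
   offsets 0..3 or can observe them via an exit or a memory access:

      PUT(0) = t1            ==>   IR-NoOp
      t2 = Add32(t3,t4)            t2 = Add32(t3,t4)
      PUT(0) = t2                  PUT(0) = t2
*/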

static void redundant_put_removal_BB (
               IRSB* bb,
               Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
               VexRegisterUpdates pxControl
            )
{
   Int     i, j;
   Bool    isPut;
   IRStmt* st;
   UInt    key = 0; /* keep gcc -O happy */

   vassert(pxControl < VexRegUpdAllregsAtEachInsn);

   HashHW* env = newHHW();

   /* Initialise the running env with the fact that the final exit
      writes the IP (or, whatever it claims to write.  We don't
      care.) */
   key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
   addToHHW(env, (HWord)key, 0);

   /* And now scan backwards through the statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {
      st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         //Bool re_add;
         /* Need to throw out from the env, any part of it which
            doesn't overlap with the guest state written by this exit.
            Since the exit only writes one section, it's simplest to
            do this: (1) check whether env contains a write that
            completely overlaps the write done by this exit; (2) empty
            out env; and (3) if (1) was true, add the write done by
            this exit.

            To make (1) a bit simpler, merely search for a write that
            exactly matches the one done by this exit.  That's safe
            because it will fail as often or more often than a full
            overlap check, and failure to find an overlapping write in
            env is the safe case (we just nuke env if that
            happens). */
         //vassert(isIRAtom(st->Ist.Exit.guard));
         /* (1) */
         //key = mk_key_GetPut(st->Ist.Exit.offsIP,
         //                    typeOfIRConst(st->Ist.Exit.dst));
         //re_add = lookupHHW(env, NULL, key);
         /* (2) */
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         /* (3) */
         //if (0 && re_add)
         //   addToHHW(env, (HWord)key, 0);
         continue;
      }

      /* Deal with Puts */
      switch (st->tag) {
         case Ist_Put:
            isPut = True;
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI:
            isPut = True;
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
            vassert(isIRAtom(st->Ist.PutI.details->ix));
            vassert(isIRAtom(st->Ist.PutI.details->data));
            break;
         default:
            isPut = False;
      }
      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
            if (DEBUG_IROPT) {
               vex_printf("rPUT: "); ppIRStmt(st);
               vex_printf("\n");
            }
            bb->stmts[i] = IRStmt_NoOp();
         } else {
            /* We can't demonstrate that this Put is redundant, so add
               it to the running collection. */
            addToHHW(env, (HWord)key, 0);
         }
         continue;
      }

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn, pxControl );
   }
}


/*---------------------------------------------------------------*/
/*--- Constant propagation and folding                        ---*/
/*---------------------------------------------------------------*/

#if STATS_IROPT
/* How often sameIRExprs was invoked */
static UInt invocation_count;
/* How often sameIRExprs recursed through IRTemp assignments */
static UInt recursion_count;
/* How often sameIRExprs found identical IRExprs */
static UInt success_count;
/* How often recursing through assignments to IRTemps helped
   establishing equality. */
static UInt recursion_success_count;
/* Whether or not recursing through an IRTemp assignment helped
   establishing IRExpr equality for a given sameIRExprs invocation. */
static Bool recursion_helped;
/* Whether or not a given sameIRExprs invocation recursed through an
   IRTemp assignment */
static Bool recursed;
/* Maximum number of nodes ever visited when comparing two IRExprs. */
static UInt max_nodes_visited;
#endif /* STATS_IROPT */

/* Count the number of nodes visited for a given sameIRExprs invocation. */
static UInt num_nodes_visited;

/* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
   This is to guard against performance degradation by visiting large
   trees without success. */
#define NODE_LIMIT 30


/* The env in this section is a map from IRTemp to IRExpr*,
   that is, an array indexed by IRTemp. */

/* Do both expressions compute the same value?  The answer is
   generally conservative, i.e. it will report that the expressions do
   not compute the same value when in fact they do.  The reason is
   that we do not keep track of changes in the guest state and memory.
   Thus, two Gets, GetIs or Loads, even when accessing the same
   location, will be assumed to compute different values.  After all,
   the accesses may happen at different times and the guest state /
   memory can have changed in the meantime.

   XXX IMPORTANT XXX the two expressions must have the same IR type.
   DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */

/* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
   wrapper that deals with the common tags-don't-match case, and a
   slower out of line general case.  Saves a few insns. */

__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );

inline
static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   if (e1->tag != e2->tag) return False;
   return sameIRExprs_aux2(env, e1, e2);
}

__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   if (num_nodes_visited++ > NODE_LIMIT) return False;

   switch (e1->tag) {
      case Iex_RdTmp:
         if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;

         if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
            Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
                                        env[e2->Iex.RdTmp.tmp]);
#if STATS_IROPT
            recursed = True;
            if (same) recursion_helped = True;
#endif
            return same;
         }
         return False;

      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* Guest state / memory could have changed in the meantime. */
         return False;

      case Iex_Binop:
         return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
                                                 e2->Iex.Binop.arg1 )
                        && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
                                                 e2->Iex.Binop.arg2 ));

      case Iex_Unop:
         return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
                        && sameIRExprs_aux( env, e1->Iex.Unop.arg,
                                                 e2->Iex.Unop.arg ));

      case Iex_Const: {
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         vassert(c1->tag == c2->tag);
         switch (c1->tag) {
            case Ico_U1:  return toBool( c1->Ico.U1  == c2->Ico.U1 );
            case Ico_U8:  return toBool( c1->Ico.U8  == c2->Ico.U8 );
            case Ico_U16: return toBool( c1->Ico.U16 == c2->Ico.U16 );
            case Ico_U32: return toBool( c1->Ico.U32 == c2->Ico.U32 );
            case Ico_U64: return toBool( c1->Ico.U64 == c2->Ico.U64 );
            default: break;
         }
         return False;
      }

      case Iex_Triop: {
         IRTriop *tri1 = e1->Iex.Triop.details;
         IRTriop *tri2 = e2->Iex.Triop.details;
         return toBool( tri1->op == tri2->op
                        && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
                        && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
                        && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
      }

      case Iex_ITE:
         return toBool(    sameIRExprs_aux( env, e1->Iex.ITE.cond,
                                                 e2->Iex.ITE.cond )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iftrue,
                                                 e2->Iex.ITE.iftrue )
                        && sameIRExprs_aux( env, e1->Iex.ITE.iffalse,
                                                 e2->Iex.ITE.iffalse ));

      default:
         /* Not very likely to be "same". */
         break;
   }

   return False;
}

inline
static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
   Bool same;

   num_nodes_visited = 0;
   same = sameIRExprs_aux(env, e1, e2);

#if STATS_IROPT
   ++invocation_count;
   if (recursed) ++recursion_count;
   success_count += same;
   if (same && recursion_helped)
      ++recursion_success_count;
   if (num_nodes_visited > max_nodes_visited)
      max_nodes_visited = num_nodes_visited;
   recursed = False; /* reset */
   recursion_helped = False; /* reset */
#endif /* STATS_IROPT */

   return same;
}
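
/* For concreteness (OFFSET and the tmp names here are illustrative):
   given a tmp environment where env[t1] = Add32(t0, 0x1:I32) and
   env[t2] = Add32(t0, 0x1:I32), sameIRExprs(env, RdTmp(t1),
   RdTmp(t2)) returns True: the RdTmp case recurses through both
   bindings and compares the Add32 trees node by node.  By contrast,
   bindings of Get(OFFSET, Ity_I32) to both t1 and t2 compare as
   not-same, since guest state may have changed between the two
   reads. */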


/* Debugging-only hack (not used in production runs): make a guess
   whether sameIRExprs might assert due to the two args being of
   different types.  If in doubt return False.  Is only used when
   --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0.
   Bad because it duplicates functionality from typeOfIRExpr.  See
   comment on the single use point below for rationale. */
static
Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
{
   if (e1->tag != e2->tag) return False;
   switch (e1->tag) {
      case Iex_Const: {
         /* The only interesting case */
         IRConst *c1 = e1->Iex.Const.con;
         IRConst *c2 = e2->Iex.Const.con;
         return c1->tag != c2->tag;
      }
      default:
         break;
   }
   return False;
}


/* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
static Bool isZeroU32 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U32
                  && e->Iex.Const.con->Ico.U32 == 0);
}

/* Is this literally IRExpr_Const(IRConst_U64(0)) ?
   Currently unused; commented out to avoid compiler warning */
#if 0
static Bool isZeroU64 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U64
                  && e->Iex.Const.con->Ico.U64 == 0);
}
#endif

/* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
static Bool isZeroV128 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_V128
                  && e->Iex.Const.con->Ico.V128 == 0x0000);
}

/* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
static Bool isZeroV256 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_V256
                  && e->Iex.Const.con->Ico.V256 == 0x00000000);
}

/* Is this an integer constant with value 0 ? */
static Bool isZeroU ( IRExpr* e )
{
   if (e->tag != Iex_Const) return False;
   switch (e->Iex.Const.con->tag) {
      case Ico_U1:  return toBool( e->Iex.Const.con->Ico.U1  == 0);
      case Ico_U8:  return toBool( e->Iex.Const.con->Ico.U8  == 0);
      case Ico_U16: return toBool( e->Iex.Const.con->Ico.U16 == 0);
      case Ico_U32: return toBool( e->Iex.Const.con->Ico.U32 == 0);
      case Ico_U64: return toBool( e->Iex.Const.con->Ico.U64 == 0);
      default: vpanic("isZeroU");
   }
}

/* Is this an integer constant with all bits set (binary 1...1) ? */
static Bool isOnesU ( IRExpr* e )
{
   if (e->tag != Iex_Const) return False;
   switch (e->Iex.Const.con->tag) {
      case Ico_U8:  return toBool( e->Iex.Const.con->Ico.U8  == 0xFF);
      case Ico_U16: return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
      case Ico_U32: return toBool( e->Iex.Const.con->Ico.U32
                                   == 0xFFFFFFFF);
      case Ico_U64: return toBool( e->Iex.Const.con->Ico.U64
                                   == 0xFFFFFFFFFFFFFFFFULL);
      default: ppIRExpr(e); vpanic("isOnesU");
   }
}

static Bool notBool ( Bool b )
{
   if (b == True) return False;
   if (b == False) return True;
   vpanic("notBool");
}

/* Make a zero which has the same type as the result of the given
   primop. */
static IRExpr* mkZeroOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
      case Iop_Xor8:  return IRExpr_Const(IRConst_U8(0));
      case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
      case Iop_Sub32:
      case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
      case Iop_And64:
      case Iop_Sub64:
      case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
      case Iop_XorV128:
      case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
      case Iop_AndV256: return IRExpr_Const(IRConst_V256(0));
      default: vpanic("mkZeroOfPrimopResultType: bad primop");
   }
}

/* Make a value containing all 1-bits, which has the same type as the
   result of the given primop. */
static IRExpr* mkOnesOfPrimopResultType ( IROp op )
{
   switch (op) {
      case Iop_CmpEQ32:
      case Iop_CmpEQ64:
         return IRExpr_Const(IRConst_U1(toBool(1)));
      case Iop_Or8:
         return IRExpr_Const(IRConst_U8(0xFF));
      case Iop_Or16:
         return IRExpr_Const(IRConst_U16(0xFFFF));
      case Iop_Or32:
         return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
      case Iop_CmpEQ8x8:
      case Iop_Or64:
         return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
      case Iop_CmpEQ8x16:
      case Iop_CmpEQ16x8:
      case Iop_CmpEQ32x4:
         return IRExpr_Const(IRConst_V128(0xFFFF));
      default:
         ppIROp(op);
         vpanic("mkOnesOfPrimopResultType: bad primop");
   }
}

/* Helpers for folding Clz32/64. */
static UInt fold_Clz64 ( ULong value )
{
   UInt i;
   vassert(value != 0ULL); /* no defined semantics for arg==0 */
   for (i = 0; i < 64; ++i) {
      if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
   }
   vassert(0);
   /*NOTREACHED*/
   return 0;
}

static UInt fold_Clz32 ( UInt value )
{
   UInt i;
   vassert(value != 0); /* no defined semantics for arg==0 */
   for (i = 0; i < 32; ++i) {
      if (0 != (value & (((UInt)1) << (31 - i)))) return i;
   }
   vassert(0);
   /*NOTREACHED*/
   return 0;
}
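
/* Worked example, for clarity: fold_Clz32(0x00010000) scans from bit
   31 downwards; bits 31..17 are zero and bit 16 is the first set
   bit, reached at i == 15, so the result is 15 -- i.e. fifteen
   leading zeros. */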

/* V64 holds 8 summary-constant bits in V128/V256 style.  Convert to
   the corresponding real constant. */
//XXX re-check this before use
//static ULong de_summarise_V64 ( UChar v64 )
//{
//   ULong r = 0;
//   if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
//   if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
//   if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
//   if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
//   if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
//   if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
//   if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
//   if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
//   return r;
//}

/* Helper for arbitrary expression pattern matching in flat IR.  If
   'e' is a reference to a tmp, look it up in env -- repeatedly, if
   necessary -- until it resolves to a non-tmp.  Note that this can
   return NULL if it can't resolve 'e' to a new expression, which will
   be the case if 'e' is instead defined by an IRStmt (IRDirty or
   LLSC). */
static IRExpr* chase ( IRExpr** env, IRExpr* e )
{
   /* Why is this loop guaranteed to terminate?  Because all tmps must
      have definitions before use, hence a tmp cannot be bound
      (directly or indirectly) to itself. */
   while (e->tag == Iex_RdTmp) {
      if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); }
      e = env[(Int)e->Iex.RdTmp.tmp];
      if (e == NULL) break;
   }
   return e;
}
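
/* For concreteness (tmp names are illustrative): with env[t2] bound
   to RdTmp(t1) and env[t1] bound to Add32(t0, 0x1:I32),
   chase(env, RdTmp(t2)) follows t2 -> t1 and returns the Add32
   expression, whereas chase1(env, RdTmp(t2)) (defined just below)
   stops after one step and returns RdTmp(t1). */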

/* Similar to |chase|, but follows at most one level of tmp reference. */
static IRExpr* chase1 ( IRExpr** env, IRExpr* e )
{
   if (e == NULL || e->tag != Iex_RdTmp)
      return e;
   else
      return env[(Int)e->Iex.RdTmp.tmp];
}

static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
{
   Int     shift;
   IRExpr* e2 = e; /* e2 is the result of folding e, if possible */

   switch (e->tag) {
      case Iex_Unop:
         /* UNARY ops */
         if (e->Iex.Unop.arg->tag == Iex_Const) {
            switch (e->Iex.Unop.op) {
               case Iop_1Uto8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 1 : 0)));
                  break;
               case Iop_1Uto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 1 : 0));
                  break;
               case Iop_1Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 1 : 0));
                  break;

               case Iop_1Sto8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFF : 0)));
                  break;
               case Iop_1Sto16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFFFF : 0)));
                  break;
               case Iop_1Sto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFFFFFFFF : 0));
                  break;
               case Iop_1Sto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                          ? 0xFFFFFFFFFFFFFFFFULL : 0));
                  break;

               case Iop_8Sto32: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
                  u32 <<= 24;
                  u32 = (Int)u32 >> 24;   /* signed shift */
                  e2 = IRExpr_Const(IRConst_U32(u32));
                  break;
               }
               case Iop_16Sto32: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  u32 <<= 16;
                  u32 = (Int)u32 >> 16;   /* signed shift */
                  e2 = IRExpr_Const(IRConst_U32(u32));
                  break;
               }
               case Iop_8Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
                  break;
               case Iop_16Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
                  break;
               case Iop_8Uto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
                  break;
               case Iop_8Sto16: {
                  UShort u16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
                  u16 <<= 8;
                  u16 = (Short)u16 >> 8;  /* signed shift */
                  e2 = IRExpr_Const(IRConst_U16(u16));
                  break;
               }
               case Iop_8Uto16:
                  e2 = IRExpr_Const(IRConst_U16(
                          0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
                  break;
               case Iop_16Uto32:
                  e2 = IRExpr_Const(IRConst_U32(
                          0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
                  break;
               case Iop_32to16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_32to8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_32to1:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                       )));
                  break;
               case Iop_64to1:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
                       )));
                  break;

               case Iop_NotV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
                  break;
               case Iop_Not64:
                  e2 = IRExpr_Const(IRConst_U64(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_Not32:
                  e2 = IRExpr_Const(IRConst_U32(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Not16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_Not8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
                  break;

               case Iop_Not1:
                  e2 = IRExpr_Const(IRConst_U1(
                          notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
                  break;

               case Iop_64to8: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 &= 0xFFULL;
                  e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
                  break;
               }
               case Iop_64to16: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 &= 0xFFFFULL;
                  e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
                  break;
               }
               case Iop_64to32: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 &= 0x00000000FFFFFFFFULL;
                  e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
                  break;
               }
               case Iop_64HIto32: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  w64 >>= 32;
                  e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
                  break;
               }
               case Iop_32Uto64:
                  e2 = IRExpr_Const(IRConst_U64(
                          0xFFFFFFFFULL
                          & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
                  break;
               case Iop_16Sto64: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  u64 <<= 48;
                  u64 = (Long)u64 >> 48;  /* signed shift */
                  e2 = IRExpr_Const(IRConst_U64(u64));
                  break;
               }
               case Iop_32Sto64: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  u64 <<= 32;
                  u64 = (Long)u64 >> 32;  /* signed shift */
                  e2 = IRExpr_Const(IRConst_U64(u64));
                  break;
               }

               case Iop_16to8: {
                  UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  w16 &= 0xFF;
                  e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
                  break;
               }
               case Iop_16HIto8: {
                  UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
                  w16 >>= 8;
                  w16 &= 0xFF;
                  e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
                  break;
               }

               case Iop_CmpNEZ8:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          0 !=
                          (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
                       )));
                  break;
               case Iop_CmpNEZ32:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          0 !=
                          (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
                       )));
                  break;
               case Iop_CmpNEZ64:
                  e2 = IRExpr_Const(IRConst_U1(toBool(
                          0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
                       )));
                  break;

               case Iop_CmpwNEZ32: {
                  UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  if (w32 == 0)
                     e2 = IRExpr_Const(IRConst_U32( 0 ));
                  else
                     e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
                  break;
               }
               case Iop_CmpwNEZ64: {
                  ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  if (w64 == 0)
                     e2 = IRExpr_Const(IRConst_U64( 0 ));
                  else
                     e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
                  break;
               }

               case Iop_Left32: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  Int  s32 = (Int)(u32 & 0xFFFFFFFF);
                  s32 = (s32 | (-s32));
                  e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
                  break;
               }

               case Iop_Left64: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  Long  s64 = (Long)u64;
                  s64 = (s64 | (-s64));
                  e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
                  break;
               }

               case Iop_Clz32: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  if (u32 != 0)
                     e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
                  break;
               }
               case Iop_Clz64: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  if (u64 != 0ULL)
                     e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
                  break;
               }

               /* For these vector ones, can't fold all cases, but at
                  least do the most obvious one.  Could do better here
                  using summarise/desummarise of vector constants, but
                  too difficult to verify; hence just handle the zero
                  cases. */
               case Iop_32UtoV128: {
                  UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
                  if (0 == u32) {
                     e2 = IRExpr_Const(IRConst_V128(0x0000));
                  } else {
                     goto unhandled;
                  }
                  break;
               }
               case Iop_V128to64: {
                  UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
                  if (0 == ((v128 >> 0) & 0xFF)) {
                     e2 = IRExpr_Const(IRConst_U64(0));
                  } else {
                     goto unhandled;
                  }
                  break;
               }
               case Iop_V128HIto64: {
                  UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
                  if (0 == ((v128 >> 8) & 0xFF)) {
                     e2 = IRExpr_Const(IRConst_U64(0));
                  } else {
                     goto unhandled;
                  }
                  break;
               }
               case Iop_64UtoV128: {
                  ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
                  if (0 == u64) {
                     e2 = IRExpr_Const(IRConst_V128(0x0000));
                  } else {
                     goto unhandled;
                  }
                  break;
               }

               /* Even stupider (although still correct ..) */
               case Iop_V256to64_0: case Iop_V256to64_1:
               case Iop_V256to64_2: case Iop_V256to64_3: {
                  UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
                  if (v256 == 0x00000000) {
                     e2 = IRExpr_Const(IRConst_U64(0));
                  } else {
                     goto unhandled;
                  }
                  break;
               }

               case Iop_ZeroHI64ofV128: {
                  /* Could do better here -- only need to look at the
                     bottom 64 bits of the argument, really. */
                  UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
                  if (v128 == 0x0000) {
                     e2 = IRExpr_Const(IRConst_V128(0x0000));
                  } else {
                     goto unhandled;
                  }
                  break;
               }

               default:
                  goto unhandled;
            }
         }
         break;

      case Iex_Binop:
         /* BINARY ops */
         if (e->Iex.Binop.arg1->tag == Iex_Const
             && e->Iex.Binop.arg2->tag == Iex_Const) {
            /* cases where both args are consts */
            switch (e->Iex.Binop.op) {

               /* -- Or -- */
               case Iop_Or8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Or16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_Or32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Or64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_OrV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                           | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
                  break;

               /* -- Xor -- */
               case Iop_Xor8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Xor16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_Xor32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Xor64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_XorV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                           ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
                  break;

               /* -- And -- */
               case Iop_And8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_And16:
                  e2 = IRExpr_Const(IRConst_U16(toUShort(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
                  break;
               case Iop_And32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_And64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;
               case Iop_AndV128:
                  e2 = IRExpr_Const(IRConst_V128(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                           & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
                  break;

               /* -- Add -- */
               case Iop_Add8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Add32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Add64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;

               /* -- Sub -- */
               case Iop_Sub8:
                  e2 = IRExpr_Const(IRConst_U8(toUChar(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                           - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                  break;
               case Iop_Sub32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Sub64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;

               /* -- Max32U -- */
               case Iop_Max32U: {
                  UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
                  UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
                  UInt res  = u32a > u32b ? u32a : u32b;
                  e2 = IRExpr_Const(IRConst_U32(res));
                  break;
               }

               /* -- Mul -- */
               case Iop_Mul32:
                  e2 = IRExpr_Const(IRConst_U32(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                           * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
                  break;
               case Iop_Mul64:
                  e2 = IRExpr_Const(IRConst_U64(
                          (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                           * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
                  break;

               case Iop_MullS32: {
                  /* very paranoid */
                  UInt  u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
                  UInt  u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
                  Int   s32a = (Int)u32a;
                  Int   s32b = (Int)u32b;
                  Long  s64a = (Long)s32a;
                  Long  s64b = (Long)s32b;
                  Long  sres = s64a * s64b;
                  ULong ures = (ULong)sres;
                  e2 = IRExpr_Const(IRConst_U64(ures));
                  break;
               }

               /* -- Shl -- */
               case Iop_Shl32:
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 31)
                     e2 = IRExpr_Const(IRConst_U32(
                             (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                              << shift)));
                  break;
               case Iop_Shl64:
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 63)
                     e2 = IRExpr_Const(IRConst_U64(
                             (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                              << shift)));
                  break;

               /* -- Sar -- */
               case Iop_Sar32: {
                  /* paranoid ... */
                  /*signed*/ Int s32;
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 31) {
                     s32 >>=/*signed*/ shift;
                     e2 = IRExpr_Const(IRConst_U32((UInt)s32));
                  }
                  break;
               }
               case Iop_Sar64: {
                  /* paranoid ... */
                  /*signed*/ Long s64;
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
                  shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
                  if (shift >= 0 && shift <= 63) {
                     s64 >>=/*signed*/ shift;
                     e2 = IRExpr_Const(IRConst_U64((ULong)s64));
                  }
                  break;
               }

               /* -- Shr -- */
               case Iop_Shr32: {
                  /* paranoid ... */
                  /*unsigned*/ UInt u32;
                  vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
                  u32 = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1902 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1903 if (shift >= 0 && shift <= 31) {
1904 u32 >>=/*unsigned*/ shift;
1905 e2 = IRExpr_Const(IRConst_U32(u32));
1906 }
1907 break;
1908 }
1909 case Iop_Shr64: {
1910 /* paranoid ... */
1911 /*unsigned*/ ULong u64;
1912 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1913 u64 = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1914 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1915 if (shift >= 0 && shift <= 63) {
1916 u64 >>=/*unsigned*/ shift;
1917 e2 = IRExpr_Const(IRConst_U64(u64));
1918 }
1919 break;
1920 }
1921
1922 /* -- CmpEQ -- */
1923 case Iop_CmpEQ32:
1924 e2 = IRExpr_Const(IRConst_U1(toBool(
1925 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1926 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1927 break;
1928 case Iop_CmpEQ64:
1929 e2 = IRExpr_Const(IRConst_U1(toBool(
1930 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1931 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1932 break;
1933
1934 /* -- CmpNE -- */
1935 case Iop_CmpNE8:
1936 case Iop_CasCmpNE8:
1937 case Iop_ExpCmpNE8:
1938 e2 = IRExpr_Const(IRConst_U1(toBool(
1939 ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
1940 != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
1941 break;
1942 case Iop_CmpNE32:
1943 case Iop_CasCmpNE32:
1944 case Iop_ExpCmpNE32:
1945 e2 = IRExpr_Const(IRConst_U1(toBool(
1946 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1947 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
1948 break;
1949 case Iop_CmpNE64:
1950 case Iop_CasCmpNE64:
1951 case Iop_ExpCmpNE64:
1952 e2 = IRExpr_Const(IRConst_U1(toBool(
1953 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1954 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1955 break;
1956
1957 /* -- CmpLEU -- */
1958 case Iop_CmpLE32U:
1959 e2 = IRExpr_Const(IRConst_U1(toBool(
1960 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1961 <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1962 break;
1963 case Iop_CmpLE64U:
1964 e2 = IRExpr_Const(IRConst_U1(toBool(
1965 ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1966 <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1967 break;
1968
1969 /* -- CmpLES -- */
1970 case Iop_CmpLE32S:
1971 e2 = IRExpr_Const(IRConst_U1(toBool(
1972 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1973 <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1974 break;
1975 case Iop_CmpLE64S:
1976 e2 = IRExpr_Const(IRConst_U1(toBool(
1977 ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1978 <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1979 break;
1980
1981 /* -- CmpLTS -- */
1982 case Iop_CmpLT32S:
1983 e2 = IRExpr_Const(IRConst_U1(toBool(
1984 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1985 < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1986 break;
1987 case Iop_CmpLT64S:
1988 e2 = IRExpr_Const(IRConst_U1(toBool(
1989 ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
1990 < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
1991 break;
1992
1993 /* -- CmpLTU -- */
1994 case Iop_CmpLT32U:
1995 e2 = IRExpr_Const(IRConst_U1(toBool(
1996 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
1997 < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
1998 break;
1999 case Iop_CmpLT64U:
2000 e2 = IRExpr_Const(IRConst_U1(toBool(
2001 ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
2002 < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
2003 break;
2004
2005 /* -- CmpORD -- */
2006 case Iop_CmpORD32S: {
2007 /* very paranoid */
2008 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
2009 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
2010 Int s32a = (Int)u32a;
2011 Int s32b = (Int)u32b;
2012 Int r = 0x2; /* EQ */
2013 if (s32a < s32b) {
2014 r = 0x8; /* LT */
2015 }
2016 else if (s32a > s32b) {
2017 r = 0x4; /* GT */
2018 }
2019 e2 = IRExpr_Const(IRConst_U32(r));
2020 break;
2021 }
2022
2023 /* -- nHLto2n -- */
2024 case Iop_32HLto64:
2025 e2 = IRExpr_Const(IRConst_U64(
2026 (((ULong)(e->Iex.Binop.arg1
2027 ->Iex.Const.con->Ico.U32)) << 32)
2028 | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
2029 ));
2030 break;
2031 case Iop_64HLto128:
2032 /* We can't fold this, because there is no way to
2033                   express the result in IR, but at least pretend to
2034 handle it, so as to stop getting blasted with
2035 no-rule-for-this-primop messages. */
2036 break;
2037 /* For this vector one, can't fold all cases, but at
2038 least do the most obvious one. Could do better here
2039 using summarise/desummarise of vector constants, but
2040 too difficult to verify; hence just handle the zero
2041 cases. */
2042 case Iop_64HLtoV128: {
2043 ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
2044 ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2045 if (0 == argHi && 0 == argLo) {
2046 e2 = IRExpr_Const(IRConst_V128(0));
2047 } else {
2048 goto unhandled;
2049 }
2050 break;
2051 }
2052 /* Same reasoning for the 256-bit version. */
2053 case Iop_V128HLtoV256: {
2054 IRExpr* argHi = e->Iex.Binop.arg1;
2055 IRExpr* argLo = e->Iex.Binop.arg2;
2056 if (isZeroV128(argHi) && isZeroV128(argLo)) {
2057 e2 = IRExpr_Const(IRConst_V256(0));
2058 } else {
2059 goto unhandled;
2060 }
2061 break;
2062 }
2063
2064 /* -- V128 stuff -- */
2065 case Iop_InterleaveLO8x16: {
2066 /* This turns up a lot in Memcheck instrumentation of
2067 Icc generated code. I don't know why. */
2068 UShort arg1 = e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
2069 UShort arg2 = e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
2070 if (0 == arg1 && 0 == arg2) {
2071 e2 = IRExpr_Const(IRConst_V128(0));
2072 } else {
2073 goto unhandled;
2074 }
2075 break;
2076 }
2077
2078 default:
2079 goto unhandled;
2080 }
2081
2082 } else {
2083
2084 /* other cases (identities, etc) */
2085 switch (e->Iex.Binop.op) {
2086
2087 case Iop_Shl32:
2088 case Iop_Shl64:
2089 case Iop_Shr64:
2090 case Iop_Sar64:
2091 /* Shl32/Shl64/Shr64/Sar64(x,0) ==> x */
2092 if (isZeroU(e->Iex.Binop.arg2)) {
2093 e2 = e->Iex.Binop.arg1;
2094 break;
2095 }
2096                /* Shl32/Shl64/Shr64/Sar64(0,x) ==> 0 */
2097 if (isZeroU(e->Iex.Binop.arg1)) {
2098 e2 = e->Iex.Binop.arg1;
2099 break;
2100 }
2101 break;
2102
2103 case Iop_Sar32:
2104 case Iop_Shr32:
2105 /* Shr32/Sar32(x,0) ==> x */
2106 if (isZeroU(e->Iex.Binop.arg2)) {
2107 e2 = e->Iex.Binop.arg1;
2108 break;
2109 }
2110 break;
2111
2112 case Iop_Or8:
2113 case Iop_Or16:
2114 case Iop_Or32:
2115 case Iop_Or64:
2116 case Iop_Max32U:
2117 /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
2118 if (isZeroU(e->Iex.Binop.arg2)) {
2119 e2 = e->Iex.Binop.arg1;
2120 break;
2121 }
2122 /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
2123 if (isZeroU(e->Iex.Binop.arg1)) {
2124 e2 = e->Iex.Binop.arg2;
2125 break;
2126 }
2127 /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
2128 /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
2129 if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) {
2130 e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2131 break;
2132 }
2133 /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
2134 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2135 e2 = e->Iex.Binop.arg1;
2136 break;
2137 }
2138 break;
2139
2140 case Iop_Add8:
2141 /* Add8(t,t) ==> t << 1.
2142 Memcheck doesn't understand that
2143 x+x produces a defined least significant bit, and it seems
2144 simplest just to get rid of the problem by rewriting it
2145 out, since the opportunity to do so exists. */
2146 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2147 e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
2148 IRExpr_Const(IRConst_U8(1)));
2149 break;
2150 }
2151 break;
2152
2153 /* NB no Add16(t,t) case yet as no known test case exists */
2154
2155 case Iop_Add32:
2156 case Iop_Add64:
2157 /* Add32/Add64(x,0) ==> x */
2158 if (isZeroU(e->Iex.Binop.arg2)) {
2159 e2 = e->Iex.Binop.arg1;
2160 break;
2161 }
2162 /* Add32/Add64(0,x) ==> x */
2163 if (isZeroU(e->Iex.Binop.arg1)) {
2164 e2 = e->Iex.Binop.arg2;
2165 break;
2166 }
2167 /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
2168 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2169 e2 = IRExpr_Binop(
2170 e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
2171 e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
2172 break;
2173 }
2174 break;
2175
2176 case Iop_Sub32:
2177 case Iop_Sub64:
2178 /* Sub32/Sub64(x,0) ==> x */
2179 if (isZeroU(e->Iex.Binop.arg2)) {
2180 e2 = e->Iex.Binop.arg1;
2181 break;
2182 }
2183 /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */
2184 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2185 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2186 break;
2187 }
2188 break;
2189 case Iop_Sub8x16:
2190 /* Sub8x16(x,0) ==> x */
2191 if (isZeroV128(e->Iex.Binop.arg2)) {
2192 e2 = e->Iex.Binop.arg1;
2193 break;
2194 }
2195 break;
2196
2197 case Iop_And8:
2198 case Iop_And16:
2199 case Iop_And32:
2200 case Iop_And64:
2201 /* And8/And16/And32/And64(x,1---1b) ==> x */
2202 if (isOnesU(e->Iex.Binop.arg2)) {
2203 e2 = e->Iex.Binop.arg1;
2204 break;
2205 }
2206 /* And8/And16/And32/And64(1---1b,x) ==> x */
2207 if (isOnesU(e->Iex.Binop.arg1)) {
2208 e2 = e->Iex.Binop.arg2;
2209 break;
2210 }
2211 /* And8/And16/And32/And64(x,0) ==> 0 */
2212 if (isZeroU(e->Iex.Binop.arg2)) {
2213 e2 = e->Iex.Binop.arg2;
2214 break;
2215 }
2216 /* And8/And16/And32/And64(0,x) ==> 0 */
2217 if (isZeroU(e->Iex.Binop.arg1)) {
2218 e2 = e->Iex.Binop.arg1;
2219 break;
2220 }
2221 /* And8/And16/And32/And64(t,t) ==> t, for some IRTemp t */
2222 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2223 e2 = e->Iex.Binop.arg1;
2224 break;
2225 }
2226 break;
2227
2228 case Iop_AndV128:
2229 case Iop_AndV256:
2230                /* AndV128/AndV256(t,t)
2231                   ==> t, for some IRTemp t */
2232 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2233 e2 = e->Iex.Binop.arg1;
2234 break;
2235 }
2236 /* Deal with either arg zero. Could handle other And
2237 cases here too. */
2238 if (e->Iex.Binop.op == Iop_AndV256
2239 && (isZeroV256(e->Iex.Binop.arg1)
2240 || isZeroV256(e->Iex.Binop.arg2))) {
2241 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2242 break;
2243 } else if (e->Iex.Binop.op == Iop_AndV128
2244 && (isZeroV128(e->Iex.Binop.arg1)
2245 || isZeroV128(e->Iex.Binop.arg2))) {
2246 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2247 break;
2248 }
2249 break;
2250
2251 case Iop_OrV128:
2252 case Iop_OrV256:
2253                /* OrV128/OrV256(t,t) ==> t, for some IRTemp t */
2254 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2255 e2 = e->Iex.Binop.arg1;
2256 break;
2257 }
2258 /* OrV128(t,0) ==> t */
2259 if (e->Iex.Binop.op == Iop_OrV128) {
2260 if (isZeroV128(e->Iex.Binop.arg2)) {
2261 e2 = e->Iex.Binop.arg1;
2262 break;
2263 }
2264 if (isZeroV128(e->Iex.Binop.arg1)) {
2265 e2 = e->Iex.Binop.arg2;
2266 break;
2267 }
2268 }
2269 /* OrV256(t,0) ==> t */
2270 if (e->Iex.Binop.op == Iop_OrV256) {
2271 if (isZeroV256(e->Iex.Binop.arg2)) {
2272 e2 = e->Iex.Binop.arg1;
2273 break;
2274 }
2275 //Disabled because there's no known test case right now.
2276 //if (isZeroV256(e->Iex.Binop.arg1)) {
2277 // e2 = e->Iex.Binop.arg2;
2278 // break;
2279 //}
2280 }
2281 break;
2282
2283 case Iop_Xor8:
2284 case Iop_Xor16:
2285 case Iop_Xor32:
2286 case Iop_Xor64:
2287 case Iop_XorV128:
2288 /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
2289 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2290 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2291 break;
2292 }
2293 /* XorV128(t,0) ==> t */
2294 if (e->Iex.Binop.op == Iop_XorV128) {
2295 if (isZeroV128(e->Iex.Binop.arg2)) {
2296 e2 = e->Iex.Binop.arg1;
2297 break;
2298 }
2299 //Disabled because there's no known test case right now.
2300 //if (isZeroV128(e->Iex.Binop.arg1)) {
2301 // e2 = e->Iex.Binop.arg2;
2302 // break;
2303 //}
2304 } else {
2305 /* Xor8/16/32/64(0,t) ==> t */
2306 if (isZeroU(e->Iex.Binop.arg1)) {
2307 e2 = e->Iex.Binop.arg2;
2308 break;
2309 }
2310 /* Xor8/16/32/64(t,0) ==> t */
2311 if (isZeroU(e->Iex.Binop.arg2)) {
2312 e2 = e->Iex.Binop.arg1;
2313 break;
2314 }
2315 }
2316 break;
2317
2318 case Iop_CmpNE32:
2319 /* CmpNE32(t,t) ==> 0, for some IRTemp t */
2320 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2321 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2322 break;
2323 }
2324 /* CmpNE32(1Uto32(b), 0) ==> b */
2325 if (isZeroU32(e->Iex.Binop.arg2)) {
2326 IRExpr* a1 = chase(env, e->Iex.Binop.arg1);
2327 if (a1 && a1->tag == Iex_Unop
2328 && a1->Iex.Unop.op == Iop_1Uto32) {
2329 e2 = a1->Iex.Unop.arg;
2330 break;
2331 }
2332 }
2333 break;
2334
2335 case Iop_CmpEQ32:
2336 case Iop_CmpEQ64:
2337 case Iop_CmpEQ8x8:
2338 case Iop_CmpEQ8x16:
2339 case Iop_CmpEQ16x8:
2340 case Iop_CmpEQ32x4:
2341 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2342 e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2343 break;
2344 }
2345 break;
2346
2347 default:
2348 break;
2349 }
2350 }
2351 break;
2352
2353 case Iex_ITE:
2354 /* ITE */
2355          /* is the discriminant a constant? */
2356 if (e->Iex.ITE.cond->tag == Iex_Const) {
2357             /* this is assured by the IR type rules */
2358 vassert(e->Iex.ITE.cond->Iex.Const.con->tag == Ico_U1);
2359 e2 = e->Iex.ITE.cond->Iex.Const.con->Ico.U1
2360 ? e->Iex.ITE.iftrue : e->Iex.ITE.iffalse;
2361 }
2362 else
2363 /* are the arms identical? (pretty weedy test) */
2364 if (sameIRExprs(env, e->Iex.ITE.iftrue,
2365 e->Iex.ITE.iffalse)) {
2366 e2 = e->Iex.ITE.iffalse;
2367 }
2368 break;
2369
2370 default:
2371 /* not considered */
2372 break;
2373 }
2374
2375 /* Show cases where we've found but not folded 'op(t,t)'. Be
2376 careful not to call sameIRExprs with values of different types,
2377 though, else it will assert (and so it should!). We can't
2378 conveniently call typeOfIRExpr on the two args without a whole
2379 bunch of extra plumbing to pass in a type env, so just use a
2380 hacky test to check the arguments are not anything that might
2381       cause sameIRExprs to assert.  This is only OK because this kludge is
2382 only used for debug printing, not for "real" operation. For
2383 "real" operation (ie, all other calls to sameIRExprs), it is
2384       essential that the two args have the same type.
2385
2386 The "right" solution is to plumb the containing block's
2387 IRTypeEnv through to here and use typeOfIRExpr to be sure. But
2388 that's a bunch of extra parameter passing which will just slow
2389 down the normal case, for no purpose. */
2390 if (vex_control.iropt_verbosity > 0
2391 && e == e2
2392 && e->tag == Iex_Binop
2393 && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1,
2394 e->Iex.Binop.arg2)
2395 && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2396 vex_printf("vex iropt: fold_Expr: no ident rule for: ");
2397 ppIRExpr(e);
2398 vex_printf("\n");
2399 }
2400
2401 /* Show the overall results of folding. */
2402 if (DEBUG_IROPT && e2 != e) {
2403 vex_printf("FOLD: ");
2404 ppIRExpr(e); vex_printf(" -> ");
2405 ppIRExpr(e2); vex_printf("\n");
2406 }
2407
2408 return e2;
2409
2410 unhandled:
2411 # if 0
2412 vex_printf("\n\n");
2413 ppIRExpr(e);
2414 vpanic("fold_Expr: no rule for the above");
2415 # else
2416 if (vex_control.iropt_verbosity > 0) {
2417 vex_printf("vex iropt: fold_Expr: no const rule for: ");
2418 ppIRExpr(e);
2419 vex_printf("\n");
2420 }
2421 return e2;
2422 # endif
2423 }
2424
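/* A few illustrative folds (sketch only, not taken from any
   particular guest trace):

      t12 = Add32(0x2:I32,0x3:I32)    -->  t12 = 0x5:I32
      t13 = And32(t7,0xFFFFFFFF:I32)  -->  t13 = t7
      t14 = Xor32(t7,t7)              -->  t14 = 0x0:I32

   Anything not matched by a rule is returned unchanged (and
   optionally reported, as above). */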
2425
2426 /* Apply the subst to a simple 1-level expression -- guaranteed to be
2427    1-level due to the previous flattening pass. */
2428
2429 static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
2430 {
2431 switch (ex->tag) {
2432 case Iex_RdTmp:
2433 if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
2434 IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp];
2435 if (rhs->tag == Iex_RdTmp)
2436 return rhs;
2437 if (rhs->tag == Iex_Const
2438 && rhs->Iex.Const.con->tag != Ico_F64i)
2439 return rhs;
2440 }
2441 /* not bound in env */
2442 return ex;
2443
2444 case Iex_Const:
2445 case Iex_Get:
2446 return ex;
2447
2448 case Iex_GetI:
2449 vassert(isIRAtom(ex->Iex.GetI.ix));
2450 return IRExpr_GetI(
2451 ex->Iex.GetI.descr,
2452 subst_Expr(env, ex->Iex.GetI.ix),
2453 ex->Iex.GetI.bias
2454 );
2455
2456 case Iex_Qop: {
2457 IRQop* qop = ex->Iex.Qop.details;
2458 vassert(isIRAtom(qop->arg1));
2459 vassert(isIRAtom(qop->arg2));
2460 vassert(isIRAtom(qop->arg3));
2461 vassert(isIRAtom(qop->arg4));
2462 return IRExpr_Qop(
2463 qop->op,
2464 subst_Expr(env, qop->arg1),
2465 subst_Expr(env, qop->arg2),
2466 subst_Expr(env, qop->arg3),
2467 subst_Expr(env, qop->arg4)
2468 );
2469 }
2470
2471 case Iex_Triop: {
2472 IRTriop* triop = ex->Iex.Triop.details;
2473 vassert(isIRAtom(triop->arg1));
2474 vassert(isIRAtom(triop->arg2));
2475 vassert(isIRAtom(triop->arg3));
2476 return IRExpr_Triop(
2477 triop->op,
2478 subst_Expr(env, triop->arg1),
2479 subst_Expr(env, triop->arg2),
2480 subst_Expr(env, triop->arg3)
2481 );
2482 }
2483
2484 case Iex_Binop:
2485 vassert(isIRAtom(ex->Iex.Binop.arg1));
2486 vassert(isIRAtom(ex->Iex.Binop.arg2));
2487 return IRExpr_Binop(
2488 ex->Iex.Binop.op,
2489 subst_Expr(env, ex->Iex.Binop.arg1),
2490 subst_Expr(env, ex->Iex.Binop.arg2)
2491 );
2492
2493 case Iex_Unop:
2494 vassert(isIRAtom(ex->Iex.Unop.arg));
2495 return IRExpr_Unop(
2496 ex->Iex.Unop.op,
2497 subst_Expr(env, ex->Iex.Unop.arg)
2498 );
2499
2500 case Iex_Load:
2501 vassert(isIRAtom(ex->Iex.Load.addr));
2502 return IRExpr_Load(
2503 ex->Iex.Load.end,
2504 ex->Iex.Load.ty,
2505 subst_Expr(env, ex->Iex.Load.addr)
2506 );
2507
2508 case Iex_CCall: {
2509 Int i;
2510 IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
2511 for (i = 0; args2[i]; i++) {
2512 vassert(isIRAtom(args2[i]));
2513 args2[i] = subst_Expr(env, args2[i]);
2514 }
2515 return IRExpr_CCall(
2516 ex->Iex.CCall.cee,
2517 ex->Iex.CCall.retty,
2518 args2
2519 );
2520 }
2521
2522 case Iex_ITE:
2523 vassert(isIRAtom(ex->Iex.ITE.cond));
2524 vassert(isIRAtom(ex->Iex.ITE.iftrue));
2525 vassert(isIRAtom(ex->Iex.ITE.iffalse));
2526 return IRExpr_ITE(
2527 subst_Expr(env, ex->Iex.ITE.cond),
2528 subst_Expr(env, ex->Iex.ITE.iftrue),
2529 subst_Expr(env, ex->Iex.ITE.iffalse)
2530 );
2531
2532 default:
2533 vex_printf("\n\n"); ppIRExpr(ex);
2534 vpanic("subst_Expr");
2535
2536 }
2537 }
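/* Example (illustrative only): if env[5] == 0x10:I32, then
   subst_Expr rewrites Add32(t5,t9) to Add32(0x10:I32,t9).  A later
   fold_Expr call can finish the job if t9 also turns out to be
   bound to a constant. */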
2538
2539
2540 /* Apply the subst to stmt, then fold the result as much as possible.
2541 Much simplified due to stmt being previously flattened. As a
2542 result of this, the stmt may wind up being turned into a no-op.
2543 */
2544 static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
2545 {
2546 # if 0
2547 vex_printf("\nsubst and fold stmt\n");
2548 ppIRStmt(st);
2549 vex_printf("\n");
2550 # endif
2551
2552 switch (st->tag) {
2553 case Ist_AbiHint:
2554 vassert(isIRAtom(st->Ist.AbiHint.base));
2555 vassert(isIRAtom(st->Ist.AbiHint.nia));
2556 return IRStmt_AbiHint(
2557 fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)),
2558 st->Ist.AbiHint.len,
2559 fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia))
2560 );
2561 case Ist_Put:
2562 vassert(isIRAtom(st->Ist.Put.data));
2563 return IRStmt_Put(
2564 st->Ist.Put.offset,
2565 fold_Expr(env, subst_Expr(env, st->Ist.Put.data))
2566 );
2567
2568 case Ist_PutI: {
2569 IRPutI *puti, *puti2;
2570 puti = st->Ist.PutI.details;
2571 vassert(isIRAtom(puti->ix));
2572 vassert(isIRAtom(puti->data));
2573 puti2 = mkIRPutI(puti->descr,
2574 fold_Expr(env, subst_Expr(env, puti->ix)),
2575 puti->bias,
2576 fold_Expr(env, subst_Expr(env, puti->data)));
2577 return IRStmt_PutI(puti2);
2578 }
2579
2580 case Ist_WrTmp:
2581 /* This is the one place where an expr (st->Ist.WrTmp.data) is
2582 allowed to be more than just a constant or a tmp. */
2583 return IRStmt_WrTmp(
2584 st->Ist.WrTmp.tmp,
2585 fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data))
2586 );
2587
2588 case Ist_Store:
2589 vassert(isIRAtom(st->Ist.Store.addr));
2590 vassert(isIRAtom(st->Ist.Store.data));
2591 return IRStmt_Store(
2592 st->Ist.Store.end,
2593 fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)),
2594 fold_Expr(env, subst_Expr(env, st->Ist.Store.data))
2595 );
2596
2597 case Ist_StoreG: {
2598 IRStoreG* sg = st->Ist.StoreG.details;
2599 vassert(isIRAtom(sg->addr));
2600 vassert(isIRAtom(sg->data));
2601 vassert(isIRAtom(sg->guard));
2602 IRExpr* faddr = fold_Expr(env, subst_Expr(env, sg->addr));
2603 IRExpr* fdata = fold_Expr(env, subst_Expr(env, sg->data));
2604 IRExpr* fguard = fold_Expr(env, subst_Expr(env, sg->guard));
2605 if (fguard->tag == Iex_Const) {
2606 /* The condition on this store has folded down to a constant. */
2607 vassert(fguard->Iex.Const.con->tag == Ico_U1);
2608 if (fguard->Iex.Const.con->Ico.U1 == False) {
2609 return IRStmt_NoOp();
2610 } else {
2611 vassert(fguard->Iex.Const.con->Ico.U1 == True);
2612 return IRStmt_Store(sg->end, faddr, fdata);
2613 }
2614 }
2615 return IRStmt_StoreG(sg->end, faddr, fdata, fguard);
2616 }
2617
2618 case Ist_LoadG: {
2619 /* This is complicated. If the guard folds down to 'false',
2620 we can replace it with an assignment 'dst := alt', but if
2621 the guard folds down to 'true', we can't conveniently
2622 replace it with an unconditional load, because doing so
2623 requires generating a new temporary, and that is not easy
2624 to do at this point. */
2625 IRLoadG* lg = st->Ist.LoadG.details;
2626 vassert(isIRAtom(lg->addr));
2627 vassert(isIRAtom(lg->alt));
2628 vassert(isIRAtom(lg->guard));
2629 IRExpr* faddr = fold_Expr(env, subst_Expr(env, lg->addr));
2630 IRExpr* falt = fold_Expr(env, subst_Expr(env, lg->alt));
2631 IRExpr* fguard = fold_Expr(env, subst_Expr(env, lg->guard));
2632 if (fguard->tag == Iex_Const) {
2633 /* The condition on this load has folded down to a constant. */
2634 vassert(fguard->Iex.Const.con->tag == Ico_U1);
2635 if (fguard->Iex.Const.con->Ico.U1 == False) {
2636 /* The load is not going to happen -- instead 'alt' is
2637 assigned to 'dst'. */
2638 return IRStmt_WrTmp(lg->dst, falt);
2639 } else {
2640 vassert(fguard->Iex.Const.con->Ico.U1 == True);
2641 /* The load is always going to happen. We want to
2642 convert to an unconditional load and assign to 'dst'
2643 (IRStmt_WrTmp). Problem is we need an extra temp to
2644 hold the loaded value, but none is available.
2645 Instead, reconstitute the conditional load (with
2646 folded args, of course) and let the caller of this
2647 routine deal with the problem. */
2648 }
2649 }
2650 return IRStmt_LoadG(lg->end, lg->cvt, lg->dst, faddr, falt, fguard);
2651 }
2652
2653 case Ist_CAS: {
2654 IRCAS *cas, *cas2;
2655 cas = st->Ist.CAS.details;
2656 vassert(isIRAtom(cas->addr));
2657 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
2658 vassert(isIRAtom(cas->expdLo));
2659 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
2660 vassert(isIRAtom(cas->dataLo));
2661 cas2 = mkIRCAS(
2662 cas->oldHi, cas->oldLo, cas->end,
2663 fold_Expr(env, subst_Expr(env, cas->addr)),
2664 cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi))
2665 : NULL,
2666 fold_Expr(env, subst_Expr(env, cas->expdLo)),
2667 cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi))
2668 : NULL,
2669 fold_Expr(env, subst_Expr(env, cas->dataLo))
2670 );
2671 return IRStmt_CAS(cas2);
2672 }
2673
2674 case Ist_LLSC:
2675 vassert(isIRAtom(st->Ist.LLSC.addr));
2676 if (st->Ist.LLSC.storedata)
2677 vassert(isIRAtom(st->Ist.LLSC.storedata));
2678 return IRStmt_LLSC(
2679 st->Ist.LLSC.end,
2680 st->Ist.LLSC.result,
2681 fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)),
2682 st->Ist.LLSC.storedata
2683 ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata))
2684 : NULL
2685 );
2686
2687 case Ist_Dirty: {
2688 Int i;
2689 IRDirty *d, *d2;
2690 d = st->Ist.Dirty.details;
2691 d2 = emptyIRDirty();
2692 *d2 = *d;
2693 d2->args = shallowCopyIRExprVec(d2->args);
2694 if (d2->mFx != Ifx_None) {
2695 vassert(isIRAtom(d2->mAddr));
2696 d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr));
2697 }
2698 vassert(isIRAtom(d2->guard));
2699 d2->guard = fold_Expr(env, subst_Expr(env, d2->guard));
2700 for (i = 0; d2->args[i]; i++) {
2701 IRExpr* arg = d2->args[i];
2702 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
2703 vassert(isIRAtom(arg));
2704 d2->args[i] = fold_Expr(env, subst_Expr(env, arg));
2705 }
2706 }
2707 return IRStmt_Dirty(d2);
2708 }
2709
2710 case Ist_IMark:
2711 return IRStmt_IMark(st->Ist.IMark.addr,
2712 st->Ist.IMark.len,
2713 st->Ist.IMark.delta);
2714
2715 case Ist_NoOp:
2716 return IRStmt_NoOp();
2717
2718 case Ist_MBE:
2719 return IRStmt_MBE(st->Ist.MBE.event);
2720
2721 case Ist_Exit: {
2722 IRExpr* fcond;
2723 vassert(isIRAtom(st->Ist.Exit.guard));
2724 fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard));
2725 if (fcond->tag == Iex_Const) {
2726 /* Interesting. The condition on this exit has folded down to
2727 a constant. */
2728 vassert(fcond->Iex.Const.con->tag == Ico_U1);
2729 if (fcond->Iex.Const.con->Ico.U1 == False) {
2730 /* exit is never going to happen, so dump the statement. */
2731 return IRStmt_NoOp();
2732 } else {
2733 vassert(fcond->Iex.Const.con->Ico.U1 == True);
2734 /* Hmmm. The exit has become unconditional. Leave it
2735 as it is for now, since we'd have to truncate the BB
2736 at this point, which is tricky. Such truncation is
2737 done later by the dead-code elimination pass. */
2738 /* fall out into the reconstruct-the-exit code. */
2739 if (vex_control.iropt_verbosity > 0)
2740 /* really a misuse of vex_control.iropt_verbosity */
2741 vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
2742 }
2743 }
2744 return IRStmt_Exit(fcond, st->Ist.Exit.jk,
2745 st->Ist.Exit.dst, st->Ist.Exit.offsIP);
2746 }
2747
2748 default:
2749 vex_printf("\n"); ppIRStmt(st);
2750 vpanic("subst_and_fold_Stmt");
2751 }
2752 }
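/* For instance (hypothetical): if the guard of

      if (t3) goto {Boring} 0x4000:I64

   substitutes and folds down to 0:I1, the Ist_Exit case above
   replaces the whole statement with a no-op, which cprop_BB below
   then discards. */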
2753
2754
2755 IRSB* cprop_BB ( IRSB* in )
2756 {
2757 Int i;
2758 IRSB* out;
2759 IRStmt* st2;
2760 Int n_tmps = in->tyenv->types_used;
2761 IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
2762 /* Keep track of IRStmt_LoadGs that we need to revisit after
2763 processing all the other statements. */
2764 const Int N_FIXUPS = 16;
2765 Int fixups[N_FIXUPS]; /* indices in the stmt array of 'out' */
2766 Int n_fixups = 0;
2767
2768 out = emptyIRSB();
2769 out->tyenv = deepCopyIRTypeEnv( in->tyenv );
2770
2771    /* Set up the env which travels forward.  This holds a
2772 substitution, mapping IRTemps to IRExprs. The environment
2773 is to be applied as we move along. Keys are IRTemps.
2774 Values are IRExpr*s.
2775 */
2776 for (i = 0; i < n_tmps; i++)
2777 env[i] = NULL;
2778
2779 /* For each original SSA-form stmt ... */
2780 for (i = 0; i < in->stmts_used; i++) {
2781
2782 /* First apply the substitution to the current stmt. This
2783 propagates in any constants and tmp-tmp assignments
2784          accumulated prior to this point.  The subst_and_fold_Stmt
2785          call also folds any constant expressions that result. */
2786
2787 st2 = in->stmts[i];
2788
2789 /* perhaps st2 is already a no-op? */
2790 if (st2->tag == Ist_NoOp) continue;
2791
2792 st2 = subst_and_fold_Stmt( env, st2 );
2793
2794 /* Deal with some post-folding special cases. */
2795 switch (st2->tag) {
2796
2797 /* If the statement has been folded into a no-op, forget
2798 it. */
2799 case Ist_NoOp:
2800 continue;
2801
2802 /* If the statement assigns to an IRTemp add it to the
2803 running environment. This is for the benefit of copy
2804          propagation and to allow sameIRExprs to look through
2805 IRTemps. */
2806 case Ist_WrTmp: {
2807 vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
2808 env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
2809
2810 /* 't1 = t2' -- don't add to BB; will be optimized out */
2811 if (st2->Ist.WrTmp.data->tag == Iex_RdTmp)
2812 continue;
2813
2814 /* 't = const' && 'const != F64i' -- don't add to BB
2815 Note, we choose not to propagate const when const is an
2816 F64i, so that F64i literals can be CSE'd later. This
2817 helps x86 floating point code generation. */
2818 if (st2->Ist.WrTmp.data->tag == Iex_Const
2819 && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
2820 continue;
2821 }
2822 /* else add it to the output, as normal */
2823 break;
2824 }
2825
2826 case Ist_LoadG: {
2827 IRLoadG* lg = st2->Ist.LoadG.details;
2828 IRExpr* guard = lg->guard;
2829 if (guard->tag == Iex_Const) {
2830 /* The guard has folded to a constant, and that
2831 constant must be 1:I1, since subst_and_fold_Stmt
2832 folds out the case 0:I1 by itself. */
2833 vassert(guard->Iex.Const.con->tag == Ico_U1);
2834 vassert(guard->Iex.Const.con->Ico.U1 == True);
2835 /* Add a NoOp here as a placeholder, and make a note of
2836 where it is in the output block. Afterwards we'll
2837 come back here and transform the NoOp and the LoadG
2838 into a load-convert pair. The fixups[] entry
2839 refers to the inserted NoOp, and we expect to find
2840 the relevant LoadG immediately after it. */
2841 vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
2842 if (n_fixups < N_FIXUPS) {
2843 fixups[n_fixups++] = out->stmts_used;
2844 addStmtToIRSB( out, IRStmt_NoOp() );
2845 }
2846 }
2847 /* And always add the LoadG to the output, regardless. */
2848 break;
2849 }
2850
2851 default:
2852 break;
2853 }
2854
2855 /* Not interesting, copy st2 into the output block. */
2856 addStmtToIRSB( out, st2 );
2857 }
2858
2859 # if STATS_IROPT
2860 vex_printf("sameIRExpr: invoked = %u/%u equal = %u/%u max_nodes = %u\n",
2861 invocation_count, recursion_count, success_count,
2862 recursion_success_count, max_nodes_visited);
2863 # endif
2864
2865 out->next = subst_Expr( env, in->next );
2866 out->jumpkind = in->jumpkind;
2867 out->offsIP = in->offsIP;
2868
2869 /* Process any leftover unconditional LoadGs that we noticed
2870 in the main pass. */
2871 vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
2872 for (i = 0; i < n_fixups; i++) {
2873 Int ix = fixups[i];
2874 /* Carefully verify that the LoadG has the expected form. */
2875 vassert(ix >= 0 && ix+1 < out->stmts_used);
2876 IRStmt* nop = out->stmts[ix];
2877 IRStmt* lgu = out->stmts[ix+1];
2878 vassert(nop->tag == Ist_NoOp);
2879 vassert(lgu->tag == Ist_LoadG);
2880 IRLoadG* lg = lgu->Ist.LoadG.details;
2881 IRExpr* guard = lg->guard;
2882 vassert(guard->Iex.Const.con->tag == Ico_U1);
2883 vassert(guard->Iex.Const.con->Ico.U1 == True);
2884 /* Figure out the load and result types, and the implied
2885 conversion operation. */
2886 IRType cvtRes = Ity_INVALID, cvtArg = Ity_INVALID;
2887 typeOfIRLoadGOp(lg->cvt, &cvtRes, &cvtArg);
2888 IROp cvtOp = Iop_INVALID;
2889 switch (lg->cvt) {
2890 case ILGop_Ident32: break;
2891 case ILGop_8Uto32: cvtOp = Iop_8Uto32; break;
2892 case ILGop_8Sto32: cvtOp = Iop_8Sto32; break;
2893 case ILGop_16Uto32: cvtOp = Iop_16Uto32; break;
2894 case ILGop_16Sto32: cvtOp = Iop_16Sto32; break;
2895 default: vpanic("cprop_BB: unhandled ILGOp");
2896 }
2897 /* Replace the placeholder NoOp by the required unconditional
2898 load. */
2899 IRTemp tLoaded = newIRTemp(out->tyenv, cvtArg);
2900 out->stmts[ix]
2901 = IRStmt_WrTmp(tLoaded,
2902 IRExpr_Load(lg->end, cvtArg, lg->addr));
2903 /* Replace the LoadG by a conversion from the loaded value's
2904 type to the required result type. */
2905 out->stmts[ix+1]
2906 = IRStmt_WrTmp(
2907 lg->dst, cvtOp == Iop_INVALID
2908 ? IRExpr_RdTmp(tLoaded)
2909 : IRExpr_Unop(cvtOp, IRExpr_RdTmp(tLoaded)));
2910 }
2911
2912 return out;
2913 }
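/* A small worked example (hypothetical input) of what cprop_BB does:

      t1 = GET:I32(0)             t1 = GET:I32(0)
      t2 = t1                -->  (dropped; env[2] = t1)
      t3 = Add32(t2,0x0:I32)      (dropped; env[3] = t1, after folding)
      PUT(4) = t3                 PUT(4) = t1

   Copies and foldable constants travel forward in 'env' instead of
   being emitted as statements. */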
2914
2915
2916 /*---------------------------------------------------------------*/
2917 /*--- Dead code (t = E) removal ---*/
2918 /*---------------------------------------------------------------*/
2919
2920 /* As a side effect, also removes all code following an unconditional
2921 side exit. */
2922
2923 /* The 'set' below is conceptually a map from IRTemp to nothing
2924    -- really just representing a set of IRTemps; it is implemented
2925    as a Bool array indexed by IRTemp. */
2926
2927 inline
2928 static void addUses_Temp ( Bool* set, IRTemp tmp )
2929 {
2930 set[(Int)tmp] = True;
2931 }
2932
2933 static void addUses_Expr ( Bool* set, IRExpr* e )
2934 {
2935 Int i;
2936 switch (e->tag) {
2937 case Iex_GetI:
2938 addUses_Expr(set, e->Iex.GetI.ix);
2939 return;
2940 case Iex_ITE:
2941 addUses_Expr(set, e->Iex.ITE.cond);
2942 addUses_Expr(set, e->Iex.ITE.iftrue);
2943 addUses_Expr(set, e->Iex.ITE.iffalse);
2944 return;
2945 case Iex_CCall:
2946 for (i = 0; e->Iex.CCall.args[i]; i++)
2947 addUses_Expr(set, e->Iex.CCall.args[i]);
2948 return;
2949 case Iex_Load:
2950 addUses_Expr(set, e->Iex.Load.addr);
2951 return;
2952 case Iex_Qop:
2953 addUses_Expr(set, e->Iex.Qop.details->arg1);
2954 addUses_Expr(set, e->Iex.Qop.details->arg2);
2955 addUses_Expr(set, e->Iex.Qop.details->arg3);
2956 addUses_Expr(set, e->Iex.Qop.details->arg4);
2957 return;
2958 case Iex_Triop:
2959 addUses_Expr(set, e->Iex.Triop.details->arg1);
2960 addUses_Expr(set, e->Iex.Triop.details->arg2);
2961 addUses_Expr(set, e->Iex.Triop.details->arg3);
2962 return;
2963 case Iex_Binop:
2964 addUses_Expr(set, e->Iex.Binop.arg1);
2965 addUses_Expr(set, e->Iex.Binop.arg2);
2966 return;
2967 case Iex_Unop:
2968 addUses_Expr(set, e->Iex.Unop.arg);
2969 return;
2970 case Iex_RdTmp:
2971 addUses_Temp(set, e->Iex.RdTmp.tmp);
2972 return;
2973 case Iex_Const:
2974 case Iex_Get:
2975 return;
2976 default:
2977 vex_printf("\n");
2978 ppIRExpr(e);
2979 vpanic("addUses_Expr");
2980 }
2981 }
2982
2983 static void addUses_Stmt ( Bool* set, IRStmt* st )
2984 {
2985 Int i;
2986 IRDirty* d;
2987 IRCAS* cas;
2988 switch (st->tag) {
2989 case Ist_AbiHint:
2990 addUses_Expr(set, st->Ist.AbiHint.base);
2991 addUses_Expr(set, st->Ist.AbiHint.nia);
2992 return;
2993 case Ist_PutI:
2994 addUses_Expr(set, st->Ist.PutI.details->ix);
2995 addUses_Expr(set, st->Ist.PutI.details->data);
2996 return;
2997 case Ist_WrTmp:
2998 addUses_Expr(set, st->Ist.WrTmp.data);
2999 return;
3000 case Ist_Put:
3001 addUses_Expr(set, st->Ist.Put.data);
3002 return;
3003 case Ist_Store:
3004 addUses_Expr(set, st->Ist.Store.addr);
3005 addUses_Expr(set, st->Ist.Store.data);
3006 return;
3007 case Ist_StoreG: {
3008 IRStoreG* sg = st->Ist.StoreG.details;
3009 addUses_Expr(set, sg->addr);
3010 addUses_Expr(set, sg->data);
3011 addUses_Expr(set, sg->guard);
3012 return;
3013 }
3014 case Ist_LoadG: {
3015 IRLoadG* lg = st->Ist.LoadG.details;
3016 addUses_Expr(set, lg->addr);
3017 addUses_Expr(set, lg->alt);
3018 addUses_Expr(set, lg->guard);
3019 return;
3020 }
3021 case Ist_CAS:
3022 cas = st->Ist.CAS.details;
3023 addUses_Expr(set, cas->addr);
3024 if (cas->expdHi)
3025 addUses_Expr(set, cas->expdHi);
3026 addUses_Expr(set, cas->expdLo);
3027 if (cas->dataHi)
3028 addUses_Expr(set, cas->dataHi);
3029 addUses_Expr(set, cas->dataLo);
3030 return;
3031 case Ist_LLSC:
3032 addUses_Expr(set, st->Ist.LLSC.addr);
3033 if (st->Ist.LLSC.storedata)
3034 addUses_Expr(set, st->Ist.LLSC.storedata);
3035 return;
3036 case Ist_Dirty:
3037 d = st->Ist.Dirty.details;
3038 if (d->mFx != Ifx_None)
3039 addUses_Expr(set, d->mAddr);
3040 addUses_Expr(set, d->guard);
3041 for (i = 0; d->args[i] != NULL; i++) {
3042 IRExpr* arg = d->args[i];
3043 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
3044 addUses_Expr(set, arg);
3045 }
3046 return;
3047 case Ist_NoOp:
3048 case Ist_IMark:
3049 case Ist_MBE:
3050 return;
3051 case Ist_Exit:
3052 addUses_Expr(set, st->Ist.Exit.guard);
3053 return;
3054 default:
3055 vex_printf("\n");
3056 ppIRStmt(st);
3057 vpanic("addUses_Stmt");
3058 }
3059 }
3060
3061
3062 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
3063 static Bool isZeroU1 ( IRExpr* e )
3064 {
3065 return toBool( e->tag == Iex_Const
3066 && e->Iex.Const.con->tag == Ico_U1
3067 && e->Iex.Const.con->Ico.U1 == False );
3068 }
3069
3070 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
3071 static Bool isOneU1 ( IRExpr* e )
3072 {
3073 return toBool( e->tag == Iex_Const
3074 && e->Iex.Const.con->tag == Ico_U1
3075 && e->Iex.Const.con->Ico.U1 == True );
3076 }
3077
3078
3079 /* Note, this destructively modifies the given IRSB. */
3080
3081 /* Scan backwards through statements, carrying a set of IRTemps which
3082 are known to be used after the current point. On encountering 't =
3083 E', delete the binding if it is not used. Otherwise, add any temp
3084 uses to the set and keep on moving backwards.
3085
3086 As an enhancement, the first (backwards) pass searches for IR exits
3087 with always-taken conditions and notes the location of the earliest
3088 one in the block. If any such are found, a second pass copies the
3089 exit destination and jump kind to the bb-end. Then, the exit and
3090 all statements following it are turned into no-ops.
3091 */
3092
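/* Example (illustrative): in

      t1 = GET:I32(0)
      t2 = Add32(t1,0x1:I32)
      PUT(0) = t1

   the backwards scan sees no use of t2, so its binding becomes a
   no-op; the PUT's use of t1 keeps t1's binding alive. */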
3093 /* notstatic */ void do_deadcode_BB ( IRSB* bb )
3094 {
3095 Int i, i_unconditional_exit;
3096 Int n_tmps = bb->tyenv->types_used;
3097 Bool* set = LibVEX_Alloc_inline(n_tmps * sizeof(Bool));
3098 IRStmt* st;
3099
3100 for (i = 0; i < n_tmps; i++)
3101 set[i] = False;
3102
3103 /* start off by recording IRTemp uses in the next field. */
3104 addUses_Expr(set, bb->next);
3105
3106 /* First pass */
3107
3108 /* Work backwards through the stmts */
3109 i_unconditional_exit = -1;
3110 for (i = bb->stmts_used-1; i >= 0; i--) {
3111 st = bb->stmts[i];
3112 if (st->tag == Ist_NoOp)
3113 continue;
3114 /* take note of any unconditional exits */
3115 if (st->tag == Ist_Exit
3116 && isOneU1(st->Ist.Exit.guard))
3117 i_unconditional_exit = i;
3118 if (st->tag == Ist_WrTmp
3119 && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
3120 /* it's an IRTemp which never got used. Delete it. */
3121 if (DEBUG_IROPT) {
3122 vex_printf("DEAD: ");
3123 ppIRStmt(st);
3124 vex_printf("\n");
3125 }
3126 bb->stmts[i] = IRStmt_NoOp();
3127 }
3128 else
3129 if (st->tag == Ist_Dirty
3130 && st->Ist.Dirty.details->guard
3131 && isZeroU1(st->Ist.Dirty.details->guard)) {
3132 /* This is a dirty helper which will never get called.
3133 Delete it. */
3134 bb->stmts[i] = IRStmt_NoOp();
3135 }
3136 else {
3137 /* Note any IRTemp uses made by the current statement. */
3138 addUses_Stmt(set, st);
3139 }
3140 }
3141
3142 /* Optional second pass: if any unconditional exits were found,
3143 delete them and all following statements. */
3144
3145 if (i_unconditional_exit != -1) {
3146 if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
3147 i_unconditional_exit);
3148 vassert(i_unconditional_exit >= 0
3149 && i_unconditional_exit < bb->stmts_used);
3150 bb->next
3151 = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
3152 bb->jumpkind
3153 = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
3154 bb->offsIP
3155 = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP;
3156 for (i = i_unconditional_exit; i < bb->stmts_used; i++)
3157 bb->stmts[i] = IRStmt_NoOp();
3158 }
3159 }
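#if 0
   /* Hypothetical usage sketch (compiled out): constant/copy
      propagation followed by dead code removal, over an already
      flattened superblock 'sb'. */
   IRSB* sb2 = cprop_BB(sb);   /* returns a new IRSB */
   do_deadcode_BB(sb2);        /* destructively modifies sb2 */
#endif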
3160
3161
3162 /*---------------------------------------------------------------*/
3163 /*--- Specialisation of helper function calls, in ---*/
3164 /*--- collaboration with the front end ---*/
3165 /*---------------------------------------------------------------*/
3166
3167 static
3168 IRSB* spec_helpers_BB(
3169 IRSB* bb,
3170 IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int)
3171 )
3172 {
3173 Int i;
3174 IRStmt* st;
3175 IRExpr* ex;
3176 Bool any = False;
3177
3178 for (i = bb->stmts_used-1; i >= 0; i--) {
3179 st = bb->stmts[i];
3180
3181 if (st->tag != Ist_WrTmp
3182 || st->Ist.WrTmp.data->tag != Iex_CCall)
3183 continue;
3184
3185 ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
3186 st->Ist.WrTmp.data->Iex.CCall.args,
3187 &bb->stmts[0], i );
3188 if (!ex)
3189 /* the front end can't think of a suitable replacement */
3190 continue;
3191
3192 /* We got something better. Install it in the bb. */
3193 any = True;
3194 bb->stmts[i]
3195 = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);
3196
3197 if (0) {
3198 vex_printf("SPEC: ");
3199 ppIRExpr(st->Ist.WrTmp.data);
3200 vex_printf(" --> ");
3201 ppIRExpr(ex);
3202 vex_printf("\n");
3203 }
3204 }
3205
3206 if (any)
3207 bb = flatten_BB(bb);
3208 return bb;
3209 }
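/* Typical (hypothetical) effect: a statement such as

      t5 = CCall(some-condition-helper, ...)

   is replaced by whatever cheaper inline IR the front end's
   specialiser proposes; since the replacement need not be flat, the
   block is re-flattened afterwards. */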
3210
3211
3212 /*---------------------------------------------------------------*/
3213 /*--- Determination of guest state aliasing relationships ---*/
3214 /*---------------------------------------------------------------*/
3215
3216 /* These are helper functions for CSE and GetI/PutI transformations.
3217
3218 Determine, to the extent possible, the relationship between two
3219 guest state accesses. The possible outcomes are:
3220
3221 * Exact alias. These two accesses denote precisely the same
3222 piece of the guest state.
3223
3224 * Definitely no alias. These two accesses are guaranteed not to
3225 overlap any part of the guest state.
3226
3227 * Unknown -- if neither of the above can be established.
3228
3229 If in doubt, return Unknown. */
3230
3231 typedef
3232 enum { ExactAlias, NoAlias, UnknownAlias }
3233 GSAliasing;
3234
3235
3236 /* Produces the alias relation between an indexed guest
3237 state access and a non-indexed access. */
3238
3239 static
3240 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
3241 Int offset2, IRType ty2 )
3242 {
3243 UInt minoff1, maxoff1, minoff2, maxoff2;
3244
3245 getArrayBounds( descr1, &minoff1, &maxoff1 );
3246 minoff2 = offset2;
3247 maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
3248
3249 if (maxoff1 < minoff2 || maxoff2 < minoff1)
3250 return NoAlias;
3251
3252 /* Could probably do better here if required. For the moment
3253 however just claim not to know anything more. */
3254 return UnknownAlias;
3255 }
3256
3257
3258 /* Produces the alias relation between two indexed guest state
3259 accesses. */
3260
3261 static
3262 GSAliasing getAliasingRelation_II (
3263 IRRegArray* descr1, IRExpr* ix1, Int bias1,
3264 IRRegArray* descr2, IRExpr* ix2, Int bias2
3265 )
3266 {
3267 UInt minoff1, maxoff1, minoff2, maxoff2;
3268 Int iters;
3269
3270 /* First try hard to show they don't alias. */
3271 getArrayBounds( descr1, &minoff1, &maxoff1 );
3272 getArrayBounds( descr2, &minoff2, &maxoff2 );
3273 if (maxoff1 < minoff2 || maxoff2 < minoff1)
3274 return NoAlias;
3275
3276 /* So the two arrays at least partially overlap. To get any
3277 further we'll have to be sure that the descriptors are
3278 identical. */
3279 if (!eqIRRegArray(descr1, descr2))
3280 return UnknownAlias;
3281
3282 /* The descriptors are identical. Now the only difference can be
3283 in the index expressions. If they cannot be shown to be
3284 identical, we have to say we don't know what the aliasing
3285 relation will be. Now, since the IR is flattened, the index
3286 expressions should be atoms -- either consts or tmps. So that
3287 makes the comparison simple. */
3288 vassert(isIRAtom(ix1));
3289 vassert(isIRAtom(ix2));
3290 if (!eqIRAtom(ix1,ix2))
3291 return UnknownAlias;
3292
3293 /* Ok, the index expressions are identical. So now the only way
3294 they can be different is in the bias. Normalise this
3295 paranoidly, to reliably establish equality/non-equality. */
3296
3297    /* So now we know that the two accesses index the same array
3298 with the same base. Are the offsets the same, modulo the
3299 array size? Do this paranoidly. */
3300 vassert(descr1->nElems == descr2->nElems);
3301 vassert(descr1->elemTy == descr2->elemTy);
3302 vassert(descr1->base == descr2->base);
3303 iters = 0;
3304 while (bias1 < 0 || bias2 < 0) {
3305 bias1 += descr1->nElems;
3306 bias2 += descr1->nElems;
3307 iters++;
3308 if (iters > 10)
3309 vpanic("getAliasingRelation: iters");
3310 }
3311 vassert(bias1 >= 0 && bias2 >= 0);
3312 bias1 %= descr1->nElems;
3313 bias2 %= descr1->nElems;
3314 vassert(bias1 >= 0 && bias1 < descr1->nElems);
3315 vassert(bias2 >= 0 && bias2 < descr1->nElems);
3316
3317    /* Finally, bias1 and bias2 are normalised into the range
3318       0 .. descr1->nElems - 1.  And so we can establish
3319 equality/non-equality. */
3320
3321 return bias1==bias2 ? ExactAlias : NoAlias;
3322 }
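/* Worked instance (hypothetical numbers) of the normalisation above:
   with nElems == 8, bias1 == -3 and bias2 == 13 both normalise to 5,
   giving ExactAlias; bias2 == 14 would normalise to 6 instead,
   giving NoAlias. */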
3323
3324
3325 /*---------------------------------------------------------------*/
3326 /*--- Common Subexpression Elimination ---*/
3327 /*---------------------------------------------------------------*/
3328
3329 /* Expensive in time and space. */
3330
3331 /* Uses two environments:
3332 a IRTemp -> IRTemp mapping
3333 a mapping from AvailExpr* to IRTemp
3334 */
3335
3336 typedef
3337 struct {
3338 enum { TCc, TCt } tag;
3339 union { IRTemp tmp; IRConst* con; } u;
3340 }
3341 TmpOrConst;
3342
3343 static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
3344 {
3345 if (tc1->tag != tc2->tag)
3346 return False;
3347 switch (tc1->tag) {
3348 case TCc:
3349 return eqIRConst(tc1->u.con, tc2->u.con);
3350 case TCt:
3351 return tc1->u.tmp == tc2->u.tmp;
3352 default:
3353 vpanic("eqTmpOrConst");
3354 }
3355 }
3356
3357 static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 )
3358 {
3359 Bool eq = cee1->addr == cee2->addr;
3360 if (eq) {
3361 vassert(cee1->regparms == cee2->regparms);
3362 vassert(cee1->mcx_mask == cee2->mcx_mask);
3363 /* Names should be the same too, but we don't bother to
3364 check. */
3365 }
3366 return eq;
3367 }
3368
3369 /* Convert an atomic IRExpr* to a TmpOrConst. */
3370 static void irExpr_to_TmpOrConst ( /*OUT*/TmpOrConst* tc, IRExpr* e )
3371 {
3372 switch (e->tag) {
3373 case Iex_RdTmp:
3374 tc->tag = TCt;
3375 tc->u.tmp = e->Iex.RdTmp.tmp;
3376 break;
3377 case Iex_Const:
3378 tc->tag = TCc;
3379 tc->u.con = e->Iex.Const.con;
3380 break;
3381 default:
3382 /* Getting here is a serious error. It means that the
3383 presented arg isn't an IR atom, as it should be. */
3384 vpanic("irExpr_to_TmpOrConst");
3385 }
3386 }
3387
3388 /* Convert a TmpOrConst to an atomic IRExpr*. */
3389 static IRExpr* tmpOrConst_to_IRExpr ( TmpOrConst* tc )
3390 {
3391 switch (tc->tag) {
3392 case TCc: return IRExpr_Const(tc->u.con);
3393 case TCt: return IRExpr_RdTmp(tc->u.tmp);
3394 default: vpanic("tmpOrConst_to_IRExpr");
3395 }
3396 }
3397
3398 /* Convert a NULL terminated IRExpr* vector to an array of
3399 TmpOrConsts, and a length. */
3400 static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs,
3401 /*OUT*/Int* nOuts,
3402 IRExpr** ins )
3403 {
3404 Int i, n;
3405 /* We have to make two passes, one to count, one to copy. */
3406 for (n = 0; ins[n]; n++)
3407 ;
3408 *outs = LibVEX_Alloc_inline(n * sizeof(TmpOrConst));
3409 *nOuts = n;
3410 /* and now copy .. */
3411 for (i = 0; i < n; i++) {
3412 IRExpr* arg = ins[i];
3413 TmpOrConst* dst = &(*outs)[i];
3414 irExpr_to_TmpOrConst(dst, arg);
3415 }
3416 }
3417
3418 typedef
3419 struct {
3420 enum { Ut, Btt, Btc, Bct, Cf64i, Ittt, Itct, Ittc, Itcc, GetIt,
3421 CCall, Load
3422 } tag;
3423 union {
3424 /* unop(tmp) */
3425 struct {
3426 IROp op;
3427 IRTemp arg;
3428 } Ut;
3429 /* binop(tmp,tmp) */
3430 struct {
3431 IROp op;
3432 IRTemp arg1;
3433 IRTemp arg2;
3434 } Btt;
3435 /* binop(tmp,const) */
3436 struct {
3437 IROp op;
3438 IRTemp arg1;
3439 IRConst con2;
3440 } Btc;
3441 /* binop(const,tmp) */
3442 struct {
3443 IROp op;
3444 IRConst con1;
3445 IRTemp arg2;
3446 } Bct;
3447 /* F64i-style const */
3448 struct {
3449 ULong f64i;
3450 } Cf64i;
3451 /* ITE(tmp,tmp,tmp) */
3452 struct {
3453 IRTemp co;
3454 IRTemp e1;
3455 IRTemp e0;
3456 } Ittt;
3457 /* ITE(tmp,tmp,const) */
3458 struct {
3459 IRTemp co;
3460 IRTemp e1;
3461 IRConst con0;
3462 } Ittc;
3463 /* ITE(tmp,const,tmp) */
3464 struct {
3465 IRTemp co;
3466 IRConst con1;
3467 IRTemp e0;
3468 } Itct;
3469 /* ITE(tmp,const,const) */
3470 struct {
3471 IRTemp co;
3472 IRConst con1;
3473 IRConst con0;
3474 } Itcc;
3475 /* GetI(descr,tmp,bias)*/
3476 struct {
3477 IRRegArray* descr;
3478 IRTemp ix;
3479 Int bias;
3480 } GetIt;
3481 /* Clean helper call */
3482 struct {
3483 IRCallee* cee;
3484 TmpOrConst* args;
3485 Int nArgs;
3486 IRType retty;
3487 } CCall;
3488 /* Load(end,ty,addr) */
3489 struct {
3490 IREndness end;
3491 IRType ty;
3492 TmpOrConst addr;
3493 } Load;
3494 } u;
3495 }
3496 AvailExpr;
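/* For example (illustrative), Add32(t7,0x1:I32) is summarised as a
   Btc node with op == Iop_Add32, arg1 == t7 and con2 == 0x1:I32, so
   that two structurally identical binops compare equal via
   eq_AvailExpr below. */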
3497
3498 static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
3499 {
3500 if (LIKELY(a1->tag != a2->tag))
3501 return False;
3502 switch (a1->tag) {
3503 case Ut:
3504 return toBool(
3505 a1->u.Ut.op == a2->u.Ut.op
3506 && a1->u.Ut.arg == a2->u.Ut.arg);
3507 case Btt:
3508 return toBool(
3509 a1->u.Btt.op == a2->u.Btt.op
3510 && a1->u.Btt.arg1 == a2->u.Btt.arg1
3511 && a1->u.Btt.arg2 == a2->u.Btt.arg2);
3512 case Btc:
3513 return toBool(
3514 a1->u.Btc.op == a2->u.Btc.op
3515 && a1->u.Btc.arg1 == a2->u.Btc.arg1
3516 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
3517 case Bct:
3518 return toBool(
3519 a1->u.Bct.op == a2->u.Bct.op
3520 && a1->u.Bct.arg2 == a2->u.Bct.arg2
3521 && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
3522 case Cf64i:
3523 return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
3524 case Ittt:
3525 return toBool(a1->u.Ittt.co == a2->u.Ittt.co
3526 && a1->u.Ittt.e1 == a2->u.Ittt.e1
3527 && a1->u.Ittt.e0 == a2->u.Ittt.e0);
3528 case Ittc:
3529 return toBool(a1->u.Ittc.co == a2->u.Ittc.co
3530 && a1->u.Ittc.e1 == a2->u.Ittc.e1
3531 && eqIRConst(&a1->u.Ittc.con0, &a2->u.Ittc.con0));
3532 case Itct:
3533 return toBool(a1->u.Itct.co == a2->u.Itct.co
3534 && eqIRConst(&a1->u.Itct.con1, &a2->u.Itct.con1)
3535 && a1->u.Itct.e0 == a2->u.Itct.e0);
3536 case Itcc:
3537 return toBool(a1->u.Itcc.co == a2->u.Itcc.co
3538 && eqIRConst(&a1->u.Itcc.con1, &a2->u.Itcc.con1)
3539 && eqIRConst(&a1->u.Itcc.con0, &a2->u.Itcc.con0));
3540 case GetIt:
3541 return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
3542 && a1->u.GetIt.ix == a2->u.GetIt.ix
3543 && a1->u.GetIt.bias == a2->u.GetIt.bias);
3544 case CCall: {
3545 Int i, n;
3546 Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs
3547 && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee);
3548 if (eq) {
3549 n = a1->u.CCall.nArgs;
3550 for (i = 0; i < n; i++) {
3551 if (!eqTmpOrConst( &a1->u.CCall.args[i],
3552 &a2->u.CCall.args[i] )) {
3553 eq = False;
3554 break;
3555 }
3556 }
3557 }
3558 if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty);
3559 return eq;
3560 }
3561 case Load: {
3562 Bool eq = toBool(a1->u.Load.end == a2->u.Load.end
3563 && a1->u.Load.ty == a2->u.Load.ty
3564 && eqTmpOrConst(&a1->u.Load.addr, &a2->u.Load.addr));
3565 return eq;
3566 }
3567 default:
3568 vpanic("eq_AvailExpr");
3569 }
3570 }
3571
3572 static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
3573 {
3574 IRConst *con, *con0, *con1;
3575 switch (ae->tag) {
3576 case Ut:
3577 return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
3578 case Btt:
3579 return IRExpr_Binop( ae->u.Btt.op,
3580 IRExpr_RdTmp(ae->u.Btt.arg1),
3581 IRExpr_RdTmp(ae->u.Btt.arg2) );
3582 case Btc:
3583 con = LibVEX_Alloc_inline(sizeof(IRConst));
3584 *con = ae->u.Btc.con2;
3585 return IRExpr_Binop( ae->u.Btc.op,
3586 IRExpr_RdTmp(ae->u.Btc.arg1),
3587 IRExpr_Const(con) );
3588 case Bct:
3589 con = LibVEX_Alloc_inline(sizeof(IRConst));
3590 *con = ae->u.Bct.con1;
3591 return IRExpr_Binop( ae->u.Bct.op,
3592 IRExpr_Const(con),
3593 IRExpr_RdTmp(ae->u.Bct.arg2) );
3594 case Cf64i:
3595 return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
3596 case Ittt:
3597 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittt.co),
3598 IRExpr_RdTmp(ae->u.Ittt.e1),
3599 IRExpr_RdTmp(ae->u.Ittt.e0));
3600 case Ittc:
3601 con0 = LibVEX_Alloc_inline(sizeof(IRConst));
3602 *con0 = ae->u.Ittc.con0;
3603 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittc.co),
3604 IRExpr_RdTmp(ae->u.Ittc.e1),
3605 IRExpr_Const(con0));
3606 case Itct:
3607 con1 = LibVEX_Alloc_inline(sizeof(IRConst));
3608 *con1 = ae->u.Itct.con1;
3609 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itct.co),
3610 IRExpr_Const(con1),
3611 IRExpr_RdTmp(ae->u.Itct.e0));
3612
3613 case Itcc:
3614 con0 = LibVEX_Alloc_inline(sizeof(IRConst));
3615 con1 = LibVEX_Alloc_inline(sizeof(IRConst));
3616 *con0 = ae->u.Itcc.con0;
3617 *con1 = ae->u.Itcc.con1;
3618 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itcc.co),
3619 IRExpr_Const(con1),
3620 IRExpr_Const(con0));
3621 case GetIt:
3622 return IRExpr_GetI(ae->u.GetIt.descr,
3623 IRExpr_RdTmp(ae->u.GetIt.ix),
3624 ae->u.GetIt.bias);
3625 case CCall: {
3626 Int i, n = ae->u.CCall.nArgs;
3627 vassert(n >= 0);
3628 IRExpr** vec = LibVEX_Alloc_inline((n+1) * sizeof(IRExpr*));
3629 vec[n] = NULL;
3630 for (i = 0; i < n; i++) {
3631 vec[i] = tmpOrConst_to_IRExpr(&ae->u.CCall.args[i]);
3632 }
3633 return IRExpr_CCall(ae->u.CCall.cee,
3634 ae->u.CCall.retty,
3635 vec);
3636 }
3637 case Load:
3638 return IRExpr_Load(ae->u.Load.end, ae->u.Load.ty,
3639 tmpOrConst_to_IRExpr(&ae->u.Load.addr));
3640 default:
3641 vpanic("availExpr_to_IRExpr");
3642 }
3643 }
3644
3645 inline
3646 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
3647 {
3648 HWord res;
3649 /* env :: IRTemp -> IRTemp */
3650 if (lookupHHW( env, &res, (HWord)tmp ))
3651 return (IRTemp)res;
3652 else
3653 return tmp;
3654 }
3655
3656 inline
3657 static void subst_AvailExpr_TmpOrConst ( /*MB_MOD*/TmpOrConst* tc,
3658 HashHW* env )
3659 {
3660 /* env :: IRTemp -> IRTemp */
3661 if (tc->tag == TCt) {
3662 tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp );
3663 }
3664 }
3665
3666 static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
3667 {
3668 /* env :: IRTemp -> IRTemp */
3669 switch (ae->tag) {
3670 case Ut:
3671 ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
3672 break;
3673 case Btt:
3674 ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
3675 ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
3676 break;
3677 case Btc:
3678 ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
3679 break;
3680 case Bct:
3681 ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
3682 break;
3683 case Cf64i:
3684 break;
3685 case Ittt:
3686 ae->u.Ittt.co = subst_AvailExpr_Temp( env, ae->u.Ittt.co );
3687 ae->u.Ittt.e1 = subst_AvailExpr_Temp( env, ae->u.Ittt.e1 );
3688 ae->u.Ittt.e0 = subst_AvailExpr_Temp( env, ae->u.Ittt.e0 );
3689 break;
3690 case Ittc:
3691 ae->u.Ittc.co = subst_AvailExpr_Temp( env, ae->u.Ittc.co );
3692 ae->u.Ittc.e1 = subst_AvailExpr_Temp( env, ae->u.Ittc.e1 );
3693 break;
3694 case Itct:
3695 ae->u.Itct.co = subst_AvailExpr_Temp( env, ae->u.Itct.co );
3696 ae->u.Itct.e0 = subst_AvailExpr_Temp( env, ae->u.Itct.e0 );
3697 break;
3698 case Itcc:
3699 ae->u.Itcc.co = subst_AvailExpr_Temp( env, ae->u.Itcc.co );
3700 break;
3701 case GetIt:
3702 ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
3703 break;
3704 case CCall: {
3705          Int i, n = ae->u.CCall.nArgs;
3706 for (i = 0; i < n; i++) {
3707 subst_AvailExpr_TmpOrConst(&ae->u.CCall.args[i], env);
3708 }
3709 break;
3710 }
3711 case Load:
3712 subst_AvailExpr_TmpOrConst(&ae->u.Load.addr, env);
3713 break;
3714 default:
3715 vpanic("subst_AvailExpr");
3716 }
3717 }
3718
3719 static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e, Bool allowLoadsToBeCSEd )
3720 {
3721 AvailExpr* ae;
3722
3723 switch (e->tag) {
3724 case Iex_Unop:
3725 if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
3726 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3727 ae->tag = Ut;
3728 ae->u.Ut.op = e->Iex.Unop.op;
3729 ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
3730 return ae;
3731 }
3732 break;
3733
3734 case Iex_Binop:
3735 if (e->Iex.Binop.arg1->tag == Iex_RdTmp) {
3736 if (e->Iex.Binop.arg2->tag == Iex_RdTmp) {
3737 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3738 ae->tag = Btt;
3739 ae->u.Btt.op = e->Iex.Binop.op;
3740 ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
3741 ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
3742 return ae;
3743 }
3744 if (e->Iex.Binop.arg2->tag == Iex_Const) {
3745 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3746 ae->tag = Btc;
3747 ae->u.Btc.op = e->Iex.Binop.op;
3748 ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
3749 ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
3750 return ae;
3751 }
3752 } else if (e->Iex.Binop.arg1->tag == Iex_Const
3753 && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
3754 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3755 ae->tag = Bct;
3756 ae->u.Bct.op = e->Iex.Binop.op;
3757 ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
3758 ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
3759 return ae;
3760 }
3761 break;
3762
3763 case Iex_Const:
3764 if (e->Iex.Const.con->tag == Ico_F64i) {
3765 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3766 ae->tag = Cf64i;
3767 ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
3768 return ae;
3769 }
3770 break;
3771
3772 case Iex_ITE:
3773 if (e->Iex.ITE.cond->tag == Iex_RdTmp) {
3774 if (e->Iex.ITE.iffalse->tag == Iex_RdTmp) {
3775 if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
3776 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3777 ae->tag = Ittt;
3778 ae->u.Ittt.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3779 ae->u.Ittt.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
3780 ae->u.Ittt.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
3781 return ae;
3782 }
3783 if (e->Iex.ITE.iftrue->tag == Iex_Const) {
3784 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3785 ae->tag = Itct;
3786 ae->u.Itct.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3787 ae->u.Itct.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
3788 ae->u.Itct.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
3789 return ae;
3790 }
3791 } else if (e->Iex.ITE.iffalse->tag == Iex_Const) {
3792 if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
3793 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3794 ae->tag = Ittc;
3795 ae->u.Ittc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3796 ae->u.Ittc.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
3797 ae->u.Ittc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
3798 return ae;
3799 }
3800 if (e->Iex.ITE.iftrue->tag == Iex_Const) {
3801 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3802 ae->tag = Itcc;
3803 ae->u.Itcc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3804 ae->u.Itcc.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
3805 ae->u.Itcc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
3806 return ae;
3807 }
3808 }
3809 }
3810 break;
3811
3812 case Iex_GetI:
3813 if (e->Iex.GetI.ix->tag == Iex_RdTmp) {
3814 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3815 ae->tag = GetIt;
3816 ae->u.GetIt.descr = e->Iex.GetI.descr;
3817 ae->u.GetIt.ix = e->Iex.GetI.ix->Iex.RdTmp.tmp;
3818 ae->u.GetIt.bias = e->Iex.GetI.bias;
3819 return ae;
3820 }
3821 break;
3822
3823 case Iex_CCall:
3824 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3825 ae->tag = CCall;
3826 /* Ok to share only the cee, since it is immutable. */
3827 ae->u.CCall.cee = e->Iex.CCall.cee;
3828 ae->u.CCall.retty = e->Iex.CCall.retty;
3829 /* irExprVec_to_TmpOrConsts will assert if the args are
3830 neither tmps nor constants, but that's ok .. that's all they
3831 should be. */
3832 irExprVec_to_TmpOrConsts(
3833 &ae->u.CCall.args, &ae->u.CCall.nArgs,
3834 e->Iex.CCall.args
3835 );
3836 return ae;
3837
3838 case Iex_Load:
3839 /* If the caller of do_cse_BB has requested that loads also
3840 be CSEd, convert them into AvailExprs. If not, we'll just
3841 return NULL here, and the load never becomes considered
3842 "available", which effectively disables CSEing of them, as
3843 desired. */
3844 if (allowLoadsToBeCSEd) {
3845 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3846 ae->tag = Load;
3847 ae->u.Load.end = e->Iex.Load.end;
3848 ae->u.Load.ty = e->Iex.Load.ty;
3849 irExpr_to_TmpOrConst(&ae->u.Load.addr, e->Iex.Load.addr);
3850 return ae;
3851 }
3852 break;
3853
3854 default:
3855 break;
3856 }
3857
3858 return NULL;
3859 }
3860
3861
3862 /* The BB is modified in-place. Returns True if any changes were
3863 made. The caller can choose whether or not loads should be CSEd.
3864 In the normal course of things we don't do that, since CSEing loads
3865 is something of a dodgy proposition if the guest program is doing
3866 some screwy stuff to do with races and spinloops. */
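
/* Worked example (an editor's sketch; the temp numbering is
   hypothetical).  Given

      t10 = Add32(t1,t2)
      t11 = Add32(t1,t2)
      t12 = Add32(t11,0x4:I32)

   the second Add32 is found in aenv when t11 is reached, and the
   tenv substitution then redirects t12's argument, giving

      t10 = Add32(t1,t2)
      t11 = t10
      t12 = Add32(t10,0x4:I32)
*/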
3867
3868 static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd )
3869 {
3870 Int i, j, paranoia;
3871 IRTemp t, q;
3872 IRStmt* st;
3873 AvailExpr* eprime;
3874 AvailExpr* ae;
3875 Bool invalidate;
3876 Bool anyDone = False;
3877
3878 HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
3879 HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */
3880
3881 vassert(sizeof(IRTemp) <= sizeof(HWord));
3882
3883 if (0) { ppIRSB(bb); vex_printf("\n\n"); }
3884
3885 /* Iterate forwards over the stmts.
3886 On seeing "t = E", where E is one of the AvailExpr forms:
3887 let E' = apply tenv substitution to E
3888 search aenv for E'
3889 if a mapping E' -> q is found,
3890 replace this stmt by "t = q"
3891 and add binding t -> q to tenv
3892 else
3893 add binding E' -> t to aenv
3894 replace this stmt by "t = E'"
3895
3896 Other statements are only interesting to the extent that they
3897 might invalidate some of the expressions in aenv. So there is
3898 an invalidate-bindings check for each statement seen.
3899 */
3900 for (i = 0; i < bb->stmts_used; i++) {
3901 st = bb->stmts[i];
3902
3903 /* ------ BEGIN invalidate aenv bindings ------ */
3904 /* This is critical: remove from aenv any E' -> .. bindings
3905 which might be invalidated by this statement. The only
3906          vulnerable kinds of bindings are the GetIt and Load kinds.
3907 Dirty call - dump (paranoia level -> 2)
3908 Store - dump (ditto)
3909 Put, PutI - dump unless no-overlap is proven (.. -> 1)
3910 Uses getAliasingRelation_IC and getAliasingRelation_II
3911 to do the no-overlap assessments needed for Put/PutI.
3912 */
3913 switch (st->tag) {
3914 case Ist_Dirty: case Ist_Store: case Ist_MBE:
3915 case Ist_CAS: case Ist_LLSC:
3916 case Ist_StoreG:
3917 paranoia = 2; break;
3918 case Ist_Put: case Ist_PutI:
3919 paranoia = 1; break;
3920 case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
3921 case Ist_WrTmp: case Ist_Exit: case Ist_LoadG:
3922 paranoia = 0; break;
3923 default:
3924 vpanic("do_cse_BB(1)");
3925 }
3926
3927 if (paranoia > 0) {
3928 for (j = 0; j < aenv->used; j++) {
3929 if (!aenv->inuse[j])
3930 continue;
3931 ae = (AvailExpr*)aenv->key[j];
3932 if (ae->tag != GetIt && ae->tag != Load)
3933 continue;
3934 invalidate = False;
3935 if (paranoia >= 2) {
3936 invalidate = True;
3937 } else {
3938 vassert(paranoia == 1);
3939 if (ae->tag == Load) {
3940 /* Loads can be invalidated by anything that could
3941 possibly touch memory. But in that case we
3942 should have |paranoia| == 2 and we won't get
3943 here. So there's nothing to do; we don't have to
3944 invalidate the load. */
3945 }
3946 else
3947 if (st->tag == Ist_Put) {
3948 if (getAliasingRelation_IC(
3949 ae->u.GetIt.descr,
3950 IRExpr_RdTmp(ae->u.GetIt.ix),
3951 st->Ist.Put.offset,
3952 typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
3953 ) != NoAlias)
3954 invalidate = True;
3955 }
3956 else
3957 if (st->tag == Ist_PutI) {
3958 IRPutI *puti = st->Ist.PutI.details;
3959 if (getAliasingRelation_II(
3960 ae->u.GetIt.descr,
3961 IRExpr_RdTmp(ae->u.GetIt.ix),
3962 ae->u.GetIt.bias,
3963 puti->descr,
3964 puti->ix,
3965 puti->bias
3966 ) != NoAlias)
3967 invalidate = True;
3968 }
3969 else
3970 vpanic("do_cse_BB(2)");
3971 }
3972
3973 if (invalidate) {
3974 aenv->inuse[j] = False;
3975 aenv->key[j] = (HWord)NULL; /* be sure */
3976 }
3977 } /* for j */
3978 } /* paranoia > 0 */
3979
3980       /* ------ END invalidate aenv bindings ------ */
3981
3982 /* ignore not-interestings */
3983 if (st->tag != Ist_WrTmp)
3984 continue;
3985
3986 t = st->Ist.WrTmp.tmp;
3987 eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data, allowLoadsToBeCSEd);
3988 /* ignore if not of AvailExpr form */
3989 if (!eprime)
3990 continue;
3991
3992 /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
3993
3994 /* apply tenv */
3995 subst_AvailExpr( tenv, eprime );
3996
3997 /* search aenv for eprime, unfortunately the hard way */
3998 for (j = 0; j < aenv->used; j++)
3999 if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
4000 break;
4001
4002 if (j < aenv->used) {
4003 /* A binding E' -> q was found. Replace stmt by "t = q" and
4004 note the t->q binding in tenv. */
4005 /* (this is the core of the CSE action) */
4006 q = (IRTemp)aenv->val[j];
4007 bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
4008 addToHHW( tenv, (HWord)t, (HWord)q );
4009 anyDone = True;
4010 } else {
4011 /* No binding was found, so instead we add E' -> t to our
4012 collection of available expressions, replace this stmt
4013 with "t = E'", and move on. */
4014 bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
4015 addToHHW( aenv, (HWord)eprime, (HWord)t );
4016 }
4017 }
4018
4019 /*
4020 ppIRSB(bb);
4021 sanityCheckIRSB(bb, Ity_I32);
4022 vex_printf("\n\n");
4023 */
4024 return anyDone;
4025 }
4026
4027
4028 /*---------------------------------------------------------------*/
4029 /*--- Add32/Sub32 chain collapsing ---*/
4030 /*---------------------------------------------------------------*/
4031
4032 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
4033
4034 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ? If
4035 yes, set *tmp and *i32 appropriately. *i32 is set as if the
4036 root node is Add32, not Sub32. */
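
/* E.g. (a sketch): for "Sub32(t7,0x10:I32)" this sets *tmp = t7 and
   *i32 = -16, exactly as it would for "Add32(t7,0xFFFFFFF0:I32)". */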
4037
4038 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
4039 {
4040 if (e->tag != Iex_Binop)
4041 return False;
4042 if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
4043 return False;
4044 if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
4045 return False;
4046 if (e->Iex.Binop.arg2->tag != Iex_Const)
4047 return False;
4048 *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
4049 *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
4050 if (e->Iex.Binop.op == Iop_Sub32)
4051 *i32 = -*i32;
4052 return True;
4053 }
4054
4055
4056 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
4057 other tmp2. Scan backwards from the specified start point -- an
4058 optimisation. */
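
/* E.g. (a sketch; hypothetical temps): given

      t3 = Add32(t1,0x4:I32)
      t5 = Sub32(t3,0x1:I32)

   a query for t5 walks the chain backwards and reports t5 == t1 + 3,
   setting *tmp2 = t1 and *i32 = 3. */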
4059
4060 static Bool collapseChain ( IRSB* bb, Int startHere,
4061 IRTemp tmp,
4062 IRTemp* tmp2, Int* i32 )
4063 {
4064 Int j, ii;
4065 IRTemp vv;
4066 IRStmt* st;
4067 IRExpr* e;
4068
4069    /* the (var, con) pair contains the current 'representation' for
4070 'tmp'. We start with 'tmp + 0'. */
4071 IRTemp var = tmp;
4072 Int con = 0;
4073
4074 /* Scan backwards to see if tmp can be replaced by some other tmp
4075 +/- a constant. */
4076 for (j = startHere; j >= 0; j--) {
4077 st = bb->stmts[j];
4078 if (st->tag != Ist_WrTmp)
4079 continue;
4080 if (st->Ist.WrTmp.tmp != var)
4081 continue;
4082 e = st->Ist.WrTmp.data;
4083 if (!isAdd32OrSub32(e, &vv, &ii))
4084 break;
4085 var = vv;
4086 con += ii;
4087 }
4088 if (j == -1)
4089 /* no earlier binding for var .. ill-formed IR */
4090 vpanic("collapseChain");
4091
4092 /* so, did we find anything interesting? */
4093 if (var == tmp)
4094 return False; /* no .. */
4095
4096 *tmp2 = var;
4097 *i32 = con;
4098 return True;
4099 }
4100
4101
4102 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
4103
4104 static void collapse_AddSub_chains_BB ( IRSB* bb )
4105 {
4106 IRStmt *st;
4107 IRTemp var, var2;
4108 Int i, con, con2;
4109
4110 for (i = bb->stmts_used-1; i >= 0; i--) {
4111 st = bb->stmts[i];
4112 if (st->tag == Ist_NoOp)
4113 continue;
4114
4115 /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
4116
4117 if (st->tag == Ist_WrTmp
4118 && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
4119
4120 /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
4121 Find out if var can be expressed as var2 + con2. */
4122 if (collapseChain(bb, i-1, var, &var2, &con2)) {
4123 if (DEBUG_IROPT) {
4124 vex_printf("replacing1 ");
4125 ppIRStmt(st);
4126 vex_printf(" with ");
4127 }
4128 con2 += con;
4129 bb->stmts[i]
4130 = IRStmt_WrTmp(
4131 st->Ist.WrTmp.tmp,
4132 (con2 >= 0)
4133 ? IRExpr_Binop(Iop_Add32,
4134 IRExpr_RdTmp(var2),
4135 IRExpr_Const(IRConst_U32(con2)))
4136 : IRExpr_Binop(Iop_Sub32,
4137 IRExpr_RdTmp(var2),
4138 IRExpr_Const(IRConst_U32(-con2)))
4139 );
4140 if (DEBUG_IROPT) {
4141 ppIRStmt(bb->stmts[i]);
4142 vex_printf("\n");
4143 }
4144 }
4145
4146 continue;
4147 }
4148
4149 /* Try to collapse 't1 = GetI[t2, con]'. */
4150
4151 if (st->tag == Ist_WrTmp
4152 && st->Ist.WrTmp.data->tag == Iex_GetI
4153 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
4154 && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
4155 ->Iex.RdTmp.tmp, &var2, &con2)) {
4156 if (DEBUG_IROPT) {
4157 vex_printf("replacing3 ");
4158 ppIRStmt(st);
4159 vex_printf(" with ");
4160 }
4161 con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
4162 bb->stmts[i]
4163 = IRStmt_WrTmp(
4164 st->Ist.WrTmp.tmp,
4165 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
4166 IRExpr_RdTmp(var2),
4167 con2));
4168 if (DEBUG_IROPT) {
4169 ppIRStmt(bb->stmts[i]);
4170 vex_printf("\n");
4171 }
4172 continue;
4173 }
4174
4175 /* Perhaps st is PutI[t, con] ? */
4176 IRPutI *puti = st->Ist.PutI.details;
4177 if (st->tag == Ist_PutI
4178 && puti->ix->tag == Iex_RdTmp
4179 && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp,
4180 &var2, &con2)) {
4181 if (DEBUG_IROPT) {
4182 vex_printf("replacing2 ");
4183 ppIRStmt(st);
4184 vex_printf(" with ");
4185 }
4186 con2 += puti->bias;
4187 bb->stmts[i]
4188 = IRStmt_PutI(mkIRPutI(puti->descr,
4189 IRExpr_RdTmp(var2),
4190 con2,
4191 puti->data));
4192 if (DEBUG_IROPT) {
4193 ppIRStmt(bb->stmts[i]);
4194 vex_printf("\n");
4195 }
4196 continue;
4197 }
4198
4199 } /* for */
4200 }
4201
4202
4203 /*---------------------------------------------------------------*/
4204 /*--- PutI/GetI transformations ---*/
4205 /*---------------------------------------------------------------*/
4206
4207 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
4208 the given starting point to find, if any, a PutI which writes
4209 exactly the same piece of guest state, and so return the expression
4210 that the PutI writes. This is the core of PutI-GetI forwarding. */
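
/* E.g. (a sketch): in

      PutI[descr](ix=t1, bias=7) = t9
      ... no invalidating Put/PutI/dirty call in between ...
      t12 = GetI[descr](ix=t1, bias=7)

   the GetI reads back exactly what the PutI wrote, so the search
   returns t9, which the caller can use in place of the GetI. */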
4211
4212 static
4213 IRExpr* findPutI ( IRSB* bb, Int startHere,
4214 IRRegArray* descrG, IRExpr* ixG, Int biasG )
4215 {
4216 Int j;
4217 IRStmt* st;
4218 GSAliasing relation;
4219
4220 if (0) {
4221 vex_printf("\nfindPutI ");
4222 ppIRRegArray(descrG);
4223 vex_printf(" ");
4224 ppIRExpr(ixG);
4225 vex_printf(" %d\n", biasG);
4226 }
4227
4228 /* Scan backwards in bb from startHere to find a suitable PutI
4229 binding for (descrG, ixG, biasG), if any. */
4230
4231 for (j = startHere; j >= 0; j--) {
4232 st = bb->stmts[j];
4233 if (st->tag == Ist_NoOp)
4234 continue;
4235
4236 if (st->tag == Ist_Put) {
4237 /* Non-indexed Put. This can't give a binding, but we do
4238 need to check it doesn't invalidate the search by
4239 overlapping any part of the indexed guest state. */
4240
4241 relation
4242 = getAliasingRelation_IC(
4243 descrG, ixG,
4244 st->Ist.Put.offset,
4245 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
4246
4247 if (relation == NoAlias) {
4248 /* we're OK; keep going */
4249 continue;
4250 } else {
4251 /* relation == UnknownAlias || relation == ExactAlias */
4252 /* If this assertion fails, we've found a Put which writes
4253 an area of guest state which is read by a GetI. Which
4254 is unlikely (although not per se wrong). */
4255 vassert(relation != ExactAlias);
4256 /* This Put potentially writes guest state that the GetI
4257 reads; we must fail. */
4258 return NULL;
4259 }
4260 }
4261
4262 if (st->tag == Ist_PutI) {
4263 IRPutI *puti = st->Ist.PutI.details;
4264
4265 relation = getAliasingRelation_II(
4266 descrG, ixG, biasG,
4267 puti->descr,
4268 puti->ix,
4269 puti->bias
4270 );
4271
4272 if (relation == NoAlias) {
4273 /* This PutI definitely doesn't overlap. Ignore it and
4274 keep going. */
4275 continue; /* the for j loop */
4276 }
4277
4278 if (relation == UnknownAlias) {
4279             /* We don't know whether this PutI writes to the same guest
4280                state that the GetI reads.  So we have to give up. */
4281 return NULL;
4282 }
4283
4284 /* Otherwise, we've found what we're looking for. */
4285 vassert(relation == ExactAlias);
4286 return puti->data;
4287
4288 } /* if (st->tag == Ist_PutI) */
4289
4290 if (st->tag == Ist_Dirty) {
4291 /* Be conservative. If the dirty call has any guest effects at
4292 all, give up. We could do better -- only give up if there
4293 are any guest writes/modifies. */
4294 if (st->Ist.Dirty.details->nFxState > 0)
4295 return NULL;
4296 }
4297
4298 } /* for */
4299
4300 /* No valid replacement was found. */
4301 return NULL;
4302 }
4303
4304
4305
4306 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
4307 that it writes exactly the same piece of guest state) ? Safe
4308 answer: False. */
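
/* E.g. (a sketch): two PutIs with the same descr, the same ix temp
   and the same bias write exactly the same state and compare
   identical; if the ix temps differ we can't tell, and the safe
   answer False results. */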
4309
4310 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
4311 {
4312 vassert(pi->tag == Ist_PutI);
4313 if (s2->tag != Ist_PutI)
4314 return False;
4315
4316 IRPutI *p1 = pi->Ist.PutI.details;
4317 IRPutI *p2 = s2->Ist.PutI.details;
4318
4319 return toBool(
4320 getAliasingRelation_II(
4321 p1->descr, p1->ix, p1->bias,
4322 p2->descr, p2->ix, p2->bias
4323 )
4324 == ExactAlias
4325 );
4326 }
4327
4328
4329 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
4330 overlap it? Safe answer: True. Note, we could do a lot better
4331 than this if needed. */
4332
4333 static
4334 Bool guestAccessWhichMightOverlapPutI (
4335 IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
4336 )
4337 {
4338 GSAliasing relation;
4339 UInt minoffP, maxoffP;
4340
4341 vassert(pi->tag == Ist_PutI);
4342
4343 IRPutI *p1 = pi->Ist.PutI.details;
4344
4345 getArrayBounds(p1->descr, &minoffP, &maxoffP);
4346 switch (s2->tag) {
4347
4348 case Ist_NoOp:
4349 case Ist_IMark:
4350 return False;
4351
4352 case Ist_MBE:
4353 case Ist_AbiHint:
4354 /* just be paranoid ... these should be rare. */
4355 return True;
4356
4357 case Ist_CAS:
4358 /* This is unbelievably lame, but it's probably not
4359 significant from a performance point of view. Really, a
4360 CAS is a load-store op, so it should be safe to say False.
4361 However .. */
4362 return True;
4363
4364 case Ist_Dirty:
4365 /* If the dirty call has any guest effects at all, give up.
4366 Probably could do better. */
4367 if (s2->Ist.Dirty.details->nFxState > 0)
4368 return True;
4369 return False;
4370
4371 case Ist_Put:
4372 vassert(isIRAtom(s2->Ist.Put.data));
4373 relation
4374 = getAliasingRelation_IC(
4375 p1->descr, p1->ix,
4376 s2->Ist.Put.offset,
4377 typeOfIRExpr(tyenv,s2->Ist.Put.data)
4378 );
4379 goto have_relation;
4380
4381 case Ist_PutI: {
4382 IRPutI *p2 = s2->Ist.PutI.details;
4383
4384 vassert(isIRAtom(p2->ix));
4385 vassert(isIRAtom(p2->data));
4386 relation
4387 = getAliasingRelation_II(
4388 p1->descr, p1->ix, p1->bias,
4389 p2->descr, p2->ix, p2->bias
4390 );
4391 goto have_relation;
4392 }
4393
4394 case Ist_WrTmp:
4395 if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
4396 relation
4397 = getAliasingRelation_II(
4398 p1->descr, p1->ix, p1->bias,
4399 s2->Ist.WrTmp.data->Iex.GetI.descr,
4400 s2->Ist.WrTmp.data->Iex.GetI.ix,
4401 s2->Ist.WrTmp.data->Iex.GetI.bias
4402 );
4403 goto have_relation;
4404 }
4405 if (s2->Ist.WrTmp.data->tag == Iex_Get) {
4406 relation
4407 = getAliasingRelation_IC(
4408 p1->descr, p1->ix,
4409 s2->Ist.WrTmp.data->Iex.Get.offset,
4410 s2->Ist.WrTmp.data->Iex.Get.ty
4411 );
4412 goto have_relation;
4413 }
4414 return False;
4415
4416 case Ist_Store:
4417 vassert(isIRAtom(s2->Ist.Store.addr));
4418 vassert(isIRAtom(s2->Ist.Store.data));
4419 return False;
4420
4421 default:
4422 vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
4423 vpanic("guestAccessWhichMightOverlapPutI");
4424 }
4425
4426 have_relation:
4427 if (relation == NoAlias)
4428 return False;
4429 else
4430 return True; /* ExactAlias or UnknownAlias */
4431 }
4432
4433
4434
4435 /* ---------- PutI/GetI transformations main functions --------- */
4436
4437 /* Remove redundant GetIs, to the extent that they can be detected.
4438 bb is modified in-place. */
4439
4440 static
4441 void do_redundant_GetI_elimination ( IRSB* bb )
4442 {
4443 Int i;
4444 IRStmt* st;
4445
4446 for (i = bb->stmts_used-1; i >= 0; i--) {
4447 st = bb->stmts[i];
4448 if (st->tag == Ist_NoOp)
4449 continue;
4450
4451 if (st->tag == Ist_WrTmp
4452 && st->Ist.WrTmp.data->tag == Iex_GetI
4453 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
4454 IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
4455 IRExpr* ix = st->Ist.WrTmp.data->Iex.GetI.ix;
4456 Int bias = st->Ist.WrTmp.data->Iex.GetI.bias;
4457 IRExpr* replacement = findPutI(bb, i-1, descr, ix, bias);
4458 if (replacement
4459 && isIRAtom(replacement)
4460 /* Make sure we're doing a type-safe transformation! */
4461 && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
4462 if (DEBUG_IROPT) {
4463 vex_printf("rGI: ");
4464 ppIRExpr(st->Ist.WrTmp.data);
4465 vex_printf(" -> ");
4466 ppIRExpr(replacement);
4467 vex_printf("\n");
4468 }
4469 bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
4470 }
4471 }
4472 }
4473
4474 }
4475
4476
4477 /* Remove redundant PutIs, to the extent that they can be detected.
4478 bb is modified in-place. */
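
/* E.g. (a sketch): in

      PutI[descr](ix=t1, bias=4) = t7
      t9 = Add32(t2,t3)
      PutI[descr](ix=t1, bias=4) = t9

   the first PutI is overwritten before any exit or possibly
   overlapping access intervenes, so it gets turned into a NoOp. */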
4479
4480 static
4481 void do_redundant_PutI_elimination ( IRSB* bb, VexRegisterUpdates pxControl )
4482 {
4483 Int i, j;
4484 Bool delete;
4485 IRStmt *st, *stj;
4486
4487 vassert(pxControl < VexRegUpdAllregsAtEachInsn);
4488
4489 for (i = 0; i < bb->stmts_used; i++) {
4490 st = bb->stmts[i];
4491 if (st->tag != Ist_PutI)
4492 continue;
4493 /* Ok, search forwards from here to see if we can find another
4494 PutI which makes this one redundant, and dodging various
4495 hazards. Search forwards:
4496 * If conditional exit, give up (because anything after that
4497 does not postdominate this put).
4498 * If a Get which might overlap, give up (because this PutI
4499            is not necessarily dead).
4500 * If a Put which is identical, stop with success.
4501 * If a Put which might overlap, but is not identical, give up.
4502 * If a dirty helper call which might write guest state, give up.
4503 * If a Put which definitely doesn't overlap, or any other
4504 kind of stmt, continue.
4505 */
4506 delete = False;
4507 for (j = i+1; j < bb->stmts_used; j++) {
4508 stj = bb->stmts[j];
4509 if (stj->tag == Ist_NoOp)
4510 continue;
4511 if (identicalPutIs(st, stj)) {
4512 /* success! */
4513 delete = True;
4514 break;
4515 }
4516 if (stj->tag == Ist_Exit)
4517 /* give up */
4518 break;
4519          if (stj->tag == Ist_Dirty)
4520 /* give up; could do better here */
4521 break;
4522 if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
4523 /* give up */
4524 break;
4525 }
4526
4527 if (delete) {
4528 if (DEBUG_IROPT) {
4529 vex_printf("rPI: ");
4530 ppIRStmt(st);
4531 vex_printf("\n");
4532 }
4533 bb->stmts[i] = IRStmt_NoOp();
4534 }
4535
4536 }
4537 }
4538
4539
4540 /*---------------------------------------------------------------*/
4541 /*--- Loop unrolling ---*/
4542 /*---------------------------------------------------------------*/
4543
4544 /* Adjust all tmp values (names) in e by delta. e is destructively
4545 modified. */
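
/* E.g. with delta == 3, "Add32(t1,t2)" is rewritten in place as
   "Add32(t4,t5)".  This is used by the unroller below to give each
   copied iteration a fresh, non-clashing set of temps. */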
4546
4547 static void deltaIRExpr ( IRExpr* e, Int delta )
4548 {
4549 Int i;
4550 switch (e->tag) {
4551 case Iex_RdTmp:
4552 e->Iex.RdTmp.tmp += delta;
4553 break;
4554 case Iex_Get:
4555 case Iex_Const:
4556 break;
4557 case Iex_GetI:
4558 deltaIRExpr(e->Iex.GetI.ix, delta);
4559 break;
4560 case Iex_Qop:
4561 deltaIRExpr(e->Iex.Qop.details->arg1, delta);
4562 deltaIRExpr(e->Iex.Qop.details->arg2, delta);
4563 deltaIRExpr(e->Iex.Qop.details->arg3, delta);
4564 deltaIRExpr(e->Iex.Qop.details->arg4, delta);
4565 break;
4566 case Iex_Triop:
4567 deltaIRExpr(e->Iex.Triop.details->arg1, delta);
4568 deltaIRExpr(e->Iex.Triop.details->arg2, delta);
4569 deltaIRExpr(e->Iex.Triop.details->arg3, delta);
4570 break;
4571 case Iex_Binop:
4572 deltaIRExpr(e->Iex.Binop.arg1, delta);
4573 deltaIRExpr(e->Iex.Binop.arg2, delta);
4574 break;
4575 case Iex_Unop:
4576 deltaIRExpr(e->Iex.Unop.arg, delta);
4577 break;
4578 case Iex_Load:
4579 deltaIRExpr(e->Iex.Load.addr, delta);
4580 break;
4581 case Iex_CCall:
4582 for (i = 0; e->Iex.CCall.args[i]; i++)
4583 deltaIRExpr(e->Iex.CCall.args[i], delta);
4584 break;
4585 case Iex_ITE:
4586 deltaIRExpr(e->Iex.ITE.cond, delta);
4587 deltaIRExpr(e->Iex.ITE.iftrue, delta);
4588 deltaIRExpr(e->Iex.ITE.iffalse, delta);
4589 break;
4590 default:
4591 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4592 vpanic("deltaIRExpr");
4593 }
4594 }
4595
4596 /* Adjust all tmp values (names) in st by delta. st is destructively
4597 modified. */
4598
4599 static void deltaIRStmt ( IRStmt* st, Int delta )
4600 {
4601 Int i;
4602 IRDirty* d;
4603 switch (st->tag) {
4604 case Ist_NoOp:
4605 case Ist_IMark:
4606 case Ist_MBE:
4607 break;
4608 case Ist_AbiHint:
4609 deltaIRExpr(st->Ist.AbiHint.base, delta);
4610 deltaIRExpr(st->Ist.AbiHint.nia, delta);
4611 break;
4612 case Ist_Put:
4613 deltaIRExpr(st->Ist.Put.data, delta);
4614 break;
4615 case Ist_PutI:
4616 deltaIRExpr(st->Ist.PutI.details->ix, delta);
4617 deltaIRExpr(st->Ist.PutI.details->data, delta);
4618 break;
4619 case Ist_WrTmp:
4620 st->Ist.WrTmp.tmp += delta;
4621 deltaIRExpr(st->Ist.WrTmp.data, delta);
4622 break;
4623 case Ist_Exit:
4624 deltaIRExpr(st->Ist.Exit.guard, delta);
4625 break;
4626 case Ist_Store:
4627 deltaIRExpr(st->Ist.Store.addr, delta);
4628 deltaIRExpr(st->Ist.Store.data, delta);
4629 break;
4630 case Ist_StoreG: {
4631 IRStoreG* sg = st->Ist.StoreG.details;
4632 deltaIRExpr(sg->addr, delta);
4633 deltaIRExpr(sg->data, delta);
4634 deltaIRExpr(sg->guard, delta);
4635 break;
4636 }
4637 case Ist_LoadG: {
4638 IRLoadG* lg = st->Ist.LoadG.details;
4639 lg->dst += delta;
4640 deltaIRExpr(lg->addr, delta);
4641 deltaIRExpr(lg->alt, delta);
4642 deltaIRExpr(lg->guard, delta);
4643 break;
4644 }
4645 case Ist_CAS:
4646 if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
4647 st->Ist.CAS.details->oldHi += delta;
4648 st->Ist.CAS.details->oldLo += delta;
4649 deltaIRExpr(st->Ist.CAS.details->addr, delta);
4650 if (st->Ist.CAS.details->expdHi)
4651 deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
4652 deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
4653 if (st->Ist.CAS.details->dataHi)
4654 deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
4655 deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
4656 break;
4657 case Ist_LLSC:
4658 st->Ist.LLSC.result += delta;
4659 deltaIRExpr(st->Ist.LLSC.addr, delta);
4660 if (st->Ist.LLSC.storedata)
4661 deltaIRExpr(st->Ist.LLSC.storedata, delta);
4662 break;
4663 case Ist_Dirty:
4664 d = st->Ist.Dirty.details;
4665 deltaIRExpr(d->guard, delta);
4666 for (i = 0; d->args[i]; i++) {
4667 IRExpr* arg = d->args[i];
4668 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
4669 deltaIRExpr(arg, delta);
4670 }
4671 if (d->tmp != IRTemp_INVALID)
4672 d->tmp += delta;
4673 if (d->mAddr)
4674 deltaIRExpr(d->mAddr, delta);
4675 break;
4676 default:
4677 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
4678 vpanic("deltaIRStmt");
4679 }
4680 }
4681
4682
4683 /* If possible, return a loop-unrolled version of bb0. The original
4684 is changed. If not possible, return NULL. */
4685
4686 /* The two schemas considered are:
4687
4688 X: BODY; goto X
4689
4690 which unrolls to (eg) X: BODY;BODY; goto X
4691
4692 and
4693
4694 X: BODY; if (c) goto X; goto Y
4695 which trivially transforms to
4696 X: BODY; if (!c) goto Y; goto X;
4697 so it falls in the scope of the first case.
4698
4699 X and Y must be literal (guest) addresses.
4700 */
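
/* Worked example (a sketch): with unroll_factor == 2, a block

      X: t1 = ...; PUT(0) = t1; goto X

   becomes

      X: t1 = ...; PUT(0) = t1;
         t2 = ...; PUT(0) = t2; goto X

   where the temps of the second copy have been shifted by the
   original block's temp count, using deltaIRStmt above. */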
4701
4702 static Int calc_unroll_factor( IRSB* bb )
4703 {
4704 Int n_stmts, i;
4705
4706 n_stmts = 0;
4707 for (i = 0; i < bb->stmts_used; i++) {
4708 if (bb->stmts[i]->tag != Ist_NoOp)
4709 n_stmts++;
4710 }
4711
4712 if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
4713 if (vex_control.iropt_verbosity > 0)
4714 vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
4715 n_stmts, 8* n_stmts);
4716 return 8;
4717 }
4718 if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
4719 if (vex_control.iropt_verbosity > 0)
4720 vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
4721 n_stmts, 4* n_stmts);
4722 return 4;
4723 }
4724
4725 if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
4726 if (vex_control.iropt_verbosity > 0)
4727 vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
4728 n_stmts, 2* n_stmts);
4729 return 2;
4730 }
4731
4732 if (vex_control.iropt_verbosity > 0)
4733 vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
4734
4735 return 1;
4736 }
4737
4738
4739 static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr my_addr )
4740 {
4741 Int i, j, jmax, n_vars;
4742 Bool xxx_known;
4743 Addr64 xxx_value, yyy_value;
4744 IRExpr* udst;
4745 IRStmt* st;
4746 IRConst* con;
4747 IRSB *bb1, *bb2;
4748 Int unroll_factor;
4749
4750 if (vex_control.iropt_unroll_thresh <= 0)
4751 return NULL;
4752
4753 /* First off, figure out if we can unroll this loop. Do this
4754 without modifying bb0. */
4755
4756 if (bb0->jumpkind != Ijk_Boring)
4757 return NULL;
4758
4759 xxx_known = False;
4760 xxx_value = 0;
4761
4762 /* Extract the next-guest address. If it isn't a literal, we
4763 have to give up. */
4764
4765 udst = bb0->next;
4766 if (udst->tag == Iex_Const
4767 && (udst->Iex.Const.con->tag == Ico_U32
4768 || udst->Iex.Const.con->tag == Ico_U64)) {
4769 /* The BB ends in a jump to a literal location. */
4770 xxx_known = True;
4771 xxx_value = udst->Iex.Const.con->tag == Ico_U64
4772 ? udst->Iex.Const.con->Ico.U64
4773 : (Addr64)(udst->Iex.Const.con->Ico.U32);
4774 }
4775
4776 if (!xxx_known)
4777 return NULL;
4778
4779    /* Now we know the BB ends in a jump to a literal location.  If
4780 it's a jump to itself (viz, idiom #1), move directly to the
4781 unrolling stage, first cloning the bb so the original isn't
4782 modified. */
4783 if (xxx_value == my_addr) {
4784 unroll_factor = calc_unroll_factor( bb0 );
4785 if (unroll_factor < 2)
4786 return NULL;
4787 bb1 = deepCopyIRSB( bb0 );
4788 bb0 = NULL;
4789 udst = NULL; /* is now invalid */
4790 goto do_unroll;
4791 }
4792
4793 /* Search for the second idiomatic form:
4794 X: BODY; if (c) goto X; goto Y
4795 We know Y, but need to establish that the last stmt
4796 is 'if (c) goto X'.
4797 */
4798 yyy_value = xxx_value;
4799 for (i = bb0->stmts_used-1; i >= 0; i--)
4800 if (bb0->stmts[i])
4801 break;
4802
4803 if (i < 0)
4804 return NULL; /* block with no stmts. Strange. */
4805
4806 st = bb0->stmts[i];
4807 if (st->tag != Ist_Exit)
4808 return NULL;
4809 if (st->Ist.Exit.jk != Ijk_Boring)
4810 return NULL;
4811
4812 con = st->Ist.Exit.dst;
4813 vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
4814
4815 xxx_value = con->tag == Ico_U64
4816 ? st->Ist.Exit.dst->Ico.U64
4817 : (Addr64)(st->Ist.Exit.dst->Ico.U32);
4818
4819 /* If this assertion fails, we have some kind of type error. */
4820 vassert(con->tag == udst->Iex.Const.con->tag);
4821
4822 if (xxx_value != my_addr)
4823 /* We didn't find either idiom. Give up. */
4824 return NULL;
4825
4826 /* Ok, we found idiom #2. Copy the BB, switch around the xxx and
4827 yyy values (which makes it look like idiom #1), and go into
4828 unrolling proper. This means finding (again) the last stmt, in
4829 the copied BB. */
4830
4831 unroll_factor = calc_unroll_factor( bb0 );
4832 if (unroll_factor < 2)
4833 return NULL;
4834
4835 bb1 = deepCopyIRSB( bb0 );
4836 bb0 = NULL;
4837 udst = NULL; /* is now invalid */
4838 for (i = bb1->stmts_used-1; i >= 0; i--)
4839 if (bb1->stmts[i])
4840 break;
4841
4842 /* The next bunch of assertions should be true since we already
4843 found and checked the last stmt in the original bb. */
4844
4845 vassert(i >= 0);
4846
4847 st = bb1->stmts[i];
4848 vassert(st->tag == Ist_Exit);
4849
4850 con = st->Ist.Exit.dst;
4851 vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
4852
4853 udst = bb1->next;
4854 vassert(udst->tag == Iex_Const);
4855 vassert(udst->Iex.Const.con->tag == Ico_U32
4856 || udst->Iex.Const.con->tag == Ico_U64);
4857 vassert(con->tag == udst->Iex.Const.con->tag);
4858
4859 /* switch the xxx and yyy fields around */
4860 if (con->tag == Ico_U64) {
4861 udst->Iex.Const.con->Ico.U64 = xxx_value;
4862 con->Ico.U64 = yyy_value;
4863 } else {
4864 udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
4865 con->Ico.U32 = (UInt)yyy_value;
4866 }
4867
4868 /* negate the test condition */
4869 st->Ist.Exit.guard
4870 = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));
4871
4872 /* --- The unroller proper. Both idioms are by now --- */
4873 /* --- now converted to idiom 1. --- */
4874
4875 do_unroll:
4876
4877 vassert(unroll_factor == 2
4878 || unroll_factor == 4
4879 || unroll_factor == 8);
4880
4881 jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
4882 for (j = 1; j <= jmax; j++) {
4883
4884 n_vars = bb1->tyenv->types_used;
4885
4886 bb2 = deepCopyIRSB(bb1);
4887 for (i = 0; i < n_vars; i++)
4888 (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);
4889
4890 for (i = 0; i < bb2->stmts_used; i++) {
4891 /* deltaIRStmt destructively modifies the stmt, but
4892 that's OK since bb2 is a complete fresh copy of bb1. */
4893 deltaIRStmt(bb2->stmts[i], n_vars);
4894 addStmtToIRSB(bb1, bb2->stmts[i]);
4895 }
4896 }
4897
4898 if (DEBUG_IROPT) {
4899 vex_printf("\nUNROLLED (%lx)\n", my_addr);
4900 ppIRSB(bb1);
4901 vex_printf("\n");
4902 }
4903
4904 /* Flattening; sigh. The unroller succeeds in breaking flatness
4905 by negating the test condition. This should be fixed properly.
4906 For the moment use this shotgun approach. */
4907 return flatten_BB(bb1);
4908 }
4909
4910
4911 /*---------------------------------------------------------------*/
4912 /*--- The tree builder ---*/
4913 /*---------------------------------------------------------------*/
4914
4915 /* This isn't part of IR optimisation. Really it's a pass done prior
4916 to instruction selection, which improves the code that the
4917 instruction selector can produce. */
4918
4919 /* --- The 'tmp' environment is the central data structure here --- */
4920
4921 /* The number of outstanding bindings we're prepared to track.
4922 The number of times the env becomes full and we have to dump
4923 the oldest binding (hence reducing code quality) falls very
4924 rapidly as the env size increases. 8 gives reasonable performance
4925 under most circumstances. */
4926 #define A_NENV 10
4927
4928 /* An interval. Used to record the bytes in the guest state accessed
4929 by a Put[I] statement or by (one or more) Get[I] expression(s). In
4930 case of several Get[I] expressions, the lower/upper bounds are recorded.
4931 This is conservative but cheap.
4932 E.g. a Put of 8 bytes at address 100 would be recorded as [100,107].
4933 E.g. an expression that reads 8 bytes at offset 100 and 4 bytes at
4934 offset 200 would be recorded as [100,203] */
4935 typedef
4936 struct {
4937 Bool present;
4938 Int low;
4939 Int high;
4940 }
4941 Interval;
4942
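/* Do the two intervals overlap?  E.g. (a sketch) [100,107] and
   [104,111] overlap; [100,107] and [108,111] do not.  Only
   meaningful if both intervals are present. */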
4943 static inline Bool
4944 intervals_overlap(Interval i1, Interval i2)
4945 {
4946 return (i1.low >= i2.low && i1.low <= i2.high) ||
4947 (i2.low >= i1.low && i2.low <= i1.high);
4948 }
4949
4950 static inline void
4951 update_interval(Interval *i, Int low, Int high)
4952 {
4953 vassert(low <= high);
4954
4955 if (i->present) {
4956 if (low < i->low) i->low = low;
4957 if (high > i->high) i->high = high;
4958 } else {
4959 i->present = True;
4960 i->low = low;
4961 i->high = high;
4962 }
4963 }
4964
4965
4966 /* bindee == NULL === slot is not in use
4967 bindee != NULL === slot is in use
4968 */
4969 typedef
4970 struct {
4971 IRTemp binder;
4972 IRExpr* bindee;
4973 Bool doesLoad;
4974 /* Record the bytes of the guest state BINDEE reads from. */
4975 Interval getInterval;
4976 }
4977 ATmpInfo;
4978
4979 __attribute__((unused))
4980 static void ppAEnv ( ATmpInfo* env )
4981 {
4982 Int i;
4983 for (i = 0; i < A_NENV; i++) {
4984 vex_printf("%d tmp %d val ", i, (Int)env[i].binder);
4985 if (env[i].bindee)
4986 ppIRExpr(env[i].bindee);
4987 else
4988 vex_printf("(null)");
4989 vex_printf("\n");
4990 }
4991 }
4992
4993 /* --- Tree-traversal fns --- */
4994
4995 /* Traverse an expr, and detect if any part of it reads memory or does
4996 a Get. Be careful ... this really controls how much the
4997 tree-builder can reorder the code, so getting it right is critical.
4998 */
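/* E.g. (a sketch): for "Add32(LDle:I32(t1),GET:I32(16))" this sets
   *doesLoad to True and widens *getInterval to cover bytes [16,19]
   of the guest state. */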
4999 static void setHints_Expr ( Bool* doesLoad, Interval* getInterval, IRExpr* e )
5000 {
5001 Int i;
5002 switch (e->tag) {
5003 case Iex_CCall:
5004 for (i = 0; e->Iex.CCall.args[i]; i++)
5005 setHints_Expr(doesLoad, getInterval, e->Iex.CCall.args[i]);
5006 return;
5007 case Iex_ITE:
5008 setHints_Expr(doesLoad, getInterval, e->Iex.ITE.cond);
5009 setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iftrue);
5010 setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iffalse);
5011 return;
5012 case Iex_Qop:
5013 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg1);
5014 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg2);
5015 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg3);
5016 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg4);
5017 return;
5018 case Iex_Triop:
5019 setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg1);
5020 setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg2);
5021 setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg3);
5022 return;
5023 case Iex_Binop:
5024 setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg1);
5025 setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg2);
5026 return;
5027 case Iex_Unop:
5028 setHints_Expr(doesLoad, getInterval, e->Iex.Unop.arg);
5029 return;
5030 case Iex_Load:
5031 *doesLoad = True;
5032 setHints_Expr(doesLoad, getInterval, e->Iex.Load.addr);
5033 return;
5034 case Iex_Get: {
5035 Int low = e->Iex.Get.offset;
5036 Int high = low + sizeofIRType(e->Iex.Get.ty) - 1;
5037 update_interval(getInterval, low, high);
5038 return;
5039 }
5040 case Iex_GetI: {
5041 IRRegArray *descr = e->Iex.GetI.descr;
5042 Int size = sizeofIRType(descr->elemTy);
5043 Int low = descr->base;
5044 Int high = low + descr->nElems * size - 1;
5045 update_interval(getInterval, low, high);
5046 setHints_Expr(doesLoad, getInterval, e->Iex.GetI.ix);
5047 return;
5048 }
5049 case Iex_RdTmp:
5050 case Iex_Const:
5051 return;
5052 default:
5053 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5054 vpanic("setHints_Expr");
5055 }
5056 }
5057
5058
5059 /* Add a binding to the front of the env and slide all the rest
5060 backwards. It should be the case that the last slot is free. */
5061 static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
5062 {
5063 Int i;
5064 vassert(env[A_NENV-1].bindee == NULL);
5065 for (i = A_NENV-1; i >= 1; i--)
5066 env[i] = env[i-1];
5067 env[0].binder = binder;
5068 env[0].bindee = bindee;
5069 env[0].doesLoad = False; /* filled in later */
5070 env[0].getInterval.present = False; /* filled in later */
5071 env[0].getInterval.low = -1; /* filled in later */
5072 env[0].getInterval.high = -1; /* filled in later */
5073 }
5074
5075 /* Given uses :: array of UShort, indexed by IRTemp
5076 Add the use-occurrences of temps in this expression
5077 to the env.
5078 */
5079 static void aoccCount_Expr ( UShort* uses, IRExpr* e )
5080 {
5081 Int i;
5082
5083 switch (e->tag) {
5084
5085 case Iex_RdTmp: /* the only interesting case */
5086 uses[e->Iex.RdTmp.tmp]++;
5087 return;
5088
5089 case Iex_ITE:
5090 aoccCount_Expr(uses, e->Iex.ITE.cond);
5091 aoccCount_Expr(uses, e->Iex.ITE.iftrue);
5092 aoccCount_Expr(uses, e->Iex.ITE.iffalse);
5093 return;
5094
5095 case Iex_Qop:
5096 aoccCount_Expr(uses, e->Iex.Qop.details->arg1);
5097 aoccCount_Expr(uses, e->Iex.Qop.details->arg2);
5098 aoccCount_Expr(uses, e->Iex.Qop.details->arg3);
5099 aoccCount_Expr(uses, e->Iex.Qop.details->arg4);
5100 return;
5101
5102 case Iex_Triop:
5103 aoccCount_Expr(uses, e->Iex.Triop.details->arg1);
5104 aoccCount_Expr(uses, e->Iex.Triop.details->arg2);
5105 aoccCount_Expr(uses, e->Iex.Triop.details->arg3);
5106 return;
5107
5108 case Iex_Binop:
5109 aoccCount_Expr(uses, e->Iex.Binop.arg1);
5110 aoccCount_Expr(uses, e->Iex.Binop.arg2);
5111 return;
5112
5113 case Iex_Unop:
5114 aoccCount_Expr(uses, e->Iex.Unop.arg);
5115 return;
5116
5117 case Iex_Load:
5118 aoccCount_Expr(uses, e->Iex.Load.addr);
5119 return;
5120
5121 case Iex_CCall:
5122 for (i = 0; e->Iex.CCall.args[i]; i++)
5123 aoccCount_Expr(uses, e->Iex.CCall.args[i]);
5124 return;
5125
5126 case Iex_GetI:
5127 aoccCount_Expr(uses, e->Iex.GetI.ix);
5128 return;
5129
5130 case Iex_Const:
5131 case Iex_Get:
5132 return;
5133
5134 default:
5135 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5136 vpanic("aoccCount_Expr");
5137 }
5138 }
5139
5140
5141 /* Given uses :: array of UShort, indexed by IRTemp
5142 Add the use-occurrences of temps in this statement
5143 to the env.
5144 */
5145 static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
5146 {
5147 Int i;
5148 IRDirty* d;
5149 IRCAS* cas;
5150 switch (st->tag) {
5151 case Ist_AbiHint:
5152 aoccCount_Expr(uses, st->Ist.AbiHint.base);
5153 aoccCount_Expr(uses, st->Ist.AbiHint.nia);
5154 return;
5155 case Ist_WrTmp:
5156 aoccCount_Expr(uses, st->Ist.WrTmp.data);
5157 return;
5158 case Ist_Put:
5159 aoccCount_Expr(uses, st->Ist.Put.data);
5160 return;
5161 case Ist_PutI:
5162 aoccCount_Expr(uses, st->Ist.PutI.details->ix);
5163 aoccCount_Expr(uses, st->Ist.PutI.details->data);
5164 return;
5165 case Ist_Store:
5166 aoccCount_Expr(uses, st->Ist.Store.addr);
5167 aoccCount_Expr(uses, st->Ist.Store.data);
5168 return;
5169 case Ist_StoreG: {
5170 IRStoreG* sg = st->Ist.StoreG.details;
5171 aoccCount_Expr(uses, sg->addr);
5172 aoccCount_Expr(uses, sg->data);
5173 aoccCount_Expr(uses, sg->guard);
5174 return;
5175 }
5176 case Ist_LoadG: {
5177 IRLoadG* lg = st->Ist.LoadG.details;
5178 aoccCount_Expr(uses, lg->addr);
5179 aoccCount_Expr(uses, lg->alt);
5180 aoccCount_Expr(uses, lg->guard);
5181 return;
5182 }
5183 case Ist_CAS:
5184 cas = st->Ist.CAS.details;
5185 aoccCount_Expr(uses, cas->addr);
5186 if (cas->expdHi)
5187 aoccCount_Expr(uses, cas->expdHi);
5188 aoccCount_Expr(uses, cas->expdLo);
5189 if (cas->dataHi)
5190 aoccCount_Expr(uses, cas->dataHi);
5191 aoccCount_Expr(uses, cas->dataLo);
5192 return;
5193 case Ist_LLSC:
5194 aoccCount_Expr(uses, st->Ist.LLSC.addr);
5195 if (st->Ist.LLSC.storedata)
5196 aoccCount_Expr(uses, st->Ist.LLSC.storedata);
5197 return;
5198 case Ist_Dirty:
5199 d = st->Ist.Dirty.details;
5200 if (d->mFx != Ifx_None)
5201 aoccCount_Expr(uses, d->mAddr);
5202 aoccCount_Expr(uses, d->guard);
5203 for (i = 0; d->args[i]; i++) {
5204 IRExpr* arg = d->args[i];
5205 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
5206 aoccCount_Expr(uses, arg);
5207 }
5208 return;
5209 case Ist_NoOp:
5210 case Ist_IMark:
5211 case Ist_MBE:
5212 return;
5213 case Ist_Exit:
5214 aoccCount_Expr(uses, st->Ist.Exit.guard);
5215 return;
5216 default:
5217 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
5218 vpanic("aoccCount_Stmt");
5219 }
5220 }
5221
5222 /* Look up a binding for tmp in the env. If found, return the bound
5223 expression, and set the env's binding to NULL so it is marked as
5224 used. If not found, return NULL. */
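
/* E.g. (a sketch): if the env binds t5 -> Add32(t1,t2), the first
   lookup of t5 returns that expression and clears the binding, so
   the bindee is substituted at exactly one use point; any later
   lookup of t5 returns NULL. */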
5225
5226 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
5227 {
5228 Int i;
5229 for (i = 0; i < A_NENV; i++) {
5230 if (env[i].binder == tmp && env[i].bindee != NULL) {
5231 IRExpr* bindee = env[i].bindee;
5232 env[i].bindee = NULL;
5233 return bindee;
5234 }
5235 }
5236 return NULL;
5237 }
5238
5239 /* Traverse e, looking for temps. For each observed temp, see if env
5240 contains a binding for the temp, and if so return the bound value.
5241 The env has the property that any binding it holds is
5242 'single-shot', so once a binding is used, it is marked as no longer
5243 available, by setting its .bindee field to NULL. */
5244
5245 static inline Bool is_Unop ( IRExpr* e, IROp op ) {
5246 return e->tag == Iex_Unop && e->Iex.Unop.op == op;
5247 }
5248 static inline Bool is_Binop ( IRExpr* e, IROp op ) {
5249 return e->tag == Iex_Binop && e->Iex.Binop.op == op;
5250 }
5251
5252 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
5253 {
5254 switch (op) {
5255 case Iop_Or32:
5256 /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */
5257 if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
5258 return IRExpr_Unop( Iop_CmpwNEZ32,
5259 IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
5260 a2->Iex.Unop.arg ) );
5261 break;
5262
5263 case Iop_CmpNE32:
5264 /* Since X has type Ity_I1 we can simplify:
5265          CmpNE32(1Uto32(X),0) ==> X */
5266 if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
5267 return a1->Iex.Unop.arg;
5268 break;
5269
5270 default:
5271 break;
5272 }
5273 /* no reduction rule applies */
5274 return IRExpr_Binop( op, a1, a2 );
5275 }
5276
5277 static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
5278 {
5279 switch (op) {
5280 case Iop_CmpwNEZ64:
5281 /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */
5282 if (is_Unop(aa, Iop_CmpwNEZ64))
5283 return IRExpr_Unop( Iop_CmpwNEZ64, aa->Iex.Unop.arg );
5284 /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
5285 if (is_Binop(aa, Iop_Or64)
5286 && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
5287 return fold_IRExpr_Unop(
5288 Iop_CmpwNEZ64,
5289 IRExpr_Binop(Iop_Or64,
5290 aa->Iex.Binop.arg1->Iex.Unop.arg,
5291 aa->Iex.Binop.arg2));
5292 /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
5293 if (is_Binop(aa, Iop_Or64)
5294 && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
5295 return fold_IRExpr_Unop(
5296 Iop_CmpwNEZ64,
5297 IRExpr_Binop(Iop_Or64,
5298 aa->Iex.Binop.arg1,
5299 aa->Iex.Binop.arg2->Iex.Unop.arg));
5300 break;
5301 case Iop_CmpNEZ64:
5302 /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
5303 if (is_Unop(aa, Iop_Left64))
5304 return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
5305 /* CmpNEZ64( 1Uto64(X) ) --> X */
5306 if (is_Unop(aa, Iop_1Uto64))
5307 return aa->Iex.Unop.arg;
5308 break;
5309 case Iop_CmpwNEZ32:
5310 /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
5311 if (is_Unop(aa, Iop_CmpwNEZ32))
5312 return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
5313 break;
5314 case Iop_CmpNEZ32:
5315 /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
5316 if (is_Unop(aa, Iop_Left32))
5317 return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
5318 /* CmpNEZ32( 1Uto32(X) ) --> X */
5319 if (is_Unop(aa, Iop_1Uto32))
5320 return aa->Iex.Unop.arg;
5321 /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */
5322 if (is_Unop(aa, Iop_64to32) && is_Unop(aa->Iex.Unop.arg, Iop_CmpwNEZ64))
5323 return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg->Iex.Unop.arg);
5324 break;
5325 case Iop_CmpNEZ8:
5326 /* CmpNEZ8( 1Uto8(X) ) --> X */
5327 if (is_Unop(aa, Iop_1Uto8))
5328 return aa->Iex.Unop.arg;
5329 break;
5330 case Iop_Left32:
5331 /* Left32( Left32(x) ) --> Left32(x) */
5332 if (is_Unop(aa, Iop_Left32))
5333 return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
5334 break;
5335 case Iop_Left64:
5336 /* Left64( Left64(x) ) --> Left64(x) */
5337 if (is_Unop(aa, Iop_Left64))
5338 return IRExpr_Unop( Iop_Left64, aa->Iex.Unop.arg );
5339 break;
5340 case Iop_ZeroHI64ofV128:
5341 /* ZeroHI64ofV128( ZeroHI64ofV128(x) ) --> ZeroHI64ofV128(x) */
5342 if (is_Unop(aa, Iop_ZeroHI64ofV128))
5343 return IRExpr_Unop( Iop_ZeroHI64ofV128, aa->Iex.Unop.arg );
5344 break;
5345 case Iop_32to1:
5346 /* 32to1( 1Uto32 ( x ) ) --> x */
5347 if (is_Unop(aa, Iop_1Uto32))
5348 return aa->Iex.Unop.arg;
5349 /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
5350 if (is_Unop(aa, Iop_CmpwNEZ32))
5351 return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
5352 break;
5353 case Iop_64to1:
5354 /* 64to1( 1Uto64 ( x ) ) --> x */
5355 if (is_Unop(aa, Iop_1Uto64))
5356 return aa->Iex.Unop.arg;
5357 /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
5358 if (is_Unop(aa, Iop_CmpwNEZ64))
5359 return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
5360 break;
5361 case Iop_64to32:
5362 /* 64to32( 32Uto64 ( x )) --> x */
5363 if (is_Unop(aa, Iop_32Uto64))
5364 return aa->Iex.Unop.arg;
5365 /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
5366 if (is_Unop(aa, Iop_8Uto64))
5367 return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
5368 break;
5369
5370 case Iop_32Uto64:
5371 /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
5372 if (is_Unop(aa, Iop_8Uto32))
5373 return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
5374 /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
5375 if (is_Unop(aa, Iop_16Uto32))
5376 return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
5377          /* 32Uto64( 64to32( Shr64( 32Uto64(64to32(x)), sh )))
5378             --> Shr64( 32Uto64(64to32(x)), sh ) */
5379 if (is_Unop(aa, Iop_64to32)
5380 && is_Binop(aa->Iex.Unop.arg, Iop_Shr64)
5381 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
5382 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
5383 Iop_64to32)) {
5384 return aa->Iex.Unop.arg;
5385 }
5386          /* 32Uto64( 64to32( Shl64( 32Uto64(64to32(x)), sh )))
5387             --> 32Uto64( 64to32( Shl64( x, sh ))) */
5388 if (is_Unop(aa, Iop_64to32)
5389 && is_Binop(aa->Iex.Unop.arg, Iop_Shl64)
5390 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
5391 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
5392 Iop_64to32)) {
5393 return
5394 IRExpr_Unop(
5395 Iop_32Uto64,
5396 IRExpr_Unop(
5397 Iop_64to32,
5398 IRExpr_Binop(
5399 Iop_Shl64,
5400 aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg,
5401 aa->Iex.Unop.arg->Iex.Binop.arg2
5402 )));
5403 }
5404 break;
5405
5406 case Iop_1Sto32:
5407 /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
5408 if (is_Unop(aa, Iop_CmpNEZ8)
5409 && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
5410 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
5411 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
5412 Iop_CmpNEZ32)) {
5413 return IRExpr_Unop( Iop_CmpwNEZ32,
5414 aa->Iex.Unop.arg->Iex.Unop.arg
5415 ->Iex.Unop.arg->Iex.Unop.arg);
5416 }
5417 break;
5418
5419 default:
5420 break;
5421 }
5422 /* no reduction rule applies */
5423 return IRExpr_Unop( op, aa );
5424 }
5425
5426 static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
5427 {
5428 IRExpr* e2;
5429 IRExpr** args2;
5430 Int i;
5431
5432 switch (e->tag) {
5433
5434 case Iex_CCall:
5435 args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
5436 for (i = 0; args2[i]; i++)
5437 args2[i] = atbSubst_Expr(env,args2[i]);
5438 return IRExpr_CCall(
5439 e->Iex.CCall.cee,
5440 e->Iex.CCall.retty,
5441 args2
5442 );
5443 case Iex_RdTmp:
5444 e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
5445 return e2 ? e2 : e;
5446 case Iex_ITE:
5447 return IRExpr_ITE(
5448 atbSubst_Expr(env, e->Iex.ITE.cond),
5449 atbSubst_Expr(env, e->Iex.ITE.iftrue),
5450 atbSubst_Expr(env, e->Iex.ITE.iffalse)
5451 );
5452 case Iex_Qop:
5453 return IRExpr_Qop(
5454 e->Iex.Qop.details->op,
5455 atbSubst_Expr(env, e->Iex.Qop.details->arg1),
5456 atbSubst_Expr(env, e->Iex.Qop.details->arg2),
5457 atbSubst_Expr(env, e->Iex.Qop.details->arg3),
5458 atbSubst_Expr(env, e->Iex.Qop.details->arg4)
5459 );
5460 case Iex_Triop:
5461 return IRExpr_Triop(
5462 e->Iex.Triop.details->op,
5463 atbSubst_Expr(env, e->Iex.Triop.details->arg1),
5464 atbSubst_Expr(env, e->Iex.Triop.details->arg2),
5465 atbSubst_Expr(env, e->Iex.Triop.details->arg3)
5466 );
5467 case Iex_Binop:
5468 return fold_IRExpr_Binop(
5469 e->Iex.Binop.op,
5470 atbSubst_Expr(env, e->Iex.Binop.arg1),
5471 atbSubst_Expr(env, e->Iex.Binop.arg2)
5472 );
5473 case Iex_Unop:
5474 return fold_IRExpr_Unop(
5475 e->Iex.Unop.op,
5476 atbSubst_Expr(env, e->Iex.Unop.arg)
5477 );
5478 case Iex_Load:
5479 return IRExpr_Load(
5480 e->Iex.Load.end,
5481 e->Iex.Load.ty,
5482 atbSubst_Expr(env, e->Iex.Load.addr)
5483 );
5484 case Iex_GetI:
5485 return IRExpr_GetI(
5486 e->Iex.GetI.descr,
5487 atbSubst_Expr(env, e->Iex.GetI.ix),
5488 e->Iex.GetI.bias
5489 );
5490 case Iex_Const:
5491 case Iex_Get:
5492 return e;
5493 default:
5494 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5495 vpanic("atbSubst_Expr");
5496 }
5497 }
5498
5499 /* Same deal as atbSubst_Expr, except for stmts. */
5500
5501 static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
5502 {
5503 Int i;
5504 IRDirty *d, *d2;
5505 IRCAS *cas, *cas2;
5506 IRPutI *puti, *puti2;
5507
5508 switch (st->tag) {
5509 case Ist_AbiHint:
5510 return IRStmt_AbiHint(
5511 atbSubst_Expr(env, st->Ist.AbiHint.base),
5512 st->Ist.AbiHint.len,
5513 atbSubst_Expr(env, st->Ist.AbiHint.nia)
5514 );
5515 case Ist_Store:
5516 return IRStmt_Store(
5517 st->Ist.Store.end,
5518 atbSubst_Expr(env, st->Ist.Store.addr),
5519 atbSubst_Expr(env, st->Ist.Store.data)
5520 );
5521 case Ist_StoreG: {
5522 IRStoreG* sg = st->Ist.StoreG.details;
5523 return IRStmt_StoreG(sg->end,
5524 atbSubst_Expr(env, sg->addr),
5525 atbSubst_Expr(env, sg->data),
5526 atbSubst_Expr(env, sg->guard));
5527 }
5528 case Ist_LoadG: {
5529 IRLoadG* lg = st->Ist.LoadG.details;
5530 return IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
5531 atbSubst_Expr(env, lg->addr),
5532 atbSubst_Expr(env, lg->alt),
5533 atbSubst_Expr(env, lg->guard));
5534 }
5535 case Ist_WrTmp:
5536 return IRStmt_WrTmp(
5537 st->Ist.WrTmp.tmp,
5538 atbSubst_Expr(env, st->Ist.WrTmp.data)
5539 );
5540 case Ist_Put:
5541 return IRStmt_Put(
5542 st->Ist.Put.offset,
5543 atbSubst_Expr(env, st->Ist.Put.data)
5544 );
5545 case Ist_PutI:
5546 puti = st->Ist.PutI.details;
5547 puti2 = mkIRPutI(puti->descr,
5548 atbSubst_Expr(env, puti->ix),
5549 puti->bias,
5550 atbSubst_Expr(env, puti->data));
5551 return IRStmt_PutI(puti2);
5552
5553 case Ist_Exit:
5554 return IRStmt_Exit(
5555 atbSubst_Expr(env, st->Ist.Exit.guard),
5556 st->Ist.Exit.jk,
5557 st->Ist.Exit.dst,
5558 st->Ist.Exit.offsIP
5559 );
5560 case Ist_IMark:
5561 return IRStmt_IMark(st->Ist.IMark.addr,
5562 st->Ist.IMark.len,
5563 st->Ist.IMark.delta);
5564 case Ist_NoOp:
5565 return IRStmt_NoOp();
5566 case Ist_MBE:
5567 return IRStmt_MBE(st->Ist.MBE.event);
5568 case Ist_CAS:
5569 cas = st->Ist.CAS.details;
5570 cas2 = mkIRCAS(
5571 cas->oldHi, cas->oldLo, cas->end,
5572 atbSubst_Expr(env, cas->addr),
5573 cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
5574 atbSubst_Expr(env, cas->expdLo),
5575 cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
5576 atbSubst_Expr(env, cas->dataLo)
5577 );
5578 return IRStmt_CAS(cas2);
5579 case Ist_LLSC:
5580 return IRStmt_LLSC(
5581 st->Ist.LLSC.end,
5582 st->Ist.LLSC.result,
5583 atbSubst_Expr(env, st->Ist.LLSC.addr),
5584 st->Ist.LLSC.storedata
5585 ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
5586 );
5587 case Ist_Dirty:
5588 d = st->Ist.Dirty.details;
5589 d2 = emptyIRDirty();
5590 *d2 = *d;
5591 if (d2->mFx != Ifx_None)
5592 d2->mAddr = atbSubst_Expr(env, d2->mAddr);
5593 d2->guard = atbSubst_Expr(env, d2->guard);
5594 for (i = 0; d2->args[i]; i++) {
5595 IRExpr* arg = d2->args[i];
5596 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
5597 d2->args[i] = atbSubst_Expr(env, arg);
5598 }
5599 return IRStmt_Dirty(d2);
5600 default:
5601 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
5602 vpanic("atbSubst_Stmt");
5603 }
5604 }

inline
static Bool dirty_helper_stores ( const IRDirty *d )
{
   return d->mFx == Ifx_Write || d->mFx == Ifx_Modify;
}

inline
static Interval dirty_helper_puts (
                   const IRDirty *d,
                   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
                   VexRegisterUpdates pxControl,
                   /*OUT*/Bool *requiresPreciseMemExns
                )
{
   Int i;
   Interval interval;

   /* Passing the guest state pointer opens the door to modifying the
      guest state under the covers.  It's not allowed, but let's be
      extra conservative and assume the worst. */
   for (i = 0; d->args[i]; i++) {
      if (UNLIKELY(d->args[i]->tag == Iex_BBPTR)) {
         *requiresPreciseMemExns = True;
         /* Assume all guest state is written. */
         interval.present = True;
         interval.low  = 0;
         interval.high = 0x7FFFFFFF;
         return interval;
      }
   }

   /* Check the side effects on the guest state */
   interval.present = False;
   interval.low = interval.high = -1;
   *requiresPreciseMemExns = False;

   for (i = 0; i < d->nFxState; ++i) {
      if (d->fxState[i].fx != Ifx_Read) {
         Int offset    = d->fxState[i].offset;
         Int size      = d->fxState[i].size;
         Int nRepeats  = d->fxState[i].nRepeats;
         Int repeatLen = d->fxState[i].repeatLen;

         if (preciseMemExnsFn(offset,
                              offset + nRepeats * repeatLen + size - 1,
                              pxControl)) {
            *requiresPreciseMemExns = True;
         }
         update_interval(&interval, offset,
                         offset + nRepeats * repeatLen + size - 1);
      }
   }

   return interval;
}
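
/* Worked example (illustrative values, not from the original source):
   an fxState entry with offset=16, size=4, nRepeats=2, repeatLen=8,
   on the usual reading that the slice repeats at offset + i*repeatLen
   for i = 0 .. nRepeats, describes accesses at [16,19], [24,27] and
   [32,35].  The loop above summarises it with the single conservative
   interval
      [offset, offset + nRepeats * repeatLen + size - 1] = [16,35],
   which covers all three slices plus the gaps between them. */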

/* Return an interval if st modifies the guest state.  Via
   requiresPreciseMemExns return whether or not that modification
   requires precise exceptions. */
static Interval stmt_modifies_guest_state (
                   IRSB *bb, const IRStmt *st,
                   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
                   VexRegisterUpdates pxControl,
                   /*OUT*/Bool *requiresPreciseMemExns
                )
{
   Interval interval;

   switch (st->tag) {
   case Ist_Put: {
      Int offset = st->Ist.Put.offset;
      Int size = sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Put.data));

      *requiresPreciseMemExns
         = preciseMemExnsFn(offset, offset + size - 1, pxControl);
      interval.present = True;
      interval.low  = offset;
      interval.high = offset + size - 1;
      return interval;
   }

   case Ist_PutI: {
      IRRegArray *descr = st->Ist.PutI.details->descr;
      Int offset = descr->base;
      Int size = sizeofIRType(descr->elemTy);

      /* We quietly assume here that all segments are contiguous and
         there are no holes.  This is to avoid a loop.  The assumption
         is conservative in the sense that we might report that precise
         memory exceptions are needed when in fact they are not. */
      *requiresPreciseMemExns
         = preciseMemExnsFn(offset, offset + descr->nElems * size - 1,
                            pxControl);
      interval.present = True;
      interval.low  = offset;
      interval.high = offset + descr->nElems * size - 1;
      return interval;
   }

   case Ist_Dirty:
      return dirty_helper_puts(st->Ist.Dirty.details,
                               preciseMemExnsFn, pxControl,
                               requiresPreciseMemExns);

   default:
      *requiresPreciseMemExns = False;
      interval.present = False;
      interval.low  = -1;
      interval.high = -1;
      return interval;
   }
}
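
/* Illustrative example (not from the original source): a Put of an
   I64 value at guest-state offset 8 yields the interval [8,15], since
   interval.high = offset + sizeofIRType(Ity_I64) - 1 = 8 + 8 - 1.
   Whether that Put also requires precise exceptions is decided
   entirely by the architecture-supplied preciseMemExnsFn. */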

/* notstatic */ Addr ado_treebuild_BB (
                   IRSB* bb,
                   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
                   VexRegisterUpdates pxControl
                )
{
   Int      i, j, k, m;
   Bool     stmtStores, invalidateMe;
   Interval putInterval;
   IRStmt*  st;
   IRStmt*  st2;
   ATmpInfo env[A_NENV];

   Bool max_ga_known = False;
   Addr max_ga       = 0;

   Int     n_tmps = bb->tyenv->types_used;
   UShort* uses   = LibVEX_Alloc_inline(n_tmps * sizeof(UShort));

   /* Phase 1.  Scan forwards in bb, counting use occurrences of each
      temp.  Also count occurrences in the bb->next field.  Take the
      opportunity to also find the maximum guest address in the block,
      since that will be needed later for deciding when we can safely
      elide event checks. */

   for (i = 0; i < n_tmps; i++)
      uses[i] = 0;

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_NoOp:
            continue;
         case Ist_IMark: {
            UInt len = st->Ist.IMark.len;
            Addr mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
            max_ga_known = True;
            if (mga > max_ga)
               max_ga = mga;
            break;
         }
         default:
            break;
      }
      aoccCount_Stmt( uses, st );
   }
   aoccCount_Expr( uses, bb->next );

#  if 0
   for (i = 0; i < n_tmps; i++) {
      if (uses[i] == 0)
         continue;
      ppIRTemp( (IRTemp)i );
      vex_printf("  used %d\n", (Int)uses[i] );
   }
#  endif

   /* Phase 2.  Scan forwards in bb.  For each statement in turn:

         If the env is full, emit the end element.  This guarantees
         there is at least one free slot in the following.

         On seeing 't = E', occ(t)==1,
            let E' = env(E)
            delete this stmt
            add t -> E' to the front of the env
            Examine E' and set the hints for E' appropriately
            (doesLoad? doesGet?)

         On seeing any other stmt,
            let stmt' = env(stmt)
            remove from env any 't=E' binds invalidated by stmt
                emit the invalidated stmts
            emit stmt'
            compact any holes in env
                by sliding entries towards the front

      Finally, apply env to bb->next.
   */
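
   /* Illustrative example of the above (assumed IR fragment, not from
      the original source): given occ(t1)==1, the pair

         t1 = Add32(t0,0x1:I32)
         STle(t2) = t1

      is tree-built into the single statement

         STle(t2) = Add32(t0,0x1:I32)

      because the binding t1 -> Add32(t0,0x1:I32) is parked in env and
      then substituted into the store by atbSubst_Stmt. */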

   for (i = 0; i < A_NENV; i++) {
      env[i].bindee = NULL;
      env[i].binder = IRTemp_INVALID;
   }

   /* The stmts in bb are being reordered, and we are guaranteed to
      end up with no more than the number we started with.  Use i to
      be the cursor of the current stmt examined and j <= i to be that
      for the current stmt being written.
   */
   j = 0;
   for (i = 0; i < bb->stmts_used; i++) {

      st = bb->stmts[i];
      if (st->tag == Ist_NoOp)
         continue;

      /* Ensure there's at least one space in the env, by emitting
         the oldest binding if necessary. */
      if (env[A_NENV-1].bindee != NULL) {
         bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
                                      env[A_NENV-1].bindee );
         j++;
         vassert(j <= i);
         env[A_NENV-1].bindee = NULL;
      }

      /* Consider current stmt. */
      if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
         IRExpr *e, *e2;

         /* optional extra: dump dead bindings as we find them.
            Removes the need for a prior dead-code removal pass. */
         if (uses[st->Ist.WrTmp.tmp] == 0) {
            if (0) vex_printf("DEAD binding\n");
            continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
         }
         vassert(uses[st->Ist.WrTmp.tmp] == 1);

         /* ok, we have 't = E', occ(t)==1.  Do the abovementioned
            actions. */
         e  = st->Ist.WrTmp.data;
         e2 = atbSubst_Expr(env, e);
         addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
         setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2);
         /* don't advance j, as we are deleting this stmt and instead
            holding it temporarily in the env. */
         continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
      }

      /* we get here for any other kind of statement. */
      /* 'use up' any bindings required by the current statement. */
      st2 = atbSubst_Stmt(env, st);

      /* Now, before this stmt, dump any bindings in env that it
         invalidates.  These need to be dumped in the order in which
         they originally entered env -- that means from oldest to
         youngest. */

      /* putInterval/stmtStores characterise what the stmt under
         consideration does, or might do (erring on the side of safety
         by assuming True where uncertain). */

      Bool putRequiresPreciseMemExns;
      putInterval = stmt_modifies_guest_state(
                       bb, st, preciseMemExnsFn, pxControl,
                       &putRequiresPreciseMemExns
                    );

      /* stmtStores is True if this stmt writes memory, or might do so
         (==> we don't want to reorder other loads or stores relative
         to it).  Also, both LL and SC fall under this classification,
         since we really ought to be conservative and not reorder any
         other memory transactions relative to them. */
      stmtStores
         = toBool( st->tag == Ist_Store
                   || (st->tag == Ist_Dirty
                       && dirty_helper_stores(st->Ist.Dirty.details))
                   || st->tag == Ist_LLSC
                   || st->tag == Ist_CAS );

      for (k = A_NENV-1; k >= 0; k--) {
         if (env[k].bindee == NULL)
            continue;
         /* Compare the actions of this stmt with the actions of
            binding 'k', to see if they invalidate the binding. */
         invalidateMe
            = toBool(
              /* a store invalidates loaded data */
              (env[k].doesLoad && stmtStores)
              /* a put invalidates get'd data, if they overlap */
              || ((env[k].getInterval.present && putInterval.present) &&
                  intervals_overlap(env[k].getInterval, putInterval))
              /* a put invalidates loaded data.  That means, in essence,
                 that a load expression cannot be substituted into a
                 statement that follows the put.  But there is nothing
                 wrong with doing so, except when the put statement
                 requires precise exceptions.  Think of a load that is
                 moved past a put where the put updates the IP in the
                 guest state.  If the load generates a segfault, the
                 wrong address (line number) would be reported. */
              || (env[k].doesLoad && putInterval.present &&
                  putRequiresPreciseMemExns)
              /* probably overly conservative: a memory bus event
                 invalidates absolutely everything, so that all
                 computation prior to it is forced to complete before
                 proceeding with the event (fence,lock,unlock). */
              || st->tag == Ist_MBE
              /* also be (probably overly) paranoid re AbiHints */
              || st->tag == Ist_AbiHint
              );
         if (invalidateMe) {
            bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
            j++;
            vassert(j <= i);
            env[k].bindee = NULL;
         }
      }

      /* Slide in-use entries in env up to the front */
      m = 0;
      for (k = 0; k < A_NENV; k++) {
         if (env[k].bindee != NULL) {
            env[m] = env[k];
            m++;
         }
      }
      for (; m < A_NENV; m++) {
         env[m].bindee = NULL;
      }

      /* finally, emit the substituted statement */
      bb->stmts[j] = st2;
      /* vex_printf("**2  "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
      j++;

      vassert(j <= i+1);
   } /* for each stmt in the original bb ... */

   /* Finally ... substitute the ->next field as much as possible, and
      dump any left-over bindings.  Hmm.  Perhaps there should be no
      left-over bindings?  Or any left-over bindings are
      by definition dead? */
   bb->next = atbSubst_Expr(env, bb->next);
   bb->stmts_used = j;

   return max_ga_known ? max_ga : ~(Addr)0;
}


/*---------------------------------------------------------------*/
/*--- MSVC specific transformation hacks                      ---*/
/*---------------------------------------------------------------*/

/* The purpose of all this is to find MSVC's idiom for non-constant
   bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into
   gcc's idiom "(a & ~c) | (b & c)".  Motivation is that Memcheck
   generates a lot of false positives from the MSVC version because it
   doesn't understand that XORing an undefined bit with itself gives a
   defined result.

   This isn't a problem for the simple case "x ^ x", because iropt
   folds it to a constant zero before Memcheck ever sees it.  But in
   this case we have an intervening "& c" which defeats the simple
   case.  So we have to carefully inspect all expressions rooted at an
   XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of the
   7 other variants resulting from swapping the order of arguments to
   the three binary operations.  If we get a match, we then replace
   the tree with "(a & ~c) | (b & c)", and Memcheck is happy.

   The key difficulty is to spot the two uses of "a".  To normalise
   the IR to maximise the chances of success, we first do a CSE pass,
   with CSEing of loads enabled, since the two "a" expressions may be
   loads, which need to be commoned up.  Then we do a constant folding
   pass, so as to remove any tmp-to-tmp assignment chains that would
   make matching the original expression more difficult.
*/
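
/* Worked example (illustrative values, not from the original source):
   with a = 0b1010, b = 0b0110 and mask c = 0b0011, both idioms take
   the c-selected bits from b and the rest from a:

      a ^ ((a ^ b) & c)  = 0b1010 ^ (0b1100 & 0b0011)
                         = 0b1010 ^ 0b0000            = 0b1010
      (a & ~c) | (b & c) = (0b1010 & 0b1100) | (0b0110 & 0b0011)
                         = 0b1000 | 0b0010            = 0b1010

   The two forms differ only in definedness tracking: in the first,
   wherever c is 1 the result bit is a ^ a ^ b, so an undefined bit
   of "a" cancels against itself.  Memcheck does not see that
   cancellation and pessimistically reports the bit as undefined,
   even though its value is fully defined. */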


/* Helper function for debug printing */
__attribute__((unused))
static void print_flat_expr ( IRExpr** env, IRExpr* e )
{
   if (e == NULL) {
      vex_printf("?");
      return;
   }
   switch (e->tag) {
      case Iex_Binop: {
         ppIROp(e->Iex.Binop.op);
         vex_printf("(");
         print_flat_expr(env, e->Iex.Binop.arg1);
         vex_printf(",");
         print_flat_expr(env, e->Iex.Binop.arg2);
         vex_printf(")");
         break;
      }
      case Iex_Unop: {
         ppIROp(e->Iex.Unop.op);
         vex_printf("(");
         print_flat_expr(env, e->Iex.Unop.arg);
         vex_printf(")");
         break;
      }
      case Iex_RdTmp:
         ppIRTemp(e->Iex.RdTmp.tmp);
         vex_printf("=");
         print_flat_expr(env, chase(env, e));
         break;
      case Iex_Const:
      case Iex_CCall:
      case Iex_Load:
      case Iex_ITE:
      case Iex_Get:
         ppIRExpr(e);
         break;
      default:
         vex_printf("FAIL: "); ppIRExpr(e); vex_printf("\n");
         vassert(0);
   }
}

/* Spot   a ^ ((a ^ b) & c)   for a, b and c tmp-or-const (atoms),
   or any of the other 7 variants generated by switching the order
   of arguments to the outer ^, the inner ^ and the &.
*/
static UInt spotBitfieldAssignment ( /*OUT*/IRExpr** aa, /*OUT*/IRExpr** bb,
                                     /*OUT*/IRExpr** cc,
                                     IRExpr** env, IRExpr* e,
                                     IROp opAND, IROp opXOR)
{
#  define ISBIN(_e,_op) ((_e) && (_e)->tag == Iex_Binop \
                              && (_e)->Iex.Binop.op == (_op))
#  define ISATOM(_e)    isIRAtom(_e)
#  define STEP(_e)      chase1(env, (_e))
#  define LL(_e)        ((_e)->Iex.Binop.arg1)
#  define RR(_e)        ((_e)->Iex.Binop.arg2)

   IRExpr *a1, *and, *xor, *c, *a2bL, *a2bR;

   /* This is common to all 8 cases */
   if (!ISBIN(e, opXOR)) goto fail;

   /*                  ----and------        */
   /*                       --xor---        */
   /* find variant 1:   a1 ^ ((a2 ^ b) & c) */
   /* find variant 2:   a1 ^ ((b ^ a2) & c) */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1  = LL(e);
   and = STEP(RR(e));
   if (!ISBIN(and, opAND)) goto v34;
   xor = STEP(LL(and));
   c   = RR(and);
   if (!ISBIN(xor, opXOR)) goto v34;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 1;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 2;
   }

  v34:
   /*                  ----and------        */
   /*                  --xor---             */
   /* find variant 3:   ((a2 ^ b) & c) ^ a1 */
   /* find variant 4:   ((b ^ a2) & c) ^ a1 */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1  = RR(e);
   and = STEP(LL(e));
   if (!ISBIN(and, opAND)) goto v56;
   xor = STEP(LL(and));
   c   = RR(and);
   if (!ISBIN(xor, opXOR)) goto v56;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 3;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 4;
   }

  v56:
   /*                  ----and------        */
   /*                       --xor---        */
   /* find variant 5:   a1 ^ (c & (a2 ^ b)) */
   /* find variant 6:   a1 ^ (c & (b ^ a2)) */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1  = LL(e);
   and = STEP(RR(e));
   if (!ISBIN(and, opAND)) goto v78;
   xor = STEP(RR(and));
   c   = LL(and);
   if (!ISBIN(xor, opXOR)) goto v78;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      vassert(5-5); // ATC
      return 5;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      vassert(6-6); // ATC
      return 6;
   }

  v78:
   /*                  -----and------       */
   /*                        --xor---       */
   /* find variant 7:   (c & (a2 ^ b)) ^ a1 */
   /* find variant 8:   (c & (b ^ a2)) ^ a1 */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   a1  = RR(e);
   and = STEP(LL(e));
   if (!ISBIN(and, opAND)) goto fail;
   xor = STEP(RR(and));
   c   = LL(and);
   if (!ISBIN(xor, opXOR)) goto fail;
   a2bL = LL(xor);
   a2bR = RR(xor);

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      *aa = a1;
      *bb = a2bR;
      *cc = c;
      return 7;
   }
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      *aa = a1;
      *bb = a2bL;
      *cc = c;
      return 8;
   }

  fail:
   return 0;

#  undef ISBIN
#  undef ISATOM
#  undef STEP
#  undef LL
#  undef RR
}
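
/* Concrete instance of the rewrite for the I32 case (illustrative,
   assuming t1, t2, t3 are flat-IR atoms):

      Xor32(t1, And32(Xor32(t1,t2), t3))
   ==>
      Or32(And32(t1, Not32(t3)), And32(t2, t3))

   with a = t1, b = t2, c = t3, matching variant 1 above. */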

/* If |e| is of the form   a ^ ((a ^ b) & c)   (or any of the 7 other
   variants thereof generated by switching arguments around), return
   the IRExpr* for   (a & ~c) | (b & c).   Else return NULL. */
static IRExpr* do_XOR_TRANSFORMS_IRExpr ( IRExpr** env, IRExpr* e )
{
   if (e->tag != Iex_Binop)
      return NULL;

   const HChar* tyNm = NULL;
   IROp opOR  = Iop_INVALID;
   IROp opAND = Iop_INVALID;
   IROp opNOT = Iop_INVALID;
   IROp opXOR = Iop_INVALID;
   switch (e->Iex.Binop.op) {
      case Iop_Xor32:
         tyNm  = "I32";
         opOR  = Iop_Or32;  opAND = Iop_And32;
         opNOT = Iop_Not32; opXOR = Iop_Xor32;
         break;
      case Iop_Xor16:
         tyNm  = "I16";
         opOR  = Iop_Or16;  opAND = Iop_And16;
         opNOT = Iop_Not16; opXOR = Iop_Xor16;
         break;
      case Iop_Xor8:
         tyNm  = "I8";
         opOR  = Iop_Or8;  opAND = Iop_And8;
         opNOT = Iop_Not8; opXOR = Iop_Xor8;
         break;
      default:
         return NULL;
   }

   IRExpr* aa = NULL;
   IRExpr* bb = NULL;
   IRExpr* cc = NULL;
   UInt variant = spotBitfieldAssignment(&aa, &bb, &cc, env, e, opAND, opXOR);
   if (variant > 0) {
      static UInt ctr = 0;
      if (0)
         vex_printf("XXXXXXXXXX Bitfield Assignment number %u, "
                    "type %s, variant %u\n",
                    ++ctr, tyNm, variant);
      /* It's vitally important that the returned aa, bb and cc are
         atoms -- either constants or tmps.  If it's anything else
         (eg, a GET) then incorporating them in a tree at this point
         in the SB may erroneously pull them forwards (eg, past a PUT
         that originally came after the GET) and so transform the IR
         wrongly.  spotBitfieldAssignment should guarantee only to
         give us atoms, but we check here anyway. */
      vassert(aa && isIRAtom(aa));
      vassert(bb && isIRAtom(bb));
      vassert(cc && isIRAtom(cc));
      return IRExpr_Binop(
                opOR,
                IRExpr_Binop(opAND, aa, IRExpr_Unop(opNOT, cc)),
                IRExpr_Binop(opAND, bb, cc)
             );
   }
   return NULL;
}


/* SB is modified in-place.  Visit all the IRExprs and, for those
   which are allowed to be non-atomic, perform the XOR transform if
   possible.  This makes |sb| be non-flat, but that's ok, the caller
   can re-flatten it.  Returns True iff any changes were made. */
static Bool do_XOR_TRANSFORM_IRSB ( IRSB* sb )
{
   Int  i;
   Bool changed = False;

   /* Make the tmp->expr environment, so we can use it for
      chasing expressions. */
   Int      n_tmps = sb->tyenv->types_used;
   IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   for (i = 0; i < n_tmps; i++)
      env[i] = NULL;

   for (i = 0; i < sb->stmts_used; i++) {
      IRStmt* st = sb->stmts[i];
      if (st->tag != Ist_WrTmp)
         continue;
      IRTemp t = st->Ist.WrTmp.tmp;
      vassert(t >= 0 && t < n_tmps);
      env[t] = st->Ist.WrTmp.data;
   }

   for (i = 0; i < sb->stmts_used; i++) {
      IRStmt* st = sb->stmts[i];

      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_PutI: {
            IRPutI* puti = st->Ist.PutI.details;
            vassert(isIRAtom(puti->ix));
            vassert(isIRAtom(puti->data));
            break;
         }
         case Ist_WrTmp: {
            /* This is the one place where an expr (st->Ist.WrTmp.data)
               is allowed to be more than just a constant or a tmp. */
            IRExpr* mb_new_data
               = do_XOR_TRANSFORMS_IRExpr(env, st->Ist.WrTmp.data);
            if (mb_new_data) {
               //ppIRSB(sb);
               st->Ist.WrTmp.data = mb_new_data;
               //ppIRSB(sb);
               changed = True;
            }
            break;
         }
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS: {
            IRCAS* cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         }
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty: {
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               vassert(isIRAtom(d->mAddr));
            }
            vassert(isIRAtom(d->guard));
            for (Int j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
                  vassert(isIRAtom(arg));
               }
            }
            break;
         }
         case Ist_IMark:
         case Ist_NoOp:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
            vex_printf("\n"); ppIRStmt(st);
            vpanic("do_XOR_TRANSFORM_IRSB");
      }
   }

   vassert(isIRAtom(sb->next));
   return changed;
}


static IRSB* do_MSVC_HACKS ( IRSB* sb )
{
   // Normalise as much as we can.  This is the one-and-only place
   // where we call do_cse_BB with allowLoadsToBeCSEd set to True.
   Bool any_cse_changes = do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ );
   if (any_cse_changes) {
      // CSEing might have created dead code.  Remove it.
      sb = cprop_BB ( sb );
      do_deadcode_BB(sb);
   }

   // Visit all atoms, do the transformation proper.  sb is modified
   // in-place.
   Bool changed = do_XOR_TRANSFORM_IRSB(sb);

   if (changed) {
      // The transformation generates non-flat expressions, so we now
      // need to re-flatten the block.
      sb = flatten_BB(sb);
   }

   return sb;
}


/*---------------------------------------------------------------*/
/*--- iropt main                                              ---*/
/*---------------------------------------------------------------*/

static Bool iropt_verbose = False; /* True; */


/* Do a simple cleanup pass on bb.  This is: redundant Get removal,
   redundant Put removal, constant propagation, dead code removal,
   clean helper specialisation, and dead code removal (again).
*/


static
IRSB* cheap_transformations (
         IRSB* bb,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
         VexRegisterUpdates pxControl
      )
{
   redundant_get_removal_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= REDUNDANT GET\n\n" );
      ppIRSB(bb);
   }

   if (pxControl < VexRegUpdAllregsAtEachInsn) {
      redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
   }
   if (iropt_verbose) {
      vex_printf("\n========= REDUNDANT PUT\n\n" );
      ppIRSB(bb);
   }

   bb = cprop_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= CPROPD\n\n" );
      ppIRSB(bb);
   }

   do_deadcode_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= DEAD\n\n" );
      ppIRSB(bb);
   }

   bb = spec_helpers_BB ( bb, specHelper );
   do_deadcode_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= SPECd \n\n" );
      ppIRSB(bb);
   }

   return bb;
}


/* Do some more expensive transformations on bb, which are aimed at
   optimising as much as possible in the presence of GetI and PutI. */

static
IRSB* expensive_transformations( IRSB* bb, VexRegisterUpdates pxControl )
{
   (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
   collapse_AddSub_chains_BB( bb );
   do_redundant_GetI_elimination( bb );
   if (pxControl < VexRegUpdAllregsAtEachInsn) {
      do_redundant_PutI_elimination( bb, pxControl );
   }
   do_deadcode_BB( bb );
   return bb;
}


/* Scan a flattened BB to look for signs that more expensive
   optimisations might be useful:
   - find out if there are any GetIs and PutIs
   - find out if there are any floating or vector-typed temporaries
*/

static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
                                 /*OUT*/Bool* hasVorFtemps,
                                 IRSB* bb )
{
   Int      i, j;
   IRStmt*  st;
   IRDirty* d;
   IRCAS*   cas;

   *hasGetIorPutI = False;
   *hasVorFtemps  = False;

   for (i = 0; i < bb->stmts_used; i++) {
      st = bb->stmts[i];
      switch (st->tag) {
         case Ist_AbiHint:
            vassert(isIRAtom(st->Ist.AbiHint.base));
            vassert(isIRAtom(st->Ist.AbiHint.nia));
            break;
         case Ist_PutI:
            *hasGetIorPutI = True;
            break;
         case Ist_WrTmp:
            if (st->Ist.WrTmp.data->tag == Iex_GetI)
               *hasGetIorPutI = True;
            switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
               case Ity_I1: case Ity_I8: case Ity_I16:
               case Ity_I32: case Ity_I64: case Ity_I128:
                  break;
               case Ity_F16: case Ity_F32: case Ity_F64: case Ity_F128:
               case Ity_V128: case Ity_V256:
                  *hasVorFtemps = True;
                  break;
               case Ity_D32: case Ity_D64: case Ity_D128:
                  *hasVorFtemps = True;
                  break;
               default:
                  goto bad;
            }
            break;
         case Ist_Put:
            vassert(isIRAtom(st->Ist.Put.data));
            break;
         case Ist_Store:
            vassert(isIRAtom(st->Ist.Store.addr));
            vassert(isIRAtom(st->Ist.Store.data));
            break;
         case Ist_StoreG: {
            IRStoreG* sg = st->Ist.StoreG.details;
            vassert(isIRAtom(sg->addr));
            vassert(isIRAtom(sg->data));
            vassert(isIRAtom(sg->guard));
            break;
         }
         case Ist_LoadG: {
            IRLoadG* lg = st->Ist.LoadG.details;
            vassert(isIRAtom(lg->addr));
            vassert(isIRAtom(lg->alt));
            vassert(isIRAtom(lg->guard));
            break;
         }
         case Ist_CAS:
            cas = st->Ist.CAS.details;
            vassert(isIRAtom(cas->addr));
            vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
            vassert(isIRAtom(cas->expdLo));
            vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
            vassert(isIRAtom(cas->dataLo));
            break;
         case Ist_LLSC:
            vassert(isIRAtom(st->Ist.LLSC.addr));
            if (st->Ist.LLSC.storedata)
               vassert(isIRAtom(st->Ist.LLSC.storedata));
            break;
         case Ist_Dirty:
            d = st->Ist.Dirty.details;
            vassert(isIRAtom(d->guard));
            for (j = 0; d->args[j]; j++) {
               IRExpr* arg = d->args[j];
               if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
                  vassert(isIRAtom(arg));
            }
            if (d->mFx != Ifx_None)
               vassert(isIRAtom(d->mAddr));
            break;
         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MBE:
            break;
         case Ist_Exit:
            vassert(isIRAtom(st->Ist.Exit.guard));
            break;
         default:
         bad:
            ppIRStmt(st);
            vpanic("considerExpensives");
      }
   }
}


/* ---------------- The main iropt entry point. ---------------- */

/* exported from this file */
/* Rules of the game:

   - IRExpr/IRStmt trees should be treated as immutable, as they
     may get shared.  So never change a field of such a tree node;
     instead construct and return a new one if needed.
*/
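
/* Hypothetical sketch of the rule above (not part of the original
   file): to replace the argument of a Unop, do not write

      e->Iex.Unop.arg = newArg;  // wrong: mutates a possibly-shared node

   but instead build a fresh node:

      IRExpr* e2 = IRExpr_Unop(e->Iex.Unop.op, newArg);  // ok
*/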


IRSB* do_iropt_BB(
         IRSB* bb0,
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
         VexRegisterUpdates pxControl,
         Addr    guest_addr,
         VexArch guest_arch
      )
{
   static Int n_total     = 0;
   static Int n_expensive = 0;

   Bool hasGetIorPutI, hasVorFtemps;
   IRSB *bb, *bb2;

   n_total++;

   /* First flatten the block out, since all other
      phases assume flat code. */

   bb = flatten_BB ( bb0 );

   if (iropt_verbose) {
      vex_printf("\n========= FLAT\n\n" );
      ppIRSB(bb);
   }

   /* If at level 0, stop now. */
   if (vex_control.iropt_level <= 0) return bb;

   /* Now do a preliminary cleanup pass, and figure out if we also
      need to do 'expensive' optimisations.  Expensive optimisations
      are deemed necessary if the block contains any GetIs or PutIs.
      If needed, do expensive transformations and then another cheap
      cleanup pass. */

   bb = cheap_transformations( bb, specHelper, preciseMemExnsFn, pxControl );

   if (guest_arch == VexArchARM) {
      /* Translating Thumb2 code produces a lot of chaff.  We have to
         work extra hard to get rid of it. */
      bb = cprop_BB(bb);
      bb = spec_helpers_BB ( bb, specHelper );
      if (pxControl < VexRegUpdAllregsAtEachInsn) {
         redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
      }
      do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
      do_deadcode_BB( bb );
   }

   if (vex_control.iropt_level > 1) {

      /* Peer at what we have, to decide how much more effort to throw
         at it. */
      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );

      if (hasVorFtemps && !hasGetIorPutI) {
         /* If any evidence of FP or Vector activity, CSE, as that
            tends to mop up all manner of lardy code to do with
            rounding modes.  Don't bother if hasGetIorPutI since that
            case leads into the expensive transformations, which do
            CSE anyway. */
         (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         do_deadcode_BB( bb );
      }

      if (hasGetIorPutI) {
         Bool cses;
         n_expensive++;
         if (DEBUG_IROPT)
            vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
         bb = expensive_transformations( bb, pxControl );
         bb = cheap_transformations( bb, specHelper,
                                     preciseMemExnsFn, pxControl );
         /* Potentially common up GetIs */
         cses = do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         if (cses)
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
      }

      ///////////////////////////////////////////////////////////
      // BEGIN MSVC optimised code transformation hacks
      if (0)
         bb = do_MSVC_HACKS(bb);
      // END MSVC optimised code transformation hacks
      ///////////////////////////////////////////////////////////

      /* Now have a go at unrolling simple (single-BB) loops.  If
         successful, clean up the results as much as possible. */

      bb2 = maybe_loop_unroll_BB( bb, guest_addr );
      if (bb2) {
         bb = cheap_transformations( bb2, specHelper,
                                     preciseMemExnsFn, pxControl );
         if (hasGetIorPutI) {
            bb = expensive_transformations( bb, pxControl );
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
         } else {
            /* at least do CSE and dead code removal */
            do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
            do_deadcode_BB( bb );
         }
         if (0) vex_printf("vex iropt: unrolled a loop\n");
      }

   }

   return bb;
}


/*---------------------------------------------------------------*/
/*--- end                                           ir_opt.c ---*/
/*---------------------------------------------------------------*/