1 
2 /*--------------------------------------------------------------------*/
3 /*--- Callgrind                                                    ---*/
4 /*---                                                       main.c ---*/
5 /*--------------------------------------------------------------------*/
6 
7 /*
8    This file is part of Callgrind, a Valgrind tool for call graph
9    profiling programs.
10 
11    Copyright (C) 2002-2015, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
12 
13    This tool is derived from and contains code from Cachegrind
14    Copyright (C) 2002-2015 Nicholas Nethercote (njn@valgrind.org)
15 
16    This program is free software; you can redistribute it and/or
17    modify it under the terms of the GNU General Public License as
18    published by the Free Software Foundation; either version 2 of the
19    License, or (at your option) any later version.
20 
21    This program is distributed in the hope that it will be useful, but
22    WITHOUT ANY WARRANTY; without even the implied warranty of
23    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24    General Public License for more details.
25 
26    You should have received a copy of the GNU General Public License
27    along with this program; if not, write to the Free Software
28    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29    02111-1307, USA.
30 
31    The GNU General Public License is contained in the file COPYING.
32 */
33 
34 #include "config.h"
35 #include "callgrind.h"
36 #include "global.h"
37 
38 #include "pub_tool_threadstate.h"
39 #include "pub_tool_gdbserver.h"
40 #include "pub_tool_transtab.h"       // VG_(discard_translations_safely)
41 
42 #include "cg_branchpred.c"
43 
44 /*------------------------------------------------------------*/
45 /*--- Global variables                                     ---*/
46 /*------------------------------------------------------------*/
47 
48 /* for all threads */
49 CommandLineOptions CLG_(clo);
50 Statistics CLG_(stat);
51 Bool CLG_(instrument_state) = True; /* Instrumentation on ? */
52 
53 /* thread and signal handler specific */
54 exec_state CLG_(current_state);
55 
56 /* min of L1 and LL cache line sizes.  This only gets set to a
57    non-zero value if we are doing cache simulation. */
58 Int CLG_(min_line_size) = 0;
59 
60 
61 /*------------------------------------------------------------*/
62 /*--- Statistics                                           ---*/
63 /*------------------------------------------------------------*/
64 
CLG_(init_statistics)65 static void CLG_(init_statistics)(Statistics* s)
66 {
67   s->call_counter        = 0;
68   s->jcnd_counter        = 0;
69   s->jump_counter        = 0;
70   s->rec_call_counter    = 0;
71   s->ret_counter         = 0;
72   s->bb_executions       = 0;
73 
74   s->context_counter     = 0;
75   s->bb_retranslations   = 0;
76 
77   s->distinct_objs       = 0;
78   s->distinct_files      = 0;
79   s->distinct_fns        = 0;
80   s->distinct_contexts   = 0;
81   s->distinct_bbs        = 0;
82   s->distinct_bbccs      = 0;
83   s->distinct_instrs     = 0;
84   s->distinct_skips      = 0;
85 
86   s->bb_hash_resizes     = 0;
87   s->bbcc_hash_resizes   = 0;
88   s->jcc_hash_resizes    = 0;
89   s->cxt_hash_resizes    = 0;
90   s->fn_array_resizes    = 0;
91   s->call_stack_resizes  = 0;
92   s->fn_stack_resizes    = 0;
93 
94   s->full_debug_BBs      = 0;
95   s->file_line_debug_BBs = 0;
96   s->fn_name_debug_BBs   = 0;
97   s->no_debug_BBs        = 0;
98   s->bbcc_lru_misses     = 0;
99   s->jcc_lru_misses      = 0;
100   s->cxt_lru_misses      = 0;
101   s->bbcc_clones         = 0;
102 }
103 
104 
105 /*------------------------------------------------------------*/
/*--- Simple callbacks (not cache simulator)               ---*/
107 /*------------------------------------------------------------*/
108 
109 VG_REGPARM(1)
log_global_event(InstrInfo * ii)110 static void log_global_event(InstrInfo* ii)
111 {
112     ULong* cost_Bus;
113 
114     CLG_DEBUG(6, "log_global_event:  Ir  %#lx/%u\n",
115               CLG_(bb_base) + ii->instr_offset, ii->instr_size);
116 
117     if (!CLG_(current_state).collect) return;
118 
119     CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BUS))>0 );
120 
121     CLG_(current_state).cost[ fullOffset(EG_BUS) ]++;
122 
123     if (CLG_(current_state).nonskipped)
124         cost_Bus = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BUS);
125     else
126         cost_Bus = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BUS];
127     cost_Bus[0]++;
128 }
129 
130 
131 /* For branches, we consult two different predictors, one which
132    predicts taken/untaken for conditional branches, and the other
133    which predicts the branch target address for indirect branches
134    (jump-to-register style ones). */
135 
136 static VG_REGPARM(2)
log_cond_branch(InstrInfo * ii,Word taken)137 void log_cond_branch(InstrInfo* ii, Word taken)
138 {
139     Bool miss;
140     Int fullOffset_Bc;
141     ULong* cost_Bc;
142 
143     CLG_DEBUG(6, "log_cond_branch:  Ir %#lx, taken %ld\n",
144               CLG_(bb_base) + ii->instr_offset, taken);
145 
146     miss = 1 & do_cond_branch_predict(CLG_(bb_base) + ii->instr_offset, taken);
147 
148     if (!CLG_(current_state).collect) return;
149 
150     CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BC))>0 );
151 
152     if (CLG_(current_state).nonskipped)
153         cost_Bc = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BC);
154     else
155         cost_Bc = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BC];
156 
157     fullOffset_Bc = fullOffset(EG_BC);
158     CLG_(current_state).cost[ fullOffset_Bc ]++;
159     cost_Bc[0]++;
160     if (miss) {
161         CLG_(current_state).cost[ fullOffset_Bc+1 ]++;
162         cost_Bc[1]++;
163     }
164 }
165 
166 static VG_REGPARM(2)
log_ind_branch(InstrInfo * ii,UWord actual_dst)167 void log_ind_branch(InstrInfo* ii, UWord actual_dst)
168 {
169     Bool miss;
170     Int fullOffset_Bi;
171     ULong* cost_Bi;
172 
173     CLG_DEBUG(6, "log_ind_branch:  Ir  %#lx, dst %#lx\n",
174               CLG_(bb_base) + ii->instr_offset, actual_dst);
175 
176     miss = 1 & do_ind_branch_predict(CLG_(bb_base) + ii->instr_offset, actual_dst);
177 
178     if (!CLG_(current_state).collect) return;
179 
180     CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BI))>0 );
181 
182     if (CLG_(current_state).nonskipped)
183         cost_Bi = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BI);
184     else
185         cost_Bi = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BI];
186 
187     fullOffset_Bi = fullOffset(EG_BI);
188     CLG_(current_state).cost[ fullOffset_Bi ]++;
189     cost_Bi[0]++;
190     if (miss) {
191         CLG_(current_state).cost[ fullOffset_Bi+1 ]++;
192         cost_Bi[1]++;
193     }
194 }
195 
196 /*------------------------------------------------------------*/
197 /*--- Instrumentation structures and event queue handling  ---*/
198 /*------------------------------------------------------------*/
199 
200 /* Maintain an ordered list of memory events which are outstanding, in
201    the sense that no IR has yet been generated to do the relevant
202    helper calls.  The BB is scanned top to bottom and memory events
203    are added to the end of the list, merging with the most recent
204    notified event where possible (Dw immediately following Dr and
205    having the same size and EA can be merged).
206 
207    This merging is done so that for architectures which have
208    load-op-store instructions (x86, amd64), the insn is treated as if
209    it makes just one memory reference (a modify), rather than two (a
210    read followed by a write at the same address).
211 
212    At various points the list will need to be flushed, that is, IR
213    generated from it.  That must happen before any possible exit from
214    the block (the end, or an IRStmt_Exit).  Flushing also takes place
215    when there is no space to add a new event.
216 
217    If we require the simulation statistics to be up to date with
218    respect to possible memory exceptions, then the list would have to
219    be flushed before each memory reference.  That would however lose
220    performance by inhibiting event-merging during flushing.
221 
222    Flushing the list consists of walking it start to end and emitting
223    instrumentation IR for each event, in the order in which they
224    appear.  It may be possible to emit a single call for two adjacent
225    events in order to reduce the number of helper function calls made.
226    For example, it could well be profitable to handle two adjacent Ir
227    events with a single helper call.  */
228 
229 typedef
230    IRExpr
231    IRAtom;
232 
/* Kinds of events that can be held in the event queue. */
typedef enum {
   Ev_Ir,   /* instruction read */
   Ev_Dr,   /* data read */
   Ev_Dw,   /* data write */
   Ev_Dm,   /* data modify (read then write) */
   Ev_Bc,   /* conditional branch */
   Ev_Bi,   /* indirect branch (to unknown destination) */
   Ev_G     /* global bus event */
} EventTag;
244 
245 typedef
246    struct {
247       EventTag   tag;
248       InstrInfo* inode;
249       union {
250 	 struct {
251 	 } Ir;
252 	 struct {
253 	    IRAtom* ea;
254 	    Int     szB;
255 	 } Dr;
256 	 struct {
257 	    IRAtom* ea;
258 	    Int     szB;
259 	 } Dw;
260 	 struct {
261 	    IRAtom* ea;
262 	    Int     szB;
263 	 } Dm;
264          struct {
265             IRAtom* taken; /* :: Ity_I1 */
266          } Bc;
267          struct {
268             IRAtom* dst;
269          } Bi;
270 	 struct {
271 	 } G;
272       } Ev;
273    }
274    Event;
275 
/* Zero all bytes of the event <ev>. */
static void init_Event ( Event* ev )
{
   VG_(memset)(ev, 0, sizeof *ev);
}
279 
get_Event_dea(Event * ev)280 static IRAtom* get_Event_dea ( Event* ev ) {
281    switch (ev->tag) {
282       case Ev_Dr: return ev->Ev.Dr.ea;
283       case Ev_Dw: return ev->Ev.Dw.ea;
284       case Ev_Dm: return ev->Ev.Dm.ea;
285       default:    tl_assert(0);
286    }
287 }
288 
get_Event_dszB(Event * ev)289 static Int get_Event_dszB ( Event* ev ) {
290    switch (ev->tag) {
291       case Ev_Dr: return ev->Ev.Dr.szB;
292       case Ev_Dw: return ev->Ev.Dw.szB;
293       case Ev_Dm: return ev->Ev.Dm.szB;
294       default:    tl_assert(0);
295    }
296 }
297 
298 
299 /* Up to this many unnotified events are allowed.  Number is
300    arbitrary.  Larger numbers allow more event merging to occur, but
301    potentially induce more spilling due to extending live ranges of
302    address temporaries. */
303 #define N_EVENTS 16
304 
305 
306 /* A struct which holds all the running state during instrumentation.
307    Mostly to avoid passing loads of parameters everywhere. */
308 typedef struct {
309     /* The current outstanding-memory-event list. */
310     Event events[N_EVENTS];
311     Int   events_used;
312 
313     /* The array of InstrInfo's is part of BB struct. */
314     BB* bb;
315 
316     /* BB seen before (ie. re-instrumentation) */
317     Bool seen_before;
318 
319     /* Number InstrInfo bins 'used' so far. */
320     UInt ii_index;
321 
322     // current offset of guest instructions from BB start
323     UInt instr_offset;
324 
325     /* The output SB being constructed. */
326     IRSB* sbOut;
327 } ClgState;
328 
329 
/* Print a one-line description of event <ev> with VG_(printf).  Used
   for debug output.  An unknown tag trips an assertion. */
static void showEvent ( Event* ev )
{
   InstrInfo* ii = ev->inode;

   switch (ev->tag) {

      case Ev_Ir:
         VG_(printf)("Ir (InstrInfo %p) at +%u\n", ii, ii->instr_offset);
         break;

      /* The three data events print size and effective address. */
      case Ev_Dr:
         VG_(printf)("Dr (InstrInfo %p) at +%u %d EA=",
                     ii, ii->instr_offset, ev->Ev.Dr.szB);
         ppIRExpr(ev->Ev.Dr.ea);
         VG_(printf)("\n");
         break;

      case Ev_Dw:
         VG_(printf)("Dw (InstrInfo %p) at +%u %d EA=",
                     ii, ii->instr_offset, ev->Ev.Dw.szB);
         ppIRExpr(ev->Ev.Dw.ea);
         VG_(printf)("\n");
         break;

      case Ev_Dm:
         VG_(printf)("Dm (InstrInfo %p) at +%u %d EA=",
                     ii, ii->instr_offset, ev->Ev.Dm.szB);
         ppIRExpr(ev->Ev.Dm.ea);
         VG_(printf)("\n");
         break;

      case Ev_Bc:
         VG_(printf)("Bc %p   GA=", ii);
         ppIRExpr(ev->Ev.Bc.taken);
         VG_(printf)("\n");
         break;

      case Ev_Bi:
         VG_(printf)("Bi %p  DST=", ii);
         ppIRExpr(ev->Ev.Bi.dst);
         VG_(printf)("\n");
         break;

      case Ev_G:
         VG_(printf)("G  %p\n", ii);
         break;

      default:
         tl_assert(0);
         break;
   }
}
373 
374 /* Generate code for all outstanding memory events, and mark the queue
375    empty.  Code is generated into cgs->sbOut, and this activity
376    'consumes' slots in cgs->bb. */
377 
flushEvents(ClgState * clgs)378 static void flushEvents ( ClgState* clgs )
379 {
380    Int        i, regparms, inew;
381    const HChar* helperName;
382    void*      helperAddr;
383    IRExpr**   argv;
384    IRExpr*    i_node_expr;
385    IRDirty*   di;
386    Event*     ev;
387    Event*     ev2;
388    Event*     ev3;
389 
390    if (!clgs->seen_before) {
391        // extend event sets as needed
392        // available sets: D0 Dr
393        for(i=0; i<clgs->events_used; i++) {
394 	   ev  = &clgs->events[i];
395 	   switch(ev->tag) {
396 	   case Ev_Ir:
397 	       // Ir event always is first for a guest instruction
398 	       CLG_ASSERT(ev->inode->eventset == 0);
399 	       ev->inode->eventset = CLG_(sets).base;
400 	       break;
401 	   case Ev_Dr:
402                // extend event set by Dr counters
403 	       ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
404 							   EG_DR);
405 	       break;
406 	   case Ev_Dw:
407 	   case Ev_Dm:
408                // extend event set by Dw counters
409 	       ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
410 							   EG_DW);
411 	       break;
412            case Ev_Bc:
413                // extend event set by Bc counters
414                ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
415                                                            EG_BC);
416                break;
417            case Ev_Bi:
418                // extend event set by Bi counters
419                ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
420                                                            EG_BI);
421                break;
422 	   case Ev_G:
423                // extend event set by Bus counter
424 	       ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
425 							   EG_BUS);
426 	       break;
427 	   default:
428 	       tl_assert(0);
429 	   }
430        }
431    }
432 
433    for(i = 0; i < clgs->events_used; i = inew) {
434 
435       helperName = NULL;
436       helperAddr = NULL;
437       argv       = NULL;
438       regparms   = 0;
439 
440       /* generate IR to notify event i and possibly the ones
441 	 immediately following it. */
442       tl_assert(i >= 0 && i < clgs->events_used);
443 
444       ev  = &clgs->events[i];
445       ev2 = ( i < clgs->events_used-1 ? &clgs->events[i+1] : NULL );
446       ev3 = ( i < clgs->events_used-2 ? &clgs->events[i+2] : NULL );
447 
448       CLG_DEBUGIF(5) {
449 	 VG_(printf)("   flush ");
450 	 showEvent( ev );
451       }
452 
453       i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
454 
455       /* Decide on helper fn to call and args to pass it, and advance
456 	 i appropriately.
457 	 Dm events have same effect as Dw events */
458       switch (ev->tag) {
459 	 case Ev_Ir:
460 	    /* Merge an Ir with a following Dr. */
461 	    if (ev2 && ev2->tag == Ev_Dr) {
462 	       /* Why is this true?  It's because we're merging an Ir
463 		  with a following Dr.  The Ir derives from the
464 		  instruction's IMark and the Dr from data
465 		  references which follow it.  In short it holds
466 		  because each insn starts with an IMark, hence an
467 		  Ev_Ir, and so these Dr must pertain to the
468 		  immediately preceding Ir.  Same applies to analogous
469 		  assertions in the subsequent cases. */
470 	       tl_assert(ev2->inode == ev->inode);
471 	       helperName = CLG_(cachesim).log_1I1Dr_name;
472 	       helperAddr = CLG_(cachesim).log_1I1Dr;
473 	       argv = mkIRExprVec_3( i_node_expr,
474 				     get_Event_dea(ev2),
475 				     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
476 	       regparms = 3;
477 	       inew = i+2;
478 	    }
479 	    /* Merge an Ir with a following Dw/Dm. */
480 	    else
481 	    if (ev2 && (ev2->tag == Ev_Dw || ev2->tag == Ev_Dm)) {
482 	       tl_assert(ev2->inode == ev->inode);
483 	       helperName = CLG_(cachesim).log_1I1Dw_name;
484 	       helperAddr = CLG_(cachesim).log_1I1Dw;
485 	       argv = mkIRExprVec_3( i_node_expr,
486 				     get_Event_dea(ev2),
487 				     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
488 	       regparms = 3;
489 	       inew = i+2;
490 	    }
491 	    /* Merge an Ir with two following Irs. */
492 	    else
493 	    if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir) {
494 	       helperName = CLG_(cachesim).log_3I0D_name;
495 	       helperAddr = CLG_(cachesim).log_3I0D;
496 	       argv = mkIRExprVec_3( i_node_expr,
497 				     mkIRExpr_HWord( (HWord)ev2->inode ),
498 				     mkIRExpr_HWord( (HWord)ev3->inode ) );
499 	       regparms = 3;
500 	       inew = i+3;
501 	    }
502 	    /* Merge an Ir with one following Ir. */
503 	    else
504 	    if (ev2 && ev2->tag == Ev_Ir) {
505 	       helperName = CLG_(cachesim).log_2I0D_name;
506 	       helperAddr = CLG_(cachesim).log_2I0D;
507 	       argv = mkIRExprVec_2( i_node_expr,
508 				     mkIRExpr_HWord( (HWord)ev2->inode ) );
509 	       regparms = 2;
510 	       inew = i+2;
511 	    }
512 	    /* No merging possible; emit as-is. */
513 	    else {
514 	       helperName = CLG_(cachesim).log_1I0D_name;
515 	       helperAddr = CLG_(cachesim).log_1I0D;
516 	       argv = mkIRExprVec_1( i_node_expr );
517 	       regparms = 1;
518 	       inew = i+1;
519 	    }
520 	    break;
521 	 case Ev_Dr:
522 	    /* Data read or modify */
523 	    helperName = CLG_(cachesim).log_0I1Dr_name;
524 	    helperAddr = CLG_(cachesim).log_0I1Dr;
525 	    argv = mkIRExprVec_3( i_node_expr,
526 				  get_Event_dea(ev),
527 				  mkIRExpr_HWord( get_Event_dszB(ev) ) );
528 	    regparms = 3;
529 	    inew = i+1;
530 	    break;
531 	 case Ev_Dw:
532 	 case Ev_Dm:
533 	    /* Data write */
534 	    helperName = CLG_(cachesim).log_0I1Dw_name;
535 	    helperAddr = CLG_(cachesim).log_0I1Dw;
536 	    argv = mkIRExprVec_3( i_node_expr,
537 				  get_Event_dea(ev),
538 				  mkIRExpr_HWord( get_Event_dszB(ev) ) );
539 	    regparms = 3;
540 	    inew = i+1;
541 	    break;
542          case Ev_Bc:
543             /* Conditional branch */
544             helperName = "log_cond_branch";
545             helperAddr = &log_cond_branch;
546             argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
547             regparms = 2;
548             inew = i+1;
549             break;
550          case Ev_Bi:
551             /* Branch to an unknown destination */
552             helperName = "log_ind_branch";
553             helperAddr = &log_ind_branch;
554             argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
555             regparms = 2;
556             inew = i+1;
557             break;
558          case Ev_G:
559             /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
560             helperName = "log_global_event";
561             helperAddr = &log_global_event;
562             argv = mkIRExprVec_1( i_node_expr );
563             regparms = 1;
564             inew = i+1;
565             break;
566 	 default:
567 	    tl_assert(0);
568       }
569 
570       CLG_DEBUGIF(5) {
571 	  if (inew > i+1) {
572 	      VG_(printf)("   merge ");
573 	      showEvent( ev2 );
574 	  }
575 	  if (inew > i+2) {
576 	      VG_(printf)("   merge ");
577 	      showEvent( ev3 );
578 	  }
579 	  if (helperAddr)
580 	      VG_(printf)("   call  %s (%p)\n",
581 			  helperName, helperAddr);
582       }
583 
584       /* helper could be unset depending on the simulator used */
585       if (helperAddr == 0) continue;
586 
587       /* Add the helper. */
588       tl_assert(helperName);
589       tl_assert(helperAddr);
590       tl_assert(argv);
591       di = unsafeIRDirty_0_N( regparms,
592 			      helperName, VG_(fnptr_to_fnentry)( helperAddr ),
593 			      argv );
594       addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
595    }
596 
597    clgs->events_used = 0;
598 }
599 
/* Queue an instruction-read event for <inode>.  Nothing is queued
   unless cache simulation is enabled.  A full queue is flushed first. */
static void addEvent_Ir ( ClgState* clgs, InstrInfo* inode )
{
   Event* slot;

   /* On first instrumentation the InstrInfo has no event set yet. */
   tl_assert(clgs->seen_before || (inode->eventset == 0));

   if (!CLG_(clo).simulate_cache)
      return;

   if (clgs->events_used == N_EVENTS) {
      flushEvents(clgs);
   }
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);

   slot = &clgs->events[clgs->events_used++];
   init_Event(slot);
   slot->inode = inode;
   slot->tag   = Ev_Ir;
}
615 
616 static
addEvent_Dr(ClgState * clgs,InstrInfo * inode,Int datasize,IRAtom * ea)617 void addEvent_Dr ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
618 {
619    Event* evt;
620    tl_assert(isIRAtom(ea));
621    tl_assert(datasize >= 1);
622    if (!CLG_(clo).simulate_cache) return;
623    tl_assert(datasize <= CLG_(min_line_size));
624 
625    if (clgs->events_used == N_EVENTS)
626       flushEvents(clgs);
627    tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
628    evt = &clgs->events[clgs->events_used];
629    init_Event(evt);
630    evt->tag       = Ev_Dr;
631    evt->inode     = inode;
632    evt->Ev.Dr.szB = datasize;
633    evt->Ev.Dr.ea  = ea;
634    clgs->events_used++;
635 }
636 
637 static
addEvent_Dw(ClgState * clgs,InstrInfo * inode,Int datasize,IRAtom * ea)638 void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
639 {
640    Event* lastEvt;
641    Event* evt;
642    tl_assert(isIRAtom(ea));
643    tl_assert(datasize >= 1);
644    if (!CLG_(clo).simulate_cache) return;
645    tl_assert(datasize <= CLG_(min_line_size));
646 
647    /* Is it possible to merge this write with the preceding read? */
648    lastEvt = &clgs->events[clgs->events_used-1];
649    if (clgs->events_used > 0
650        && lastEvt->tag       == Ev_Dr
651        && lastEvt->Ev.Dr.szB == datasize
652        && lastEvt->inode     == inode
653        && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
654    {
655       lastEvt->tag   = Ev_Dm;
656       return;
657    }
658 
659    /* No.  Add as normal. */
660    if (clgs->events_used == N_EVENTS)
661       flushEvents(clgs);
662    tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
663    evt = &clgs->events[clgs->events_used];
664    init_Event(evt);
665    evt->tag       = Ev_Dw;
666    evt->inode     = inode;
667    evt->Ev.Dw.szB = datasize;
668    evt->Ev.Dw.ea  = ea;
669    clgs->events_used++;
670 }
671 
672 static
addEvent_D_guarded(ClgState * clgs,InstrInfo * inode,Int datasize,IRAtom * ea,IRAtom * guard,Bool isWrite)673 void addEvent_D_guarded ( ClgState* clgs, InstrInfo* inode,
674                           Int datasize, IRAtom* ea, IRAtom* guard,
675                           Bool isWrite )
676 {
677    tl_assert(isIRAtom(ea));
678    tl_assert(guard);
679    tl_assert(isIRAtom(guard));
680    tl_assert(datasize >= 1);
681    if (!CLG_(clo).simulate_cache) return;
682    tl_assert(datasize <= CLG_(min_line_size));
683 
684    /* Adding guarded memory actions and merging them with the existing
685       queue is too complex.  Simply flush the queue and add this
686       action immediately.  Since guarded loads and stores are pretty
687       rare, this is not thought likely to cause any noticeable
688       performance loss as a result of the loss of event-merging
689       opportunities. */
690    tl_assert(clgs->events_used >= 0);
691    flushEvents(clgs);
692    tl_assert(clgs->events_used == 0);
693    /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
694    IRExpr*      i_node_expr;
695    const HChar* helperName;
696    void*        helperAddr;
697    IRExpr**     argv;
698    Int          regparms;
699    IRDirty*     di;
700    i_node_expr = mkIRExpr_HWord( (HWord)inode );
701    helperName  = isWrite ? CLG_(cachesim).log_0I1Dw_name
702                          : CLG_(cachesim).log_0I1Dr_name;
703    helperAddr  = isWrite ? CLG_(cachesim).log_0I1Dw
704                          : CLG_(cachesim).log_0I1Dr;
705    argv        = mkIRExprVec_3( i_node_expr,
706                                 ea, mkIRExpr_HWord( datasize ) );
707    regparms    = 3;
708    di          = unsafeIRDirty_0_N(
709                     regparms,
710                     helperName, VG_(fnptr_to_fnentry)( helperAddr ),
711                     argv );
712    di->guard = guard;
713    addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
714 }
715 
716 static
addEvent_Bc(ClgState * clgs,InstrInfo * inode,IRAtom * guard)717 void addEvent_Bc ( ClgState* clgs, InstrInfo* inode, IRAtom* guard )
718 {
719    Event* evt;
720    tl_assert(isIRAtom(guard));
721    tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, guard)
722              == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
723    if (!CLG_(clo).simulate_branch) return;
724 
725    if (clgs->events_used == N_EVENTS)
726       flushEvents(clgs);
727    tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
728    evt = &clgs->events[clgs->events_used];
729    init_Event(evt);
730    evt->tag         = Ev_Bc;
731    evt->inode       = inode;
732    evt->Ev.Bc.taken = guard;
733    clgs->events_used++;
734 }
735 
736 static
addEvent_Bi(ClgState * clgs,InstrInfo * inode,IRAtom * whereTo)737 void addEvent_Bi ( ClgState* clgs, InstrInfo* inode, IRAtom* whereTo )
738 {
739    Event* evt;
740    tl_assert(isIRAtom(whereTo));
741    tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, whereTo)
742              == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
743    if (!CLG_(clo).simulate_branch) return;
744 
745    if (clgs->events_used == N_EVENTS)
746       flushEvents(clgs);
747    tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
748    evt = &clgs->events[clgs->events_used];
749    init_Event(evt);
750    evt->tag       = Ev_Bi;
751    evt->inode     = inode;
752    evt->Ev.Bi.dst = whereTo;
753    clgs->events_used++;
754 }
755 
756 static
addEvent_G(ClgState * clgs,InstrInfo * inode)757 void addEvent_G ( ClgState* clgs, InstrInfo* inode )
758 {
759    Event* evt;
760    if (!CLG_(clo).collect_bus) return;
761 
762    if (clgs->events_used == N_EVENTS)
763       flushEvents(clgs);
764    tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
765    evt = &clgs->events[clgs->events_used];
766    init_Event(evt);
767    evt->tag       = Ev_G;
768    evt->inode     = inode;
769    clgs->events_used++;
770 }
771 
772 /* Initialise or check (if already seen before) an InstrInfo for next insn.
773    We only can set instr_offset/instr_size here. The required event set and
774    resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest
775    instructions. The event set is extended as required on flush of the event
776    queue (when Dm events were determined), cost offsets are determined at
777    end of BB instrumentation. */
778 static
next_InstrInfo(ClgState * clgs,UInt instr_size)779 InstrInfo* next_InstrInfo ( ClgState* clgs, UInt instr_size )
780 {
781    InstrInfo* ii;
782    tl_assert(clgs->ii_index >= 0);
783    tl_assert(clgs->ii_index < clgs->bb->instr_count);
784    ii = &clgs->bb->instr[ clgs->ii_index ];
785 
786    if (clgs->seen_before) {
787        CLG_ASSERT(ii->instr_offset == clgs->instr_offset);
788        CLG_ASSERT(ii->instr_size == instr_size);
789    }
790    else {
791        ii->instr_offset = clgs->instr_offset;
792        ii->instr_size = instr_size;
793        ii->cost_offset = 0;
794        ii->eventset = 0;
795    }
796 
797    clgs->ii_index++;
798    clgs->instr_offset += instr_size;
799    CLG_(stat).distinct_instrs++;
800 
801    return ii;
802 }
803 
804 // return total number of cost values needed for this BB
805 static
update_cost_offsets(ClgState * clgs)806 UInt update_cost_offsets( ClgState* clgs )
807 {
808     Int i;
809     InstrInfo* ii;
810     UInt cost_offset = 0;
811 
812     CLG_ASSERT(clgs->bb->instr_count == clgs->ii_index);
813     for(i=0; i<clgs->ii_index; i++) {
814 	ii = &clgs->bb->instr[i];
815 	if (clgs->seen_before) {
816 	    CLG_ASSERT(ii->cost_offset == cost_offset);
817 	} else
818 	    ii->cost_offset = cost_offset;
819 	cost_offset += ii->eventset ? ii->eventset->size : 0;
820     }
821 
822     return cost_offset;
823 }
824 
825 /*------------------------------------------------------------*/
826 /*--- Instrumentation                                      ---*/
827 /*------------------------------------------------------------*/
828 
829 #if defined(VG_BIGENDIAN)
830 # define CLGEndness Iend_BE
831 #elif defined(VG_LITTLEENDIAN)
832 # define CLGEndness Iend_LE
833 #else
834 # error "Unknown endianness"
835 #endif
836 
837 static
IRConst2Addr(IRConst * con)838 Addr IRConst2Addr(IRConst* con)
839 {
840     Addr addr;
841 
842     if (sizeof(Addr) == 4) {
843 	CLG_ASSERT( con->tag == Ico_U32 );
844 	addr = con->Ico.U32;
845     }
846     else if (sizeof(Addr) == 8) {
847 	CLG_ASSERT( con->tag == Ico_U64 );
848 	addr = con->Ico.U64;
849     }
850     else
851 	VG_(tool_panic)("Callgrind: invalid Addr type");
852 
853     return addr;
854 }
855 
856 /* First pass over a BB to instrument, counting instructions and jumps
857  * This is needed for the size of the BB struct to allocate
858  *
859  * Called from CLG_(get_bb)
860  */
CLG_(collectBlockInfo)861 void CLG_(collectBlockInfo)(IRSB* sbIn,
862 			    /*INOUT*/ UInt* instrs,
863 			    /*INOUT*/ UInt* cjmps,
864 			    /*INOUT*/ Bool* cjmp_inverted)
865 {
866     Int i;
867     IRStmt* st;
868     Addr instrAddr =0, jumpDst;
869     UInt instrLen = 0;
870     Bool toNextInstr = False;
871 
872     // Ist_Exit has to be ignored in preamble code, before first IMark:
873     // preamble code is added by VEX for self modifying code, and has
874     // nothing to do with client code
875     Bool inPreamble = True;
876 
877     if (!sbIn) return;
878 
879     for (i = 0; i < sbIn->stmts_used; i++) {
880 	  st = sbIn->stmts[i];
881 	  if (Ist_IMark == st->tag) {
882 	      inPreamble = False;
883 
884 	      instrAddr = st->Ist.IMark.addr;
885 	      instrLen  = st->Ist.IMark.len;
886 
887 	      (*instrs)++;
888 	      toNextInstr = False;
889 	  }
890 	  if (inPreamble) continue;
891 	  if (Ist_Exit == st->tag) {
892 	      jumpDst = IRConst2Addr(st->Ist.Exit.dst);
893 	      toNextInstr =  (jumpDst == instrAddr + instrLen);
894 
895 	      (*cjmps)++;
896 	  }
897     }
898 
899     /* if the last instructions of BB conditionally jumps to next instruction
900      * (= first instruction of next BB in memory), this is a inverted by VEX.
901      */
902     *cjmp_inverted = toNextInstr;
903 }
904 
905 static
addConstMemStoreStmt(IRSB * bbOut,UWord addr,UInt val,IRType hWordTy)906 void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy)
907 {
908     addStmtToIRSB( bbOut,
909 		   IRStmt_Store(CLGEndness,
910 				IRExpr_Const(hWordTy == Ity_I32 ?
911 					     IRConst_U32( addr ) :
912 					     IRConst_U64( addr )),
913 				IRExpr_Const(IRConst_U32(val)) ));
914 }
915 
916 
917 /* add helper call to setup_bbcc, with pointer to BB struct as argument
918  *
919  * precondition for setup_bbcc:
920  * - jmps_passed has number of cond.jumps passed in last executed BB
921  * - current_bbcc has a pointer to the BBCC of the last executed BB
922  *   Thus, if bbcc_jmpkind is != -1 (JmpNone),
923  *     current_bbcc->bb->jmp_addr
924  *   gives the address of the jump source.
925  *
926  * the setup does 2 things:
927  * - trace call:
928  *   * Unwind own call stack, i.e sync our ESP with real ESP
929  *     This is for ESP manipulation (longjmps, C++ exec handling) and RET
930  *   * For CALLs or JMPs crossing objects, record call arg +
931  *     push are on own call stack
932  *
933  * - prepare for cache log functions:
934  *   set current_bbcc to BBCC that gets the costs for this BB execution
935  *   attached
936  */
937 static
addBBSetupCall(ClgState * clgs)938 void addBBSetupCall(ClgState* clgs)
939 {
940    IRDirty* di;
941    IRExpr  *arg1, **argv;
942 
943    arg1 = mkIRExpr_HWord( (HWord)clgs->bb );
944    argv = mkIRExprVec_1(arg1);
945    di = unsafeIRDirty_0_N( 1, "setup_bbcc",
946 			      VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ),
947 			      argv);
948    addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
949 }
950 
951 
952 static
CLG_(instrument)953 IRSB* CLG_(instrument)( VgCallbackClosure* closure,
954                         IRSB* sbIn,
955 			const VexGuestLayout* layout,
956 			const VexGuestExtents* vge,
957                         const VexArchInfo* archinfo_host,
958 			IRType gWordTy, IRType hWordTy )
959 {
960    Int        i;
961    IRStmt*    st;
962    Addr       origAddr;
963    InstrInfo* curr_inode = NULL;
964    ClgState   clgs;
965    UInt       cJumps = 0;
966    IRTypeEnv* tyenv = sbIn->tyenv;
967 
968    if (gWordTy != hWordTy) {
969       /* We don't currently support this case. */
970       VG_(tool_panic)("host/guest word size mismatch");
971    }
972 
973    // No instrumentation if it is switched off
974    if (! CLG_(instrument_state)) {
975        CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n",
976 		 (Addr)closure->readdr);
977        return sbIn;
978    }
979 
980    CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr)closure->readdr);
981 
982    /* Set up SB for instrumented IR */
983    clgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
984 
985    // Copy verbatim any IR preamble preceding the first IMark
986    i = 0;
987    while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
988       addStmtToIRSB( clgs.sbOut, sbIn->stmts[i] );
989       i++;
990    }
991 
992    // Get the first statement, and origAddr from it
993    CLG_ASSERT(sbIn->stmts_used >0);
994    CLG_ASSERT(i < sbIn->stmts_used);
995    st = sbIn->stmts[i];
996    CLG_ASSERT(Ist_IMark == st->tag);
997 
998    origAddr = st->Ist.IMark.addr + st->Ist.IMark.delta;
999    CLG_ASSERT(origAddr == st->Ist.IMark.addr
1000                           + st->Ist.IMark.delta);  // XXX: check no overflow
1001 
1002    /* Get BB struct (creating if necessary).
1003     * JS: The hash table is keyed with orig_addr_noredir -- important!
1004     * JW: Why? If it is because of different chasing of the redirection,
1005     *     this is not needed, as chasing is switched off in callgrind
1006     */
1007    clgs.bb = CLG_(get_bb)(origAddr, sbIn, &(clgs.seen_before));
1008 
1009    addBBSetupCall(&clgs);
1010 
1011    // Set up running state
1012    clgs.events_used = 0;
1013    clgs.ii_index = 0;
1014    clgs.instr_offset = 0;
1015 
1016    for (/*use current i*/; i < sbIn->stmts_used; i++) {
1017 
1018       st = sbIn->stmts[i];
1019       CLG_ASSERT(isFlatIRStmt(st));
1020 
1021       switch (st->tag) {
1022 	 case Ist_NoOp:
1023 	 case Ist_AbiHint:
1024 	 case Ist_Put:
1025 	 case Ist_PutI:
1026 	 case Ist_MBE:
1027 	    break;
1028 
1029 	 case Ist_IMark: {
1030             Addr   cia   = st->Ist.IMark.addr + st->Ist.IMark.delta;
1031             UInt   isize = st->Ist.IMark.len;
1032             CLG_ASSERT(clgs.instr_offset == cia - origAddr);
1033 	    // If Vex fails to decode an instruction, the size will be zero.
1034 	    // Pretend otherwise.
1035 	    if (isize == 0) isize = VG_MIN_INSTR_SZB;
1036 
1037 	    // Sanity-check size.
1038 	    tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
1039 		     || VG_CLREQ_SZB == isize );
1040 
1041 	    // Init the inode, record it as the current one.
1042 	    // Subsequent Dr/Dw/Dm events from the same instruction will
1043 	    // also use it.
1044 	    curr_inode = next_InstrInfo (&clgs, isize);
1045 
1046 	    addEvent_Ir( &clgs, curr_inode );
1047 	    break;
1048 	 }
1049 
1050 	 case Ist_WrTmp: {
1051 	    IRExpr* data = st->Ist.WrTmp.data;
1052 	    if (data->tag == Iex_Load) {
1053 	       IRExpr* aexpr = data->Iex.Load.addr;
1054 	       // Note also, endianness info is ignored.  I guess
1055 	       // that's not interesting.
1056 	       addEvent_Dr( &clgs, curr_inode,
1057 			    sizeofIRType(data->Iex.Load.ty), aexpr );
1058 	    }
1059 	    break;
1060 	 }
1061 
1062 	 case Ist_Store: {
1063 	    IRExpr* data  = st->Ist.Store.data;
1064 	    IRExpr* aexpr = st->Ist.Store.addr;
1065 	    addEvent_Dw( &clgs, curr_inode,
1066 			 sizeofIRType(typeOfIRExpr(sbIn->tyenv, data)), aexpr );
1067 	    break;
1068 	 }
1069 
1070          case Ist_StoreG: {
1071             IRStoreG* sg   = st->Ist.StoreG.details;
1072             IRExpr*   data = sg->data;
1073             IRExpr*   addr = sg->addr;
1074             IRType    type = typeOfIRExpr(tyenv, data);
1075             tl_assert(type != Ity_INVALID);
1076             addEvent_D_guarded( &clgs, curr_inode,
1077                                 sizeofIRType(type), addr, sg->guard,
1078                                 True/*isWrite*/ );
1079             break;
1080          }
1081 
1082          case Ist_LoadG: {
1083             IRLoadG* lg       = st->Ist.LoadG.details;
1084             IRType   type     = Ity_INVALID; /* loaded type */
1085             IRType   typeWide = Ity_INVALID; /* after implicit widening */
1086             IRExpr*  addr     = lg->addr;
1087             typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
1088             tl_assert(type != Ity_INVALID);
1089             addEvent_D_guarded( &clgs, curr_inode,
1090                                 sizeofIRType(type), addr, lg->guard,
1091                                 False/*!isWrite*/ );
1092             break;
1093          }
1094 
1095 	 case Ist_Dirty: {
1096 	    Int      dataSize;
1097 	    IRDirty* d = st->Ist.Dirty.details;
1098 	    if (d->mFx != Ifx_None) {
1099 	       /* This dirty helper accesses memory.  Collect the details. */
1100 	       tl_assert(d->mAddr != NULL);
1101 	       tl_assert(d->mSize != 0);
1102 	       dataSize = d->mSize;
1103 	       // Large (eg. 28B, 108B, 512B on x86) data-sized
1104 	       // instructions will be done inaccurately, but they're
1105 	       // very rare and this avoids errors from hitting more
1106 	       // than two cache lines in the simulation.
1107 	       if (CLG_(clo).simulate_cache && dataSize > CLG_(min_line_size))
1108 		  dataSize = CLG_(min_line_size);
1109 	       if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
1110 		  addEvent_Dr( &clgs, curr_inode, dataSize, d->mAddr );
1111 	       if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
1112 		  addEvent_Dw( &clgs, curr_inode, dataSize, d->mAddr );
1113 	    } else {
1114 	       tl_assert(d->mAddr == NULL);
1115 	       tl_assert(d->mSize == 0);
1116 	    }
1117 	    break;
1118 	 }
1119 
1120          case Ist_CAS: {
1121             /* We treat it as a read and a write of the location.  I
1122                think that is the same behaviour as it was before IRCAS
1123                was introduced, since prior to that point, the Vex
1124                front ends would translate a lock-prefixed instruction
1125                into a (normal) read followed by a (normal) write. */
1126             Int    dataSize;
1127             IRCAS* cas = st->Ist.CAS.details;
1128             CLG_ASSERT(cas->addr && isIRAtom(cas->addr));
1129             CLG_ASSERT(cas->dataLo);
1130             dataSize = sizeofIRType(typeOfIRExpr(sbIn->tyenv, cas->dataLo));
1131             if (cas->dataHi != NULL)
1132                dataSize *= 2; /* since this is a doubleword-cas */
1133             addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr );
1134             addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr );
1135             addEvent_G(  &clgs, curr_inode );
1136             break;
1137          }
1138 
1139          case Ist_LLSC: {
1140             IRType dataTy;
1141             if (st->Ist.LLSC.storedata == NULL) {
1142                /* LL */
1143                dataTy = typeOfIRTemp(sbIn->tyenv, st->Ist.LLSC.result);
1144                addEvent_Dr( &clgs, curr_inode,
1145                             sizeofIRType(dataTy), st->Ist.LLSC.addr );
1146                /* flush events before LL, should help SC to succeed */
1147                flushEvents( &clgs );
1148             } else {
1149                /* SC */
1150                dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata);
1151                addEvent_Dw( &clgs, curr_inode,
1152                             sizeofIRType(dataTy), st->Ist.LLSC.addr );
1153                /* I don't know whether the global-bus-lock cost should
1154                   be attributed to the LL or the SC, but it doesn't
1155                   really matter since they always have to be used in
1156                   pairs anyway.  Hence put it (quite arbitrarily) on
1157                   the SC. */
1158                addEvent_G(  &clgs, curr_inode );
1159             }
1160             break;
1161          }
1162 
1163  	 case Ist_Exit: {
1164             Bool guest_exit, inverted;
1165 
1166             /* VEX code generation sometimes inverts conditional branches.
1167              * As Callgrind counts (conditional) jumps, it has to correct
1168              * inversions. The heuristic is the following:
1169              * (1) Callgrind switches off SB chasing and unrolling, and
1170              *     therefore it assumes that a candidate for inversion only is
1171              *     the last conditional branch in an SB.
1172              * (2) inversion is assumed if the branch jumps to the address of
1173              *     the next guest instruction in memory.
1174              * This heuristic is precalculated in CLG_(collectBlockInfo)().
1175              *
1176              * Branching behavior is also used for branch prediction. Note that
1177              * above heuristic is different from what Cachegrind does.
1178              * Cachegrind uses (2) for all branches.
1179              */
1180             if (cJumps+1 == clgs.bb->cjmp_count)
1181                 inverted = clgs.bb->cjmp_inverted;
1182             else
1183                 inverted = False;
1184 
1185             // call branch predictor only if this is a branch in guest code
1186             guest_exit = (st->Ist.Exit.jk == Ijk_Boring) ||
1187                          (st->Ist.Exit.jk == Ijk_Call) ||
1188                          (st->Ist.Exit.jk == Ijk_Ret);
1189 
1190             if (guest_exit) {
1191                 /* Stuff to widen the guard expression to a host word, so
1192                    we can pass it to the branch predictor simulation
1193                    functions easily. */
1194                 IRType   tyW    = hWordTy;
1195                 IROp     widen  = tyW==Ity_I32  ? Iop_1Uto32  : Iop_1Uto64;
1196                 IROp     opXOR  = tyW==Ity_I32  ? Iop_Xor32   : Iop_Xor64;
1197                 IRTemp   guard1 = newIRTemp(clgs.sbOut->tyenv, Ity_I1);
1198                 IRTemp   guardW = newIRTemp(clgs.sbOut->tyenv, tyW);
1199                 IRTemp   guard  = newIRTemp(clgs.sbOut->tyenv, tyW);
1200                 IRExpr*  one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
1201                                                : IRExpr_Const(IRConst_U64(1));
1202 
1203                 /* Widen the guard expression. */
1204                 addStmtToIRSB( clgs.sbOut,
1205                                IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
1206                 addStmtToIRSB( clgs.sbOut,
1207                                IRStmt_WrTmp( guardW,
1208                                              IRExpr_Unop(widen,
1209                                                          IRExpr_RdTmp(guard1))) );
1210                 /* If the exit is inverted, invert the sense of the guard. */
1211                 addStmtToIRSB(
1212                         clgs.sbOut,
1213                         IRStmt_WrTmp(
1214                                 guard,
1215                                 inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
1216                                     : IRExpr_RdTmp(guardW)
1217                                     ));
1218                 /* And post the event. */
1219                 addEvent_Bc( &clgs, curr_inode, IRExpr_RdTmp(guard) );
1220             }
1221 
1222 	    /* We may never reach the next statement, so need to flush
1223 	       all outstanding transactions now. */
1224 	    flushEvents( &clgs );
1225 
1226 	    CLG_ASSERT(clgs.ii_index>0);
1227 	    if (!clgs.seen_before) {
1228 	      ClgJumpKind jk;
1229 
1230 	      if      (st->Ist.Exit.jk == Ijk_Call) jk = jk_Call;
1231 	      else if (st->Ist.Exit.jk == Ijk_Ret)  jk = jk_Return;
1232 	      else {
1233 		if (IRConst2Addr(st->Ist.Exit.dst) ==
1234 		    origAddr + curr_inode->instr_offset + curr_inode->instr_size)
1235 		  jk = jk_None;
1236 		else
1237 		  jk = jk_Jump;
1238 	      }
1239 
1240 	      clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
1241 	      clgs.bb->jmp[cJumps].jmpkind = jk;
1242 	    }
1243 
1244 	    /* Update global variable jmps_passed before the jump
1245 	     * A correction is needed if VEX inverted the last jump condition
1246 	    */
1247 	    UInt val = inverted ? cJumps+1 : cJumps;
1248 	    addConstMemStoreStmt( clgs.sbOut,
1249 				  (UWord) &CLG_(current_state).jmps_passed,
1250 				  val, hWordTy);
1251 	    cJumps++;
1252 
1253 	    break;
1254 	 }
1255 
1256 	 default:
1257 	    tl_assert(0);
1258 	    break;
1259       }
1260 
1261       /* Copy the original statement */
1262       addStmtToIRSB( clgs.sbOut, st );
1263 
1264       CLG_DEBUGIF(5) {
1265 	 VG_(printf)("   pass  ");
1266 	 ppIRStmt(st);
1267 	 VG_(printf)("\n");
1268       }
1269    }
1270 
1271    /* Deal with branches to unknown destinations.  Except ignore ones
1272       which are function returns as we assume the return stack
1273       predictor never mispredicts. */
1274    if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
1275       if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
1276       switch (sbIn->next->tag) {
1277          case Iex_Const:
1278             break; /* boring - branch to known address */
1279          case Iex_RdTmp:
1280             /* looks like an indirect branch (branch to unknown) */
1281             addEvent_Bi( &clgs, curr_inode, sbIn->next );
1282             break;
1283          default:
1284             /* shouldn't happen - if the incoming IR is properly
1285                flattened, should only have tmp and const cases to
1286                consider. */
1287             tl_assert(0);
1288       }
1289    }
1290 
1291    /* At the end of the bb.  Flush outstandings. */
1292    flushEvents( &clgs );
1293 
1294    /* Update global variable jmps_passed at end of SB.
1295     * As CLG_(current_state).jmps_passed is reset to 0 in setup_bbcc,
1296     * this can be omitted if there is no conditional jump in this SB.
1297     * A correction is needed if VEX inverted the last jump condition
1298     */
1299    if (cJumps>0) {
1300       UInt jmps_passed = cJumps;
1301       if (clgs.bb->cjmp_inverted) jmps_passed--;
1302       addConstMemStoreStmt( clgs.sbOut,
1303 			    (UWord) &CLG_(current_state).jmps_passed,
1304 			    jmps_passed, hWordTy);
1305    }
1306    CLG_ASSERT(clgs.bb->cjmp_count == cJumps);
1307    CLG_ASSERT(clgs.bb->instr_count == clgs.ii_index);
1308 
1309    /* Info for final exit from BB */
1310    {
1311      ClgJumpKind jk;
1312 
1313      if      (sbIn->jumpkind == Ijk_Call) jk = jk_Call;
1314      else if (sbIn->jumpkind == Ijk_Ret)  jk = jk_Return;
1315      else {
1316        jk = jk_Jump;
1317        if ((sbIn->next->tag == Iex_Const) &&
1318 	   (IRConst2Addr(sbIn->next->Iex.Const.con) ==
1319 	    origAddr + clgs.instr_offset))
1320 	 jk = jk_None;
1321      }
1322      clgs.bb->jmp[cJumps].jmpkind = jk;
1323      /* Instruction index of the call/ret at BB end
1324       * (it is wrong for fall-through, but does not matter) */
1325      clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
1326    }
1327 
1328    /* swap information of last exit with final exit if inverted */
1329    if (clgs.bb->cjmp_inverted) {
1330      ClgJumpKind jk;
1331      UInt instr;
1332 
1333      jk = clgs.bb->jmp[cJumps].jmpkind;
1334      clgs.bb->jmp[cJumps].jmpkind = clgs.bb->jmp[cJumps-1].jmpkind;
1335      clgs.bb->jmp[cJumps-1].jmpkind = jk;
1336      instr = clgs.bb->jmp[cJumps].instr;
1337      clgs.bb->jmp[cJumps].instr = clgs.bb->jmp[cJumps-1].instr;
1338      clgs.bb->jmp[cJumps-1].instr = instr;
1339    }
1340 
1341    if (clgs.seen_before) {
1342        CLG_ASSERT(clgs.bb->cost_count == update_cost_offsets(&clgs));
1343        CLG_ASSERT(clgs.bb->instr_len == clgs.instr_offset);
1344    }
1345    else {
1346        clgs.bb->cost_count = update_cost_offsets(&clgs);
1347        clgs.bb->instr_len = clgs.instr_offset;
1348    }
1349 
1350    CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n",
1351 	     origAddr, clgs.bb->instr_len,
1352 	     clgs.bb->cjmp_count, clgs.bb->cost_count);
1353    if (cJumps>0) {
1354        CLG_DEBUG(3, "                     [ ");
1355        for (i=0;i<cJumps;i++)
1356 	   CLG_DEBUG(3, "%u ", clgs.bb->jmp[i].instr);
1357        CLG_DEBUG(3, "], last inverted: %s \n",
1358 		 clgs.bb->cjmp_inverted ? "yes":"no");
1359    }
1360 
1361   return clgs.sbOut;
1362 }
1363 
1364 /*--------------------------------------------------------------------*/
1365 /*--- Discarding BB info                                           ---*/
1366 /*--------------------------------------------------------------------*/
1367 
1368 // Called when a translation is removed from the translation cache for
1369 // any reason at all: to free up space, because the guest code was
1370 // unmapped or modified, or for any arbitrary reason.
1371 static
clg_discard_superblock_info(Addr orig_addr,VexGuestExtents vge)1372 void clg_discard_superblock_info ( Addr orig_addr, VexGuestExtents vge )
1373 {
1374     tl_assert(vge.n_used > 0);
1375 
1376    if (0)
1377       VG_(printf)( "discard_superblock_info: %p, %p, %llu\n",
1378                    (void*)orig_addr,
1379                    (void*)vge.base[0], (ULong)vge.len[0]);
1380 
1381    // Get BB info, remove from table, free BB info.  Simple!
1382    // When created, the BB is keyed by the first instruction address,
1383    // (not orig_addr, but eventually redirected address). Thus, we
1384    // use the first instruction address in vge.
1385    CLG_(delete_bb)(vge.base[0]);
1386 }
1387 
1388 
1389 /*------------------------------------------------------------*/
1390 /*--- CLG_(fini)() and related function                     ---*/
1391 /*------------------------------------------------------------*/
1392 
1393 
1394 
/* Zero the cost counters of one thread.
 *
 * For every entry of CLG_(current_call_stack) that has a jcc, the
 * enter cost is set to the current cost and the call counter is
 * cleared.  Afterwards CLG_(zero_bbcc) is applied to all BBCCs and
 * t->lastdump_cost is set to the current cost.
 */
static void zero_thread_cost(thread_info* t)
{
  Int depth = 0;

  while (depth < CLG_(current_call_stack).sp) {
    if (CLG_(current_call_stack).entry[depth].jcc) {
      /* reset call counters to current for active calls */
      CLG_(copy_cost)( CLG_(sets).full,
                       CLG_(current_call_stack).entry[depth].enter_cost,
                       CLG_(current_state).cost );
      CLG_(current_call_stack).entry[depth].jcc->call_counter = 0;
    }
    depth++;
  }

  CLG_(forall_bbccs)(CLG_(zero_bbcc));

  /* set counter for last dump */
  CLG_(copy_cost)( CLG_(sets).full,
                   t->lastdump_cost, CLG_(current_state).cost );
}
1415 
CLG_(zero_all_cost)1416 void CLG_(zero_all_cost)(Bool only_current_thread)
1417 {
1418   if (VG_(clo_verbosity) > 1)
1419     VG_(message)(Vg_DebugMsg, "  Zeroing costs...\n");
1420 
1421   if (only_current_thread)
1422     zero_thread_cost(CLG_(get_current_thread)());
1423   else
1424     CLG_(forall_threads)(zero_thread_cost);
1425 
1426   if (VG_(clo_verbosity) > 1)
1427     VG_(message)(Vg_DebugMsg, "  ...done\n");
1428 }
1429 
1430 static
unwind_thread(thread_info * t)1431 void unwind_thread(thread_info* t)
1432 {
1433   /* unwind signal handlers */
1434   while(CLG_(current_state).sig !=0)
1435     CLG_(post_signal)(CLG_(current_tid),CLG_(current_state).sig);
1436 
1437   /* unwind regular call stack */
1438   while(CLG_(current_call_stack).sp>0)
1439     CLG_(pop_call_stack)();
1440 
1441   /* reset context and function stack for context generation */
1442   CLG_(init_exec_state)( &CLG_(current_state) );
1443   CLG_(current_fn_stack).top = CLG_(current_fn_stack).bottom;
1444 }
1445 
1446 static
zero_state_cost(thread_info * t)1447 void zero_state_cost(thread_info* t)
1448 {
1449     CLG_(zero_cost)( CLG_(sets).full, CLG_(current_state).cost );
1450 }
1451 
CLG_(set_instrument_state)1452 void CLG_(set_instrument_state)(const HChar* reason, Bool state)
1453 {
1454   if (CLG_(instrument_state) == state) {
1455     CLG_DEBUG(2, "%s: instrumentation already %s\n",
1456 	     reason, state ? "ON" : "OFF");
1457     return;
1458   }
1459   CLG_(instrument_state) = state;
1460   CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n",
1461 	   reason, state ? "ON" : "OFF");
1462 
1463   VG_(discard_translations_safely)( (Addr)0x1000, ~(SizeT)0xfff, "callgrind");
1464 
1465   /* reset internal state: call stacks, simulator */
1466   CLG_(forall_threads)(unwind_thread);
1467   CLG_(forall_threads)(zero_state_cost);
1468   (*CLG_(cachesim).clear)();
1469 
1470   if (VG_(clo_verbosity) > 1)
1471     VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n",
1472 		 reason, state ? "ON" : "OFF");
1473 }
1474 
1475 /* helper for dump_state_togdb */
dump_state_of_thread_togdb(thread_info * ti)1476 static void dump_state_of_thread_togdb(thread_info* ti)
1477 {
1478     static FullCost sum = 0, tmp = 0;
1479     Int t, i;
1480     BBCC *from, *to;
1481     call_entry* ce;
1482     HChar *mcost;
1483 
1484     t = CLG_(current_tid);
1485     CLG_(init_cost_lz)( CLG_(sets).full, &sum );
1486     CLG_(copy_cost_lz)( CLG_(sets).full, &tmp, ti->lastdump_cost );
1487     CLG_(add_diff_cost)( CLG_(sets).full, sum, ti->lastdump_cost,
1488 			 ti->states.entry[0]->cost);
1489     CLG_(copy_cost)( CLG_(sets).full, ti->lastdump_cost, tmp );
1490     mcost = CLG_(mappingcost_as_string)(CLG_(dumpmap), sum);
1491     VG_(gdb_printf)("events-%d: %s\n", t, mcost);
1492     VG_(free)(mcost);
1493     VG_(gdb_printf)("frames-%d: %d\n", t, CLG_(current_call_stack).sp);
1494 
1495     ce = 0;
1496     for(i = 0; i < CLG_(current_call_stack).sp; i++) {
1497       ce = CLG_(get_call_entry)(i);
1498       /* if this frame is skipped, we don't have counters */
1499       if (!ce->jcc) continue;
1500 
1501       from = ce->jcc->from;
1502       VG_(gdb_printf)("function-%d-%d: %s\n",t, i, from->cxt->fn[0]->name);
1503       VG_(gdb_printf)("calls-%d-%d: %llu\n",t, i, ce->jcc->call_counter);
1504 
1505       /* FIXME: EventSets! */
1506       CLG_(copy_cost)( CLG_(sets).full, sum, ce->jcc->cost );
1507       CLG_(copy_cost)( CLG_(sets).full, tmp, ce->enter_cost );
1508       CLG_(add_diff_cost)( CLG_(sets).full, sum,
1509 			  ce->enter_cost, CLG_(current_state).cost );
1510       CLG_(copy_cost)( CLG_(sets).full, ce->enter_cost, tmp );
1511 
1512       mcost = CLG_(mappingcost_as_string)(CLG_(dumpmap), sum);
1513       VG_(gdb_printf)("events-%d-%d: %s\n",t, i, mcost);
1514       VG_(free)(mcost);
1515     }
1516     if (ce && ce->jcc) {
1517       to = ce->jcc->to;
1518       VG_(gdb_printf)("function-%d-%d: %s\n",t, i, to->cxt->fn[0]->name );
1519     }
1520 }
1521 
1522 /* Dump current state */
dump_state_togdb(void)1523 static void dump_state_togdb(void)
1524 {
1525     thread_info** th;
1526     int t;
1527     Int orig_tid = CLG_(current_tid);
1528 
1529     VG_(gdb_printf)("instrumentation: %s\n",
1530 		    CLG_(instrument_state) ? "on":"off");
1531     if (!CLG_(instrument_state)) return;
1532 
1533     VG_(gdb_printf)("executed-bbs: %llu\n", CLG_(stat).bb_executions);
1534     VG_(gdb_printf)("executed-calls: %llu\n", CLG_(stat).call_counter);
1535     VG_(gdb_printf)("distinct-bbs: %d\n", CLG_(stat).distinct_bbs);
1536     VG_(gdb_printf)("distinct-calls: %d\n", CLG_(stat).distinct_jccs);
1537     VG_(gdb_printf)("distinct-functions: %d\n", CLG_(stat).distinct_fns);
1538     VG_(gdb_printf)("distinct-contexts: %d\n", CLG_(stat).distinct_contexts);
1539 
1540     /* "events:" line. Given here because it will be dynamic in the future */
1541     HChar *evmap = CLG_(eventmapping_as_string)(CLG_(dumpmap));
1542     VG_(gdb_printf)("events: %s\n", evmap);
1543     VG_(free)(evmap);
1544     /* "part:" line (number of last part. Is 0 at start */
1545     VG_(gdb_printf)("part: %d\n", CLG_(get_dump_counter)());
1546 
1547     /* threads */
1548     th = CLG_(get_threads)();
1549     VG_(gdb_printf)("threads:");
1550     for(t=1;t<VG_N_THREADS;t++) {
1551 	if (!th[t]) continue;
1552 	VG_(gdb_printf)(" %d", t);
1553     }
1554     VG_(gdb_printf)("\n");
1555     VG_(gdb_printf)("current-tid: %d\n", orig_tid);
1556     CLG_(forall_threads)(dump_state_of_thread_togdb);
1557 }
1558 
1559 
print_monitor_help(void)1560 static void print_monitor_help ( void )
1561 {
1562    VG_(gdb_printf) ("\n");
1563    VG_(gdb_printf) ("callgrind monitor commands:\n");
1564    VG_(gdb_printf) ("  dump [<dump_hint>]\n");
1565    VG_(gdb_printf) ("        dump counters\n");
1566    VG_(gdb_printf) ("  zero\n");
1567    VG_(gdb_printf) ("        zero counters\n");
1568    VG_(gdb_printf) ("  status\n");
1569    VG_(gdb_printf) ("        print status\n");
1570    VG_(gdb_printf) ("  instrumentation [on|off]\n");
1571    VG_(gdb_printf) ("        get/set (if on/off given) instrumentation state\n");
1572    VG_(gdb_printf) ("\n");
1573 }
1574 
1575 /* return True if request recognised, False otherwise */
handle_gdb_monitor_command(ThreadId tid,const HChar * req)1576 static Bool handle_gdb_monitor_command (ThreadId tid, const HChar *req)
1577 {
1578    HChar* wcmd;
1579    HChar s[VG_(strlen(req)) + 1]; /* copy for strtok_r */
1580    HChar *ssaveptr;
1581 
1582    VG_(strcpy) (s, req);
1583 
1584    wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
1585    switch (VG_(keyword_id) ("help dump zero status instrumentation",
1586                             wcmd, kwd_report_duplicated_matches)) {
1587    case -2: /* multiple matches */
1588       return True;
1589    case -1: /* not found */
1590       return False;
1591    case  0: /* help */
1592       print_monitor_help();
1593       return True;
1594    case  1: { /* dump */
1595       CLG_(dump_profile)(req, False);
1596       return True;
1597    }
1598    case  2: { /* zero */
1599       CLG_(zero_all_cost)(False);
1600       return True;
1601    }
1602 
1603    case 3: { /* status */
1604      HChar* arg = VG_(strtok_r) (0, " ", &ssaveptr);
1605      if (arg && (VG_(strcmp)(arg, "internal") == 0)) {
1606        /* internal interface to callgrind_control */
1607        dump_state_togdb();
1608        return True;
1609      }
1610 
1611      if (!CLG_(instrument_state)) {
1612        VG_(gdb_printf)("No status available as instrumentation is switched off\n");
1613      } else {
1614        // Status information to be improved ...
1615        thread_info** th = CLG_(get_threads)();
1616        Int t, tcount = 0;
1617        for(t=1;t<VG_N_THREADS;t++)
1618 	 if (th[t]) tcount++;
1619        VG_(gdb_printf)("%d thread(s) running.\n", tcount);
1620      }
1621      return True;
1622    }
1623 
1624    case 4: { /* instrumentation */
1625      HChar* arg = VG_(strtok_r) (0, " ", &ssaveptr);
1626      if (!arg) {
1627        VG_(gdb_printf)("instrumentation: %s\n",
1628 		       CLG_(instrument_state) ? "on":"off");
1629      }
1630      else
1631        CLG_(set_instrument_state)("Command", VG_(strcmp)(arg,"off")!=0);
1632      return True;
1633    }
1634 
1635    default:
1636       tl_assert(0);
1637       return False;
1638    }
1639 }
1640 
1641 static
CLG_(handle_client_request)1642 Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
1643 {
1644    if (!VG_IS_TOOL_USERREQ('C','T',args[0])
1645        && VG_USERREQ__GDB_MONITOR_COMMAND   != args[0])
1646       return False;
1647 
1648    switch(args[0]) {
1649    case VG_USERREQ__DUMP_STATS:
1650       CLG_(dump_profile)("Client Request", True);
1651       *ret = 0;                 /* meaningless */
1652       break;
1653 
1654    case VG_USERREQ__DUMP_STATS_AT:
1655      {
1656        const HChar *arg = (HChar*)args[1];
1657        HChar buf[30 + VG_(strlen)(arg)];    // large enough
1658        VG_(sprintf)(buf,"Client Request: %s", arg);
1659        CLG_(dump_profile)(buf, True);
1660        *ret = 0;                 /* meaningless */
1661      }
1662      break;
1663 
1664    case VG_USERREQ__ZERO_STATS:
1665      CLG_(zero_all_cost)(True);
1666       *ret = 0;                 /* meaningless */
1667       break;
1668 
1669    case VG_USERREQ__TOGGLE_COLLECT:
1670      CLG_(current_state).collect = !CLG_(current_state).collect;
1671      CLG_DEBUG(2, "Client Request: toggled collection state to %s\n",
1672 	      CLG_(current_state).collect ? "ON" : "OFF");
1673      *ret = 0;                 /* meaningless */
1674      break;
1675 
1676    case VG_USERREQ__START_INSTRUMENTATION:
1677      CLG_(set_instrument_state)("Client Request", True);
1678      *ret = 0;                 /* meaningless */
1679      break;
1680 
1681    case VG_USERREQ__STOP_INSTRUMENTATION:
1682      CLG_(set_instrument_state)("Client Request", False);
1683      *ret = 0;                 /* meaningless */
1684      break;
1685 
1686    case VG_USERREQ__GDB_MONITOR_COMMAND: {
1687       Bool handled = handle_gdb_monitor_command (tid, (HChar*)args[1]);
1688       if (handled)
1689          *ret = 1;
1690       else
1691          *ret = 0;
1692       return handled;
1693    }
1694    default:
1695       return False;
1696    }
1697 
1698    return True;
1699 }
1700 
1701 
1702 /* Syscall Timing */
1703 
1704 /* struct timeval syscalltime[VG_N_THREADS]; */
1705 #if CLG_MICROSYSTIME
1706 ULong *syscalltime;
1707 #else
1708 UInt *syscalltime;
1709 #endif
1710 
1711 static
CLG_(pre_syscalltime)1712 void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno,
1713                            UWord* args, UInt nArgs)
1714 {
1715   if (CLG_(clo).collect_systime) {
1716 #if CLG_MICROSYSTIME
1717     struct vki_timeval tv_now;
1718     VG_(gettimeofday)(&tv_now, NULL);
1719     syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec;
1720 #else
1721     syscalltime[tid] = VG_(read_millisecond_timer)();
1722 #endif
1723   }
1724 }
1725 
1726 static
CLG_(post_syscalltime)1727 void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno,
1728                             UWord* args, UInt nArgs, SysRes res)
1729 {
1730   if (CLG_(clo).collect_systime &&
1731       CLG_(current_state).bbcc) {
1732       Int o;
1733 #if CLG_MICROSYSTIME
1734     struct vki_timeval tv_now;
1735     ULong diff;
1736 
1737     VG_(gettimeofday)(&tv_now, NULL);
1738     diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid];
1739 #else
1740     UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid];
1741 #endif
1742 
1743     /* offset o is for "SysCount", o+1 for "SysTime" */
1744     o = fullOffset(EG_SYS);
1745     CLG_ASSERT(o>=0);
1746     CLG_DEBUG(0,"   Time (Off %d) for Syscall %u: %llu\n", o, syscallno,
1747               (ULong)diff);
1748 
1749     CLG_(current_state).cost[o] ++;
1750     CLG_(current_state).cost[o+1] += diff;
1751     if (!CLG_(current_state).bbcc->skipped)
1752       CLG_(init_cost_lz)(CLG_(sets).full,
1753 			&(CLG_(current_state).bbcc->skipped));
1754     CLG_(current_state).bbcc->skipped[o] ++;
1755     CLG_(current_state).bbcc->skipped[o+1] += diff;
1756   }
1757 }
1758 
/* Number of characters needed to print n in decimal with a comma between
   every group of three digits (e.g. 1,234,567).  Zero needs one
   character. */
static UInt ULong_width(ULong n)
{
   UInt digits = 0;

   /* Counting at least once makes n == 0 come out as a single digit. */
   do {
      n /= 10;
      digits++;
   } while (n != 0);

   /* one separator for each complete group of three digits after the first */
   return digits + (digits - 1) / 3;
}
1769 
1770 static
branchsim_printstat(int l1,int l2,int l3)1771 void branchsim_printstat(int l1, int l2, int l3)
1772 {
1773     static HChar fmt[128];    // large enough
1774     FullCost total;
1775     ULong Bc_total_b, Bc_total_mp, Bi_total_b, Bi_total_mp;
1776     ULong B_total_b, B_total_mp;
1777 
1778     total = CLG_(total_cost);
1779     Bc_total_b  = total[ fullOffset(EG_BC)   ];
1780     Bc_total_mp = total[ fullOffset(EG_BC)+1 ];
1781     Bi_total_b  = total[ fullOffset(EG_BI)   ];
1782     Bi_total_mp = total[ fullOffset(EG_BI)+1 ];
1783 
1784     /* Make format string, getting width right for numbers */
1785     VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu cond + %%,%dllu ind)\n",
1786                  l1, l2, l3);
1787 
1788     if (0 == Bc_total_b)  Bc_total_b = 1;
1789     if (0 == Bi_total_b)  Bi_total_b = 1;
1790     B_total_b  = Bc_total_b  + Bi_total_b;
1791     B_total_mp = Bc_total_mp + Bi_total_mp;
1792 
1793     VG_(umsg)("\n");
1794     VG_(umsg)(fmt, "Branches:     ",
1795               B_total_b, Bc_total_b, Bi_total_b);
1796 
1797     VG_(umsg)(fmt, "Mispredicts:  ",
1798               B_total_mp, Bc_total_mp, Bi_total_mp);
1799 
1800     VG_(umsg)("Mispred rate:  %*.1f%% (%*.1f%%     + %*.1f%%   )\n",
1801               l1, B_total_mp  * 100.0 / B_total_b,
1802               l2, Bc_total_mp * 100.0 / Bc_total_b,
1803               l3, Bi_total_mp * 100.0 / Bi_total_b);
1804 }
1805 
1806 static
clg_print_stats(void)1807 void clg_print_stats(void)
1808 {
1809    int BB_lookups =
1810      CLG_(stat).full_debug_BBs +
1811      CLG_(stat).fn_name_debug_BBs +
1812      CLG_(stat).file_line_debug_BBs +
1813      CLG_(stat).no_debug_BBs;
1814 
1815    /* Hash table stats */
1816    VG_(message)(Vg_DebugMsg, "Distinct objects: %d\n",
1817 		CLG_(stat).distinct_objs);
1818    VG_(message)(Vg_DebugMsg, "Distinct files:   %d\n",
1819 		CLG_(stat).distinct_files);
1820    VG_(message)(Vg_DebugMsg, "Distinct fns:     %d\n",
1821 		CLG_(stat).distinct_fns);
1822    VG_(message)(Vg_DebugMsg, "Distinct contexts:%d\n",
1823 		CLG_(stat).distinct_contexts);
1824    VG_(message)(Vg_DebugMsg, "Distinct BBs:     %d\n",
1825 		CLG_(stat).distinct_bbs);
1826    VG_(message)(Vg_DebugMsg, "Cost entries:     %u (Chunks %u)\n",
1827 		CLG_(costarray_entries), CLG_(costarray_chunks));
1828    VG_(message)(Vg_DebugMsg, "Distinct BBCCs:   %d\n",
1829 		CLG_(stat).distinct_bbccs);
1830    VG_(message)(Vg_DebugMsg, "Distinct JCCs:    %d\n",
1831 		CLG_(stat).distinct_jccs);
1832    VG_(message)(Vg_DebugMsg, "Distinct skips:   %d\n",
1833 		CLG_(stat).distinct_skips);
1834    VG_(message)(Vg_DebugMsg, "BB lookups:       %d\n",
1835 		BB_lookups);
1836    if (BB_lookups>0) {
1837       VG_(message)(Vg_DebugMsg, "With full      debug info:%3d%% (%d)\n",
1838 		   CLG_(stat).full_debug_BBs    * 100 / BB_lookups,
1839 		   CLG_(stat).full_debug_BBs);
1840       VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)\n",
1841 		   CLG_(stat).file_line_debug_BBs * 100 / BB_lookups,
1842 		   CLG_(stat).file_line_debug_BBs);
1843       VG_(message)(Vg_DebugMsg, "With fn name   debug info:%3d%% (%d)\n",
1844 		   CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups,
1845 		   CLG_(stat).fn_name_debug_BBs);
1846       VG_(message)(Vg_DebugMsg, "With no        debug info:%3d%% (%d)\n",
1847 		   CLG_(stat).no_debug_BBs      * 100 / BB_lookups,
1848 		   CLG_(stat).no_debug_BBs);
1849    }
1850    VG_(message)(Vg_DebugMsg, "BBCC Clones:       %d\n",
1851 		CLG_(stat).bbcc_clones);
1852    VG_(message)(Vg_DebugMsg, "BBs Retranslated:  %d\n",
1853 		CLG_(stat).bb_retranslations);
1854    VG_(message)(Vg_DebugMsg, "Distinct instrs:   %d\n",
1855 		CLG_(stat).distinct_instrs);
1856 
1857    VG_(message)(Vg_DebugMsg, "LRU Contxt Misses: %d\n",
1858 		CLG_(stat).cxt_lru_misses);
1859    VG_(message)(Vg_DebugMsg, "LRU BBCC Misses:   %d\n",
1860 		CLG_(stat).bbcc_lru_misses);
1861    VG_(message)(Vg_DebugMsg, "LRU JCC Misses:    %d\n",
1862 		CLG_(stat).jcc_lru_misses);
1863    VG_(message)(Vg_DebugMsg, "BBs Executed:      %llu\n",
1864 		CLG_(stat).bb_executions);
1865    VG_(message)(Vg_DebugMsg, "Calls:             %llu\n",
1866 		CLG_(stat).call_counter);
1867    VG_(message)(Vg_DebugMsg, "CondJMP followed:  %llu\n",
1868 		CLG_(stat).jcnd_counter);
1869    VG_(message)(Vg_DebugMsg, "Boring JMPs:       %llu\n",
1870 		CLG_(stat).jump_counter);
1871    VG_(message)(Vg_DebugMsg, "Recursive calls:   %llu\n",
1872 		CLG_(stat).rec_call_counter);
1873    VG_(message)(Vg_DebugMsg, "Returns:           %llu\n",
1874 		CLG_(stat).ret_counter);
1875 }
1876 
1877 
1878 static
finish(void)1879 void finish(void)
1880 {
1881   HChar fmt[128];    // large enough
1882   Int l1, l2, l3;
1883   FullCost total;
1884 
1885   CLG_DEBUG(0, "finish()\n");
1886 
1887   (*CLG_(cachesim).finish)();
1888 
1889   /* pop all remaining items from CallStack for correct sum
1890    */
1891   CLG_(forall_threads)(unwind_thread);
1892 
1893   CLG_(dump_profile)(0, False);
1894 
1895   if (VG_(clo_verbosity) == 0) return;
1896 
1897   if (VG_(clo_stats)) {
1898     VG_(message)(Vg_DebugMsg, "\n");
1899     clg_print_stats();
1900     VG_(message)(Vg_DebugMsg, "\n");
1901   }
1902 
1903   HChar *evmap = CLG_(eventmapping_as_string)(CLG_(dumpmap));
1904   VG_(message)(Vg_UserMsg, "Events    : %s\n", evmap);
1905   VG_(free)(evmap);
1906   HChar *mcost = CLG_(mappingcost_as_string)(CLG_(dumpmap), CLG_(total_cost));
1907   VG_(message)(Vg_UserMsg, "Collected : %s\n", mcost);
1908   VG_(free)(mcost);
1909   VG_(message)(Vg_UserMsg, "\n");
1910 
1911   /* determine value widths for statistics */
1912   total = CLG_(total_cost);
1913   l1 = ULong_width( total[fullOffset(EG_IR)] );
1914   l2 = l3 = 0;
1915   if (CLG_(clo).simulate_cache) {
1916       l2 = ULong_width( total[fullOffset(EG_DR)] );
1917       l3 = ULong_width( total[fullOffset(EG_DW)] );
1918   }
1919   if (CLG_(clo).simulate_branch) {
1920       int l2b = ULong_width( total[fullOffset(EG_BC)] );
1921       int l3b = ULong_width( total[fullOffset(EG_BI)] );
1922       if (l2b > l2) l2 = l2b;
1923       if (l3b > l3) l3 = l3b;
1924   }
1925 
1926   /* Make format string, getting width right for numbers */
1927   VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);
1928 
1929   /* Always print this */
1930   VG_(umsg)(fmt, "I   refs:     ", total[fullOffset(EG_IR)] );
1931 
1932   if (CLG_(clo).simulate_cache)
1933       (*CLG_(cachesim).printstat)(l1, l2, l3);
1934 
1935   if (CLG_(clo).simulate_branch)
1936       branchsim_printstat(l1, l2, l3);
1937 
1938 }
1939 
1940 
CLG_(fini)1941 void CLG_(fini)(Int exitcode)
1942 {
1943   finish();
1944 }
1945 
1946 
1947 /*--------------------------------------------------------------------*/
1948 /*--- Setup                                                        ---*/
1949 /*--------------------------------------------------------------------*/
1950 
/* Callback registered with VG_(track_start_client_code).
   tid:         thread about to run client code.
   blocks_done: running count of executed blocks passed in by the caller.
   CLG_(run_thread)(tid) is only invoked when at least 5000 blocks were
   executed since the previous invocation. */
static void clg_start_client_code_callback ( ThreadId tid, ULong blocks_done )
{
   enum { MIN_BLOCKS_BETWEEN_RUNS = 5000 };

   /* value of blocks_done at the last CLG_(run_thread) call */
   static ULong blocks_at_last_run = 0;

   if (0)
      VG_(printf)("%d R %llu\n", (Int)tid, blocks_done);

   /* throttle calls to CLG_(run_thread) by number of BBs executed */
   if (blocks_done - blocks_at_last_run >= MIN_BLOCKS_BETWEEN_RUNS) {
      blocks_at_last_run = blocks_done;
      CLG_(run_thread)( tid );
   }
}
1964 
1965 static
CLG_(post_clo_init)1966 void CLG_(post_clo_init)(void)
1967 {
1968    if (VG_(clo_vex_control).iropt_register_updates_default
1969        != VexRegUpdSpAtMemAccess) {
1970       CLG_DEBUG(1, " Using user specified value for "
1971                 "--vex-iropt-register-updates\n");
1972    } else {
1973       CLG_DEBUG(1,
1974                 " Using default --vex-iropt-register-updates="
1975                 "sp-at-mem-access\n");
1976    }
1977 
1978    if (VG_(clo_px_file_backed) != VexRegUpdSpAtMemAccess) {
1979       CLG_DEBUG(1, " Using user specified value for "
1980                 "--px-file-backed\n");
1981    } else {
1982       CLG_DEBUG(1,
1983                 " Using default --px-file-backed="
1984                 "sp-at-mem-access\n");
1985    }
1986 
1987    if (VG_(clo_vex_control).iropt_unroll_thresh != 0) {
1988       VG_(message)(Vg_UserMsg,
1989                    "callgrind only works with --vex-iropt-unroll-thresh=0\n"
1990                    "=> resetting it back to 0\n");
1991       VG_(clo_vex_control).iropt_unroll_thresh = 0;   // cannot be overriden.
1992    }
1993    if (VG_(clo_vex_control).guest_chase_thresh != 0) {
1994       VG_(message)(Vg_UserMsg,
1995                    "callgrind only works with --vex-guest-chase-thresh=0\n"
1996                    "=> resetting it back to 0\n");
1997       VG_(clo_vex_control).guest_chase_thresh = 0; // cannot be overriden.
1998    }
1999 
2000    CLG_DEBUG(1, "  dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No");
2001    CLG_DEBUG(1, "  call sep. : %d\n", CLG_(clo).separate_callers);
2002    CLG_DEBUG(1, "  rec. sep. : %d\n", CLG_(clo).separate_recursions);
2003 
2004    if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) {
2005        VG_(message)(Vg_UserMsg, "Using source line as position.\n");
2006        CLG_(clo).dump_line = True;
2007    }
2008 
2009    CLG_(init_dumps)();
2010 
2011    (*CLG_(cachesim).post_clo_init)();
2012 
2013    CLG_(init_eventsets)();
2014    CLG_(init_statistics)(& CLG_(stat));
2015    CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) );
2016 
2017    /* initialize hash tables */
2018    CLG_(init_obj_table)();
2019    CLG_(init_cxt_table)();
2020    CLG_(init_bb_hash)();
2021 
2022    CLG_(init_threads)();
2023    CLG_(run_thread)(1);
2024 
2025    CLG_(instrument_state) = CLG_(clo).instrument_atstart;
2026 
2027    if (VG_(clo_verbosity > 0)) {
2028       VG_(message)(Vg_UserMsg,
2029                    "For interactive control, run 'callgrind_control%s%s -h'.\n",
2030                    (VG_(arg_vgdb_prefix) ? " " : ""),
2031                    (VG_(arg_vgdb_prefix) ? VG_(arg_vgdb_prefix) : ""));
2032    }
2033 }
2034 
2035 static
CLG_(pre_clo_init)2036 void CLG_(pre_clo_init)(void)
2037 {
2038     VG_(details_name)            ("Callgrind");
2039     VG_(details_version)         (NULL);
2040     VG_(details_description)     ("a call-graph generating cache profiler");
2041     VG_(details_copyright_author)("Copyright (C) 2002-2015, and GNU GPL'd, "
2042 				  "by Josef Weidendorfer et al.");
2043     VG_(details_bug_reports_to)  (VG_BUGS_TO);
2044     VG_(details_avg_translation_sizeB) ( 500 );
2045 
2046     VG_(clo_vex_control).iropt_register_updates_default
2047        = VG_(clo_px_file_backed)
2048        = VexRegUpdSpAtMemAccess; // overridable by the user.
2049 
2050     VG_(clo_vex_control).iropt_unroll_thresh = 0;   // cannot be overriden.
2051     VG_(clo_vex_control).guest_chase_thresh = 0;    // cannot be overriden.
2052 
2053     VG_(basic_tool_funcs)        (CLG_(post_clo_init),
2054                                   CLG_(instrument),
2055                                   CLG_(fini));
2056 
2057     VG_(needs_superblock_discards)(clg_discard_superblock_info);
2058 
2059 
2060     VG_(needs_command_line_options)(CLG_(process_cmd_line_option),
2061 				    CLG_(print_usage),
2062 				    CLG_(print_debug_usage));
2063 
2064     VG_(needs_client_requests)(CLG_(handle_client_request));
2065     VG_(needs_print_stats)    (clg_print_stats);
2066     VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
2067 			       CLG_(post_syscalltime));
2068 
2069     VG_(track_start_client_code)  ( & clg_start_client_code_callback );
2070     VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) );
2071     VG_(track_post_deliver_signal)( & CLG_(post_signal) );
2072 
2073     CLG_(set_clo_defaults)();
2074 
2075     syscalltime = CLG_MALLOC("cl.main.pci.1",
2076                              VG_N_THREADS * sizeof syscalltime[0]);
2077     for (UInt i = 0; i < VG_N_THREADS; ++i) {
2078        syscalltime[i] = 0;
2079     }
2080 }
2081 
2082 VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))
2083 
2084 /*--------------------------------------------------------------------*/
2085 /*--- end                                                   main.c ---*/
2086 /*--------------------------------------------------------------------*/
2087