1 
2 /*--------------------------------------------------------------------*/
3 /*--- Cachegrind: everything but the simulation itself.            ---*/
4 /*---                                                    cg_main.c ---*/
5 /*--------------------------------------------------------------------*/
6 
7 /*
8    This file is part of Cachegrind, a Valgrind tool for cache
9    profiling programs.
10 
11    Copyright (C) 2002-2013 Nicholas Nethercote
12       njn@valgrind.org
13 
14    This program is free software; you can redistribute it and/or
15    modify it under the terms of the GNU General Public License as
16    published by the Free Software Foundation; either version 2 of the
17    License, or (at your option) any later version.
18 
19    This program is distributed in the hope that it will be useful, but
20    WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22    General Public License for more details.
23 
24    You should have received a copy of the GNU General Public License
25    along with this program; if not, write to the Free Software
26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27    02111-1307, USA.
28 
29    The GNU General Public License is contained in the file COPYING.
30 */
31 
32 #include "pub_tool_basics.h"
33 #include "pub_tool_debuginfo.h"
34 #include "pub_tool_libcbase.h"
35 #include "pub_tool_libcassert.h"
36 #include "pub_tool_libcfile.h"
37 #include "pub_tool_libcprint.h"
38 #include "pub_tool_libcproc.h"
39 #include "pub_tool_mallocfree.h"
40 #include "pub_tool_options.h"
41 #include "pub_tool_oset.h"
42 #include "pub_tool_tooliface.h"
43 #include "pub_tool_xarray.h"
44 #include "pub_tool_clientstate.h"
45 #include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)
46 
47 #include "cg_arch.h"
48 #include "cg_sim.c"
49 #include "cg_branchpred.c"
50 
51 /*------------------------------------------------------------*/
52 /*--- Constants                                            ---*/
53 /*------------------------------------------------------------*/
54 
/* Compile-time switch: set to 1 to get very verbose debugging output
   (e.g. each event is printed as it is flushed). */
#define DEBUG_CG 0
57 
58 /*------------------------------------------------------------*/
59 /*--- Options                                              ---*/
60 /*------------------------------------------------------------*/
61 
62 static Bool  clo_cache_sim  = True;  /* do cache simulation? */
63 static Bool  clo_branch_sim = False; /* do branch simulation? */
64 static const HChar* clo_cachegrind_out_file = "cachegrind.out.%p";
65 
66 /*------------------------------------------------------------*/
67 /*--- Cachesim configuration                               ---*/
68 /*------------------------------------------------------------*/
69 
70 static Int min_line_size = 0; /* min of L1 and LL cache line sizes */
71 
72 /*------------------------------------------------------------*/
73 /*--- Types and Data Structures                            ---*/
74 /*------------------------------------------------------------*/
75 
76 typedef
77    struct {
78       ULong a;  /* total # memory accesses of this kind */
79       ULong m1; /* misses in the first level cache */
80       ULong mL; /* misses in the second level cache */
81    }
82    CacheCC;
83 
84 typedef
85    struct {
86       ULong b;  /* total # branches of this kind */
87       ULong mp; /* number of branches mispredicted */
88    }
89    BranchCC;
90 
91 //------------------------------------------------------------
92 // Primary data structure #1: CC table
93 // - Holds the per-source-line hit/miss stats, grouped by file/function/line.
94 // - an ordered set of CCs.  CC indexing done by file/function/line (as
95 //   determined from the instrAddr).
96 // - Traversed for dumping stats at end in file/func/line hierarchy.
97 
98 typedef struct {
99    HChar* file;
100    const HChar* fn;
101    Int    line;
102 }
103 CodeLoc;
104 
105 typedef struct {
106    CodeLoc  loc; /* Source location that these counts pertain to */
107    CacheCC  Ir;  /* Insn read counts */
108    CacheCC  Dr;  /* Data read counts */
109    CacheCC  Dw;  /* Data write/modify counts */
110    BranchCC Bc;  /* Conditional branch counts */
111    BranchCC Bi;  /* Indirect branch counts */
112 } LineCC;
113 
114 // First compare file, then fn, then line.
cmp_CodeLoc_LineCC(const void * vloc,const void * vcc)115 static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
116 {
117    Word res;
118    const CodeLoc* a = (const CodeLoc*)vloc;
119    const CodeLoc* b = &(((const LineCC*)vcc)->loc);
120 
121    res = VG_(strcmp)(a->file, b->file);
122    if (0 != res)
123       return res;
124 
125    res = VG_(strcmp)(a->fn, b->fn);
126    if (0 != res)
127       return res;
128 
129    return a->line - b->line;
130 }
131 
132 static OSet* CC_table;
133 
134 //------------------------------------------------------------
135 // Primary data structure #2: InstrInfo table
136 // - Holds the cached info about each instr that is used for simulation.
137 // - table(SB_start_addr, list(InstrInfo))
138 // - For each SB, each InstrInfo in the list holds info about the
139 //   instruction (instrLen, instrAddr, etc), plus a pointer to its line
140 //   CC.  This node is what's passed to the simulation function.
141 // - When SBs are discarded the relevant list(instr_details) is freed.
142 
143 typedef struct _InstrInfo InstrInfo;
144 struct _InstrInfo {
145    Addr    instr_addr;
146    UChar   instr_len;
147    LineCC* parent;         // parent line-CC
148 };
149 
150 typedef struct _SB_info SB_info;
151 struct _SB_info {
152    Addr      SB_addr;      // key;  MUST BE FIRST
153    Int       n_instrs;
154    InstrInfo instrs[0];
155 };
156 
157 static OSet* instrInfoTable;
158 
159 //------------------------------------------------------------
160 // Secondary data structure: string table
161 // - holds strings, avoiding dups
162 // - used for filenames and function names, each of which will be
163 //   pointed to by one or more CCs.
164 // - it also allows equality checks just by pointer comparison, which
165 //   is good when printing the output file at the end.
166 
167 static OSet* stringTable;
168 
169 //------------------------------------------------------------
170 // Stats
171 static Int  distinct_files      = 0;
172 static Int  distinct_fns        = 0;
173 static Int  distinct_lines      = 0;
174 static Int  distinct_instrsGen  = 0;
175 static Int  distinct_instrsNoX  = 0;
176 
177 static Int  full_debugs         = 0;
178 static Int  file_line_debugs    = 0;
179 static Int  fn_debugs           = 0;
180 static Int  no_debugs           = 0;
181 
182 /*------------------------------------------------------------*/
183 /*--- String table operations                              ---*/
184 /*------------------------------------------------------------*/
185 
stringCmp(const void * key,const void * elem)186 static Word stringCmp( const void* key, const void* elem )
187 {
188    return VG_(strcmp)(*(const HChar *const *)key, *(const HChar *const *)elem);
189 }
190 
191 // Get a permanent string;  either pull it out of the string table if it's
192 // been encountered before, or dup it and put it into the string table.
get_perm_string(const HChar * s)193 static HChar* get_perm_string(const HChar* s)
194 {
195    HChar** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s);
196    if (s_ptr) {
197       return *s_ptr;
198    } else {
199       HChar** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(HChar*));
200       *s_node = VG_(strdup)("cg.main.gps.1", s);
201       VG_(OSetGen_Insert)(stringTable, s_node);
202       return *s_node;
203    }
204 }
205 
206 /*------------------------------------------------------------*/
207 /*--- CC table operations                                  ---*/
208 /*------------------------------------------------------------*/
209 
// Look up the source location of the instruction at 'instr_addr'.
//
// Outputs:
//   *dir, *file, *line - as written by VG_(get_filename_linenum); if
//                        that lookup fails they are set to "", "???"
//                        and 0 respectively.
//   *fn                - as written by VG_(get_fnname); "???" if that
//                        lookup fails.
//
// Also bumps exactly one of full_debugs / file_line_debugs / fn_debugs /
// no_debugs according to which of the two lookups succeeded.
static void get_debug_info(Addr instr_addr, const HChar **dir,
                           const HChar **file, const HChar **fn, UInt* line)
{
   Bool found_file_line = VG_(get_filename_linenum)(
                             instr_addr,
                             file, dir,
                             line
                          );
   Bool found_fn        = VG_(get_fnname)(instr_addr, fn);

   if (!found_file_line) {
      // Callers read *dir unconditionally (get_lineCC takes its length
      // and tests dir[0]), so give it a defined value here rather than
      // relying on what the failed lookup left behind.
      *dir  = "";
      *file = "???";
      *line = 0;
   }
   if (!found_fn) {
      *fn = "???";
   }

   if (found_file_line) {
      if (found_fn) full_debugs++;
      else          file_line_debugs++;
   } else {
      if (found_fn) fn_debugs++;
      else          no_debugs++;
   }
}
236 
237 // Do a three step traversal: by file, then fn, then line.
238 // Returns a pointer to the line CC, creates a new one if necessary.
get_lineCC(Addr origAddr)239 static LineCC* get_lineCC(Addr origAddr)
240 {
241    const HChar *fn, *file, *dir;
242    UInt    line;
243    CodeLoc loc;
244    LineCC* lineCC;
245 
246    get_debug_info(origAddr, &dir, &file, &fn, &line);
247 
248    // Form an absolute pathname if a directory is available
249    HChar absfile[VG_(strlen)(dir) + 1 + VG_(strlen)(file) + 1];
250 
251    if (dir[0]) {
252       VG_(sprintf)(absfile, "%s/%s", dir, file);
253    } else {
254       VG_(sprintf)(absfile, "%s", file);
255    }
256 
257    loc.file = absfile;
258    loc.fn   = fn;
259    loc.line = line;
260 
261    lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
262    if (!lineCC) {
263       // Allocate and zero a new node.
264       lineCC           = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
265       lineCC->loc.file = get_perm_string(loc.file);
266       lineCC->loc.fn   = get_perm_string(loc.fn);
267       lineCC->loc.line = loc.line;
268       lineCC->Ir.a     = 0;
269       lineCC->Ir.m1    = 0;
270       lineCC->Ir.mL    = 0;
271       lineCC->Dr.a     = 0;
272       lineCC->Dr.m1    = 0;
273       lineCC->Dr.mL    = 0;
274       lineCC->Dw.a     = 0;
275       lineCC->Dw.m1    = 0;
276       lineCC->Dw.mL    = 0;
277       lineCC->Bc.b     = 0;
278       lineCC->Bc.mp    = 0;
279       lineCC->Bi.b     = 0;
280       lineCC->Bi.mp    = 0;
281       VG_(OSetGen_Insert)(CC_table, lineCC);
282    }
283 
284    return lineCC;
285 }
286 
287 /*------------------------------------------------------------*/
288 /*--- Cache simulation functions                           ---*/
289 /*------------------------------------------------------------*/
290 
291 /* A common case for an instruction read event is that the
292  * bytes read belong to the same cache line in both L1I and LL
293  * (if cache line sizes of L1 and LL are the same).
294  * As this can be detected at instrumentation time, and results
 * in faster simulation, special-casing is beneficial.
296  *
 * Abbreviations used in var/function names:
298  *  IrNoX - instruction read does not cross cache lines
299  *  IrGen - generic instruction read; not detected as IrNoX
300  *  Ir    - not known / not important whether it is an IrNoX
301  */
302 
303 // Only used with --cache-sim=no.
304 static VG_REGPARM(1)
log_1Ir(InstrInfo * n)305 void log_1Ir(InstrInfo* n)
306 {
307    n->parent->Ir.a++;
308 }
309 
310 // Only used with --cache-sim=no.
311 static VG_REGPARM(2)
log_2Ir(InstrInfo * n,InstrInfo * n2)312 void log_2Ir(InstrInfo* n, InstrInfo* n2)
313 {
314    n->parent->Ir.a++;
315    n2->parent->Ir.a++;
316 }
317 
318 // Only used with --cache-sim=no.
319 static VG_REGPARM(3)
log_3Ir(InstrInfo * n,InstrInfo * n2,InstrInfo * n3)320 void log_3Ir(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
321 {
322    n->parent->Ir.a++;
323    n2->parent->Ir.a++;
324    n3->parent->Ir.a++;
325 }
326 
327 // Generic case for instruction reads: may cross cache lines.
328 // All other Ir handlers expect IrNoX instruction reads.
329 static VG_REGPARM(1)
log_1IrGen_0D_cache_access(InstrInfo * n)330 void log_1IrGen_0D_cache_access(InstrInfo* n)
331 {
332    //VG_(printf)("1IrGen_0D :  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n",
333    //             n, n->instr_addr, n->instr_len);
334    cachesim_I1_doref_Gen(n->instr_addr, n->instr_len,
335 			 &n->parent->Ir.m1, &n->parent->Ir.mL);
336    n->parent->Ir.a++;
337 }
338 
339 static VG_REGPARM(1)
log_1IrNoX_0D_cache_access(InstrInfo * n)340 void log_1IrNoX_0D_cache_access(InstrInfo* n)
341 {
342    //VG_(printf)("1IrNoX_0D :  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n",
343    //             n, n->instr_addr, n->instr_len);
344    cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
345 			 &n->parent->Ir.m1, &n->parent->Ir.mL);
346    n->parent->Ir.a++;
347 }
348 
349 static VG_REGPARM(2)
log_2IrNoX_0D_cache_access(InstrInfo * n,InstrInfo * n2)350 void log_2IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2)
351 {
352    //VG_(printf)("2IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
353    //            "            CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
354    //            n,  n->instr_addr,  n->instr_len,
355    //            n2, n2->instr_addr, n2->instr_len);
356    cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
357 			 &n->parent->Ir.m1, &n->parent->Ir.mL);
358    n->parent->Ir.a++;
359    cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
360 			 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
361    n2->parent->Ir.a++;
362 }
363 
364 static VG_REGPARM(3)
log_3IrNoX_0D_cache_access(InstrInfo * n,InstrInfo * n2,InstrInfo * n3)365 void log_3IrNoX_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
366 {
367    //VG_(printf)("3IrNoX_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
368    //            "            CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
369    //            "            CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
370    //            n,  n->instr_addr,  n->instr_len,
371    //            n2, n2->instr_addr, n2->instr_len,
372    //            n3, n3->instr_addr, n3->instr_len);
373    cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
374 			 &n->parent->Ir.m1, &n->parent->Ir.mL);
375    n->parent->Ir.a++;
376    cachesim_I1_doref_NoX(n2->instr_addr, n2->instr_len,
377 			 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
378    n2->parent->Ir.a++;
379    cachesim_I1_doref_NoX(n3->instr_addr, n3->instr_len,
380 			 &n3->parent->Ir.m1, &n3->parent->Ir.mL);
381    n3->parent->Ir.a++;
382 }
383 
384 static VG_REGPARM(3)
log_1IrNoX_1Dr_cache_access(InstrInfo * n,Addr data_addr,Word data_size)385 void log_1IrNoX_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
386 {
387    //VG_(printf)("1IrNoX_1Dr:  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n"
388    //            "                               daddr=0x%010lx,  dsize=%lu\n",
389    //            n, n->instr_addr, n->instr_len, data_addr, data_size);
390    cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
391 			 &n->parent->Ir.m1, &n->parent->Ir.mL);
392    n->parent->Ir.a++;
393 
394    cachesim_D1_doref(data_addr, data_size,
395                      &n->parent->Dr.m1, &n->parent->Dr.mL);
396    n->parent->Dr.a++;
397 }
398 
399 static VG_REGPARM(3)
log_1IrNoX_1Dw_cache_access(InstrInfo * n,Addr data_addr,Word data_size)400 void log_1IrNoX_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
401 {
402    //VG_(printf)("1IrNoX_1Dw:  CCaddr=0x%010lx,  iaddr=0x%010lx,  isize=%lu\n"
403    //            "                               daddr=0x%010lx,  dsize=%lu\n",
404    //            n, n->instr_addr, n->instr_len, data_addr, data_size);
405    cachesim_I1_doref_NoX(n->instr_addr, n->instr_len,
406 			 &n->parent->Ir.m1, &n->parent->Ir.mL);
407    n->parent->Ir.a++;
408 
409    cachesim_D1_doref(data_addr, data_size,
410                      &n->parent->Dw.m1, &n->parent->Dw.mL);
411    n->parent->Dw.a++;
412 }
413 
414 /* Note that addEvent_D_guarded assumes that log_0Ir_1Dr_cache_access
415    and log_0Ir_1Dw_cache_access have exactly the same prototype.  If
416    you change them, you must change addEvent_D_guarded too. */
417 static VG_REGPARM(3)
log_0Ir_1Dr_cache_access(InstrInfo * n,Addr data_addr,Word data_size)418 void log_0Ir_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
419 {
420    //VG_(printf)("0Ir_1Dr:  CCaddr=0x%010lx,  daddr=0x%010lx,  dsize=%lu\n",
421    //            n, data_addr, data_size);
422    cachesim_D1_doref(data_addr, data_size,
423                      &n->parent->Dr.m1, &n->parent->Dr.mL);
424    n->parent->Dr.a++;
425 }
426 
427 /* See comment on log_0Ir_1Dr_cache_access. */
428 static VG_REGPARM(3)
log_0Ir_1Dw_cache_access(InstrInfo * n,Addr data_addr,Word data_size)429 void log_0Ir_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
430 {
431    //VG_(printf)("0Ir_1Dw:  CCaddr=0x%010lx,  daddr=0x%010lx,  dsize=%lu\n",
432    //            n, data_addr, data_size);
433    cachesim_D1_doref(data_addr, data_size,
434                      &n->parent->Dw.m1, &n->parent->Dw.mL);
435    n->parent->Dw.a++;
436 }
437 
438 /* For branches, we consult two different predictors, one which
439    predicts taken/untaken for conditional branches, and the other
440    which predicts the branch target address for indirect branches
441    (jump-to-register style ones). */
442 
443 static VG_REGPARM(2)
log_cond_branch(InstrInfo * n,Word taken)444 void log_cond_branch(InstrInfo* n, Word taken)
445 {
446    //VG_(printf)("cbrnch:  CCaddr=0x%010lx,  taken=0x%010lx\n",
447    //             n, taken);
448    n->parent->Bc.b++;
449    n->parent->Bc.mp
450       += (1 & do_cond_branch_predict(n->instr_addr, taken));
451 }
452 
453 static VG_REGPARM(2)
log_ind_branch(InstrInfo * n,UWord actual_dst)454 void log_ind_branch(InstrInfo* n, UWord actual_dst)
455 {
456    //VG_(printf)("ibrnch:  CCaddr=0x%010lx,    dst=0x%010lx\n",
457    //             n, actual_dst);
458    n->parent->Bi.b++;
459    n->parent->Bi.mp
460       += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
461 }
462 
463 
464 /*------------------------------------------------------------*/
465 /*--- Instrumentation types and structures                 ---*/
466 /*------------------------------------------------------------*/
467 
468 /* Maintain an ordered list of memory events which are outstanding, in
469    the sense that no IR has yet been generated to do the relevant
470    helper calls.  The BB is scanned top to bottom and memory events
471    are added to the end of the list, merging with the most recent
472    notified event where possible (Dw immediately following Dr and
473    having the same size and EA can be merged).
474 
475    This merging is done so that for architectures which have
476    load-op-store instructions (x86, amd64), the insn is treated as if
477    it makes just one memory reference (a modify), rather than two (a
478    read followed by a write at the same address).
479 
480    At various points the list will need to be flushed, that is, IR
481    generated from it.  That must happen before any possible exit from
482    the block (the end, or an IRStmt_Exit).  Flushing also takes place
483    when there is no space to add a new event.
484 
485    If we require the simulation statistics to be up to date with
486    respect to possible memory exceptions, then the list would have to
487    be flushed before each memory reference.  That would however lose
488    performance by inhibiting event-merging during flushing.
489 
490    Flushing the list consists of walking it start to end and emitting
491    instrumentation IR for each event, in the order in which they
492    appear.  It may be possible to emit a single call for two adjacent
493    events in order to reduce the number of helper function calls made.
494    For example, it could well be profitable to handle two adjacent Ir
495    events with a single helper call.  */
496 
497 typedef
498    IRExpr
499    IRAtom;
500 
/* The kinds of event that can sit in the outstanding-event list. */
typedef enum {
   Ev_IrNoX,   /* instruction read that does not cross cache lines */
   Ev_IrGen,   /* generic instruction read (not detected as IrNoX) */
   Ev_Dr,      /* data read */
   Ev_Dw,      /* data write */
   Ev_Dm,      /* data modify (read then write) */
   Ev_Bc,      /* conditional branch */
   Ev_Bi       /* indirect branch (to unknown destination) */
} EventTag;
512 
513 typedef
514    struct {
515       EventTag   tag;
516       InstrInfo* inode;
517       union {
518          struct {
519          } IrGen;
520          struct {
521          } IrNoX;
522          struct {
523             IRAtom* ea;
524             Int     szB;
525          } Dr;
526          struct {
527             IRAtom* ea;
528             Int     szB;
529          } Dw;
530          struct {
531             IRAtom* ea;
532             Int     szB;
533          } Dm;
534          struct {
535             IRAtom* taken; /* :: Ity_I1 */
536          } Bc;
537          struct {
538             IRAtom* dst;
539          } Bi;
540       } Ev;
541    }
542    Event;
543 
/* Clear every byte of *ev. */
static void init_Event ( Event* ev )
{
   VG_(memset)(ev, 0, sizeof *ev);
}
547 
get_Event_dea(Event * ev)548 static IRAtom* get_Event_dea ( Event* ev ) {
549    switch (ev->tag) {
550       case Ev_Dr: return ev->Ev.Dr.ea;
551       case Ev_Dw: return ev->Ev.Dw.ea;
552       case Ev_Dm: return ev->Ev.Dm.ea;
553       default:    tl_assert(0);
554    }
555 }
556 
get_Event_dszB(Event * ev)557 static Int get_Event_dszB ( Event* ev ) {
558    switch (ev->tag) {
559       case Ev_Dr: return ev->Ev.Dr.szB;
560       case Ev_Dw: return ev->Ev.Dw.szB;
561       case Ev_Dm: return ev->Ev.Dm.szB;
562       default:    tl_assert(0);
563    }
564 }
565 
566 
/* Capacity of the outstanding-event list.  The value is arbitrary: a
   larger list allows more event merging, but can cause more spilling
   because the address temporaries stay live for longer. */
#define N_EVENTS 16
572 
573 
574 /* A struct which holds all the running state during instrumentation.
575    Mostly to avoid passing loads of parameters everywhere. */
576 typedef
577    struct {
578       /* The current outstanding-memory-event list. */
579       Event events[N_EVENTS];
580       Int   events_used;
581 
582       /* The array of InstrInfo bins for the BB. */
583       SB_info* sbInfo;
584 
585       /* Number InstrInfo bins 'used' so far. */
586       Int sbInfo_i;
587 
588       /* The output SB being constructed. */
589       IRSB* sbOut;
590    }
591    CgState;
592 
593 
594 /*------------------------------------------------------------*/
595 /*--- Instrumentation main                                 ---*/
596 /*------------------------------------------------------------*/
597 
598 // Note that origAddr is the real origAddr, not the address of the first
599 // instruction in the block (they can be different due to redirection).
600 static
get_SB_info(IRSB * sbIn,Addr origAddr)601 SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
602 {
603    Int      i, n_instrs;
604    IRStmt*  st;
605    SB_info* sbInfo;
606 
607    // Count number of original instrs in SB
608    n_instrs = 0;
609    for (i = 0; i < sbIn->stmts_used; i++) {
610       st = sbIn->stmts[i];
611       if (Ist_IMark == st->tag) n_instrs++;
612    }
613 
614    // Check that we don't have an entry for this BB in the instr-info table.
615    // If this assertion fails, there has been some screwup:  some
616    // translations must have been discarded but Cachegrind hasn't discarded
617    // the corresponding entries in the instr-info table.
618    sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
619    tl_assert(NULL == sbInfo);
620 
621    // BB never translated before (at this address, at least;  could have
622    // been unloaded and then reloaded elsewhere in memory)
623    sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable,
624                                 sizeof(SB_info) + n_instrs*sizeof(InstrInfo));
625    sbInfo->SB_addr  = origAddr;
626    sbInfo->n_instrs = n_instrs;
627    VG_(OSetGen_Insert)( instrInfoTable, sbInfo );
628 
629    return sbInfo;
630 }
631 
632 
/* Print a one-line description of *ev: its kind, its InstrInfo pointer
   and, for events that carry one, the size and/or IR expression.
   An unknown tag trips the assertion. */
static void showEvent ( Event* ev )
{
   IRExpr* operand      = NULL;
   Bool    show_operand = False;  /* events with an expression print it,
                                     then a newline, after the switch */

   switch (ev->tag) {
      case Ev_IrGen:
         VG_(printf)("IrGen %p\n", ev->inode);
         break;
      case Ev_IrNoX:
         VG_(printf)("IrNoX %p\n", ev->inode);
         break;
      case Ev_Dr:
         VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
         operand      = ev->Ev.Dr.ea;
         show_operand = True;
         break;
      case Ev_Dw:
         VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
         operand      = ev->Ev.Dw.ea;
         show_operand = True;
         break;
      case Ev_Dm:
         VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
         operand      = ev->Ev.Dm.ea;
         show_operand = True;
         break;
      case Ev_Bc:
         VG_(printf)("Bc %p   GA=", ev->inode);
         operand      = ev->Ev.Bc.taken;
         show_operand = True;
         break;
      case Ev_Bi:
         VG_(printf)("Bi %p  DST=", ev->inode);
         operand      = ev->Ev.Bi.dst;
         show_operand = True;
         break;
      default:
         tl_assert(0);
         break;
   }

   if (show_operand) {
      ppIRExpr(operand);
      VG_(printf)("\n");
   }
}
672 
673 // Reserve and initialise an InstrInfo for the first mention of a new insn.
674 static
setup_InstrInfo(CgState * cgs,Addr instr_addr,UInt instr_len)675 InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
676 {
677    InstrInfo* i_node;
678    tl_assert(cgs->sbInfo_i >= 0);
679    tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs);
680    i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ];
681    i_node->instr_addr = instr_addr;
682    i_node->instr_len  = instr_len;
683    i_node->parent     = get_lineCC(instr_addr);
684    cgs->sbInfo_i++;
685    return i_node;
686 }
687 
688 
689 /* Generate code for all outstanding memory events, and mark the queue
690    empty.  Code is generated into cgs->bbOut, and this activity
691    'consumes' slots in cgs->sbInfo. */
692 
flushEvents(CgState * cgs)693 static void flushEvents ( CgState* cgs )
694 {
695    Int        i, regparms;
696    const HChar* helperName;
697    void*      helperAddr;
698    IRExpr**   argv;
699    IRExpr*    i_node_expr;
700    IRDirty*   di;
701    Event*     ev;
702    Event*     ev2;
703    Event*     ev3;
704 
705    i = 0;
706    while (i < cgs->events_used) {
707 
708       helperName = NULL;
709       helperAddr = NULL;
710       argv       = NULL;
711       regparms   = 0;
712 
713       /* generate IR to notify event i and possibly the ones
714          immediately following it. */
715       tl_assert(i >= 0 && i < cgs->events_used);
716 
717       ev  = &cgs->events[i];
718       ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
719       ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );
720 
721       if (DEBUG_CG) {
722          VG_(printf)("   flush ");
723          showEvent( ev );
724       }
725 
726       i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
727 
728       /* Decide on helper fn to call and args to pass it, and advance
729          i appropriately. */
730       switch (ev->tag) {
731          case Ev_IrNoX:
732             /* Merge an IrNoX with a following Dr/Dm. */
733             if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
734                /* Why is this true?  It's because we're merging an Ir
735                   with a following Dr or Dm.  The Ir derives from the
736                   instruction's IMark and the Dr/Dm from data
737                   references which follow it.  In short it holds
738                   because each insn starts with an IMark, hence an
739                   Ev_Ir, and so these Dr/Dm must pertain to the
740                   immediately preceding Ir.  Same applies to analogous
741                   assertions in the subsequent cases. */
742                tl_assert(ev2->inode == ev->inode);
743                helperName = "log_1IrNoX_1Dr_cache_access";
744                helperAddr = &log_1IrNoX_1Dr_cache_access;
745                argv = mkIRExprVec_3( i_node_expr,
746                                      get_Event_dea(ev2),
747                                      mkIRExpr_HWord( get_Event_dszB(ev2) ) );
748                regparms = 3;
749                i += 2;
750             }
751             /* Merge an IrNoX with a following Dw. */
752             else
753             if (ev2 && ev2->tag == Ev_Dw) {
754                tl_assert(ev2->inode == ev->inode);
755                helperName = "log_1IrNoX_1Dw_cache_access";
756                helperAddr = &log_1IrNoX_1Dw_cache_access;
757                argv = mkIRExprVec_3( i_node_expr,
758                                      get_Event_dea(ev2),
759                                      mkIRExpr_HWord( get_Event_dszB(ev2) ) );
760                regparms = 3;
761                i += 2;
762             }
763             /* Merge an IrNoX with two following IrNoX's. */
764             else
765             if (ev2 && ev3 && ev2->tag == Ev_IrNoX && ev3->tag == Ev_IrNoX)
766             {
767                if (clo_cache_sim) {
768                   helperName = "log_3IrNoX_0D_cache_access";
769                   helperAddr = &log_3IrNoX_0D_cache_access;
770                } else {
771                   helperName = "log_3Ir";
772                   helperAddr = &log_3Ir;
773                }
774                argv = mkIRExprVec_3( i_node_expr,
775                                      mkIRExpr_HWord( (HWord)ev2->inode ),
776                                      mkIRExpr_HWord( (HWord)ev3->inode ) );
777                regparms = 3;
778                i += 3;
779             }
780             /* Merge an IrNoX with one following IrNoX. */
781             else
782             if (ev2 && ev2->tag == Ev_IrNoX) {
783                if (clo_cache_sim) {
784                   helperName = "log_2IrNoX_0D_cache_access";
785                   helperAddr = &log_2IrNoX_0D_cache_access;
786                } else {
787                   helperName = "log_2Ir";
788                   helperAddr = &log_2Ir;
789                }
790                argv = mkIRExprVec_2( i_node_expr,
791                                      mkIRExpr_HWord( (HWord)ev2->inode ) );
792                regparms = 2;
793                i += 2;
794             }
795             /* No merging possible; emit as-is. */
796             else {
797                if (clo_cache_sim) {
798                   helperName = "log_1IrNoX_0D_cache_access";
799                   helperAddr = &log_1IrNoX_0D_cache_access;
800                } else {
801                   helperName = "log_1Ir";
802                   helperAddr = &log_1Ir;
803                }
804                argv = mkIRExprVec_1( i_node_expr );
805                regparms = 1;
806                i++;
807             }
808             break;
809          case Ev_IrGen:
810             if (clo_cache_sim) {
811 	       helperName = "log_1IrGen_0D_cache_access";
812 	       helperAddr = &log_1IrGen_0D_cache_access;
813 	    } else {
814 	       helperName = "log_1Ir";
815 	       helperAddr = &log_1Ir;
816 	    }
817 	    argv = mkIRExprVec_1( i_node_expr );
818 	    regparms = 1;
819 	    i++;
820             break;
821          case Ev_Dr:
822          case Ev_Dm:
823             /* Data read or modify */
824             helperName = "log_0Ir_1Dr_cache_access";
825             helperAddr = &log_0Ir_1Dr_cache_access;
826             argv = mkIRExprVec_3( i_node_expr,
827                                   get_Event_dea(ev),
828                                   mkIRExpr_HWord( get_Event_dszB(ev) ) );
829             regparms = 3;
830             i++;
831             break;
832          case Ev_Dw:
833             /* Data write */
834             helperName = "log_0Ir_1Dw_cache_access";
835             helperAddr = &log_0Ir_1Dw_cache_access;
836             argv = mkIRExprVec_3( i_node_expr,
837                                   get_Event_dea(ev),
838                                   mkIRExpr_HWord( get_Event_dszB(ev) ) );
839             regparms = 3;
840             i++;
841             break;
842          case Ev_Bc:
843             /* Conditional branch */
844             helperName = "log_cond_branch";
845             helperAddr = &log_cond_branch;
846             argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
847             regparms = 2;
848             i++;
849             break;
850          case Ev_Bi:
851             /* Branch to an unknown destination */
852             helperName = "log_ind_branch";
853             helperAddr = &log_ind_branch;
854             argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
855             regparms = 2;
856             i++;
857             break;
858          default:
859             tl_assert(0);
860       }
861 
862       /* Add the helper. */
863       tl_assert(helperName);
864       tl_assert(helperAddr);
865       tl_assert(argv);
866       di = unsafeIRDirty_0_N( regparms,
867                               helperName, VG_(fnptr_to_fnentry)( helperAddr ),
868                               argv );
869       addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
870    }
871 
872    cgs->events_used = 0;
873 }
874 
/* Queue an instruction-read event for `inode` in the pending-event buffer
   of `cgs`.  A full buffer is flushed first so that a slot is always
   available.  The event is tagged Ev_IrNoX or Ev_IrGen according to the
   answer given by cachesim_is_IrNoX() for the instruction's address and
   length, and the matching distinct_instrs* counter is incremented. */
static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
{
   Event* slot;

   if (cgs->events_used == N_EVENTS) {
      /* No free slot left: emit everything queued so far. */
      flushEvents(cgs);
   }
   tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);

   /* Claim and reset the next free slot. */
   slot = cgs->events + cgs->events_used;
   init_Event(slot);
   slot->inode = inode;

   if (!cachesim_is_IrNoX(inode->instr_addr, inode->instr_len)) {
      slot->tag = Ev_IrGen;
      distinct_instrsGen++;
   } else {
      slot->tag = Ev_IrNoX;
      distinct_instrsNoX++;
   }

   cgs->events_used++;
}
893 
894 static
addEvent_Dr(CgState * cgs,InstrInfo * inode,Int datasize,IRAtom * ea)895 void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
896 {
897    Event* evt;
898    tl_assert(isIRAtom(ea));
899    tl_assert(datasize >= 1 && datasize <= min_line_size);
900    if (!clo_cache_sim)
901       return;
902    if (cgs->events_used == N_EVENTS)
903       flushEvents(cgs);
904    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
905    evt = &cgs->events[cgs->events_used];
906    init_Event(evt);
907    evt->tag       = Ev_Dr;
908    evt->inode     = inode;
909    evt->Ev.Dr.szB = datasize;
910    evt->Ev.Dr.ea  = ea;
911    cgs->events_used++;
912 }
913 
914 static
addEvent_Dw(CgState * cgs,InstrInfo * inode,Int datasize,IRAtom * ea)915 void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
916 {
917    Event* lastEvt;
918    Event* evt;
919 
920    tl_assert(isIRAtom(ea));
921    tl_assert(datasize >= 1 && datasize <= min_line_size);
922 
923    if (!clo_cache_sim)
924       return;
925 
926    /* Is it possible to merge this write with the preceding read? */
927    lastEvt = &cgs->events[cgs->events_used-1];
928    if (cgs->events_used > 0
929        && lastEvt->tag       == Ev_Dr
930        && lastEvt->Ev.Dr.szB == datasize
931        && lastEvt->inode     == inode
932        && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
933    {
934       lastEvt->tag   = Ev_Dm;
935       return;
936    }
937 
938    /* No.  Add as normal. */
939    if (cgs->events_used == N_EVENTS)
940       flushEvents(cgs);
941    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
942    evt = &cgs->events[cgs->events_used];
943    init_Event(evt);
944    evt->tag       = Ev_Dw;
945    evt->inode     = inode;
946    evt->Ev.Dw.szB = datasize;
947    evt->Ev.Dw.ea  = ea;
948    cgs->events_used++;
949 }
950 
951 static
addEvent_D_guarded(CgState * cgs,InstrInfo * inode,Int datasize,IRAtom * ea,IRAtom * guard,Bool isWrite)952 void addEvent_D_guarded ( CgState* cgs, InstrInfo* inode,
953                           Int datasize, IRAtom* ea, IRAtom* guard,
954                           Bool isWrite )
955 {
956    tl_assert(isIRAtom(ea));
957    tl_assert(guard);
958    tl_assert(isIRAtom(guard));
959    tl_assert(datasize >= 1 && datasize <= min_line_size);
960 
961    if (!clo_cache_sim)
962       return;
963 
964    /* Adding guarded memory actions and merging them with the existing
965       queue is too complex.  Simply flush the queue and add this
966       action immediately.  Since guarded loads and stores are pretty
967       rare, this is not thought likely to cause any noticeable
968       performance loss as a result of the loss of event-merging
969       opportunities. */
970    tl_assert(cgs->events_used >= 0);
971    flushEvents(cgs);
972    tl_assert(cgs->events_used == 0);
973    /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
974    IRExpr*      i_node_expr;
975    const HChar* helperName;
976    void*        helperAddr;
977    IRExpr**     argv;
978    Int          regparms;
979    IRDirty*     di;
980    i_node_expr = mkIRExpr_HWord( (HWord)inode );
981    helperName  = isWrite ? "log_0Ir_1Dw_cache_access"
982                          : "log_0Ir_1Dr_cache_access";
983    helperAddr  = isWrite ? &log_0Ir_1Dw_cache_access
984                          : &log_0Ir_1Dr_cache_access;
985    argv        = mkIRExprVec_3( i_node_expr,
986                                 ea, mkIRExpr_HWord( datasize ) );
987    regparms    = 3;
988    di          = unsafeIRDirty_0_N(
989                     regparms,
990                     helperName, VG_(fnptr_to_fnentry)( helperAddr ),
991                     argv );
992    di->guard = guard;
993    addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
994 }
995 
996 
997 static
addEvent_Bc(CgState * cgs,InstrInfo * inode,IRAtom * guard)998 void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
999 {
1000    Event* evt;
1001    tl_assert(isIRAtom(guard));
1002    tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard)
1003              == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
1004    if (!clo_branch_sim)
1005       return;
1006    if (cgs->events_used == N_EVENTS)
1007       flushEvents(cgs);
1008    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
1009    evt = &cgs->events[cgs->events_used];
1010    init_Event(evt);
1011    evt->tag         = Ev_Bc;
1012    evt->inode       = inode;
1013    evt->Ev.Bc.taken = guard;
1014    cgs->events_used++;
1015 }
1016 
1017 static
addEvent_Bi(CgState * cgs,InstrInfo * inode,IRAtom * whereTo)1018 void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
1019 {
1020    Event* evt;
1021    tl_assert(isIRAtom(whereTo));
1022    tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo)
1023              == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
1024    if (!clo_branch_sim)
1025       return;
1026    if (cgs->events_used == N_EVENTS)
1027       flushEvents(cgs);
1028    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
1029    evt = &cgs->events[cgs->events_used];
1030    init_Event(evt);
1031    evt->tag       = Ev_Bi;
1032    evt->inode     = inode;
1033    evt->Ev.Bi.dst = whereTo;
1034    cgs->events_used++;
1035 }
1036 
1037 ////////////////////////////////////////////////////////////
1038 
1039 
1040 static
cg_instrument(VgCallbackClosure * closure,IRSB * sbIn,const VexGuestLayout * layout,const VexGuestExtents * vge,const VexArchInfo * archinfo_host,IRType gWordTy,IRType hWordTy)1041 IRSB* cg_instrument ( VgCallbackClosure* closure,
1042                       IRSB* sbIn,
1043                       const VexGuestLayout* layout,
1044                       const VexGuestExtents* vge,
1045                       const VexArchInfo* archinfo_host,
1046                       IRType gWordTy, IRType hWordTy )
1047 {
1048    Int        i;
1049    UInt       isize;
1050    IRStmt*    st;
1051    Addr       cia; /* address of current insn */
1052    CgState    cgs;
1053    IRTypeEnv* tyenv = sbIn->tyenv;
1054    InstrInfo* curr_inode = NULL;
1055 
1056    if (gWordTy != hWordTy) {
1057       /* We don't currently support this case. */
1058       VG_(tool_panic)("host/guest word size mismatch");
1059    }
1060 
1061    // Set up new SB
1062    cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
1063 
1064    // Copy verbatim any IR preamble preceding the first IMark
1065    i = 0;
1066    while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
1067       addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
1068       i++;
1069    }
1070 
1071    // Get the first statement, and initial cia from it
1072    tl_assert(sbIn->stmts_used > 0);
1073    tl_assert(i < sbIn->stmts_used);
1074    st = sbIn->stmts[i];
1075    tl_assert(Ist_IMark == st->tag);
1076 
1077    cia   = st->Ist.IMark.addr;
1078    isize = st->Ist.IMark.len;
1079    // If Vex fails to decode an instruction, the size will be zero.
1080    // Pretend otherwise.
1081    if (isize == 0) isize = VG_MIN_INSTR_SZB;
1082 
1083    // Set up running state and get block info
1084    tl_assert(closure->readdr == vge->base[0]);
1085    cgs.events_used = 0;
1086    cgs.sbInfo      = get_SB_info(sbIn, (Addr)closure->readdr);
1087    cgs.sbInfo_i    = 0;
1088 
1089    if (DEBUG_CG)
1090       VG_(printf)("\n\n---------- cg_instrument ----------\n");
1091 
1092    // Traverse the block, initialising inodes, adding events and flushing as
1093    // necessary.
1094    for (/*use current i*/; i < sbIn->stmts_used; i++) {
1095 
1096       st = sbIn->stmts[i];
1097       tl_assert(isFlatIRStmt(st));
1098 
1099       switch (st->tag) {
1100          case Ist_NoOp:
1101          case Ist_AbiHint:
1102          case Ist_Put:
1103          case Ist_PutI:
1104          case Ist_MBE:
1105             break;
1106 
1107          case Ist_IMark:
1108             cia   = st->Ist.IMark.addr;
1109             isize = st->Ist.IMark.len;
1110 
1111             // If Vex fails to decode an instruction, the size will be zero.
1112             // Pretend otherwise.
1113             if (isize == 0) isize = VG_MIN_INSTR_SZB;
1114 
1115             // Sanity-check size.
1116             tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
1117                      || VG_CLREQ_SZB == isize );
1118 
1119             // Get space for and init the inode, record it as the current one.
1120             // Subsequent Dr/Dw/Dm events from the same instruction will
1121             // also use it.
1122             curr_inode = setup_InstrInfo(&cgs, cia, isize);
1123 
1124             addEvent_Ir( &cgs, curr_inode );
1125             break;
1126 
1127          case Ist_WrTmp: {
1128             IRExpr* data = st->Ist.WrTmp.data;
1129             if (data->tag == Iex_Load) {
1130                IRExpr* aexpr = data->Iex.Load.addr;
1131                // Note also, endianness info is ignored.  I guess
1132                // that's not interesting.
1133                addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
1134                                   aexpr );
1135             }
1136             break;
1137          }
1138 
1139          case Ist_Store: {
1140             IRExpr* data  = st->Ist.Store.data;
1141             IRExpr* aexpr = st->Ist.Store.addr;
1142             addEvent_Dw( &cgs, curr_inode,
1143                          sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
1144             break;
1145          }
1146 
1147          case Ist_StoreG: {
1148             IRStoreG* sg   = st->Ist.StoreG.details;
1149             IRExpr*   data = sg->data;
1150             IRExpr*   addr = sg->addr;
1151             IRType    type = typeOfIRExpr(tyenv, data);
1152             tl_assert(type != Ity_INVALID);
1153             addEvent_D_guarded( &cgs, curr_inode,
1154                                 sizeofIRType(type), addr, sg->guard,
1155                                 True/*isWrite*/ );
1156             break;
1157          }
1158 
1159          case Ist_LoadG: {
1160             IRLoadG* lg       = st->Ist.LoadG.details;
1161             IRType   type     = Ity_INVALID; /* loaded type */
1162             IRType   typeWide = Ity_INVALID; /* after implicit widening */
1163             IRExpr*  addr     = lg->addr;
1164             typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
1165             tl_assert(type != Ity_INVALID);
1166             addEvent_D_guarded( &cgs, curr_inode,
1167                                 sizeofIRType(type), addr, lg->guard,
1168                                 False/*!isWrite*/ );
1169             break;
1170          }
1171 
1172          case Ist_Dirty: {
1173             Int      dataSize;
1174             IRDirty* d = st->Ist.Dirty.details;
1175             if (d->mFx != Ifx_None) {
1176                /* This dirty helper accesses memory.  Collect the details. */
1177                tl_assert(d->mAddr != NULL);
1178                tl_assert(d->mSize != 0);
1179                dataSize = d->mSize;
1180                // Large (eg. 28B, 108B, 512B on x86) data-sized
1181                // instructions will be done inaccurately, but they're
1182                // very rare and this avoids errors from hitting more
1183                // than two cache lines in the simulation.
1184                if (dataSize > min_line_size)
1185                   dataSize = min_line_size;
1186                if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
1187                   addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
1188                if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
1189                   addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
1190             } else {
1191                tl_assert(d->mAddr == NULL);
1192                tl_assert(d->mSize == 0);
1193             }
1194             break;
1195          }
1196 
1197          case Ist_CAS: {
1198             /* We treat it as a read and a write of the location.  I
1199                think that is the same behaviour as it was before IRCAS
1200                was introduced, since prior to that point, the Vex
1201                front ends would translate a lock-prefixed instruction
1202                into a (normal) read followed by a (normal) write. */
1203             Int    dataSize;
1204             IRCAS* cas = st->Ist.CAS.details;
1205             tl_assert(cas->addr != NULL);
1206             tl_assert(cas->dataLo != NULL);
1207             dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
1208             if (cas->dataHi != NULL)
1209                dataSize *= 2; /* since it's a doubleword-CAS */
1210             /* I don't think this can ever happen, but play safe. */
1211             if (dataSize > min_line_size)
1212                dataSize = min_line_size;
1213             addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
1214             addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
1215             break;
1216          }
1217 
1218          case Ist_LLSC: {
1219             IRType dataTy;
1220             if (st->Ist.LLSC.storedata == NULL) {
1221                /* LL */
1222                dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
1223                addEvent_Dr( &cgs, curr_inode,
1224                             sizeofIRType(dataTy), st->Ist.LLSC.addr );
1225                /* flush events before LL, should help SC to succeed */
1226                flushEvents( &cgs );
1227             } else {
1228                /* SC */
1229                dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
1230                addEvent_Dw( &cgs, curr_inode,
1231                             sizeofIRType(dataTy), st->Ist.LLSC.addr );
1232             }
1233             break;
1234          }
1235 
1236          case Ist_Exit: {
1237             // call branch predictor only if this is a branch in guest code
1238             if ( (st->Ist.Exit.jk == Ijk_Boring) ||
1239                  (st->Ist.Exit.jk == Ijk_Call) ||
1240                  (st->Ist.Exit.jk == Ijk_Ret) )
1241             {
1242                /* Stuff to widen the guard expression to a host word, so
1243                   we can pass it to the branch predictor simulation
1244                   functions easily. */
1245                Bool     inverted;
1246                Addr     nia, sea;
1247                IRConst* dst;
1248                IRType   tyW    = hWordTy;
1249                IROp     widen  = tyW==Ity_I32  ? Iop_1Uto32  : Iop_1Uto64;
1250                IROp     opXOR  = tyW==Ity_I32  ? Iop_Xor32   : Iop_Xor64;
1251                IRTemp   guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
1252                IRTemp   guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
1253                IRTemp   guard  = newIRTemp(cgs.sbOut->tyenv, tyW);
1254                IRExpr*  one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
1255                                               : IRExpr_Const(IRConst_U64(1));
1256 
1257                /* First we need to figure out whether the side exit got
1258                   inverted by the ir optimiser.  To do that, figure out
1259                   the next (fallthrough) instruction's address and the
1260                   side exit address and see if they are the same. */
1261                nia = cia + isize;
1262 
1263                /* Side exit address */
1264                dst = st->Ist.Exit.dst;
1265                if (tyW == Ity_I32) {
1266                   tl_assert(dst->tag == Ico_U32);
1267                   sea = dst->Ico.U32;
1268                } else {
1269                   tl_assert(tyW == Ity_I64);
1270                   tl_assert(dst->tag == Ico_U64);
1271                   sea = dst->Ico.U64;
1272                }
1273 
1274                inverted = nia == sea;
1275 
1276                /* Widen the guard expression. */
1277                addStmtToIRSB( cgs.sbOut,
1278                               IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
1279                addStmtToIRSB( cgs.sbOut,
1280                               IRStmt_WrTmp( guardW,
1281                                             IRExpr_Unop(widen,
1282                                                         IRExpr_RdTmp(guard1))) );
1283                /* If the exit is inverted, invert the sense of the guard. */
1284                addStmtToIRSB(
1285                      cgs.sbOut,
1286                      IRStmt_WrTmp(
1287                            guard,
1288                            inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
1289                                     : IRExpr_RdTmp(guardW)
1290                               ));
1291                /* And post the event. */
1292                addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
1293             }
1294 
1295             /* We may never reach the next statement, so need to flush
1296                all outstanding transactions now. */
1297             flushEvents( &cgs );
1298             break;
1299          }
1300 
1301          default:
1302             ppIRStmt(st);
1303             tl_assert(0);
1304             break;
1305       }
1306 
1307       /* Copy the original statement */
1308       addStmtToIRSB( cgs.sbOut, st );
1309 
1310       if (DEBUG_CG) {
1311          ppIRStmt(st);
1312          VG_(printf)("\n");
1313       }
1314    }
1315 
1316    /* Deal with branches to unknown destinations.  Except ignore ones
1317       which are function returns as we assume the return stack
1318       predictor never mispredicts. */
1319    if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
1320       if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
1321       switch (sbIn->next->tag) {
1322          case Iex_Const:
1323             break; /* boring - branch to known address */
1324          case Iex_RdTmp:
1325             /* looks like an indirect branch (branch to unknown) */
1326             addEvent_Bi( &cgs, curr_inode, sbIn->next );
1327             break;
1328          default:
1329             /* shouldn't happen - if the incoming IR is properly
1330                flattened, should only have tmp and const cases to
1331                consider. */
1332             tl_assert(0);
1333       }
1334    }
1335 
1336    /* At the end of the bb.  Flush outstandings. */
1337    flushEvents( &cgs );
1338 
1339    /* done.  stay sane ... */
1340    tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);
1341 
1342    if (DEBUG_CG) {
1343       VG_(printf)( "goto {");
1344       ppIRJumpKind(sbIn->jumpkind);
1345       VG_(printf)( "} ");
1346       ppIRExpr( sbIn->next );
1347       VG_(printf)( "}\n");
1348    }
1349 
1350    return cgs.sbOut;
1351 }
1352 
1353 /*------------------------------------------------------------*/
1354 /*--- Cache configuration                                  ---*/
1355 /*------------------------------------------------------------*/
1356 
1357 static cache_t clo_I1_cache = UNDEFINED_CACHE;
1358 static cache_t clo_D1_cache = UNDEFINED_CACHE;
1359 static cache_t clo_LL_cache = UNDEFINED_CACHE;
1360 
1361 /*------------------------------------------------------------*/
1362 /*--- cg_fini() and related function                       ---*/
1363 /*------------------------------------------------------------*/
1364 
1365 // Total reads/writes/misses.  Calculated during CC traversal at the end.
1366 // All auto-zeroed.
1367 static CacheCC  Ir_total;
1368 static CacheCC  Dr_total;
1369 static CacheCC  Dw_total;
1370 static BranchCC Bc_total;
1371 static BranchCC Bi_total;
1372 
fprint_CC_table_and_calc_totals(void)1373 static void fprint_CC_table_and_calc_totals(void)
1374 {
1375    Int     i;
1376    VgFile  *fp;
1377    HChar   *currFile = NULL;
1378    const HChar *currFn = NULL;
1379    LineCC* lineCC;
1380 
1381    // Setup output filename.  Nb: it's important to do this now, ie. as late
1382    // as possible.  If we do it at start-up and the program forks and the
1383    // output file format string contains a %p (pid) specifier, both the
1384    // parent and child will incorrectly write to the same file;  this
1385    // happened in 3.3.0.
1386    HChar* cachegrind_out_file =
1387       VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);
1388 
1389    fp = VG_(fopen)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1390                                         VKI_S_IRUSR|VKI_S_IWUSR);
1391    if (fp == NULL) {
1392       // If the file can't be opened for whatever reason (conflict
1393       // between multiple cachegrinded processes?), give up now.
1394       VG_(umsg)("error: can't open cache simulation output file '%s'\n",
1395                 cachegrind_out_file );
1396       VG_(umsg)("       ... so simulation results will be missing.\n");
1397       VG_(free)(cachegrind_out_file);
1398       return;
1399    } else {
1400       VG_(free)(cachegrind_out_file);
1401    }
1402 
1403    // "desc:" lines (giving I1/D1/LL cache configuration).  The spaces after
1404    // the 2nd colon makes cg_annotate's output look nicer.
1405    VG_(fprintf)(fp,  "desc: I1 cache:         %s\n"
1406                      "desc: D1 cache:         %s\n"
1407                      "desc: LL cache:         %s\n",
1408                      I1.desc_line, D1.desc_line, LL.desc_line);
1409 
1410    // "cmd:" line
1411    VG_(fprintf)(fp, "cmd: %s", VG_(args_the_exename));
1412    for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
1413       HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
1414       VG_(fprintf)(fp, " %s", arg);
1415    }
1416    // "events:" line
1417    if (clo_cache_sim && clo_branch_sim) {
1418       VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
1419                                   "Bc Bcm Bi Bim\n");
1420    }
1421    else if (clo_cache_sim && !clo_branch_sim) {
1422       VG_(fprintf)(fp, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
1423                                   "\n");
1424    }
1425    else if (!clo_cache_sim && clo_branch_sim) {
1426       VG_(fprintf)(fp, "\nevents: Ir Bc Bcm Bi Bim\n");
1427    }
1428    else {
1429       VG_(fprintf)(fp, "\nevents: Ir\n");
1430    }
1431 
1432    // Traverse every lineCC
1433    VG_(OSetGen_ResetIter)(CC_table);
1434    while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
1435       Bool just_hit_a_new_file = False;
1436       // If we've hit a new file, print a "fl=" line.  Note that because
1437       // each string is stored exactly once in the string table, we can use
1438       // pointer comparison rather than strcmp() to test for equality, which
1439       // is good because most of the time the comparisons are equal and so
1440       // the whole strings would have to be checked.
1441       if ( lineCC->loc.file != currFile ) {
1442          currFile = lineCC->loc.file;
1443          VG_(fprintf)(fp, "fl=%s\n", currFile);
1444          distinct_files++;
1445          just_hit_a_new_file = True;
1446       }
1447       // If we've hit a new function, print a "fn=" line.  We know to do
1448       // this when the function name changes, and also every time we hit a
1449       // new file (in which case the new function name might be the same as
1450       // in the old file, hence the just_hit_a_new_file test).
1451       if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
1452          currFn = lineCC->loc.fn;
1453          VG_(fprintf)(fp, "fn=%s\n", currFn);
1454          distinct_fns++;
1455       }
1456 
1457       // Print the LineCC
1458       if (clo_cache_sim && clo_branch_sim) {
1459          VG_(fprintf)(fp,  "%u %llu %llu %llu"
1460                              " %llu %llu %llu"
1461                              " %llu %llu %llu"
1462                              " %llu %llu %llu %llu\n",
1463                             lineCC->loc.line,
1464                             lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
1465                             lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
1466                             lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL,
1467                             lineCC->Bc.b, lineCC->Bc.mp,
1468                             lineCC->Bi.b, lineCC->Bi.mp);
1469       }
1470       else if (clo_cache_sim && !clo_branch_sim) {
1471          VG_(fprintf)(fp,  "%u %llu %llu %llu"
1472                              " %llu %llu %llu"
1473                              " %llu %llu %llu\n",
1474                             lineCC->loc.line,
1475                             lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
1476                             lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
1477                             lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL);
1478       }
1479       else if (!clo_cache_sim && clo_branch_sim) {
1480          VG_(fprintf)(fp,  "%u %llu"
1481                              " %llu %llu %llu %llu\n",
1482                             lineCC->loc.line,
1483                             lineCC->Ir.a,
1484                             lineCC->Bc.b, lineCC->Bc.mp,
1485                             lineCC->Bi.b, lineCC->Bi.mp);
1486       }
1487       else {
1488          VG_(fprintf)(fp,  "%u %llu\n",
1489                             lineCC->loc.line,
1490                             lineCC->Ir.a);
1491       }
1492 
1493       // Update summary stats
1494       Ir_total.a  += lineCC->Ir.a;
1495       Ir_total.m1 += lineCC->Ir.m1;
1496       Ir_total.mL += lineCC->Ir.mL;
1497       Dr_total.a  += lineCC->Dr.a;
1498       Dr_total.m1 += lineCC->Dr.m1;
1499       Dr_total.mL += lineCC->Dr.mL;
1500       Dw_total.a  += lineCC->Dw.a;
1501       Dw_total.m1 += lineCC->Dw.m1;
1502       Dw_total.mL += lineCC->Dw.mL;
1503       Bc_total.b  += lineCC->Bc.b;
1504       Bc_total.mp += lineCC->Bc.mp;
1505       Bi_total.b  += lineCC->Bi.b;
1506       Bi_total.mp += lineCC->Bi.mp;
1507 
1508       distinct_lines++;
1509    }
1510 
1511    // Summary stats must come after rest of table, since we calculate them
1512    // during traversal.  */
1513    if (clo_cache_sim && clo_branch_sim) {
1514       VG_(fprintf)(fp,  "summary:"
1515                         " %llu %llu %llu"
1516                         " %llu %llu %llu"
1517                         " %llu %llu %llu"
1518                         " %llu %llu %llu %llu\n",
1519                         Ir_total.a, Ir_total.m1, Ir_total.mL,
1520                         Dr_total.a, Dr_total.m1, Dr_total.mL,
1521                         Dw_total.a, Dw_total.m1, Dw_total.mL,
1522                         Bc_total.b, Bc_total.mp,
1523                         Bi_total.b, Bi_total.mp);
1524    }
1525    else if (clo_cache_sim && !clo_branch_sim) {
1526       VG_(fprintf)(fp,  "summary:"
1527                         " %llu %llu %llu"
1528                         " %llu %llu %llu"
1529                         " %llu %llu %llu\n",
1530                         Ir_total.a, Ir_total.m1, Ir_total.mL,
1531                         Dr_total.a, Dr_total.m1, Dr_total.mL,
1532                         Dw_total.a, Dw_total.m1, Dw_total.mL);
1533    }
1534    else if (!clo_cache_sim && clo_branch_sim) {
1535       VG_(fprintf)(fp,  "summary:"
1536                         " %llu"
1537                         " %llu %llu %llu %llu\n",
1538                         Ir_total.a,
1539                         Bc_total.b, Bc_total.mp,
1540                         Bi_total.b, Bi_total.mp);
1541    }
1542    else {
1543       VG_(fprintf)(fp, "summary:"
1544                         " %llu\n",
1545                         Ir_total.a);
1546    }
1547 
1548    VG_(fclose)(fp);
1549 }
1550 
/* Return the number of characters needed to print `n` in decimal with a
   comma between each group of three digits (eg. 1,234,567 -> 9).  Zero
   takes one character. */
static UInt ULong_width(ULong n)
{
   UInt digits = 1;   /* every value, zero included, has at least one digit */

   /* Each further non-zero quotient by ten contributes one more digit. */
   for (n /= 10; n != 0; n /= 10) {
      digits++;
   }

   /* One comma is needed per complete group of three digits that follows
      the leading group. */
   return digits + (digits - 1) / 3;
}
1561 
/* Tool "fini" callback; it is handed to the core through
   VG_(basic_tool_funcs) in cg_pre_clo_init.  'exitcode' is not used.

   Steps:
     1. Call fprint_CC_table_and_calc_totals() (going by its name, this
        dumps the cost-centre table and fills in the global *_total
        counters read below).
     2. Unless verbosity is 0, print the summary: instruction refs always,
        I/D/LL cache figures when clo_cache_sim is set, branch figures
        when clo_branch_sim is set.
     3. When VG_(clo_stats) is set, print internal bookkeeping statistics.

   Side effect: several global totals (Ir_total.a, Dr_total.a, Dw_total.a,
   Bc_total.b, Bi_total.b) are bumped from 0 to 1 so that they can be used
   as divisors.  In every section the counts are printed *before* being
   bumped, so a zero count is always reported as zero. */
static void cg_fini(Int exitcode)
{
   static HChar fmt[128];   // OK; large enough

   CacheCC  D_total;
   BranchCC B_total;
   ULong LL_total_m, LL_total_mr, LL_total_mw,
         LL_total, LL_total_r, LL_total_w;
   Int l1, l2, l3;

   fprint_CC_table_and_calc_totals();

   if (VG_(clo_verbosity) == 0)
      return;

   // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
   #define CG_MAX(a, b)  ((a) >= (b) ? (a) : (b))

   /* Column widths.  Column 1 is sized from the instruction count; columns
    * 2 and 3 must fit both the data-access counts and the branch counts,
    * since the cache and branch sections share the same layout. */
   l1 = ULong_width(Ir_total.a);
   l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b));
   l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b));

   /* Make format string, getting width right for numbers */
   VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);

   /* Always print this */
   VG_(umsg)(fmt, "I   refs:     ", Ir_total.a);

   /* If cache profiling is enabled, show D access numbers and all
      miss numbers */
   if (clo_cache_sim) {
      VG_(umsg)(fmt, "I1  misses:   ", Ir_total.m1);
      VG_(umsg)(fmt, "LLi misses:   ", Ir_total.mL);

      /* Counts are printed; from here on Ir_total.a is only a divisor. */
      if (0 == Ir_total.a) Ir_total.a = 1;
      VG_(umsg)("I1  miss rate: %*.2f%%\n", l1,
                Ir_total.m1 * 100.0 / Ir_total.a);
      VG_(umsg)("LLi miss rate: %*.2f%%\n", l1,
                Ir_total.mL * 100.0 / Ir_total.a);
      VG_(umsg)("\n");

      /* D cache results: totals are reads plus writes. */
      D_total.a  = Dr_total.a  + Dw_total.a;
      D_total.m1 = Dr_total.m1 + Dw_total.m1;
      D_total.mL = Dr_total.mL + Dw_total.mL;

      /* Make format string, getting width right for numbers */
      VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu rd   + %%,%dllu wr)\n",
                        l1, l2, l3);

      VG_(umsg)(fmt, "D   refs:     ",
                     D_total.a, Dr_total.a, Dw_total.a);
      VG_(umsg)(fmt, "D1  misses:   ",
                     D_total.m1, Dr_total.m1, Dw_total.m1);
      VG_(umsg)(fmt, "LLd misses:   ",
                     D_total.mL, Dr_total.mL, Dw_total.mL);

      /* Counts are printed; make the access totals safe as divisors. */
      if (0 == D_total.a)  D_total.a = 1;
      if (0 == Dr_total.a) Dr_total.a = 1;
      if (0 == Dw_total.a) Dw_total.a = 1;
      VG_(umsg)("D1  miss rate: %*.1f%% (%*.1f%%     + %*.1f%%  )\n",
                l1, D_total.m1  * 100.0 / D_total.a,
                l2, Dr_total.m1 * 100.0 / Dr_total.a,
                l3, Dw_total.m1 * 100.0 / Dw_total.a);
      VG_(umsg)("LLd miss rate: %*.1f%% (%*.1f%%     + %*.1f%%  )\n",
                l1, D_total.mL  * 100.0 / D_total.a,
                l2, Dr_total.mL * 100.0 / Dr_total.a,
                l3, Dw_total.mL * 100.0 / Dw_total.a);
      VG_(umsg)("\n");

      /* LL overall results.  An LL reference is a first-level miss;
         instruction fetches are counted on the read side. */

      LL_total   = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
      LL_total_r = Dr_total.m1 + Ir_total.m1;
      LL_total_w = Dw_total.m1;
      VG_(umsg)(fmt, "LL refs:      ",
                     LL_total, LL_total_r, LL_total_w);

      LL_total_m  = Dr_total.mL + Dw_total.mL + Ir_total.mL;
      LL_total_mr = Dr_total.mL + Ir_total.mL;
      LL_total_mw = Dw_total.mL;
      VG_(umsg)(fmt, "LL misses:    ",
                     LL_total_m, LL_total_mr, LL_total_mw);

      /* The divisors below were all forced non-zero above. */
      VG_(umsg)("LL miss rate:  %*.1f%% (%*.1f%%     + %*.1f%%  )\n",
                l1, LL_total_m  * 100.0 / (Ir_total.a + D_total.a),
                l2, LL_total_mr * 100.0 / (Ir_total.a + Dr_total.a),
                l3, LL_total_mw * 100.0 / Dw_total.a);
   }

   /* If branch profiling is enabled, show branch overall results. */
   if (clo_branch_sim) {
      /* Make format string, getting width right for numbers */
      VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu cond + %%,%dllu ind)\n",
                        l1, l2, l3);

      /* Sum and print the raw counts first, so that a category with no
         branches shows 0 and the total is not inflated. */
      B_total.b  = Bc_total.b  + Bi_total.b;
      B_total.mp = Bc_total.mp + Bi_total.mp;

      VG_(umsg)("\n");
      VG_(umsg)(fmt, "Branches:     ",
                     B_total.b, Bc_total.b, Bi_total.b);

      VG_(umsg)(fmt, "Mispredicts:  ",
                     B_total.mp, Bc_total.mp, Bi_total.mp);

      /* Only now make the counts safe to divide by. */
      if (0 == B_total.b)   B_total.b  = 1;
      if (0 == Bc_total.b)  Bc_total.b = 1;
      if (0 == Bi_total.b)  Bi_total.b = 1;

      VG_(umsg)("Mispred rate:  %*.1f%% (%*.1f%%     + %*.1f%%   )\n",
                l1, B_total.mp  * 100.0 / B_total.b,
                l2, Bc_total.mp * 100.0 / Bc_total.b,
                l3, Bi_total.mp * 100.0 / Bi_total.b);
   }

   // Various stats
   if (VG_(clo_stats)) {
      Int debug_lookups = full_debugs      + fn_debugs +
                          file_line_debugs + no_debugs;
      /* Divisor for the percentages below; never zero, so that a run with
         no lookups reports 0.0% instead of dividing by zero. */
      Int lookups_div   = (0 == debug_lookups) ? 1 : debug_lookups;

      VG_(dmsg)("\n");
      VG_(dmsg)("cachegrind: distinct files     : %d\n", distinct_files);
      VG_(dmsg)("cachegrind: distinct functions : %d\n", distinct_fns);
      VG_(dmsg)("cachegrind: distinct lines     : %d\n", distinct_lines);
      VG_(dmsg)("cachegrind: distinct instrs NoX: %d\n", distinct_instrsNoX);
      VG_(dmsg)("cachegrind: distinct instrs Gen: %d\n", distinct_instrsGen);
      VG_(dmsg)("cachegrind: debug lookups      : %d\n", debug_lookups);

      VG_(dmsg)("cachegrind: with full      info:%6.1f%% (%d)\n",
                full_debugs * 100.0 / lookups_div, full_debugs);
      VG_(dmsg)("cachegrind: with file/line info:%6.1f%% (%d)\n",
                file_line_debugs * 100.0 / lookups_div, file_line_debugs);
      VG_(dmsg)("cachegrind: with fn name   info:%6.1f%% (%d)\n",
                fn_debugs * 100.0 / lookups_div, fn_debugs);
      VG_(dmsg)("cachegrind: with zero      info:%6.1f%% (%d)\n",
                no_debugs * 100.0 / lookups_div, no_debugs);

      VG_(dmsg)("cachegrind: string table size: %lu\n",
                VG_(OSetGen_Size)(stringTable));
      VG_(dmsg)("cachegrind: CC table size: %lu\n",
                VG_(OSetGen_Size)(CC_table));
      VG_(dmsg)("cachegrind: InstrInfo table size: %lu\n",
                VG_(OSetGen_Size)(instrInfoTable));
   }
}
1709 
1710 /*--------------------------------------------------------------------*/
1711 /*--- Discarding BB info                                           ---*/
1712 /*--------------------------------------------------------------------*/
1713 
1714 // Called when a translation is removed from the translation cache for
1715 // any reason at all: to free up space, because the guest code was
1716 // unmapped or modified, or for any arbitrary reason.
1717 static
cg_discard_superblock_info(Addr orig_addr64,VexGuestExtents vge)1718 void cg_discard_superblock_info ( Addr orig_addr64, VexGuestExtents vge )
1719 {
1720    SB_info* sbInfo;
1721    Addr     orig_addr = vge.base[0];
1722 
1723    tl_assert(vge.n_used > 0);
1724 
1725    if (DEBUG_CG)
1726       VG_(printf)( "discard_basic_block_info: %p, %p, %llu\n",
1727                    (void*)orig_addr,
1728                    (void*)vge.base[0], (ULong)vge.len[0]);
1729 
1730    // Get BB info, remove from table, free BB info.  Simple!  Note that we
1731    // use orig_addr, not the first instruction address in vge.
1732    sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
1733    tl_assert(NULL != sbInfo);
1734    VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
1735 }
1736 
1737 /*--------------------------------------------------------------------*/
1738 /*--- Command line processing                                      ---*/
1739 /*--------------------------------------------------------------------*/
1740 
cg_process_cmd_line_option(const HChar * arg)1741 static Bool cg_process_cmd_line_option(const HChar* arg)
1742 {
1743    if (VG_(str_clo_cache_opt)(arg,
1744                               &clo_I1_cache,
1745                               &clo_D1_cache,
1746                               &clo_LL_cache)) {}
1747 
1748    else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
1749    else if VG_BOOL_CLO(arg, "--cache-sim",  clo_cache_sim)  {}
1750    else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
1751    else
1752       return False;
1753 
1754    return True;
1755 }
1756 
cg_print_usage(void)1757 static void cg_print_usage(void)
1758 {
1759    VG_(print_cache_clo_opts)();
1760    VG_(printf)(
1761 "    --cache-sim=yes|no  [yes]        collect cache stats?\n"
1762 "    --branch-sim=yes|no [no]         collect branch prediction stats?\n"
1763 "    --cachegrind-out-file=<file>     output file name [cachegrind.out.%%p]\n"
1764    );
1765 }
1766 
cg_print_debug_usage(void)1767 static void cg_print_debug_usage(void)
1768 {
1769    VG_(printf)(
1770 "    (none)\n"
1771    );
1772 }
1773 
1774 /*--------------------------------------------------------------------*/
1775 /*--- Setup                                                        ---*/
1776 /*--------------------------------------------------------------------*/
1777 
1778 static void cg_post_clo_init(void); /* just below */
1779 
cg_pre_clo_init(void)1780 static void cg_pre_clo_init(void)
1781 {
1782    VG_(details_name)            ("Cachegrind");
1783    VG_(details_version)         (NULL);
1784    VG_(details_description)     ("a cache and branch-prediction profiler");
1785    VG_(details_copyright_author)(
1786       "Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.");
1787    VG_(details_bug_reports_to)  (VG_BUGS_TO);
1788    VG_(details_avg_translation_sizeB) ( 500 );
1789 
1790    VG_(clo_vex_control).iropt_register_updates_default
1791       = VG_(clo_px_file_backed)
1792       = VexRegUpdSpAtMemAccess; // overridable by the user.
1793 
1794    VG_(basic_tool_funcs)          (cg_post_clo_init,
1795                                    cg_instrument,
1796                                    cg_fini);
1797 
1798    VG_(needs_superblock_discards)(cg_discard_superblock_info);
1799    VG_(needs_command_line_options)(cg_process_cmd_line_option,
1800                                    cg_print_usage,
1801                                    cg_print_debug_usage);
1802 }
1803 
cg_post_clo_init(void)1804 static void cg_post_clo_init(void)
1805 {
1806    cache_t I1c, D1c, LLc;
1807 
1808    CC_table =
1809       VG_(OSetGen_Create)(offsetof(LineCC, loc),
1810                           cmp_CodeLoc_LineCC,
1811                           VG_(malloc), "cg.main.cpci.1",
1812                           VG_(free));
1813    instrInfoTable =
1814       VG_(OSetGen_Create)(/*keyOff*/0,
1815                           NULL,
1816                           VG_(malloc), "cg.main.cpci.2",
1817                           VG_(free));
1818    stringTable =
1819       VG_(OSetGen_Create)(/*keyOff*/0,
1820                           stringCmp,
1821                           VG_(malloc), "cg.main.cpci.3",
1822                           VG_(free));
1823 
1824    VG_(post_clo_init_configure_caches)(&I1c, &D1c, &LLc,
1825                                        &clo_I1_cache,
1826                                        &clo_D1_cache,
1827                                        &clo_LL_cache);
1828 
1829    // min_line_size is used to make sure that we never feed
1830    // accesses to the simulator straddling more than two
1831    // cache lines at any cache level
1832    min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
1833    min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;
1834 
1835    Int largest_load_or_store_size
1836       = VG_(machine_get_size_of_largest_guest_register)();
1837    if (min_line_size < largest_load_or_store_size) {
1838       /* We can't continue, because the cache simulation might
1839          straddle more than 2 lines, and it will assert.  So let's
1840          just stop before we start. */
1841       VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
1842                 (Int)min_line_size);
1843       VG_(umsg)("  must be equal to or larger than the maximum register size (%d)\n",
1844                 largest_load_or_store_size );
1845       VG_(umsg)("  but it is not.  Exiting now.\n");
1846       VG_(exit)(1);
1847    }
1848 
1849    cachesim_initcaches(I1c, D1c, LLc);
1850 }
1851 
1852 VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
1853 
1854 /*--------------------------------------------------------------------*/
1855 /*--- end                                                          ---*/
1856 /*--------------------------------------------------------------------*/
1857 
1858