1 
2 /*--------------------------------------------------------------------*/
3 /*--- An example Valgrind tool.                          lk_main.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Lackey, an example Valgrind tool that does
8    some simple program measurement and tracing.
9 
10    Copyright (C) 2002-2013 Nicholas Nethercote
11       njn@valgrind.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 // This tool shows how to do some basic instrumentation.
32 //
33 // There are four kinds of instrumentation it can do.  They can be turned
34 // on/off independently with command line options:
35 //
36 // * --basic-counts   : do basic counts, eg. number of instructions
37 //                      executed, jumps executed, etc.
38 // * --detailed-counts: do more detailed counts:  number of loads, stores
39 //                      and ALU operations of different sizes.
40 // * --trace-mem=yes:   trace all (data) memory accesses.
41 // * --trace-superblocks=yes:
42 //                      trace all superblock entries.  Mostly of interest
43 //                      to the Valgrind developers.
44 //
45 // The code for each kind of instrumentation is guarded by a clo_* variable:
46 // clo_basic_counts, clo_detailed_counts, clo_trace_mem and clo_trace_sbs.
47 //
48 // If you want to modify any of the instrumentation code, look for the code
// that is guarded by the relevant clo_* variable (eg. clo_trace_mem).
50 // If you're not interested in the other kinds of instrumentation you can
51 // remove them.  If you want to do more complex modifications, please read
52 // VEX/pub/libvex_ir.h to understand the intermediate representation.
53 //
54 //
55 // Specific Details about --trace-mem=yes
56 // --------------------------------------
57 // Lackey's --trace-mem code is a good starting point for building Valgrind
58 // tools that act on memory loads and stores.  It also could be used as is,
59 // with its output used as input to a post-mortem processing step.  However,
60 // because memory traces can be very large, online analysis is generally
61 // better.
62 //
63 // It prints memory data access traces that look like this:
64 //
65 //   I  0023C790,2  # instruction read at 0x0023C790 of size 2
66 //   I  0023C792,5
67 //    S BE80199C,4  # data store at 0xBE80199C of size 4
68 //   I  0025242B,3
69 //    L BE801950,4  # data load at 0xBE801950 of size 4
70 //   I  0023D476,7
71 //    M 0025747C,1  # data modify at 0x0025747C of size 1
72 //   I  0023DC20,2
73 //    L 00254962,1
74 //    L BE801FB3,1
75 //   I  00252305,1
76 //    L 00254AEB,1
77 //    S 00257998,1
78 //
79 // Every instruction executed has an "instr" event representing it.
80 // Instructions that do memory accesses are followed by one or more "load",
81 // "store" or "modify" events.  Some instructions do more than one load or
82 // store, as in the last two examples in the above trace.
83 //
84 // Here are some examples of x86 instructions that do different combinations
85 // of loads, stores, and modifies.
86 //
87 //    Instruction          Memory accesses                  Event sequence
88 //    -----------          ---------------                  --------------
89 //    add %eax, %ebx       No loads or stores               instr
90 //
91 //    movl (%eax), %ebx    loads (%eax)                     instr, load
92 //
93 //    movl %eax, (%ebx)    stores (%ebx)                    instr, store
94 //
95 //    incl (%ecx)          modifies (%ecx)                  instr, modify
96 //
97 //    cmpsb                loads (%esi), loads(%edi)        instr, load, load
98 //
99 //    call*l (%edx)        loads (%edx), stores -4(%esp)    instr, load, store
100 //    pushl (%edx)         loads (%edx), stores -4(%esp)    instr, load, store
101 //    movsw                loads (%esi), stores (%edi)      instr, load, store
102 //
103 // Instructions using x86 "rep" prefixes are traced as if they are repeated
104 // N times.
105 //
106 // Lackey with --trace-mem gives good traces, but they are not perfect, for
107 // the following reasons:
108 //
109 // - It does not trace into the OS kernel, so system calls and other kernel
110 //   operations (eg. some scheduling and signal handling code) are ignored.
111 //
112 // - It could model loads and stores done at the system call boundary using
113 //   the pre_mem_read/post_mem_write events.  For example, if you call
114 //   fstat() you know that the passed in buffer has been written.  But it
115 //   currently does not do this.
116 //
117 // - Valgrind replaces some code (not much) with its own, notably parts of
118 //   code for scheduling operations and signal handling.  This code is not
119 //   traced.
120 //
121 // - There is no consideration of virtual-to-physical address mapping.
122 //   This may not matter for many purposes.
123 //
124 // - Valgrind modifies the instruction stream in some very minor ways.  For
125 //   example, on x86 the bts, btc, btr instructions are incorrectly
126 //   considered to always touch memory (this is a consequence of these
127 //   instructions being very difficult to simulate).
128 //
// - Valgrind tools lay out memory differently to normal programs, so the
130 //   addresses you get will not be typical.  Thus Lackey (and all Valgrind
131 //   tools) is suitable for getting relative memory traces -- eg. if you
132 //   want to analyse locality of memory accesses -- but is not good if
133 //   absolute addresses are important.
134 //
135 // Despite all these warnings, Lackey's results should be good enough for a
136 // wide range of purposes.  For example, Cachegrind shares all the above
137 // shortcomings and it is still useful.
138 //
139 // For further inspiration, you should look at cachegrind/cg_main.c which
140 // uses the same basic technique for tracing memory accesses, but also groups
141 // events together for processing into twos and threes so that fewer C calls
142 // are made and things run faster.
143 //
144 // Specific Details about --trace-superblocks=yes
145 // ----------------------------------------------
146 // Valgrind splits code up into single entry, multiple exit blocks
147 // known as superblocks.  By itself, --trace-superblocks=yes just
148 // prints a message as each superblock is run:
149 //
150 //  SB 04013170
151 //  SB 04013177
152 //  SB 04013173
153 //  SB 04013177
154 //
155 // The hex number is the address of the first instruction in the
156 // superblock.  You can see the relationship more obviously if you use
// --trace-superblocks=yes and --trace-mem=yes together.  Then an "SB"
// message at address X is immediately followed by an "I" (instruction
// read) line for that address, as the first instruction in the block
// is executed, for example:
161 //
162 //  SB 04014073
163 //  I  04014073,3
164 //   L 7FEFFF7F8,8
165 //  I  04014076,4
166 //  I  0401407A,3
167 //  I  0401407D,3
168 //  I  04014080,3
169 //  I  04014083,6
170 
171 
172 #include "pub_tool_basics.h"
173 #include "pub_tool_tooliface.h"
174 #include "pub_tool_libcassert.h"
175 #include "pub_tool_libcprint.h"
176 #include "pub_tool_debuginfo.h"
177 #include "pub_tool_libcbase.h"
178 #include "pub_tool_options.h"
179 #include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
180 
181 /*------------------------------------------------------------*/
182 /*--- Command line options                                 ---*/
183 /*------------------------------------------------------------*/
184 
185 /* Command line options controlling instrumentation kinds, as described at
186  * the top of this file. */
187 static Bool clo_basic_counts    = True;
188 static Bool clo_detailed_counts = False;
189 static Bool clo_trace_mem       = False;
190 static Bool clo_trace_sbs       = False;
191 
192 /* The name of the function of which the number of calls (under
193  * --basic-counts=yes) is to be counted, with default. Override with command
194  * line option --fnname. */
195 static const HChar* clo_fnname = "main";
196 
lk_process_cmd_line_option(const HChar * arg)197 static Bool lk_process_cmd_line_option(const HChar* arg)
198 {
199    if VG_STR_CLO(arg, "--fnname", clo_fnname) {}
200    else if VG_BOOL_CLO(arg, "--basic-counts",      clo_basic_counts) {}
201    else if VG_BOOL_CLO(arg, "--detailed-counts",   clo_detailed_counts) {}
202    else if VG_BOOL_CLO(arg, "--trace-mem",         clo_trace_mem) {}
203    else if VG_BOOL_CLO(arg, "--trace-superblocks", clo_trace_sbs) {}
204    else
205       return False;
206 
207    tl_assert(clo_fnname);
208    tl_assert(clo_fnname[0]);
209    return True;
210 }
211 
lk_print_usage(void)212 static void lk_print_usage(void)
213 {
214    VG_(printf)(
215 "    --basic-counts=no|yes     count instructions, jumps, etc. [yes]\n"
216 "    --detailed-counts=no|yes  count loads, stores and alu ops [no]\n"
217 "    --trace-mem=no|yes        trace all loads and stores [no]\n"
218 "    --trace-superblocks=no|yes  trace all superblock entries [no]\n"
219 "    --fnname=<name>           count calls to <name> (only used if\n"
220 "                              --basic-count=yes)  [main]\n"
221    );
222 }
223 
lk_print_debug_usage(void)224 static void lk_print_debug_usage(void)
225 {
226    VG_(printf)(
227 "    (none)\n"
228    );
229 }
230 
231 /*------------------------------------------------------------*/
232 /*--- Stuff for --basic-counts                             ---*/
233 /*------------------------------------------------------------*/
234 
235 /* Nb: use ULongs because the numbers can get very big */
236 static ULong n_func_calls    = 0;
237 static ULong n_SBs_entered   = 0;
238 static ULong n_SBs_completed = 0;
239 static ULong n_IRStmts       = 0;
240 static ULong n_guest_instrs  = 0;
241 static ULong n_Jccs          = 0;
242 static ULong n_Jccs_untaken  = 0;
243 static ULong n_IJccs         = 0;
244 static ULong n_IJccs_untaken = 0;
245 
add_one_func_call(void)246 static void add_one_func_call(void)
247 {
248    n_func_calls++;
249 }
250 
add_one_SB_entered(void)251 static void add_one_SB_entered(void)
252 {
253    n_SBs_entered++;
254 }
255 
add_one_SB_completed(void)256 static void add_one_SB_completed(void)
257 {
258    n_SBs_completed++;
259 }
260 
add_one_IRStmt(void)261 static void add_one_IRStmt(void)
262 {
263    n_IRStmts++;
264 }
265 
add_one_guest_instr(void)266 static void add_one_guest_instr(void)
267 {
268    n_guest_instrs++;
269 }
270 
add_one_Jcc(void)271 static void add_one_Jcc(void)
272 {
273    n_Jccs++;
274 }
275 
add_one_Jcc_untaken(void)276 static void add_one_Jcc_untaken(void)
277 {
278    n_Jccs_untaken++;
279 }
280 
add_one_inverted_Jcc(void)281 static void add_one_inverted_Jcc(void)
282 {
283    n_IJccs++;
284 }
285 
add_one_inverted_Jcc_untaken(void)286 static void add_one_inverted_Jcc_untaken(void)
287 {
288    n_IJccs_untaken++;
289 }
290 
291 /*------------------------------------------------------------*/
292 /*--- Stuff for --detailed-counts                          ---*/
293 /*------------------------------------------------------------*/
294 
295 typedef
296    IRExpr
297    IRAtom;
298 
299 /* --- Operations --- */
300 
301 typedef enum { OpLoad=0, OpStore=1, OpAlu=2 } Op;
302 
303 #define N_OPS 3
304 
305 
306 /* --- Types --- */
307 
308 #define N_TYPES 14
309 
type2index(IRType ty)310 static Int type2index ( IRType ty )
311 {
312    switch (ty) {
313       case Ity_I1:      return 0;
314       case Ity_I8:      return 1;
315       case Ity_I16:     return 2;
316       case Ity_I32:     return 3;
317       case Ity_I64:     return 4;
318       case Ity_I128:    return 5;
319       case Ity_F32:     return 6;
320       case Ity_F64:     return 7;
321       case Ity_F128:    return 8;
322       case Ity_V128:    return 9;
323       case Ity_V256:    return 10;
324       case Ity_D32:     return 11;
325       case Ity_D64:     return 12;
326       case Ity_D128:    return 13;
327       default: tl_assert(0);
328    }
329 }
330 
nameOfTypeIndex(Int i)331 static const HChar* nameOfTypeIndex ( Int i )
332 {
333    switch (i) {
334       case 0: return "I1";   break;
335       case 1: return "I8";   break;
336       case 2: return "I16";  break;
337       case 3: return "I32";  break;
338       case 4: return "I64";  break;
339       case 5: return "I128"; break;
340       case 6: return "F32";  break;
341       case 7: return "F64";  break;
342       case 8: return "F128";  break;
343       case 9: return "V128";  break;
344       case 10: return "V256"; break;
345       case 11: return "D32";  break;
346       case 12: return "D64";  break;
347       case 13: return "D128"; break;
348       default: tl_assert(0);
349    }
350 }
351 
352 
353 /* --- Counts --- */
354 
355 static ULong detailCounts[N_OPS][N_TYPES];
356 
357 /* The helper that is called from the instrumented code. */
358 static VG_REGPARM(1)
increment_detail(ULong * detail)359 void increment_detail(ULong* detail)
360 {
361    (*detail)++;
362 }
363 
364 /* A helper that adds the instrumentation for a detail.  guard ::
365    Ity_I1 is the guarding condition for the event.  If NULL it is
366    assumed to mean "always True". */
instrument_detail(IRSB * sb,Op op,IRType type,IRAtom * guard)367 static void instrument_detail(IRSB* sb, Op op, IRType type, IRAtom* guard)
368 {
369    IRDirty* di;
370    IRExpr** argv;
371    const UInt typeIx = type2index(type);
372 
373    tl_assert(op < N_OPS);
374    tl_assert(typeIx < N_TYPES);
375 
376    argv = mkIRExprVec_1( mkIRExpr_HWord( (HWord)&detailCounts[op][typeIx] ) );
377    di = unsafeIRDirty_0_N( 1, "increment_detail",
378                               VG_(fnptr_to_fnentry)( &increment_detail ),
379                               argv);
380    if (guard) di->guard = guard;
381    addStmtToIRSB( sb, IRStmt_Dirty(di) );
382 }
383 
384 /* Summarize and print the details. */
print_details(void)385 static void print_details ( void )
386 {
387    Int typeIx;
388    VG_(umsg)("   Type        Loads       Stores       AluOps\n");
389    VG_(umsg)("   -------------------------------------------\n");
390    for (typeIx = 0; typeIx < N_TYPES; typeIx++) {
391       VG_(umsg)("   %-4s %'12llu %'12llu %'12llu\n",
392                 nameOfTypeIndex( typeIx ),
393                 detailCounts[OpLoad ][typeIx],
394                 detailCounts[OpStore][typeIx],
395                 detailCounts[OpAlu  ][typeIx]
396       );
397    }
398 }
399 
400 
401 /*------------------------------------------------------------*/
402 /*--- Stuff for --trace-mem                                ---*/
403 /*------------------------------------------------------------*/
404 
405 #define MAX_DSIZE    512
406 
407 typedef
408    enum { Event_Ir, Event_Dr, Event_Dw, Event_Dm }
409    EventKind;
410 
411 typedef
412    struct {
413       EventKind  ekind;
414       IRAtom*    addr;
415       Int        size;
416       IRAtom*    guard; /* :: Ity_I1, or NULL=="always True" */
417    }
418    Event;
419 
420 /* Up to this many unnotified events are allowed.  Must be at least two,
421    so that reads and writes to the same address can be merged into a modify.
422    Beyond that, larger numbers just potentially induce more spilling due to
423    extending live ranges of address temporaries. */
424 #define N_EVENTS 4
425 
426 /* Maintain an ordered list of memory events which are outstanding, in
427    the sense that no IR has yet been generated to do the relevant
428    helper calls.  The SB is scanned top to bottom and memory events
429    are added to the end of the list, merging with the most recent
430    notified event where possible (Dw immediately following Dr and
431    having the same size and EA can be merged).
432 
433    This merging is done so that for architectures which have
434    load-op-store instructions (x86, amd64), the instr is treated as if
435    it makes just one memory reference (a modify), rather than two (a
436    read followed by a write at the same address).
437 
438    At various points the list will need to be flushed, that is, IR
439    generated from it.  That must happen before any possible exit from
440    the block (the end, or an IRStmt_Exit).  Flushing also takes place
441    when there is no space to add a new event, and before entering a
442    RMW (read-modify-write) section on processors supporting LL/SC.
443 
444    If we require the simulation statistics to be up to date with
445    respect to possible memory exceptions, then the list would have to
446    be flushed before each memory reference.  That's a pain so we don't
447    bother.
448 
449    Flushing the list consists of walking it start to end and emitting
450    instrumentation IR for each event, in the order in which they
451    appear. */
452 
453 static Event events[N_EVENTS];
454 static Int   events_used = 0;
455 
456 
trace_instr(Addr addr,SizeT size)457 static VG_REGPARM(2) void trace_instr(Addr addr, SizeT size)
458 {
459    VG_(printf)("I  %08lx,%lu\n", addr, size);
460 }
461 
trace_load(Addr addr,SizeT size)462 static VG_REGPARM(2) void trace_load(Addr addr, SizeT size)
463 {
464    VG_(printf)(" L %08lx,%lu\n", addr, size);
465 }
466 
trace_store(Addr addr,SizeT size)467 static VG_REGPARM(2) void trace_store(Addr addr, SizeT size)
468 {
469    VG_(printf)(" S %08lx,%lu\n", addr, size);
470 }
471 
trace_modify(Addr addr,SizeT size)472 static VG_REGPARM(2) void trace_modify(Addr addr, SizeT size)
473 {
474    VG_(printf)(" M %08lx,%lu\n", addr, size);
475 }
476 
477 
/* Generate instrumentation for every queued event, oldest first, and then
   empty the queue.

   For each event a dirty call helper(addr, size) is appended to sb, where
   the helper is chosen by the event's kind.  If the event carries a guard
   it is installed on the dirty call. */
static void flushEvents(IRSB* sb)
{
   Int slot;

   for (slot = 0; slot < events_used; slot++) {
      Event*       pending = &events[slot];
      const HChar* fnName;
      void*        fnAddr;
      IRExpr**     callArgs;
      IRDirty*     call;

      // Pick the tracing helper that corresponds to the event kind.
      switch (pending->ekind) {
         case Event_Ir:
            fnName = "trace_instr";
            fnAddr = trace_instr;
            break;

         case Event_Dr:
            fnName = "trace_load";
            fnAddr = trace_load;
            break;

         case Event_Dw:
            fnName = "trace_store";
            fnAddr = trace_store;
            break;

         case Event_Dm:
            fnName = "trace_modify";
            fnAddr = trace_modify;
            break;

         default:
            tl_assert(0);
      }

      // Build and append the call helper(addr, size).
      callArgs = mkIRExprVec_2( pending->addr,
                                mkIRExpr_HWord( pending->size ) );
      call     = unsafeIRDirty_0_N( /*regparms*/2,
                                    fnName, VG_(fnptr_to_fnentry)( fnAddr ),
                                    callArgs );
      if (pending->guard) {
         call->guard = pending->guard;
      }
      addStmtToIRSB( sb, IRStmt_Dirty(call) );
   }

   // Everything has been emitted; the queue is empty again.
   events_used = 0;
}
521 
522 // WARNING:  If you aren't interested in instruction reads, you can omit the
523 // code that adds calls to trace_instr() in flushEvents().  However, you
524 // must still call this function, addEvent_Ir() -- it is necessary to add
525 // the Ir events to the events list so that merging of paired load/store
526 // events into modify events works correctly.
addEvent_Ir(IRSB * sb,IRAtom * iaddr,UInt isize)527 static void addEvent_Ir ( IRSB* sb, IRAtom* iaddr, UInt isize )
528 {
529    Event* evt;
530    tl_assert(clo_trace_mem);
531    tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
532             || VG_CLREQ_SZB == isize );
533    if (events_used == N_EVENTS)
534       flushEvents(sb);
535    tl_assert(events_used >= 0 && events_used < N_EVENTS);
536    evt = &events[events_used];
537    evt->ekind = Event_Ir;
538    evt->addr  = iaddr;
539    evt->size  = isize;
540    evt->guard = NULL;
541    events_used++;
542 }
543 
544 /* Add a guarded read event. */
545 static
addEvent_Dr_guarded(IRSB * sb,IRAtom * daddr,Int dsize,IRAtom * guard)546 void addEvent_Dr_guarded ( IRSB* sb, IRAtom* daddr, Int dsize, IRAtom* guard )
547 {
548    Event* evt;
549    tl_assert(clo_trace_mem);
550    tl_assert(isIRAtom(daddr));
551    tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
552    if (events_used == N_EVENTS)
553       flushEvents(sb);
554    tl_assert(events_used >= 0 && events_used < N_EVENTS);
555    evt = &events[events_used];
556    evt->ekind = Event_Dr;
557    evt->addr  = daddr;
558    evt->size  = dsize;
559    evt->guard = guard;
560    events_used++;
561 }
562 
563 /* Add an ordinary read event, by adding a guarded read event with an
564    always-true guard. */
565 static
addEvent_Dr(IRSB * sb,IRAtom * daddr,Int dsize)566 void addEvent_Dr ( IRSB* sb, IRAtom* daddr, Int dsize )
567 {
568    addEvent_Dr_guarded(sb, daddr, dsize, NULL);
569 }
570 
571 /* Add a guarded write event. */
572 static
addEvent_Dw_guarded(IRSB * sb,IRAtom * daddr,Int dsize,IRAtom * guard)573 void addEvent_Dw_guarded ( IRSB* sb, IRAtom* daddr, Int dsize, IRAtom* guard )
574 {
575    Event* evt;
576    tl_assert(clo_trace_mem);
577    tl_assert(isIRAtom(daddr));
578    tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
579    if (events_used == N_EVENTS)
580       flushEvents(sb);
581    tl_assert(events_used >= 0 && events_used < N_EVENTS);
582    evt = &events[events_used];
583    evt->ekind = Event_Dw;
584    evt->addr  = daddr;
585    evt->size  = dsize;
586    evt->guard = guard;
587    events_used++;
588 }
589 
590 /* Add an ordinary write event.  Try to merge it with an immediately
591    preceding ordinary read event of the same size to the same
592    address. */
593 static
addEvent_Dw(IRSB * sb,IRAtom * daddr,Int dsize)594 void addEvent_Dw ( IRSB* sb, IRAtom* daddr, Int dsize )
595 {
596    Event* lastEvt;
597    Event* evt;
598    tl_assert(clo_trace_mem);
599    tl_assert(isIRAtom(daddr));
600    tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
601 
602    // Is it possible to merge this write with the preceding read?
603    lastEvt = &events[events_used-1];
604    if (events_used > 0
605        && lastEvt->ekind == Event_Dr
606        && lastEvt->size  == dsize
607        && lastEvt->guard == NULL
608        && eqIRAtom(lastEvt->addr, daddr))
609    {
610       lastEvt->ekind = Event_Dm;
611       return;
612    }
613 
614    // No.  Add as normal.
615    if (events_used == N_EVENTS)
616       flushEvents(sb);
617    tl_assert(events_used >= 0 && events_used < N_EVENTS);
618    evt = &events[events_used];
619    evt->ekind = Event_Dw;
620    evt->size  = dsize;
621    evt->addr  = daddr;
622    evt->guard = NULL;
623    events_used++;
624 }
625 
626 
627 /*------------------------------------------------------------*/
628 /*--- Stuff for --trace-superblocks                        ---*/
629 /*------------------------------------------------------------*/
630 
/* Runtime helper for --trace-superblocks: print "SB <addr>" with the
   address in hex.  lk_instrument() plants a call to this at the start of
   each superblock, passing the superblock's first base address. */
static
void trace_superblock(Addr addr)
{
   VG_(printf)("SB %08lx\n", addr);
}
635 
636 
637 /*------------------------------------------------------------*/
638 /*--- Basic tool functions                                 ---*/
639 /*------------------------------------------------------------*/
640 
lk_post_clo_init(void)641 static void lk_post_clo_init(void)
642 {
643    Int op, tyIx;
644 
645    if (clo_detailed_counts) {
646       for (op = 0; op < N_OPS; op++)
647          for (tyIx = 0; tyIx < N_TYPES; tyIx++)
648             detailCounts[op][tyIx] = 0;
649    }
650 }
651 
652 static
lk_instrument(VgCallbackClosure * closure,IRSB * sbIn,const VexGuestLayout * layout,const VexGuestExtents * vge,const VexArchInfo * archinfo_host,IRType gWordTy,IRType hWordTy)653 IRSB* lk_instrument ( VgCallbackClosure* closure,
654                       IRSB* sbIn,
655                       const VexGuestLayout* layout,
656                       const VexGuestExtents* vge,
657                       const VexArchInfo* archinfo_host,
658                       IRType gWordTy, IRType hWordTy )
659 {
660    IRDirty*   di;
661    Int        i;
662    IRSB*      sbOut;
663    IRTypeEnv* tyenv = sbIn->tyenv;
664    Addr       iaddr = 0, dst;
665    UInt       ilen = 0;
666    Bool       condition_inverted = False;
667 
668    if (gWordTy != hWordTy) {
669       /* We don't currently support this case. */
670       VG_(tool_panic)("host/guest word size mismatch");
671    }
672 
673    /* Set up SB */
674    sbOut = deepCopyIRSBExceptStmts(sbIn);
675 
676    // Copy verbatim any IR preamble preceding the first IMark
677    i = 0;
678    while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
679       addStmtToIRSB( sbOut, sbIn->stmts[i] );
680       i++;
681    }
682 
683    if (clo_basic_counts) {
684       /* Count this superblock. */
685       di = unsafeIRDirty_0_N( 0, "add_one_SB_entered",
686                                  VG_(fnptr_to_fnentry)( &add_one_SB_entered ),
687                                  mkIRExprVec_0() );
688       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
689    }
690 
691    if (clo_trace_sbs) {
692       /* Print this superblock's address. */
693       di = unsafeIRDirty_0_N(
694               0, "trace_superblock",
695               VG_(fnptr_to_fnentry)( &trace_superblock ),
696               mkIRExprVec_1( mkIRExpr_HWord( vge->base[0] ) )
697            );
698       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
699    }
700 
701    if (clo_trace_mem) {
702       events_used = 0;
703    }
704 
705    for (/*use current i*/; i < sbIn->stmts_used; i++) {
706       IRStmt* st = sbIn->stmts[i];
707       if (!st || st->tag == Ist_NoOp) continue;
708 
709       if (clo_basic_counts) {
710          /* Count one VEX statement. */
711          di = unsafeIRDirty_0_N( 0, "add_one_IRStmt",
712                                     VG_(fnptr_to_fnentry)( &add_one_IRStmt ),
713                                     mkIRExprVec_0() );
714          addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
715       }
716 
717       switch (st->tag) {
718          case Ist_NoOp:
719          case Ist_AbiHint:
720          case Ist_Put:
721          case Ist_PutI:
722          case Ist_MBE:
723             addStmtToIRSB( sbOut, st );
724             break;
725 
726          case Ist_IMark:
727             if (clo_basic_counts) {
728                /* Needed to be able to check for inverted condition in Ist_Exit */
729                iaddr = st->Ist.IMark.addr;
730                ilen  = st->Ist.IMark.len;
731 
732                /* Count guest instruction. */
733                di = unsafeIRDirty_0_N( 0, "add_one_guest_instr",
734                                           VG_(fnptr_to_fnentry)( &add_one_guest_instr ),
735                                           mkIRExprVec_0() );
736                addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
737 
738                /* An unconditional branch to a known destination in the
739                 * guest's instructions can be represented, in the IRSB to
740                 * instrument, by the VEX statements that are the
741                 * translation of that known destination. This feature is
742                 * called 'SB chasing' and can be influenced by command
743                 * line option --vex-guest-chase-thresh.
744                 *
745                 * To get an accurate count of the calls to a specific
746                 * function, taking SB chasing into account, we need to
747                 * check for each guest instruction (Ist_IMark) if it is
748                 * the entry point of a function.
749                 */
750                tl_assert(clo_fnname);
751                tl_assert(clo_fnname[0]);
752                const HChar *fnname;
753                if (VG_(get_fnname_if_entry)(st->Ist.IMark.addr,
754                                             &fnname)
755                    && 0 == VG_(strcmp)(fnname, clo_fnname)) {
756                   di = unsafeIRDirty_0_N(
757                           0, "add_one_func_call",
758                              VG_(fnptr_to_fnentry)( &add_one_func_call ),
759                              mkIRExprVec_0() );
760                   addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
761                }
762             }
763             if (clo_trace_mem) {
764                // WARNING: do not remove this function call, even if you
765                // aren't interested in instruction reads.  See the comment
766                // above the function itself for more detail.
767                addEvent_Ir( sbOut, mkIRExpr_HWord( (HWord)st->Ist.IMark.addr ),
768                             st->Ist.IMark.len );
769             }
770             addStmtToIRSB( sbOut, st );
771             break;
772 
773          case Ist_WrTmp:
774             // Add a call to trace_load() if --trace-mem=yes.
775             if (clo_trace_mem) {
776                IRExpr* data = st->Ist.WrTmp.data;
777                if (data->tag == Iex_Load) {
778                   addEvent_Dr( sbOut, data->Iex.Load.addr,
779                                sizeofIRType(data->Iex.Load.ty) );
780                }
781             }
782             if (clo_detailed_counts) {
783                IRExpr* expr = st->Ist.WrTmp.data;
784                IRType  type = typeOfIRExpr(sbOut->tyenv, expr);
785                tl_assert(type != Ity_INVALID);
786                switch (expr->tag) {
787                   case Iex_Load:
788                     instrument_detail( sbOut, OpLoad, type, NULL/*guard*/ );
789                      break;
790                   case Iex_Unop:
791                   case Iex_Binop:
792                   case Iex_Triop:
793                   case Iex_Qop:
794                   case Iex_ITE:
795                      instrument_detail( sbOut, OpAlu, type, NULL/*guard*/ );
796                      break;
797                   default:
798                      break;
799                }
800             }
801             addStmtToIRSB( sbOut, st );
802             break;
803 
804          case Ist_Store: {
805             IRExpr* data = st->Ist.Store.data;
806             IRType  type = typeOfIRExpr(tyenv, data);
807             tl_assert(type != Ity_INVALID);
808             if (clo_trace_mem) {
809                addEvent_Dw( sbOut, st->Ist.Store.addr,
810                             sizeofIRType(type) );
811             }
812             if (clo_detailed_counts) {
813                instrument_detail( sbOut, OpStore, type, NULL/*guard*/ );
814             }
815             addStmtToIRSB( sbOut, st );
816             break;
817          }
818 
819          case Ist_StoreG: {
820             IRStoreG* sg   = st->Ist.StoreG.details;
821             IRExpr*   data = sg->data;
822             IRType    type = typeOfIRExpr(tyenv, data);
823             tl_assert(type != Ity_INVALID);
824             if (clo_trace_mem) {
825                addEvent_Dw_guarded( sbOut, sg->addr,
826                                     sizeofIRType(type), sg->guard );
827             }
828             if (clo_detailed_counts) {
829                instrument_detail( sbOut, OpStore, type, sg->guard );
830             }
831             addStmtToIRSB( sbOut, st );
832             break;
833          }
834 
835          case Ist_LoadG: {
836             IRLoadG* lg       = st->Ist.LoadG.details;
837             IRType   type     = Ity_INVALID; /* loaded type */
838             IRType   typeWide = Ity_INVALID; /* after implicit widening */
839             typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
840             tl_assert(type != Ity_INVALID);
841             if (clo_trace_mem) {
842                addEvent_Dr_guarded( sbOut, lg->addr,
843                                     sizeofIRType(type), lg->guard );
844             }
845             if (clo_detailed_counts) {
846                instrument_detail( sbOut, OpLoad, type, lg->guard );
847             }
848             addStmtToIRSB( sbOut, st );
849             break;
850          }
851 
852          case Ist_Dirty: {
853             if (clo_trace_mem) {
854                Int      dsize;
855                IRDirty* d = st->Ist.Dirty.details;
856                if (d->mFx != Ifx_None) {
857                   // This dirty helper accesses memory.  Collect the details.
858                   tl_assert(d->mAddr != NULL);
859                   tl_assert(d->mSize != 0);
860                   dsize = d->mSize;
861                   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
862                      addEvent_Dr( sbOut, d->mAddr, dsize );
863                   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
864                      addEvent_Dw( sbOut, d->mAddr, dsize );
865                } else {
866                   tl_assert(d->mAddr == NULL);
867                   tl_assert(d->mSize == 0);
868                }
869             }
870             addStmtToIRSB( sbOut, st );
871             break;
872          }
873 
874          case Ist_CAS: {
875             /* We treat it as a read and a write of the location.  I
876                think that is the same behaviour as it was before IRCAS
877                was introduced, since prior to that point, the Vex
878                front ends would translate a lock-prefixed instruction
879                into a (normal) read followed by a (normal) write. */
880             Int    dataSize;
881             IRType dataTy;
882             IRCAS* cas = st->Ist.CAS.details;
883             tl_assert(cas->addr != NULL);
884             tl_assert(cas->dataLo != NULL);
885             dataTy   = typeOfIRExpr(tyenv, cas->dataLo);
886             dataSize = sizeofIRType(dataTy);
887             if (cas->dataHi != NULL)
888                dataSize *= 2; /* since it's a doubleword-CAS */
889             if (clo_trace_mem) {
890                addEvent_Dr( sbOut, cas->addr, dataSize );
891                addEvent_Dw( sbOut, cas->addr, dataSize );
892             }
893             if (clo_detailed_counts) {
894                instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
895                if (cas->dataHi != NULL) /* dcas */
896                   instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
897                instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
898                if (cas->dataHi != NULL) /* dcas */
899                   instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
900             }
901             addStmtToIRSB( sbOut, st );
902             break;
903          }
904 
905          case Ist_LLSC: {
906             IRType dataTy;
907             if (st->Ist.LLSC.storedata == NULL) {
908                /* LL */
909                dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
910                if (clo_trace_mem) {
911                   addEvent_Dr( sbOut, st->Ist.LLSC.addr,
912                                       sizeofIRType(dataTy) );
913                   /* flush events before LL, helps SC to succeed */
914                   flushEvents(sbOut);
915 	       }
916                if (clo_detailed_counts)
917                   instrument_detail( sbOut, OpLoad, dataTy, NULL/*guard*/ );
918             } else {
919                /* SC */
920                dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
921                if (clo_trace_mem)
922                   addEvent_Dw( sbOut, st->Ist.LLSC.addr,
923                                       sizeofIRType(dataTy) );
924                if (clo_detailed_counts)
925                   instrument_detail( sbOut, OpStore, dataTy, NULL/*guard*/ );
926             }
927             addStmtToIRSB( sbOut, st );
928             break;
929          }
930 
931          case Ist_Exit:
932             if (clo_basic_counts) {
               // VEX inverts the condition of a branch when the branch that is
               // taken in the IR is in fact a fall through at the client address.
935                tl_assert(iaddr != 0);
936                dst = (sizeof(Addr) == 4) ? st->Ist.Exit.dst->Ico.U32 :
937                                            st->Ist.Exit.dst->Ico.U64;
938                condition_inverted = (dst == iaddr + ilen);
939 
940                /* Count Jcc */
941                if (!condition_inverted)
942                   di = unsafeIRDirty_0_N( 0, "add_one_Jcc",
943                                           VG_(fnptr_to_fnentry)( &add_one_Jcc ),
944                                           mkIRExprVec_0() );
945                else
946                   di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc",
947                                           VG_(fnptr_to_fnentry)(
948                                              &add_one_inverted_Jcc ),
949                                           mkIRExprVec_0() );
950 
951                addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
952             }
953             if (clo_trace_mem) {
954                flushEvents(sbOut);
955             }
956 
957             addStmtToIRSB( sbOut, st );      // Original statement
958 
959             if (clo_basic_counts) {
960                /* Count non-taken Jcc */
961                if (!condition_inverted)
962                   di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken",
963                                           VG_(fnptr_to_fnentry)(
964                                              &add_one_Jcc_untaken ),
965                                           mkIRExprVec_0() );
966                else
967                   di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc_untaken",
968                                           VG_(fnptr_to_fnentry)(
969                                              &add_one_inverted_Jcc_untaken ),
970                                           mkIRExprVec_0() );
971 
972                addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
973             }
974             break;
975 
976          default:
977             ppIRStmt(st);
978             tl_assert(0);
979       }
980    }
981 
982    if (clo_basic_counts) {
983       /* Count this basic block. */
984       di = unsafeIRDirty_0_N( 0, "add_one_SB_completed",
985                                  VG_(fnptr_to_fnentry)( &add_one_SB_completed ),
986                                  mkIRExprVec_0() );
987       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
988    }
989 
990    if (clo_trace_mem) {
991       /* At the end of the sbIn.  Flush outstandings. */
992       flushEvents(sbOut);
993    }
994 
995    return sbOut;
996 }
997 
/* Finalisation hook: print the statistics collected during the run.
 *
 * exitcode: the exit code handed in by the caller; it is only reported
 *           when clo_basic_counts is set.
 *
 * With clo_basic_counts set, this prints the function-call count, the
 * conditional-jump totals, the executed SB/instruction/IRStmt counters
 * and a few ratios between them.  With clo_detailed_counts set, it
 * prints the per-type table via print_details().
 *
 * Asserts that clo_fnname is a non-empty string and, when basic counts
 * are printed, that n_SBs_entered and n_guest_instrs are non-zero (they
 * are used as divisors).
 */
static void lk_fini(Int exitcode)
{
   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);

   if (clo_basic_counts) {
      ULong total_Jccs = n_Jccs + n_IJccs;
      /* For inverted Jccs the "untaken" counter is the one added to the
         taken total (presumably because an IR fall-through corresponds
         to a taken branch in the client -- see the Ist_Exit handling). */
      ULong taken_Jccs = (n_Jccs - n_Jccs_untaken) + n_IJccs_untaken;

      VG_(umsg)("Counted %'llu call%s to %s()\n",
                n_func_calls, ( n_func_calls==1 ? "" : "s" ), clo_fnname);

      VG_(umsg)("\n");
      VG_(umsg)("Jccs:\n");
      VG_(umsg)("  total:         %'llu\n", total_Jccs);
      /* Guard the divisor, not the quotient: '/' binds tighter than the
         conditional operator, so the guard must be parenthesised.
         Otherwise a genuine 0% would be reported as 1%, and a run with
         no Jccs at all would compute 0.0/0.0. */
      VG_(umsg)("  taken:         %'llu (%.0f%%)\n",
                taken_Jccs,
                taken_Jccs * 100.0 / (total_Jccs ? total_Jccs : 1));

      VG_(umsg)("\n");
      VG_(umsg)("Executed:\n");
      VG_(umsg)("  SBs entered:   %'llu\n", n_SBs_entered);
      VG_(umsg)("  SBs completed: %'llu\n", n_SBs_completed);
      VG_(umsg)("  guest instrs:  %'llu\n", n_guest_instrs);
      VG_(umsg)("  IRStmts:       %'llu\n", n_IRStmts);

      VG_(umsg)("\n");
      VG_(umsg)("Ratios:\n");
      tl_assert(n_SBs_entered); // Paranoia time.
      /* Ratios are scaled by 10 so that one decimal digit of precision
         survives the integer division. */
      VG_(umsg)("  guest instrs : SB entered  = %'llu : 10\n",
         10 * n_guest_instrs / n_SBs_entered);
      VG_(umsg)("       IRStmts : SB entered  = %'llu : 10\n",
         10 * n_IRStmts / n_SBs_entered);
      tl_assert(n_guest_instrs); // Paranoia time.
      VG_(umsg)("       IRStmts : guest instr = %'llu : 10\n",
         10 * n_IRStmts / n_guest_instrs);
   }

   if (clo_detailed_counts) {
      VG_(umsg)("\n");
      VG_(umsg)("IR-level counts by type:\n");
      print_details();
   }

   /* Kept separate from the first clo_basic_counts section so that the
      exit code is printed after the detailed counts. */
   if (clo_basic_counts) {
      VG_(umsg)("\n");
      VG_(umsg)("Exit code:       %d\n", exitcode);
   }
}
1046 
lk_pre_clo_init(void)1047 static void lk_pre_clo_init(void)
1048 {
1049    VG_(details_name)            ("Lackey");
1050    VG_(details_version)         (NULL);
1051    VG_(details_description)     ("an example Valgrind tool");
1052    VG_(details_copyright_author)(
1053       "Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote.");
1054    VG_(details_bug_reports_to)  (VG_BUGS_TO);
1055    VG_(details_avg_translation_sizeB) ( 200 );
1056 
1057    VG_(basic_tool_funcs)          (lk_post_clo_init,
1058                                    lk_instrument,
1059                                    lk_fini);
1060    VG_(needs_command_line_options)(lk_process_cmd_line_option,
1061                                    lk_print_usage,
1062                                    lk_print_debug_usage);
1063 }
1064 
/* Hands lk_pre_clo_init to the VG_DETERMINE_INTERFACE_VERSION macro.
   NOTE(review): presumably this is how the core locates the tool's
   pre-option-processing initialiser and checks that tool and core agree
   on the interface version -- confirm against the macro's definition. */
VG_DETERMINE_INTERFACE_VERSION(lk_pre_clo_init)
1066 
1067 /*--------------------------------------------------------------------*/
1068 /*--- end                                                lk_main.c ---*/
1069 /*--------------------------------------------------------------------*/
1070