1#!/usr/bin/env python
2#
3# trace         Trace a function and print a trace message based on its
4#               parameters, with an optional filter.
5#
6# usage: trace [-h] [-p PID] [-L TID] [-v] [-Z STRING_SIZE] [-S]
7#              [-M MAX_EVENTS] [-T] [-t] [-K] [-U] [-a] [-I header]
8#              probe [probe ...]
9#
10# Licensed under the Apache License, Version 2.0 (the "License")
11# Copyright (C) 2016 Sasha Goldshtein.
12
13from __future__ import print_function
14from bcc import BPF, USDT
15from functools import partial
16from time import sleep, strftime
17import argparse
18import re
19import ctypes as ct
20import os
21import traceback
22import sys
23
24class Probe(object):
25        probe_count = 0
26        streq_index = 0
27        max_events = None
28        event_count = 0
29        first_ts = 0
30        print_time = False
31        use_localtime = True
32        time_field = False
33        print_cpu = False
34        print_address = False
35        tgid = -1
36        pid = -1
37        page_cnt = None
38
39        @classmethod
40        def configure(cls, args):
41                cls.max_events = args.max_events
42                cls.print_time = args.timestamp or args.time
43                cls.use_localtime = not args.timestamp
44                cls.time_field = cls.print_time and (not cls.use_localtime)
45                cls.print_cpu = args.print_cpu
46                cls.print_address = args.address
47                cls.first_ts = BPF.monotonic_time()
48                cls.tgid = args.tgid or -1
49                cls.pid = args.pid or -1
50                cls.page_cnt = args.buffer_pages
51                cls.bin_cmp = args.bin_cmp
52
53        def __init__(self, probe, string_size, kernel_stack, user_stack):
54                self.usdt = None
55                self.streq_functions = ""
56                self.raw_probe = probe
57                self.string_size = string_size
58                self.kernel_stack = kernel_stack
59                self.user_stack = user_stack
60                Probe.probe_count += 1
61                self._parse_probe()
62                self.probe_num = Probe.probe_count
63                self.probe_name = "probe_%s_%d" % \
64                                (self._display_function(), self.probe_num)
65                self.probe_name = re.sub(r'[^A-Za-z0-9_]', '_',
66                                         self.probe_name)
67
68                # compiler can generate proper codes for function
69                # signatures with "syscall__" prefix
70                if self.is_syscall_kprobe:
71                        self.probe_name = "syscall__" + self.probe_name[6:]
72
73        def __str__(self):
74                return "%s:%s:%s FLT=%s ACT=%s/%s" % (self.probe_type,
75                        self.library, self._display_function(), self.filter,
76                        self.types, self.values)
77
78        def is_default_action(self):
79                return self.python_format == ""
80
81        def _bail(self, error):
82                raise ValueError("error in probe '%s': %s" %
83                                 (self.raw_probe, error))
84
85        def _parse_probe(self):
86                text = self.raw_probe
87
88                # There might be a function signature preceding the actual
89                # filter/print part, or not. Find the probe specifier first --
90                # it ends with either a space or an open paren ( for the
91                # function signature part.
92                #                                          opt. signature
93                #                               probespec       |      rest
94                #                               ---------  ----------   --
95                (spec, sig, rest) = re.match(r'([^ \t\(]+)(\([^\(]*\))?(.*)',
96                                             text).groups()
97
98                self._parse_spec(spec)
99                # Remove the parens
100                self.signature = sig[1:-1] if sig else None
101                if self.signature and self.probe_type in ['u', 't']:
102                        self._bail("USDT and tracepoint probes can't have " +
103                                   "a function signature; use arg1, arg2, " +
104                                   "... instead")
105
106                text = rest.lstrip()
107                # If we now have a (, wait for the balanced closing ) and that
108                # will be the predicate
109                self.filter = None
110                if len(text) > 0 and text[0] == "(":
111                        balance = 1
112                        for i in range(1, len(text)):
113                                if text[i] == "(":
114                                        balance += 1
115                                if text[i] == ")":
116                                        balance -= 1
117                                if balance == 0:
118                                        self._parse_filter(text[:i + 1])
119                                        text = text[i + 1:]
120                                        break
121                        if self.filter is None:
122                                self._bail("unmatched end of predicate")
123
124                if self.filter is None:
125                        self.filter = "1"
126
127                # The remainder of the text is the printf action
128                self._parse_action(text.lstrip())
129
130        def _parse_spec(self, spec):
131                parts = spec.split(":")
132                # Two special cases: 'func' means 'p::func', 'lib:func' means
133                # 'p:lib:func'. Other combinations need to provide an empty
134                # value between delimiters, e.g. 'r::func' for a kretprobe on
135                # the function func.
136                if len(parts) == 1:
137                        parts = ["p", "", parts[0]]
138                elif len(parts) == 2:
139                        parts = ["p", parts[0], parts[1]]
140                if len(parts[0]) == 0:
141                        self.probe_type = "p"
142                elif parts[0] in ["p", "r", "t", "u"]:
143                        self.probe_type = parts[0]
144                else:
145                        self._bail("probe type must be '', 'p', 't', 'r', " +
146                                   "or 'u', but got '%s'" % parts[0])
147                if self.probe_type == "t":
148                        self.tp_category = parts[1]
149                        self.tp_event = parts[2]
150                        self.library = ""       # kernel
151                        self.function = ""      # from TRACEPOINT_PROBE
152                elif self.probe_type == "u":
153                        self.library = ':'.join(parts[1:-1])
154                        self.usdt_name = parts[-1]
155                        self.function = ""      # no function, just address
156                        # We will discover the USDT provider by matching on
157                        # the USDT name in the specified library
158                        self._find_usdt_probe()
159                else:
160                        self.library = ':'.join(parts[1:-1])
161                        self.function = parts[-1]
162
163                # only x64 syscalls needs checking, no other syscall wrapper yet.
164                self.is_syscall_kprobe = False
165                if self.probe_type == "p" and len(self.library) == 0 and \
166                   self.function[:10] == "__x64_sys_":
167                        self.is_syscall_kprobe = True
168
169        def _find_usdt_probe(self):
170                target = Probe.pid if Probe.pid and Probe.pid != -1 \
171                                   else Probe.tgid
172                self.usdt = USDT(path=self.library, pid=target)
173                for probe in self.usdt.enumerate_probes():
174                        if probe.name == self.usdt_name.encode('ascii'):
175                                return  # Found it, will enable later
176                self._bail("unrecognized USDT probe %s" % self.usdt_name)
177
178        def _parse_filter(self, filt):
179                self.filter = self._rewrite_expr(filt)
180
181        def _parse_types(self, fmt):
182                for match in re.finditer(
183                            r'[^%]%(s|u|d|lu|llu|ld|lld|hu|hd|x|lx|llx|c|K|U)', fmt):
184                        self.types.append(match.group(1))
185                fmt = re.sub(r'([^%]%)(u|d|lu|llu|ld|lld|hu|hd)', r'\1d', fmt)
186                fmt = re.sub(r'([^%]%)(x|lx|llx)', r'\1x', fmt)
187                fmt = re.sub('%K|%U', '%s', fmt)
188                self.python_format = fmt.strip('"')
189
190        def _parse_action(self, action):
191                self.values = []
192                self.types = []
193                self.python_format = ""
194                if len(action) == 0:
195                        return
196
197                action = action.strip()
198                match = re.search(r'(\".*?\"),?(.*)', action)
199                if match is None:
200                        self._bail("expected format string in \"s")
201
202                self.raw_format = match.group(1)
203                self._parse_types(self.raw_format)
204                for part in re.split('(?<!"),', match.group(2)):
205                        part = self._rewrite_expr(part)
206                        if len(part) > 0:
207                                self.values.append(part)
208
209        aliases_arg = {
210                "arg1": "PT_REGS_PARM1(ctx)",
211                "arg2": "PT_REGS_PARM2(ctx)",
212                "arg3": "PT_REGS_PARM3(ctx)",
213                "arg4": "PT_REGS_PARM4(ctx)",
214                "arg5": "PT_REGS_PARM5(ctx)",
215                "arg6": "PT_REGS_PARM6(ctx)",
216        }
217
218        aliases_indarg = {
219                "arg1": "({u64 _val; struct pt_regs *_ctx = PT_REGS_PARM1(ctx);"
220                        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM1(_ctx))); _val;})",
221                "arg2": "({u64 _val; struct pt_regs *_ctx = PT_REGS_PARM2(ctx);"
222                        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM2(_ctx))); _val;})",
223                "arg3": "({u64 _val; struct pt_regs *_ctx = PT_REGS_PARM3(ctx);"
224                        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM3(_ctx))); _val;})",
225                "arg4": "({u64 _val; struct pt_regs *_ctx = PT_REGS_PARM4(ctx);"
226                        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM4(_ctx))); _val;})",
227                "arg5": "({u64 _val; struct pt_regs *_ctx = PT_REGS_PARM5(ctx);"
228                        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM5(_ctx))); _val;})",
229                "arg6": "({u64 _val; struct pt_regs *_ctx = PT_REGS_PARM6(ctx);"
230                        "  bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM6(_ctx))); _val;})",
231        }
232
233        aliases_common = {
234                "retval": "PT_REGS_RC(ctx)",
235                "$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
236                "$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
237                "$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
238                "$tgid": "(unsigned)(bpf_get_current_pid_tgid() >> 32)",
239                "$cpu": "bpf_get_smp_processor_id()",
240                "$task" : "((struct task_struct *)bpf_get_current_task())"
241        }
242
243        def _generate_streq_function(self, string):
244                fname = "streq_%d" % Probe.streq_index
245                Probe.streq_index += 1
246                self.streq_functions += """
247static inline bool %s(char const *ignored, uintptr_t str) {
248        char needle[] = %s;
249        char haystack[sizeof(needle)];
250        bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
251        for (int i = 0; i < sizeof(needle) - 1; ++i) {
252                if (needle[i] != haystack[i]) {
253                        return false;
254                }
255        }
256        return true;
257}
258                """ % (fname, string)
259                return fname
260
261        def _rewrite_expr(self, expr):
262                if self.is_syscall_kprobe:
263                    for alias, replacement in Probe.aliases_indarg.items():
264                        expr = expr.replace(alias, replacement)
265                else:
266                    for alias, replacement in Probe.aliases_arg.items():
267                        # For USDT probes, we replace argN values with the
268                        # actual arguments for that probe obtained using
269                        # bpf_readarg_N macros emitted at BPF construction.
270                        if self.probe_type == "u":
271                                continue
272                        expr = expr.replace(alias, replacement)
273                for alias, replacement in Probe.aliases_common.items():
274                    expr = expr.replace(alias, replacement)
275                if self.bin_cmp:
276                    STRCMP_RE = 'STRCMP\\(\"([^"]+)\\"'
277                else:
278                    STRCMP_RE = 'STRCMP\\(("[^"]+\\")'
279                matches = re.finditer(STRCMP_RE, expr)
280                for match in matches:
281                        string = match.group(1)
282                        fname = self._generate_streq_function(string)
283                        expr = expr.replace("STRCMP", fname, 1)
284                return expr
285
286        p_type = {"u": ct.c_uint, "d": ct.c_int, "lu": ct.c_ulong,
287                  "ld": ct.c_long,
288                  "llu": ct.c_ulonglong, "lld": ct.c_longlong,
289                  "hu": ct.c_ushort, "hd": ct.c_short,
290                  "x": ct.c_uint, "lx": ct.c_ulong, "llx": ct.c_ulonglong,
291                  "c": ct.c_ubyte,
292                  "K": ct.c_ulonglong, "U": ct.c_ulonglong}
293
294        def _generate_python_field_decl(self, idx, fields):
295                field_type = self.types[idx]
296                if field_type == "s":
297                        ptype = ct.c_char * self.string_size
298                else:
299                        ptype = Probe.p_type[field_type]
300                fields.append(("v%d" % idx, ptype))
301
302        def _generate_python_data_decl(self):
303                self.python_struct_name = "%s_%d_Data" % \
304                                (self._display_function(), self.probe_num)
305                fields = []
306                if self.time_field:
307                    fields.append(("timestamp_ns", ct.c_ulonglong))
308                if self.print_cpu:
309                    fields.append(("cpu", ct.c_int))
310                fields.extend([
311                        ("tgid", ct.c_uint),
312                        ("pid", ct.c_uint),
313                        ("comm", ct.c_char * 16)       # TASK_COMM_LEN
314                ])
315                for i in range(0, len(self.types)):
316                        self._generate_python_field_decl(i, fields)
317                if self.kernel_stack:
318                        fields.append(("kernel_stack_id", ct.c_int))
319                if self.user_stack:
320                        fields.append(("user_stack_id", ct.c_int))
321                return type(self.python_struct_name, (ct.Structure,),
322                            dict(_fields_=fields))
323
324        c_type = {"u": "unsigned int", "d": "int",
325                  "lu": "unsigned long", "ld": "long",
326                  "llu": "unsigned long long", "lld": "long long",
327                  "hu": "unsigned short", "hd": "short",
328                  "x": "unsigned int", "lx": "unsigned long",
329                  "llx": "unsigned long long",
330                  "c": "char", "K": "unsigned long long",
331                  "U": "unsigned long long"}
332        fmt_types = c_type.keys()
333
334        def _generate_field_decl(self, idx):
335                field_type = self.types[idx]
336                if field_type == "s":
337                        return "char v%d[%d];\n" % (idx, self.string_size)
338                if field_type in Probe.fmt_types:
339                        return "%s v%d;\n" % (Probe.c_type[field_type], idx)
340                self._bail("unrecognized format specifier %s" % field_type)
341
342        def _generate_data_decl(self):
343                # The BPF program will populate values into the struct
344                # according to the format string, and the Python program will
345                # construct the final display string.
346                self.events_name = "%s_events" % self.probe_name
347                self.struct_name = "%s_data_t" % self.probe_name
348                self.stacks_name = "%s_stacks" % self.probe_name
349                stack_table = "BPF_STACK_TRACE(%s, 1024);" % self.stacks_name \
350                              if (self.kernel_stack or self.user_stack) else ""
351                data_fields = ""
352                for i, field_type in enumerate(self.types):
353                        data_fields += "        " + \
354                                       self._generate_field_decl(i)
355                time_str = "u64 timestamp_ns;" if self.time_field else ""
356                cpu_str = "int cpu;" if self.print_cpu else ""
357                kernel_stack_str = "       int kernel_stack_id;" \
358                                   if self.kernel_stack else ""
359                user_stack_str = "       int user_stack_id;" \
360                                 if self.user_stack else ""
361
362                text = """
363struct %s
364{
365%s
366%s
367        u32 tgid;
368        u32 pid;
369        char comm[TASK_COMM_LEN];
370%s
371%s
372%s
373};
374
375BPF_PERF_OUTPUT(%s);
376%s
377"""
378                return text % (self.struct_name, time_str, cpu_str, data_fields,
379                               kernel_stack_str, user_stack_str,
380                               self.events_name, stack_table)
381
382        def _generate_field_assign(self, idx):
383                field_type = self.types[idx]
384                expr = self.values[idx].strip()
385                text = ""
386                if self.probe_type == "u" and expr[0:3] == "arg":
387                        arg_index = int(expr[3])
388                        arg_ctype = self.usdt.get_probe_arg_ctype(
389                                self.usdt_name, arg_index - 1)
390                        text = ("        %s %s = 0;\n" +
391                                "        bpf_usdt_readarg(%s, ctx, &%s);\n") \
392                                % (arg_ctype, expr, expr[3], expr)
393
394                if field_type == "s":
395                        return text + """
396        if (%s != 0) {
397                void *__tmp = (void *)%s;
398                bpf_probe_read(&__data.v%d, sizeof(__data.v%d), __tmp);
399        }
400                """ % (expr, expr, idx, idx)
401                if field_type in Probe.fmt_types:
402                        return text + "        __data.v%d = (%s)%s;\n" % \
403                                        (idx, Probe.c_type[field_type], expr)
404                self._bail("unrecognized field type %s" % field_type)
405
406        def _generate_usdt_filter_read(self):
407            text = ""
408            if self.probe_type != "u":
409                    return text
410            for arg, _ in Probe.aliases_arg.items():
411                    if not (arg in self.filter):
412                            continue
413                    arg_index = int(arg.replace("arg", ""))
414                    arg_ctype = self.usdt.get_probe_arg_ctype(
415                            self.usdt_name, arg_index - 1)
416                    if not arg_ctype:
417                            self._bail("Unable to determine type of {} "
418                                       "in the filter".format(arg))
419                    text += """
420        {} {}_filter;
421        bpf_usdt_readarg({}, ctx, &{}_filter);
422                    """.format(arg_ctype, arg, arg_index, arg)
423                    self.filter = self.filter.replace(
424                            arg, "{}_filter".format(arg))
425            return text
426
        def generate_program(self, include_self):
                """Return the BPF C source for this probe.

                The result is the generated STRCMP helpers, followed by the
                event struct and table declarations, followed by the probe
                function. The function applies the pid/tgid filter, evaluates
                the user filter, fills in the event fields and submits the
                event through the perf output table.

                include_self: only consulted when neither the thread filter
                nor the kernel-probe process filter applies; when false,
                events whose tgid equals os.getpid() are dropped.
                """
                # Must run first: it sets events_name, struct_name and
                # stacks_name, which are used below.
                data_decl = self._generate_data_decl()
                if Probe.pid != -1:
                        pid_filter = """
        if (__pid != %d) { return 0; }
                """ % Probe.pid
                # uprobes can have a built-in tgid filter passed to
                # attach_uprobe, hence the check here -- for kprobes, we
                # need to do the tgid test by hand:
                elif len(self.library) == 0 and Probe.tgid != -1:
                        pid_filter = """
        if (__tgid != %d) { return 0; }
                """ % Probe.tgid
                elif not include_self:
                        pid_filter = """
        if (__tgid == %d) { return 0; }
                """ % os.getpid()
                else:
                        pid_filter = ""

                # prefix is always empty here; it only fills a template slot.
                prefix = ""
                # A user-supplied signature is appended after the context
                # parameter of the generated function.
                signature = "struct pt_regs *ctx"
                if self.signature:
                        signature += ", " + self.signature

                data_fields = ""
                for i, expr in enumerate(self.values):
                        data_fields += self._generate_field_assign(i)

                # Tracepoints use the TRACEPOINT_PROBE heading and refer to
                # their context as "args"; everything else is a plain
                # function taking "ctx".
                if self.probe_type == "t":
                        heading = "TRACEPOINT_PROBE(%s, %s)" % \
                                  (self.tp_category, self.tp_event)
                        ctx_name = "args"
                else:
                        heading = "int %s(%s)" % (self.probe_name, signature)
                        ctx_name = "ctx"

                time_str = """
        __data.timestamp_ns = bpf_ktime_get_ns();""" if self.time_field else ""
                cpu_str = """
        __data.cpu = bpf_get_smp_processor_id();""" if self.print_cpu else ""
                stack_trace = ""
                if self.user_stack:
                        stack_trace += """
        __data.user_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID | BPF_F_USER_STACK
        );""" % (self.stacks_name, ctx_name)
                if self.kernel_stack:
                        stack_trace += """
        __data.kernel_stack_id = %s.get_stackid(
          %s, BPF_F_REUSE_STACKID
        );""" % (self.stacks_name, ctx_name)

                text = heading + """
{
        u64 __pid_tgid = bpf_get_current_pid_tgid();
        u32 __tgid = __pid_tgid >> 32;
        u32 __pid = __pid_tgid; // implicit cast to u32 for bottom half
        %s
        %s
        %s
        if (!(%s)) return 0;

        struct %s __data = {0};
        %s
        %s
        __data.tgid = __tgid;
        __data.pid = __pid;
        bpf_get_current_comm(&__data.comm, sizeof(__data.comm));
%s
%s
        %s.perf_submit(%s, &__data, sizeof(__data));
        return 0;
}
"""
                # Order matters in this tuple: _generate_usdt_filter_read()
                # rewrites self.filter for USDT probes, and because the tuple
                # is evaluated left to right the rewritten filter is the one
                # substituted into the template.
                text = text % (pid_filter, prefix,
                               self._generate_usdt_filter_read(), self.filter,
                               self.struct_name, time_str, cpu_str, data_fields,
                               stack_trace, self.events_name, ctx_name)

                return self.streq_functions + data_decl + "\n" + text
508
509        @classmethod
510        def _time_off_str(cls, timestamp_ns):
511                return "%.6f" % (1e-9 * (timestamp_ns - cls.first_ts))
512
513        def _display_function(self):
514                if self.probe_type == 'p' or self.probe_type == 'r':
515                        return self.function
516                elif self.probe_type == 'u':
517                        return self.usdt_name
518                else:   # self.probe_type == 't'
519                        return self.tp_event
520
521        def print_stack(self, bpf, stack_id, tgid):
522            if stack_id < 0:
523                print("        %d" % stack_id)
524                return
525
526            stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
527            for addr in stack:
528                print("        ", end="")
529                if Probe.print_address:
530                    print("%16x " % addr, end="")
531                print("%s" % (bpf.sym(addr, tgid,
532                                     show_module=True, show_offset=True)))
533
534        def _format_message(self, bpf, tgid, values):
535                # Replace each %K with kernel sym and %U with user sym in tgid
536                kernel_placeholders = [i for i, t in enumerate(self.types)
537                                       if t == 'K']
538                user_placeholders = [i for i, t in enumerate(self.types)
539                                     if t == 'U']
540                for kp in kernel_placeholders:
541                        values[kp] = bpf.ksym(values[kp], show_offset=True)
542                for up in user_placeholders:
543                        values[up] = bpf.sym(values[up], tgid,
544                                           show_module=True, show_offset=True)
545                return self.python_format % tuple(values)
546
547        def print_event(self, bpf, cpu, data, size):
548                # Cast as the generated structure type and display
549                # according to the format string in the probe.
550                event = ct.cast(data, ct.POINTER(self.python_struct)).contents
551                values = map(lambda i: getattr(event, "v%d" % i),
552                             range(0, len(self.values)))
553                msg = self._format_message(bpf, event.tgid, values)
554                if Probe.print_time:
555                    time = strftime("%H:%M:%S") if Probe.use_localtime else \
556                           Probe._time_off_str(event.timestamp_ns)
557                    print("%-8s " % time[:8], end="")
558                if Probe.print_cpu:
559                    print("%-3s " % event.cpu, end="")
560                print("%-7d %-7d %-15s %-16s %s" %
561                      (event.tgid, event.pid,
562                       event.comm.decode('utf-8', 'replace'),
563                       self._display_function(), msg))
564
565                if self.kernel_stack:
566                        self.print_stack(bpf, event.kernel_stack_id, -1)
567                if self.user_stack:
568                        self.print_stack(bpf, event.user_stack_id, event.tgid)
569                if self.user_stack or self.kernel_stack:
570                        print("")
571
572                Probe.event_count += 1
573                if Probe.max_events is not None and \
574                   Probe.event_count >= Probe.max_events:
575                        exit()
576
577        def attach(self, bpf, verbose):
578                if len(self.library) == 0:
579                        self._attach_k(bpf)
580                else:
581                        self._attach_u(bpf)
582                self.python_struct = self._generate_python_data_decl()
583                callback = partial(self.print_event, bpf)
584                bpf[self.events_name].open_perf_buffer(callback,
585                        page_cnt=self.page_cnt)
586
587        def _attach_k(self, bpf):
588                if self.probe_type == "r":
589                        bpf.attach_kretprobe(event=self.function,
590                                             fn_name=self.probe_name)
591                elif self.probe_type == "p":
592                        bpf.attach_kprobe(event=self.function,
593                                          fn_name=self.probe_name)
594                # Note that tracepoints don't need an explicit attach
595
596        def _attach_u(self, bpf):
597                libpath = BPF.find_library(self.library)
598                if libpath is None:
599                        # This might be an executable (e.g. 'bash')
600                        libpath = BPF.find_exe(self.library)
601                if libpath is None or len(libpath) == 0:
602                        self._bail("unable to find library %s" % self.library)
603
604                if self.probe_type == "u":
605                        pass    # Was already enabled by the BPF constructor
606                elif self.probe_type == "r":
607                        bpf.attach_uretprobe(name=libpath,
608                                             sym=self.function,
609                                             fn_name=self.probe_name,
610                                             pid=Probe.tgid)
611                else:
612                        bpf.attach_uprobe(name=libpath,
613                                          sym=self.function,
614                                          fn_name=self.probe_name,
615                                          pid=Probe.tgid)
616
617class Tool(object):
618        DEFAULT_PERF_BUFFER_PAGES = 64
619        examples = """
620EXAMPLES:
621
622trace do_sys_open
623        Trace the open syscall and print a default trace message when entered
624trace 'do_sys_open "%s", arg2'
625        Trace the open syscall and print the filename being opened
626trace 'sys_read (arg3 > 20000) "read %d bytes", arg3'
627        Trace the read syscall and print a message for reads >20000 bytes
628trace 'r::do_sys_open "%llx", retval'
629        Trace the return from the open syscall and print the return value
630trace 'c:open (arg2 == 42) "%s %d", arg1, arg2'
631        Trace the open() call from libc only if the flags (arg2) argument is 42
632trace 'c:malloc "size = %d", arg1'
633        Trace malloc calls and print the size being allocated
634trace 'p:c:write (arg1 == 1) "writing %d bytes to STDOUT", arg3'
635        Trace the write() call from libc to monitor writes to STDOUT
636trace 'r::__kmalloc (retval == 0) "kmalloc failed!"'
637        Trace returns from __kmalloc which returned a null pointer
638trace 'r:c:malloc (retval) "allocated = %x", retval'
639        Trace returns from malloc and print non-NULL allocated buffers
640trace 't:block:block_rq_complete "sectors=%d", args->nr_sector'
641        Trace the block_rq_complete kernel tracepoint and print # of tx sectors
642trace 'u:pthread:pthread_create (arg4 != 0)'
643        Trace the USDT probe pthread_create when its 4th argument is non-zero
644trace 'p::SyS_nanosleep(struct timespec *ts) "sleep for %lld ns", ts->tv_nsec'
645        Trace the nanosleep syscall and print the sleep duration in ns
646trace -I 'linux/fs.h' \\
647      'p::uprobe_register(struct inode *inode) "a_ops = %llx", inode->i_mapping->a_ops'
648        Trace the uprobe_register inode mapping ops, and the symbol can be found
649        in /proc/kallsyms
650trace -I 'kernel/sched/sched.h' \\
651      'p::__account_cfs_rq_runtime(struct cfs_rq *cfs_rq) "%d", cfs_rq->runtime_remaining'
652        Trace the cfs scheduling runqueue remaining runtime. The struct cfs_rq is defined
653        in kernel/sched/sched.h which is in kernel source tree and not in kernel-devel
654        package.  So this command needs to run at the kernel source tree root directory
655        so that the added header file can be found by the compiler.
656trace -I 'net/sock.h' \\
657      'udpv6_sendmsg(struct sock *sk) (sk->sk_dport == 13568)'
658        Trace udpv6 sendmsg calls only if socket's destination port is equal
659        to 53 (DNS; 13568 in big endian order)
660trace -I 'linux/fs_struct.h' 'mntns_install "users = %d", $task->fs->users'
661        Trace the number of users accessing the file system of the current task
662"""
663
664        def __init__(self):
665                parser = argparse.ArgumentParser(description="Attach to " +
666                  "functions and print trace messages.",
667                  formatter_class=argparse.RawDescriptionHelpFormatter,
668                  epilog=Tool.examples)
669                parser.add_argument("-b", "--buffer-pages", type=int,
670                  default=Tool.DEFAULT_PERF_BUFFER_PAGES,
671                  help="number of pages to use for perf_events ring buffer "
672                       "(default: %(default)d)")
673                # we'll refer to the userspace concepts of "pid" and "tid" by
674                # their kernel names -- tgid and pid -- inside the script
675                parser.add_argument("-p", "--pid", type=int, metavar="PID",
676                  dest="tgid", help="id of the process to trace (optional)")
677                parser.add_argument("-L", "--tid", type=int, metavar="TID",
678                  dest="pid", help="id of the thread to trace (optional)")
679                parser.add_argument("-v", "--verbose", action="store_true",
680                  help="print resulting BPF program code before executing")
681                parser.add_argument("-Z", "--string-size", type=int,
682                  default=80, help="maximum size to read from strings")
683                parser.add_argument("-S", "--include-self",
684                  action="store_true",
685                  help="do not filter trace's own pid from the trace")
686                parser.add_argument("-M", "--max-events", type=int,
687                  help="number of events to print before quitting")
688                parser.add_argument("-t", "--timestamp", action="store_true",
689                  help="print timestamp column (offset from trace start)")
690                parser.add_argument("-T", "--time", action="store_true",
691                  help="print time column")
692                parser.add_argument("-C", "--print_cpu", action="store_true",
693                  help="print CPU id")
694                parser.add_argument("-B", "--bin_cmp", action="store_true",
695                  help="allow to use STRCMP with binary values")
696                parser.add_argument("-K", "--kernel-stack",
697                  action="store_true", help="output kernel stack trace")
698                parser.add_argument("-U", "--user-stack",
699                  action="store_true", help="output user stack trace")
700                parser.add_argument("-a", "--address", action="store_true",
701                  help="print virtual address in stacks")
702                parser.add_argument(metavar="probe", dest="probes", nargs="+",
703                  help="probe specifier (see examples)")
704                parser.add_argument("-I", "--include", action="append",
705                  metavar="header",
706                  help="additional header files to include in the BPF program "
707                       "as either full path, "
708                       "or relative to current working directory, "
709                       "or relative to default kernel header search path")
710                parser.add_argument("--ebpf", action="store_true",
711                  help=argparse.SUPPRESS)
712                self.args = parser.parse_args()
713                if self.args.tgid and self.args.pid:
714                        parser.error("only one of -p and -L may be specified")
715
716        def _create_probes(self):
717                Probe.configure(self.args)
718                self.probes = []
719                for probe_spec in self.args.probes:
720                        self.probes.append(Probe(
721                                probe_spec, self.args.string_size,
722                                self.args.kernel_stack, self.args.user_stack))
723
724        def _generate_program(self):
725                self.program = """
726#include <linux/ptrace.h>
727#include <linux/sched.h>        /* For TASK_COMM_LEN */
728
729"""
730                for include in (self.args.include or []):
731                        if include.startswith((".", "/")):
732                                include = os.path.abspath(include)
733                                self.program += "#include \"%s\"\n" % include
734                        else:
735                                self.program += "#include <%s>\n" % include
736                self.program += BPF.generate_auto_includes(
737                        map(lambda p: p.raw_probe, self.probes))
738                for probe in self.probes:
739                        self.program += probe.generate_program(
740                                        self.args.include_self)
741
742                if self.args.verbose or self.args.ebpf:
743                        print(self.program)
744                        if self.args.ebpf:
745                                exit()
746
747        def _attach_probes(self):
748                usdt_contexts = []
749                for probe in self.probes:
750                    if probe.usdt:
751                        # USDT probes must be enabled before the BPF object
752                        # is initialized, because that's where the actual
753                        # uprobe is being attached.
754                        probe.usdt.enable_probe(
755                                probe.usdt_name, probe.probe_name)
756                        if self.args.verbose:
757                                print(probe.usdt.get_text())
758                        usdt_contexts.append(probe.usdt)
759                self.bpf = BPF(text=self.program, usdt_contexts=usdt_contexts)
760                for probe in self.probes:
761                        if self.args.verbose:
762                                print(probe)
763                        probe.attach(self.bpf, self.args.verbose)
764
765        def _main_loop(self):
766                all_probes_trivial = all(map(Probe.is_default_action,
767                                             self.probes))
768
769                # Print header
770                if self.args.timestamp or self.args.time:
771                    print("%-8s " % "TIME", end="");
772                if self.args.print_cpu:
773                    print("%-3s " % "CPU", end="");
774                print("%-7s %-7s %-15s %-16s %s" %
775                      ("PID", "TID", "COMM", "FUNC",
776                      "-" if not all_probes_trivial else ""))
777
778                while True:
779                        self.bpf.perf_buffer_poll()
780
781        def run(self):
782                try:
783                        self._create_probes()
784                        self._generate_program()
785                        self._attach_probes()
786                        self._main_loop()
787                except:
788                        exc_info = sys.exc_info()
789                        sys_exit = exc_info[0] is SystemExit
790                        if self.args.verbose:
791                                traceback.print_exc()
792                        elif not sys_exit:
793                                print(exc_info[1])
794                        exit(0 if sys_exit else 1)
795
if __name__ == "__main__":
        # Run only when executed as a script: build the Tool (its
        # constructor parses the command line) and start it.
        Tool().run()
798