1#!/usr/bin/env python
2#
3# syscount   Summarize syscall counts and latencies.
4#
# USAGE: syscount [-p PID] [-i INTERVAL] [-d DURATION] [-T TOP] [-x]
#                 [-e ERRNO] [-L] [-m] [-P] [-l]
6#
7# Copyright 2017, Sasha Goldshtein.
8# Licensed under the Apache License, Version 2.0 (the "License")
9#
10# 15-Feb-2017   Sasha Goldshtein    Created this.
11
12from time import sleep, strftime
13import argparse
14import errno
15import itertools
16import sys
17import signal
18from bcc import BPF
19from bcc.utils import printb
20from bcc.syscall import syscall_name
21
# Python 2 names the function izip_longest; Python 3 renamed it to
# zip_longest.  Probe for the old name and fall back to the new one.
try:
    izip_longest = itertools.izip_longest
except AttributeError:
    izip_longest = itertools.zip_longest
26
# signal handler
def signal_ignore(signal, frame):
    """Swallow a signal, emitting only a newline.

    Installed for SIGINT once the first Ctrl+C has been seen, so that further
    interrupts during the final report do not raise KeyboardInterrupt.

    Both parameters are supplied by the interpreter (signal number and the
    current stack frame) and are intentionally unused.
    """
    # print("") rather than print(): without `from __future__ import
    # print_function`, Python 2 would render print() as "()".
    print("")
30
def handle_errno(errstr):
    """Convert a command-line errno argument to a non-negative number.

    Used as an argparse ``type=`` callable.

    Args:
        errstr: either a decimal number (the sign is ignored, so "-2" and "2"
            are equivalent) or a symbolic name from the ``errno`` module such
            as "EPERM".

    Returns:
        The errno value as a non-negative int.

    Raises:
        argparse.ArgumentTypeError: if ``errstr`` is neither a number nor the
            name of an integer constant in the ``errno`` module.
    """
    try:
        return abs(int(errstr))
    except ValueError:
        pass

    # Only integer constants are valid errno names.  The errno module also
    # exposes non-integer attributes (e.g. the ``errorcode`` dict) that a
    # plain getattr() would happily return and that would blow up later.
    value = getattr(errno, errstr, None)
    if not isinstance(value, int):
        raise argparse.ArgumentTypeError(
            "couldn't map %s to an errno" % errstr)
    return value
41
42
# Command-line interface.  The options are parsed once at import time and
# the resulting `args` namespace is consulted by the rest of the script.
parser = argparse.ArgumentParser(
    description="Summarize syscall counts and latencies.")
parser.add_argument("-p", "--pid", type=int, help="trace only this pid")
parser.add_argument("-i", "--interval", type=int,
    help="print summary at this interval (seconds)")
parser.add_argument("-d", "--duration", type=int,
    help="total duration of trace, in seconds")
parser.add_argument("-T", "--top", type=int, default=10,
    help="print only the top syscalls by count or latency")
parser.add_argument("-x", "--failures", action="store_true",
    help="trace only failed syscalls (return < 0)")
# The value is converted by handle_errno, so both numbers and symbolic
# names are accepted.
parser.add_argument("-e", "--errno", type=handle_errno,
    help="trace only syscalls that return this error (numeric or EPERM, etc.)")
parser.add_argument("-L", "--latency", action="store_true",
    help="collect syscall latency")
parser.add_argument("-m", "--milliseconds", action="store_true",
    help="display latency in milliseconds (default: microseconds)")
parser.add_argument("-P", "--process", action="store_true",
    help="count by process and not by syscall")
parser.add_argument("-l", "--list", action="store_true",
    help="print list of recognized syscalls and exit")
# Hidden switch (suppressed from --help): print the generated BPF program
# and exit instead of loading it.
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()
# With a duration but no interval, report once when the duration elapses.
if args.duration and not args.interval:
    args.interval = args.duration
# With neither given, use a very long interval so that in practice the only
# report is the one printed on Ctrl+C.
if not args.interval:
    args.interval = 99999999
71
# -l/--list: print the names of all recognized syscalls, four per row, and
# exit without tracing anything.
if args.list:
    # The file-level import only brings in `syscall_name`; the name table
    # itself has to be imported here or the lookup below is a NameError.
    from bcc.syscall import syscalls

    # Zipping four references to the *same* iterator yields consecutive
    # groups of four names; izip_longest pads the last group with None.
    names = iter(sorted(syscalls.values()))
    for grp in izip_longest(*(names,) * 4):
        # Names may be byte strings (the stats printers format them with
        # bytes templates); decode so Python 3 does not print b'...'.
        print("   ".join(
            ["%-20s" % (s.decode() if isinstance(s, bytes) else s)
             for s in grp if s is not None]))
    sys.exit(0)
76
# BPF C program handed to BPF(text=...).  It is configured entirely through
# preprocessor symbols that are prepended to this string further down,
# according to the command-line options:
#   LATENCY       - attach to sys_enter as well, store a start timestamp per
#                   pid_tgid, and accumulate count + total_ns per key
#   FILTER_PID    - ignore events whose pid_tgid >> 32 differs from this value
#   FILTER_FAILED - ignore syscalls whose return value is >= 0
#   FILTER_ERRNO  - ignore syscalls whose return value is not -FILTER_ERRNO
#   BY_PROCESS    - key the `data` map by pid_tgid >> 32 instead of by
#                   the syscall id
text = """
#ifdef LATENCY
struct data_t {
    u64 count;
    u64 total_ns;
};

BPF_HASH(start, u64, u64);
BPF_HASH(data, u32, struct data_t);
#else
BPF_HASH(data, u32, u64);
#endif

#ifdef LATENCY
TRACEPOINT_PROBE(raw_syscalls, sys_enter) {
    u64 pid_tgid = bpf_get_current_pid_tgid();

#ifdef FILTER_PID
    if (pid_tgid >> 32 != FILTER_PID)
        return 0;
#endif

    u64 t = bpf_ktime_get_ns();
    start.update(&pid_tgid, &t);
    return 0;
}
#endif

TRACEPOINT_PROBE(raw_syscalls, sys_exit) {
    u64 pid_tgid = bpf_get_current_pid_tgid();

#ifdef FILTER_PID
    if (pid_tgid >> 32 != FILTER_PID)
        return 0;
#endif

#ifdef FILTER_FAILED
    if (args->ret >= 0)
        return 0;
#endif

#ifdef FILTER_ERRNO
    if (args->ret != -FILTER_ERRNO)
        return 0;
#endif

#ifdef BY_PROCESS
    u32 key = pid_tgid >> 32;
#else
    u32 key = args->id;
#endif

#ifdef LATENCY
    struct data_t *val, zero = {};
    u64 *start_ns = start.lookup(&pid_tgid);
    if (!start_ns)
        return 0;

    val = data.lookup_or_init(&key, &zero);
    val->count++;
    val->total_ns += bpf_ktime_get_ns() - *start_ns;
#else
    u64 *val, zero = 0;
    val = data.lookup_or_init(&key, &zero);
    ++(*val);
#endif
    return 0;
}
"""
146
# Turn the command-line options into preprocessor definitions placed in
# front of the BPF program.  Each enabled option is prepended in turn, so
# the last one handled ends up first in the final text.
if args.pid:
    text = ("#define FILTER_PID %d\n" % args.pid) + text
if args.failures:
    text = "#define FILTER_FAILED\n" + text
if args.errno:
    text = "#define FILTER_ERRNO %d\n" % abs(args.errno) + text
if args.latency:
    text = "#define LATENCY\n" + text
if args.process:
    text = "#define BY_PROCESS\n" + text
if args.ebpf:
    # Debug aid: dump the generated program instead of loading it.
    print(text)
    # sys.exit() rather than exit(): the bare builtin is injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    sys.exit()

# Compile and load the program; the tracepoint probes attach here.
bpf = BPF(text=text)
162
def print_stats():
    """Print one summary, in latency or plain-count form per --latency."""
    report = print_latency_stats if args.latency else print_count_stats
    report()
168
# Header of the first output column: processes or syscalls, per --process.
if args.process:
    agg_colname = "PID    COMM"
else:
    agg_colname = "SYSCALL"

# Header of the latency column, reflecting the unit chosen by --milliseconds.
if args.milliseconds:
    time_colname = "TIME (ms)"
else:
    time_colname = "TIME (us)"
171
def comm_for_pid(pid):
    """Return the command name of process `pid` as bytes.

    The name is read from /proc/<pid>/comm with surrounding whitespace
    stripped.  The lookup is best effort: on any failure (for example the
    process has already exited) b"[unknown]" is returned instead.
    """
    try:
        # Context manager so the descriptor is closed right away instead of
        # whenever the garbage collector gets to it; this is called once per
        # printed row.
        with open("/proc/%d/comm" % pid, "rb") as comm_file:
            return comm_file.read().strip()
    except Exception:
        return b"[unknown]"
177
def agg_colval(key):
    """Render a map key as the bytes shown in the first output column.

    In --process mode the key is a pid, shown with its command name;
    otherwise it is a syscall number, shown as the syscall's name.
    """
    if not args.process:
        return syscall_name(key.value)

    pid = key.value
    return b"%-6d %-15s" % (pid, comm_for_pid(pid))
183
def print_count_stats():
    """Print the `args.top` busiest keys by event count, then reset the map.

    Output is a timestamp line, a header line, one row per key sorted by
    descending count, and a trailing blank line.  The BPF `data` map is
    cleared afterwards so each report covers a single interval.
    """
    data = bpf["data"]
    print("[%s]" % strftime("%H:%M:%S"))
    print("%-22s %8s" % (agg_colname, "COUNT"))

    # Drop the 0xFFFFFFFF key (happens occasionally, we don't need it;
    # presumably an id of -1 stored in the u32 key) *before* truncating to
    # the top N, so that it cannot take up one of the N slots.
    rows = [(k, v) for k, v in data.items() if k.value != 0xFFFFFFFF]
    rows.sort(key=lambda kv: kv[1].value, reverse=True)

    for k, v in rows[:args.top]:
        printb(b"%-22s %8d" % (agg_colval(k), v.value))
    print("")
    data.clear()
194
def print_latency_stats():
    """Print the `args.top` keys by total time spent, then reset the map.

    Output is a timestamp line, a header line, one row per key (count and
    accumulated latency) sorted by descending total time, and a trailing
    blank line.  Latency is shown in milliseconds with --milliseconds and in
    microseconds otherwise.  The BPF `data` map is cleared afterwards so each
    report covers a single interval.
    """
    data = bpf["data"]
    print("[%s]" % strftime("%H:%M:%S"))
    print("%-22s %8s %16s" % (agg_colname, "COUNT", time_colname))

    # Unit-dependent row format and ns divisor, chosen once per report.
    if args.milliseconds:
        row_fmt, divisor = b"%-22s %8d %16.6f", 1e6
    else:
        row_fmt, divisor = b"%-22s %8d %16.3f", 1e3

    # Drop the 0xFFFFFFFF key (happens occasionally, we don't need it;
    # presumably an id of -1 stored in the u32 key) *before* truncating to
    # the top N, so that it cannot take up one of the N slots.
    rows = [(k, v) for k, v in data.items() if k.value != 0xFFFFFFFF]
    rows.sort(key=lambda kv: kv[1].total_ns, reverse=True)

    for k, v in rows[:args.top]:
        printb(row_fmt % (agg_colval(k), v.count, v.total_ns / divisor))
    print("")
    data.clear()
# Main loop: sleep for one interval, print a report, and repeat until the
# requested duration has elapsed or the user presses Ctrl+C.
print("Tracing %ssyscalls, printing top %d... Ctrl+C to quit." %
      ("failed " if args.failures else "", args.top))
exiting = not args.interval
seconds = 0
while True:
    try:
        sleep(args.interval)
        seconds += args.interval
    except KeyboardInterrupt:
        exiting = True
        # Further Ctrl+C presses must not interrupt the final report.
        signal.signal(signal.SIGINT, signal_ignore)
    if args.duration and seconds >= args.duration:
        exiting = True

    # Always print what was collected, including on the way out.
    print_stats()

    if exiting:
        print("Detaching...")
        # sys.exit() rather than exit(): the bare builtin is injected by the
        # `site` module for interactive use and is not guaranteed to exist.
        sys.exit()
228