1#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3#
4# nfsdist   Summarize NFS operation latency
5#           for Linux, uses BCC and eBPF
6#
7# USAGE: nfsdist [-h] [-T] [-m] [-p PID] [interval] [count]
8#
9# 4-Sep-2017    Samuel Nair     created this
10
11from __future__ import print_function
12from bcc import BPF
13from time import sleep, strftime
14import argparse
15
16# arguments
17examples = """examples:
18    ./nfsdist            # show operation latency as a histogram
19    ./nfsdist -p 181     # trace PID 181 only
20    ./nfsdist 1 10       # print 1 second summaries, 10 times
21    ./nfsdist -m 5       # 5s summaries, milliseconds
22"""
23parser = argparse.ArgumentParser(
24        description="Summarize NFS operation latency",
25        formatter_class=argparse.RawDescriptionHelpFormatter,
26        epilog=examples)
27parser.add_argument("-T", "--notimestamp", action="store_true",
28                    help="don't include timestamp on interval output")
29parser.add_argument("-m", "--milliseconds", action="store_true",
30                    help="output in milliseconds")
31parser.add_argument("-p", "--pid",
32                    help="trace this PID only")
33parser.add_argument("interval", nargs="?",
34                    help="output interval, in seconds")
35parser.add_argument("count", nargs="?", default=99999999,
36                    help="number of outputs")
37parser.add_argument("--ebpf", action="store_true",
38                    help=argparse.SUPPRESS)
39args = parser.parse_args()
40pid = args.pid
41countdown = int(args.count)
42if args.milliseconds:
43    factor = 1000000
44    label = "msecs"
45else:
46    factor = 1000
47    label = "usecs"
48    if args.interval and int(args.interval) == 0:
49        print("ERROR: interval 0. Exiting.")
50        exit()
51debug = 0
52
53# define BPF program
54bpf_text = """
55#include <uapi/linux/ptrace.h>
56#include <linux/fs.h>
57#include <linux/sched.h>
58
59#define OP_NAME_LEN 8
60typedef struct dist_key {
61    char op[OP_NAME_LEN];
62    u64 slot;
63} dist_key_t;
64
65BPF_HASH(start, u32);
66BPF_HISTOGRAM(dist, dist_key_t);
67
68// time operation
69int trace_entry(struct pt_regs *ctx)
70{
71    u32 pid = bpf_get_current_pid_tgid();
72    if (FILTER_PID)
73        return 0;
74    u64 ts = bpf_ktime_get_ns();
75    start.update(&pid, &ts);
76    return 0;
77}
78
79static int trace_return(struct pt_regs *ctx, const char *op)
80{
81    u64 *tsp;
82    u32 pid = bpf_get_current_pid_tgid();
83
84    // fetch timestamp and calculate delta
85    tsp = start.lookup(&pid);
86    if (tsp == 0) {
87        return 0;   // missed start or filtered
88    }
89    u64 delta = (bpf_ktime_get_ns() - *tsp) / FACTOR;
90
91    // store as histogram
92    dist_key_t key = {.slot = bpf_log2l(delta)};
93    __builtin_memcpy(&key.op, op, sizeof(key.op));
94    dist.increment(key);
95
96    start.delete(&pid);
97    return 0;
98}
99
100int trace_read_return(struct pt_regs *ctx)
101{
102    char *op = "read";
103    return trace_return(ctx, op);
104}
105
106int trace_write_return(struct pt_regs *ctx)
107{
108    char *op = "write";
109    return trace_return(ctx, op);
110}
111
112int trace_open_return(struct pt_regs *ctx)
113{
114    char *op = "open";
115    return trace_return(ctx, op);
116}
117
118int trace_getattr_return(struct pt_regs *ctx)
119{
120    char *op = "getattr";
121    return trace_return(ctx, op);
122}
123"""
124bpf_text = bpf_text.replace('FACTOR', str(factor))
125if args.pid:
126    bpf_text = bpf_text.replace('FILTER_PID', 'pid != %s' % pid)
127else:
128    bpf_text = bpf_text.replace('FILTER_PID', '0')
129if debug or args.ebpf:
130    print(bpf_text)
131    if args.ebpf:
132        exit()
133
134# load BPF program
135b = BPF(text=bpf_text)
136
137# common file functions
138b.attach_kprobe(event="nfs_file_read", fn_name="trace_entry")
139b.attach_kprobe(event="nfs_file_write", fn_name="trace_entry")
140b.attach_kprobe(event="nfs4_file_open", fn_name="trace_entry")
141b.attach_kprobe(event="nfs_file_open", fn_name="trace_entry")
142b.attach_kprobe(event="nfs_getattr", fn_name="trace_entry")
143
144b.attach_kretprobe(event="nfs_file_read", fn_name="trace_read_return")
145b.attach_kretprobe(event="nfs_file_write", fn_name="trace_write_return")
146b.attach_kretprobe(event="nfs4_file_open", fn_name="trace_open_return")
147b.attach_kretprobe(event="nfs_file_open", fn_name="trace_open_return")
148b.attach_kretprobe(event="nfs_getattr", fn_name="trace_getattr_return")
149
150print("Tracing NFS operation latency... Hit Ctrl-C to end.")
151
152# output
153exiting = 0
154dist = b.get_table("dist")
155while (1):
156    try:
157        if args.interval:
158            sleep(int(args.interval))
159        else:
160            sleep(99999999)
161    except KeyboardInterrupt:
162        exiting = 1
163
164    print()
165    if args.interval and (not args.notimestamp):
166        print(strftime("%H:%M:%S:"))
167
168    dist.print_log2_hist(label, "operation", section_print_fn=bytes.decode)
169    dist.clear()
170
171    countdown -= 1
172    if exiting or countdown == 0:
173        exit()
174