1#!/usr/bin/env python
2# @lint-avoid-python-3-compatibility-imports
3#
4# filetop  file reads and writes by process.
5#          For Linux, uses BCC, eBPF.
6#
# USAGE: filetop.py [-h] [-a] [-C] [-r MAXROWS]
#                   [-s {reads,writes,rbytes,wbytes}] [-p PID]
#                   [interval] [count]
8#
9# This uses in-kernel eBPF maps to store per process summaries for efficiency.
10#
11# Copyright 2016 Netflix, Inc.
12# Licensed under the Apache License, Version 2.0 (the "License")
13#
14# 06-Feb-2016   Brendan Gregg   Created this.
15
16from __future__ import print_function
17from bcc import BPF
18from time import sleep, strftime
19import argparse
20import signal
21from subprocess import call
22
# arguments
examples = """examples:
    ./filetop            # file I/O top, 1 second refresh
    ./filetop -C         # don't clear the screen
    ./filetop -p 181     # PID 181 only
    ./filetop 5          # 5 second summaries
    ./filetop 5 10       # 5 second summaries, 10 times only
"""
parser = argparse.ArgumentParser(
    description="File reads and writes by process",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-a", "--all-files", action="store_true",
    help="include non-regular file types (sockets, FIFOs, etc)")
parser.add_argument("-C", "--noclear", action="store_true",
    help="don't clear the screen")
# type=int on the numeric options lets argparse reject a bad value with a
# usage error, instead of a ValueError traceback from a later int() call.
parser.add_argument("-r", "--maxrows", type=int, default=20,
    help="maximum rows to print, default 20")
parser.add_argument("-s", "--sort", default="rbytes",
    choices=["reads", "writes", "rbytes", "wbytes"],
    help="sort column, default rbytes")
parser.add_argument("-p", "--pid", type=int, metavar="PID", dest="tgid",
    help="trace this PID only")
parser.add_argument("interval", nargs="?", type=int, default=1,
    help="output interval, in seconds")
parser.add_argument("count", nargs="?", type=int, default=99999999,
    help="number of outputs")
# hidden flag: print the generated BPF program text and exit
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()
interval = args.interval    # seconds slept between summaries
countdown = args.count      # summaries still to print
maxrows = args.maxrows      # row cap applied to each summary
clear = not args.noclear    # clear the screen before each summary
debug = 0                   # non-zero also prints the BPF program text
58
# linux stats
loadavg = "/proc/loadavg"

# signal handler
def signal_ignore(signal_value, frame):
    """Signal-handler-shaped callback: ignore both arguments, emit a newline."""
    print("")
65
# define BPF program
#
# The C text below is handed to BCC via BPF(text=...). TGID_FILTER and
# TYPE_FILTER are placeholders, not C: the str.replace() calls that follow
# this definition swap them for real expressions before compilation.
#
# do_entry() builds an info_t key from the thread ID, the dentry name length,
# the command name, the dentry name (at most DNAME_INLINE_LEN bytes) and a
# file-type letter ('R' regular, 'S' socket, 'O' other). It then bumps the
# read or write counter and byte total stored under that key in the "counts"
# hash. trace_read_entry() and trace_write_entry() are thin wrappers that call
# do_entry() with is_read set to 1 and 0 respectively.
#
# NOTE(review): lookup_or_init() is the older BCC spelling; newer BCC
# documentation recommends lookup_or_try_init() plus an explicit NULL check.
# Confirm against the BCC version this tool targets.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/blkdev.h>

// the key for the output summary
struct info_t {
    u32 pid;
    u32 name_len;
    char comm[TASK_COMM_LEN];
    // de->d_name.name may point to de->d_iname so limit len accordingly
    char name[DNAME_INLINE_LEN];
    char type;
};

// the value of the output summary
struct val_t {
    u64 reads;
    u64 writes;
    u64 rbytes;
    u64 wbytes;
};

BPF_HASH(counts, struct info_t, struct val_t);

static int do_entry(struct pt_regs *ctx, struct file *file,
    char __user *buf, size_t count, int is_read)
{
    u32 tgid = bpf_get_current_pid_tgid() >> 32;
    if (TGID_FILTER)
        return 0;

    u32 pid = bpf_get_current_pid_tgid();

    // skip I/O lacking a filename
    struct dentry *de = file->f_path.dentry;
    int mode = file->f_inode->i_mode;
    struct qstr d_name = de->d_name;
    if (d_name.len == 0 || TYPE_FILTER)
        return 0;

    // store counts and sizes by pid & file
    struct info_t info = {.pid = pid};
    bpf_get_current_comm(&info.comm, sizeof(info.comm));
    info.name_len = d_name.len;
    bpf_probe_read(&info.name, sizeof(info.name), d_name.name);
    if (S_ISREG(mode)) {
        info.type = 'R';
    } else if (S_ISSOCK(mode)) {
        info.type = 'S';
    } else {
        info.type = 'O';
    }

    struct val_t *valp, zero = {};
    valp = counts.lookup_or_init(&info, &zero);
    if (is_read) {
        valp->reads++;
        valp->rbytes += count;
    } else {
        valp->writes++;
        valp->wbytes += count;
    }

    return 0;
}

int trace_read_entry(struct pt_regs *ctx, struct file *file,
    char __user *buf, size_t count)
{
    return do_entry(ctx, file, buf, count, 1);
}

int trace_write_entry(struct pt_regs *ctx, struct file *file,
    char __user *buf, size_t count)
{
    return do_entry(ctx, file, buf, count, 0);
}

"""
# Swap the two placeholders in the BPF program text for C expressions.
# A filter expression of '0' never matches, i.e. nothing is skipped.
tgid_filter = ('tgid != %d' % args.tgid) if args.tgid else '0'
type_filter = '0' if args.all_files else '!S_ISREG(mode)'
bpf_text = bpf_text.replace('TGID_FILTER', tgid_filter)
bpf_text = bpf_text.replace('TYPE_FILTER', type_filter)

# --ebpf dumps the final program text and stops; debug dumps it and carries on
if args.ebpf or debug:
    print(bpf_text)
if args.ebpf:
    exit()
159
# initialize BPF
b = BPF(text=bpf_text)
# attach each kernel function's kprobe to its handler in the BPF program
for kernel_fn, handler in (("vfs_read", "trace_read_entry"),
                           ("vfs_write", "trace_write_entry")):
    b.attach_kprobe(event=kernel_fn, fn_name=handler)

DNAME_INLINE_LEN = 32  # linux/dcache.h

banner = 'Tracing... Output every %d secs. Hit Ctrl-C to end' % interval
print(banner)
168
# output
exiting = False
while True:
    # wait one interval; Ctrl-C still prints a final summary before exiting
    try:
        sleep(interval)
    except KeyboardInterrupt:
        exiting = True

    # header
    if not clear:
        print()
    else:
        call("clear")
    stamp = strftime("%H:%M:%S")
    with open(loadavg) as stats:
        load = stats.read()
    print("%-8s loadavg: %s" % (stamp, load))
    columns = ("TID", "COMM", "READS", "WRITES", "R_Kb", "W_Kb", "T", "FILE")
    print("%-6s %-16s %-6s %-6s %-7s %-7s %1s %s" % columns)

    # by-TID output: largest value of the chosen sort column first
    counts = b.get_table("counts")
    ranked = sorted(counts.items(),
                    key=lambda entry: getattr(entry[1], args.sort))
    ranked.reverse()
    for shown, (k, v) in enumerate(ranked, 1):
        name = k.name.decode('utf-8', 'replace')
        # the kernel side stores at most DNAME_INLINE_LEN bytes of the name;
        # mark longer names as truncated
        if k.name_len > DNAME_INLINE_LEN:
            name = name[:-3] + "..."

        # print line
        row = (k.pid, k.comm.decode('utf-8', 'replace'),
               v.reads, v.writes,
               v.rbytes / 1024, v.wbytes / 1024,
               k.type.decode('utf-8', 'replace'), name)
        print("%-6d %-16s %-6d %-6d %-7d %-7d %1s %s" % row)

        if shown >= maxrows:
            break
    # start the next interval from an empty table
    counts.clear()

    countdown -= 1
    if exiting or countdown == 0:
        print("Detaching...")
        exit()
212