1#!/usr/bin/env python
2# @lint-avoid-python-3-compatibility-imports
3#
4# biolatency    Summarize block device I/O latency as a histogram.
5#       For Linux, uses BCC, eBPF.
6#
7# USAGE: biolatency [-h] [-T] [-Q] [-m] [-D] [interval] [count]
8#
9# Copyright (c) 2015 Brendan Gregg.
10# Licensed under the Apache License, Version 2.0 (the "License")
11#
12# 20-Sep-2015   Brendan Gregg   Created this.
13
14from __future__ import print_function
15from bcc import BPF
16from time import sleep, strftime
17import argparse
18
19# arguments
20examples = """examples:
21    ./biolatency            # summarize block I/O latency as a histogram
22    ./biolatency 1 10       # print 1 second summaries, 10 times
23    ./biolatency -mT 1      # 1s summaries, milliseconds, and timestamps
24    ./biolatency -Q         # include OS queued time in I/O time
25    ./biolatency -D         # show each disk device separately
26"""
27parser = argparse.ArgumentParser(
28    description="Summarize block device I/O latency as a histogram",
29    formatter_class=argparse.RawDescriptionHelpFormatter,
30    epilog=examples)
31parser.add_argument("-T", "--timestamp", action="store_true",
32    help="include timestamp on output")
33parser.add_argument("-Q", "--queued", action="store_true",
34    help="include OS queued time in I/O time")
35parser.add_argument("-m", "--milliseconds", action="store_true",
36    help="millisecond histogram")
37parser.add_argument("-D", "--disks", action="store_true",
38    help="print a histogram per disk device")
39parser.add_argument("interval", nargs="?", default=99999999,
40    help="output interval, in seconds")
41parser.add_argument("count", nargs="?", default=99999999,
42    help="number of outputs")
43parser.add_argument("--ebpf", action="store_true",
44    help=argparse.SUPPRESS)
45args = parser.parse_args()
46countdown = int(args.count)
47debug = 0
48
49# define BPF program
50bpf_text = """
51#include <uapi/linux/ptrace.h>
52#include <linux/blkdev.h>
53
54typedef struct disk_key {
55    char disk[DISK_NAME_LEN];
56    u64 slot;
57} disk_key_t;
58BPF_HASH(start, struct request *);
59STORAGE
60
61// time block I/O
62int trace_req_start(struct pt_regs *ctx, struct request *req)
63{
64    u64 ts = bpf_ktime_get_ns();
65    start.update(&req, &ts);
66    return 0;
67}
68
69// output
70int trace_req_done(struct pt_regs *ctx, struct request *req)
71{
72    u64 *tsp, delta;
73
74    // fetch timestamp and calculate delta
75    tsp = start.lookup(&req);
76    if (tsp == 0) {
77        return 0;   // missed issue
78    }
79    delta = bpf_ktime_get_ns() - *tsp;
80    FACTOR
81
82    // store as histogram
83    STORE
84
85    start.delete(&req);
86    return 0;
87}
88"""
89
90# code substitutions
91if args.milliseconds:
92    bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000000;')
93    label = "msecs"
94else:
95    bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000;')
96    label = "usecs"
97if args.disks:
98    bpf_text = bpf_text.replace('STORAGE',
99        'BPF_HISTOGRAM(dist, disk_key_t);')
100    bpf_text = bpf_text.replace('STORE',
101        'disk_key_t key = {.slot = bpf_log2l(delta)}; ' +
102        'void *__tmp = (void *)req->rq_disk->disk_name; ' +
103        'bpf_probe_read(&key.disk, sizeof(key.disk), __tmp); ' +
104        'dist.increment(key);')
105else:
106    bpf_text = bpf_text.replace('STORAGE', 'BPF_HISTOGRAM(dist);')
107    bpf_text = bpf_text.replace('STORE',
108        'dist.increment(bpf_log2l(delta));')
109if debug or args.ebpf:
110    print(bpf_text)
111    if args.ebpf:
112        exit()
113
114# load BPF program
115b = BPF(text=bpf_text)
116if args.queued:
117    b.attach_kprobe(event="blk_account_io_start", fn_name="trace_req_start")
118else:
119    if BPF.get_kprobe_functions(b'blk_start_request'):
120        b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
121    b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
122b.attach_kprobe(event="blk_account_io_done",
123    fn_name="trace_req_done")
124
125print("Tracing block device I/O... Hit Ctrl-C to end.")
126
127# output
128exiting = 0 if args.interval else 1
129dist = b.get_table("dist")
130while (1):
131    try:
132        sleep(int(args.interval))
133    except KeyboardInterrupt:
134        exiting = 1
135
136    print()
137    if args.timestamp:
138        print("%-8s\n" % strftime("%H:%M:%S"), end="")
139
140    dist.print_log2_hist(label, "disk")
141    dist.clear()
142
143    countdown -= 1
144    if exiting or countdown == 0:
145        exit()
146