1#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3#
4# dcstat   Directory entry cache (dcache) stats.
5#          For Linux, uses BCC, eBPF.
6#
7# USAGE: dcstat [interval [count]]
8#
9# This uses kernel dynamic tracing of kernel functions, lookup_fast() and
10# d_lookup(), which will need to be modified to match kernel changes. See
11# code comments.
12#
13# Copyright 2016 Netflix, Inc.
14# Licensed under the Apache License, Version 2.0 (the "License")
15#
16# 09-Feb-2016   Brendan Gregg   Created this.
17
18from __future__ import print_function
19from bcc import BPF
20from ctypes import c_int
21from time import sleep, strftime
22from sys import argv
23
def usage():
    """Print the command-line synopsis for this tool and exit."""
    synopsis = "USAGE: %s [interval [count]]" % argv[0]
    print(synopsis)
    exit()
27
# arguments
#
# Both positional arguments are optional:
#   interval  seconds between output lines (default 1); must be positive
#   count     number of lines to print; the default of -1 means no limit
interval = 1
count = -1
if len(argv) > 1:
    try:
        interval = int(argv[1])
        # zero or negative intervals are rejected here instead of being
        # passed on to sleep(), which raises ValueError for negative values
        if interval <= 0:
            raise ValueError("interval must be a positive integer")
        if len(argv) > 2:
            count = int(argv[2])
    except ValueError:  # non-numeric input, which also covers -h and --help
        usage()
40
# define BPF program
#
# The C source below is compiled by BCC at load time. It declares a small
# array of counters indexed by the "stats" enum and two probe handlers that
# increment them. The indexes used here must stay in step with the Python
# side, which reads the same table by numeric index.
bpf_text = """
#include <uapi/linux/ptrace.h>

enum stats {
    S_REFS = 1,
    S_SLOW,
    S_MISS,
    S_MAXSTAT
};

BPF_ARRAY(stats, u64, S_MAXSTAT);

/*
 * How this is instrumented, and how to interpret the statistics, is very much
 * tied to the current kernel implementation (this was written on Linux 4.4).
 * This will need maintenance to keep working as the implementation changes. To
 * aid future adventurers, this is what the current code does, and why.
 *
 * First problem: the current implementation takes a path and then does a
 * lookup of each component. So how do we count a reference? Once for the path
 * lookup, or once for every component lookup? I've chosen the latter
 * since it seems to map more closely to actual dcache lookups (via
 * __d_lookup_rcu()). It's counted via calls to lookup_fast().
 *
 * The implementation tries different, progressively slower, approaches to
 * lookup a file. At what point do we call it a dcache miss? I've chosen when
 * a d_lookup() (which is called during lookup_slow()) returns zero.
 *
 * I've also included a "SLOW" statistic to show how often the fast lookup
 * failed. Whether this exists or is interesting is an implementation detail,
 * and the "SLOW" statistic may be removed in future versions.
 *
 * The counters live in one array shared by all CPUs, so they are bumped with
 * an atomic add: a plain read-modify-write could drop counts when probes fire
 * on several CPUs at the same time.
 */
void count_fast(struct pt_regs *ctx) {
    int key = S_REFS;
    u64 *leaf = stats.lookup(&key);
    if (leaf) lock_xadd(leaf, 1);
}

void count_lookup(struct pt_regs *ctx) {
    int key = S_SLOW;
    u64 *leaf = stats.lookup(&key);
    if (leaf) lock_xadd(leaf, 1);
    if (PT_REGS_RC(ctx) == 0) {
        key = S_MISS;
        leaf = stats.lookup(&key);
        if (leaf) lock_xadd(leaf, 1);
    }
}
"""
91
# load BPF program
#
# Compile the embedded C source, then hook its two handlers into the kernel:
# count_fast runs on entry to lookup_fast(), and count_lookup runs when
# d_lookup() returns so that it can inspect the return value.
b = BPF(text=bpf_text)
b.attach_kprobe(fn_name="count_fast", event="lookup_fast")
b.attach_kretprobe(fn_name="count_lookup", event="d_lookup")
96
# stat column labels and indexes
#
# Maps each output column label to the numeric index used to read its
# counter from the BPF "stats" table.
stats = {"REFS": 1, "SLOW": 2, "MISS": 3}

# header
#
# One "<label>/s" column per statistic, ordered by table index (label breaks
# ties), framed by the TIME column on the left and HIT% on the right.
header = "%-8s  " % "TIME"
header += "".join(
    " %8s" % (label + "/s")
    for label, _ in sorted(stats.items(), key=lambda item: (item[1], item[0]))
)
header += " %8s" % "HIT%"
print(header)
109
# output
#
# Each pass sleeps for one interval, takes a single snapshot of the counters,
# resets them, and prints one line: timestamp, one per-second rate for each
# statistic, and the hit ratio. Ctrl-C at any point ends the tool quietly.
i = 0
try:
    while True:
        # a positive count limits the number of lines printed; any other
        # value means run until interrupted
        if count > 0:
            i += 1
            if i > count:
                exit()

        sleep(interval)

        line = "%-8s: " % strftime("%H:%M:%S")

        # Read every counter exactly once so the rate columns and the hit
        # ratio are derived from the same values, then reset straight away
        # to keep the window in which events go uncounted small.
        table = b["stats"]
        snapshot = {}
        for stype, idx in stats.items():
            try:
                snapshot[stype] = table[c_int(idx)].value
            except (KeyError, IndexError):
                snapshot[stype] = None  # slot could not be read
        table.clear()

        # one column per statistic, ordered by table index
        ordered = sorted(stats.items(), key=lambda k_v: (k_v[1], k_v[0]))
        for stype, idx in ordered:
            val = snapshot[stype]
            line += " %8d" % (0 if val is None else val / interval)

        # hit ratio percentage; "-" when there were no references in this
        # interval or a counter could not be read
        ref = snapshot["REFS"]
        miss = snapshot["MISS"]
        if ref and miss is not None:
            hit = ref - miss
            line += " %8.2f" % (float(100) * hit / ref)
        else:
            line += " %7s%%" % "-"

        print(line)
except KeyboardInterrupt:
    exit()
144