#!/usr/bin/env python
# @lint-avoid-python-3-compatibility-imports
#
# cachetop      Count cache kernel function calls per processes
#               For Linux, uses BCC, eBPF.
#
# USAGE: cachetop [interval]
# Taken from cachestat by Brendan Gregg
#
# Copyright (c) 2016-present, Facebook, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 13-Jul-2016   Emmanuel Bretelle first version

from __future__ import absolute_import
from __future__ import division
# Do not import unicode_literals until #623 is fixed
# from __future__ import unicode_literals
from __future__ import print_function

from bcc import BPF
from collections import defaultdict
from time import strftime

import argparse
import curses
import pwd
import re
import signal
from time import sleep

# Column names, in the same order as the tuples built by
# get_processes_stats(); an index into FIELDS is also the sort key index.
FIELDS = (
    "PID",
    "UID",
    "CMD",
    "HITS",
    "MISSES",
    "DIRTIES",
    "READ_HIT%",
    "WRITE_HIT%"
)
DEFAULT_FIELD = "HITS"


# signal handler
def signal_ignore(signal, frame):
    """SIGINT handler used while shutting down: only prints a newline."""
    print()


# Function to gather data from /proc/meminfo
# return dictionary for quicker lookup of both values
def get_meminfo():
    """Parse /proc/meminfo into a dict.

    Returns a dict mapping each field name (the text before the first ':')
    to the first whitespace-separated token after it, converted to int.
    Lines without a ':' or without a value are skipped.
    """
    result = {}

    # `with` guarantees the file is closed; this function runs once per
    # refresh, so a leaked handle per call would accumulate.
    with open('/proc/meminfo') as meminfo:
        for line in meminfo:
            name, sep, rest = line.partition(':')
            values = rest.split()
            if not sep or not values:
                continue
            result[name] = int(values[0])
    return result


def get_processes_stats(
        bpf,
        sort_field=FIELDS.index(DEFAULT_FIELD),
        sort_reverse=False):
    '''
    Drain the BPF "counts" table and return per-process cache statistics.

    bpf          -- object providing get_table("counts") and ksym(addr)
    sort_field   -- index into FIELDS of the column to sort by
    sort_reverse -- passed as `reverse` to sorted()

    Returns a list of tuples, one per (pid, uid, comm) key, laid out in the
    same order as FIELDS:
        (pid, uid, comm, access, misses, dirties, read_hit%, write_hit%)
    pid and uid are ints so that sorting on those columns is numeric.
    The "counts" table is cleared before returning, so each call reports
    only what was counted since the previous call.
    '''
    counts = bpf.get_table("counts")
    stats = defaultdict(lambda: defaultdict(int))
    for k, v in counts.items():
        proc_key = "%d-%d-%s" % (
            k.pid, k.uid, k.comm.decode('utf-8', 'replace'))
        stats[proc_key][k.ip] = v.value
    stats_list = []

    # Pre-sorting by key keeps the order of equal rows deterministic, since
    # the final sort below is stable.
    for proc_key, count in sorted(stats.items(), key=lambda stat: stat[0]):
        mpa = 0
        mbd = 0
        apcl = 0
        apd = 0
        rhits = 0
        whits = 0

        for ip, calls in count.items():
            # Resolve the address once; the four names are mutually
            # exclusive prefixes, so at most one branch can match.
            sym = bpf.ksym(ip)
            if re.match(b'mark_page_accessed', sym) is not None:
                mpa = max(0, calls)
            elif re.match(b'mark_buffer_dirty', sym) is not None:
                mbd = max(0, calls)
            elif re.match(b'add_to_page_cache_lru', sym) is not None:
                apcl = max(0, calls)
            elif re.match(b'account_page_dirtied', sym) is not None:
                apd = max(0, calls)

        # access = total cache accesses incl. reads (mpa) and writes (mbd)
        # misses = pages added to the LRU (apcl) plus pages accounted as
        #          dirtied (apd)
        access = (mpa + mbd)
        misses = (apcl + apd)

        # rhits is the read hit % during the sample period.
        # whits is the write hit % during the sample period.
        # The guards also rule out a zero denominator: mpa > 0 implies
        # access > 0 and apcl > 0 implies misses > 0.
        if mpa > 0:
            rhits = 100 * (float(mpa) / (access + misses))
        if apcl > 0:
            whits = 100 * (float(apcl) / (access + misses))

        # maxsplit=2 keeps any '-' inside the command name intact.
        _pid, uid, comm = proc_key.split('-', 2)
        stats_list.append(
            (int(_pid), int(uid), comm,
             access, misses, mbd,
             rhits, whits))

    stats_list = sorted(
        stats_list, key=lambda stat: stat[sort_field], reverse=sort_reverse
    )
    counts.clear()
    return stats_list


def _draw_line(stdscr, row, text, attr=None):
    """Draw `text` at column 0 of `row`, clipped to the window.

    Returns False without drawing when `row` is outside the window or the
    window is too narrow. The text is cut to width - 1 characters so it can
    never wrap or touch the bottom-right cell, both of which make curses
    raise an error.
    """
    height, width = stdscr.getmaxyx()
    if row >= height or width < 2:
        return False
    if attr is None:
        stdscr.addnstr(row, 0, text, width - 1)
    else:
        stdscr.addnstr(row, 0, text, width - 1, attr)
    return True


def handle_loop(stdscr, args):
    """Main UI loop: attach the probes and redraw every args.interval seconds.

    stdscr -- curses window supplied by curses.wrapper()
    args   -- parsed arguments; only args.interval is read

    Keys: 'q' quits, 'r' toggles the sort order, '<' and '>' move the sort
    column left and right. Ctrl-C during the sleep also quits, after one
    final refresh.
    """
    # don't wait on key press
    stdscr.nodelay(1)
    # set default sorting field
    sort_field = FIELDS.index(DEFAULT_FIELD)
    sort_reverse = False

    # load BPF program
    bpf_text = """

    #include <uapi/linux/ptrace.h>
    struct key_t {
        u64 ip;
        u32 pid;
        u32 uid;
        char comm[16];
    };

    BPF_HASH(counts, struct key_t);

    int do_count(struct pt_regs *ctx) {
        struct key_t key = {};
        u64 pid = bpf_get_current_pid_tgid();
        u32 uid = bpf_get_current_uid_gid();

        key.ip = PT_REGS_IP(ctx);
        // upper 32 bits hold the tgid (user-visible process ID);
        // the lower 32 bits are the thread ID
        key.pid = pid >> 32;
        key.uid = uid & 0xFFFFFFFF;
        bpf_get_current_comm(&(key.comm), 16);

        counts.increment(key);
        return 0;
    }

    """
    b = BPF(text=bpf_text)
    b.attach_kprobe(event="add_to_page_cache_lru", fn_name="do_count")
    b.attach_kprobe(event="mark_page_accessed", fn_name="do_count")
    b.attach_kprobe(event="account_page_dirtied", fn_name="do_count")
    b.attach_kprobe(event="mark_buffer_dirty", fn_name="do_count")

    exiting = False

    while True:
        s = stdscr.getch()
        if s == ord('q'):
            exiting = True
        elif s == ord('r'):
            sort_reverse = not sort_reverse
        elif s == ord('<'):
            sort_field = max(0, sort_field - 1)
        elif s == ord('>'):
            sort_field = min(len(FIELDS) - 1, sort_field + 1)
        try:
            sleep(args.interval)
        except KeyboardInterrupt:
            exiting = True
            # as cleanup can take many seconds, trap Ctrl-C:
            signal.signal(signal.SIGINT, signal_ignore)

        # Get memory info
        mem = get_meminfo()
        cached = int(mem["Cached"]) / 1024
        buff = int(mem["Buffers"]) / 1024

        process_stats = get_processes_stats(
            b,
            sort_field=sort_field,
            sort_reverse=sort_reverse)
        stdscr.clear()
        _draw_line(
            stdscr, 0,
            "%-8s Buffers MB: %.0f / Cached MB: %.0f "
            "/ Sort: %s / Order: %s" % (
                strftime("%H:%M:%S"), buff, cached, FIELDS[sort_field],
                "descending" if sort_reverse else "ascending"
            )
        )

        # header
        _draw_line(
            stdscr, 1,
            "{0:8} {1:8} {2:16} {3:8} {4:8} {5:8} {6:10} {7:10}".format(
                *FIELDS
            ),
            curses.A_REVERSE
        )
        (height, width) = stdscr.getmaxyx()
        # Rows 0 and 1 are the summary and header; show only as many
        # processes as fit in the remaining rows.
        visible_rows = max(0, height - 2)
        for i, stat in enumerate(process_stats[:visible_rows]):
            uid = stat[1]
            try:
                username = pwd.getpwuid(uid)[0]
            except KeyError:
                # `pwd` throws a KeyError if the user cannot be found. This
                # can happen e.g. when the process is running in a cgroup
                # that has different users from the host.
                username = 'UNKNOWN({})'.format(uid)

            _draw_line(
                stdscr, i + 2,
                "{0:8} {username:8.8} {2:16} {3:8} {4:8} "
                "{5:8} {6:9.1f}% {7:9.1f}%".format(
                    *stat, username=username
                )
            )
        stdscr.refresh()
        if exiting:
            print("Detaching...")
            return


def parse_arguments():
    """Parse the command line and return the argparse namespace.

    The only argument is the optional positional `interval` (int seconds,
    default 5). A negative interval is rejected here with a usage error,
    because sleep() would otherwise raise ValueError inside the curses UI.
    """
    parser = argparse.ArgumentParser(
        description='show Linux page cache hit/miss statistics including read '
                    'and write hit % per processes in a UI like top.'
    )
    parser.add_argument(
        'interval', type=int, default=5, nargs='?',
        help='Interval between probes.'
    )

    args = parser.parse_args()
    if args.interval < 0:
        parser.error('interval must be a non-negative number of seconds')
    return args


if __name__ == '__main__':
    args = parse_arguments()
    curses.wrapper(handle_loop, args)