1 #!/usr/bin/python 2 # 3 # lockstat Trace and display lock contention stats 4 # 5 # USAGE: lockstat 6 7 # Licensed under the Apache License, Version 2.0 (the "License") 8 # 28-Jul-2017 Gisle Dankel Created this. 9 10 from bcc import BPF 11 from ctypes import c_int 12 from time import sleep 13 from datetime import datetime 14 import argparse 15 import subprocess 16 import os 17 18 # One Lock object per TGID and uaddr. 19 class Lock(object): 20 def __init__(self): 21 self.contention_count = 0 22 self.elapsed_blocked = 0 23 self.thread_count = 0 24 self.last_stack_syms = [] 25 26 def update(self, count, block_time, last_stack_syms): 27 self.contention_count += count 28 self.elapsed_blocked += block_time 29 self.thread_count += 1 30 self.last_stack_syms = last_stack_syms 31 32 def run_command_get_pid(command): 33 p = subprocess.Popen(command.split()) 34 return p.pid 35 36 examples = """ 37 EXAMPLES: 38 39 ./lockstat 40 Trace calls to sys_futex and display contented locks every 5 seconds 41 for all processes running on the system 42 ./lockstat -p <pid> 43 Trace only for the specified pid and display contended locks 44 every 5 seconds 45 ./lockstat -p <pid> -t 46 Trace for a specified pid and print a message on each entry and exit to 47 sys_futex until interrupted or killed 48 ./lockstat -p <pid> 10 49 Trace the specified pid and show a message every 10 seconds 50 ./lockstat -c <command> 1 30 51 Run the specified command and display contended locks every 1 second 52 for a total of 30 times 53 """ 54 55 description = """ 56 Trace kernel futex events. 57 These often occur because of lock contention, e.g. involving a pthread_mutex. 58 This script resemblers the following SystemTap example: 59 https://sourceware.org/systemtap/SystemTap_Beginners_Guide/futexcontentionsect.html 60 """ 61 62 parser = argparse.ArgumentParser(description=description, 63 formatter_class=argparse.RawDescriptionHelpFormatter, 64 epilog=examples) 65 parser.add_argument("-p", "--pid", type=int, default=-1, 66 help="the PID to trace; if not specified, trace all") 67 parser.add_argument("-t", "--trace", action="store_true", 68 help="print trace messages for each futex enter/exit") 69 parser.add_argument("interval", nargs="?", default=5, type=int, 70 help="interval in seconds to print summary") 71 parser.add_argument("count", nargs="?", type=int, 72 help="number of times to print the report before exiting") 73 parser.add_argument("-c", "--command", 74 help="execute and trace the specified command") 75 76 args = parser.parse_args() 77 78 pid = args.pid 79 command = args.command 80 interval = args.interval 81 num_prints = args.count 82 trace_all = args.trace 83 84 if command is not None: 85 print("Executing '%s' and tracing the resulting process." % command) 86 pid = run_command_get_pid(command) 87 88 bpf_source = """ 89 #include <uapi/linux/futex.h> 90 #include <uapi/linux/ptrace.h> 91 #include <linux/sched.h> 92 #include <linux/time.h> 93 94 struct comm_t { 95 char name[TASK_COMM_LEN]; 96 }; 97 98 struct lock_key_t { 99 u64 uaddr; 100 u32 pid; 101 u32 tgid; 102 }; 103 104 struct lock_info_t { 105 u64 elapsed_blocked; 106 u64 contention_count; 107 u64 sid; 108 }; 109 110 BPF_HASH(pid_lock, u32, u64); 111 BPF_HASH(pid_blocktime, u32, u64); 112 BPF_HASH(tgid_comm, u32, struct comm_t); 113 BPF_HASH(lock_stats, struct lock_key_t, struct lock_info_t, 1000000); 114 BPF_STACK_TRACE(stack_traces, 16384); 115 116 static inline int update_stats(u32 pid, u32 tgid, u64 uaddr, u64 block_time, u64 sid) { 117 struct lock_key_t key = {}; 118 struct lock_info_t zero = {}; 119 struct lock_info_t *info; 120 121 key.pid = pid; 122 key.tgid = tgid; 123 key.uaddr = uaddr; 124 info = lock_stats.lookup_or_init(&key, &zero); 125 info->elapsed_blocked += block_time; 126 info->contention_count++; 127 info->sid = sid; 128 129 if (0 == tgid_comm.lookup(&tgid)) { 130 struct comm_t comm; 131 bpf_get_current_comm(&comm.name, sizeof(comm.name)); 132 tgid_comm.update(&tgid, &comm); 133 } 134 return 0; 135 } 136 137 // FIXME: Should attach to sys_enter_futex and sys_exit_futex tracepoints here, 138 // but that does not currently work 139 int sys_futex_enter(struct pt_regs *ctx, u32 *uaddr, int op, u32 val, 140 struct timespec *utime, u32 *uaddr2, u32 val3) { 141 int cmd = op & FUTEX_CMD_MASK; 142 if (cmd != FUTEX_WAIT) 143 return 0; 144 145 u64 pid_tgid = bpf_get_current_pid_tgid(); 146 u32 pid = pid_tgid; 147 u32 tgid = pid_tgid >> 32; 148 149 if (!(THREAD_FILTER)) 150 return 0; 151 152 u64 timestamp = bpf_ktime_get_ns(); 153 u64 uaddr64 = (u64) uaddr; 154 pid_lock.update(&pid, &uaddr64); 155 pid_blocktime.update(&pid, ×tamp); 156 157 if (SHOULD_PRINT) 158 bpf_trace_printk("enter sys_futex, pid = %u, uaddr = %x, " 159 "cmd = %u\\n", pid, uaddr64, cmd); 160 return 0; 161 } 162 163 int sys_futex_exit(struct pt_regs *ctx) { 164 u64 pid_tgid = bpf_get_current_pid_tgid(); 165 u32 pid = pid_tgid; 166 u32 tgid = pid_tgid >> 32; 167 if (!(THREAD_FILTER)) 168 return 0; 169 170 u64 *blocktime = pid_blocktime.lookup(&pid); 171 u64 *uaddr = pid_lock.lookup(&pid); 172 u64 timestamp = bpf_ktime_get_ns(); 173 u64 elapsed; 174 u64 sid; 175 176 if (blocktime == 0 || uaddr == 0) 177 return 0; // not FUTEX_WAIT, or (less likely) missed futex_enter 178 179 elapsed = timestamp - *blocktime; 180 181 sid = stack_traces.get_stackid(ctx, BPF_F_USER_STACK); 182 update_stats(pid, tgid, *uaddr, elapsed, sid); 183 pid_lock.delete(&pid); 184 pid_blocktime.delete(&pid); 185 186 if (SHOULD_PRINT) { 187 bpf_trace_printk("exit sys_futex, uaddr = %x, elapsed = %uns\\n", 188 uaddr == 0 ? 0 : *uaddr, elapsed); 189 } 190 return 0; 191 } 192 193 """ 194 195 bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0") 196 197 thread_filter = '1' 198 if pid != -1: 199 print("Tracing pid %d, Ctrl+C to quit." % pid) 200 # 'tgid' in kernel space is what people thin of as 'pid' in userspace 201 thread_filter = "tgid == %d" % pid 202 else: 203 print("Tracing all processes, Ctrl+C to quit.") 204 205 bpf_source = bpf_source.replace("THREAD_FILTER", thread_filter) 206 207 bpf_program = BPF(text=bpf_source) 208 bpf_program.attach_kprobe(event="SyS_futex", fn_name="sys_futex_enter") 209 bpf_program.attach_kretprobe(event="SyS_futex", fn_name="sys_futex_exit") 210 211 def get_syms(stack, pid): 212 global bpf_program 213 syms = [] 214 for addr in stack: 215 s = bpf_program.sym(addr, pid, show_offset=True) 216 syms.append(s) 217 return syms 218 219 def print_syms(syms): 220 print("=========") 221 for f in syms: 222 print(f) 223 print("=========") 224 225 def is_android_monitor_lock(syms): 226 for s in syms: 227 if 'art::Monitor::Lock' in s: 228 return True 229 return False 230 231 def disp_stack(stack, pid): 232 for addr in stack: 233 s = bpf_program.sym(addr, pid, show_offset=True) 234 235 def create_tgid_stats(): 236 global bpf_program 237 stats = bpf_program["lock_stats"] 238 res = {} 239 stack_traces = bpf_program['stack_traces'] 240 for key, val in stats.items(): 241 # Only display Android monitor locks 242 if val.sid >= 0: 243 ust = stack_traces.walk(val.sid) 244 syms = get_syms(ust, key.pid) 245 if not is_android_monitor_lock(syms): 246 continue 247 else: 248 continue 249 250 if not key.tgid in res: 251 res[key.tgid] = {} 252 if not key.uaddr in res[key.tgid]: 253 res[key.tgid][key.uaddr] = Lock() 254 255 lock = res[key.tgid][key.uaddr] 256 lock.update(val.contention_count, val.elapsed_blocked, syms) 257 return res 258 259 def print_comm_stats(stats): 260 if stats == {}: 261 return 262 263 comms = bpf_program["tgid_comm"] 264 print("\n%s:" % (datetime.now().strftime("%H:%M:%S"))) 265 for tgid, locks in stats.items(): 266 comm = comms[c_int(tgid)].name 267 print("\n %s (%d):" % (comm, tgid)) 268 sorted_locks = sorted(locks.items(), 269 key=lambda x: x[1].elapsed_blocked, 270 reverse=True) 271 for addr, stats in sorted_locks: 272 print(" %x: %dms (%d contentions involving %d threads, avg %dus)" % 273 (addr, stats.elapsed_blocked / 1000000, 274 stats.contention_count, stats.thread_count, 275 stats.elapsed_blocked / stats.contention_count / 1000)) 276 277 # No use of displaying lock stacks since we're only 278 # filtering for Android monitor locks. 279 # 280 # print("Last stack for this lock:") 281 # print_syms(stats.last_stack_syms) 282 283 count_so_far = 0 284 while True: 285 if trace_all: 286 print(bpf_program.trace_fields()) 287 else: 288 try: 289 sleep(interval) 290 except KeyboardInterrupt: 291 exit() 292 print_comm_stats(create_tgid_stats()) 293 count_so_far += 1 294 bpf_program['tgid_comm'].clear() 295 bpf_program['lock_stats'].clear() 296 bpf_program['pid_lock'].clear() 297 bpf_program['pid_blocktime'].clear() 298 299 if num_prints is not None and count_so_far >= num_prints: 300 exit() 301