1#!/usr/bin/env python 2# 3# offcputime Summarize off-CPU time by kernel stack trace 4# For Linux, uses BCC, eBPF. 5# 6# USAGE: offcputime [-h] [-u] [-p PID] [-v] [-f] [duration] 7# 8# The current implementation uses an unrolled loop for x86_64, and was written 9# as a proof of concept. This implementation should be replaced in the future 10# with an appropriate bpf_ call, when available. 11# 12# Currently limited to a stack trace depth of 21 (maxdepth + 1). 13# 14# Copyright 2016 Netflix, Inc. 15# Licensed under the Apache License, Version 2.0 (the "License") 16# 17# 13-Jan-2016 Brendan Gregg Created this. 18 19from __future__ import print_function 20from bcc import BPF 21from time import sleep, strftime 22import argparse 23import signal 24 25# arguments 26examples = """examples: 27 ./offcputime # trace off-CPU stack time until Ctrl-C 28 ./offcputime 5 # trace for 5 seconds only 29 ./offcputime -f 5 # 5 seconds, and output in folded format 30 ./offcputime -u # don't include kernel threads (user only) 31 ./offcputime -p 185 # trace fo PID 185 only 32""" 33parser = argparse.ArgumentParser( 34 description="Summarize off-CPU time by kernel stack trace", 35 formatter_class=argparse.RawDescriptionHelpFormatter, 36 epilog=examples) 37parser.add_argument("-u", "--useronly", action="store_true", 38 help="user threads only (no kernel threads)") 39parser.add_argument("-p", "--pid", 40 help="trace this PID only") 41parser.add_argument("-v", "--verbose", action="store_true", 42 help="show raw addresses") 43parser.add_argument("-f", "--folded", action="store_true", 44 help="output folded format") 45parser.add_argument("duration", nargs="?", default=99999999, 46 help="duration of trace, in seconds") 47args = parser.parse_args() 48folded = args.folded 49duration = int(args.duration) 50debug = 0 51maxdepth = 20 # and MAXDEPTH 52if args.pid and args.useronly: 53 print("ERROR: use either -p or -u.") 54 exit() 55 56# signal handler 57def signal_ignore(signal, frame): 58 print() 59 
# define BPF program
#
# The C text below is compiled by BCC. oncpu() is attached (further down) as a
# kprobe on finish_task_switch. On each call it:
#   1. stores a timestamp for the thread being switched out (prev), keyed by
#      its pid, when the thread filter substituted below accepts it;
#   2. looks up the timestamp of the thread now running, and computes how long
#      it was away, in microseconds;
#   3. walks up to MAXDEPTH frame pointers to collect return addresses;
#   4. adds the elapsed time to counts[{comm, return addresses}].
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

#define MAXDEPTH    20
#define MINBLOCK_US 1

struct key_t {
    char name[TASK_COMM_LEN];
    // Skip saving the ip
    u64 ret[MAXDEPTH];
};
BPF_HASH(counts, struct key_t);
BPF_HASH(start, u32);

static u64 get_frame(u64 *bp) {
    if (*bp) {
        // The following stack walker is x86_64 specific
        u64 ret = 0;
        if (bpf_probe_read(&ret, sizeof(ret), (void *)(*bp+8)))
            return 0;
        if (bpf_probe_read(bp, sizeof(*bp), (void *)*bp))
            *bp = 0;
        if (ret < __START_KERNEL_map)
            return 0;
        return ret;
    }
    return 0;
}

int oncpu(struct pt_regs *ctx, struct task_struct *prev) {
    u32 pid = prev->pid;
    u64 ts, *tsp;

    // record previous thread sleep time
    if (FILTER) {
        ts = bpf_ktime_get_ns();
        start.update(&pid, &ts);
    }

    // calculate current thread's delta time
    pid = bpf_get_current_pid_tgid();
    tsp = start.lookup(&pid);
    if (tsp == 0)
        return 0;        // missed start or filtered
    u64 delta = bpf_ktime_get_ns() - *tsp;
    start.delete(&pid);
    delta = delta / 1000;
    if (delta < MINBLOCK_US)
        return 0;

    // create map key
    u64 zero = 0, *val, bp = 0;
    int depth = 0;
    struct key_t key = {};
    bpf_get_current_comm(&key.name, sizeof(key.name));
    bp = ctx->bp;

    // unrolled loop (MAXDEPTH):
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;

    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;

out:
    val = counts.lookup_or_init(&key, &zero);
    (*val) += delta;
    return 0;
}
"""

# Build the C expression that decides which switched-out threads get a start
# timestamp. It is evaluated inside oncpu(), where `pid` is prev->pid.
if args.pid:
    # The value is spliced into C source, so insist on a plain integer rather
    # than passing arbitrary command-line text to the compiler.
    try:
        target_pid = int(args.pid)
    except ValueError:
        print("ERROR: -p PID must be an integer, got %r." % args.pid)
        exit(1)
    thread_filter = 'pid == %d' % target_pid
elif args.useronly:
    thread_filter = '!(prev->flags & PF_KTHREAD)'
else:
    thread_filter = '1'
# str.replace substitutes every occurrence of the placeholder token in the
# program text, so the token must not appear anywhere else in it.
bpf_text = bpf_text.replace('FILTER', thread_filter)
if debug:
    print(bpf_text)

# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event="finish_task_switch", fn_name="oncpu")
matched = b.num_open_kprobes()
if matched == 0:
    print("0 functions traced. Exiting.")
    exit(1)         # nothing was attached: report failure to the caller

# header
if not folded:
    print("Tracing off-CPU time (us) by kernel stack", end="")
    if duration < 99999999:
        print(" for %d secs." % duration)
    else:
        print("... Hit Ctrl-C to end.")

# output: wait for the duration to elapse (or Ctrl-C), then print one summary
try:
    sleep(duration)
except KeyboardInterrupt:
    # as cleanup can take many seconds, trap Ctrl-C:
    signal.signal(signal.SIGINT, signal_ignore)

if not folded:
    print()
counts = b.get_table("counts")
# ascending by accumulated time, so the largest totals are printed last
for k, v in sorted(counts.items(), key=lambda entry: entry[1].value):
    # decode once for both formats so the name never prints as a bytes repr
    comm = k.name.decode('utf-8', 'replace')
    if folded:
        # print folded stack output: comm;outermost;...;innermost <total>
        frames = [b.ksym(k.ret[i])
                  for i in reversed(range(maxdepth))
                  if k.ret[i] != 0]
        print("%s %d" % (comm + ";" + ";".join(frames), v.value))
    else:
        # print default multi-line stack output, innermost frame first;
        # the first zero entry marks the end of the recorded stack
        for i in range(maxdepth):
            addr = k.ret[i]
            if addr == 0:
                break
            print(" %-16x %s" % (addr, b.ksym(addr)))
        print(" %-16s %s" % ("-", comm))
        print(" %d\n" % v.value)
counts.clear()

if not folded:
    print("Detaching...")
exit()