#!/usr/bin/env python
#
# memleak   Trace and display outstanding allocations to detect
#           memory leaks in user-mode processes and the kernel.
#
# USAGE: memleak [-h] [-p PID] [-t] [-a] [-o OLDER] [-c COMMAND]
#                [--combined-only] [-s SAMPLE_RATE] [-T TOP] [-z MIN_SIZE]
#                [-Z MAX_SIZE] [-O OBJ]
#                [interval] [count]
#
# Licensed under the Apache License, Version 2.0 (the "License")
# Copyright (C) 2016 Sasha Goldshtein.

from bcc import BPF
from time import sleep
from datetime import datetime
import resource
import argparse
import subprocess
import os
import sys

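# Aggregates allocations that share a single call stack: `count` tracks how
# many outstanding allocations came from the stack, `size` their total bytes.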
class Allocation(object):
    def __init__(self, stack, size):
        self.stack = stack
        self.count = 1
        self.size = size

    def update(self, size):
        self.count += 1
        self.size += size

def run_command_get_output(command):
        p = subprocess.Popen(command.split(),
                stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        return iter(p.stdout.readline, b'')

def run_command_get_pid(command):
        p = subprocess.Popen(command.split())
        return p.pid

examples = """
EXAMPLES:

./memleak -p $(pidof allocs)
        Trace allocations and display a summary of "leaked" (outstanding)
        allocations every 5 seconds
./memleak -p $(pidof allocs) -t
        Trace allocations and display each individual allocator function call
./memleak -ap $(pidof allocs) 10
        Trace allocations and display allocated addresses, sizes, and stacks
        every 10 seconds for outstanding allocations
./memleak -c "./allocs"
        Run the specified command and trace its allocations
./memleak
        Trace allocations in kernel mode and display a summary of outstanding
        allocations every 5 seconds
./memleak -o 60000
        Trace allocations in kernel mode and display a summary of outstanding
        allocations that are at least one minute (60 seconds) old
./memleak -s 5
        Trace roughly every 5th allocation, to reduce overhead
"""

description = """
Trace outstanding memory allocations that weren't freed.
Supports both user-mode allocations made with libc functions and kernel-mode
allocations made with kmalloc/kmem_cache_alloc/get_free_pages and corresponding
memory release functions.
"""

parser = argparse.ArgumentParser(description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=examples)
parser.add_argument("-p", "--pid", type=int, default=-1,
        help="the PID to trace; if not specified, trace kernel allocs")
parser.add_argument("-t", "--trace", action="store_true",
        help="print trace messages for each alloc/free call")
parser.add_argument("interval", nargs="?", default=5, type=int,
        help="interval in seconds to print outstanding allocations")
parser.add_argument("count", nargs="?", type=int,
        help="number of times to print the report before exiting")
parser.add_argument("-a", "--show-allocs", default=False, action="store_true",
        help="show allocation addresses and sizes as well as call stacks")
parser.add_argument("-o", "--older", default=500, type=int,
        help="prune allocations younger than this age in milliseconds")
parser.add_argument("-c", "--command",
        help="execute and trace the specified command")
parser.add_argument("--combined-only", default=False, action="store_true",
        help="show combined allocation statistics only")
parser.add_argument("-s", "--sample-rate", default=1, type=int,
        help="sample every N-th allocation to decrease the overhead")
parser.add_argument("-T", "--top", type=int, default=10,
        help="display only this many top allocating stacks (by size)")
parser.add_argument("-z", "--min-size", type=int,
        help="capture only allocations larger than this size")
parser.add_argument("-Z", "--max-size", type=int,
        help="capture only allocations smaller than this size")
parser.add_argument("-O", "--obj", type=str, default="c",
        help="attach to allocator functions in the specified object")
parser.add_argument("--ebpf", action="store_true",
        help=argparse.SUPPRESS)

args = parser.parse_args()

pid = args.pid
command = args.command
kernel_trace = (pid == -1 and command is None)
trace_all = args.trace
interval = args.interval
min_age_ns = 1e6 * args.older
sample_every_n = args.sample_rate
num_prints = args.count
top_stacks = args.top
min_size = args.min_size
max_size = args.max_size
obj = args.obj

if min_size is not None and max_size is not None and min_size > max_size:
        print("min_size (-z) can't be greater than max_size (-Z)")
        exit(1)

if command is not None:
        print("Executing '%s' and tracing the resulting process." % command)
        pid = run_command_get_pid(command)

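# The BPF program below is a template: the ALL-CAPS placeholders
# (SHOULD_PRINT, SAMPLE_EVERY_N, SIZE_FILTER, STACK_FLAGS, PAGE_SIZE) are
# substituted textually before compilation, specializing the program to the
# command-line options.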
bpf_source = """
#include <uapi/linux/ptrace.h>

struct alloc_info_t {
        u64 size;
        u64 timestamp_ns;
        int stack_id;
};

struct combined_alloc_info_t {
        u64 total_size;
        u64 number_of_allocs;
};

BPF_HASH(sizes, u64);
BPF_TABLE("hash", u64, struct alloc_info_t, allocs, 1000000);
BPF_HASH(memptrs, u64, u64);
BPF_STACK_TRACE(stack_traces, 10240);
BPF_TABLE("hash", u64, struct combined_alloc_info_t, combined_allocs, 10240);
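
/* Map roles:
 *   sizes           - size of the allocation in flight, keyed by pid/tgid
 *   allocs          - outstanding allocations, keyed by returned address
 *   memptrs         - posix_memalign's out-parameter, keyed by pid/tgid
 *   stack_traces    - call stacks referenced by allocs and combined_allocs
 *   combined_allocs - per-stack running totals of size and count
 */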

static inline void update_statistics_add(u64 stack_id, u64 sz) {
        struct combined_alloc_info_t *existing_cinfo;
        struct combined_alloc_info_t cinfo = {0};

        existing_cinfo = combined_allocs.lookup(&stack_id);
        if (existing_cinfo != 0)
                cinfo = *existing_cinfo;

        cinfo.total_size += sz;
        cinfo.number_of_allocs += 1;

        combined_allocs.update(&stack_id, &cinfo);
}

static inline void update_statistics_del(u64 stack_id, u64 sz) {
        struct combined_alloc_info_t *existing_cinfo;
        struct combined_alloc_info_t cinfo = {0};

        existing_cinfo = combined_allocs.lookup(&stack_id);
        if (existing_cinfo != 0)
                cinfo = *existing_cinfo;

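        /* Clamp at zero rather than underflowing: the matching alloc may
         * have been missed or sampled out, so sz can exceed the totals. */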
        if (sz >= cinfo.total_size)
                cinfo.total_size = 0;
        else
                cinfo.total_size -= sz;

        if (cinfo.number_of_allocs > 0)
                cinfo.number_of_allocs -= 1;

        combined_allocs.update(&stack_id, &cinfo);
}

static inline int gen_alloc_enter(struct pt_regs *ctx, size_t size) {
        SIZE_FILTER
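        /* When sampling (-s), keep roughly one in every SAMPLE_EVERY_N
         * allocations, using the nanosecond timestamp as a cheap
         * pseudo-random source. */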
        if (SAMPLE_EVERY_N > 1) {
                u64 ts = bpf_ktime_get_ns();
                if (ts % SAMPLE_EVERY_N != 0)
                        return 0;
        }

        u64 pid = bpf_get_current_pid_tgid();
        u64 size64 = size;
        sizes.update(&pid, &size64);

        if (SHOULD_PRINT)
                bpf_trace_printk("alloc entered, size = %u\\n", size);
        return 0;
}

static inline int gen_alloc_exit2(struct pt_regs *ctx, u64 address) {
        u64 pid = bpf_get_current_pid_tgid();
        u64* size64 = sizes.lookup(&pid);
        struct alloc_info_t info = {0};

        if (size64 == 0)
                return 0; // missed alloc entry

        info.size = *size64;
        sizes.delete(&pid);

        info.timestamp_ns = bpf_ktime_get_ns();
        info.stack_id = stack_traces.get_stackid(ctx, STACK_FLAGS);
        allocs.update(&address, &info);
        update_statistics_add(info.stack_id, info.size);

        if (SHOULD_PRINT) {
                bpf_trace_printk("alloc exited, size = %lu, result = %lx\\n",
                                 info.size, address);
        }
        return 0;
}

static inline int gen_alloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit2(ctx, PT_REGS_RC(ctx));
}

static inline int gen_free_enter(struct pt_regs *ctx, void *address) {
        u64 addr = (u64)address;
        struct alloc_info_t *info = allocs.lookup(&addr);
        if (info == 0)
                return 0;

        allocs.delete(&addr);
        update_statistics_del(info->stack_id, info->size);

        if (SHOULD_PRINT) {
                bpf_trace_printk("free entered, address = %lx, size = %lu\\n",
                                 address, info->size);
        }
        return 0;
}

int malloc_enter(struct pt_regs *ctx, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int malloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int free_enter(struct pt_regs *ctx, void *address) {
        return gen_free_enter(ctx, address);
}

int calloc_enter(struct pt_regs *ctx, size_t nmemb, size_t size) {
        return gen_alloc_enter(ctx, nmemb * size);
}

int calloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int realloc_enter(struct pt_regs *ctx, void *ptr, size_t size) {
        gen_free_enter(ctx, ptr);
        return gen_alloc_enter(ctx, size);
}

int realloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int posix_memalign_enter(struct pt_regs *ctx, void **memptr, size_t alignment,
                         size_t size) {
        u64 memptr64 = (u64)(size_t)memptr;
        u64 pid = bpf_get_current_pid_tgid();

        memptrs.update(&pid, &memptr64);
        return gen_alloc_enter(ctx, size);
}

int posix_memalign_exit(struct pt_regs *ctx) {
        u64 pid = bpf_get_current_pid_tgid();
        u64 *memptr64 = memptrs.lookup(&pid);
        void *addr;

        if (memptr64 == 0)
                return 0;

        memptrs.delete(&pid);

        if (bpf_probe_read(&addr, sizeof(void*), (void*)(size_t)*memptr64))
                return 0;

        u64 addr64 = (u64)(size_t)addr;
        return gen_alloc_exit2(ctx, addr64);
}

int aligned_alloc_enter(struct pt_regs *ctx, size_t alignment, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int aligned_alloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int valloc_enter(struct pt_regs *ctx, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int valloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int memalign_enter(struct pt_regs *ctx, size_t alignment, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int memalign_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int pvalloc_enter(struct pt_regs *ctx, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int pvalloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}
"""

bpf_source_kernel = """

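/* The tracepoint handlers below reuse the generic helpers from the program
 * above; the tracepoint args struct is cast to struct pt_regs * only to
 * satisfy the helpers' signatures. */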
TRACEPOINT_PROBE(kmem, kmalloc) {
        gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmalloc_node) {
        gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kfree) {
        return gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmem_cache_alloc) {
        gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmem_cache_alloc_node) {
        gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmem_cache_free) {
        return gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
}

TRACEPOINT_PROBE(kmem, mm_page_alloc) {
        gen_alloc_enter((struct pt_regs *)args, PAGE_SIZE << args->order);
        return gen_alloc_exit2((struct pt_regs *)args, args->pfn);
}

TRACEPOINT_PROBE(kmem, mm_page_free) {
        return gen_free_enter((struct pt_regs *)args, (void *)args->pfn);
}
"""

if kernel_trace:
        bpf_source += bpf_source_kernel

bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0")
bpf_source = bpf_source.replace("SAMPLE_EVERY_N", str(sample_every_n))
bpf_source = bpf_source.replace("PAGE_SIZE", str(resource.getpagesize()))

size_filter = ""
if min_size is not None and max_size is not None:
        size_filter = "if (size < %d || size > %d) return 0;" % \
                      (min_size, max_size)
elif min_size is not None:
        size_filter = "if (size < %d) return 0;" % min_size
elif max_size is not None:
        size_filter = "if (size > %d) return 0;" % max_size
bpf_source = bpf_source.replace("SIZE_FILTER", size_filter)

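# Kernel-mode tracing captures kernel stacks; for user-mode tracing,
# BPF_F_USER_STACK makes the stack walker capture user-space stacks instead.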
stack_flags = "BPF_F_REUSE_STACKID"
if not kernel_trace:
        stack_flags += "|BPF_F_USER_STACK"
bpf_source = bpf_source.replace("STACK_FLAGS", stack_flags)

if args.ebpf:
        print(bpf_source)
        exit()

bpf = BPF(text=bpf_source)

if not kernel_trace:
        print("Attaching to pid %d, Ctrl+C to quit." % pid)

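        # Attach a uprobe at each allocator's entry (capturing the requested
        # size) and a uretprobe at its return (capturing the returned
        # address); the two events are paired by pid/tgid in the BPF program.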
        def attach_probes(sym, fn_prefix=None, can_fail=False):
                if fn_prefix is None:
                        fn_prefix = sym

                try:
                        bpf.attach_uprobe(name=obj, sym=sym,
                                          fn_name=fn_prefix + "_enter",
                                          pid=pid)
                        bpf.attach_uretprobe(name=obj, sym=sym,
                                             fn_name=fn_prefix + "_exit",
                                             pid=pid)
                except Exception:
                        if can_fail:
                                return
                        else:
                                raise

        attach_probes("malloc")
        attach_probes("calloc")
        attach_probes("realloc")
        attach_probes("posix_memalign")
        attach_probes("valloc")
        attach_probes("memalign")
        attach_probes("pvalloc")
        attach_probes("aligned_alloc", can_fail=True)  # added in C11
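        # free returns nothing, so only an entry probe is needed.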
        bpf.attach_uprobe(name=obj, sym="free", fn_name="free_enter",
                          pid=pid)

else:
        print("Attaching to kernel allocators, Ctrl+C to quit.")

        # No probes are attached here; allocations are counted by attaching
        # to tracepoints instead.
        #
        # Memory allocations in the Linux kernel are not limited to
        # malloc/free equivalents; it is also common to allocate one or more
        # memory pages. The page allocator has two interfaces, one working
        # with page frame numbers (PFNs) and the other with page addresses,
        # and pages may be allocated through one interface and freed through
        # the other. Kernel code can easily convert PFNs to addresses and
        # back, but doing the same in an eBPF kprobe is hard without fragile
        # hacks.
        #
        # Fortunately, Linux exposes tracepoints for memory allocations,
        # which eBPF programs can instrument. The page allocation tracepoints
        # give access to PFNs for both allocator interfaces, so there is no
        # need to guess which allocation corresponds to which free.

def print_outstanding():
        print("[%s] Top %d stacks with outstanding allocations:" %
              (datetime.now().strftime("%H:%M:%S"), top_stacks))
        alloc_info = {}
        allocs = bpf["allocs"]
        stack_traces = bpf["stack_traces"]
        for address, info in sorted(allocs.items(), key=lambda a: a[1].size):
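                # Prune allocations younger than min_age_ns (-o): very recent
                # allocations are likely still in use, not leaked.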
                if BPF.monotonic_time() - min_age_ns < info.timestamp_ns:
                        continue
                if info.stack_id < 0:
                        continue
                if info.stack_id in alloc_info:
                        alloc_info[info.stack_id].update(info.size)
                else:
                        stack = list(stack_traces.walk(info.stack_id))
                        combined = []
                        for addr in stack:
                                combined.append(bpf.sym(addr, pid,
                                        show_module=True, show_offset=True))
                        alloc_info[info.stack_id] = Allocation(combined,
                                                               info.size)
                if args.show_allocs:
                        print("\taddr = %x size = %s" %
                              (address.value, info.size))
        to_show = sorted(alloc_info.values(),
                         key=lambda a: a.size)[-top_stacks:]
        for alloc in to_show:
                print("\t%d bytes in %d allocations from stack\n\t\t%s" %
                      (alloc.size, alloc.count,
                       b"\n\t\t".join(alloc.stack).decode("ascii")))

def print_outstanding_combined():
        stack_traces = bpf["stack_traces"]
        stacks = sorted(bpf["combined_allocs"].items(),
                        key=lambda a: -a[1].total_size)
        cnt = 1
        entries = []
        for stack_id, info in stacks:
                try:
                        trace = []
                        for addr in stack_traces.walk(stack_id.value):
                                sym = bpf.sym(addr, pid,
                                              show_module=True,
                                              show_offset=True)
                                trace.append(sym)
                        trace = b"\n\t\t".join(trace).decode("ascii")
                except KeyError:
                        trace = "stack information lost"

                entry = ("\t%d bytes in %d allocations from stack\n\t\t%s" %
                         (info.total_size, info.number_of_allocs, trace))
                entries.append(entry)

                cnt += 1
                if cnt > top_stacks:
                        break

        print("[%s] Top %d stacks with outstanding allocations:" %
              (datetime.now().strftime("%H:%M:%S"), top_stacks))

        print('\n'.join(reversed(entries)))

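# Main loop: with -t, stream raw trace messages as they arrive; otherwise
# sleep for the reporting interval, print a report, and stop after `count`
# reports (if given).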
count_so_far = 0
while True:
        if trace_all:
                print(bpf.trace_fields())
        else:
                try:
                        sleep(interval)
                except KeyboardInterrupt:
                        exit()
                if args.combined_only:
                        print_outstanding_combined()
                else:
                        print_outstanding()
                sys.stdout.flush()
                count_so_far += 1
                if num_prints is not None and count_so_far >= num_prints:
                        exit()