1#!/usr/bin/env python
2#
3# memleak   Trace and display outstanding allocations to detect
4#           memory leaks in user-mode processes and the kernel.
5#
6# USAGE: memleak [-h] [-p PID] [-t] [-a] [-o OLDER] [-c COMMAND]
7#                [-s SAMPLE_RATE] [-d STACK_DEPTH] [-T TOP] [-z MIN_SIZE]
8#                [-Z MAX_SIZE]
9#                [interval] [count]
10#
11# Licensed under the Apache License, Version 2.0 (the "License")
12# Copyright (C) 2016 Sasha Goldshtein.
13
14from bcc import BPF
15from time import sleep
16from datetime import datetime
17import argparse
18import subprocess
19import os
20
def decode_stack(bpf, pid, info):
        """Render the call stack recorded for one allocation as text.

        bpf  -- object whose sym(addr, pid, show_offset=True) resolves an
                address to a printable symbol
        pid  -- process id handed to bpf.sym() for symbol resolution
        info -- allocation record exposing num_frames and callstack

        Returns "???" when no frames were recorded (num_frames <= 0);
        otherwise every resolved frame formatted as " <symbol> ;" and
        concatenated in callstack order.
        """
        frame_count = info.num_frames
        if frame_count <= 0:
                return "???"
        return "".join(
                " %s ;" % bpf.sym(info.callstack[idx], pid, show_offset=True)
                for idx in range(frame_count))
29
def run_command_get_output(command):
        """Start `command` and return an iterator over its output lines.

        command -- command line as a single string.  It is tokenized with
                   shell-like rules (shlex.split), so quoted arguments that
                   contain whitespace stay together; no shell is invoked.

        stderr is merged into stdout.  The returned iterator yields raw
        byte lines (including the trailing newline) until the stream
        reaches end-of-file.
        """
        # Imported here so this helper stays self-contained.
        import shlex
        p = subprocess.Popen(shlex.split(command),
                stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        # readline() returns b'' only at EOF, which ends the iteration.
        return iter(p.stdout.readline, b'')
34
def run_command_get_pid(command):
        """Start `command` in the background and return its process id.

        command -- command line as a single string.  It is tokenized with
                   shell-like rules (shlex.split), so quoted arguments that
                   contain whitespace stay together; no shell is invoked.

        The child inherits this process's standard streams and is not
        waited for here.
        """
        # Imported here so this helper stays self-contained.
        import shlex
        p = subprocess.Popen(shlex.split(command))
        return p.pid
38
# Usage examples, shown verbatim after the option list in --help output.
examples = """
EXAMPLES:

./memleak -p $(pidof allocs)
        Trace allocations and display a summary of "leaked" (outstanding)
        allocations every 5 seconds
./memleak -p $(pidof allocs) -t
        Trace allocations and display each individual call to malloc/free
./memleak -ap $(pidof allocs) 10
        Trace allocations and display allocated addresses, sizes, and stacks
        every 10 seconds for outstanding allocations
./memleak -c "./allocs"
        Run the specified command and trace its allocations
./memleak
        Trace allocations in kernel mode and display a summary of outstanding
        allocations every 5 seconds
./memleak -o 60000
        Trace allocations in kernel mode and display a summary of outstanding
        allocations that are at least one minute (60 seconds) old
./memleak -s 5
        Trace roughly every 5th allocation, to reduce overhead
"""

# One-paragraph summary shown at the top of --help output.
description = """
Trace outstanding memory allocations that weren't freed.
Supports both user-mode allocations made with malloc/free and kernel-mode
allocations made with kmalloc/kfree.
"""

# RawDescriptionHelpFormatter keeps the hand-formatted description and
# examples exactly as written instead of re-wrapping them.
parser = argparse.ArgumentParser(
        description=description,
        epilog=examples,
        formatter_class=argparse.RawDescriptionHelpFormatter)

# Options are registered in the order they should appear in --help.
parser.add_argument(
        "-p", "--pid", default=-1, type=int,
        help="the PID to trace; if not specified, trace kernel allocs")
parser.add_argument(
        "-t", "--trace", action="store_true",
        help="print trace messages for each alloc/free call")
parser.add_argument(
        "interval", type=int, nargs="?", default=5,
        help="interval in seconds to print outstanding allocations")
parser.add_argument(
        "count", type=int, nargs="?",
        help="number of times to print the report before exiting")
# store_true already defaults to False, so no explicit default is needed.
parser.add_argument(
        "-a", "--show-allocs", action="store_true",
        help="show allocation addresses and sizes as well as call stacks")
parser.add_argument(
        "-o", "--older", type=int, default=500,
        help="prune allocations younger than this age in milliseconds")
parser.add_argument(
        "-c", "--command",
        help="execute and trace the specified command")
parser.add_argument(
        "-s", "--sample-rate", type=int, default=1,
        help="sample every N-th allocation to decrease the overhead")
parser.add_argument(
        "-d", "--stack-depth", type=int, default=10,
        help="maximum stack depth to capture")
parser.add_argument(
        "-T", "--top", default=10, type=int,
        help="display only this many top allocating stacks (by size)")
parser.add_argument(
        "-z", "--min-size", type=int,
        help="capture only allocations larger than this size")
parser.add_argument(
        "-Z", "--max-size", type=int,
        help="capture only allocations smaller than this size")
95
args = parser.parse_args()

# Tracing configuration derived from the command line.
command = args.command
pid = args.pid
# With neither a PID nor a command to run, kernel allocations are traced.
kernel_trace = (command is None and pid == -1)
trace_all = args.trace
interval = args.interval
num_prints = args.count
# -o is given in milliseconds; allocation timestamps are in nanoseconds.
min_age_ns = args.older * 1e6
sample_every_n = args.sample_rate
# Two more frames are captured than requested; alloc_exit in the BPF
# program subtracts 2 from the recorded frame count.
max_stack_size = 2 + args.stack_depth
top_stacks = args.top
min_size = args.min_size
max_size = args.max_size

# Reject an empty size window before any tracing is set up.
if min_size is not None and max_size is not None:
        if min_size > max_size:
                print("min_size (-z) can't be greater than max_size (-Z)")
                exit(1)

# -c: launch the target ourselves and trace the process that results.
if command is not None:
        print("Executing '%s' and tracing the resulting process." % command)
        pid = run_command_get_pid(command)
118
# C source of the BPF program.  The upper-case placeholders
# (MAX_STACK_SIZE, GRAB_ONE_FRAME, SIZE_FILTER, SAMPLE_EVERY_N,
# SHOULD_PRINT) are substituted with concrete text before compilation.
bpf_source = """
#include <uapi/linux/ptrace.h>

struct alloc_info_t {
        u64 size;
        u64 timestamp_ns;
        int num_frames;
        u64 callstack[MAX_STACK_SIZE];
};

// sizes:  pid/tgid -> requested size, bridging alloc entry and return
// allocs: returned address -> details of the outstanding allocation
BPF_HASH(sizes, u64);
BPF_HASH(allocs, u64, struct alloc_info_t);

// Adapted from https://github.com/iovisor/bcc/tools/offcputime.py
static u64 get_frame(u64 *bp) {
        if (*bp) {
                // The following stack walker is x86_64 specific
                u64 ret = 0;
                if (bpf_probe_read(&ret, sizeof(ret), (void *)(*bp+8)))
                        return 0;
                if (bpf_probe_read(bp, sizeof(*bp), (void *)*bp))
                        *bp = 0;
                return ret;
        }
        return 0;
}
static int grab_stack(struct pt_regs *ctx, struct alloc_info_t *info)
{
        int depth = 0;
        u64 bp = ctx->bp;
        GRAB_ONE_FRAME
        return depth;
}

int alloc_enter(struct pt_regs *ctx, size_t size)
{
        SIZE_FILTER
        if (SAMPLE_EVERY_N > 1) {
                u64 ts = bpf_ktime_get_ns();
                if (ts % SAMPLE_EVERY_N != 0)
                        return 0;
        }

        u64 pid = bpf_get_current_pid_tgid();
        u64 size64 = size;
        sizes.update(&pid, &size64);

        if (SHOULD_PRINT)
                bpf_trace_printk("alloc entered, size = %u\\n", size);
        return 0;
}

int alloc_exit(struct pt_regs *ctx)
{
        u64 address = ctx->ax;
        u64 pid = bpf_get_current_pid_tgid();
        u64* size64 = sizes.lookup(&pid);
        struct alloc_info_t info = {0};

        if (size64 == 0)
                return 0; // missed alloc entry

        info.size = *size64;
        sizes.delete(&pid);

        // A NULL result means the allocation failed: there is no memory
        // to leak, so it must not be recorded as outstanding.
        if (address != 0) {
                info.timestamp_ns = bpf_ktime_get_ns();
                info.num_frames = grab_stack(ctx, &info) - 2;
                allocs.update(&address, &info);
        }

        if (SHOULD_PRINT) {
                bpf_trace_printk("alloc exited, size = %lu, result = %lx,"
                                 "frames = %d\\n", info.size, address,
                                 info.num_frames);
        }
        return 0;
}

int free_enter(struct pt_regs *ctx, void *address)
{
        u64 addr = (u64)address;
        struct alloc_info_t *info = allocs.lookup(&addr);
        if (info == 0)
                return 0;

        allocs.delete(&addr);

        if (SHOULD_PRINT) {
                bpf_trace_printk("free entered, address = %lx, size = %lu\\n",
                                 address, info->size);
        }
        return 0;
}
"""
# Fill in the placeholders of the BPF program text.  GRAB_ONE_FRAME is
# expanded into max_stack_size copies of the frame-grabbing statement.
bpf_source = (bpf_source
        .replace("SHOULD_PRINT", "1" if trace_all else "0")
        .replace("SAMPLE_EVERY_N", str(sample_every_n))
        .replace("GRAB_ONE_FRAME",
                "\tif (!(info->callstack[depth++] = get_frame(&bp))) return depth;\n"
                * max_stack_size)
        .replace("MAX_STACK_SIZE", str(max_stack_size)))

# Build the optional early-return that drops allocations outside the
# requested size window; it stays empty when neither bound was given.
size_filter = " || ".join(
        clause for clause in (
                "size < %d" % min_size if min_size is not None else None,
                "size > %d" % max_size if max_size is not None else None)
        if clause is not None)
if size_filter:
        size_filter = "if (%s) return 0;" % size_filter
bpf_source = bpf_source.replace("SIZE_FILTER", size_filter)
227
# Compile the finished program text.
bpf_program = BPF(text=bpf_source)

# Hook the allocator entry, allocator return and free entry points, either
# in the kernel or in the C library ("c") of the traced process.
if kernel_trace:
        print("Attaching to kmalloc and kfree, Ctrl+C to quit.")
        bpf_program.attach_kprobe(
                event="__kmalloc", fn_name="alloc_enter")
        bpf_program.attach_kretprobe(
                event="__kmalloc", fn_name="alloc_exit")
        bpf_program.attach_kprobe(
                event="kfree", fn_name="free_enter")
else:
        print("Attaching to malloc and free in pid %d, Ctrl+C to quit." % pid)
        bpf_program.attach_uprobe(
                name="c", sym="malloc", fn_name="alloc_enter", pid=pid)
        bpf_program.attach_uretprobe(
                name="c", sym="malloc", fn_name="alloc_exit", pid=pid)
        bpf_program.attach_uprobe(
                name="c", sym="free", fn_name="free_enter", pid=pid)
243
def print_outstanding():
        """Print a report of the allocations that are still outstanding.

        Reads the "allocs" table of the BPF program, ignores entries that
        are younger than min_age_ns, and groups the rest by their decoded
        call stack.  With -a/--show-allocs each considered allocation is
        also listed individually.  At most top_stacks stacks are printed,
        ordered by total outstanding bytes with the largest last.
        """
        print("[%s] Top %d stacks with outstanding allocations:" %
              (datetime.now().strftime("%H:%M:%S"), top_stacks))
        # One cutoff per report, so every entry is judged against the same
        # instant: anything stamped later than this is still too young.
        newest_reportable_ns = BPF.monotonic_time() - min_age_ns
        stacks = {}    # decoded stack -> (allocation count, total bytes)
        allocs = bpf_program.get_table("allocs")
        for address, info in sorted(allocs.items(), key=lambda a: a[1].size):
                if info.timestamp_ns > newest_reportable_ns:
                        continue
                stack = decode_stack(bpf_program, pid, info)
                count, total = stacks.get(stack, (0, 0))
                stacks[stack] = (count + 1, total + info.size)
                if args.show_allocs:
                        print("\taddr = %x size = %s" %
                              (address.value, info.size))
        # A slice of [-0:] would select the whole list and a negative count
        # would drop the smallest stacks, so only slice for positive counts.
        if top_stacks > 0:
                by_total = sorted(stacks.items(), key=lambda s: s[1][1])
                to_show = by_total[-top_stacks:]
        else:
                to_show = []
        for stack, (count, size) in to_show:
                print("\t%d bytes in %d allocations from stack\n\t\t%s" %
                      (size, count, stack.replace(";", "\n\t\t")))
265
# Main loop.  In trace mode (-t) every trace message is printed as it
# arrives; otherwise a report is printed every `interval` seconds until
# `count` reports have been produced (forever when no count was given).
count_so_far = 0
try:
        while True:
                if trace_all:
                        print(bpf_program.trace_fields())
                        continue
                sleep(interval)
                print_outstanding()
                count_so_far += 1
                if num_prints is not None and count_so_far >= num_prints:
                        exit()
except KeyboardInterrupt:
        # Ctrl+C is the documented way to stop; leave quietly no matter
        # whether it arrives while sleeping, tracing or printing a report.
        exit()
279