1#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3#
4# uobjnew  Summarize object allocations in high-level languages.
5#          For Linux, uses BCC, eBPF.
6#
# USAGE: uobjnew [-h] [-l {c,java,ruby,tcl}] [-C TOP_COUNT] [-S TOP_SIZE] [-v]
#                pid [interval]
8#
9# Copyright 2016 Sasha Goldshtein
10# Licensed under the Apache License, Version 2.0 (the "License")
11#
12# 25-Oct-2016   Sasha Goldshtein   Created this.
13
14from __future__ import print_function
15import argparse
16from bcc import BPF, USDT, utils
17from time import sleep
18import os
19
# Languages this tool can trace; also the candidate list handed to language
# auto-detection and the set of values accepted by -l.
# C needs to be the last language, so it is kept at the end of the list
# (presumably so the more specific runtimes are matched before the generic
# "c" candidate -- verify against utils.detect_language).
languages = ["java", "ruby", "tcl", "c"]
22
# Epilog shown verbatim at the bottom of --help.
examples = """examples:
    ./uobjnew -l java 145         # summarize Java allocations in process 145
    ./uobjnew -l c 2020 1         # grab malloc() sizes and print every second
    ./uobjnew -l ruby 6712 -C 10  # top 10 Ruby types by number of allocations
    ./uobjnew -l ruby 6712 -S 10  # top 10 Ruby types by total size
"""

# RawDescriptionHelpFormatter keeps the hand-aligned epilog above intact.
parser = argparse.ArgumentParser(
    epilog=examples,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description="Summarize object allocations in high-level languages.")

# What to trace: the language is optional, the target pid is mandatory.
parser.add_argument(
    "-l", "--language",
    help="language to trace",
    choices=languages)
parser.add_argument(
    "pid",
    help="process id to attach to",
    type=int)

# When omitted, a single summary is printed once tracing is interrupted.
parser.add_argument(
    "interval",
    help="print every specified number of seconds",
    nargs='?',
    type=int)

# Output trimming: -C ranks by allocation count, -S by allocated bytes.
parser.add_argument(
    "-C", "--top-count",
    help="number of most frequently allocated types to print",
    type=int)
parser.add_argument(
    "-S", "--top-size",
    help="number of largest types by allocated bytes to print",
    type=int)

# Debugging aids; --ebpf is deliberately hidden from the help output.
parser.add_argument(
    "-v", "--verbose",
    help="verbose mode: print the BPF program (for debugging purposes)",
    action="store_true")
parser.add_argument(
    "--ebpf",
    help=argparse.SUPPRESS,
    action="store_true")

args = parser.parse_args()
47
# An explicit -l takes precedence; without it, fall back to auto-detection
# over the supported languages for the target pid.
language = args.language or utils.detect_language(languages, args.pid)
51
# Common prelude of the BPF program shared by every language handler.
# MALLOC_TRACING is substituted with 1 for C (allocations are keyed by
# requested size) and 0 otherwise (allocations are keyed by a type name).
program = """
#include <linux/ptrace.h>

struct key_t {
#if MALLOC_TRACING
    u64 size;
#else
    char name[50];
#endif
};

struct val_t {
    u64 total_size;
    u64 num_allocs;
};

BPF_HASH(allocs, struct key_t, struct val_t);
""".replace(
    "MALLOC_TRACING", str(int(language == "c")))
70
# USDT context for the target process; handlers below register their probes
# on it before the BPF program is compiled.
usdt = USDT(pid=args.pid)

# Nothing to trace if neither -l nor auto-detection produced a language we
# have a handler for.
if language not in ("c", "java", "ruby", "tcl"):
    print("No language detected; use -l to trace a language.")
    exit(1)

if language == "c":
    # C: keyed by requested size. No USDT probe is enabled here; the handler
    # takes the size as a function argument.
    program += """
int alloc_entry(struct pt_regs *ctx, size_t size) {
    struct key_t key = {};
    struct val_t *valp, zero = {};
    key.size = size;
    valp = allocs.lookup_or_init(&key, &zero);
    valp->total_size += size;
    valp->num_allocs += 1;
    return 0;
}
    """
elif language == "java":
    # Java: probe argument 2 is read as a pointer to the class name and
    # argument 4 as the allocation size.
    program += """
int alloc_entry(struct pt_regs *ctx) {
    struct key_t key = {};
    struct val_t *valp, zero = {};
    u64 classptr = 0, size = 0;
    bpf_usdt_readarg(2, ctx, &classptr);
    bpf_usdt_readarg(4, ctx, &size);
    bpf_probe_read(&key.name, sizeof(key.name), (void *)classptr);
    valp = allocs.lookup_or_init(&key, &zero);
    valp->total_size += size;
    valp->num_allocs += 1;
    return 0;
}
    """
    usdt.enable_probe_or_bail("object__alloc", "alloc_entry")
elif language == "ruby":
    # Ruby: generic object creation is keyed by the class name read from
    # probe argument 1 and only counted (no size is available there).
    program += """
int object_alloc_entry(struct pt_regs *ctx) {
    struct key_t key = {};
    struct val_t *valp, zero = {};
    u64 classptr = 0;
    bpf_usdt_readarg(1, ctx, &classptr);
    bpf_probe_read(&key.name, sizeof(key.name), (void *)classptr);
    valp = allocs.lookup_or_init(&key, &zero);
    valp->num_allocs += 1;  // We don't know the size, unfortunately
    return 0;
}
    """
    usdt.enable_probe_or_bail("object__create", "object_alloc_entry")

    # The built-in container types each get a dedicated handler stamped out
    # from this template; THETHING becomes both the function-name prefix and
    # the fixed key name, and probe argument 1 is read as the size.
    typed_handler_template = """
int THETHING_alloc_entry(struct pt_regs *ctx) {
    struct key_t key = { .name = "THETHING" };
    struct val_t *valp, zero = {};
    u64 size = 0;
    bpf_usdt_readarg(1, ctx, &size);
    valp = allocs.lookup_or_init(&key, &zero);
    valp->total_size += size;
    valp->num_allocs += 1;
    return 0;
}
    """
    for kind in ("string", "hash", "array"):
        handler_name = "%s_alloc_entry" % kind
        program += typed_handler_template.replace("THETHING", kind)
        usdt.enable_probe_or_bail("%s__create" % kind, handler_name)
elif language == "tcl":
    # Tcl: every object creation is counted under a single "<ALL>" bucket;
    # no size is recorded.
    program += """
int alloc_entry(struct pt_regs *ctx) {
    struct key_t key = { .name = "<ALL>" };
    struct val_t *valp, zero = {};
    valp = allocs.lookup_or_init(&key, &zero);
    valp->num_allocs += 1;
    return 0;
}
    """
    usdt.enable_probe_or_bail("obj__create", "alloc_entry")
157
158
# Debug output: -v additionally dumps the text generated by the USDT
# context; both -v and --ebpf print the assembled program, and --ebpf stops
# here without loading anything.
if args.verbose:
    print(usdt.get_text())
if args.verbose or args.ebpf:
    print(program)
if args.ebpf:
    exit()

# Compile and load the program together with the enabled USDT probes.
bpf = BPF(usdt_contexts=[usdt], text=program)

# C tracing enables no USDT probe above; instead hook malloc() in libc for
# the target process with a uprobe.
if language == "c":
    bpf.attach_uprobe(
        name="c", sym="malloc", fn_name="alloc_entry", pid=args.pid)
170
# Reporting loop.
#
# With an interval, a summary is printed and the table cleared every
# `interval` seconds until Ctrl-C; without one, the script sleeps until
# Ctrl-C and prints a single summary. In both cases the summary collected up
# to the interrupt is printed before exiting.
exit_signaled = False
print("Tracing allocations in process %d (language: %s)... Ctrl-C to quit." %
      (args.pid, language or "none"))
while True:
    try:
        sleep(args.interval or 99999999)
    except KeyboardInterrupt:
        # Remember the interrupt but still fall through to print what we have.
        exit_signaled = True
    print()

    # -C ranks by allocation count, -S by allocated bytes; with neither, all
    # entries are shown ordered by allocated bytes. Sorting is ascending, so
    # the largest entries end up at the bottom and the "top N" are the tail.
    if args.top_count:
        sort_field, limit = "num_allocs", args.top_count
    elif args.top_size:
        sort_field, limit = "total_size", args.top_size
    else:
        sort_field, limit = "total_size", None
    data = sorted(bpf["allocs"].items(),
                  key=lambda kv: getattr(kv[1], sort_field))
    if limit:
        data = data[-limit:]

    print("%-30s %8s %12s" % ("NAME/TYPE", "# ALLOCS", "# BYTES"))
    for key, value in data:
        if language == "c":
            obj_type = "block size %d" % key.size
        else:
            obj_type = key.name
            # On Python 3 the char[] key field is bytes, which "%s" would
            # render as b'...'; decode it so the name prints cleanly. On
            # Python 2 it is already a str and is left untouched.
            if not isinstance(obj_type, str):
                obj_type = obj_type.decode("utf-8", "replace")
        print("%-30s %8d %12d" %
              (obj_type, value.num_allocs, value.total_size))

    if args.interval and not exit_signaled:
        # Start the next interval from an empty table.
        bpf["allocs"].clear()
    else:
        exit()
201