1#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3#
4# execsnoop Trace new processes via exec() syscalls.
5#           For Linux, uses BCC, eBPF. Embedded C.
6#
# USAGE: execsnoop [-h] [-t] [-x] [-q] [-n NAME] [-l LINE]
#                  [--max-args MAX_ARGS]
8#
# By default this prints up to a maximum of 19 arguments, plus the process
# name, so 20 fields in total (MAXARG). The limit can be changed with
# --max-args.
11#
12# This won't catch all new processes: an application may fork() but not exec().
13#
14# Copyright 2016 Netflix, Inc.
15# Licensed under the Apache License, Version 2.0 (the "License")
16#
17# 07-Feb-2016   Brendan Gregg   Created this.
18
19from __future__ import print_function
20from bcc import BPF
21from bcc.utils import ArgString, printb
22import bcc.utils as utils
23import argparse
24import ctypes as ct
25import re
26import time
27from collections import defaultdict
28
# arguments
examples = """examples:
    ./execsnoop           # trace all exec() syscalls
    ./execsnoop -x        # include failed exec()s
    ./execsnoop -t        # include timestamps
    ./execsnoop -q        # add "quotemarks" around arguments
    ./execsnoop -n main   # only print command lines containing "main"
    ./execsnoop -l tpkg   # only print command where arguments contains "tpkg"
"""


def _max_args_value(text):
    """Validate a --max-args value and return it as a decimal string.

    The value is substituted textually for the MAXARG placeholder in the
    embedded C program, so it is kept as a string. Parsing it as a base-10
    integer first rejects typos with a clear argparse error (instead of a
    BPF compile failure) and normalizes forms such as "020", which C would
    otherwise read as an octal literal.

    Raises argparse.ArgumentTypeError if the value is not an integer >= 1.
    """
    try:
        count = int(text, 10)
    except ValueError:
        raise argparse.ArgumentTypeError(
            "expected a positive integer, got %r" % text)
    if count < 1:
        raise argparse.ArgumentTypeError(
            "expected a positive integer, got %r" % text)
    return str(count)


parser = argparse.ArgumentParser(
    description="Trace exec() syscalls",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-t", "--timestamp", action="store_true",
    help="include timestamp on output")
parser.add_argument("-x", "--fails", action="store_true",
    help="include failed exec()s")
parser.add_argument("-q", "--quote", action="store_true",
    help="Add quotemarks (\") around arguments."
    )
parser.add_argument("-n", "--name",
    type=ArgString,
    help="only print commands matching this name (regex), any arg")
parser.add_argument("-l", "--line",
    type=ArgString,
    help="only print commands where arg contains this line (regex)")
# argparse also runs string defaults through `type`, so the default is
# validated the same way as a user-supplied value.
parser.add_argument("--max-args", default="20", type=_max_args_value,
    help="maximum number of arguments parsed and displayed, defaults to 20")
# hidden option: print the generated BPF C source and exit
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()
60
# define BPF program
#
# Embedded C source for the two probes attached to the execve syscall:
#   syscall__execve    - entry probe; emits one EVENT_ARG record for the
#                        filename and one per argv[] entry
#   do_ret_sys_execve  - return probe; emits a single EVENT_RET record
#                        carrying the syscall return value
# MAXARG is a textual placeholder that is replaced with the --max-args value
# before the program is compiled. The layout of struct data_t must stay in
# sync with the ctypes Data structure defined further down in this file.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
#include <linux/fs.h>

#define ARGSIZE  128

enum event_type {
    EVENT_ARG,
    EVENT_RET,
};

struct data_t {
    u32 pid;  // PID as in the userspace term (i.e. task->tgid in kernel)
    u32 ppid; // Parent PID as in the userspace term (i.e task->real_parent->tgid in kernel)
    char comm[TASK_COMM_LEN];
    enum event_type type;
    char argv[ARGSIZE];
    int retval;
};

BPF_PERF_OUTPUT(events);

static int __submit_arg(struct pt_regs *ctx, void *ptr, struct data_t *data)
{
    bpf_probe_read(data->argv, sizeof(data->argv), ptr);
    events.perf_submit(ctx, data, sizeof(struct data_t));
    return 1;
}

static int submit_arg(struct pt_regs *ctx, void *ptr, struct data_t *data)
{
    const char *argp = NULL;
    bpf_probe_read(&argp, sizeof(argp), ptr);
    if (argp) {
        return __submit_arg(ctx, (void *)(argp), data);
    }
    return 0;
}

int syscall__execve(struct pt_regs *ctx,
    const char __user *filename,
    const char __user *const __user *__argv,
    const char __user *const __user *__envp)
{
    // create data here and pass to submit_arg to save stack space (#555)
    struct data_t data = {};
    struct task_struct *task;

    data.pid = bpf_get_current_pid_tgid() >> 32;

    task = (struct task_struct *)bpf_get_current_task();
    // Some kernels, like Ubuntu 4.13.0-generic, return 0
    // as the real_parent->tgid.
    // We use the get_ppid function as a fallback in those cases. (#1883)
    data.ppid = task->real_parent->tgid;

    bpf_get_current_comm(&data.comm, sizeof(data.comm));
    data.type = EVENT_ARG;

    __submit_arg(ctx, (void *)filename, &data);

    // skip first arg, as we submitted filename
    #pragma unroll
    for (int i = 1; i < MAXARG; i++) {
        if (submit_arg(ctx, (void *)&__argv[i], &data) == 0)
             goto out;
    }

    // handle truncated argument list
    char ellipsis[] = "...";
    __submit_arg(ctx, (void *)ellipsis, &data);
out:
    return 0;
}

int do_ret_sys_execve(struct pt_regs *ctx)
{
    struct data_t data = {};
    struct task_struct *task;

    data.pid = bpf_get_current_pid_tgid() >> 32;

    task = (struct task_struct *)bpf_get_current_task();
    // Some kernels, like Ubuntu 4.13.0-generic, return 0
    // as the real_parent->tgid.
    // We use the get_ppid function as a fallback in those cases. (#1883)
    data.ppid = task->real_parent->tgid;

    bpf_get_current_comm(&data.comm, sizeof(data.comm));
    data.type = EVENT_RET;
    data.retval = PT_REGS_RC(ctx);
    events.perf_submit(ctx, &data, sizeof(data));

    return 0;
}
"""
159
# Fill in the MAXARG placeholder of the C source with the --max-args value
# (kept as a string on purpose: this is a plain textual substitution).
bpf_text = bpf_text.replace("MAXARG", args.max_args)

# --ebpf: show the final C source and stop before anything is loaded
if args.ebpf:
    print(bpf_text)
    exit()

# initialize BPF: compile the program, then hook both ends of execve --
# the entry probe collects the arguments, the return probe the result
b = BPF(text=bpf_text)
execve_fnname = b.get_syscall_fnname("execve")
b.attach_kprobe(
    event=execve_fnname,
    fn_name="syscall__execve")
b.attach_kretprobe(
    event=execve_fnname,
    fn_name="do_ret_sys_execve")
170
# header: column titles, with an optional leading TIME(s) column that is
# printed without a newline so the remaining titles continue the same line
if args.timestamp:
    print("%-8s" % "TIME(s)", end="")
print(
    "%-16s %-6s %-6s %3s %s"
    % ("PCOMM", "PID", "PPID", "RET", "ARGS")
)
175
# Sizes of the fixed-length char arrays in the C struct data_t.
TASK_COMM_LEN = 16      # linux/sched.h
ARGSIZE = 128           # should match #define in C above


class Data(ct.Structure):
    """ctypes mirror of the C `struct data_t` emitted by the BPF program.

    Field order and types must follow the C definition exactly, since perf
    buffer records are reinterpreted as this structure.
    """

    _fields_ = [
        ("pid", ct.c_uint),                     # u32 pid
        ("ppid", ct.c_uint),                    # u32 ppid
        ("comm", ct.c_char * TASK_COMM_LEN),    # char comm[TASK_COMM_LEN]
        ("type", ct.c_int),                     # enum event_type type
        ("argv", ct.c_char * ARGSIZE),          # char argv[ARGSIZE]
        ("retval", ct.c_int),                   # int retval
    ]
188
class EventType(object):
    """Record kinds, numbered like the C `enum event_type`."""

    # EVENT_ARG: one filename/argument string; EVENT_RET: execve() returned
    EVENT_ARG, EVENT_RET = range(2)
192
# Wall-clock reference for the relative TIME(s) column.
start_ts = time.time()

# Argument strings collected so far for each PID; filled by EVENT_ARG
# records and consumed when the matching EVENT_RET record arrives.
argv = defaultdict(list)
195
# This is best-effort PPID matching. Short-lived processes may exit
# before we get a chance to read the PPID.
# This is a fallback for when fetching the PPID from task->real_parent->tgid
# returns 0, which happens in some kernel versions.
def get_ppid(pid):
    """Return the parent PID of `pid` as listed in /proc, or 0 if unknown.

    Args:
        pid: process ID (integer) to look up.

    Returns:
        The integer from the "PPid:" line of /proc/<pid>/status, or 0 when
        the file cannot be read (e.g. the process already exited) or has no
        such line.
    """
    try:
        # Read as bytes: the status file also contains the process name
        # (see proc(5)), which is not guaranteed to be decodable in the
        # locale encoding. Text mode could raise UnicodeDecodeError here,
        # which is not an IOError and would escape this best-effort lookup.
        with open("/proc/%d/status" % pid, "rb") as status:
            for line in status:
                if line.startswith(b"PPid:"):
                    return int(line.split()[1])
    except IOError:
        # process is gone or /proc is unavailable: report "unknown"
        pass
    return 0
209
# process event
def print_event(cpu, data, size):
    """Perf buffer callback: collect exec arguments and print finished execs.

    EVENT_ARG records append one argument string to the per-PID list in
    `argv`. An EVENT_RET record ends the exec: the collected arguments are
    removed from `argv`, the -x/-n/-l filters are applied, and one output
    line is printed if the event passes them.

    Args:
        cpu: unused; part of the callback signature.
        data: pointer to the raw record, reinterpreted as a `Data` struct.
        size: unused; part of the callback signature.
    """
    event = ct.cast(data, ct.POINTER(Data)).contents

    if event.type == EventType.EVENT_ARG:
        argv[event.pid].append(event.argv)
        return
    if event.type != EventType.EVENT_RET:
        return

    # Take the collected arguments out of the table up front so the per-PID
    # state is released whether or not this exec ends up being printed.
    cmd_args = argv.pop(event.pid, [])

    if event.retval != 0 and not args.fails:
        return
    if args.name and not re.search(bytes(args.name), event.comm):
        return
    # the -l pattern is matched against the unquoted command line
    if args.line and not re.search(bytes(args.line), b' '.join(cmd_args)):
        return

    if args.quote:
        # Arguments are bytes, so the quoting must be done with bytes
        # literals; mixing in str raises TypeError on Python 3.
        cmd_args = [
            b"\"" + arg.replace(b"\"", b"\\\"") + b"\""
            for arg in cmd_args
        ]

    ppid = event.ppid if event.ppid > 0 else get_ppid(event.pid)
    ppid = b"%d" % ppid if ppid > 0 else b"?"
    argv_text = b' '.join(cmd_args).replace(b'\n', b'\\n')
    line = b"%-16s %-6d %-6s %3d %s" % (event.comm, event.pid,
           ppid, event.retval, argv_text)
    if args.timestamp:
        # Emit the timestamp as part of the same bytes line: writing it
        # separately through text-mode print() can reorder it relative to
        # the bytes written by printb().
        line = (b"%-8.3f" % (time.time() - start_ts)) + line
    printb(line)
244
245
# loop with callback to print_event
b["events"].open_perf_buffer(print_event)
while True:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop tracing: exit without a traceback
        exit()
250