1#!/usr/bin/env python
2# @lint-avoid-python-3-compatibility-imports
3#
4# sofdsnoop traces file descriptors passed via socket
5#           For Linux, uses BCC, eBPF. Embedded C.
6#
7# USAGE: sofdsnoop
8#
9# Copyright (c) 2018 Jiri Olsa.
10# Licensed under the Apache License, Version 2.0 (the "License")
11#
12# 30-Jul-2018   Jiri Olsa   Created this.
13
14from __future__ import print_function
15from bcc import ArgString, BPF
16import os
17import argparse
18import ctypes as ct
19from datetime import datetime, timedelta
20
# arguments
#
# -p, -t and -d are declared as integers so that argparse rejects malformed
# values with a usage error. The PID/TID value is later pasted into the C
# source of the BPF program, so it must never be free-form text.
examples = """examples:
    ./sofdsnoop           # trace file descriptors passes
    ./sofdsnoop -T        # include timestamps
    ./sofdsnoop -p 181    # only trace PID 181
    ./sofdsnoop -t 123    # only trace TID 123
    ./sofdsnoop -d 10     # trace for 10 seconds only
    ./sofdsnoop -n main   # only print process names containing "main"

"""
parser = argparse.ArgumentParser(
    description="Trace file descriptors passed via socket",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-T", "--timestamp", action="store_true",
    help="include timestamp on output")
parser.add_argument("-p", "--pid", type=int,
    help="trace this PID only")
parser.add_argument("-t", "--tid", type=int,
    help="trace this TID only")
parser.add_argument("-n", "--name",
    type=ArgString,
    help="only print process names containing this name")
parser.add_argument("-d", "--duration", type=int,
    help="total duration of trace in seconds")
args = parser.parse_args()
debug = 0

# These values are duplicated as #defines inside the BPF program text and
# must be kept in sync with it: the action codes tag each event and MAX_FD
# sizes the per-event descriptor array.
ACTION_SEND = 0
ACTION_RECV = 1
MAX_FD = 10

# Convert the optional run time (seconds) into a timedelta so the poll loop
# can compare it directly against elapsed wall-clock time.
if args.duration:
    args.duration = timedelta(seconds=args.duration)
55
# define BPF program
#
# The string below is C source handed to BCC. It records, per thread, the
# socket fd given to sendmsg()/recvmsg(), and emits one perf event for every
# group of up to MAX_FD descriptors found in an SCM_RIGHTS control message
# (send side: __scm_send entry; receive side: scm_detach_fds return).
#
# NOTE: allow_pid() contains a placeholder token that the script textually
# replaces with the PID/TID test before compiling. Every occurrence of that
# token is replaced, so it must appear exactly once in this text - do not
# mention it in C comments.
#
# NOTE: this is a normal (non-raw) Python string, so each backslash-newline
# in the SEND_1 macro is removed by Python and the macro reaches the C
# compiler as a single line. Do not put comments inside the macro body.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <uapi/linux/limits.h>
#include <linux/sched.h>
#include <linux/socket.h>
#include <net/sock.h>

#define MAX_FD 10
#define ACTION_SEND   0
#define ACTION_RECV   1

/* Event record pushed to user space; mirrored by the ctypes Data class. */
struct val_t {
    u64  id;
    u64  ts;
    int  action;
    int  sock_fd;
    int  fd_cnt;
    int  fd[MAX_FD];
    char comm[TASK_COMM_LEN];
};

/* pid_tgid -> control message pointer saved at scm_detach_fds entry */
BPF_HASH(detach_ptr, u64, struct cmsghdr *);
/* pid_tgid -> socket fd of the sendmsg/recvmsg call in progress */
BPF_HASH(sock_fd, u64, int);
BPF_PERF_OUTPUT(events);

/* Remember the socket fd of the syscall the current thread is executing. */
static void set_fd(int fd)
{
    u64 id = bpf_get_current_pid_tgid();

    sock_fd.update(&id, &fd);
}

/* Socket fd recorded for the current thread, or -1 when none is stored. */
static int get_fd(void)
{
    u64 id = bpf_get_current_pid_tgid();
    int *fd;

    fd = sock_fd.lookup(&id);
    return fd ? *fd : -1;
}

/* Forget the socket fd recorded for the current thread. */
static void put_fd(void)
{
    u64 id = bpf_get_current_pid_tgid();

    sock_fd.delete(&id);
}

/*
 * Copy one group of MAX_FD ints from data into val and submit it.
 * fd_cnt tells user space how many of the copied slots are meaningful.
 * Returns -1 when the probe read fails, 0 otherwise.
 */
static int sent_1(struct pt_regs *ctx, struct val_t *val, int num, void *data)
{
    val->fd_cnt = min(num, MAX_FD);

    if (bpf_probe_read(&val->fd[0], MAX_FD * sizeof(int), data))
        return -1;

    events.perf_submit(ctx, val, sizeof(*val));
    return 0;
}

/*
 * SEND_1 emits one event and advances to the next group of descriptors; it
 * returns from the enclosing function once nothing is left to report.
 * SEND_260 repeats it 26 times (26 * MAX_FD = 260 descriptors) without a
 * loop construct.
 */
#define SEND_1                                  \
    if (sent_1(ctx, &val, num, (void *) data))  \
        return 0;                               \
                                                \
    num -= MAX_FD;                              \
    if (num <= 0)                               \
        return 0;                               \
                                                \
    data += MAX_FD;

#define SEND_2   SEND_1 SEND_1
#define SEND_4   SEND_2 SEND_2
#define SEND_8   SEND_4 SEND_4
#define SEND_260 SEND_8 SEND_8 SEND_8 SEND_2

/*
 * Report the descriptors that follow the cmsghdr header. The descriptor
 * count is derived from cmsg_len; the timestamp is stored in microseconds.
 */
static int send(struct pt_regs *ctx, struct cmsghdr *cmsg, int action)
{
    struct val_t val = { 0 };
    int *data, num, fd;
    u64 tsp = bpf_ktime_get_ns();

    data = (void *) ((char *) cmsg + sizeof(struct cmsghdr));
    num  = (cmsg->cmsg_len - sizeof(struct cmsghdr)) / sizeof(int);

    val.id      = bpf_get_current_pid_tgid();
    val.action  = action;
    val.sock_fd = get_fd();
    val.ts      = tsp / 1000;

    if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) != 0)
        return 0;

    SEND_260
    return 0;
}

/* Returns 0 when the current task is excluded by the -p/-t option. */
static bool allow_pid(u64 id)
{
    u32 pid = id >> 32; // PID is higher part
    u32 tid = id;       // Cast and get the lower part

    FILTER

    return 1;
}

/* Send side: report SCM_RIGHTS control messages handed to __scm_send. */
int trace_scm_send_entry(struct pt_regs *ctx, struct socket *sock, struct msghdr *hdr)
{
    struct cmsghdr *cmsg = NULL;

    if (!allow_pid(bpf_get_current_pid_tgid()))
        return 0;

    if (hdr->msg_controllen >= sizeof(struct cmsghdr))
        cmsg = hdr->msg_control;

    if (!cmsg || (cmsg->cmsg_type != SCM_RIGHTS))
        return 0;

    return send(ctx, cmsg, ACTION_SEND);
};

/* Receive side, entry: save the control buffer pointer for the return probe. */
int trace_scm_detach_fds_entry(struct pt_regs *ctx, struct msghdr *hdr)
{
    struct cmsghdr *cmsg = NULL;
    u64 id = bpf_get_current_pid_tgid();

    if (!allow_pid(id))
        return 0;

    if (hdr->msg_controllen >= sizeof(struct cmsghdr))
        cmsg = hdr->msg_control;

    if (!cmsg)
        return 0;

    detach_ptr.update(&id, &cmsg);
    return 0;
};

/* Receive side, return: report the descriptors found in the saved buffer. */
int trace_scm_detach_fds_return(struct pt_regs *ctx)
{
    struct cmsghdr **cmsgp;
    u64 id = bpf_get_current_pid_tgid();

    if (!allow_pid(id))
        return 0;

    cmsgp = detach_ptr.lookup(&id);

    if (!cmsgp)
        return 0;

    /*
     * Copy the pointer out and drop the map entry before reporting, so the
     * map does not keep one stale element for every thread that ever
     * received descriptors.
     */
    struct cmsghdr *cmsg = *cmsgp;

    detach_ptr.delete(&id);

    return send(ctx, cmsg, ACTION_RECV);
}

/* sendmsg entry: remember which socket fd the message goes through. */
int syscall__sendmsg(struct pt_regs *ctx, u64 fd, u64 msg, u64 flags)
{
    if (!allow_pid(bpf_get_current_pid_tgid()))
        return 0;

    set_fd(fd);
    return 0;
}

/* sendmsg return: the recorded socket fd is no longer valid. */
int trace_sendmsg_return(struct pt_regs *ctx)
{
    if (!allow_pid(bpf_get_current_pid_tgid()))
        return 0;

    put_fd();
    return 0;
}

/* recvmsg entry: remember which socket fd the message arrives on. */
int syscall__recvmsg(struct pt_regs *ctx, u64 fd, u64 msg, u64 flags)
{
    if (!allow_pid(bpf_get_current_pid_tgid()))
        return 0;

    set_fd(fd);
    return 0;
}

/* recvmsg return: the recorded socket fd is no longer valid. */
int trace_recvmsg_return(struct pt_regs *ctx)
{
    if (!allow_pid(bpf_get_current_pid_tgid()))
        return 0;

    put_fd();
    return 0;
}

"""
255
# Build the C statement that replaces the placeholder inside allow_pid().
# A TID filter takes precedence over a PID filter; with neither option the
# placeholder is simply removed.
if args.tid:
    filter_stmt = 'if (tid != %s) { return 0; }' % args.tid
elif args.pid:
    filter_stmt = 'if (pid != %s) { return 0; }' % args.pid
else:
    filter_stmt = ''
bpf_text = bpf_text.replace('FILTER', filter_stmt)
264
# initialize BPF
b = BPF(text=bpf_text)

# Syscall probes record the socket fd on entry and clear it on return.
# Each pair is attached only when the resolved syscall symbol is known.
for syscall, entry_fn, return_fn in (
        ("sendmsg", "syscall__sendmsg", "trace_sendmsg_return"),
        ("recvmsg", "syscall__recvmsg", "trace_recvmsg_return")):
    syscall_fnname = b.get_syscall_fnname(syscall)
    if BPF.ksymname(syscall_fnname) != -1:
        b.attach_kprobe(event=syscall_fnname, fn_name=entry_fn)
        b.attach_kretprobe(event=syscall_fnname, fn_name=return_fn)

# Kernel-internal hooks where the SCM_RIGHTS payload is actually visible.
b.attach_kprobe(event="__scm_send", fn_name="trace_scm_send_entry")
b.attach_kprobe(event="scm_detach_fds",
                fn_name="trace_scm_detach_fds_entry")
b.attach_kretprobe(event="scm_detach_fds",
                   fn_name="trace_scm_detach_fds_return")
281
# Size of the comm[] array in the event record (TASK_COMM_LEN, linux/sched.h).
TASK_COMM_LEN = 16

# Timestamp of the first printed event; stays 0 until one has been seen.
initial_ts = 0

class Data(ct.Structure):
    """User-space view of the struct val_t record emitted by the BPF program.

    Field order and types have to match the C definition exactly.
    """
    _fields_ = [
        # pid_tgid of the task; the low 32 bits are printed as the TID
        ("id", ct.c_ulonglong),
        # bpf_ktime_get_ns() divided by 1000
        ("ts", ct.c_ulonglong),
        # ACTION_SEND or ACTION_RECV
        ("action", ct.c_int),
        # fd stored at sendmsg/recvmsg entry, -1 when none was stored
        ("sock_fd", ct.c_int),
        # number of meaningful entries in fd[]
        ("fd_cnt", ct.c_int),
        ("fd", ct.c_int * MAX_FD),
        ("comm", ct.c_char * TASK_COMM_LEN),
    ]
296
# header: column titles, preceded by a time column when -T was given
header = "%-6s %-6s %-16s %-25s %-5s %s" % (
    "ACTION", "TID", "COMM", "SOCKET", "FD", "NAME")
if args.timestamp:
    header = ("%-14s" % "TIME(s)") + header
print(header)
302
def get_file(pid, fd):
    """Return what /proc/<pid>/fd/<fd> links to, or "N/A" if unreadable.

    Any OSError from the lookup (missing process, closed descriptor,
    insufficient permission, ...) is reported as "N/A".
    """
    link_path = "/proc/%d/fd/%d" % (pid, fd)
    try:
        target = os.readlink(link_path)
    except OSError:
        target = "N/A"
    return target
309
310# process event
def print_event(cpu, data, size):
    """Perf-buffer callback: print one output line per descriptor in an event.

    Args:
        cpu: CPU the event came from (unused).
        data: raw pointer to the event record, interpreted as a Data struct.
        size: size of the record in bytes (unused).

    Events whose command name does not contain the -n/--name argument are
    skipped, as are events that carry no descriptors.
    """
    global initial_ts

    event = ct.cast(data, ct.POINTER(Data)).contents

    if args.name and bytes(args.name) not in event.comm:
        return

    # Never index past the fixed-size fd[] array, whatever the count says.
    cnt = min(MAX_FD, event.fd_cnt)
    if cnt <= 0:
        return

    # The first printed event defines time zero for the TIME(s) column.
    if not initial_ts:
        initial_ts = event.ts

    tid = event.id & 0xffffffff
    is_send = event.action == ACTION_SEND

    # Everything except the fd columns is identical for all descriptors of
    # one event, so it is formatted (and the socket link resolved) only once.
    prefix = ""
    if args.timestamp:
        delta = event.ts - initial_ts
        prefix = "%-14.9f" % (float(delta) / 1000000)

    # comm is raw bytes copied from the kernel and need not be valid UTF-8;
    # 'replace' keeps an odd name from raising inside the callback.
    prefix += "%-6s %-6d %-16s " % (
        "SEND" if is_send else "RECV",
        tid, event.comm.decode('utf-8', 'replace'))

    sock = "%d:%s" % (event.sock_fd, get_file(tid, event.sock_fd))
    prefix += "%-25s " % sock

    for i in range(cnt):
        fd = event.fd[i]
        # File names are only looked up for the sending side.
        fd_file = get_file(tid, fd) if is_send else ""
        print("%s%-5d %s" % (prefix, fd, fd_file))
340
# loop with callback to print_event
b["events"].open_perf_buffer(print_event, page_cnt=64)
start_time = datetime.now()
while True:
    # Leave once the optional -d/--duration time has elapsed.
    if args.duration and not (datetime.now() - start_time < args.duration):
        break
    try:
        b.perf_buffer_poll(timeout=1000)
    except KeyboardInterrupt:
        exit()
349