1#!/usr/bin/env python
2# @lint-avoid-python-3-compatibility-imports
3#
4# tcpdrop   Trace TCP kernel-dropped packets/segments.
5#           For Linux, uses BCC, eBPF. Embedded C.
6#
7# This provides information such as packet details, socket state, and kernel
8# stack trace for packets/segments that were dropped via tcp_drop().
9#
# USAGE: tcpdrop [-h]
11#
12# This uses dynamic tracing of kernel functions, and will need to be updated
13# to match kernel changes.
14#
15# Copyright 2018 Netflix, Inc.
16# Licensed under the Apache License, Version 2.0 (the "License")
17#
18# 30-May-2018   Brendan Gregg   Created this.
19
20from __future__ import print_function
21from bcc import BPF
22import argparse
23from time import strftime
24from socket import inet_ntop, AF_INET, AF_INET6
25from struct import pack
26import ctypes as ct
27from time import sleep
28from bcc import tcp
29
# arguments
# Verbose dumping of the BPF source is off unless this is edited to non-zero.
debug = 0

# Shown verbatim after the option list in --help output.
examples = """examples:
    ./tcpdrop           # trace kernel TCP drops
"""

parser = argparse.ArgumentParser(
    epilog=examples,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description="Trace TCP drops by the kernel")
# --ebpf is accepted but hidden from --help (help text is suppressed).
parser.add_argument("--ebpf", help=argparse.SUPPRESS, action="store_true")
args = parser.parse_args()
42
# define BPF program
#
# Embedded C source handed to BCC. trace_tcp_drop() is attached to the
# kernel's tcp_drop() as a kprobe; for every call with a non-NULL socket it
# emits one record on ipv4_events or ipv6_events (chosen by socket family)
# carrying the PID, addresses, ports, socket state, TCP flags byte and a
# kernel stack id. The struct layouts here must stay in sync with the ctypes
# classes used by the Python event printers.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/ip.h>
#include <net/sock.h>
#include <bcc/proto.h>

BPF_STACK_TRACE(stack_traces, 1024);

// separate data structs for ipv4 and ipv6
struct ipv4_data_t {
    u32 pid;
    u64 ip;
    u32 saddr;
    u32 daddr;
    u16 sport;
    u16 dport;
    u8 state;
    u8 tcpflags;
    u32 stack_id;
};
BPF_PERF_OUTPUT(ipv4_events);

struct ipv6_data_t {
    u32 pid;
    u64 ip;
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u16 sport;
    u16 dport;
    u8 state;
    u8 tcpflags;
    u32 stack_id;
};
BPF_PERF_OUTPUT(ipv6_events);

static struct tcphdr *skb_to_tcphdr(const struct sk_buff *skb)
{
    // unstable API. verify logic in tcp_hdr() -> skb_transport_header().
    return (struct tcphdr *)(skb->head + skb->transport_header);
}

static inline struct iphdr *skb_to_iphdr(const struct sk_buff *skb)
{
    // unstable API. verify logic in ip_hdr() -> skb_network_header().
    return (struct iphdr *)(skb->head + skb->network_header);
}

// from include/net/tcp.h:
#ifndef tcp_flag_byte
#define tcp_flag_byte(th) (((u_int8_t *)th)[13])
#endif

int trace_tcp_drop(struct pt_regs *ctx, struct sock *sk, struct sk_buff *skb)
{
    if (sk == NULL)
        return 0;
    // the helper returns (tgid << 32) | tid; the PID column wants the tgid
    u32 pid = bpf_get_current_pid_tgid() >> 32;

    // pull in details from the packet headers and the sock struct
    u16 family = sk->__sk_common.skc_family;
    char state = sk->__sk_common.skc_state;
    u16 sport = 0, dport = 0;
    struct tcphdr *tcp = skb_to_tcphdr(skb);
    struct iphdr *ip = skb_to_iphdr(skb);
    u8 tcpflags = ((u_int8_t *)tcp)[13];
    sport = tcp->source;
    dport = tcp->dest;
    sport = ntohs(sport);
    dport = ntohs(dport);

    if (family == AF_INET) {
        struct ipv4_data_t data4 = {};
        data4.pid = pid;
        data4.ip = 4;
        data4.saddr = ip->saddr;
        data4.daddr = ip->daddr;
        data4.dport = dport;
        data4.sport = sport;
        data4.state = state;
        data4.tcpflags = tcpflags;
        data4.stack_id = stack_traces.get_stackid(ctx, 0);
        ipv4_events.perf_submit(ctx, &data4, sizeof(data4));

    } else if (family == AF_INET6) {
        struct ipv6_data_t data6 = {};
        data6.pid = pid;
        data6.ip = 6;
        // Ports (and the IPv4 addresses above) come from the dropped
        // packet's own headers, while the IPv6 addresses come from the
        // socket. Map the socket's remote address to the packet source and
        // its local address to the packet destination so that each address
        // is printed next to its own port. Assumes the dropped segment is
        // inbound (tcp_drop() callers live in the receive path) - verify
        // against the target kernel.
        bpf_probe_read(&data6.saddr, sizeof(data6.saddr),
            sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
        bpf_probe_read(&data6.daddr, sizeof(data6.daddr),
            sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
        data6.dport = dport;
        data6.sport = sport;
        data6.state = state;
        data6.tcpflags = tcpflags;
        data6.stack_id = stack_traces.get_stackid(ctx, 0);
        ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
    }
    // else drop

    return 0;
}
"""
148
# Print the BPF C source when debugging or when --ebpf was given; with
# --ebpf the script stops here, before anything is compiled or attached.
if args.ebpf or debug:
    print(bpf_text)
if args.ebpf:
    exit()
153
154# event data
class Data_ipv4(ct.Structure):
    """ctypes mirror of ``struct ipv4_data_t`` from the BPF program.

    Perf buffer records are reinterpreted in place through this layout, so
    field order and widths must match the C definition exactly.
    """
    _fields_ = [
        ("pid", ct.c_uint),             # u32
        ("ip", ct.c_ulonglong),         # u64, IP version (set to 4)
        ("saddr", ct.c_uint),           # u32, copied from the IP header
        ("daddr", ct.c_uint),           # u32, copied from the IP header
        ("sport", ct.c_ushort),         # u16, already ntohs()-converted
        ("dport", ct.c_ushort),         # u16, already ntohs()-converted
        ("state", ct.c_ubyte),          # u8, socket state
        ("tcpflags", ct.c_ubyte),       # u8, flags byte of the TCP header
        # u32 in the C struct. c_ulong is 8 bytes on LP64 platforms and
        # would read past the field into struct padding.
        ("stack_id", ct.c_uint)
    ]
167
class Data_ipv6(ct.Structure):
    """ctypes mirror of ``struct ipv6_data_t`` from the BPF program.

    Perf buffer records are reinterpreted in place through this layout, so
    field order and widths must match the C definition exactly.
    """
    _fields_ = [
        ("pid", ct.c_uint),                 # u32
        ("ip", ct.c_ulonglong),             # u64, IP version (set to 6)
        ("saddr", (ct.c_ulonglong * 2)),    # unsigned __int128 (16 bytes)
        ("daddr", (ct.c_ulonglong * 2)),    # unsigned __int128 (16 bytes)
        ("sport", ct.c_ushort),             # u16, already ntohs()-converted
        ("dport", ct.c_ushort),             # u16, already ntohs()-converted
        ("state", ct.c_ubyte),              # u8, socket state
        ("tcpflags", ct.c_ubyte),           # u8, flags byte of the TCP header
        # u32 in the C struct. c_ulong is 8 bytes on LP64 platforms and
        # would read past the field into struct padding.
        ("stack_id", ct.c_uint)
    ]
180
181# process event
def print_ipv4_event(cpu, data, size):
    """Perf buffer callback: print one IPv4 drop record and its stack.

    ``data`` is reinterpreted as a ``Data_ipv4`` structure; ``cpu`` and
    ``size`` are part of the callback signature but are not used. One
    summary line is printed, followed by one tab-indented line per kernel
    stack frame and a blank separator line.
    """
    ev = ct.cast(data, ct.POINTER(Data_ipv4)).contents

    timestamp = strftime("%H:%M:%S")
    # pack('I', ...) turns the integer back into the 4 raw address bytes
    # that inet_ntop() expects.
    source = "%s:%d" % (inet_ntop(AF_INET, pack('I', ev.saddr)), ev.sport)
    dest = "%s:%s" % (inet_ntop(AF_INET, pack('I', ev.daddr)), ev.dport)
    state_name = tcp.tcpstate[ev.state]
    flag_names = tcp.flags2str(ev.tcpflags)
    print("%-8s %-6d %-2d %-20s > %-20s %s (%s)" % (
        timestamp, ev.pid, ev.ip, source, dest, state_name, flag_names))

    # Resolve every frame of the recorded kernel stack to symbol+offset.
    for frame in stack_traces.walk(ev.stack_id):
        print("\t%s" % b.ksym(frame, show_offset=True))
    print("")
193
def print_ipv6_event(cpu, data, size):
    """Perf buffer callback: print one IPv6 drop record and its stack.

    ``data`` is reinterpreted as a ``Data_ipv6`` structure; ``cpu`` and
    ``size`` are part of the callback signature but are not used. One
    summary line is printed, followed by one tab-indented line per kernel
    stack frame and a blank separator line.
    """
    ev = ct.cast(data, ct.POINTER(Data_ipv6)).contents

    timestamp = strftime("%H:%M:%S")
    # The 16-byte address arrays are handed to inet_ntop() as-is.
    source = "%s:%d" % (inet_ntop(AF_INET6, ev.saddr), ev.sport)
    dest = "%s:%d" % (inet_ntop(AF_INET6, ev.daddr), ev.dport)
    state_name = tcp.tcpstate[ev.state]
    flag_names = tcp.flags2str(ev.tcpflags)
    print("%-8s %-6d %-2d %-20s > %-20s %s (%s)" % (
        timestamp, ev.pid, ev.ip, source, dest, state_name, flag_names))

    # Resolve every frame of the recorded kernel stack to symbol+offset.
    for frame in stack_traces.walk(ev.stack_id):
        print("\t%s" % b.ksym(frame, show_offset=True))
    print("")
205
# initialize BPF
# Compile and load the program, then attach trace_tcp_drop() to the kernel's
# tcp_drop() only if that function is listed as available for kprobes.
b = BPF(text=bpf_text)
if not b.get_kprobe_functions(b"tcp_drop"):
    print("ERROR: tcp_drop() kernel function not found or traceable. "
        "Older kernel versions not supported.")
    # Exit non-zero so shells and wrapper scripts can detect the failure;
    # a bare exit() would report success.
    exit(1)
b.attach_kprobe(event="tcp_drop", fn_name="trace_tcp_drop")
# Table handle used by the event printers to walk recorded kernel stacks.
stack_traces = b.get_table("stack_traces")
215
# header
# Column titles, laid out with the same field widths as the per-event lines.
print("%-8s %-6s %-2s %-20s > %-20s %s (%s)" % (
    "TIME",
    "PID",
    "IP",
    "SADDR:SPORT",
    "DADDR:DPORT",
    "STATE",
    "FLAGS"))
219
# read events
# Register one printer per address family, then poll the perf buffers until
# the user interrupts with Ctrl-C.
b["ipv4_events"].open_perf_buffer(print_ipv4_event)
b["ipv6_events"].open_perf_buffer(print_ipv6_event)
try:
    while True:
        b.perf_buffer_poll()
except KeyboardInterrupt:
    exit()
228