1# Copyright 2015 PLUMgrid
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15from __future__ import print_function
16import atexit
17import ctypes as ct
18import fcntl
19import json
20import os
21import re
22import struct
23import errno
24import sys
25basestring = (unicode if sys.version_info[0] < 3 else str)
26
27from .libbcc import lib, bcc_symbol, bcc_symbol_option, _SYM_CB_TYPE
28from .table import Table, PerfEventArray
29from .perf import Perf
30from .utils import get_online_cpus, printb, _assert_is_bytes, ArgString
31from .version import __version__
32
# Upper bound on the number of probes that may be open at the same time;
# enforced by BPF._check_probe_quota().
_probe_limit = 1000
# Running count of currently open probes. It is incremented and decremented
# by the BPF._add_*_fd() / BPF._del_*_fd() helpers.
_num_open_probes = 0
35
36# for tests
37def _get_num_open_probes():
38    global _num_open_probes
39    return _num_open_probes
40
# Base directory of the kernel tracing interface. Tracepoint event
# directories and the kprobe blacklist are looked up relative to it.
TRACEFS = "/sys/kernel/debug/tracing"
42
# Debug flags (bit values; they can be OR'd together and passed as the
# `debug` argument of BPF()).

# Debug output compiled LLVM IR.
DEBUG_LLVM_IR = 0x1
# Debug output loaded BPF bytecode and register state on branches.
DEBUG_BPF = 0x2
# Debug output pre-processor result.
DEBUG_PREPROCESSOR = 0x4
# Debug output ASM instructions embedded with source.
DEBUG_SOURCE = 0x8
# Debug output register state on all instructions in addition to DEBUG_BPF.
DEBUG_BPF_REGISTER_STATE = 0x10
55
class SymbolCache(object):
    """Address <-> symbol resolver backed by a bcc symbol cache for `pid`."""

    def __init__(self, pid):
        # No symbol options are supplied: hand over a NULL option pointer.
        no_options = ct.cast(None, ct.POINTER(bcc_symbol_option))
        self.cache = lib.bcc_symcache_new(pid, no_options)

    def resolve(self, addr, demangle):
        """
        Resolve `addr` to a (symbol, offset, module) tuple, for example:
            ("start_thread", 0x202, "/usr/lib/.../libpthread-2.24.so")
        The offset is relative to the start of the symbol. When the symbol
        is unknown but the module is known, the result is
        (None, offset-in-module, module). When not even the module is
        known, the result is (None, addr, None).

        `demangle` selects between the demangling and the non-demangling
        resolver; with demangling the demangled name is returned.
        """
        sym = bcc_symbol()
        sym_ref = ct.byref(sym)
        if demangle:
            status = lib.bcc_symcache_resolve(self.cache, addr, sym_ref)
        else:
            status = lib.bcc_symcache_resolve_no_demangle(self.cache, addr,
                                                          sym_ref)

        if status < 0:
            # Lookup failed; report the module-relative position if we have it.
            if not (sym.module and sym.offset):
                return (None, addr, None)
            return (None, sym.offset, ct.cast(sym.module, ct.c_char_p).value)

        if not demangle:
            symbol_name = sym.name
        else:
            # Read the name first, then release the demangled buffer.
            symbol_name = sym.demangle_name
            lib.bcc_symbol_free_demangle_name(ct.byref(sym))
        return (symbol_name, sym.offset,
                ct.cast(sym.module, ct.c_char_p).value)

    def resolve_name(self, module, name):
        """Return the address of symbol `name` in `module`, or -1 when the
        lookup fails."""
        module = _assert_is_bytes(module)
        name = _assert_is_bytes(name)
        addr = ct.c_ulonglong()
        status = lib.bcc_symcache_resolve_name(self.cache, module, name,
                                               ct.byref(addr))
        return -1 if status < 0 else addr.value
97
class PerfType:
    """Perf event type selectors."""
    # From perf_type_id in uapi/linux/perf_event.h
    HARDWARE, SOFTWARE = 0, 1
102
class PerfHWConfig:
    """Hardware perf event identifiers."""
    # From perf_hw_id in uapi/linux/perf_event.h. The values are consecutive
    # starting at 0, so the order of the names below is significant.
    (CPU_CYCLES,
     INSTRUCTIONS,
     CACHE_REFERENCES,
     CACHE_MISSES,
     BRANCH_INSTRUCTIONS,
     BRANCH_MISSES,
     BUS_CYCLES,
     STALLED_CYCLES_FRONTEND,
     STALLED_CYCLES_BACKEND,
     REF_CPU_CYCLES) = range(10)
115
class PerfSWConfig:
    """Software perf event identifiers."""
    # From perf_sw_id in uapi/linux/perf_event.h. The values are consecutive
    # starting at 0, so the order of the names below is significant.
    (CPU_CLOCK,
     TASK_CLOCK,
     PAGE_FAULTS,
     CONTEXT_SWITCHES,
     CPU_MIGRATIONS,
     PAGE_FAULTS_MIN,
     PAGE_FAULTS_MAJ,
     ALIGNMENT_FAULTS,
     EMULATION_FAULTS,
     DUMMY,
     BPF_OUTPUT) = range(11)
129
130class BPF(object):
    # BPF program types, from bpf_prog_type in uapi/linux/bpf.h.
    # One of these is passed as `prog_type` to load_func() / load_funcs().
    SOCKET_FILTER = 1
    KPROBE = 2
    SCHED_CLS = 3
    SCHED_ACT = 4
    TRACEPOINT = 5
    XDP = 6
    PERF_EVENT = 7
    CGROUP_SKB = 8
    CGROUP_SOCK = 9
    LWT_IN = 10
    LWT_OUT = 11
    LWT_XMIT = 12
    SOCK_OPS = 13
    SK_SKB = 14
    CGROUP_DEVICE = 15
    SK_MSG = 16
    RAW_TRACEPOINT = 17
    CGROUP_SOCK_ADDR = 18

    # XDP action codes, from xdp_action in uapi/linux/bpf.h.
    XDP_ABORTED = 0
    XDP_DROP = 1
    XDP_PASS = 2
    XDP_TX = 3
    XDP_REDIRECT = 4
157
    # Matches any byte that is not an ASCII letter, digit or underscore.
    # NOTE(review): presumably used to sanitize probe names; the use site is
    # not visible in this part of the file.
    _probe_repl = re.compile(b"[^a-zA-Z0-9_]")
    # NOTE(review): presumably a cache of SymbolCache objects shared by all
    # BPF instances; the use site is not visible in this part of the file.
    _sym_caches = {}

    # Header -> keywords. generate_auto_includes() emits an #include for a
    # header when any of its keywords occurs inside a word of the program.
    _auto_includes = {
        "linux/time.h": ["time"],
        "linux/fs.h": ["fs", "file"],
        "linux/blkdev.h": ["bio", "request"],
        "linux/slab.h": ["alloc"],
        "linux/netdevice.h": ["sk_buff", "net_device"]
    }

    # Candidate kernel symbol prefixes for syscall entry points, tried in
    # this order by get_syscall_prefix(); the first entry is its fallback.
    _syscall_prefixes = [
        b"sys_",
        b"__x64_sys_",
        b"__x32_compat_sys_",
        b"__ia32_compat_sys_",
    ]
175
    # BPF timestamps come from the monotonic clock. To be able to filter
    # and compare them from Python, we need to invoke clock_gettime.
    # Adapted from http://stackoverflow.com/a/1205762
    CLOCK_MONOTONIC = 1         # see <linux/time.h>

    # ctypes layout of the struct filled in by clock_gettime:
    # whole seconds plus a nanosecond remainder.
    class timespec(ct.Structure):
        _fields_ = [('tv_sec', ct.c_long), ('tv_nsec', ct.c_long)]

    # use_errno=True lets monotonic_time() read the failure reason through
    # ct.get_errno().
    _librt = ct.CDLL('librt.so.1', use_errno=True)
    _clock_gettime = _librt.clock_gettime
    # Signature: clock_gettime(int clock_id, struct timespec *out)
    _clock_gettime.argtypes = [ct.c_int, ct.POINTER(timespec)]
187
188    @classmethod
189    def monotonic_time(cls):
190        """monotonic_time()
191        Returns the system monotonic time from clock_gettime, using the
192        CLOCK_MONOTONIC constant. The time returned is in nanoseconds.
193        """
194        t = cls.timespec()
195        if cls._clock_gettime(cls.CLOCK_MONOTONIC, ct.byref(t)) != 0:
196            errno = ct.get_errno()
197            raise OSError(errno, os.strerror(errno))
198        return t.tv_sec * 1e9 + t.tv_nsec
199
200    @classmethod
201    def generate_auto_includes(cls, program_words):
202        """
203        Generates #include statements automatically based on a set of
204        recognized types such as sk_buff and bio. The input is all the words
205        that appear in the BPF program, and the output is a (possibly empty)
206        string of #include statements, such as "#include <linux/fs.h>".
207        """
208        headers = ""
209        for header, keywords in cls._auto_includes.items():
210            for keyword in keywords:
211                for word in program_words:
212                    if keyword in word and header not in headers:
213                        headers += "#include <%s>\n" % header
214        return headers
215
216    # defined for compatibility reasons, to be removed
217    Table = Table
218
219    class Function(object):
220        def __init__(self, bpf, name, fd):
221            self.bpf = bpf
222            self.name = name
223            self.fd = fd
224
225    @staticmethod
226    def _find_file(filename):
227        """ If filename is invalid, search in ./ of argv[0] """
228        if filename:
229            if not os.path.isfile(filename):
230                argv0 = ArgString(sys.argv[0])
231                t = b"/".join([os.path.abspath(os.path.dirname(argv0.__str__())), filename])
232                if os.path.isfile(t):
233                    filename = t
234                else:
235                    raise Exception("Could not find file %s" % filename)
236        return filename
237
238    @staticmethod
239    def find_exe(bin_path):
240        """
241        find_exe(bin_path)
242
243        Traverses the PATH environment variable, looking for the first
244        directory that contains an executable file named bin_path, and
245        returns the full path to that file, or None if no such file
246        can be found. This is meant to replace invocations of the
247        "which" shell utility, which doesn't have portable semantics
248        for skipping aliases.
249        """
250        # Source: http://stackoverflow.com/a/377028
251        def is_exe(fpath):
252            return os.path.isfile(fpath) and \
253                os.access(fpath, os.X_OK)
254
255        fpath, fname = os.path.split(bin_path)
256        if fpath:
257            if is_exe(bin_path):
258                return bin_path
259        else:
260            for path in os.environ["PATH"].split(os.pathsep):
261                path = path.strip('"')
262                exe_file = os.path.join(path, bin_path)
263                if is_exe(exe_file):
264                    return exe_file
265        return None
266
267    def __init__(self, src_file=b"", hdr_file=b"", text=None, debug=0,
268            cflags=[], usdt_contexts=[]):
269        """Create a new BPF module with the given source code.
270
271        Note:
272            All fields are marked as optional, but either `src_file` or `text`
273            must be supplied, and not both.
274
275        Args:
276            src_file (Optional[str]): Path to a source file for the module
277            hdr_file (Optional[str]): Path to a helper header file for the `src_file`
278            text (Optional[str]): Contents of a source file for the module
279            debug (Optional[int]): Flags used for debug prints, can be |'d together
280                                   See "Debug flags" for explanation
281        """
282
283        src_file = _assert_is_bytes(src_file)
284        hdr_file = _assert_is_bytes(hdr_file)
285        text = _assert_is_bytes(text)
286
287        self.kprobe_fds = {}
288        self.uprobe_fds = {}
289        self.tracepoint_fds = {}
290        self.raw_tracepoint_fds = {}
291        self.perf_buffers = {}
292        self.open_perf_events = {}
293        self.tracefile = None
294        atexit.register(self.cleanup)
295
296        self.debug = debug
297        self.funcs = {}
298        self.tables = {}
299        self.module = None
300        cflags_array = (ct.c_char_p * len(cflags))()
301        for i, s in enumerate(cflags): cflags_array[i] = bytes(ArgString(s))
302        if text:
303            ctx_array = (ct.c_void_p * len(usdt_contexts))()
304            for i, usdt in enumerate(usdt_contexts):
305                ctx_array[i] = ct.c_void_p(usdt.get_context())
306            usdt_text = lib.bcc_usdt_genargs(ctx_array, len(usdt_contexts))
307            if usdt_text is None:
308                raise Exception("can't generate USDT probe arguments; " +
309                                "possible cause is missing pid when a " +
310                                "probe in a shared object has multiple " +
311                                "locations")
312            text = usdt_text + text
313
314        if text:
315            self.module = lib.bpf_module_create_c_from_string(text,
316                    self.debug, cflags_array, len(cflags_array))
317            if not self.module:
318                raise Exception("Failed to compile BPF text")
319        else:
320            src_file = BPF._find_file(src_file)
321            hdr_file = BPF._find_file(hdr_file)
322            if src_file.endswith(b".b"):
323                self.module = lib.bpf_module_create_b(src_file, hdr_file,
324                        self.debug)
325            else:
326                self.module = lib.bpf_module_create_c(src_file, self.debug,
327                        cflags_array, len(cflags_array))
328            if not self.module:
329                raise Exception("Failed to compile BPF module %s" % src_file)
330
331        for usdt_context in usdt_contexts:
332            usdt_context.attach_uprobes(self)
333
334        # If any "kprobe__" or "tracepoint__" or "raw_tracepoint__"
335        # prefixed functions were defined,
336        # they will be loaded and attached here.
337        self._trace_autoload()
338
339    def load_funcs(self, prog_type=KPROBE):
340        """load_funcs(prog_type=KPROBE)
341
342        Load all functions in this BPF module with the given type.
343        Returns a list of the function handles."""
344
345        fns = []
346        for i in range(0, lib.bpf_num_functions(self.module)):
347            func_name = lib.bpf_function_name(self.module, i)
348            fns.append(self.load_func(func_name, prog_type))
349
350        return fns
351
352    def load_func(self, func_name, prog_type):
353        func_name = _assert_is_bytes(func_name)
354        if func_name in self.funcs:
355            return self.funcs[func_name]
356        if not lib.bpf_function_start(self.module, func_name):
357            raise Exception("Unknown program %s" % func_name)
358        log_level = 0
359        if (self.debug & DEBUG_BPF_REGISTER_STATE):
360            log_level = 2
361        elif (self.debug & DEBUG_BPF):
362            log_level = 1
363        fd = lib.bpf_prog_load(prog_type, func_name,
364                lib.bpf_function_start(self.module, func_name),
365                lib.bpf_function_size(self.module, func_name),
366                lib.bpf_module_license(self.module),
367                lib.bpf_module_kern_version(self.module),
368                log_level, None, 0);
369
370        if fd < 0:
371            atexit.register(self.donothing)
372            if ct.get_errno() == errno.EPERM:
373                raise Exception("Need super-user privileges to run")
374
375            errstr = os.strerror(ct.get_errno())
376            raise Exception("Failed to load BPF program %s: %s" %
377                            (func_name, errstr))
378
379        fn = BPF.Function(self, func_name, fd)
380        self.funcs[func_name] = fn
381
382        return fn
383
384    def dump_func(self, func_name):
385        """
386        Return the eBPF bytecodes for the specified function as a string
387        """
388        func_name = _assert_is_bytes(func_name)
389        if not lib.bpf_function_start(self.module, func_name):
390            raise Exception("Unknown program %s" % func_name)
391
392        start, = lib.bpf_function_start(self.module, func_name),
393        size, = lib.bpf_function_size(self.module, func_name),
394        return ct.string_at(start, size)
395
    # C scalar type name -> ctypes type, used by _decode_table_type() for
    # descriptions that are plain type-name strings. The 128-bit integers are
    # represented as arrays of two 64-bit integers.
    str2ctype = {
        u"_Bool": ct.c_bool,
        u"char": ct.c_char,
        u"wchar_t": ct.c_wchar,
        u"unsigned char": ct.c_ubyte,
        u"short": ct.c_short,
        u"unsigned short": ct.c_ushort,
        u"int": ct.c_int,
        u"unsigned int": ct.c_uint,
        u"long": ct.c_long,
        u"unsigned long": ct.c_ulong,
        u"long long": ct.c_longlong,
        u"unsigned long long": ct.c_ulonglong,
        u"float": ct.c_float,
        u"double": ct.c_double,
        u"long double": ct.c_longdouble,
        u"__int128": ct.c_int64 * 2,
        u"unsigned __int128": ct.c_uint64 * 2,
    }
415    @staticmethod
416    def _decode_table_type(desc):
417        if isinstance(desc, basestring):
418            return BPF.str2ctype[desc]
419        anon = []
420        fields = []
421        for t in desc[1]:
422            if len(t) == 2:
423                fields.append((t[0], BPF._decode_table_type(t[1])))
424            elif len(t) == 3:
425                if isinstance(t[2], list):
426                    fields.append((t[0], BPF._decode_table_type(t[1]) * t[2][0]))
427                elif isinstance(t[2], int):
428                    fields.append((t[0], BPF._decode_table_type(t[1]), t[2]))
429                elif isinstance(t[2], basestring) and (
430                        t[2] == u"union" or t[2] == u"struct" or
431                        t[2] == u"struct_packed"):
432                    name = t[0]
433                    if name == "":
434                        name = "__anon%d" % len(anon)
435                        anon.append(name)
436                    fields.append((name, BPF._decode_table_type(t)))
437                else:
438                    raise Exception("Failed to decode type %s" % str(t))
439            else:
440                raise Exception("Failed to decode type %s" % str(t))
441        base = ct.Structure
442        is_packed = False
443        if len(desc) > 2:
444            if desc[2] == u"union":
445                base = ct.Union
446            elif desc[2] == u"struct":
447                base = ct.Structure
448            elif desc[2] == u"struct_packed":
449                base = ct.Structure
450                is_packed = True
451        if is_packed:
452            cls = type(str(desc[0]), (base,), dict(_anonymous_=anon, _pack_=1,
453                _fields_=fields))
454        else:
455            cls = type(str(desc[0]), (base,), dict(_anonymous_=anon,
456                _fields_=fields))
457        return cls
458
459    def get_table(self, name, keytype=None, leaftype=None, reducer=None):
460        name = _assert_is_bytes(name)
461        map_id = lib.bpf_table_id(self.module, name)
462        map_fd = lib.bpf_table_fd(self.module, name)
463        if map_fd < 0:
464            raise KeyError
465        if not keytype:
466            key_desc = lib.bpf_table_key_desc(self.module, name).decode("utf-8")
467            if not key_desc:
468                raise Exception("Failed to load BPF Table %s key desc" % name)
469            keytype = BPF._decode_table_type(json.loads(key_desc))
470        if not leaftype:
471            leaf_desc = lib.bpf_table_leaf_desc(self.module, name).decode("utf-8")
472            if not leaf_desc:
473                raise Exception("Failed to load BPF Table %s leaf desc" % name)
474            leaftype = BPF._decode_table_type(json.loads(leaf_desc))
475        return Table(self, map_id, map_fd, keytype, leaftype, reducer=reducer)
476
477    def __getitem__(self, key):
478        if key not in self.tables:
479            self.tables[key] = self.get_table(key)
480        return self.tables[key]
481
    def __setitem__(self, key, leaf):
        """Store `leaf` under `key` in the table cache (self.tables)."""
        self.tables[key] = leaf
484
    def __len__(self):
        """Return the number of entries in the table cache (self.tables)."""
        return len(self.tables)
487
    def __delitem__(self, key):
        """Remove `key` from the table cache; raises KeyError if absent."""
        del self.tables[key]
490
491    def __iter__(self):
492        return self.tables.__iter__()
493
494    @staticmethod
495    def attach_raw_socket(fn, dev):
496        dev = _assert_is_bytes(dev)
497        if not isinstance(fn, BPF.Function):
498            raise Exception("arg 1 must be of type BPF.Function")
499        sock = lib.bpf_open_raw_sock(dev)
500        if sock < 0:
501            errstr = os.strerror(ct.get_errno())
502            raise Exception("Failed to open raw device %s: %s" % (dev, errstr))
503        res = lib.bpf_attach_socket(sock, fn.fd)
504        if res < 0:
505            errstr = os.strerror(ct.get_errno())
506            raise Exception("Failed to attach BPF to device %s: %s"
507                    % (dev, errstr))
508        fn.sock = sock
509
510    @staticmethod
511    def get_kprobe_functions(event_re):
512        with open("%s/../kprobes/blacklist" % TRACEFS, "rb") as blacklist_f:
513            blacklist = set([line.rstrip().split()[1] for line in blacklist_f])
514        fns = []
515
516        in_init_section = 0
517        with open("/proc/kallsyms", "rb") as avail_file:
518            for line in avail_file:
519                (t, fn) = line.rstrip().split()[1:3]
520                if in_init_section == 0:
521                    if fn == b'__init_begin':
522                        in_init_section = 1
523                        continue
524                elif in_init_section == 1:
525                    if fn == b'__init_end':
526                        in_init_section = 2
527                    continue
528                if (t.lower() in [b't', b'w']) and re.match(event_re, fn) \
529                    and fn not in blacklist:
530                    fns.append(fn)
531        return set(fns)     # Some functions may appear more than once
532
533    def _check_probe_quota(self, num_new_probes):
534        global _num_open_probes
535        if _num_open_probes + num_new_probes > _probe_limit:
536            raise Exception("Number of open probes would exceed global quota")
537
    def _add_kprobe_fd(self, name, fd):
        """Record `fd` under `name` in self.kprobe_fds and increment the
        module-wide open-probe counter."""
        global _num_open_probes
        self.kprobe_fds[name] = fd
        _num_open_probes += 1
542
    def _del_kprobe_fd(self, name):
        """Forget the kprobe recorded under `name` and decrement the
        module-wide open-probe counter. Raises KeyError for an unknown name,
        in which case the counter is left untouched."""
        global _num_open_probes
        del self.kprobe_fds[name]
        _num_open_probes -= 1
547
    def _add_uprobe_fd(self, name, fd):
        """Record `fd` under `name` in self.uprobe_fds and increment the
        module-wide open-probe counter."""
        global _num_open_probes
        self.uprobe_fds[name] = fd
        _num_open_probes += 1
552
    def _del_uprobe_fd(self, name):
        """Forget the uprobe recorded under `name` and decrement the
        module-wide open-probe counter. Raises KeyError for an unknown name,
        in which case the counter is left untouched."""
        global _num_open_probes
        del self.uprobe_fds[name]
        _num_open_probes -= 1
557
558    # Find current system's syscall prefix by testing on the BPF syscall.
559    # If no valid value found, will return the first possible value which
560    # would probably lead to error in later API calls.
561    def get_syscall_prefix(self):
562        for prefix in self._syscall_prefixes:
563            if self.ksymname(b"%sbpf" % prefix) != -1:
564                return prefix
565        return self._syscall_prefixes[0]
566
567    # Given a syscall's name, return the full Kernel function name with current
568    # system's syscall prefix. For example, given "clone" the helper would
569    # return "sys_clone" or "__x64_sys_clone".
570    def get_syscall_fnname(self, name):
571        name = _assert_is_bytes(name)
572        return self.get_syscall_prefix() + name
573
574    # Given a Kernel function name that represents a syscall but already has a
575    # prefix included, transform it to current system's prefix. For example,
576    # if "sys_clone" provided, the helper may translate it to "__x64_sys_clone".
577    def fix_syscall_fnname(self, name):
578        name = _assert_is_bytes(name)
579        for prefix in self._syscall_prefixes:
580            if name.startswith(prefix):
581                return self.get_syscall_fnname(name[len(prefix):])
582        return name
583
584    def attach_kprobe(self, event=b"", event_off=0, fn_name=b"", event_re=b""):
585        event = _assert_is_bytes(event)
586        fn_name = _assert_is_bytes(fn_name)
587        event_re = _assert_is_bytes(event_re)
588
589        # allow the caller to glob multiple functions together
590        if event_re:
591            matches = BPF.get_kprobe_functions(event_re)
592            self._check_probe_quota(len(matches))
593            for line in matches:
594                try:
595                    self.attach_kprobe(event=line, fn_name=fn_name)
596                except:
597                    pass
598            return
599
600        self._check_probe_quota(1)
601        fn = self.load_func(fn_name, BPF.KPROBE)
602        ev_name = b"p_" + event.replace(b"+", b"_").replace(b".", b"_")
603        fd = lib.bpf_attach_kprobe(fn.fd, 0, ev_name, event, event_off)
604        if fd < 0:
605            raise Exception("Failed to attach BPF to kprobe")
606        self._add_kprobe_fd(ev_name, fd)
607        return self
608
609    def attach_kretprobe(self, event=b"", fn_name=b"", event_re=b""):
610        event = _assert_is_bytes(event)
611        fn_name = _assert_is_bytes(fn_name)
612        event_re = _assert_is_bytes(event_re)
613
614        # allow the caller to glob multiple functions together
615        if event_re:
616            for line in BPF.get_kprobe_functions(event_re):
617                try:
618                    self.attach_kretprobe(event=line, fn_name=fn_name)
619                except:
620                    pass
621            return
622
623        self._check_probe_quota(1)
624        fn = self.load_func(fn_name, BPF.KPROBE)
625        ev_name = b"r_" + event.replace(b"+", b"_").replace(b".", b"_")
626        fd = lib.bpf_attach_kprobe(fn.fd, 1, ev_name, event, 0)
627        if fd < 0:
628            raise Exception("Failed to attach BPF to kretprobe")
629        self._add_kprobe_fd(ev_name, fd)
630        return self
631
632    def detach_kprobe_event(self, ev_name):
633        if ev_name not in self.kprobe_fds:
634            raise Exception("Kprobe %s is not attached" % event)
635        res = lib.bpf_close_perf_event_fd(self.kprobe_fds[ev_name])
636        if res < 0:
637            raise Exception("Failed to close kprobe FD")
638        res = lib.bpf_detach_kprobe(ev_name)
639        if res < 0:
640            raise Exception("Failed to detach BPF from kprobe")
641        self._del_kprobe_fd(ev_name)
642
643    def detach_kprobe(self, event):
644        event = _assert_is_bytes(event)
645        ev_name = b"p_" + event.replace(b"+", b"_").replace(b".", b"_")
646        self.detach_kprobe_event(ev_name)
647
648    def detach_kretprobe(self, event):
649        event = _assert_is_bytes(event)
650        ev_name = b"r_" + event.replace(b"+", b"_").replace(b".", b"_")
651        self.detach_kprobe_event(ev_name)
652
653    @staticmethod
654    def attach_xdp(dev, fn, flags=0):
655        '''
656            This function attaches a BPF function to a device on the device
657            driver level (XDP)
658        '''
659        dev = _assert_is_bytes(dev)
660        if not isinstance(fn, BPF.Function):
661            raise Exception("arg 1 must be of type BPF.Function")
662        res = lib.bpf_attach_xdp(dev, fn.fd, flags)
663        if res < 0:
664            err_no = ct.get_errno()
665            if err_no == errno.EBADMSG:
666                raise Exception("Internal error while attaching BPF to device,"+
667                    " try increasing the debug level!")
668            else:
669                errstr = os.strerror(err_no)
670                raise Exception("Failed to attach BPF to device %s: %s"
671                            % (dev, errstr))
672
673    @staticmethod
674    def remove_xdp(dev, flags=0):
675        '''
676            This function removes any BPF function from a device on the
677            device driver level (XDP)
678        '''
679        dev = _assert_is_bytes(dev)
680        res = lib.bpf_attach_xdp(dev, -1, flags)
681        if res < 0:
682            errstr = os.strerror(ct.get_errno())
683            raise Exception("Failed to detach BPF from device %s: %s"
684                            % (dev, errstr))
685
686
687
688    @classmethod
689    def _check_path_symbol(cls, module, symname, addr, pid):
690        module = _assert_is_bytes(module)
691        symname = _assert_is_bytes(symname)
692        sym = bcc_symbol()
693        c_pid = 0 if pid == -1 else pid
694        if lib.bcc_resolve_symname(
695            module, symname,
696            addr or 0x0, c_pid,
697            ct.cast(None, ct.POINTER(bcc_symbol_option)),
698            ct.byref(sym),
699        ) < 0:
700            raise Exception("could not determine address of symbol %s" % symname)
701        module_path = ct.cast(sym.module, ct.c_char_p).value
702        lib.bcc_procutils_free(sym.module)
703        return module_path, sym.offset
704
705    @staticmethod
706    def find_library(libname):
707        libname = _assert_is_bytes(libname)
708        res = lib.bcc_procutils_which_so(libname, 0)
709        if not res:
710            return None
711        libpath = ct.cast(res, ct.c_char_p).value
712        lib.bcc_procutils_free(res)
713        return libpath
714
715    @staticmethod
716    def get_tracepoints(tp_re):
717        results = []
718        events_dir = os.path.join(TRACEFS, "events")
719        for category in os.listdir(events_dir):
720            cat_dir = os.path.join(events_dir, category)
721            if not os.path.isdir(cat_dir):
722                continue
723            for event in os.listdir(cat_dir):
724                evt_dir = os.path.join(cat_dir, event)
725                if os.path.isdir(evt_dir):
726                    tp = ("%s:%s" % (category, event))
727                    if re.match(tp_re, tp):
728                        results.append(tp)
729        return results
730
731    @staticmethod
732    def tracepoint_exists(category, event):
733        evt_dir = os.path.join(TRACEFS, "events", category, event)
734        return os.path.isdir(evt_dir)
735
736    def attach_tracepoint(self, tp=b"", tp_re=b"", fn_name=b""):
737        """attach_tracepoint(tp="", tp_re="", fn_name="")
738
739        Run the bpf function denoted by fn_name every time the kernel tracepoint
740        specified by 'tp' is hit. The optional parameters pid, cpu, and group_fd
741        can be used to filter the probe. The tracepoint specification is simply
742        the tracepoint category and the tracepoint name, separated by a colon.
743        For example: sched:sched_switch, syscalls:sys_enter_bind, etc.
744
745        Instead of a tracepoint name, a regular expression can be provided in
746        tp_re. The program will then attach to tracepoints that match the
747        provided regular expression.
748
749        To obtain a list of kernel tracepoints, use the tplist tool or cat the
750        file /sys/kernel/debug/tracing/available_events.
751
752        Examples:
753            BPF(text).attach_tracepoint(tp="sched:sched_switch", fn_name="on_switch")
754            BPF(text).attach_tracepoint(tp_re="sched:.*", fn_name="on_switch")
755        """
756
757        tp = _assert_is_bytes(tp)
758        tp_re = _assert_is_bytes(tp_re)
759        fn_name = _assert_is_bytes(fn_name)
760        if tp_re:
761            for tp in BPF.get_tracepoints(tp_re):
762                self.attach_tracepoint(tp=tp, fn_name=fn_name)
763            return
764
765        fn = self.load_func(fn_name, BPF.TRACEPOINT)
766        (tp_category, tp_name) = tp.split(b':')
767        fd = lib.bpf_attach_tracepoint(fn.fd, tp_category, tp_name)
768        if fd < 0:
769            raise Exception("Failed to attach BPF to tracepoint")
770        self.tracepoint_fds[tp] = fd
771        return self
772
773    def attach_raw_tracepoint(self, tp=b"", fn_name=b""):
774        """attach_raw_tracepoint(self, tp=b"", fn_name=b"")
775
776        Run the bpf function denoted by fn_name every time the kernel tracepoint
777        specified by 'tp' is hit. The bpf function should be loaded as a
778        RAW_TRACEPOINT type. The fn_name is the kernel tracepoint name,
779        e.g., sched_switch, sys_enter_bind, etc.
780
781        Examples:
782            BPF(text).attach_raw_tracepoint(tp="sched_switch", fn_name="on_switch")
783        """
784
785        tp = _assert_is_bytes(tp)
786        if tp in self.raw_tracepoint_fds:
787            raise Exception("Raw tracepoint %s has been attached" % tp)
788
789        fn_name = _assert_is_bytes(fn_name)
790        fn = self.load_func(fn_name, BPF.RAW_TRACEPOINT)
791        fd = lib.bpf_attach_raw_tracepoint(fn.fd, tp)
792        if fd < 0:
793            raise Exception("Failed to attach BPF to raw tracepoint")
794        self.raw_tracepoint_fds[tp] = fd;
795        return self
796
797    def detach_raw_tracepoint(self, tp=b""):
798        """detach_raw_tracepoint(tp="")
799
800        Stop running the bpf function that is attached to the kernel tracepoint
801        specified by 'tp'.
802
803        Example: bpf.detach_raw_tracepoint("sched_switch")
804        """
805
806        tp = _assert_is_bytes(tp)
807        if tp not in self.raw_tracepoint_fds:
808            raise Exception("Raw tracepoint %s is not attached" % tp)
809        os.close(self.raw_tracepoint_fds[tp])
810        del self.raw_tracepoint_fds[tp]
811
812    @staticmethod
813    def support_raw_tracepoint():
814        # kernel symbol "bpf_find_raw_tracepoint" indicates raw_tracepint support
815        if BPF.ksymname("bpf_find_raw_tracepoint") != -1:
816            return True
817        return False
818
819    def detach_tracepoint(self, tp=b""):
820        """detach_tracepoint(tp="")
821
822        Stop running a bpf function that is attached to the kernel tracepoint
823        specified by 'tp'.
824
825        Example: bpf.detach_tracepoint("sched:sched_switch")
826        """
827
828        tp = _assert_is_bytes(tp)
829        if tp not in self.tracepoint_fds:
830            raise Exception("Tracepoint %s is not attached" % tp)
831        res = lib.bpf_close_perf_event_fd(self.tracepoint_fds[tp])
832        if res < 0:
833            raise Exception("Failed to detach BPF from tracepoint")
834        (tp_category, tp_name) = tp.split(b':')
835        res = lib.bpf_detach_tracepoint(tp_category, tp_name)
836        if res < 0:
837            raise Exception("Failed to detach BPF from tracepoint")
838        del self.tracepoint_fds[tp]
839
840    def _attach_perf_event(self, progfd, ev_type, ev_config,
841            sample_period, sample_freq, pid, cpu, group_fd):
842        res = lib.bpf_attach_perf_event(progfd, ev_type, ev_config,
843                sample_period, sample_freq, pid, cpu, group_fd)
844        if res < 0:
845            raise Exception("Failed to attach BPF to perf event")
846        return res
847
848    def attach_perf_event(self, ev_type=-1, ev_config=-1, fn_name=b"",
849            sample_period=0, sample_freq=0, pid=-1, cpu=-1, group_fd=-1):
850        fn_name = _assert_is_bytes(fn_name)
851        fn = self.load_func(fn_name, BPF.PERF_EVENT)
852        res = {}
853        if cpu >= 0:
854            res[cpu] = self._attach_perf_event(fn.fd, ev_type, ev_config,
855                    sample_period, sample_freq, pid, cpu, group_fd)
856        else:
857            for i in get_online_cpus():
858                res[i] = self._attach_perf_event(fn.fd, ev_type, ev_config,
859                        sample_period, sample_freq, pid, i, group_fd)
860        self.open_perf_events[(ev_type, ev_config)] = res
861
862    def detach_perf_event(self, ev_type=-1, ev_config=-1):
863        try:
864            fds = self.open_perf_events[(ev_type, ev_config)]
865        except KeyError:
866            raise Exception("Perf event type {} config {} not attached".format(
867                ev_type, ev_config))
868
869        res = 0
870        for fd in fds.values():
871            res = lib.bpf_close_perf_event_fd(fd) or res
872        if res != 0:
873            raise Exception("Failed to detach BPF from perf event")
874        del self.open_perf_events[(ev_type, ev_config)]
875
876    @staticmethod
877    def get_user_functions(name, sym_re):
878        return set([name for (name, _) in
879                    BPF.get_user_functions_and_addresses(name, sym_re)])
880
881    @staticmethod
882    def get_user_addresses(name, sym_re):
883        """
884        We are returning addresses here instead of symbol names because it
885        turns out that the same name may appear multiple times with different
886        addresses, and the same address may appear multiple times with the same
887        name. We can't attach a uprobe to the same address more than once, so
888        it makes sense to return the unique set of addresses that are mapped to
889        a symbol that matches the provided regular expression.
890        """
891        return set([address for (_, address) in
892                    BPF.get_user_functions_and_addresses(name, sym_re)])
893
894    @staticmethod
895    def get_user_functions_and_addresses(name, sym_re):
896        name = _assert_is_bytes(name)
897        sym_re = _assert_is_bytes(sym_re)
898        addresses = []
899        def sym_cb(sym_name, addr):
900            dname = sym_name
901            if re.match(sym_re, dname):
902                addresses.append((dname, addr))
903            return 0
904
905        res = lib.bcc_foreach_function_symbol(name, _SYM_CB_TYPE(sym_cb))
906        if res < 0:
907            raise Exception("Error %d enumerating symbols in %s" % (res, name))
908        return addresses
909
910    def _get_uprobe_evname(self, prefix, path, addr, pid):
911        if pid == -1:
912            return b"%s_%s_0x%x" % (prefix, self._probe_repl.sub(b"_", path), addr)
913        else:
914            # if pid is valid, put pid in the name, so different pid
915            # can have different event names
916            return b"%s_%s_0x%x_%d" % (prefix, self._probe_repl.sub(b"_", path), addr, pid)
917
918    def attach_uprobe(self, name=b"", sym=b"", sym_re=b"", addr=None,
919            fn_name=b"", pid=-1):
920        """attach_uprobe(name="", sym="", sym_re="", addr=None, fn_name=""
921                         pid=-1)
922
923        Run the bpf function denoted by fn_name every time the symbol sym in
924        the library or binary 'name' is encountered. The real address addr may
925        be supplied in place of sym. Optional parameters pid, cpu, and group_fd
926        can be used to filter the probe.
927
928        Instead of a symbol name, a regular expression can be provided in
929        sym_re. The uprobe will then attach to symbols that match the provided
930        regular expression.
931
932        Libraries can be given in the name argument without the lib prefix, or
933        with the full path (/usr/lib/...). Binaries can be given only with the
934        full path (/bin/sh). If a PID is given, the uprobe will attach to the
935        version of the library used by the process.
936
937        Example: BPF(text).attach_uprobe("c", "malloc")
938                 BPF(text).attach_uprobe("/usr/bin/python", "main")
939        """
940
941        name = _assert_is_bytes(name)
942        sym = _assert_is_bytes(sym)
943        sym_re = _assert_is_bytes(sym_re)
944        fn_name = _assert_is_bytes(fn_name)
945
946        if sym_re:
947            addresses = BPF.get_user_addresses(name, sym_re)
948            self._check_probe_quota(len(addresses))
949            for sym_addr in addresses:
950                self.attach_uprobe(name=name, addr=sym_addr,
951                                   fn_name=fn_name, pid=pid)
952            return
953
954        (path, addr) = BPF._check_path_symbol(name, sym, addr, pid)
955
956        self._check_probe_quota(1)
957        fn = self.load_func(fn_name, BPF.KPROBE)
958        ev_name = self._get_uprobe_evname(b"p", path, addr, pid)
959        fd = lib.bpf_attach_uprobe(fn.fd, 0, ev_name, path, addr, pid)
960        if fd < 0:
961            raise Exception("Failed to attach BPF to uprobe")
962        self._add_uprobe_fd(ev_name, fd)
963        return self
964
965    def attach_uretprobe(self, name=b"", sym=b"", sym_re=b"", addr=None,
966            fn_name=b"", pid=-1):
967        """attach_uretprobe(name="", sym="", sym_re="", addr=None, fn_name=""
968                            pid=-1)
969
970        Run the bpf function denoted by fn_name every time the symbol sym in
971        the library or binary 'name' finishes execution. See attach_uprobe for
972        meaning of additional parameters.
973        """
974
975        name = _assert_is_bytes(name)
976        sym = _assert_is_bytes(sym)
977        sym_re = _assert_is_bytes(sym_re)
978        fn_name = _assert_is_bytes(fn_name)
979
980        if sym_re:
981            for sym_addr in BPF.get_user_addresses(name, sym_re):
982                self.attach_uretprobe(name=name, addr=sym_addr,
983                                      fn_name=fn_name, pid=pid)
984            return
985
986        (path, addr) = BPF._check_path_symbol(name, sym, addr, pid)
987
988        self._check_probe_quota(1)
989        fn = self.load_func(fn_name, BPF.KPROBE)
990        ev_name = self._get_uprobe_evname(b"r", path, addr, pid)
991        fd = lib.bpf_attach_uprobe(fn.fd, 1, ev_name, path, addr, pid)
992        if fd < 0:
993            raise Exception("Failed to attach BPF to uretprobe")
994        self._add_uprobe_fd(ev_name, fd)
995        return self
996
997    def detach_uprobe_event(self, ev_name):
998        if ev_name not in self.uprobe_fds:
999            raise Exception("Uprobe %s is not attached" % ev_name)
1000        res = lib.bpf_close_perf_event_fd(self.uprobe_fds[ev_name])
1001        if res < 0:
1002            raise Exception("Failed to detach BPF from uprobe")
1003        res = lib.bpf_detach_uprobe(ev_name)
1004        if res < 0:
1005            raise Exception("Failed to detach BPF from uprobe")
1006        self._del_uprobe_fd(ev_name)
1007
1008    def detach_uprobe(self, name=b"", sym=b"", addr=None, pid=-1):
1009        """detach_uprobe(name="", sym="", addr=None, pid=-1)
1010
1011        Stop running a bpf function that is attached to symbol 'sym' in library
1012        or binary 'name'.
1013        """
1014
1015        name = _assert_is_bytes(name)
1016        sym = _assert_is_bytes(sym)
1017        (path, addr) = BPF._check_path_symbol(name, sym, addr, pid)
1018        ev_name = self._get_uprobe_evname(b"p", path, addr, pid)
1019        self.detach_uprobe_event(ev_name)
1020
1021    def detach_uretprobe(self, name=b"", sym=b"", addr=None, pid=-1):
1022        """detach_uretprobe(name="", sym="", addr=None, pid=-1)
1023
1024        Stop running a bpf function that is attached to symbol 'sym' in library
1025        or binary 'name'.
1026        """
1027
1028        name = _assert_is_bytes(name)
1029        sym = _assert_is_bytes(sym)
1030
1031        (path, addr) = BPF._check_path_symbol(name, sym, addr, pid)
1032        ev_name = self._get_uprobe_evname(b"r", path, addr, pid)
1033        self.detach_uprobe_event(ev_name)
1034
1035    def _trace_autoload(self):
1036        for i in range(0, lib.bpf_num_functions(self.module)):
1037            func_name = lib.bpf_function_name(self.module, i)
1038            if func_name.startswith(b"kprobe__"):
1039                fn = self.load_func(func_name, BPF.KPROBE)
1040                self.attach_kprobe(
1041                    event=self.fix_syscall_fnname(func_name[8:]),
1042                    fn_name=fn.name)
1043            elif func_name.startswith(b"kretprobe__"):
1044                fn = self.load_func(func_name, BPF.KPROBE)
1045                self.attach_kretprobe(
1046                    event=self.fix_syscall_fnname(func_name[11:]),
1047                    fn_name=fn.name)
1048            elif func_name.startswith(b"tracepoint__"):
1049                fn = self.load_func(func_name, BPF.TRACEPOINT)
1050                tp = fn.name[len(b"tracepoint__"):].replace(b"__", b":")
1051                self.attach_tracepoint(tp=tp, fn_name=fn.name)
1052            elif func_name.startswith(b"raw_tracepoint__"):
1053                fn = self.load_func(func_name, BPF.RAW_TRACEPOINT)
1054                tp = fn.name[len(b"raw_tracepoint__"):]
1055                self.attach_raw_tracepoint(tp=tp, fn_name=fn.name)
1056
1057    def trace_open(self, nonblocking=False):
1058        """trace_open(nonblocking=False)
1059
1060        Open the trace_pipe if not already open
1061        """
1062        if not self.tracefile:
1063            self.tracefile = open("%s/trace_pipe" % TRACEFS, "rb")
1064            if nonblocking:
1065                fd = self.tracefile.fileno()
1066                fl = fcntl.fcntl(fd, fcntl.F_GETFL)
1067                fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
1068        return self.tracefile
1069
1070    def trace_fields(self, nonblocking=False):
1071        """trace_fields(nonblocking=False)
1072
1073        Read from the kernel debug trace pipe and return a tuple of the
1074        fields (task, pid, cpu, flags, timestamp, msg) or None if no
1075        line was read (nonblocking=True)
1076        """
1077        while True:
1078            line = self.trace_readline(nonblocking)
1079            if not line and nonblocking: return (None,) * 6
1080            # don't print messages related to lost events
1081            if line.startswith(b"CPU:"): continue
1082            task = line[:16].lstrip()
1083            line = line[17:]
1084            ts_end = line.find(b":")
1085            pid, cpu, flags, ts = line[:ts_end].split()
1086            cpu = cpu[1:-1]
1087            # line[ts_end:] will have ": [sym_or_addr]: msgs"
1088            # For trace_pipe debug output, the addr typically
1089            # is invalid (e.g., 0x1). For kernel 4.12 or earlier,
1090            # if address is not able to match a kernel symbol,
1091            # nothing will be printed out. For kernel 4.13 and later,
1092            # however, the illegal address will be printed out.
1093            # Hence, both cases are handled here.
1094            line = line[ts_end + 1:]
1095            sym_end = line.find(b":")
1096            msg = line[sym_end + 2:]
1097            return (task, int(pid), int(cpu), flags, float(ts), msg)
1098
1099    def trace_readline(self, nonblocking=False):
1100        """trace_readline(nonblocking=False)
1101
1102        Read from the kernel debug trace pipe and return one line
1103        If nonblocking is False, this will block until ctrl-C is pressed.
1104        """
1105
1106        trace = self.trace_open(nonblocking)
1107
1108        line = None
1109        try:
1110            line = trace.readline(1024).rstrip()
1111        except IOError:
1112            pass
1113        return line
1114
1115    def trace_print(self, fmt=None):
1116        """trace_print(self, fmt=None)
1117
1118        Read from the kernel debug trace pipe and print on stdout.
1119        If fmt is specified, apply as a format string to the output. See
1120        trace_fields for the members of the tuple
1121        example: trace_print(fmt="pid {1}, msg = {5}")
1122        """
1123
1124        while True:
1125            if fmt:
1126                fields = self.trace_fields(nonblocking=False)
1127                if not fields: continue
1128                line = fmt.format(*fields)
1129            else:
1130                line = self.trace_readline(nonblocking=False)
1131            print(line)
1132            sys.stdout.flush()
1133
1134    @staticmethod
1135    def _sym_cache(pid):
1136        """_sym_cache(pid)
1137
1138        Returns a symbol cache for the specified PID.
1139        The kernel symbol cache is accessed by providing any PID less than zero.
1140        """
1141        if pid < 0 and pid != -1:
1142            pid = -1
1143        if not pid in BPF._sym_caches:
1144            BPF._sym_caches[pid] = SymbolCache(pid)
1145        return BPF._sym_caches[pid]
1146
1147    @staticmethod
1148    def sym(addr, pid, show_module=False, show_offset=False, demangle=True):
1149        """sym(addr, pid, show_module=False, show_offset=False)
1150
1151        Translate a memory address into a function name for a pid, which is
1152        returned. When show_module is True, the module name is also included.
1153        When show_offset is True, the instruction offset as a hexadecimal
1154        number is also included in the string.
1155
1156        A pid of less than zero will access the kernel symbol cache.
1157
1158        Example output when both show_module and show_offset are True:
1159            "start_thread+0x202 [libpthread-2.24.so]"
1160
1161        Example output when both show_module and show_offset are False:
1162            "start_thread"
1163        """
1164        name, offset, module = BPF._sym_cache(pid).resolve(addr, demangle)
1165        offset = b"+0x%x" % offset if show_offset and name is not None else b""
1166        name = name or b"[unknown]"
1167        name = name + offset
1168        module = b" [%s]" % os.path.basename(module) \
1169            if show_module and module is not None else b""
1170        return name + module
1171
1172    @staticmethod
1173    def ksym(addr, show_module=False, show_offset=False):
1174        """ksym(addr)
1175
1176        Translate a kernel memory address into a kernel function name, which is
1177        returned. When show_module is True, the module name ("kernel") is also
1178        included. When show_offset is true, the instruction offset as a
1179        hexadecimal number is also included in the string.
1180
1181        Example output when both show_module and show_offset are True:
1182            "default_idle+0x0 [kernel]"
1183        """
1184        return BPF.sym(addr, -1, show_module, show_offset, False)
1185
1186    @staticmethod
1187    def ksymname(name):
1188        """ksymname(name)
1189
1190        Translate a kernel name into an address. This is the reverse of
1191        ksym. Returns -1 when the function name is unknown."""
1192        return BPF._sym_cache(-1).resolve_name(None, name)
1193
1194    def num_open_kprobes(self):
1195        """num_open_kprobes()
1196
1197        Get the number of open K[ret]probes. Can be useful for scenarios where
1198        event_re is used while attaching and detaching probes.
1199        """
1200        return len(self.kprobe_fds)
1201
1202    def num_open_uprobes(self):
1203        """num_open_uprobes()
1204
1205        Get the number of open U[ret]probes.
1206        """
1207        return len(self.uprobe_fds)
1208
1209    def num_open_tracepoints(self):
1210        """num_open_tracepoints()
1211
1212        Get the number of open tracepoints.
1213        """
1214        return len(self.tracepoint_fds)
1215
1216    def perf_buffer_poll(self, timeout = -1):
1217        """perf_buffer_poll(self)
1218
1219        Poll from all open perf ring buffers, calling the callback that was
1220        provided when calling open_perf_buffer for each entry.
1221        """
1222        readers = (ct.c_void_p * len(self.perf_buffers))()
1223        for i, v in enumerate(self.perf_buffers.values()):
1224            readers[i] = v
1225        lib.perf_reader_poll(len(readers), readers, timeout)
1226
1227    def kprobe_poll(self, timeout = -1):
1228        """kprobe_poll(self)
1229
1230        Deprecated. Use perf_buffer_poll instead.
1231        """
1232        self.perf_buffer_poll(timeout)
1233
1234    def donothing(self):
1235        """the do nothing exit handler"""
1236
1237    def cleanup(self):
1238        # Clean up opened probes
1239        for k, v in list(self.kprobe_fds.items()):
1240            self.detach_kprobe_event(k)
1241        for k, v in list(self.uprobe_fds.items()):
1242            self.detach_uprobe_event(k)
1243        for k, v in list(self.tracepoint_fds.items()):
1244            self.detach_tracepoint(k)
1245        for k, v in list(self.raw_tracepoint_fds.items()):
1246            self.detach_raw_tracepoint(k)
1247
1248        # Clean up opened perf ring buffer and perf events
1249        table_keys = list(self.tables.keys())
1250        for key in table_keys:
1251            if isinstance(self.tables[key], PerfEventArray):
1252                del self.tables[key]
1253        for (ev_type, ev_config) in list(self.open_perf_events.keys()):
1254            self.detach_perf_event(ev_type, ev_config)
1255        if self.tracefile:
1256            self.tracefile.close()
1257            self.tracefile = None
1258        if self.module:
1259            lib.bpf_module_destroy(self.module)
1260            self.module = None
1261
1262    def __enter__(self):
1263        return self
1264
1265    def __exit__(self, exc_type, exc_val, exc_tb):
1266        self.cleanup()
1267
1268
1269from .usdt import USDT, USDTException
1270