#!/usr/bin/env python
#
# oomkill   Trace oom_kill_process(). For Linux, uses BCC, eBPF.
#
# This traces the kernel out-of-memory killer, and prints basic details,
# including the system load averages. This can provide more context on the
# system state at the time of OOM: was it getting busier or steady, based
# on the load averages? This tool may also be useful to customize for
# investigations; for example, by adding other task_struct details at the time
# of OOM.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 09-Feb-2016   Brendan Gregg   Created this.

from bcc import BPF
from time import strftime
import ctypes as ct

# linux stats: read on every event so the load averages reflect the moment
# the OOM kill was reported.
loadavg = "/proc/loadavg"

# define BPF program
#
# The kprobe fires on entry to oom_kill_process() and emits one data_t record
# per OOM kill:
#   fpid/fcomm - process that was running when the OOM killer was invoked
#   tpid/tcomm - process chosen as the victim (oc->chosen)
#   pages      - oc->totalpages as recorded in the oom_control
#
# bpf_get_current_pid_tgid() packs (tgid << 32 | pid); the user-visible
# process ID is the tgid in the upper half, so it is shifted down rather than
# truncated (truncation would report the thread ID instead). The victim is
# likewise reported by tgid so that both IDs printed as "PID" are process IDs.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/oom.h>

struct data_t {
    u64 fpid;
    u64 tpid;
    u64 pages;
    char fcomm[TASK_COMM_LEN];
    char tcomm[TASK_COMM_LEN];
};

BPF_PERF_OUTPUT(events);

void kprobe__oom_kill_process(struct pt_regs *ctx, struct oom_control *oc, const char *message)
{
    struct task_struct *p = oc->chosen;
    struct data_t data = {};
    /* upper 32 bits hold the tgid, i.e. the process ID seen from user space */
    u32 pid = bpf_get_current_pid_tgid() >> 32;

    data.fpid = pid;
    data.tpid = p->tgid;
    data.pages = oc->totalpages;
    bpf_get_current_comm(&data.fcomm, sizeof(data.fcomm));
    bpf_probe_read(&data.tcomm, sizeof(data.tcomm), p->comm);
    events.perf_submit(ctx, &data, sizeof(data));
}
"""

# kernel->user event data: struct data_t
# Field order and widths must mirror struct data_t in bpf_text exactly, since
# the raw perf buffer bytes are cast straight onto this structure.
TASK_COMM_LEN = 16  # linux/sched.h


class Data(ct.Structure):
    _fields_ = [
        ("fpid", ct.c_ulonglong),
        ("tpid", ct.c_ulonglong),
        ("pages", ct.c_ulonglong),
        ("fcomm", ct.c_char * TASK_COMM_LEN),
        ("tcomm", ct.c_char * TASK_COMM_LEN),
    ]


# process event
def print_event(cpu, data, size):
    """Perf buffer callback: print one line describing an OOM kill.

    cpu and size are supplied by the perf buffer callback interface and are
    not used here. data is the raw event pointer, which is interpreted as a
    Data structure. The current contents of /proc/loadavg are appended to
    the printed line.
    """
    event = ct.cast(data, ct.POINTER(Data)).contents
    with open(loadavg) as stats:
        avgline = stats.read().rstrip()
    # comm fields are raw kernel bytes; decode leniently so an unusual task
    # name cannot raise inside the callback.
    print(("%s Triggered by PID %d (\"%s\"), OOM kill of PID %d (\"%s\")"
        ", %d pages, loadavg: %s") % (strftime("%H:%M:%S"), event.fpid,
        event.fcomm.decode('utf-8', 'replace'), event.tpid,
        event.tcomm.decode('utf-8', 'replace'), event.pages, avgline))


# initialize BPF
# The kprobe__ prefix on the BPF function name makes BCC attach it to
# oom_kill_process() when the program is loaded.
b = BPF(text=bpf_text)
print("Tracing OOM kills... Ctrl-C to stop.")
b["events"].open_perf_buffer(print_event)

# Poll for events until interrupted; each event invokes print_event().
while 1:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        exit()