• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 --[[
2 Copyright 2016 Marek Vavrusa <mvavrusa@cloudflare.com>
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8 http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 ]]
16 local ffi = require('ffi')
17 local bit = require('bit')
18 local has_syscall, S = pcall(require, 'syscall')
19 local M = {}
20 
-- FFI declarations used by this module.
-- Everything between the long brackets is C source handed to the LuaJIT FFI
-- parser, so the /* ... */ comments inside are part of that C text.
--   * struct bpf          - instruction classes, ld/ldx and alu/jmp field bits,
--                           endianness flags, helper flags and ancillary offsets,
--                           all declared as `static const int` members
--   * struct bpf_cmd      - command numbers (MAP_CREATE .. OBJ_GET)
--   * struct bpf_func_id  - helper function ids (unspec .. get_stackid)
--   * struct bpf_stacktrace - array of BPF_MAX_STACK_DEPTH 64-bit `ip` entries;
--                           it is given a metatype further down in this file
-- NOTE(review): F_CURRENT_CPU is written as 0xffffffff but declared `int`;
-- confirm the value callers actually observe is the one they expect.
ffi.cdef [[
struct bpf {
	/* Instruction classes */
	static const int LD   = 0x00;
	static const int LDX  = 0x01;
	static const int ST   = 0x02;
	static const int STX  = 0x03;
	static const int ALU  = 0x04;
	static const int JMP  = 0x05;
	static const int ALU64 = 0x07;
	/* ld/ldx fields */
	static const int W    = 0x00;
	static const int H    = 0x08;
	static const int B    = 0x10;
	static const int ABS  = 0x20;
	static const int IND  = 0x40;
	static const int MEM  = 0x60;
	static const int LEN  = 0x80;
	static const int MSH  = 0xa0;
	/* alu/jmp fields */
	static const int ADD  = 0x00;
	static const int SUB  = 0x10;
	static const int MUL  = 0x20;
	static const int DIV  = 0x30;
	static const int OR   = 0x40;
	static const int AND  = 0x50;
	static const int LSH  = 0x60;
	static const int RSH  = 0x70;
	static const int NEG  = 0x80;
	static const int MOD  = 0x90;
	static const int XOR  = 0xa0;
	static const int JA   = 0x00;
	static const int JEQ  = 0x10;
	static const int JGT  = 0x20;
	static const int JGE  = 0x30;
	static const int JSET = 0x40;
	static const int K    = 0x00;
	static const int X    = 0x08;
	static const int JNE  = 0x50;	/* jump != */
	static const int JSGT = 0x60;	/* SGT is signed '>', GT in x86 */
	static const int JSGE = 0x70;	/* SGE is signed '>=', GE in x86 */
	static const int CALL = 0x80;	/* function call */
	static const int EXIT = 0x90;	/* function return */
	/* ld/ldx fields */
	static const int DW    = 0x18;	/* double word */
	static const int XADD  = 0xc0;	/* exclusive add */
	/* alu/jmp fields */
	static const int MOV   = 0xb0;	/* mov reg to reg */
	static const int ARSH  = 0xc0;	/* sign extending arithmetic shift right */
	/* change endianness of a register */
	static const int END   = 0xd0;	/* flags for endianness conversion: */
	static const int TO_LE = 0x00;	/* convert to little-endian */
	static const int TO_BE = 0x08;	/* convert to big-endian */
	/* misc */
	static const int PSEUDO_MAP_FD = 0x01;
	/* helper functions */
	static const int F_CURRENT_CPU    = 0xffffffff;
	static const int F_USER_STACK     = 1 << 8;
	static const int F_FAST_STACK_CMP = 1 << 9;
	static const int F_REUSE_STACKID  = 1 << 10;
	/* special offsets for ancillary data */
	static const int NET_OFF          = -0x100000;
	static const int LL_OFF           = -0x200000;
};
/* eBPF commands */
struct bpf_cmd {
	static const int MAP_CREATE       = 0;
	static const int MAP_LOOKUP_ELEM  = 1;
	static const int MAP_UPDATE_ELEM  = 2;
	static const int MAP_DELETE_ELEM  = 3;
	static const int MAP_GET_NEXT_KEY = 4;
	static const int PROG_LOAD        = 5;
	static const int OBJ_PIN          = 6;
	static const int OBJ_GET          = 7;
};
/* eBPF helpers */
struct bpf_func_id {
	static const int unspec               = 0;
	static const int map_lookup_elem      = 1;
	static const int map_update_elem      = 2;
	static const int map_delete_elem      = 3;
	static const int probe_read           = 4;
	static const int ktime_get_ns         = 5;
	static const int trace_printk         = 6;
	static const int get_prandom_u32      = 7;
	static const int get_smp_processor_id = 8;
	static const int skb_store_bytes      = 9;
	static const int l3_csum_replace      = 10;
	static const int l4_csum_replace      = 11;
	static const int tail_call            = 12;
	static const int clone_redirect       = 13;
	static const int get_current_pid_tgid = 14;
	static const int get_current_uid_gid  = 15;
	static const int get_current_comm     = 16;
	static const int get_cgroup_classid   = 17;
	static const int skb_vlan_push        = 18;
	static const int skb_vlan_pop         = 19;
	static const int skb_get_tunnel_key   = 20;
	static const int skb_set_tunnel_key   = 21;
	static const int perf_event_read      = 22;
	static const int redirect             = 23;
	static const int get_route_realm      = 24;
	static const int perf_event_output    = 25;
	static const int skb_load_bytes       = 26;
	static const int get_stackid          = 27;
};
/* BPF_MAP_STACK_TRACE structures and constants */
static const int BPF_MAX_STACK_DEPTH = 127;
struct bpf_stacktrace {
	uint64_t ip[BPF_MAX_STACK_DEPTH];
};
]]
133 
-- Compatibility: this module needs ljsyscall with a bpf() wrapper.
-- `has_syscall, S` come from pcall(require, 'syscall'): when the require
-- failed, `S` holds the loader's error value rather than the module.
if not has_syscall then
	-- Surface the real load failure instead of claiming bpf() is unsupported
	error("ljsyscall is required but failed to load: " .. tostring(S))
elseif not S.bpf then
	error("ljsyscall doesn't support bpf(), must be updated")
else
	local strflag = require('syscall.helpers').strflag
	-- Compatibility: ljsyscall<=0.12
	-- Replace the map type table when it does not know LRU_HASH
	if not S.c.BPF_MAP.LRU_HASH then
		S.c.BPF_MAP = strflag {
			UNSPEC           = 0,
			HASH             = 1,
			ARRAY            = 2,
			PROG_ARRAY       = 3,
			PERF_EVENT_ARRAY = 4,
			PERCPU_HASH      = 5,
			PERCPU_ARRAY     = 6,
			STACK_TRACE      = 7,
			CGROUP_ARRAY     = 8,
			LRU_HASH         = 9,
			LRU_PERCPU_HASH  = 10,
			LPM_TRIE         = 11,
			ARRAY_OF_MAPS    = 12,
			HASH_OF_MAPS     = 13,
			DEVMAP           = 14,
			SOCKMAP          = 15,
			CPUMAP           = 16,
		}
	end
	-- Replace the program type table when it does not know TRACEPOINT
	if not S.c.BPF_PROG.TRACEPOINT then
		S.c.BPF_PROG = strflag {
			UNSPEC           = 0,
			SOCKET_FILTER    = 1,
			KPROBE           = 2,
			SCHED_CLS        = 3,
			SCHED_ACT        = 4,
			TRACEPOINT       = 5,
			XDP              = 6,
			PERF_EVENT       = 7,
			CGROUP_SKB       = 8,
			CGROUP_SOCK      = 9,
			LWT_IN           = 10,
			LWT_OUT          = 11,
			LWT_XMIT         = 12,
			SOCK_OPS         = 13,
			SK_SKB           = 14,
			CGROUP_DEVICE    = 15,
			SK_MSG           = 16,
			RAW_TRACEPOINT   = 17,
			CGROUP_SOCK_ADDR = 18,
		}
	end
end
185 
-- Compatibility: metatype for stacktrace
-- Gives `struct bpf_stacktrace` a length operator and an ipairs() iterator.

-- Iterator step: move to the slot after `i` and return its index and value.
-- Returns nothing once the index reaches #t or the slot is not greater than 0.
local function stacktrace_iter(t, i)
	local slot = i + 1
	if slot >= #t then
		return
	end
	local addr = t.ip[slot]
	if addr > 0 then
		return slot, addr
	end
end

local stacktrace_mt = {}

-- Number of entries in the `ip` array (array size divided by entry size)
function stacktrace_mt.__len(t)
	return ffi.sizeof(t.ip) / ffi.sizeof(t.ip[0])
end

-- Start at -1 so that the first iterator step yields slot 0
function stacktrace_mt.__ipairs(t)
	return stacktrace_iter, t, -1
end

ffi.metatype('struct bpf_stacktrace', stacktrace_mt)
197 
-- Reflect cdata type
-- Returns the text following '<' (up to the next '>') in the string form of
-- the value's ctype, or nil when `v` is not a cdata value.
function M.typename(v)
	if type(v) == 'cdata' then
		local repr = tostring(ffi.typeof(v))
		return repr:match('<([^>]+)')
	end
	return nil
end
203 
-- Reflect if cdata type can be pointer (accepts array or pointer)
-- Looks at the last character of M.typename(v): '*' counts as a pointer and
-- ']' counts as an array, unless `noarray` is set.
-- Returns nil when M.typename(v) yields nothing, otherwise a boolean.
function M.isptr(v, noarray)
	local name = M.typename(v)
	if not name then
		return name
	end
	local last = name:sub(-1)
	if last == '*' then
		return true
	end
	return not noarray and last == ']'
end
213 
-- Return true if variable is a non-nil constant that can be used as immediate value
-- e.g. result of KSHORT and KNUM
-- `v.const` is the constant and `v.type` its ctype:
--   * a Lua number qualifies unless the type is `void`
--   * a cdata constant qualifies only for `uint64_t` / `int64_t`
--     (Lua numbers are at most 52 bits)
function M.isimmconst(v)
	local kind = type(v.const)
	if kind == 'number' then
		return not ffi.istype(v.type, ffi.typeof('void'))
	end
	if kind == 'cdata' then
		return ffi.istype(v.type, ffi.typeof('uint64_t'))
			or ffi.istype(v.type, ffi.typeof('int64_t'))
	end
	return false
end
221 
-- Convert the first dotted 'X.Y.Z' triple found in `str` to 0xXXYYZZ.
-- Returns nil when `str` is not a string or contains no such triple.
local function parse_version_code(str)
	if type(str) ~= 'string' then return nil end
	-- The dots are escaped on purpose: a bare '.' matches any character and
	-- would accept e.g. '16 08:09' from a build timestamp as a version
	local major, minor, patch = str:match('(%d+)%.(%d+)%.(%d+)')
	if not major then return nil end
	-- The last component only has 8 bits; cap it at 255 so that it cannot spill
	-- into the minor byte (mirrors the kernel's KERNEL_VERSION() macro)
	patch = math.min(tonumber(patch), 255)
	return bit.bor(
		bit.lshift(tonumber(major), 16),
		bit.lshift(tonumber(minor), 8),
		patch)
end

-- Return the running kernel version encoded as 0xXXYYZZ for version X.Y.Z.
-- Raises an error when neither sysctl value contains a dotted version triple.
function M.osversion()
	-- We have no better way to extract current kernel hex-string other
	-- than parsing headers, compiling a helper function or reading /proc
	local version = parse_version_code(S.sysctl('kernel.version'))
	if not version then -- kernel.version is freeform, fallback to kernel.osrelease
		version = parse_version_code(S.sysctl('kernel.osrelease'))
	end
	if not version then
		error('unable to parse kernel version from sysctl kernel.version or kernel.osrelease')
	end
	return version
end
236 
-- Wrap a perf event reader in an object that yields only sample frames.
-- @param reader      object providing `fd` and a `next(self, k)` method that
--                    returns the next position and event
-- @param event_type  optional C type name (string); when given, each event is
--                    returned as a pointer to that type instead of the raw frame
-- @return table with fields `reader` and `type` (the pointer ctype, if any)
--         and methods `block()`, `next(k)` and `read()`
function M.event_reader(reader, event_type)
	-- Caller can specify event message binary format
	if event_type then
		assert(type(event_type) == 'string' and ffi.typeof(event_type), 'not a valid type for event reader')
		-- Convert type to pointer-to-type
		event_type = ffi.typeof(event_type .. '*')
	end

	local methods = {}

	-- Wait on the reader's file descriptor using S.select
	function methods.block(_ --[[self]])
		return S.select { readfds = {reader.fd} }
	end

	-- Fetch the next sample frame after position `k`
	function methods.next(_ --[[self]], k)
		local len, ev = reader:next(k)
		-- Filter out only sample frames
		while ev and ev.type ~= S.c.PERF_RECORD.SAMPLE do
			len, ev = reader:next(len)
		end
		if ev and event_type then
			-- The perf event reader returns framed data with header and variable length.
			-- Step over the frame header plus one uint32_t (presumably the
			-- sample size field) and cast the remainder to the requested type
			local payload = ffi.cast('char *', ev) + ffi.sizeof('struct perf_event_header') + ffi.sizeof('uint32_t')
			ev = ffi.cast(event_type, payload)
		end
		return len, ev
	end

	-- Iterator triple for a generic `for` loop
	function methods.read(self)
		return self.next, self, nil
	end

	-- Wrap reader in interface that can interpret read event messages
	return setmetatable({reader=reader,type=event_type}, {__index = methods})
end
266 
-- Build a C struct definition from a tracepoint's format description.
-- @param tp  tracepoint path below `events/`, e.g. 'category/name'
-- @return string of the form 'struct { <field>;<field>; ... }'
-- Raises an error when the format file cannot be opened or read.
function M.tracepoint_type(tp)
	-- tracefs is reachable below debugfs on older setups and at
	-- /sys/kernel/tracing when mounted on its own; try both in that order
	local roots = { '/sys/kernel/debug/tracing', '/sys/kernel/tracing' }
	local fp, open_err
	for i = 1, #roots do
		local err
		fp, err = io.open(roots[i]..'/events/'..tp..'/format', 'r')
		if fp then break end
		open_err = open_err or err -- report the failure of the primary path
	end
	assert(fp, open_err)
	-- Read tracepoint format string; close before checking the result so the
	-- handle is released even when the read failed
	local fmt, read_err = fp:read '*a'
	fp:close()
	assert(fmt, read_err)
	-- Parse struct fields: keep the declaration following each 'field:' up to
	-- and including its ';'
	local fields = {}
	for f in fmt:gmatch 'field:([^;]+;)' do
		fields[#fields + 1] = f
	end
	return string.format('struct { %s }', table.concat(fields))
end
279 
280 return M
281