1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <stdlib.h>
20 #include <unistd.h>
21 #include <linux/bpf.h>
22 #include <linux/unistd.h>
23 #include <sys/file.h>
24 
// BPF_FD_TYPE is the type used for file descriptor parameters throughout this header and
// BPF_FD_TO_U32(x) converts such a parameter into the __u32 stored in bpf_attr fields.
// When BPF_FD_JUST_USE_INT is defined, fds are plain ints; otherwise they are passed as
// references to base::unique_fd and the raw fd is read with .get().
// Both helper macros are #undef'd again at the end of this header.
#ifdef BPF_FD_JUST_USE_INT
  #define BPF_FD_TYPE int
  #define BPF_FD_TO_U32(x) static_cast<__u32>(x)
#else
  #include <android-base/unique_fd.h>
  #define BPF_FD_TYPE base::unique_fd&
  #define BPF_FD_TO_U32(x) static_cast<__u32>((x).get())
#endif
33 
34 namespace android {
35 namespace bpf {
36 
// Converts a pointer into the 64-bit unsigned integer form used by the
// pointer-carrying fields of bpf_attr (the value goes through uintptr_t first).
inline uint64_t ptr_to_u64(const void * const x) {
    const auto address = reinterpret_cast<uintptr_t>(x);
    return static_cast<uint64_t>(address);
}
40 
/* Note: bpf_attr is a union which might have a much larger size than the anonymous struct portion
 * of it that we are using.  The kernel's bpf() system call will perform a strict check to ensure
 * all unused portions are zero.  It will fail with E2BIG if we don't fully zero bpf_attr.
 */
45 
bpf(enum bpf_cmd cmd,const bpf_attr & attr)46 inline int bpf(enum bpf_cmd cmd, const bpf_attr& attr) {
47     return syscall(__NR_bpf, cmd, &attr, sizeof(attr));
48 }
49 
50 // this version is meant for use with cmd's which mutate the argument
bpf(enum bpf_cmd cmd,bpf_attr * attr)51 inline int bpf(enum bpf_cmd cmd, bpf_attr *attr) {
52     return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
53 }
54 
createMap(bpf_map_type map_type,uint32_t key_size,uint32_t value_size,uint32_t max_entries,uint32_t map_flags)55 inline int createMap(bpf_map_type map_type, uint32_t key_size, uint32_t value_size,
56                      uint32_t max_entries, uint32_t map_flags) {
57     return bpf(BPF_MAP_CREATE, {
58                                        .map_type = map_type,
59                                        .key_size = key_size,
60                                        .value_size = value_size,
61                                        .max_entries = max_entries,
62                                        .map_flags = map_flags,
63                                });
64 }
65 
66 // Note:
67 //   'map_type' must be one of BPF_MAP_TYPE_{ARRAY,HASH}_OF_MAPS
68 //   'value_size' must be sizeof(u32), ie. 4
69 //   'inner_map_fd' is basically a template specifying {map_type, key_size, value_size, max_entries, map_flags}
70 //   of the inner map type (and possibly only key_size/value_size actually matter?).
createOuterMap(bpf_map_type map_type,uint32_t key_size,uint32_t value_size,uint32_t max_entries,uint32_t map_flags,const BPF_FD_TYPE inner_map_fd)71 inline int createOuterMap(bpf_map_type map_type, uint32_t key_size, uint32_t value_size,
72                           uint32_t max_entries, uint32_t map_flags, const BPF_FD_TYPE inner_map_fd) {
73     return bpf(BPF_MAP_CREATE, {
74                                        .map_type = map_type,
75                                        .key_size = key_size,
76                                        .value_size = value_size,
77                                        .max_entries = max_entries,
78                                        .map_flags = map_flags,
79                                        .inner_map_fd = BPF_FD_TO_U32(inner_map_fd),
80                                });
81 }
82 
writeToMapEntry(const BPF_FD_TYPE map_fd,const void * key,const void * value,uint64_t flags)83 inline int writeToMapEntry(const BPF_FD_TYPE map_fd, const void* key, const void* value,
84                            uint64_t flags) {
85     return bpf(BPF_MAP_UPDATE_ELEM, {
86                                             .map_fd = BPF_FD_TO_U32(map_fd),
87                                             .key = ptr_to_u64(key),
88                                             .value = ptr_to_u64(value),
89                                             .flags = flags,
90                                     });
91 }
92 
findMapEntry(const BPF_FD_TYPE map_fd,const void * key,void * value)93 inline int findMapEntry(const BPF_FD_TYPE map_fd, const void* key, void* value) {
94     return bpf(BPF_MAP_LOOKUP_ELEM, {
95                                             .map_fd = BPF_FD_TO_U32(map_fd),
96                                             .key = ptr_to_u64(key),
97                                             .value = ptr_to_u64(value),
98                                     });
99 }
100 
deleteMapEntry(const BPF_FD_TYPE map_fd,const void * key)101 inline int deleteMapEntry(const BPF_FD_TYPE map_fd, const void* key) {
102     return bpf(BPF_MAP_DELETE_ELEM, {
103                                             .map_fd = BPF_FD_TO_U32(map_fd),
104                                             .key = ptr_to_u64(key),
105                                     });
106 }
107 
getNextMapKey(const BPF_FD_TYPE map_fd,const void * key,void * next_key)108 inline int getNextMapKey(const BPF_FD_TYPE map_fd, const void* key, void* next_key) {
109     return bpf(BPF_MAP_GET_NEXT_KEY, {
110                                              .map_fd = BPF_FD_TO_U32(map_fd),
111                                              .key = ptr_to_u64(key),
112                                              .next_key = ptr_to_u64(next_key),
113                                      });
114 }
115 
getFirstMapKey(const BPF_FD_TYPE map_fd,void * firstKey)116 inline int getFirstMapKey(const BPF_FD_TYPE map_fd, void* firstKey) {
117     return getNextMapKey(map_fd, NULL, firstKey);
118 }
119 
bpfFdPin(const BPF_FD_TYPE map_fd,const char * pathname)120 inline int bpfFdPin(const BPF_FD_TYPE map_fd, const char* pathname) {
121     return bpf(BPF_OBJ_PIN, {
122                                     .pathname = ptr_to_u64(pathname),
123                                     .bpf_fd = BPF_FD_TO_U32(map_fd),
124                             });
125 }
126 
bpfFdGet(const char * pathname,uint32_t flag)127 inline int bpfFdGet(const char* pathname, uint32_t flag) {
128     return bpf(BPF_OBJ_GET, {
129                                     .pathname = ptr_to_u64(pathname),
130                                     .file_flags = flag,
131                             });
132 }
133 
134 int bpfGetFdMapId(const BPF_FD_TYPE map_fd);
135 
bpfLock(int fd,short type)136 inline int bpfLock(int fd, short type) {
137     if (fd < 0) return fd;  // pass any errors straight through
138 #ifdef BPF_MAP_LOCKLESS_FOR_TEST
139     return fd;
140 #endif
141 #ifdef BPF_FD_JUST_USE_INT
142     int mapId = bpfGetFdMapId(fd);
143     int saved_errno = errno;
144 #else
145     base::unique_fd ufd(fd);
146     int mapId = bpfGetFdMapId(ufd);
147     int saved_errno = errno;
148     (void)ufd.release();
149 #endif
150     // 4.14+ required to fetch map id, but we don't want to call isAtLeastKernelVersion
151     if (mapId == -1 && saved_errno == EINVAL) return fd;
152     if (mapId <= 0) abort();  // should not be possible
153 
154     // on __LP64__ (aka. 64-bit userspace) 'struct flock64' is the same as 'struct flock'
155     struct flock64 fl = {
156         .l_type = type,        // short: F_{RD,WR,UN}LCK
157         .l_whence = SEEK_SET,  // short: SEEK_{SET,CUR,END}
158         .l_start = mapId,      // off_t: start offset
159         .l_len = 1,            // off_t: number of bytes
160     };
161 
162     // see: bionic/libc/bionic/fcntl.cpp: iff !__LP64__ this uses fcntl64
163     int ret = fcntl(fd, F_OFD_SETLK, &fl);
164     if (!ret) return fd;  // success
165     close(fd);
166     return ret;  // most likely -1 with errno == EAGAIN, due to already held lock
167 }
168 
mapRetrieveLocklessRW(const char * pathname)169 inline int mapRetrieveLocklessRW(const char* pathname) {
170     return bpfFdGet(pathname, 0);
171 }
172 
mapRetrieveExclusiveRW(const char * pathname)173 inline int mapRetrieveExclusiveRW(const char* pathname) {
174     return bpfLock(mapRetrieveLocklessRW(pathname), F_WRLCK);
175 }
176 
mapRetrieveRW(const char * pathname)177 inline int mapRetrieveRW(const char* pathname) {
178     return bpfLock(mapRetrieveLocklessRW(pathname), F_RDLCK);
179 }
180 
mapRetrieveRO(const char * pathname)181 inline int mapRetrieveRO(const char* pathname) {
182     return bpfFdGet(pathname, BPF_F_RDONLY);
183 }
184 
185 // WARNING: it's impossible to grab a shared (ie. read) lock on a write-only fd,
186 // so we instead choose to grab an exclusive (ie. write) lock.
mapRetrieveWO(const char * pathname)187 inline int mapRetrieveWO(const char* pathname) {
188     return bpfLock(bpfFdGet(pathname, BPF_F_WRONLY), F_WRLCK);
189 }
190 
retrieveProgram(const char * pathname)191 inline int retrieveProgram(const char* pathname) {
192     return bpfFdGet(pathname, BPF_F_RDONLY);
193 }
194 
usableProgram(const char * pathname)195 inline bool usableProgram(const char* pathname) {
196     int fd = retrieveProgram(pathname);
197     bool ok = (fd >= 0);
198     if (ok) close(fd);
199     return ok;
200 }
201 
202 inline int attachProgram(bpf_attach_type type, const BPF_FD_TYPE prog_fd,
203                          const BPF_FD_TYPE cg_fd, uint32_t flags = 0) {
204     return bpf(BPF_PROG_ATTACH, {
205                                         .target_fd = BPF_FD_TO_U32(cg_fd),
206                                         .attach_bpf_fd = BPF_FD_TO_U32(prog_fd),
207                                         .attach_type = type,
208                                         .attach_flags = flags,
209                                 });
210 }
211 
detachProgram(bpf_attach_type type,const BPF_FD_TYPE cg_fd)212 inline int detachProgram(bpf_attach_type type, const BPF_FD_TYPE cg_fd) {
213     return bpf(BPF_PROG_DETACH, {
214                                         .target_fd = BPF_FD_TO_U32(cg_fd),
215                                         .attach_type = type,
216                                 });
217 }
218 
219 inline int queryProgram(const BPF_FD_TYPE cg_fd,
220                         enum bpf_attach_type attach_type,
221                         __u32 query_flags = 0,
222                         __u32 attach_flags = 0) {
223     int prog_id = -1;  // equivalent to an array of one integer.
224     bpf_attr arg = {
225             .query = {
226                     .target_fd = BPF_FD_TO_U32(cg_fd),
227                     .attach_type = attach_type,
228                     .query_flags = query_flags,
229                     .attach_flags = attach_flags,
230                     .prog_ids = ptr_to_u64(&prog_id),  // pointer to output array
231                     .prog_cnt = 1,  // in: space - nr of ints in the array, out: used
232             }
233     };
234     int v = bpf(BPF_PROG_QUERY, &arg);
235     if (v) return v;  // error case
236     if (!arg.query.prog_cnt) return 0;  // no program, kernel never returns zero id
237     return prog_id;  // return actual id
238 }
239 
detachSingleProgram(bpf_attach_type type,const BPF_FD_TYPE prog_fd,const BPF_FD_TYPE cg_fd)240 inline int detachSingleProgram(bpf_attach_type type, const BPF_FD_TYPE prog_fd,
241                                const BPF_FD_TYPE cg_fd) {
242     return bpf(BPF_PROG_DETACH, {
243                                         .target_fd = BPF_FD_TO_U32(cg_fd),
244                                         .attach_bpf_fd = BPF_FD_TO_U32(prog_fd),
245                                         .attach_type = type,
246                                 });
247 }
248 
249 // Available in 4.12 and later kernels.
runProgram(const BPF_FD_TYPE prog_fd,const void * data,const uint32_t data_size)250 inline int runProgram(const BPF_FD_TYPE prog_fd, const void* data,
251                       const uint32_t data_size) {
252     return bpf(BPF_PROG_RUN, {
253                                      .test = {
254                                              .prog_fd = BPF_FD_TO_U32(prog_fd),
255                                              .data_size_in = data_size,
256                                              .data_in = ptr_to_u64(data),
257                                      },
258                              });
259 }
260 
261 // BPF_OBJ_GET_INFO_BY_FD requires 4.14+ kernel
262 //
263 // Note: some fields are only defined in newer kernels (ie. the map_info struct grows
264 // over time), so we need to check that the field we're interested in is actually
265 // supported/returned by the running kernel.  We do this by checking it is fully
266 // within the bounds of the struct size as reported by the kernel.
267 #define DEFINE_BPF_GET_FD(TYPE, NAME, FIELD) \
268 inline int bpfGetFd ## NAME(const BPF_FD_TYPE fd) { \
269     struct bpf_ ## TYPE ## _info info = {}; \
270     union bpf_attr attr = { .info = { \
271         .bpf_fd = BPF_FD_TO_U32(fd), \
272         .info_len = sizeof(info), \
273         .info = ptr_to_u64(&info), \
274     }}; \
275     int rv = bpf(BPF_OBJ_GET_INFO_BY_FD, attr); \
276     if (rv) return rv; \
277     if (attr.info.info_len < offsetof(bpf_ ## TYPE ## _info, FIELD) + sizeof(info.FIELD)) { \
278         errno = EOPNOTSUPP; \
279         return -1; \
280     }; \
281     return info.FIELD; \
282 }
283 
284 // All 7 of these fields are already present in Linux v4.14 (even ACK 4.14-P)
285 // while BPF_OBJ_GET_INFO_BY_FD is not implemented at all in v4.9 (even ACK 4.9-Q)
286 DEFINE_BPF_GET_FD(map, MapType, type)            // int bpfGetFdMapType(const BPF_FD_TYPE map_fd)
287 DEFINE_BPF_GET_FD(map, MapId, id)                // int bpfGetFdMapId(const BPF_FD_TYPE map_fd)
288 DEFINE_BPF_GET_FD(map, KeySize, key_size)        // int bpfGetFdKeySize(const BPF_FD_TYPE map_fd)
289 DEFINE_BPF_GET_FD(map, ValueSize, value_size)    // int bpfGetFdValueSize(const BPF_FD_TYPE map_fd)
290 DEFINE_BPF_GET_FD(map, MaxEntries, max_entries)  // int bpfGetFdMaxEntries(const BPF_FD_TYPE map_fd)
291 DEFINE_BPF_GET_FD(map, MapFlags, map_flags)      // int bpfGetFdMapFlags(const BPF_FD_TYPE map_fd)
292 DEFINE_BPF_GET_FD(prog, ProgId, id)              // int bpfGetFdProgId(const BPF_FD_TYPE prog_fd)
293 
294 #undef DEFINE_BPF_GET_FD
295 
296 }  // namespace bpf
297 }  // namespace android
298 
299 #undef BPF_FD_TO_U32
300 #undef BPF_FD_TYPE
301 #undef BPF_FD_JUST_USE_INT
302