1 /*
2  * Copyright (c) 2016 GitHub, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <sys/mman.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <unistd.h>
22 #include <string.h>
23 #include <libgen.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <limits.h>
27 
28 #include <gelf.h>
29 #include "bcc_elf.h"
30 #include "bcc_proc.h"
31 #include "bcc_syms.h"
32 
33 #define NT_STAPSDT 3
34 #define ELF_ST_TYPE(x) (((uint32_t) x) & 0xf)
35 
// Start a read-only libelf session on an already-open file descriptor.
//
// The handle returned by elf_begin() is always stored in *elf_out (NULL when
// elf_begin() fails). Returns 0 on success, -1 if libelf rejects EV_CURRENT
// or no handle could be created. The descriptor itself is never closed here.
static int openelf_fd(int fd, Elf **elf_out) {
  // libelf requires the API version to be negotiated before any other call.
  if (elf_version(EV_CURRENT) == EV_NONE)
    return -1;

  Elf *handle = elf_begin(fd, ELF_C_READ, NULL);
  *elf_out = handle;

  return handle ? 0 : -1;
}
46 
// Open `path` read-only and start a libelf session on it.
//
// Returns 0 on success; the caller then owns both the descriptor in *fd_out
// and the handle in *elf_out. Returns -1 on failure, in which case nothing is
// left open, *fd_out is negative and *elf_out is NULL.
static int openelf(const char *path, Elf **elf_out, int *fd_out) {
  *elf_out = NULL;

  *fd_out = open(path, O_RDONLY);
  if (*fd_out < 0)
    return -1;

  if (openelf_fd(*fd_out, elf_out) == -1) {
    close(*fd_out);
    // Several callers in this file close any non-negative descriptor on their
    // error path; invalidate it so the descriptor is not closed twice.
    *fd_out = -1;
    *elf_out = NULL;
    return -1;
  }

  return 0;
}
59 
parse_stapsdt_note(struct bcc_elf_usdt * probe,const char * desc,int elf_class)60 static const char *parse_stapsdt_note(struct bcc_elf_usdt *probe,
61                                       const char *desc, int elf_class) {
62   if (elf_class == ELFCLASS32) {
63     probe->pc = *((uint32_t *)(desc));
64     probe->base_addr = *((uint32_t *)(desc + 4));
65     probe->semaphore = *((uint32_t *)(desc + 8));
66     desc = desc + 12;
67   } else {
68     probe->pc = *((uint64_t *)(desc));
69     probe->base_addr = *((uint64_t *)(desc + 8));
70     probe->semaphore = *((uint64_t *)(desc + 16));
71     desc = desc + 24;
72   }
73 
74   probe->provider = desc;
75   desc += strlen(desc) + 1;
76 
77   probe->name = desc;
78   desc += strlen(desc) + 1;
79 
80   probe->arg_fmt = desc;
81   desc += strlen(desc) + 1;
82 
83   return desc;
84 }
85 
// Walk every note in `section` and report each well-formed stapsdt probe.
//
// A note is considered only if its type is NT_STAPSDT and its name is exactly
// "stapsdt" (8 bytes including the terminator). Probes whose pc lies below
// `first_inst_offset` are reported on stderr and skipped; all others are
// passed to `callback` together with `binpath` and `payload`. The callback's
// return value is ignored. Always returns 0.
static int do_note_segment(Elf_Scn *section, int elf_class,
                           bcc_elf_probecb callback, const char *binpath,
                           uint64_t first_inst_offset, void *payload) {
  // Size of the three leading address fields of a descriptor.
  const size_t addr_bytes = (elf_class == ELFCLASS32) ? 12 : 24;
  Elf_Data *data = NULL;

  while ((data = elf_getdata(section, data)) != 0) {
    size_t offset = 0;
    GElf_Nhdr hdr;
    size_t name_off, desc_off;

    while ((offset = gelf_getnote(data, offset, &hdr, &name_off, &desc_off)) !=
           0) {
      const char *desc, *desc_end, *cursor;
      struct bcc_elf_usdt probe;
      int nstrings = 0;

      if (hdr.n_type != NT_STAPSDT)
        continue;

      if (hdr.n_namesz != 8)
        continue;

      if (memcmp((const char *)data->d_buf + name_off, "stapsdt", 8) != 0)
        continue;

      // Too short to even hold the address fields.
      if (hdr.n_descsz < addr_bytes)
        continue;

      desc = (const char *)data->d_buf + desc_off;
      desc_end = desc + hdr.n_descsz;

      // The parser below uses strlen(); make sure three terminators exist
      // inside the descriptor so a malformed note cannot make it read past
      // the end of the note data.
      cursor = desc + addr_bytes;
      while (nstrings < 3) {
        const char *nul = memchr(cursor, '\0', (size_t)(desc_end - cursor));
        if (!nul)
          break;
        cursor = nul + 1;
        nstrings++;
      }
      if (nstrings < 3)
        continue;

      // Accept the note only if the three strings fill the descriptor exactly.
      if (parse_stapsdt_note(&probe, desc, elf_class) == desc_end) {
        if (probe.pc < first_inst_offset) {
          // Cast so the format matches on both 32-bit and 64-bit targets.
          fprintf(stderr,
                  "WARNING: invalid address 0x%llx for probe (%s,%s) in binary %s\n",
                  (unsigned long long)probe.pc, probe.provider, probe.name,
                  binpath);
        } else {
          callback(binpath, &probe, payload);
        }
      }
    }
  }
  return 0;
}
125 
// Enumerate the USDT probes recorded in the ".note.stapsdt" sections of `e`.
//
// First determines the address of the first section flagged SHF_EXECINSTR
// (0 if there is none); do_note_segment() uses it to reject probes with an
// address below the code. Then every SHT_NOTE section named ".note.stapsdt"
// is handed to do_note_segment().
//
// Returns 0 on success, -1 if the section-name string table cannot be located
// or do_note_segment() reports an error.
static int listprobes(Elf *e, bcc_elf_probecb callback, const char *binpath,
                      void *payload) {
  Elf_Scn *section = NULL;
  size_t stridx;
  int elf_class = gelf_getclass(e);
  uint64_t first_inst_offset = 0;

  if (elf_getshdrstrndx(e, &stridx) != 0)
    return -1;

  // Get the offset to the first instruction
  while ((section = elf_nextscn(e, section)) != 0) {
    GElf_Shdr header;

    if (!gelf_getshdr(section, &header))
      continue;

    // The elf file section layout is based on increasing virtual address,
    // getting the first section with SHF_EXECINSTR is enough.
    if (header.sh_flags & SHF_EXECINSTR) {
      first_inst_offset = header.sh_addr;
      break;
    }
  }

  // Restart from the first section: without this the scan below would resume
  // after the executable section found above and miss any note section that
  // precedes it.
  section = NULL;
  while ((section = elf_nextscn(e, section)) != 0) {
    GElf_Shdr header;
    char *name;

    if (!gelf_getshdr(section, &header))
      continue;

    if (header.sh_type != SHT_NOTE)
      continue;

    name = elf_strptr(e, stridx, header.sh_name);
    if (name && !strcmp(name, ".note.stapsdt")) {
      if (do_note_segment(section, elf_class, callback, binpath,
                          first_inst_offset, payload) < 0)
        return -1;
    }
  }

  return 0;
}
171 
// Invoke `callback` for every USDT probe found in the ELF file at `path`.
//
// Returns -1 if the file cannot be opened as ELF, otherwise whatever
// listprobes() returns. The file and the ELF handle are released before
// returning.
int bcc_elf_foreach_usdt(const char *path, bcc_elf_probecb callback,
                         void *payload) {
  Elf *elf;
  int fd;

  if (openelf(path, &elf, &fd) < 0)
    return -1;

  const int status = listprobes(elf, callback, path, payload);

  elf_end(elf);
  close(fd);
  return status;
}
186 
get_section(Elf * e,const char * section_name,GElf_Shdr * section_hdr,size_t * section_idx)187 static Elf_Scn * get_section(Elf *e, const char *section_name,
188                              GElf_Shdr *section_hdr, size_t *section_idx) {
189   Elf_Scn *section = NULL;
190   GElf_Shdr header;
191   char *name;
192 
193   size_t stridx;
194   if (elf_getshdrstrndx(e, &stridx) != 0)
195     return NULL;
196 
197   size_t index;
198   for (index = 1; (section = elf_nextscn(e, section)) != 0; index++) {
199     if (!gelf_getshdr(section, &header))
200       continue;
201 
202     name = elf_strptr(e, stridx, header.sh_name);
203     if (name && !strcmp(name, section_name)) {
204       if (section_hdr)
205         *section_hdr = header;
206       if (section_idx)
207         *section_idx = index;
208       return section;
209     }
210   }
211 
212   return NULL;
213 }
214 
// Report the symbols of one symbol-table section to `callback`.
//
// `stridx` is the index of the string table holding the symbol names and
// `symsize` the size of one symbol entry. Symbols are skipped when they have
// no name, an empty name, a zero value, or a type that is not enabled in
// option->use_symbol_type (a bit mask indexed by symbol type).
//
// On big-endian powerpc64, symbols located in the ".opd" section have their
// value replaced by the entry point stored in the function descriptor; on
// little-endian powerpc64 the value is optionally advanced to the local entry
// point.
//
// Returns 0 when every symbol was visited, 1 when `callback` returned a
// negative value to stop the iteration, and -1 when the section data is
// malformed (zero entry size, or data size not a multiple of the entry size).
static int list_in_scn(Elf *e, Elf_Scn *section, size_t stridx, size_t symsize,
                       struct bcc_symbol_option *option,
                       bcc_elf_symcb callback, void *payload) {
  Elf_Data *data = NULL;

#if defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  size_t opdidx = 0;
  Elf_Scn *opdsec = NULL;
  GElf_Shdr opdshdr = {};
  Elf_Data *opddata = NULL;

  opdsec = get_section(e, ".opd", &opdshdr, &opdidx);
  if (opdsec && opdshdr.sh_type == SHT_PROGBITS)
    opddata = elf_getdata(opdsec, NULL);
#endif

  while ((data = elf_getdata(section, data)) != 0) {
    // The entry size comes straight from the section header; a zero value
    // would make the division below trap.
    if (symsize == 0)
      return -1;

    size_t i, symcount = data->d_size / symsize;

    if (data->d_size % symsize)
      return -1;

    for (i = 0; i < symcount; ++i) {
      GElf_Sym sym;
      const char *name;

      if (!gelf_getsym(data, (int)i, &sym))
        continue;

      if ((name = elf_strptr(e, stridx, sym.st_name)) == NULL)
        continue;
      if (name[0] == 0)
        continue;

      if (sym.st_value == 0)
        continue;

      uint32_t st_type = ELF_ST_TYPE(sym.st_info);
      if (!(option->use_symbol_type & (1 << st_type)))
        continue;

#ifdef __powerpc64__
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
      if (opddata && sym.st_shndx == opdidx) {
        size_t offset = sym.st_value - opdshdr.sh_addr;
        /* Find the function descriptor */
        uint64_t *descr = opddata->d_buf + offset;
        /* Read the actual entry point address from the descriptor */
        sym.st_value = *descr;
      }
#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
      if (option->use_symbol_type & (1 << STT_PPC64LE_SYM_LEP)) {
        /*
         * The PowerPC 64-bit ELF v2 ABI says that the 3 most significant bits
         * in the st_other field of the symbol table specifies the number of
         * instructions between a function's Global Entry Point (GEP) and Local
         * Entry Point (LEP).
         */
        switch (sym.st_other >> 5) {
          /* GEP and LEP are the same for 0 or 1, usage is reserved for 7 */
          /* If 2, LEP is 1 instruction past the GEP */
          case 2: sym.st_value += 4; break;
          /* If 3, LEP is 2 instructions past the GEP */
          case 3: sym.st_value += 8; break;
          /* If 4, LEP is 4 instructions past the GEP */
          case 4: sym.st_value += 16; break;
          /* If 5, LEP is 8 instructions past the GEP */
          case 5: sym.st_value += 32; break;
          /* If 6, LEP is 16 instructions past the GEP */
          case 6: sym.st_value += 64; break;
        }
      }
#endif
#endif

      if (callback(name, sym.st_value, sym.st_size, payload) < 0)
        return 1;      // signal termination to caller
    }
  }

  return 0;
}
297 
// Feed the symbols of every SHT_SYMTAB and SHT_DYNSYM section of `e` to
// `callback` via list_in_scn().
//
// Returns 0 once all tables were processed or the callback asked to stop, and
// the negative value from list_in_scn() if a table is malformed.
static int listsymbols(Elf *e, bcc_elf_symcb callback, void *payload,
                       struct bcc_symbol_option *option) {
  Elf_Scn *scn = NULL;

  while ((scn = elf_nextscn(e, scn)) != NULL) {
    GElf_Shdr shdr;

    if (!gelf_getshdr(scn, &shdr))
      continue;

    // Only symbol tables are of interest.
    switch (shdr.sh_type) {
    case SHT_SYMTAB:
    case SHT_DYNSYM:
      break;
    default:
      continue;
    }

    // sh_link names the string table, sh_entsize the size of one symbol.
    const int status = list_in_scn(e, scn, shdr.sh_link, shdr.sh_entsize,
                                   option, callback, payload);
    if (status == 1)
      return 0;    // callback signaled termination

    if (status < 0)
      return status;
  }

  return 0;
}
322 
get_section_elf_data(Elf * e,const char * section_name)323 static Elf_Data * get_section_elf_data(Elf *e, const char *section_name) {
324   Elf_Scn *section = get_section(e, section_name, NULL, NULL);
325   if (section)
326     return elf_getdata(section, NULL);
327   return NULL;
328 }
329 
// Read the ".gnu_debuglink" section of `e`.
//
// The section holds a NUL-terminated file name followed by a CRC stored in
// its last four bytes. On success *debug_file points at the name inside the
// section data (not a copy, so it is only valid while `e` is open), *crc
// receives the checksum, and 1 is returned. Returns 0 with *debug_file NULL
// and *crc 0 when the section is missing, too small, or its name is not
// terminated within the section.
static int find_debuglink(Elf *e, char **debug_file, unsigned int *crc) {
  *debug_file = NULL;
  *crc = 0;

  Elf_Data *data = get_section_elf_data(e, ".gnu_debuglink");
  if (!data || !data->d_buf || data->d_size <= 5)
    return 0;

  char *name = (char *)data->d_buf;
  size_t name_area = data->d_size - 4;

  // Callers treat the name as a C string; refuse a section whose name would
  // run into (or past) the checksum bytes.
  if (memchr(name, '\0', name_area) == NULL)
    return 0;

  // Copy rather than dereference a cast pointer: the buffer is not known to
  // be suitably aligned at this offset.
  memcpy(crc, name + name_area, sizeof(*crc));
  *debug_file = name;

  return 1;
}
345 
// Write the GNU build id of `e` into `buildid` as a lowercase hex string.
//
// The ".note.gnu.build-id" data is expected to carry the owner name "GNU" at
// byte offset 12, with the id bytes starting at offset 16. Returns 1 on
// success and 0 if the section is missing, has a different owner, or the id
// would not fit into the output buffer.
static int find_buildid(Elf *e, char *buildid) {
  // The function has no size parameter; the caller in this file
  // (find_debug_via_buildid) supplies a 128-byte buffer, so never emit more
  // than that, terminator included.
  enum { BUILDID_HEX_CAPACITY = 128 };

  Elf_Data *data = get_section_elf_data(e, ".note.gnu.build-id");
  if (!data || data->d_size <= 16 || strcmp((char *)data->d_buf + 12, "GNU"))
    return 0;

  const unsigned char *bytes = (const unsigned char *)data->d_buf + 16;
  size_t length = data->d_size - 16;

  // Two hex digits per byte plus the trailing NUL.
  if (length > (BUILDID_HEX_CAPACITY - 1) / 2)
    return 0;

  for (size_t i = 0; i < length; ++i) {
    sprintf(buildid + (i * 2), "%02hhx", bytes[i]);
  }

  return 1;
}
360 
// The CRC algorithm used by GNU debuglink. Taken from:
//    https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html
//
// Folds the `len` bytes at `buf` into the running checksum `crc` (pass 0 to
// start a fresh computation) and returns the updated 32-bit checksum. The
// running value is complemented on entry and on exit, and each byte is
// consumed through a 256-entry lookup table.
static unsigned int gnu_debuglink_crc32(unsigned int crc,
                                        char *buf, size_t len) {
  static const unsigned int crc32_table[256] =
  {
    0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
    0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
    0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
    0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
    0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
    0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
    0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
    0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
    0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
    0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
    0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
    0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
    0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
    0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
    0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
    0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
    0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
    0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
    0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
    0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
    0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
    0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
    0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
    0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
    0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
    0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
    0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
    0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
    0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
    0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
    0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
    0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
    0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
    0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
    0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
    0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
    0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
    0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
    0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
    0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
    0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
    0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
    0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
    0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
    0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
    0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
    0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
    0x2d02ef8d
  };
  unsigned int state = ~crc & 0xffffffff;
  size_t pos;

  for (pos = 0; pos < len; ++pos) {
    // Only the low eight bits select the table slot.
    unsigned int slot = (state ^ (unsigned char)buf[pos]) & 0xff;
    state = crc32_table[slot] ^ (state >> 8);
  }

  return ~state & 0xffffffff;
}
427 
// Check whether the contents of `file` hash to `crc` under the GNU debuglink
// CRC.
//
// Returns 1 when the checksum matches and 0 when it does not or when the file
// cannot be opened, inspected or mapped.
static int verify_checksum(const char *file, unsigned int crc) {
  struct stat st;
  int fd;
  void *buf;
  unsigned int actual;

  fd = open(file, O_RDONLY);
  if (fd < 0)
    return 0;

  if (fstat(fd, &st) < 0) {
    close(fd);
    return 0;
  }

  buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
  // mmap() reports failure with MAP_FAILED, never with a NULL pointer.
  if (buf == MAP_FAILED) {
    close(fd);
    return 0;
  }

  actual = gnu_debuglink_crc32(0, buf, st.st_size);

  munmap(buf, st.st_size);
  close(fd);
  return actual == crc;
}
455 
// Locate the separate debuginfo file named by the ".gnu_debuglink" section.
//
// Three locations are tried in order, relative to the directory of `binpath`:
// that directory itself, its ".debug" subdirectory, and the same directory
// under /usr/lib/debug. When `check_crc` is non-zero the file found must also
// match the checksum stored in the section.
//
// Returns a heap-allocated path the caller must free(), or NULL if there is
// no debuglink, no candidate exists, the checksum does not match, or memory
// is exhausted.
static char *find_debug_via_debuglink(Elf *e, const char *binpath,
                                      int check_crc) {
  char fullpath[PATH_MAX];
  char *tmppath;
  char *bindir = NULL;
  char *res = NULL;
  unsigned int crc;
  char *name;  // the name of the debuginfo file

  if (!find_debuglink(e, &name, &crc))
    return NULL;

  // dirname() may modify its argument, so work on a private copy. Without
  // this check a failed allocation would make dirname(NULL) return "." and
  // the search would silently look in the wrong directory.
  tmppath = strdup(binpath);
  if (!tmppath)
    return NULL;
  bindir = dirname(tmppath);

  // Search for the file in 'binpath', but ignore the file we find if it
  // matches the binary itself: the binary will always be probed later on,
  // and it might contain poorer symbols (e.g. stripped or partial symbols)
  // than the external debuginfo that might be available elsewhere.
  snprintf(fullpath, sizeof(fullpath),"%s/%s", bindir, name);
  if (strcmp(fullpath, binpath) != 0 && access(fullpath, F_OK) != -1) {
    res = strdup(fullpath);
    goto DONE;
  }

  // Search for the file in 'binpath'/.debug
  snprintf(fullpath, sizeof(fullpath), "%s/.debug/%s", bindir, name);
  if (access(fullpath, F_OK) != -1) {
    res = strdup(fullpath);
    goto DONE;
  }

  // Search for the file in the global debug directory /usr/lib/debug/'binpath'
  snprintf(fullpath, sizeof(fullpath), "/usr/lib/debug%s/%s", bindir, name);
  if (access(fullpath, F_OK) != -1) {
    res = strdup(fullpath);
    goto DONE;
  }

DONE:
  free(tmppath);
  if (res && check_crc && !verify_checksum(res, crc)) {
    free(res);
    return NULL;
  }
  return res;
}
503 
// Locate the separate debuginfo file through the GNU build id of `e`.
//
// The candidate is /usr/lib/debug/.build-id/mm/nnnn...nnnn.debug, where "mm"
// are the first two hex characters of the build id and "nnnn...nnnn" the
// remaining ones. Returns a heap-allocated copy of that path (to be released
// with free()) if the file exists, NULL otherwise or when no build id is
// available.
static char *find_debug_via_buildid(Elf *e) {
  char buildid[128];  // currently 40 seems to be default, let's be safe
  char fullpath[PATH_MAX];

  if (!find_buildid(e, buildid))
    return NULL;

  snprintf(fullpath, sizeof(fullpath), "/usr/lib/debug/.build-id/%c%c/%s.debug",
          buildid[0], buildid[1], buildid + 2);

  return access(fullpath, F_OK) == -1 ? NULL : strdup(fullpath);
}
523 
// Report the symbols of the ELF file at `path` to `callback`, optionally
// preceded by the symbols of its separate debuginfo file.
//
// When option->use_debug_file is set and `is_debug_file` is zero, the
// debuginfo file is looked up first by build id and then by debuglink (the
// order GDB uses, see
// https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html) and
// processed recursively with `is_debug_file` set, which stops the recursion
// after one level. The result of that nested pass is ignored.
//
// Returns -1 if `option` is NULL or the file cannot be opened, otherwise the
// result of listsymbols() for `path` itself.
static int foreach_sym_core(const char *path, bcc_elf_symcb callback,
                            struct bcc_symbol_option *option, void *payload,
                            int is_debug_file) {
  Elf *e;
  int fd;

  if (option == NULL)
    return -1;

  if (openelf(path, &e, &fd) < 0)
    return -1;

  const int want_debug_file = option->use_debug_file && !is_debug_file;
  if (want_debug_file) {
    char *debug_path = find_debug_via_buildid(e);

    if (debug_path == NULL)
      debug_path = find_debug_via_debuglink(e, path,
                                            option->check_debug_file_crc);

    if (debug_path != NULL) {
      foreach_sym_core(debug_path, callback, option, payload, 1);
      free(debug_path);
    }
  }

  const int status = listsymbols(e, callback, payload, option);

  elf_end(e);
  close(fd);
  return status;
}
559 
// Public entry point for symbol iteration: `option` is interpreted as a
// pointer to struct bcc_symbol_option and the work is delegated to
// foreach_sym_core() with debuginfo lookup allowed (is_debug_file == 0).
int bcc_elf_foreach_sym(const char *path, bcc_elf_symcb callback,
                        void *option, void *payload) {
  struct bcc_symbol_option *sym_option = (struct bcc_symbol_option*)option;

  return foreach_sym_core(path, callback, sym_option, payload, 0);
}
565 
bcc_elf_get_text_scn_info(const char * path,uint64_t * addr,uint64_t * offset)566 int bcc_elf_get_text_scn_info(const char *path, uint64_t *addr,
567 				   uint64_t *offset) {
568   Elf *e = NULL;
569   int fd = -1, err;
570   Elf_Scn *section = NULL;
571   GElf_Shdr header;
572   size_t stridx;
573   char *name;
574 
575   if ((err = openelf(path, &e, &fd)) < 0 ||
576       (err = elf_getshdrstrndx(e, &stridx)) < 0)
577     goto exit;
578 
579   err = -1;
580   while ((section = elf_nextscn(e, section)) != 0) {
581     if (!gelf_getshdr(section, &header))
582       continue;
583 
584     name = elf_strptr(e, stridx, header.sh_name);
585     if (name && !strcmp(name, ".text")) {
586       *addr = (uint64_t)header.sh_addr;
587       *offset = (uint64_t)header.sh_offset;
588       err = 0;
589       break;
590     }
591   }
592 
593 exit:
594   if (e)
595     elf_end(e);
596   if (fd >= 0)
597     close(fd);
598   return err;
599 }
600 
// Call `callback` for every executable PT_LOAD program header of the ELF
// file at `path`, passing its virtual address, memory size, file offset and
// `payload`.
//
// Returns 0 when all headers were visited, 1 when the callback returned a
// negative value (which stops the iteration), and -1 if the file cannot be
// opened as ELF or the program header count is unavailable.
int bcc_elf_foreach_load_section(const char *path,
                                 bcc_elf_load_sectioncb callback,
                                 void *payload) {
  Elf *e = NULL;
  int fd = -1, err = -1, res;
  size_t nhdrs, i;

  // openelf() leaves nothing open when it fails, so return directly instead
  // of running the cleanup below, which would close the descriptor again.
  if (openelf(path, &e, &fd) < 0)
    return -1;

  if (elf_getphdrnum(e, &nhdrs) != 0)
    goto exit;

  GElf_Phdr header;
  for (i = 0; i < nhdrs; i++) {
    if (!gelf_getphdr(e, (int)i, &header))
      continue;
    // Only loadable segments that are mapped executable are reported.
    if (header.p_type != PT_LOAD || !(header.p_flags & PF_X))
      continue;
    res = callback(header.p_vaddr, header.p_memsz, header.p_offset, payload);
    if (res < 0) {
      err = 1;
      goto exit;
    }
  }
  err = 0;

exit:
  elf_end(e);
  close(fd);
  return err;
}
635 
bcc_elf_get_type(const char * path)636 int bcc_elf_get_type(const char *path) {
637   Elf *e;
638   GElf_Ehdr hdr;
639   int fd;
640   void* res = NULL;
641 
642   if (openelf(path, &e, &fd) < 0)
643     return -1;
644 
645   res = (void*)gelf_getehdr(e, &hdr);
646   elf_end(e);
647   close(fd);
648 
649   if (!res)
650     return -1;
651   else
652     return hdr.e_type;
653 }
654 
// Return 1 if `path` can be read as an ELF file (its type is obtainable) and
// the calling process has execute permission on it, 0 otherwise.
int bcc_elf_is_exe(const char *path) {
  if (bcc_elf_get_type(path) == -1)
    return 0;

  return access(path, X_OK) == 0;
}
658 
bcc_elf_is_shared_obj(const char * path)659 int bcc_elf_is_shared_obj(const char *path) {
660   return bcc_elf_get_type(path) == ET_DYN;
661 }
662 
// Return 1 if `name` is exactly the module name "[vdso]", 0 otherwise.
int bcc_elf_is_vdso(const char *name) {
  return !strcmp(name, "[vdso]");
}
666 
// State of the cached copy of the vDSO image:
//   -2: Failed (a lookup was attempted and no image could be stored)
//   -1: Not initialized (no lookup attempted yet)
//  >=0: Initialized; the value is the descriptor of the temporary file that
//       find_vdso() wrote the image to
static int vdso_image_fd = -1;
671 
find_vdso(const char * name,uint64_t st,uint64_t en,uint64_t offset,bool enter_ns,void * payload)672 static int find_vdso(const char *name, uint64_t st, uint64_t en,
673                      uint64_t offset, bool enter_ns, void *payload) {
674   int fd;
675   char tmpfile[128];
676   if (!bcc_elf_is_vdso(name))
677     return 0;
678 
679   void *image = malloc(en - st);
680   if (!image)
681     goto on_error;
682   memcpy(image, (void *)st, en - st);
683 
684   snprintf(tmpfile, sizeof(tmpfile), "/tmp/bcc_%d_vdso_image_XXXXXX", getpid());
685   fd = mkostemp(tmpfile, O_CLOEXEC);
686   if (fd < 0) {
687     fprintf(stderr, "Unable to create temp file: %s\n", strerror(errno));
688     goto on_error;
689   }
690   // Unlink the file to avoid leaking
691   if (unlink(tmpfile) == -1)
692     fprintf(stderr, "Unlink %s failed: %s\n", tmpfile, strerror(errno));
693 
694   if (write(fd, image, en - st) == -1) {
695     fprintf(stderr, "Failed to write to vDSO image: %s\n", strerror(errno));
696     close(fd);
697     goto on_error;
698   }
699   vdso_image_fd = fd;
700 
701 on_error:
702   if (image)
703     free(image);
704   // Always stop the iteration
705   return -1;
706 }
707 
// Report the function symbols (STT_FUNC and STT_GNU_IFUNC) of the current
// process's vDSO to `callback`.
//
// The first call scans the process's modules for the vDSO and caches a copy
// of its image in a temporary file; later calls reuse the cached descriptor.
// Returns -1 if no image is available or it cannot be opened as ELF,
// otherwise the result of listsymbols().
int bcc_elf_foreach_vdso_sym(bcc_elf_symcb callback, void *payload) {
  Elf *elf;
  int res;
  static struct bcc_symbol_option default_option = {
    .use_debug_file = 0,
    .check_debug_file_crc = 0,
    .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC)
  };

  if (vdso_image_fd == -1) {
    // Mark the lookup as failed up front; find_vdso() overwrites this with
    // the descriptor when it manages to save the image.
    vdso_image_fd = -2;
    bcc_procutils_each_module(getpid(), &find_vdso, NULL);
  }
  if (vdso_image_fd == -2)
    return -1;

  if (openelf_fd(vdso_image_fd, &elf) == -1)
    return -1;

  res = listsymbols(elf, callback, payload, &default_option);
  // Release the per-call ELF handle; the cached descriptor stays open for
  // subsequent calls.
  elf_end(elf);
  return res;
}
728 
729 // return value: 0   : success
730 //               < 0 : error and no bcc lib found
731 //               > 0 : error and bcc lib found
bcc_free_memory_with_file(const char * path)732 static int bcc_free_memory_with_file(const char *path) {
733   unsigned long sym_addr = 0, sym_shndx;
734   Elf_Scn *section = NULL;
735   int fd = -1, err;
736   GElf_Shdr header;
737   Elf *e = NULL;
738 
739   if ((err = openelf(path, &e, &fd)) < 0)
740     goto exit;
741 
742   // get symbol address of "bcc_free_memory", which
743   // will be used to calculate runtime .text address
744   // range, esp. for shared libraries.
745   err = -1;
746   while ((section = elf_nextscn(e, section)) != 0) {
747     Elf_Data *data = NULL;
748     size_t symsize;
749 
750     if (!gelf_getshdr(section, &header))
751       continue;
752 
753     if (header.sh_type != SHT_SYMTAB && header.sh_type != SHT_DYNSYM)
754       continue;
755 
756     /* iterate all symbols */
757     symsize = header.sh_entsize;
758     while ((data = elf_getdata(section, data)) != 0) {
759       size_t i, symcount = data->d_size / symsize;
760 
761       for (i = 0; i < symcount; ++i) {
762         GElf_Sym sym;
763 
764         if (!gelf_getsym(data, (int)i, &sym))
765           continue;
766 
767         if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
768           continue;
769 
770         const char *name;
771         if ((name = elf_strptr(e, header.sh_link, sym.st_name)) == NULL)
772           continue;
773 
774         if (strcmp(name, "bcc_free_memory") == 0) {
775           sym_addr = sym.st_value;
776           sym_shndx = sym.st_shndx;
777           break;
778         }
779       }
780     }
781   }
782 
783   // Didn't find bcc_free_memory in the ELF file.
784   if (sym_addr == 0)
785     goto exit;
786 
787   int sh_idx = 0;
788   section = NULL;
789   err = 1;
790   while ((section = elf_nextscn(e, section)) != 0) {
791     sh_idx++;
792     if (!gelf_getshdr(section, &header))
793       continue;
794 
795     if (sh_idx == sym_shndx) {
796       unsigned long saddr, saddr_n, eaddr;
797       long page_size = sysconf(_SC_PAGESIZE);
798 
799       saddr = (unsigned long)bcc_free_memory - sym_addr + header.sh_addr;
800       eaddr = saddr + header.sh_size;
801 
802       // adjust saddr and eaddr, start addr needs to be page aligned
803       saddr_n = (saddr + page_size - 1) & ~(page_size - 1);
804       eaddr -= saddr_n - saddr;
805 
806       if (madvise((void *)saddr_n, eaddr - saddr_n, MADV_DONTNEED)) {
807         fprintf(stderr, "madvise failed, saddr %lx, eaddr %lx\n", saddr, eaddr);
808         goto exit;
809       }
810 
811       err = 0;
812       break;
813     }
814   }
815 
816 exit:
817   if (e)
818     elf_end(e);
819   if (fd >= 0)
820     close(fd);
821   return err;
822 }
823 
// Free bcc memory
//
// The main purpose of this function is to free llvm/clang text memory
// through madvise MADV_DONTNEED.
//
// bcc could be linked statically or dynamically into the application.
// If it is static linking, there is no easy way to know which region
// inside .text section belongs to llvm/clang, so the whole .text section
// is freed. Otherwise, the process map is searched to find libbcc.so
// library and the whole .text section for that shared library is
// freed.
//
// Note that the text memory used by bcc (mainly llvm/clang) is reclaimable
// in the kernel as it is file backed. But the reclaim process
// may take some time if there is no memory pressure. So this API is mostly
// used by applications that need to immediately lower their RssFile
// metric right after loading a BPF program.
//
// Returns 0 on success and a negative value on failure.
int bcc_free_memory() {
  int err;

  // First try whether bcc is statically linked or not
  err = bcc_free_memory_with_file("/proc/self/exe");
  if (err >= 0)
    return -err;

  // Not statically linked, let us find the libbcc.so
  FILE *maps = fopen("/proc/self/maps", "r");
  if (!maps)
    return -1;

  char *line = NULL;
  size_t size = 0;
  while (getline(&line, &size, maps) > 0) {
    char *libbcc = strstr(line, "libbcc.so");
    if (!libbcc)
      continue;

    // Parse the line and get the full libbcc.so path
    unsigned long addr_start, addr_end, offset, inode;
    int path_start = 0, path_end = 0;
    unsigned int devmajor, devminor;
    char perms[8];
    // The device major:minor pair is printed in hexadecimal, so it has to be
    // read with %x; %u would reject values such as "fd:00".
    if (sscanf(line, "%lx-%lx %7s %lx %x:%x %lu %n%*[^\n]%n",
               &addr_start, &addr_end, perms, &offset,
               &devmajor, &devminor, &inode,
               &path_start, &path_end) < 7)
       break;

    // path_end stays 0 when the line carries no path; also make sure the
    // path fits the local buffer together with its terminator.
    char libbcc_path[4096];
    if (path_end <= path_start ||
        (size_t)(path_end - path_start) >= sizeof(libbcc_path))
      continue;

    // Free the text in the bcc dynamic library.
    memcpy(libbcc_path, line + path_start, path_end - path_start);
    libbcc_path[path_end - path_start] = '\0';
    err = bcc_free_memory_with_file(libbcc_path);
    // Fold the "error and bcc lib found" result into a negative error code.
    err = (err <= 0) ? err : -err;
  }

  fclose(maps);
  free(line);
  return err;
}
884 
// Ad-hoc command-line driver, compiled out by the `#if 0` guard: it would
// resolve the symbol argv[2] in the file argv[1] and print its address.
// NOTE(review): it calls bcc_elf_findsym, which is not defined in the visible
// part of this file - confirm it still exists before re-enabling.
#if 0
#include <stdio.h>

int main(int argc, char *argv[])
{
  uint64_t addr;
  if (bcc_elf_findsym(argv[1], argv[2], -1, STT_FUNC, &addr) < 0)
    return -1;

  printf("%s: %p\n", argv[2], (void *)addr);
  return 0;
}
#endif
898