1 /*
2  * Copyright (c) 2016 GitHub, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <cxxabi.h>
18 #include <cstring>
19 #include <fcntl.h>
20 #include <linux/elf.h>
21 #include <string.h>
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <unistd.h>
25 #include <cstdio>
26 
27 #include "bcc_elf.h"
28 #include "bcc_perf_map.h"
29 #include "bcc_proc.h"
30 #include "bcc_syms.h"
31 #include "common.h"
32 #include "vendor/tinyformat.hpp"
33 
34 #include "syms.h"
35 
getinode_()36 ino_t ProcStat::getinode_() {
37   struct stat s;
38   return (!stat(procfs_.c_str(), &s)) ? s.st_ino : -1;
39 }
40 
is_stale()41 bool ProcStat::is_stale() {
42   ino_t cur_inode = getinode_();
43   return (cur_inode > 0) && (cur_inode != inode_);
44 }
45 
ProcStat(int pid)46 ProcStat::ProcStat(int pid)
47     : procfs_(tfm::format("/proc/%d/exe", pid)), inode_(getinode_()) {}
48 
_add_symbol(const char * symname,uint64_t addr,void * p)49 void KSyms::_add_symbol(const char *symname, uint64_t addr, void *p) {
50   KSyms *ks = static_cast<KSyms *>(p);
51   ks->syms_.emplace_back(symname, addr);
52 }
53 
refresh()54 void KSyms::refresh() {
55   if (syms_.empty()) {
56     bcc_procutils_each_ksym(_add_symbol, this);
57     std::sort(syms_.begin(), syms_.end());
58   }
59 }
60 
resolve_addr(uint64_t addr,struct bcc_symbol * sym,bool demangle)61 bool KSyms::resolve_addr(uint64_t addr, struct bcc_symbol *sym, bool demangle) {
62   refresh();
63 
64   std::vector<Symbol>::iterator it;
65 
66   if (syms_.empty())
67     goto unknown_symbol;
68 
69   it = std::upper_bound(syms_.begin(), syms_.end(), Symbol("", addr));
70   if (it != syms_.begin()) {
71     it--;
72     sym->name = (*it).name.c_str();
73     if (demangle)
74       sym->demangle_name = sym->name;
75     sym->module = "kernel";
76     sym->offset = addr - (*it).addr;
77     return true;
78   }
79 
80 unknown_symbol:
81   memset(sym, 0, sizeof(struct bcc_symbol));
82   return false;
83 }
84 
resolve_name(const char * _unused,const char * name,uint64_t * addr)85 bool KSyms::resolve_name(const char *_unused, const char *name,
86                          uint64_t *addr) {
87   refresh();
88 
89   if (syms_.size() != symnames_.size()) {
90     symnames_.clear();
91     for (Symbol &sym : syms_) {
92       symnames_[sym.name] = sym.addr;
93     }
94   }
95 
96   auto it = symnames_.find(name);
97   if (it == symnames_.end())
98     return false;
99 
100   *addr = it->second;
101   return true;
102 }
103 
ProcSyms(int pid,struct bcc_symbol_option * option)104 ProcSyms::ProcSyms(int pid, struct bcc_symbol_option *option)
105     : pid_(pid), procstat_(pid), mount_ns_instance_(new ProcMountNS(pid_)) {
106   if (option)
107     std::memcpy(&symbol_option_, option, sizeof(bcc_symbol_option));
108   else
109     symbol_option_ = {
110       .use_debug_file = 1,
111       .check_debug_file_crc = 1,
112       .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC)
113     };
114   load_modules();
115 }
116 
_add_load_sections(uint64_t v_addr,uint64_t mem_sz,uint64_t file_offset,void * payload)117 int ProcSyms::_add_load_sections(uint64_t v_addr, uint64_t mem_sz,
118                                  uint64_t file_offset, void *payload) {
119   auto module = static_cast<Module *>(payload);
120   module->ranges_.emplace_back(v_addr, v_addr + mem_sz, file_offset);
121   return 0;
122 }
123 
load_exe()124 void ProcSyms::load_exe() {
125   ProcMountNSGuard g(mount_ns_instance_.get());
126   std::string exe = ebpf::get_pid_exe(pid_);
127   Module module(exe.c_str(), mount_ns_instance_.get(), &symbol_option_);
128 
129   if (module.type_ != ModuleType::EXEC)
130     return;
131 
132 
133   bcc_elf_foreach_load_section(exe.c_str(), &_add_load_sections, &module);
134 
135   if (!module.ranges_.empty())
136     modules_.emplace_back(std::move(module));
137 }
138 
load_modules()139 void ProcSyms::load_modules() {
140   load_exe();
141   bcc_procutils_each_module(pid_, _add_module, this);
142 }
143 
refresh()144 void ProcSyms::refresh() {
145   modules_.clear();
146   mount_ns_instance_.reset(new ProcMountNS(pid_));
147   load_modules();
148   procstat_.reset();
149 }
150 
_add_module(const char * modname,uint64_t start,uint64_t end,uint64_t offset,bool check_mount_ns,void * payload)151 int ProcSyms::_add_module(const char *modname, uint64_t start, uint64_t end,
152                           uint64_t offset, bool check_mount_ns, void *payload) {
153   ProcSyms *ps = static_cast<ProcSyms *>(payload);
154   auto it = std::find_if(
155       ps->modules_.begin(), ps->modules_.end(),
156       [=](const ProcSyms::Module &m) { return m.name_ == modname; });
157   if (it == ps->modules_.end()) {
158     auto module = Module(
159         modname, check_mount_ns ? ps->mount_ns_instance_.get() : nullptr,
160         &ps->symbol_option_);
161 
162     // pid/maps doesn't account for file_offset of text within the ELF.
163     // It only gives the mmap offset. We need the real offset for symbol
164     // lookup.
165     if (module.type_ == ModuleType::SO) {
166       ProcMountNSGuard g(ps->mount_ns_instance_.get());
167       if (bcc_elf_get_text_scn_info(modname, &module.elf_so_addr_,
168                                     &module.elf_so_offset_) < 0) {
169         fprintf(stderr, "WARNING: Couldn't find .text section in %s\n", modname);
170         fprintf(stderr, "WARNING: BCC can't handle sym look ups for %s", modname);
171       }
172     }
173 
174     if (!bcc_is_perf_map(modname) || module.type_ != ModuleType::UNKNOWN)
175       // Always add the module even if we can't read it, so that we could
176       // report correct module name. Unless it's a perf map that we only
177       // add readable ones.
178       it = ps->modules_.insert(ps->modules_.end(), std::move(module));
179     else
180       return 0;
181   }
182   it->ranges_.emplace_back(start, end, offset);
183   // perf-PID map is added last. We try both inside the Process's mount
184   // namespace + chroot, and in global /tmp. Make sure we only add one.
185   if (it->type_ == ModuleType::PERF_MAP)
186     return -1;
187 
188   return 0;
189 }
190 
resolve_addr(uint64_t addr,struct bcc_symbol * sym,bool demangle)191 bool ProcSyms::resolve_addr(uint64_t addr, struct bcc_symbol *sym,
192                             bool demangle) {
193   if (procstat_.is_stale())
194     refresh();
195 
196   memset(sym, 0, sizeof(struct bcc_symbol));
197 
198   const char *original_module = nullptr;
199   uint64_t offset;
200   bool only_perf_map = false;
201   for (Module &mod : modules_) {
202     if (only_perf_map && (mod.type_ != ModuleType::PERF_MAP))
203       continue;
204     if (mod.contains(addr, offset)) {
205       if (mod.find_addr(offset, sym)) {
206         if (demangle) {
207           if (sym->name && (!strncmp(sym->name, "_Z", 2) || !strncmp(sym->name, "___Z", 4)))
208             sym->demangle_name =
209                 abi::__cxa_demangle(sym->name, nullptr, nullptr, nullptr);
210           if (!sym->demangle_name)
211             sym->demangle_name = sym->name;
212         }
213         return true;
214       } else if (mod.type_ != ModuleType::PERF_MAP) {
215         // In this case, we found the address in the range of a module, but
216         // not able to find a symbol of that address in the module.
217         // Thus, we would try to find the address in perf map, and
218         // save the module's name in case we will need it later.
219         original_module = mod.name_.c_str();
220         only_perf_map = true;
221       }
222     }
223   }
224   // If we didn't find the symbol anywhere, the module name is probably
225   // set to be the perf map's name as it would be the last we tried.
226   // In this case, if we have found the address previously in a module,
227   // report the saved original module name instead.
228   if (original_module)
229     sym->module = original_module;
230   return false;
231 }
232 
resolve_name(const char * module,const char * name,uint64_t * addr)233 bool ProcSyms::resolve_name(const char *module, const char *name,
234                             uint64_t *addr) {
235   if (procstat_.is_stale())
236     refresh();
237 
238   for (Module &mod : modules_) {
239     if (mod.name_ == module)
240       return mod.find_name(name, addr);
241   }
242   return false;
243 }
244 
Module(const char * name,ProcMountNS * mount_ns,struct bcc_symbol_option * option)245 ProcSyms::Module::Module(const char *name, ProcMountNS *mount_ns,
246                          struct bcc_symbol_option *option)
247     : name_(name),
248       loaded_(false),
249       mount_ns_(mount_ns),
250       symbol_option_(option),
251       type_(ModuleType::UNKNOWN) {
252   ProcMountNSGuard g(mount_ns_);
253   int elf_type = bcc_elf_get_type(name_.c_str());
254   // The Module is an ELF file
255   if (elf_type >= 0) {
256     if (elf_type == ET_EXEC)
257       type_ = ModuleType::EXEC;
258     else if (elf_type == ET_DYN)
259       type_ = ModuleType::SO;
260     return;
261   }
262   // Other symbol files
263   if (bcc_is_valid_perf_map(name_.c_str()) == 1)
264     type_ = ModuleType::PERF_MAP;
265   else if (bcc_elf_is_vdso(name_.c_str()) == 1)
266     type_ = ModuleType::VDSO;
267 
268   // Will be stored later
269   elf_so_offset_ = 0;
270   elf_so_addr_ = 0;
271 }
272 
_add_symbol(const char * symname,uint64_t start,uint64_t size,void * p)273 int ProcSyms::Module::_add_symbol(const char *symname, uint64_t start,
274                                   uint64_t size, void *p) {
275   Module *m = static_cast<Module *>(p);
276   auto res = m->symnames_.emplace(symname);
277   m->syms_.emplace_back(&*(res.first), start, size);
278   return 0;
279 }
280 
load_sym_table()281 void ProcSyms::Module::load_sym_table() {
282   if (loaded_)
283     return;
284   loaded_ = true;
285 
286   if (type_ == ModuleType::UNKNOWN)
287     return;
288 
289   ProcMountNSGuard g(mount_ns_);
290 
291   if (type_ == ModuleType::PERF_MAP)
292     bcc_perf_map_foreach_sym(name_.c_str(), _add_symbol, this);
293   if (type_ == ModuleType::EXEC || type_ == ModuleType::SO)
294     bcc_elf_foreach_sym(name_.c_str(), _add_symbol, symbol_option_, this);
295   if (type_ == ModuleType::VDSO)
296     bcc_elf_foreach_vdso_sym(_add_symbol, this);
297 
298   std::sort(syms_.begin(), syms_.end());
299 }
300 
contains(uint64_t addr,uint64_t & offset) const301 bool ProcSyms::Module::contains(uint64_t addr, uint64_t &offset) const {
302   for (const auto &range : ranges_) {
303     if (addr >= range.start && addr < range.end) {
304       if (type_ == ModuleType::SO || type_ == ModuleType::VDSO) {
305         // Offset within the mmap
306         offset = addr - range.start + range.file_offset;
307 
308         // Offset within the ELF for SO symbol lookup
309         offset += (elf_so_addr_ - elf_so_offset_);
310       } else {
311         offset = addr;
312       }
313 
314       return true;
315     }
316   }
317 
318   return false;
319 }
320 
find_name(const char * symname,uint64_t * addr)321 bool ProcSyms::Module::find_name(const char *symname, uint64_t *addr) {
322   load_sym_table();
323 
324   for (Symbol &s : syms_) {
325     if (*(s.name) == symname) {
326       *addr = type_ == ModuleType::SO ? start() + s.start : s.start;
327       return true;
328     }
329   }
330   return false;
331 }
332 
find_addr(uint64_t offset,struct bcc_symbol * sym)333 bool ProcSyms::Module::find_addr(uint64_t offset, struct bcc_symbol *sym) {
334   load_sym_table();
335 
336   sym->module = name_.c_str();
337   sym->offset = offset;
338 
339   auto it = std::upper_bound(syms_.begin(), syms_.end(), Symbol(nullptr, offset, 0));
340   if (it == syms_.begin())
341     return false;
342 
343   // 'it' points to the symbol whose start address is strictly greater than
344   // the address we're looking for. Start stepping backwards as long as the
345   // current symbol is still below the desired address, and see if the end
346   // of the current symbol (start + size) is above the desired address. Once
347   // we have a matching symbol, return it. Note that simply looking at '--it'
348   // is not enough, because symbols can be nested. For example, we could be
349   // looking for offset 0x12 with the following symbols available:
350   // SYMBOL   START   SIZE    END
351   // goo      0x0     0x6     0x0 + 0x6 = 0x6
352   // foo      0x6     0x10    0x6 + 0x10 = 0x16
353   // bar      0x8     0x4     0x8 + 0x4 = 0xc
354   // baz      0x16    0x10    0x16 + 0x10 = 0x26
355   // The upper_bound lookup will return baz, and then going one symbol back
356   // brings us to bar, which does not contain offset 0x12 and is nested inside
357   // foo. Going back one more symbol brings us to foo, which contains 0x12
358   // and is a match.
359   // However, we also don't want to walk through the entire symbol list for
360   // unknown / missing symbols. So we will break if we reach a function that
361   // doesn't cover the function immediately before 'it', which means it is
362   // not possibly a nested function containing the address we're looking for.
363   --it;
364   uint64_t limit = it->start;
365   for (; offset >= it->start; --it) {
366     if (offset < it->start + it->size) {
367       sym->name = it->name->c_str();
368       sym->offset = (offset - it->start);
369       return true;
370     }
371     if (limit > it->start + it->size)
372       break;
373     // But don't step beyond begin()!
374     if (it == syms_.begin())
375       break;
376   }
377 
378   return false;
379 }
380 
381 extern "C" {
382 
bcc_symcache_new(int pid,struct bcc_symbol_option * option)383 void *bcc_symcache_new(int pid, struct bcc_symbol_option *option) {
384   if (pid < 0)
385     return static_cast<void *>(new KSyms());
386   return static_cast<void *>(new ProcSyms(pid, option));
387 }
388 
bcc_free_symcache(void * symcache,int pid)389 void bcc_free_symcache(void *symcache, int pid) {
390   if (pid < 0)
391     delete static_cast<KSyms*>(symcache);
392   else
393     delete static_cast<ProcSyms*>(symcache);
394 }
395 
bcc_symbol_free_demangle_name(struct bcc_symbol * sym)396 void bcc_symbol_free_demangle_name(struct bcc_symbol *sym) {
397   if (sym->demangle_name && (sym->demangle_name != sym->name))
398     free(const_cast<char*>(sym->demangle_name));
399 }
400 
bcc_symcache_resolve(void * resolver,uint64_t addr,struct bcc_symbol * sym)401 int bcc_symcache_resolve(void *resolver, uint64_t addr,
402                          struct bcc_symbol *sym) {
403   SymbolCache *cache = static_cast<SymbolCache *>(resolver);
404   return cache->resolve_addr(addr, sym) ? 0 : -1;
405 }
406 
bcc_symcache_resolve_no_demangle(void * resolver,uint64_t addr,struct bcc_symbol * sym)407 int bcc_symcache_resolve_no_demangle(void *resolver, uint64_t addr,
408                                      struct bcc_symbol *sym) {
409   SymbolCache *cache = static_cast<SymbolCache *>(resolver);
410   return cache->resolve_addr(addr, sym, false) ? 0 : -1;
411 }
412 
bcc_symcache_resolve_name(void * resolver,const char * module,const char * name,uint64_t * addr)413 int bcc_symcache_resolve_name(void *resolver, const char *module,
414                               const char *name, uint64_t *addr) {
415   SymbolCache *cache = static_cast<SymbolCache *>(resolver);
416   return cache->resolve_name(module, name, addr) ? 0 : -1;
417 }
418 
bcc_symcache_refresh(void * resolver)419 void bcc_symcache_refresh(void *resolver) {
420   SymbolCache *cache = static_cast<SymbolCache *>(resolver);
421   cache->refresh();
422 }
423 
// Search state for _find_module(): `name` is the module to look for; `start`
// and `file_offset` receive the values of the first matching mapping.
struct mod_st {
  const char *name;
  uint64_t start;
  uint64_t file_offset;
};

// Module-iteration callback: when `modname` equals the wanted name, records
// the mapping's start and offset in the mod_st behind `p` and returns -1;
// otherwise returns 0 and leaves the state untouched.
static int _find_module(const char *modname, uint64_t start, uint64_t end,
                        uint64_t offset, bool, void *p) {
  auto *wanted = static_cast<mod_st *>(p);
  if (strcmp(modname, wanted->name) != 0)
    return 0;

  wanted->start = start;
  wanted->file_offset = offset;
  return -1;
}
440 
bcc_resolve_global_addr(int pid,const char * module,const uint64_t address,uint64_t * global)441 int bcc_resolve_global_addr(int pid, const char *module, const uint64_t address,
442                             uint64_t *global) {
443   struct mod_st mod = {module, 0x0};
444   if (bcc_procutils_each_module(pid, _find_module, &mod) < 0 ||
445       mod.start == 0x0)
446     return -1;
447 
448   *global = mod.start - mod.file_offset + address;
449   return 0;
450 }
451 
_sym_cb_wrapper(const char * symname,uint64_t addr,uint64_t,void * payload)452 static int _sym_cb_wrapper(const char *symname, uint64_t addr, uint64_t,
453                            void *payload) {
454   SYM_CB cb = (SYM_CB) payload;
455   return cb(symname, addr);
456 }
457 
bcc_foreach_function_symbol(const char * module,SYM_CB cb)458 int bcc_foreach_function_symbol(const char *module, SYM_CB cb) {
459   if (module == 0 || cb == 0)
460     return -1;
461 
462   static struct bcc_symbol_option default_option = {
463     .use_debug_file = 1,
464     .check_debug_file_crc = 1,
465     .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC)
466   };
467 
468   return bcc_elf_foreach_sym(
469       module, _sym_cb_wrapper, &default_option, (void *)cb);
470 }
471 
_find_sym(const char * symname,uint64_t addr,uint64_t,void * payload)472 static int _find_sym(const char *symname, uint64_t addr, uint64_t,
473                      void *payload) {
474   struct bcc_symbol *sym = (struct bcc_symbol *)payload;
475   if (!strcmp(sym->name, symname)) {
476     sym->offset = addr;
477     return -1;
478   }
479   return 0;
480 }
481 
// Lookup state for _find_load(): `target_addr` is the virtual address to
// translate, `binary_addr` receives the corresponding offset in the file.
struct load_addr_t {
  uint64_t target_addr;
  uint64_t binary_addr;
};

// Load-section callback: when target_addr lies in [v_addr, v_addr + mem_sz),
// stores target_addr - v_addr + file_offset in binary_addr and returns -1;
// otherwise returns 0 and leaves the state untouched.
int _find_load(uint64_t v_addr, uint64_t mem_sz, uint64_t file_offset,
                       void *payload) {
  auto *lookup = static_cast<load_addr_t *>(payload);
  const uint64_t target = lookup->target_addr;

  const bool in_section = (target >= v_addr) && (target < v_addr + mem_sz);
  if (!in_section)
    return 0;

  lookup->binary_addr = target - v_addr + file_offset;
  return -1;
}
495 
bcc_resolve_symname(const char * module,const char * symname,const uint64_t addr,int pid,struct bcc_symbol_option * option,struct bcc_symbol * sym)496 int bcc_resolve_symname(const char *module, const char *symname,
497                         const uint64_t addr, int pid,
498                         struct bcc_symbol_option *option,
499                         struct bcc_symbol *sym) {
500   static struct bcc_symbol_option default_option = {
501     .use_debug_file = 1,
502     .check_debug_file_crc = 1,
503 #if defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
504     .use_symbol_type = BCC_SYM_ALL_TYPES | (1 << STT_PPC64LE_SYM_LEP),
505 #else
506     .use_symbol_type = BCC_SYM_ALL_TYPES,
507 #endif
508   };
509 
510   if (module == NULL)
511     return -1;
512 
513   memset(sym, 0, sizeof(bcc_symbol));
514 
515   if (strchr(module, '/')) {
516     sym->module = strdup(module);
517   } else {
518     sym->module = bcc_procutils_which_so(module, pid);
519   }
520   if (sym->module == NULL)
521     return -1;
522 
523   ProcMountNSGuard g(pid);
524 
525   sym->name = symname;
526   sym->offset = addr;
527   if (option == NULL)
528     option = &default_option;
529 
530   if (sym->name && sym->offset == 0x0)
531     if (bcc_elf_foreach_sym(sym->module, _find_sym, option, sym) < 0)
532       goto invalid_module;
533   if (sym->offset == 0x0)
534     goto invalid_module;
535 
536   // For executable (ET_EXEC) binaries, translate the virtual address
537   // to physical address in the binary file.
538   // For shared object binaries (ET_DYN), the address from symbol table should
539   // already be physical address in the binary file.
540   if (bcc_elf_get_type(sym->module) == ET_EXEC) {
541     struct load_addr_t addr = {
542       .target_addr = sym->offset,
543       .binary_addr = 0x0,
544     };
545     if (bcc_elf_foreach_load_section(sym->module, &_find_load, &addr) < 0)
546       goto invalid_module;
547     if (!addr.binary_addr)
548       goto invalid_module;
549     sym->offset = addr.binary_addr;
550   }
551   return 0;
552 
553 invalid_module:
554   if (sym->module) {
555     ::free(const_cast<char*>(sym->module));
556     sym->module = NULL;
557   }
558   return -1;
559 }
560 }
561