1 /*
2  * Copyright (c) 2016 Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <fcntl.h>
18 #include <linux/elf.h>
19 #include <linux/perf_event.h>
20 #include <sys/epoll.h>
21 #include <unistd.h>
22 #include <cerrno>
23 #include <cinttypes>
24 #include <cstdint>
25 #include <cstring>
26 #include <iostream>
27 #include <memory>
28 
29 #include "BPFTable.h"
30 
31 #include "bcc_exception.h"
32 #include "bcc_syms.h"
33 #include "common.h"
34 #include "file_desc.h"
35 #include "libbpf.h"
36 #include "perf_reader.h"
37 
38 namespace ebpf {
39 
BPFTable(const TableDesc & desc)40 BPFTable::BPFTable(const TableDesc& desc) : BPFTableBase<void, void>(desc) {}
41 
get_value(const std::string & key_str,std::string & value_str)42 StatusTuple BPFTable::get_value(const std::string& key_str,
43                                 std::string& value_str) {
44   char key[desc.key_size];
45   char value[desc.leaf_size];
46 
47   StatusTuple r(0);
48 
49   r = string_to_key(key_str, key);
50   if (r.code() != 0)
51     return r;
52 
53   if (!lookup(key, value))
54     return StatusTuple(-1, "error getting value");
55 
56   return leaf_to_string(value, value_str);
57 }
58 
get_value(const std::string & key_str,std::vector<std::string> & value_str)59 StatusTuple BPFTable::get_value(const std::string& key_str,
60                                 std::vector<std::string>& value_str) {
61   size_t ncpus = get_possible_cpus().size();
62   char key[desc.key_size];
63   char value[desc.leaf_size * ncpus];
64 
65   StatusTuple r(0);
66 
67   r = string_to_key(key_str, key);
68   if (r.code() != 0)
69     return r;
70 
71   if (!lookup(key, value))
72     return StatusTuple(-1, "error getting value");
73 
74   value_str.resize(ncpus);
75 
76   for (size_t i = 0; i < ncpus; i++) {
77     r = leaf_to_string(value + i * desc.leaf_size, value_str.at(i));
78     if (r.code() != 0)
79       return r;
80   }
81   return StatusTuple(0);
82 }
83 
update_value(const std::string & key_str,const std::string & value_str)84 StatusTuple BPFTable::update_value(const std::string& key_str,
85                                    const std::string& value_str) {
86   char key[desc.key_size];
87   char value[desc.leaf_size];
88 
89   StatusTuple r(0);
90 
91   r = string_to_key(key_str, key);
92   if (r.code() != 0)
93     return r;
94 
95   r = string_to_leaf(value_str, value);
96   if (r.code() != 0)
97     return r;
98 
99   if (!update(key, value))
100     return StatusTuple(-1, "error updating element");
101 
102   return StatusTuple(0);
103 }
104 
update_value(const std::string & key_str,const std::vector<std::string> & value_str)105 StatusTuple BPFTable::update_value(const std::string& key_str,
106                                    const std::vector<std::string>& value_str) {
107   size_t ncpus = get_possible_cpus().size();
108   char key[desc.key_size];
109   char value[desc.leaf_size * ncpus];
110 
111   StatusTuple r(0);
112 
113   r = string_to_key(key_str, key);
114   if (r.code() != 0)
115     return r;
116 
117   if (value_str.size() != ncpus)
118     return StatusTuple(-1, "bad value size");
119 
120   for (size_t i = 0; i < ncpus; i++) {
121     r = string_to_leaf(value_str.at(i), value + i * desc.leaf_size);
122     if (r.code() != 0)
123       return r;
124   }
125 
126   if (!update(key, value))
127     return StatusTuple(-1, "error updating element");
128 
129   return StatusTuple(0);
130 }
131 
remove_value(const std::string & key_str)132 StatusTuple BPFTable::remove_value(const std::string& key_str) {
133   char key[desc.key_size];
134 
135   StatusTuple r(0);
136 
137   r = string_to_key(key_str, key);
138   if (r.code() != 0)
139     return r;
140 
141   if (!remove(key))
142     return StatusTuple(-1, "error removing element");
143 
144   return StatusTuple(0);
145 }
146 
clear_table_non_atomic()147 StatusTuple BPFTable::clear_table_non_atomic() {
148   if (desc.type == BPF_MAP_TYPE_HASH || desc.type == BPF_MAP_TYPE_PERCPU_HASH ||
149       desc.type == BPF_MAP_TYPE_LRU_HASH ||
150       desc.type == BPF_MAP_TYPE_PERCPU_HASH ||
151       desc.type == BPF_MAP_TYPE_HASH_OF_MAPS) {
152     // For hash maps, use the first() interface (which uses get_next_key) to
153     // iterate through the map and clear elements
154     auto key = std::unique_ptr<void, decltype(::free)*>(::malloc(desc.key_size),
155                                                         ::free);
156 
157     while (this->first(key.get()))
158       if (!this->remove(key.get())) {
159         return StatusTuple(-1,
160                            "Failed to delete element when clearing table %s",
161                            desc.name.c_str());
162       }
163   } else if (desc.type == BPF_MAP_TYPE_ARRAY ||
164              desc.type == BPF_MAP_TYPE_PERCPU_ARRAY) {
165     return StatusTuple(-1, "Array map %s do not support clearing elements",
166                        desc.name.c_str());
167   } else if (desc.type == BPF_MAP_TYPE_PROG_ARRAY ||
168              desc.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
169              desc.type == BPF_MAP_TYPE_STACK_TRACE ||
170              desc.type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
171     // For Stack-trace and FD arrays, just iterate over all indices
172     for (size_t i = 0; i < desc.max_entries; i++) {
173       this->remove(&i);
174     }
175   } else {
176     return StatusTuple(-1, "Clearing for map type of %s not supported yet",
177                        desc.name.c_str());
178   }
179 
180   return StatusTuple(0);
181 }
182 
get_table_offline(std::vector<std::pair<std::string,std::string>> & res)183 StatusTuple BPFTable::get_table_offline(
184   std::vector<std::pair<std::string, std::string>> &res) {
185   StatusTuple r(0);
186   int err;
187 
188   auto key = std::unique_ptr<void, decltype(::free)*>(::malloc(desc.key_size),
189                                                       ::free);
190   auto value = std::unique_ptr<void, decltype(::free)*>(::malloc(desc.leaf_size),
191                                                       ::free);
192   std::string key_str;
193   std::string value_str;
194 
195   if (desc.type == BPF_MAP_TYPE_ARRAY ||
196       desc.type == BPF_MAP_TYPE_PROG_ARRAY ||
197       desc.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
198       desc.type == BPF_MAP_TYPE_PERCPU_ARRAY ||
199       desc.type == BPF_MAP_TYPE_CGROUP_ARRAY ||
200       desc.type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
201       desc.type == BPF_MAP_TYPE_DEVMAP ||
202       desc.type == BPF_MAP_TYPE_CPUMAP ||
203       desc.type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
204     // For arrays, just iterate over all indices
205     for (size_t i = 0; i < desc.max_entries; i++) {
206       err = bpf_lookup_elem(desc.fd, &i, value.get());
207       if (err < 0 && errno == ENOENT) {
208         // Element is not present, skip it
209         continue;
210       } else if (err < 0) {
211         // Other error, abort
212         return StatusTuple(-1, "Error looking up value: %s", std::strerror(errno));
213       }
214 
215       r = key_to_string(&i, key_str);
216       if (r.code() != 0)
217         return r;
218 
219       r = leaf_to_string(value.get(), value_str);
220       if (r.code() != 0)
221         return r;
222       res.emplace_back(key_str, value_str);
223     }
224   } else {
225     res.clear();
226     // For other maps, try to use the first() and next() interfaces
227     if (!this->first(key.get()))
228       return StatusTuple(0);
229 
230     while (true) {
231       if (!this->lookup(key.get(), value.get()))
232         break;
233       r = key_to_string(key.get(), key_str);
234       if (r.code() != 0)
235         return r;
236 
237       r = leaf_to_string(value.get(), value_str);
238       if (r.code() != 0)
239         return r;
240       res.emplace_back(key_str, value_str);
241       if (!this->next(key.get(), key.get()))
242         break;
243     }
244   }
245 
246   return StatusTuple(0);
247 }
248 
get_possible_cpu_count()249 size_t BPFTable::get_possible_cpu_count() { return get_possible_cpus().size(); }
250 
BPFStackTable(const TableDesc & desc,bool use_debug_file,bool check_debug_file_crc)251 BPFStackTable::BPFStackTable(const TableDesc& desc, bool use_debug_file,
252                              bool check_debug_file_crc)
253     : BPFTableBase<int, stacktrace_t>(desc) {
254   if (desc.type != BPF_MAP_TYPE_STACK_TRACE)
255     throw std::invalid_argument("Table '" + desc.name +
256                                 "' is not a stack table");
257 
258   symbol_option_ = {.use_debug_file = use_debug_file,
259                     .check_debug_file_crc = check_debug_file_crc,
260                     .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC)};
261 }
262 
BPFStackTable(BPFStackTable && that)263 BPFStackTable::BPFStackTable(BPFStackTable&& that)
264     : BPFTableBase<int, stacktrace_t>(that.desc),
265       symbol_option_(std::move(that.symbol_option_)),
266       pid_sym_(std::move(that.pid_sym_)) {
267   that.pid_sym_.clear();
268 }
269 
~BPFStackTable()270 BPFStackTable::~BPFStackTable() {
271   for (auto it : pid_sym_)
272     bcc_free_symcache(it.second, it.first);
273 }
274 
clear_table_non_atomic()275 void BPFStackTable::clear_table_non_atomic() {
276   for (int i = 0; size_t(i) < capacity(); i++) {
277     remove(&i);
278   }
279 }
280 
get_stack_addr(int stack_id)281 std::vector<uintptr_t> BPFStackTable::get_stack_addr(int stack_id) {
282   std::vector<uintptr_t> res;
283   stacktrace_t stack;
284   if (stack_id < 0)
285     return res;
286   if (!lookup(&stack_id, &stack))
287     return res;
288   for (int i = 0; (i < BPF_MAX_STACK_DEPTH) && (stack.ip[i] != 0); i++)
289     res.push_back(stack.ip[i]);
290   return res;
291 }
292 
get_stack_symbol(int stack_id,int pid)293 std::vector<std::string> BPFStackTable::get_stack_symbol(int stack_id,
294                                                          int pid) {
295   auto addresses = get_stack_addr(stack_id);
296   std::vector<std::string> res;
297   if (addresses.empty())
298     return res;
299   res.reserve(addresses.size());
300 
301   if (pid < 0)
302     pid = -1;
303   if (pid_sym_.find(pid) == pid_sym_.end())
304     pid_sym_[pid] = bcc_symcache_new(pid, &symbol_option_);
305   void* cache = pid_sym_[pid];
306 
307   bcc_symbol symbol;
308   for (auto addr : addresses)
309     if (bcc_symcache_resolve(cache, addr, &symbol) != 0)
310       res.emplace_back("[UNKNOWN]");
311     else {
312       res.push_back(symbol.demangle_name);
313       bcc_symbol_free_demangle_name(&symbol);
314     }
315 
316   return res;
317 }
318 
BPFPerfBuffer(const TableDesc & desc)319 BPFPerfBuffer::BPFPerfBuffer(const TableDesc& desc)
320     : BPFTableBase<int, int>(desc), epfd_(-1) {
321   if (desc.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
322     throw std::invalid_argument("Table '" + desc.name +
323                                 "' is not a perf buffer");
324 }
325 
open_on_cpu(perf_reader_raw_cb cb,perf_reader_lost_cb lost_cb,int cpu,void * cb_cookie,int page_cnt)326 StatusTuple BPFPerfBuffer::open_on_cpu(perf_reader_raw_cb cb,
327                                        perf_reader_lost_cb lost_cb, int cpu,
328                                        void* cb_cookie, int page_cnt) {
329   if (cpu_readers_.find(cpu) != cpu_readers_.end())
330     return StatusTuple(-1, "Perf buffer already open on CPU %d", cpu);
331 
332   auto reader = static_cast<perf_reader*>(
333       bpf_open_perf_buffer(cb, lost_cb, cb_cookie, -1, cpu, page_cnt));
334   if (reader == nullptr)
335     return StatusTuple(-1, "Unable to construct perf reader");
336 
337   int reader_fd = perf_reader_fd(reader);
338   if (!update(&cpu, &reader_fd)) {
339     perf_reader_free(static_cast<void*>(reader));
340     return StatusTuple(-1, "Unable to open perf buffer on CPU %d: %s", cpu,
341                        std::strerror(errno));
342   }
343 
344   struct epoll_event event = {};
345   event.events = EPOLLIN;
346   event.data.ptr = static_cast<void*>(reader);
347   if (epoll_ctl(epfd_, EPOLL_CTL_ADD, reader_fd, &event) != 0) {
348     perf_reader_free(static_cast<void*>(reader));
349     return StatusTuple(-1, "Unable to add perf_reader FD to epoll: %s",
350                        std::strerror(errno));
351   }
352 
353   cpu_readers_[cpu] = reader;
354   return StatusTuple(0);
355 }
356 
open_all_cpu(perf_reader_raw_cb cb,perf_reader_lost_cb lost_cb,void * cb_cookie,int page_cnt)357 StatusTuple BPFPerfBuffer::open_all_cpu(perf_reader_raw_cb cb,
358                                         perf_reader_lost_cb lost_cb,
359                                         void* cb_cookie, int page_cnt) {
360   if (cpu_readers_.size() != 0 || epfd_ != -1)
361     return StatusTuple(-1, "Previously opened perf buffer not cleaned");
362 
363   std::vector<int> cpus = get_online_cpus();
364   ep_events_.reset(new epoll_event[cpus.size()]);
365   epfd_ = epoll_create1(EPOLL_CLOEXEC);
366 
367   for (int i : cpus) {
368     auto res = open_on_cpu(cb, lost_cb, i, cb_cookie, page_cnt);
369     if (res.code() != 0) {
370       TRY2(close_all_cpu());
371       return res;
372     }
373   }
374   return StatusTuple(0);
375 }
376 
close_on_cpu(int cpu)377 StatusTuple BPFPerfBuffer::close_on_cpu(int cpu) {
378   auto it = cpu_readers_.find(cpu);
379   if (it == cpu_readers_.end())
380     return StatusTuple(0);
381   perf_reader_free(static_cast<void*>(it->second));
382   if (!remove(const_cast<int*>(&(it->first))))
383     return StatusTuple(-1, "Unable to close perf buffer on CPU %d", it->first);
384   cpu_readers_.erase(it);
385   return StatusTuple(0);
386 }
387 
close_all_cpu()388 StatusTuple BPFPerfBuffer::close_all_cpu() {
389   std::string errors;
390   bool has_error = false;
391 
392   if (epfd_ >= 0) {
393     int close_res = close(epfd_);
394     epfd_ = -1;
395     ep_events_.reset();
396     if (close_res != 0) {
397       has_error = true;
398       errors += std::string(std::strerror(errno)) + "\n";
399     }
400   }
401 
402   std::vector<int> opened_cpus;
403   for (auto it : cpu_readers_)
404     opened_cpus.push_back(it.first);
405   for (int i : opened_cpus) {
406     auto res = close_on_cpu(i);
407     if (res.code() != 0) {
408       errors += "Failed to close CPU" + std::to_string(i) + " perf buffer: ";
409       errors += res.msg() + "\n";
410       has_error = true;
411     }
412   }
413 
414   if (has_error)
415     return StatusTuple(-1, errors);
416   return StatusTuple(0);
417 }
418 
poll(int timeout_ms)419 int BPFPerfBuffer::poll(int timeout_ms) {
420   if (epfd_ < 0)
421     return -1;
422   int cnt =
423       epoll_wait(epfd_, ep_events_.get(), cpu_readers_.size(), timeout_ms);
424   for (int i = 0; i < cnt; i++)
425     perf_reader_event_read(static_cast<perf_reader*>(ep_events_[i].data.ptr));
426   return cnt;
427 }
428 
~BPFPerfBuffer()429 BPFPerfBuffer::~BPFPerfBuffer() {
430   auto res = close_all_cpu();
431   if (res.code() != 0)
432     std::cerr << "Failed to close all perf buffer on destruction: " << res.msg()
433               << std::endl;
434 }
435 
BPFPerfEventArray(const TableDesc & desc)436 BPFPerfEventArray::BPFPerfEventArray(const TableDesc& desc)
437     : BPFTableBase<int, int>(desc) {
438   if (desc.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
439     throw std::invalid_argument("Table '" + desc.name +
440                                 "' is not a perf event array");
441 }
442 
open_all_cpu(uint32_t type,uint64_t config)443 StatusTuple BPFPerfEventArray::open_all_cpu(uint32_t type, uint64_t config) {
444   if (cpu_fds_.size() != 0)
445     return StatusTuple(-1, "Previously opened perf event not cleaned");
446 
447   std::vector<int> cpus = get_online_cpus();
448 
449   for (int i : cpus) {
450     auto res = open_on_cpu(i, type, config);
451     if (res.code() != 0) {
452       TRY2(close_all_cpu());
453       return res;
454     }
455   }
456   return StatusTuple(0);
457 }
458 
close_all_cpu()459 StatusTuple BPFPerfEventArray::close_all_cpu() {
460   std::string errors;
461   bool has_error = false;
462 
463   std::vector<int> opened_cpus;
464   for (auto it : cpu_fds_)
465     opened_cpus.push_back(it.first);
466   for (int i : opened_cpus) {
467     auto res = close_on_cpu(i);
468     if (res.code() != 0) {
469       errors += "Failed to close CPU" + std::to_string(i) + " perf event: ";
470       errors += res.msg() + "\n";
471       has_error = true;
472     }
473   }
474 
475   if (has_error)
476     return StatusTuple(-1, errors);
477   return StatusTuple(0);
478 }
479 
open_on_cpu(int cpu,uint32_t type,uint64_t config)480 StatusTuple BPFPerfEventArray::open_on_cpu(int cpu, uint32_t type,
481                                            uint64_t config) {
482   if (cpu_fds_.find(cpu) != cpu_fds_.end())
483     return StatusTuple(-1, "Perf event already open on CPU %d", cpu);
484   int fd = bpf_open_perf_event(type, config, -1, cpu);
485   if (fd < 0) {
486     return StatusTuple(-1, "Error constructing perf event %" PRIu32 ":%" PRIu64,
487                        type, config);
488   }
489   if (!update(&cpu, &fd)) {
490     bpf_close_perf_event_fd(fd);
491     return StatusTuple(-1, "Unable to open perf event on CPU %d: %s", cpu,
492                        std::strerror(errno));
493   }
494   cpu_fds_[cpu] = fd;
495   return StatusTuple(0);
496 }
497 
close_on_cpu(int cpu)498 StatusTuple BPFPerfEventArray::close_on_cpu(int cpu) {
499   auto it = cpu_fds_.find(cpu);
500   if (it == cpu_fds_.end()) {
501     return StatusTuple(0);
502   }
503   bpf_close_perf_event_fd(it->second);
504   cpu_fds_.erase(it);
505   return StatusTuple(0);
506 }
507 
~BPFPerfEventArray()508 BPFPerfEventArray::~BPFPerfEventArray() {
509   auto res = close_all_cpu();
510   if (res.code() != 0) {
511     std::cerr << "Failed to close all perf buffer on destruction: " << res.msg()
512               << std::endl;
513   }
514 }
515 
BPFProgTable(const TableDesc & desc)516 BPFProgTable::BPFProgTable(const TableDesc& desc)
517     : BPFTableBase<int, int>(desc) {
518   if (desc.type != BPF_MAP_TYPE_PROG_ARRAY)
519     throw std::invalid_argument("Table '" + desc.name +
520                                 "' is not a prog table");
521 }
522 
update_value(const int & index,const int & prog_fd)523 StatusTuple BPFProgTable::update_value(const int& index, const int& prog_fd) {
524   if (!this->update(const_cast<int*>(&index), const_cast<int*>(&prog_fd)))
525     return StatusTuple(-1, "Error updating value: %s", std::strerror(errno));
526   return StatusTuple(0);
527 }
528 
remove_value(const int & index)529 StatusTuple BPFProgTable::remove_value(const int& index) {
530   if (!this->remove(const_cast<int*>(&index)))
531     return StatusTuple(-1, "Error removing value: %s", std::strerror(errno));
532   return StatusTuple(0);
533 }
534 
BPFCgroupArray(const TableDesc & desc)535 BPFCgroupArray::BPFCgroupArray(const TableDesc& desc)
536     : BPFTableBase<int, int>(desc) {
537   if (desc.type != BPF_MAP_TYPE_CGROUP_ARRAY)
538     throw std::invalid_argument("Table '" + desc.name +
539                                 "' is not a cgroup array");
540 }
541 
update_value(const int & index,const int & cgroup2_fd)542 StatusTuple BPFCgroupArray::update_value(const int& index,
543                                          const int& cgroup2_fd) {
544   if (!this->update(const_cast<int*>(&index), const_cast<int*>(&cgroup2_fd)))
545     return StatusTuple(-1, "Error updating value: %s", std::strerror(errno));
546   return StatusTuple(0);
547 }
548 
update_value(const int & index,const std::string & cgroup2_path)549 StatusTuple BPFCgroupArray::update_value(const int& index,
550                                          const std::string& cgroup2_path) {
551   FileDesc f(::open(cgroup2_path.c_str(), O_RDONLY | O_CLOEXEC));
552   if ((int)f < 0)
553     return StatusTuple(-1, "Unable to open %s", cgroup2_path.c_str());
554   TRY2(update_value(index, (int)f));
555   return StatusTuple(0);
556 }
557 
remove_value(const int & index)558 StatusTuple BPFCgroupArray::remove_value(const int& index) {
559   if (!this->remove(const_cast<int*>(&index)))
560     return StatusTuple(-1, "Error removing value: %s", std::strerror(errno));
561   return StatusTuple(0);
562 }
563 
BPFDevmapTable(const TableDesc & desc)564 BPFDevmapTable::BPFDevmapTable(const TableDesc& desc)
565     : BPFTableBase<int, int>(desc) {
566     if(desc.type != BPF_MAP_TYPE_DEVMAP)
567       throw std::invalid_argument("Table '" + desc.name +
568                                   "' is not a devmap table");
569 }
570 
update_value(const int & index,const int & value)571 StatusTuple BPFDevmapTable::update_value(const int& index,
572                                          const int& value) {
573     if (!this->update(const_cast<int*>(&index), const_cast<int*>(&value)))
574       return StatusTuple(-1, "Error updating value: %s", std::strerror(errno));
575     return StatusTuple(0);
576 }
577 
get_value(const int & index,int & value)578 StatusTuple BPFDevmapTable::get_value(const int& index,
579                                       int& value) {
580     if (!this->lookup(const_cast<int*>(&index), &value))
581       return StatusTuple(-1, "Error getting value: %s", std::strerror(errno));
582     return StatusTuple(0);
583 }
584 
remove_value(const int & index)585 StatusTuple BPFDevmapTable::remove_value(const int& index) {
586     if (!this->remove(const_cast<int*>(&index)))
587       return StatusTuple(-1, "Error removing value: %s", std::strerror(errno));
588     return StatusTuple(0);
589 }
590 
591 }  // namespace ebpf
592