1 /*
2  * Copyright (c) 2016 GitHub, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <unordered_map>
17 #include <regex>
18 
19 #include "syms.h"
20 #include "usdt.h"
21 #include "vendor/tinyformat.hpp"
22 
23 #include "bcc_elf.h"
24 #include "bcc_syms.h"
25 
26 namespace USDT {
27 
Argument()28 Argument::Argument() {}
~Argument()29 Argument::~Argument() {}
30 
ctype() const31 std::string Argument::ctype() const {
32   const int s = arg_size() * 8;
33   return (s < 0) ? tfm::format("int%d_t", -s) : tfm::format("uint%d_t", s);
34 }
35 
get_global_address(uint64_t * address,const std::string & binpath,const optional<int> & pid) const36 bool Argument::get_global_address(uint64_t *address, const std::string &binpath,
37                                   const optional<int> &pid) const {
38   if (pid) {
39     static struct bcc_symbol_option default_option = {
40       .use_debug_file = 1,
41       .check_debug_file_crc = 1,
42       .use_symbol_type = BCC_SYM_ALL_TYPES
43     };
44     return ProcSyms(*pid, &default_option)
45         .resolve_name(binpath.c_str(), deref_ident_->c_str(), address);
46   }
47 
48   if (!bcc_elf_is_shared_obj(binpath.c_str())) {
49     struct bcc_symbol sym;
50     if (bcc_resolve_symname(binpath.c_str(), deref_ident_->c_str(), 0x0, -1, nullptr, &sym) == 0) {
51       *address = sym.offset;
52       if (sym.module)
53         ::free(const_cast<char*>(sym.module));
54       return true;
55     }
56   }
57 
58   return false;
59 }
60 
assign_to_local(std::ostream & stream,const std::string & local_name,const std::string & binpath,const optional<int> & pid) const61 bool Argument::assign_to_local(std::ostream &stream,
62                                const std::string &local_name,
63                                const std::string &binpath,
64                                const optional<int> &pid) const {
65   if (constant_) {
66     tfm::format(stream, "%s = %d;", local_name, *constant_);
67     return true;
68   }
69 
70   if (!deref_offset_) {
71     tfm::format(stream, "%s = ctx->%s;", local_name, *base_register_name_);
72     // Put a compiler barrier to prevent optimization
73     // like llvm SimplifyCFG SinkThenElseCodeToEnd
74     // Volatile marking is not sufficient to prevent such optimization.
75     tfm::format(stream, " %s", COMPILER_BARRIER);
76     return true;
77   }
78 
79   if (deref_offset_ && !deref_ident_) {
80     tfm::format(stream, "{ u64 __addr = ctx->%s + %d",
81                 *base_register_name_, *deref_offset_);
82     if (index_register_name_) {
83       int scale = scale_.value_or(1);
84       tfm::format(stream, " + (ctx->%s * %d);", *index_register_name_, scale);
85     } else {
86       tfm::format(stream, ";");
87     }
88     // Theoretically, llvm SimplifyCFG SinkThenElseCodeToEnd may still
89     // sink bpf_probe_read call, so put a barrier here to prevent sinking
90     // of ctx->#fields.
91     tfm::format(stream, " %s ", COMPILER_BARRIER);
92     tfm::format(stream,
93                 "%s __res = 0x0; "
94                 "bpf_probe_read(&__res, sizeof(__res), (void *)__addr); "
95                 "%s = __res; }",
96                 ctype(), local_name);
97     return true;
98   }
99 
100   if (deref_offset_ && deref_ident_ && *base_register_name_ == "ip") {
101     uint64_t global_address;
102     if (!get_global_address(&global_address, binpath, pid))
103       return false;
104 
105     tfm::format(stream,
106                 "{ u64 __addr = 0x%xull + %d; %s __res = 0x0; "
107                 "bpf_probe_read(&__res, sizeof(__res), (void *)__addr); "
108                 "%s = __res; }",
109                 global_address, *deref_offset_, ctype(), local_name);
110     return true;
111   }
112 
113   return false;
114 }
115 
print_error(ssize_t pos)116 void ArgumentParser::print_error(ssize_t pos) {
117   fprintf(stderr, "Parse error:\n    %s\n", arg_);
118   for (ssize_t i = 0; i < pos + 4; ++i) fputc('-', stderr);
119   fputc('^', stderr);
120   fputc('\n', stderr);
121 }
122 
skip_whitespace_from(size_t pos)123 void ArgumentParser::skip_whitespace_from(size_t pos) {
124     while (isspace(arg_[pos])) pos++;
125     cur_pos_ = pos;
126 }
127 
skip_until_whitespace_from(size_t pos)128 void ArgumentParser::skip_until_whitespace_from(size_t pos) {
129     while (arg_[pos] != '\0' && !isspace(arg_[pos]))
130         pos++;
131     cur_pos_ = pos;
132 }
133 
parse_register(ssize_t pos,ssize_t & new_pos,optional<int> * reg_num)134 bool ArgumentParser_aarch64::parse_register(ssize_t pos, ssize_t &new_pos,
135                                             optional<int> *reg_num) {
136   new_pos = parse_number(pos, reg_num);
137   if (new_pos == pos || *reg_num < 0 || *reg_num > 31)
138     return error_return(pos, pos);
139   return true;
140 }
141 
parse_size(ssize_t pos,ssize_t & new_pos,optional<int> * arg_size)142 bool ArgumentParser_aarch64::parse_size(ssize_t pos, ssize_t &new_pos,
143                                         optional<int> *arg_size) {
144   int abs_arg_size;
145 
146   new_pos = parse_number(pos, arg_size);
147   if (new_pos == pos)
148     return error_return(pos, pos);
149 
150   abs_arg_size = abs(arg_size->value());
151   if (abs_arg_size != 1 && abs_arg_size != 2 && abs_arg_size != 4 &&
152       abs_arg_size != 8)
153     return error_return(pos, pos);
154   return true;
155 }
156 
parse_mem(ssize_t pos,ssize_t & new_pos,optional<int> * reg_num,optional<int> * offset)157 bool ArgumentParser_aarch64::parse_mem(ssize_t pos, ssize_t &new_pos,
158                                        optional<int> *reg_num,
159                                        optional<int> *offset) {
160   if (arg_[pos] != 'x')
161     return error_return(pos, pos);
162   if (parse_register(pos + 1, new_pos, reg_num) == false)
163     return false;
164 
165   if (arg_[new_pos] == ',') {
166     pos = new_pos + 1;
167     new_pos = parse_number(pos, offset);
168     if (new_pos == pos)
169       return error_return(pos, pos);
170   }
171   if (arg_[new_pos] != ']')
172     return error_return(new_pos, new_pos);
173   new_pos++;
174   return true;
175 }
176 
parse(Argument * dest)177 bool ArgumentParser_aarch64::parse(Argument *dest) {
178   if (done())
179     return false;
180 
181   // Support the following argument patterns:
182   //   [-]<size>@<value>, [-]<size>@<reg>, [-]<size>@[<reg>], or
183   //   [-]<size>@[<reg>,<offset>]
184   ssize_t cur_pos = cur_pos_, new_pos;
185   optional<int> arg_size;
186 
187   // Parse [-]<size>
188   if (parse_size(cur_pos, new_pos, &arg_size) == false)
189     return false;
190   dest->arg_size_ = arg_size;
191 
192   // Make sure '@' present
193   if (arg_[new_pos] != '@')
194     return error_return(new_pos, new_pos);
195   cur_pos = new_pos + 1;
196 
197   if (arg_[cur_pos] == 'x') {
198     // Parse ...@<reg>
199     optional<int> reg_num;
200     if (parse_register(cur_pos + 1, new_pos, &reg_num) == false)
201       return false;
202     cur_pos_ = new_pos;
203     dest->base_register_name_ = "regs[" + std::to_string(reg_num.value()) + "]";
204   } else if (arg_[cur_pos] == '[') {
205     // Parse ...@[<reg>] and ...@[<reg,<offset>]
206     optional<int> reg_num, offset = 0;
207     if (parse_mem(cur_pos + 1, new_pos, &reg_num, &offset) == false)
208       return false;
209     cur_pos_ = new_pos;
210     dest->base_register_name_ = "regs[" + std::to_string(reg_num.value()) + "]";
211     dest->deref_offset_ = offset;
212   } else {
213     // Parse ...@<value>
214     optional<int> val;
215     new_pos = parse_number(cur_pos, &val);
216     if (cur_pos == new_pos)
217       return error_return(cur_pos, cur_pos);
218     cur_pos_ = new_pos;
219     dest->constant_ = val;
220   }
221 
222   skip_whitespace_from(cur_pos_);
223   return true;
224 }
225 
parse(Argument * dest)226 bool ArgumentParser_powerpc64::parse(Argument *dest) {
227   if (done())
228     return false;
229 
230   bool matched;
231   std::smatch matches;
232   std::string arg_str(&arg_[cur_pos_]);
233   std::regex arg_n_regex("^(\\-?[1248])\\@");
234   // Operands with constants of form iNUM or i-NUM
235   std::regex arg_op_regex_const("^i(\\-?[0-9]+)( +|$)");
236   // Operands with register only of form REG or %rREG
237   std::regex arg_op_regex_reg("^(?:%r)?([1-2]?[0-9]|3[0-1])( +|$)");
238   // Operands with a base register and an offset of form
239   // NUM(REG) or -NUM(REG) or NUM(%rREG) or -NUM(%rREG)
240   std::regex arg_op_regex_breg_off(
241         "^(\\-?[0-9]+)\\((?:%r)?([1-2]?[0-9]|3[0-1])\\)( +|$)");
242   // Operands with a base register and an index register
243   // of form REG,REG or %rREG,%rREG
244   std::regex arg_op_regex_breg_ireg(
245         "^(?:%r)?([1-2]?[0-9]|3[0-1])\\,(?:%r)?([1-2]?[0-9]|3[0-1])( +|$)");
246 
247   matched = std::regex_search(arg_str, matches, arg_n_regex);
248   if (matched) {
249     dest->arg_size_ = stoi(matches.str(1));
250     cur_pos_ += matches.length(0);
251     arg_str = &arg_[cur_pos_];
252 
253     if (std::regex_search(arg_str, matches, arg_op_regex_const)) {
254       dest->constant_ = stoi(matches.str(1));
255     } else if (std::regex_search(arg_str, matches, arg_op_regex_reg)) {
256       dest->base_register_name_ = "gpr[" + matches.str(1) + "]";
257     } else if (std::regex_search(arg_str, matches, arg_op_regex_breg_off)) {
258       dest->deref_offset_ = stoi(matches.str(1));
259       dest->base_register_name_ = "gpr[" + matches.str(2) + "]";
260     } else if (std::regex_search(arg_str, matches, arg_op_regex_breg_ireg)) {
261       dest->deref_offset_ = 0; // In powerpc64, such operands contain a base
262                                // register and an index register which are
263                                // part of an indexed load/store operation.
264                                // Even if no offset value is present, this
265                                // is required by Argument::assign_to_local()
266                                // in order to generate code for reading the
267                                // argument. So, this is set to zero.
268       dest->base_register_name_ = "gpr[" + matches.str(1) + "]";
269       dest->index_register_name_ = "gpr[" + matches.str(2) + "]";
270       dest->scale_ = abs(*dest->arg_size_);
271     } else {
272       matched = false;
273     }
274   }
275 
276   if (!matched) {
277     print_error(cur_pos_);
278     skip_until_whitespace_from(cur_pos_);
279     skip_whitespace_from(cur_pos_);
280     return false;
281   }
282 
283   cur_pos_ += matches.length(0);
284   skip_whitespace_from(cur_pos_);
285   return true;
286 }
287 
parse_identifier(ssize_t pos,optional<std::string> * result)288 ssize_t ArgumentParser_x64::parse_identifier(ssize_t pos,
289                                              optional<std::string> *result) {
290   if (isalpha(arg_[pos]) || arg_[pos] == '_') {
291     ssize_t start = pos++;
292     while (isalnum(arg_[pos]) || arg_[pos] == '_') pos++;
293     if (pos - start)
294       result->emplace(arg_ + start, pos - start);
295   }
296   return pos;
297 }
298 
parse_register(ssize_t pos,std::string & name,int & size)299 ssize_t ArgumentParser_x64::parse_register(ssize_t pos, std::string &name,
300                                            int &size) {
301   ssize_t start = ++pos;
302   if (arg_[start - 1] != '%')
303     return -start;
304 
305   while (isalnum(arg_[pos])) pos++;
306 
307   std::string regname(arg_ + start, pos - start);
308   if (!normalize_register(&regname, &size))
309     return -start;
310 
311   name = regname;
312   return pos;
313 }
314 
parse_base_register(ssize_t pos,Argument * dest)315 ssize_t ArgumentParser_x64::parse_base_register(ssize_t pos, Argument *dest) {
316   int size;
317   std::string name;
318   ssize_t res = parse_register(pos, name, size);
319   if (res < 0)
320       return res;
321 
322   dest->base_register_name_ = name;
323   if (!dest->arg_size_)
324     dest->arg_size_ = size;
325 
326   return res;
327 }
328 
parse_index_register(ssize_t pos,Argument * dest)329 ssize_t ArgumentParser_x64::parse_index_register(ssize_t pos, Argument *dest) {
330   int size;
331   std::string name;
332   ssize_t res = parse_register(pos, name, size);
333   if (res < 0)
334       return res;
335 
336   dest->index_register_name_ = name;
337 
338   return res;
339 }
340 
parse_scale(ssize_t pos,Argument * dest)341 ssize_t ArgumentParser_x64::parse_scale(ssize_t pos, Argument *dest) {
342   return parse_number(pos, &dest->scale_);
343 }
344 
parse_expr(ssize_t pos,Argument * dest)345 ssize_t ArgumentParser_x64::parse_expr(ssize_t pos, Argument *dest) {
346   if (arg_[pos] == '$')
347     return parse_number(pos + 1, &dest->constant_);
348 
349   if (arg_[pos] == '%')
350     return parse_base_register(pos, dest);
351 
352   if (isdigit(arg_[pos]) || arg_[pos] == '-') {
353     pos = parse_number(pos, &dest->deref_offset_);
354     if (arg_[pos] == '+') {
355       pos = parse_identifier(pos + 1, &dest->deref_ident_);
356       if (!dest->deref_ident_)
357         return -pos;
358     }
359   } else {
360     dest->deref_offset_ = 0;
361     pos = parse_identifier(pos, &dest->deref_ident_);
362     if (arg_[pos] == '+' || arg_[pos] == '-') {
363       pos = parse_number(pos, &dest->deref_offset_);
364     }
365   }
366 
367   if (arg_[pos] != '(')
368     return -pos;
369 
370   pos = parse_base_register(pos + 1, dest);
371   if (pos < 0)
372     return pos;
373 
374   if (arg_[pos] == ',') {
375     pos = parse_index_register(pos + 1, dest);
376     if (pos < 0)
377       return pos;
378 
379     if (arg_[pos] == ',') {
380       pos = parse_scale(pos + 1, dest);
381       if (pos < 0)
382         return pos;
383     }
384   }
385 
386   return (arg_[pos] == ')') ? pos + 1 : -pos;
387 }
388 
parse_1(ssize_t pos,Argument * dest)389 ssize_t ArgumentParser_x64::parse_1(ssize_t pos, Argument *dest) {
390   if (isdigit(arg_[pos]) || arg_[pos] == '-') {
391     optional<int> asize;
392     ssize_t m = parse_number(pos, &asize);
393     if (arg_[m] == '@' && asize) {
394       dest->arg_size_ = asize;
395       return parse_expr(m + 1, dest);
396     }
397   }
398   return parse_expr(pos, dest);
399 }
400 
parse(Argument * dest)401 bool ArgumentParser_x64::parse(Argument *dest) {
402   if (done())
403     return false;
404 
405   ssize_t res = parse_1(cur_pos_, dest);
406   if (res < 0)
407     return error_return(-res, -res + 1);
408   if (!isspace(arg_[res]) && arg_[res] != '\0')
409     return error_return(res, res);
410   skip_whitespace_from(res);
411   return true;
412 }
413 
414 const std::unordered_map<std::string, ArgumentParser_x64::RegInfo>
415     ArgumentParser_x64::registers_ = {
416         {"rax", {REG_A, 8}},   {"eax", {REG_A, 4}},
417         {"ax", {REG_A, 2}},    {"al", {REG_A, 1}},
418 
419         {"rbx", {REG_B, 8}},   {"ebx", {REG_B, 4}},
420         {"bx", {REG_B, 2}},    {"bl", {REG_B, 1}},
421 
422         {"rcx", {REG_C, 8}},   {"ecx", {REG_C, 4}},
423         {"cx", {REG_C, 2}},    {"cl", {REG_C, 1}},
424 
425         {"rdx", {REG_D, 8}},   {"edx", {REG_D, 4}},
426         {"dx", {REG_D, 2}},    {"dl", {REG_D, 1}},
427 
428         {"rsi", {REG_SI, 8}},  {"esi", {REG_SI, 4}},
429         {"si", {REG_SI, 2}},   {"sil", {REG_SI, 1}},
430 
431         {"rdi", {REG_DI, 8}},  {"edi", {REG_DI, 4}},
432         {"di", {REG_DI, 2}},   {"dil", {REG_DI, 1}},
433 
434         {"rbp", {REG_BP, 8}},  {"ebp", {REG_BP, 4}},
435         {"bp", {REG_BP, 2}},   {"bpl", {REG_BP, 1}},
436 
437         {"rsp", {REG_SP, 8}},  {"esp", {REG_SP, 4}},
438         {"sp", {REG_SP, 2}},   {"spl", {REG_SP, 1}},
439 
440         {"r8", {REG_8, 8}},    {"r8d", {REG_8, 4}},
441         {"r8w", {REG_8, 2}},   {"r8b", {REG_8, 1}},
442 
443         {"r9", {REG_9, 8}},    {"r9d", {REG_9, 4}},
444         {"r9w", {REG_9, 2}},   {"r9b", {REG_9, 1}},
445 
446         {"r10", {REG_10, 8}},  {"r10d", {REG_10, 4}},
447         {"r10w", {REG_10, 2}}, {"r10b", {REG_10, 1}},
448 
449         {"r11", {REG_11, 8}},  {"r11d", {REG_11, 4}},
450         {"r11w", {REG_11, 2}}, {"r11b", {REG_11, 1}},
451 
452         {"r12", {REG_12, 8}},  {"r12d", {REG_12, 4}},
453         {"r12w", {REG_12, 2}}, {"r12b", {REG_12, 1}},
454 
455         {"r13", {REG_13, 8}},  {"r13d", {REG_13, 4}},
456         {"r13w", {REG_13, 2}}, {"r13b", {REG_13, 1}},
457 
458         {"r14", {REG_14, 8}},  {"r14d", {REG_14, 4}},
459         {"r14w", {REG_14, 2}}, {"r14b", {REG_14, 1}},
460 
461         {"r15", {REG_15, 8}},  {"r15d", {REG_15, 4}},
462         {"r15w", {REG_15, 2}}, {"r15b", {REG_15, 1}},
463 
464         {"rip", {REG_RIP, 8}},
465 };
466 
reg_to_name(std::string * norm,Register reg)467 void ArgumentParser_x64::reg_to_name(std::string *norm, Register reg) {
468   switch (reg) {
469   case REG_A:
470     *norm = "ax";
471     break;
472   case REG_B:
473     *norm = "bx";
474     break;
475   case REG_C:
476     *norm = "cx";
477     break;
478   case REG_D:
479     *norm = "dx";
480     break;
481 
482   case REG_SI:
483     *norm = "si";
484     break;
485   case REG_DI:
486     *norm = "di";
487     break;
488   case REG_BP:
489     *norm = "bp";
490     break;
491   case REG_SP:
492     *norm = "sp";
493     break;
494 
495   case REG_8:
496     *norm = "r8";
497     break;
498   case REG_9:
499     *norm = "r9";
500     break;
501   case REG_10:
502     *norm = "r10";
503     break;
504   case REG_11:
505     *norm = "r11";
506     break;
507   case REG_12:
508     *norm = "r12";
509     break;
510   case REG_13:
511     *norm = "r13";
512     break;
513   case REG_14:
514     *norm = "r14";
515     break;
516   case REG_15:
517     *norm = "r15";
518     break;
519 
520   case REG_RIP:
521     *norm = "ip";
522     break;
523   }
524 }
525 
normalize_register(std::string * reg,int * reg_size)526 bool ArgumentParser_x64::normalize_register(std::string *reg, int *reg_size) {
527   auto it = registers_.find(*reg);
528   if (it == registers_.end())
529     return false;
530 
531   *reg_size = it->second.size;
532   reg_to_name(reg, it->second.reg);
533   return true;
534 }
535 }
536