1 /*
2  * Copyright (c) 2015 PLUMgrid, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <map>
17 #include <string>
18 #include <vector>
19 
20 #include <llvm/ExecutionEngine/MCJIT.h>
21 #include <llvm/IR/IRBuilder.h>
22 #include <llvm/Support/TargetSelect.h>
23 
24 #include "common.h"
25 #include "bpf_module.h"
26 #include "table_storage.h"
27 
28 namespace ebpf {
29 
30 using std::map;
31 using std::move;
32 using std::string;
33 using std::unique_ptr;
34 using std::vector;
35 using namespace llvm;
36 
// Reports that the reader/writer (rw) engine is enabled. This implementation
// unconditionally answers true.
bool bpf_module_rw_engine_enabled(void) {
  constexpr bool kRwEngineEnabled = true;
  return kRwEngineEnabled;
}
40 
initialize_rw_engine()41 void BPFModule::initialize_rw_engine() {
42   InitializeNativeTarget();
43   InitializeNativeTargetAsmPrinter();
44 }
45 
cleanup_rw_engine()46 void BPFModule::cleanup_rw_engine() {
47   rw_engine_.reset();
48 }
49 
debug_printf(Module * mod,IRBuilder<> & B,const string & fmt,vector<Value * > args)50 static void debug_printf(Module *mod, IRBuilder<> &B, const string &fmt, vector<Value *> args) {
51   GlobalVariable *fmt_gvar = B.CreateGlobalString(fmt, "fmt");
52   args.insert(args.begin(), B.CreateInBoundsGEP(fmt_gvar, vector<Value *>({B.getInt64(0), B.getInt64(0)})));
53   args.insert(args.begin(), B.getInt64((uintptr_t)stderr));
54   Function *fprintf_fn = mod->getFunction("fprintf");
55   if (!fprintf_fn) {
56     vector<Type *> fprintf_fn_args({B.getInt64Ty(), B.getInt8PtrTy()});
57     FunctionType *fprintf_fn_type = FunctionType::get(B.getInt32Ty(), fprintf_fn_args, /*isvarArg=*/true);
58     fprintf_fn = Function::Create(fprintf_fn_type, GlobalValue::ExternalLinkage, "fprintf", mod);
59     fprintf_fn->setCallingConv(CallingConv::C);
60     fprintf_fn->addFnAttr(Attribute::NoUnwind);
61   }
62   B.CreateCall(fprintf_fn, args);
63 }
64 
finish_sscanf(IRBuilder<> & B,vector<Value * > * args,string * fmt,const map<string,Value * > & locals,bool exact_args)65 static void finish_sscanf(IRBuilder<> &B, vector<Value *> *args, string *fmt,
66                           const map<string, Value *> &locals, bool exact_args) {
67   // fmt += "%n";
68   // int nread = 0;
69   // int n = sscanf(s, fmt, args..., &nread);
70   // if (n < 0) return -1;
71   // s = &s[nread];
72   Value *sptr = locals.at("sptr");
73   Value *nread = locals.at("nread");
74   Function *cur_fn = B.GetInsertBlock()->getParent();
75   Function *sscanf_fn = B.GetInsertBlock()->getModule()->getFunction("sscanf");
76   *fmt += "%n";
77   B.CreateStore(B.getInt32(0), nread);
78   GlobalVariable *fmt_gvar = B.CreateGlobalString(*fmt, "fmt");
79   (*args)[1] = B.CreateInBoundsGEP(fmt_gvar, {B.getInt64(0), B.getInt64(0)});
80   (*args)[0] = B.CreateLoad(sptr);
81   args->push_back(nread);
82   CallInst *call = B.CreateCall(sscanf_fn, *args);
83   call->setTailCall(true);
84 
85   BasicBlock *label_true = BasicBlock::Create(B.getContext(), "", cur_fn);
86   BasicBlock *label_false = BasicBlock::Create(B.getContext(), "", cur_fn);
87 
88   // exact_args means fail if don't consume exact number of "%" inputs
89   // exact_args is disabled for string parsing (empty case)
90   Value *cond = exact_args ? B.CreateICmpNE(call, B.getInt32(args->size() - 3))
91                            : B.CreateICmpSLT(call, B.getInt32(0));
92   B.CreateCondBr(cond, label_true, label_false);
93 
94   B.SetInsertPoint(label_true);
95   B.CreateRet(B.getInt32(-1));
96 
97   B.SetInsertPoint(label_false);
98   // s = &s[nread];
99   B.CreateStore(
100       B.CreateInBoundsGEP(B.CreateLoad(sptr), B.CreateLoad(nread, true)), sptr);
101 
102   args->resize(2);
103   fmt->clear();
104 }
105 
106 // recursive helper to capture the arguments
parse_type(IRBuilder<> & B,vector<Value * > * args,string * fmt,Type * type,Value * out,const map<string,Value * > & locals,bool is_writer)107 static void parse_type(IRBuilder<> &B, vector<Value *> *args, string *fmt,
108                        Type *type, Value *out,
109                        const map<string, Value *> &locals, bool is_writer) {
110   if (StructType *st = dyn_cast<StructType>(type)) {
111     *fmt += "{ ";
112     unsigned idx = 0;
113     for (auto field : st->elements()) {
114       parse_type(B, args, fmt, field, B.CreateStructGEP(type, out, idx++),
115                  locals, is_writer);
116       *fmt += " ";
117     }
118     *fmt += "}";
119   } else if (ArrayType *at = dyn_cast<ArrayType>(type)) {
120     if (at->getElementType() == B.getInt8Ty()) {
121       // treat i8[] as a char string instead of as an array of u8's
122       if (is_writer) {
123         *fmt += "\"%s\"";
124         args->push_back(out);
125       } else {
126         // When reading strings, scanf doesn't support empty "", so we need to
127         // break this up into multiple scanf calls. To understand it, let's take
128         // an example:
129         // struct Event {
130         //   u32 a;
131         //   struct {
132         //     char x[64];
133         //     int y;
134         //   } b[2];
135         //   u32 c;
136         // };
137         // The writer string would look like:
138         //  "{ 0x%x [ { \"%s\" 0x%x } { \"%s\" 0x%x } ] 0x%x }"
139         // But the reader string needs to restart at each \"\".
140         //  reader0(const char *s, struct Event *val) {
141         //    int nread, rc;
142         //    nread = 0;
143         //    rc = sscanf(s, "{ %i [ { \"%n", &val->a, &nread);
144         //    if (rc != 1) return -1;
145         //    s += nread; nread = 0;
146         //    rc = sscanf(s, "%[^\"]%n", &val->b[0].x, &nread);
147         //    if (rc < 0) return -1;
148         //    s += nread; nread = 0;
149         //    rc = sscanf(s, "\" %i } { \"%n", &val->b[0].y, &nread);
150         //    if (rc != 1) return -1;
151         //    s += nread; nread = 0;
152         //    rc = sscanf(s, "%[^\"]%n", &val->b[1].x, &nread);
153         //    if (rc < 0) return -1;
154         //    s += nread; nread = 0;
155         //    rc = sscanf(s, "\" %i } ] %i }%n", &val->b[1].y, &val->c, &nread);
156         //    if (rc != 2) return -1;
157         //    s += nread; nread = 0;
158         //    return 0;
159         //  }
160         *fmt += "\"";
161         finish_sscanf(B, args, fmt, locals, true);
162 
163         *fmt = "%[^\"]";
164         args->push_back(out);
165         finish_sscanf(B, args, fmt, locals, false);
166 
167         *fmt = "\"";
168       }
169     } else {
170       *fmt += "[ ";
171       for (size_t i = 0; i < at->getNumElements(); ++i) {
172         parse_type(B, args, fmt, at->getElementType(),
173                    B.CreateStructGEP(type, out, i), locals, is_writer);
174         *fmt += " ";
175       }
176       *fmt += "]";
177     }
178   } else if (isa<PointerType>(type)) {
179     *fmt += "0xl";
180     if (is_writer)
181       *fmt += "x";
182     else
183       *fmt += "i";
184   } else if (IntegerType *it = dyn_cast<IntegerType>(type)) {
185     if (is_writer)
186       *fmt += "0x";
187     if (it->getBitWidth() <= 8)
188       *fmt += "%hh";
189     else if (it->getBitWidth() <= 16)
190       *fmt += "%h";
191     else if (it->getBitWidth() <= 32)
192       *fmt += "%";
193     else
194       *fmt += "%l";
195     if (is_writer)
196       *fmt += "x";
197     else
198       *fmt += "i";
199     args->push_back(is_writer ? B.CreateLoad(out) : out);
200   }
201 }
202 
203 // make_reader generates a dynamic function in the instruction set of the host
204 // (not bpf) that is able to convert c-strings in the pretty-print format of
205 // make_writer back into binary representations. The encoding of the string
206 // takes the llvm ir structure format, which closely maps the c structure but
207 // not exactly (no support for unions for instance).
208 // The general algorithm is:
209 //  pod types (u8..u64)                <= %i
210 //  array types
211 //   u8[]  no nested quotes :(         <= "..."
212 //   !u8[]                             <= [ %i %i ... ]
213 //  struct types
214 //   struct { u8 a; u64 b; }           <= { %i %i }
215 //  nesting is supported
216 //   struct { struct { u8 a[]; }; }    <= { "" }
217 //   struct { struct { u64 a[]; }; }   <= { [ %i %i .. ] }
make_reader(Module * mod,Type * type)218 string BPFModule::make_reader(Module *mod, Type *type) {
219   auto fn_it = readers_.find(type);
220   if (fn_it != readers_.end())
221     return fn_it->second;
222 
223   // int read(const char *in, Type *out) {
224   //   int n = sscanf(in, "{ %i ... }", &out->field1, ...);
225   //   if (n != num_fields) return -1;
226   //   return 0;
227   // }
228 
229   IRBuilder<> B(*ctx_);
230 
231   FunctionType *sscanf_fn_type = FunctionType::get(
232       B.getInt32Ty(), {B.getInt8PtrTy(), B.getInt8PtrTy()}, /*isVarArg=*/true);
233   Function *sscanf_fn = mod->getFunction("sscanf");
234   if (!sscanf_fn) {
235     sscanf_fn = Function::Create(sscanf_fn_type, GlobalValue::ExternalLinkage,
236                                  "sscanf", mod);
237     sscanf_fn->setCallingConv(CallingConv::C);
238     sscanf_fn->addFnAttr(Attribute::NoUnwind);
239   }
240 
241   string name = "reader" + std::to_string(readers_.size());
242   vector<Type *> fn_args({B.getInt8PtrTy(), PointerType::getUnqual(type)});
243   FunctionType *fn_type = FunctionType::get(B.getInt32Ty(), fn_args, /*isVarArg=*/false);
244   Function *fn =
245       Function::Create(fn_type, GlobalValue::ExternalLinkage, name, mod);
246   auto arg_it = fn->arg_begin();
247   Argument *arg_in = &*arg_it;
248   ++arg_it;
249   arg_in->setName("in");
250   Argument *arg_out = &*arg_it;
251   ++arg_it;
252   arg_out->setName("out");
253 
254   BasicBlock *label_entry = BasicBlock::Create(*ctx_, "entry", fn);
255   B.SetInsertPoint(label_entry);
256 
257   Value *nread = B.CreateAlloca(B.getInt32Ty());
258   Value *sptr = B.CreateAlloca(B.getInt8PtrTy());
259   map<string, Value *> locals{{"nread", nread}, {"sptr", sptr}};
260   B.CreateStore(arg_in, sptr);
261   vector<Value *> args({nullptr, nullptr});
262   string fmt;
263   parse_type(B, &args, &fmt, type, arg_out, locals, false);
264 
265   if (0)
266     debug_printf(mod, B, "%p %p\n", vector<Value *>({arg_in, arg_out}));
267 
268   finish_sscanf(B, &args, &fmt, locals, true);
269 
270   B.CreateRet(B.getInt32(0));
271 
272   readers_[type] = name;
273   return name;
274 }
275 
276 // make_writer generates a dynamic function in the instruction set of the host
277 // (not bpf) that is able to pretty-print key/leaf entries as a c-string. The
278 // encoding of the string takes the llvm ir structure format, which closely maps
279 // the c structure but not exactly (no support for unions for instance).
280 // The general algorithm is:
281 //  pod types (u8..u64)                => 0x%x
282 //  array types
283 //   u8[]                              => "..."
284 //   !u8[]                             => [ 0x%x 0x%x ... ]
285 //  struct types
286 //   struct { u8 a; u64 b; }           => { 0x%x 0x%x }
287 //  nesting is supported
288 //   struct { struct { u8 a[]; }; }    => { "" }
289 //   struct { struct { u64 a[]; }; }   => { [ 0x%x 0x%x .. ] }
make_writer(Module * mod,Type * type)290 string BPFModule::make_writer(Module *mod, Type *type) {
291   auto fn_it = writers_.find(type);
292   if (fn_it != writers_.end())
293     return fn_it->second;
294 
295   // int write(int len, char *out, Type *in) {
296   //   return snprintf(out, len, "{ %i ... }", out->field1, ...);
297   // }
298 
299   IRBuilder<> B(*ctx_);
300 
301   string name = "writer" + std::to_string(writers_.size());
302   vector<Type *> fn_args({B.getInt8PtrTy(), B.getInt64Ty(), PointerType::getUnqual(type)});
303   FunctionType *fn_type = FunctionType::get(B.getInt32Ty(), fn_args, /*isVarArg=*/false);
304   Function *fn =
305       Function::Create(fn_type, GlobalValue::ExternalLinkage, name, mod);
306   auto arg_it = fn->arg_begin();
307   Argument *arg_out = &*arg_it;
308   ++arg_it;
309   arg_out->setName("out");
310   Argument *arg_len = &*arg_it;
311   ++arg_it;
312   arg_len->setName("len");
313   Argument *arg_in = &*arg_it;
314   ++arg_it;
315   arg_in->setName("in");
316 
317   BasicBlock *label_entry = BasicBlock::Create(*ctx_, "entry", fn);
318   B.SetInsertPoint(label_entry);
319 
320   map<string, Value *> locals{
321       {"nread", B.CreateAlloca(B.getInt64Ty())},
322   };
323   vector<Value *> args({arg_out, B.CreateZExt(arg_len, B.getInt64Ty()), nullptr});
324   string fmt;
325   parse_type(B, &args, &fmt, type, arg_in, locals, true);
326 
327   GlobalVariable *fmt_gvar = B.CreateGlobalString(fmt, "fmt");
328 
329   args[2] = B.CreateInBoundsGEP(fmt_gvar, vector<Value *>({B.getInt64(0), B.getInt64(0)}));
330 
331   if (0)
332     debug_printf(mod, B, "%d %p %p\n", vector<Value *>({arg_len, arg_out, arg_in}));
333 
334   vector<Type *> snprintf_fn_args({B.getInt8PtrTy(), B.getInt64Ty(), B.getInt8PtrTy()});
335   FunctionType *snprintf_fn_type = FunctionType::get(B.getInt32Ty(), snprintf_fn_args, /*isVarArg=*/true);
336   Function *snprintf_fn = mod->getFunction("snprintf");
337   if (!snprintf_fn)
338     snprintf_fn = Function::Create(snprintf_fn_type, GlobalValue::ExternalLinkage, "snprintf", mod);
339   snprintf_fn->setCallingConv(CallingConv::C);
340   snprintf_fn->addFnAttr(Attribute::NoUnwind);
341 
342   CallInst *call = B.CreateCall(snprintf_fn, args);
343   call->setTailCall(true);
344 
345   B.CreateRet(call);
346 
347   writers_[type] = name;
348   return name;
349 }
350 
finalize_rw(unique_ptr<Module> m)351 unique_ptr<ExecutionEngine> BPFModule::finalize_rw(unique_ptr<Module> m) {
352   Module *mod = &*m;
353 
354   run_pass_manager(*mod);
355 
356   string err;
357   EngineBuilder builder(move(m));
358   builder.setErrorStr(&err);
359   builder.setUseOrcMCJITReplacement(false);
360   auto engine = unique_ptr<ExecutionEngine>(builder.create());
361   if (!engine)
362     fprintf(stderr, "Could not create ExecutionEngine: %s\n", err.c_str());
363   return engine;
364 }
365 
annotate()366 int BPFModule::annotate() {
367   for (auto fn = mod_->getFunctionList().begin(); fn != mod_->getFunctionList().end(); ++fn)
368     if (!fn->hasFnAttribute(Attribute::NoInline))
369       fn->addFnAttr(Attribute::AlwaysInline);
370 
371   // separate module to hold the reader functions
372   auto m = ebpf::make_unique<Module>("sscanf", *ctx_);
373 
374   size_t id = 0;
375   Path path({id_});
376   for (auto it = ts_->lower_bound(path), up = ts_->upper_bound(path); it != up; ++it) {
377     TableDesc &table = it->second;
378     tables_.push_back(&it->second);
379     table_names_[table.name] = id++;
380     GlobalValue *gvar = mod_->getNamedValue(table.name);
381     if (!gvar) continue;
382     if (PointerType *pt = dyn_cast<PointerType>(gvar->getType())) {
383       if (StructType *st = dyn_cast<StructType>(pt->getElementType())) {
384         if (st->getNumElements() < 2) continue;
385         Type *key_type = st->elements()[0];
386         Type *leaf_type = st->elements()[1];
387 
388         using std::placeholders::_1;
389         using std::placeholders::_2;
390         using std::placeholders::_3;
391         table.key_sscanf = std::bind(&BPFModule::sscanf, this,
392                                      make_reader(&*m, key_type), _1, _2);
393         table.leaf_sscanf = std::bind(&BPFModule::sscanf, this,
394                                       make_reader(&*m, leaf_type), _1, _2);
395         table.key_snprintf = std::bind(&BPFModule::snprintf, this,
396                                        make_writer(&*m, key_type), _1, _2, _3);
397         table.leaf_snprintf =
398             std::bind(&BPFModule::snprintf, this, make_writer(&*m, leaf_type),
399                       _1, _2, _3);
400       }
401     }
402   }
403 
404   rw_engine_ = finalize_rw(move(m));
405   if (!rw_engine_)
406     return -1;
407   return 0;
408 }
409 
sscanf(string fn_name,const char * str,void * val)410 StatusTuple BPFModule::sscanf(string fn_name, const char *str, void *val) {
411   if (!rw_engine_enabled_)
412     return StatusTuple(-1, "rw_engine not enabled");
413   auto fn =
414       (int (*)(const char *, void *))rw_engine_->getFunctionAddress(fn_name);
415   if (!fn)
416     return StatusTuple(-1, "sscanf not available");
417   int rc = fn(str, val);
418   if (rc < 0)
419     return StatusTuple(rc, "error in sscanf: %s", std::strerror(errno));
420   return StatusTuple(rc);
421 }
422 
snprintf(string fn_name,char * str,size_t sz,const void * val)423 StatusTuple BPFModule::snprintf(string fn_name, char *str, size_t sz,
424                                 const void *val) {
425   if (!rw_engine_enabled_)
426     return StatusTuple(-1, "rw_engine not enabled");
427   auto fn = (int (*)(char *, size_t,
428                      const void *))rw_engine_->getFunctionAddress(fn_name);
429   if (!fn)
430     return StatusTuple(-1, "snprintf not available");
431   int rc = fn(str, sz, val);
432   if (rc < 0)
433     return StatusTuple(rc, "error in snprintf: %s", std::strerror(errno));
434   if ((size_t)rc == sz)
435     return StatusTuple(-1, "buffer of size %zd too small", sz);
436   return StatusTuple(0);
437 }
438 
439 } // namespace ebpf
440