1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef SANDBOX_LINUX_BPF_DSL_CODEGEN_H__
6 #define SANDBOX_LINUX_BPF_DSL_CODEGEN_H__
7 
8 #include <stddef.h>
9 #include <stdint.h>
10 
11 #include <map>
12 #include <vector>
13 
14 #include "base/macros.h"
15 #include "base/tuple.h"
16 #include "sandbox/sandbox_export.h"
17 
18 struct sock_filter;
19 
20 namespace sandbox {
21 
22 // The code generator implements a basic assembler that can convert a
23 // graph of BPF instructions into a well-formed array of BPF
24 // instructions. Most notably, it ensures that jumps are always
25 // forward and don't exceed the limit of 255 instructions imposed by
26 // the instruction set.
27 //
28 // Callers would typically create a new CodeGen object and then use it
29 // to build a DAG of instruction nodes. They'll eventually call
30 // Compile() to convert this DAG to a Program.
31 //
32 //   CodeGen gen;
33 //   CodeGen::Node allow, branch, dag;
34 //
35 //   allow =
36 //     gen.MakeInstruction(BPF_RET+BPF_K,
37 //                         ErrorCode(ErrorCode::ERR_ALLOWED).err()));
38 //   branch =
39 //     gen.MakeInstruction(BPF_JMP+BPF_EQ+BPF_K, __NR_getpid,
40 //                         Trap(GetPidHandler, NULL), allow);
41 //   dag =
42 //     gen.MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
43 //                         offsetof(struct arch_seccomp_data, nr), branch);
44 //
45 //   // Simplified code follows; in practice, it is important to avoid calling
46 //   // any C++ destructors after starting the sandbox.
47 //   CodeGen::Program program = gen.Compile(dag);
48 //   const struct sock_fprog prog = {
49 //     static_cast<unsigned short>(program.size()), &program[0] };
50 //   prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
51 //
52 class SANDBOX_EXPORT CodeGen {
53  public:
54   // A vector of BPF instructions that need to be installed as a filter
55   // program in the kernel.
56   typedef std::vector<struct sock_filter> Program;
57 
58   // Node represents a node within the instruction DAG being compiled.
59   using Node = Program::size_type;
60 
61   // kNullNode represents the "null" node; i.e., the reserved node
62   // value guaranteed to not equal any actual nodes.
63   static const Node kNullNode = -1;
64 
65   CodeGen();
66   ~CodeGen();
67 
68   // MakeInstruction creates a node representing the specified
69   // instruction, or returns and existing equivalent node if one
70   // exists. For details on the possible parameters refer to
71   // https://www.kernel.org/doc/Documentation/networking/filter.txt.
72   // TODO(mdempsky): Reconsider using default arguments here.
73   Node MakeInstruction(uint16_t code,
74                        uint32_t k,
75                        Node jt = kNullNode,
76                        Node jf = kNullNode);
77 
78   // Compile linearizes the instruction DAG rooted at |head| into a
79   // program that can be executed by a BPF virtual machine.
80   Program Compile(Node head);
81 
82  private:
83   using MemoKey = base::Tuple<uint16_t, uint32_t, Node, Node>;
84   struct MemoKeyLess {
85     bool operator()(const MemoKey& lhs, const MemoKey& rhs) const;
86   };
87 
88   // AppendInstruction adds a new instruction, ensuring that |jt| and
89   // |jf| are within range as necessary for |code|.
90   Node AppendInstruction(uint16_t code, uint32_t k, Node jt, Node jf);
91 
92   // WithinRange returns a node equivalent to |next| that is at most
93   // |range| instructions away from the (logical) beginning of the
94   // program.
95   Node WithinRange(Node next, size_t range);
96 
97   // Append appends a new instruction to the physical end (i.e.,
98   // logical beginning) of |program_|.
99   Node Append(uint16_t code, uint32_t k, size_t jt, size_t jf);
100 
101   // Offset returns how many instructions exist in |program_| after |target|.
102   size_t Offset(Node target) const;
103 
104   // NOTE: program_ is the compiled program in *reverse*, so that
105   // indices remain stable as we add instructions.
106   Program program_;
107 
108   // equivalent_ stores the most recent semantically-equivalent node for each
109   // instruction in program_. A node is defined as semantically-equivalent to N
110   // if it has the same instruction code and constant as N and its successor
111   // nodes (if any) are semantically-equivalent to N's successor nodes, or
112   // if it's an unconditional jump to a node semantically-equivalent to N.
113   std::vector<Node> equivalent_;
114 
115   std::map<MemoKey, Node, MemoKeyLess> memos_;
116 
117   DISALLOW_COPY_AND_ASSIGN(CodeGen);
118 };
119 
120 }  // namespace sandbox
121 
122 #endif  // SANDBOX_LINUX_BPF_DSL_CODEGEN_H__
123