1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "sandbox/linux/bpf_dsl/codegen.h"
6
7 #include <stddef.h>
8 #include <stdint.h>
9
10 #include <limits>
11 #include <utility>
12
13 #include "base/logging.h"
14 #include "sandbox/linux/system_headers/linux_filter.h"
15
16 // This CodeGen implementation strives for simplicity while still
17 // generating acceptable BPF programs under typical usage patterns
18 // (e.g., by PolicyCompiler).
19 //
20 // The key to its simplicity is that BPF programs only support forward
21 // jumps/branches, which allows constraining the DAG construction API
22 // to make instruction nodes immutable. Immutable nodes admits a
23 // simple greedy approach of emitting new instructions as needed and
24 // then reusing existing ones that have already been emitted. This
25 // cleanly avoids any need to compute basic blocks or apply
26 // topological sorting because the API effectively sorts instructions
27 // for us (e.g., before MakeInstruction() can be called to emit a
28 // branch instruction, it must have already been called for each
29 // branch path).
30 //
31 // This greedy algorithm is not without (theoretical) weakness though:
32 //
33 // 1. In the general case, we don't eliminate dead code. If needed,
34 // we could trace back through the program in Compile() and elide
35 // any unneeded instructions, but in practice we only emit live
36 // instructions anyway.
37 //
38 // 2. By not dividing instructions into basic blocks and sorting, we
39 // lose an opportunity to move non-branch/non-return instructions
40 // adjacent to their successor instructions, which means we might
41 // need to emit additional jumps. But in practice, they'll
42 // already be nearby as long as callers don't go out of their way
43 // to interleave MakeInstruction() calls for unrelated code
44 // sequences.
45
46 namespace sandbox {
47
48 // kBranchRange is the maximum value that can be stored in
49 // sock_filter's 8-bit jt and jf fields.
50 const size_t kBranchRange = std::numeric_limits<uint8_t>::max();
51
52 const CodeGen::Node CodeGen::kNullNode;
53
CodeGen()54 CodeGen::CodeGen() : program_(), equivalent_(), memos_() {
55 }
56
~CodeGen()57 CodeGen::~CodeGen() {
58 }
59
Compile(CodeGen::Node head)60 CodeGen::Program CodeGen::Compile(CodeGen::Node head) {
61 return Program(program_.rbegin() + Offset(head), program_.rend());
62 }
63
MakeInstruction(uint16_t code,uint32_t k,Node jt,Node jf)64 CodeGen::Node CodeGen::MakeInstruction(uint16_t code,
65 uint32_t k,
66 Node jt,
67 Node jf) {
68 // To avoid generating redundant code sequences, we memoize the
69 // results from AppendInstruction().
70 auto res = memos_.insert(std::make_pair(MemoKey(code, k, jt, jf), kNullNode));
71 CodeGen::Node* node = &res.first->second;
72 if (res.second) { // Newly inserted memo entry.
73 *node = AppendInstruction(code, k, jt, jf);
74 }
75 return *node;
76 }
77
AppendInstruction(uint16_t code,uint32_t k,Node jt,Node jf)78 CodeGen::Node CodeGen::AppendInstruction(uint16_t code,
79 uint32_t k,
80 Node jt,
81 Node jf) {
82 if (BPF_CLASS(code) == BPF_JMP) {
83 CHECK_NE(BPF_JA, BPF_OP(code)) << "CodeGen inserts JAs as needed";
84
85 // Optimally adding jumps is rather tricky, so we use a quick
86 // approximation: by artificially reducing |jt|'s range, |jt| will
87 // stay within its true range even if we add a jump for |jf|.
88 jt = WithinRange(jt, kBranchRange - 1);
89 jf = WithinRange(jf, kBranchRange);
90 return Append(code, k, Offset(jt), Offset(jf));
91 }
92
93 CHECK_EQ(kNullNode, jf) << "Non-branch instructions shouldn't provide jf";
94 if (BPF_CLASS(code) == BPF_RET) {
95 CHECK_EQ(kNullNode, jt) << "Return instructions shouldn't provide jt";
96 } else {
97 // For non-branch/non-return instructions, execution always
98 // proceeds to the next instruction; so we need to arrange for
99 // that to be |jt|.
100 jt = WithinRange(jt, 0);
101 CHECK_EQ(0U, Offset(jt)) << "ICE: Failed to setup next instruction";
102 }
103 return Append(code, k, 0, 0);
104 }
105
WithinRange(Node target,size_t range)106 CodeGen::Node CodeGen::WithinRange(Node target, size_t range) {
107 // Just use |target| if it's already within range.
108 if (Offset(target) <= range) {
109 return target;
110 }
111
112 // Alternatively, look for an equivalent instruction within range.
113 if (Offset(equivalent_.at(target)) <= range) {
114 return equivalent_.at(target);
115 }
116
117 // Otherwise, fall back to emitting a jump instruction.
118 Node jump = Append(BPF_JMP | BPF_JA, Offset(target), 0, 0);
119 equivalent_.at(target) = jump;
120 return jump;
121 }
122
Append(uint16_t code,uint32_t k,size_t jt,size_t jf)123 CodeGen::Node CodeGen::Append(uint16_t code, uint32_t k, size_t jt, size_t jf) {
124 if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
125 CHECK_LE(jt, kBranchRange);
126 CHECK_LE(jf, kBranchRange);
127 } else {
128 CHECK_EQ(0U, jt);
129 CHECK_EQ(0U, jf);
130 }
131
132 CHECK_LT(program_.size(), static_cast<size_t>(BPF_MAXINSNS));
133 CHECK_EQ(program_.size(), equivalent_.size());
134
135 Node res = program_.size();
136 program_.push_back(sock_filter{
137 code, static_cast<uint8_t>(jt), static_cast<uint8_t>(jf), k});
138 equivalent_.push_back(res);
139 return res;
140 }
141
Offset(Node target) const142 size_t CodeGen::Offset(Node target) const {
143 CHECK_LT(target, program_.size()) << "Bogus offset target node";
144 return (program_.size() - 1) - target;
145 }
146
147 // TODO(mdempsky): Move into a general base::Tuple helper library.
operator ()(const MemoKey & lhs,const MemoKey & rhs) const148 bool CodeGen::MemoKeyLess::operator()(const MemoKey& lhs,
149 const MemoKey& rhs) const {
150 if (base::get<0>(lhs) != base::get<0>(rhs))
151 return base::get<0>(lhs) < base::get<0>(rhs);
152 if (base::get<1>(lhs) != base::get<1>(rhs))
153 return base::get<1>(lhs) < base::get<1>(rhs);
154 if (base::get<2>(lhs) != base::get<2>(rhs))
155 return base::get<2>(lhs) < base::get<2>(rhs);
156 if (base::get<3>(lhs) != base::get<3>(rhs))
157 return base::get<3>(lhs) < base::get<3>(rhs);
158 return false;
159 }
160
161 } // namespace sandbox
162