1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "sandbox/linux/bpf_dsl/policy_compiler.h"
6 
7 #include <errno.h>
8 #include <stddef.h>
9 #include <stdint.h>
10 #include <sys/syscall.h>
11 
12 #include <limits>
13 
14 #include "base/logging.h"
15 #include "base/macros.h"
16 #include "sandbox/linux/bpf_dsl/bpf_dsl.h"
17 #include "sandbox/linux/bpf_dsl/bpf_dsl_impl.h"
18 #include "sandbox/linux/bpf_dsl/codegen.h"
19 #include "sandbox/linux/bpf_dsl/policy.h"
20 #include "sandbox/linux/bpf_dsl/seccomp_macros.h"
21 #include "sandbox/linux/bpf_dsl/syscall_set.h"
22 #include "sandbox/linux/system_headers/linux_filter.h"
23 #include "sandbox/linux/system_headers/linux_seccomp.h"
24 #include "sandbox/linux/system_headers/linux_syscalls.h"
25 
26 namespace sandbox {
27 namespace bpf_dsl {
28 
29 namespace {
30 
// True when the target is an Intel architecture (i386 or x86-64).
const bool kIsIntel =
#if defined(__i386__) || defined(__x86_64__)
    true;
#else
    false;
#endif

// True when the target is x86-64 compiled with the ILP32 data model (the
// "x32" ABI referred to by CheckSyscallNumber()).
const bool kIsX32 =
#if defined(__x86_64__) && defined(__ILP32__)
    true;
#else
    false;
#endif
41 
// System calls that a policy must allow unconditionally before it may use
// UnsafeTrap(); Compile() CHECKs every entry. The non-"rt" variants are
// listed only on platforms that define them.
const int kSyscallsRequiredForUnsafeTraps[] = {
    __NR_rt_sigprocmask,
    __NR_rt_sigreturn,
#ifdef __NR_sigprocmask
    __NR_sigprocmask,
#endif
#ifdef __NR_sigreturn
    __NR_sigreturn,
#endif
};
52 
// Returns true iff exactly one bit of |x| is set.
bool HasExactlyOneBit(uint64_t x) {
  if (x == 0) {
    return false;
  }
  // Clearing the lowest set bit leaves zero only when that bit was the sole
  // one. Common trick; e.g., see http://stackoverflow.com/a/108329.
  const uint64_t without_lowest_bit = x & (x - 1);
  return without_lowest_bit == 0;
}
57 
DefaultPanic(const char * error)58 ResultExpr DefaultPanic(const char* error) {
59   return Kill();
60 }
61 
62 // A Trap() handler that returns an "errno" value. The value is encoded
63 // in the "aux" parameter.
ReturnErrno(const struct arch_seccomp_data &,void * aux)64 intptr_t ReturnErrno(const struct arch_seccomp_data&, void* aux) {
65   // TrapFnc functions report error by following the native kernel convention
66   // of returning an exit code in the range of -1..-4096. They do not try to
67   // set errno themselves. The glibc wrapper that triggered the SIGSYS will
68   // ultimately do so for us.
69   int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;
70   return -err;
71 }
72 
HasUnsafeTraps(const Policy * policy)73 bool HasUnsafeTraps(const Policy* policy) {
74   DCHECK(policy);
75   for (uint32_t sysnum : SyscallSet::ValidOnly()) {
76     if (policy->EvaluateSyscall(sysnum)->HasUnsafeTraps()) {
77       return true;
78     }
79   }
80   return policy->InvalidSyscall()->HasUnsafeTraps();
81 }
82 
83 }  // namespace
84 
85 struct PolicyCompiler::Range {
86   uint32_t from;
87   CodeGen::Node node;
88 };
89 
PolicyCompiler(const Policy * policy,TrapRegistry * registry)90 PolicyCompiler::PolicyCompiler(const Policy* policy, TrapRegistry* registry)
91     : policy_(policy),
92       registry_(registry),
93       escapepc_(0),
94       panic_func_(DefaultPanic),
95       gen_(),
96       has_unsafe_traps_(HasUnsafeTraps(policy_)) {
97   DCHECK(policy);
98 }
99 
~PolicyCompiler()100 PolicyCompiler::~PolicyCompiler() {
101 }
102 
Compile()103 CodeGen::Program PolicyCompiler::Compile() {
104   CHECK(policy_->InvalidSyscall()->IsDeny())
105       << "Policies should deny invalid system calls";
106 
107   // If our BPF program has unsafe traps, enable support for them.
108   if (has_unsafe_traps_) {
109     CHECK_NE(0U, escapepc_) << "UnsafeTrap() requires a valid escape PC";
110 
111     for (int sysnum : kSyscallsRequiredForUnsafeTraps) {
112       CHECK(policy_->EvaluateSyscall(sysnum)->IsAllow())
113           << "Policies that use UnsafeTrap() must unconditionally allow all "
114              "required system calls";
115     }
116 
117     CHECK(registry_->EnableUnsafeTraps())
118         << "We'd rather die than enable unsafe traps";
119   }
120 
121   // Assemble the BPF filter program.
122   return gen_.Compile(AssemblePolicy());
123 }
124 
DangerousSetEscapePC(uint64_t escapepc)125 void PolicyCompiler::DangerousSetEscapePC(uint64_t escapepc) {
126   escapepc_ = escapepc;
127 }
128 
SetPanicFunc(PanicFunc panic_func)129 void PolicyCompiler::SetPanicFunc(PanicFunc panic_func) {
130   panic_func_ = panic_func;
131 }
132 
AssemblePolicy()133 CodeGen::Node PolicyCompiler::AssemblePolicy() {
134   // A compiled policy consists of three logical parts:
135   //   1. Check that the "arch" field matches the expected architecture.
136   //   2. If the policy involves unsafe traps, check if the syscall was
137   //      invoked by Syscall::Call, and then allow it unconditionally.
138   //   3. Check the system call number and jump to the appropriate compiled
139   //      system call policy number.
140   return CheckArch(MaybeAddEscapeHatch(DispatchSyscall()));
141 }
142 
CheckArch(CodeGen::Node passed)143 CodeGen::Node PolicyCompiler::CheckArch(CodeGen::Node passed) {
144   // If the architecture doesn't match SECCOMP_ARCH, disallow the
145   // system call.
146   return gen_.MakeInstruction(
147       BPF_LD + BPF_W + BPF_ABS, SECCOMP_ARCH_IDX,
148       gen_.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, SECCOMP_ARCH, passed,
149                            CompileResult(panic_func_(
150                                "Invalid audit architecture in BPF filter"))));
151 }
152 
MaybeAddEscapeHatch(CodeGen::Node rest)153 CodeGen::Node PolicyCompiler::MaybeAddEscapeHatch(CodeGen::Node rest) {
154   // If no unsafe traps, then simply return |rest|.
155   if (!has_unsafe_traps_) {
156     return rest;
157   }
158 
159   // We already enabled unsafe traps in Compile, but enable them again to give
160   // the trap registry a second chance to complain before we add the backdoor.
161   CHECK(registry_->EnableUnsafeTraps());
162 
163   // Allow system calls, if they originate from our magic return address.
164   const uint32_t lopc = static_cast<uint32_t>(escapepc_);
165   const uint32_t hipc = static_cast<uint32_t>(escapepc_ >> 32);
166 
167   // BPF cannot do native 64-bit comparisons, so we have to compare
168   // both 32-bit halves of the instruction pointer. If they match what
169   // we expect, we return ERR_ALLOWED. If either or both don't match,
170   // we continue evalutating the rest of the sandbox policy.
171   //
172   // For simplicity, we check the full 64-bit instruction pointer even
173   // on 32-bit architectures.
174   return gen_.MakeInstruction(
175       BPF_LD + BPF_W + BPF_ABS, SECCOMP_IP_LSB_IDX,
176       gen_.MakeInstruction(
177           BPF_JMP + BPF_JEQ + BPF_K, lopc,
178           gen_.MakeInstruction(
179               BPF_LD + BPF_W + BPF_ABS, SECCOMP_IP_MSB_IDX,
180               gen_.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, hipc,
181                                    CompileResult(Allow()), rest)),
182           rest));
183 }
184 
DispatchSyscall()185 CodeGen::Node PolicyCompiler::DispatchSyscall() {
186   // Evaluate all possible system calls and group their Nodes into
187   // ranges of identical codes.
188   Ranges ranges;
189   FindRanges(&ranges);
190 
191   // Compile the system call ranges to an optimized BPF jumptable
192   CodeGen::Node jumptable = AssembleJumpTable(ranges.begin(), ranges.end());
193 
194   // Grab the system call number, so that we can check it and then
195   // execute the jump table.
196   return gen_.MakeInstruction(
197       BPF_LD + BPF_W + BPF_ABS, SECCOMP_NR_IDX, CheckSyscallNumber(jumptable));
198 }
199 
CheckSyscallNumber(CodeGen::Node passed)200 CodeGen::Node PolicyCompiler::CheckSyscallNumber(CodeGen::Node passed) {
201   if (kIsIntel) {
202     // On Intel architectures, verify that system call numbers are in the
203     // expected number range.
204     CodeGen::Node invalidX32 =
205         CompileResult(panic_func_("Illegal mixing of system call ABIs"));
206     if (kIsX32) {
207       // The newer x32 API always sets bit 30.
208       return gen_.MakeInstruction(
209           BPF_JMP + BPF_JSET + BPF_K, 0x40000000, passed, invalidX32);
210     } else {
211       // The older i386 and x86-64 APIs clear bit 30 on all system calls.
212       return gen_.MakeInstruction(
213           BPF_JMP + BPF_JSET + BPF_K, 0x40000000, invalidX32, passed);
214     }
215   }
216 
217   // TODO(mdempsky): Similar validation for other architectures?
218   return passed;
219 }
220 
FindRanges(Ranges * ranges)221 void PolicyCompiler::FindRanges(Ranges* ranges) {
222   // Please note that "struct seccomp_data" defines system calls as a signed
223   // int32_t, but BPF instructions always operate on unsigned quantities. We
224   // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,
225   // and then verifying that the rest of the number range (both positive and
226   // negative) all return the same Node.
227   const CodeGen::Node invalid_node = CompileResult(policy_->InvalidSyscall());
228   uint32_t old_sysnum = 0;
229   CodeGen::Node old_node =
230       SyscallSet::IsValid(old_sysnum)
231           ? CompileResult(policy_->EvaluateSyscall(old_sysnum))
232           : invalid_node;
233 
234   for (uint32_t sysnum : SyscallSet::All()) {
235     CodeGen::Node node =
236         SyscallSet::IsValid(sysnum)
237             ? CompileResult(policy_->EvaluateSyscall(static_cast<int>(sysnum)))
238             : invalid_node;
239     // N.B., here we rely on CodeGen folding (i.e., returning the same
240     // node value for) identical code sequences, otherwise our jump
241     // table will blow up in size.
242     if (node != old_node) {
243       ranges->push_back(Range{old_sysnum, old_node});
244       old_sysnum = sysnum;
245       old_node = node;
246     }
247   }
248   ranges->push_back(Range{old_sysnum, old_node});
249 }
250 
AssembleJumpTable(Ranges::const_iterator start,Ranges::const_iterator stop)251 CodeGen::Node PolicyCompiler::AssembleJumpTable(Ranges::const_iterator start,
252                                                 Ranges::const_iterator stop) {
253   // We convert the list of system call ranges into jump table that performs
254   // a binary search over the ranges.
255   // As a sanity check, we need to have at least one distinct ranges for us
256   // to be able to build a jump table.
257   CHECK(start < stop) << "Invalid iterator range";
258   const auto n = stop - start;
259   if (n == 1) {
260     // If we have narrowed things down to a single range object, we can
261     // return from the BPF filter program.
262     return start->node;
263   }
264 
265   // Pick the range object that is located at the mid point of our list.
266   // We compare our system call number against the lowest valid system call
267   // number in this range object. If our number is lower, it is outside of
268   // this range object. If it is greater or equal, it might be inside.
269   Ranges::const_iterator mid = start + n / 2;
270 
271   // Sub-divide the list of ranges and continue recursively.
272   CodeGen::Node jf = AssembleJumpTable(start, mid);
273   CodeGen::Node jt = AssembleJumpTable(mid, stop);
274   return gen_.MakeInstruction(BPF_JMP + BPF_JGE + BPF_K, mid->from, jt, jf);
275 }
276 
CompileResult(const ResultExpr & res)277 CodeGen::Node PolicyCompiler::CompileResult(const ResultExpr& res) {
278   return res->Compile(this);
279 }
280 
MaskedEqual(int argno,size_t width,uint64_t mask,uint64_t value,CodeGen::Node passed,CodeGen::Node failed)281 CodeGen::Node PolicyCompiler::MaskedEqual(int argno,
282                                           size_t width,
283                                           uint64_t mask,
284                                           uint64_t value,
285                                           CodeGen::Node passed,
286                                           CodeGen::Node failed) {
287   // Sanity check that arguments make sense.
288   CHECK(argno >= 0 && argno < 6) << "Invalid argument number " << argno;
289   CHECK(width == 4 || width == 8) << "Invalid argument width " << width;
290   CHECK_NE(0U, mask) << "Zero mask is invalid";
291   CHECK_EQ(value, value & mask) << "Value contains masked out bits";
292   if (sizeof(void*) == 4) {
293     CHECK_EQ(4U, width) << "Invalid width on 32-bit platform";
294   }
295   if (width == 4) {
296     CHECK_EQ(0U, mask >> 32) << "Mask exceeds argument size";
297     CHECK_EQ(0U, value >> 32) << "Value exceeds argument size";
298   }
299 
300   // We want to emit code to check "(arg & mask) == value" where arg, mask, and
301   // value are 64-bit values, but the BPF machine is only 32-bit. We implement
302   // this by independently testing the upper and lower 32-bits and continuing to
303   // |passed| if both evaluate true, or to |failed| if either evaluate false.
304   return MaskedEqualHalf(argno, width, mask, value, ArgHalf::UPPER,
305                          MaskedEqualHalf(argno, width, mask, value,
306                                          ArgHalf::LOWER, passed, failed),
307                          failed);
308 }
309 
MaskedEqualHalf(int argno,size_t width,uint64_t full_mask,uint64_t full_value,ArgHalf half,CodeGen::Node passed,CodeGen::Node failed)310 CodeGen::Node PolicyCompiler::MaskedEqualHalf(int argno,
311                                               size_t width,
312                                               uint64_t full_mask,
313                                               uint64_t full_value,
314                                               ArgHalf half,
315                                               CodeGen::Node passed,
316                                               CodeGen::Node failed) {
317   if (width == 4 && half == ArgHalf::UPPER) {
318     // Special logic for sanity checking the upper 32-bits of 32-bit system
319     // call arguments.
320 
321     // TODO(mdempsky): Compile Unexpected64bitArgument() just per program.
322     CodeGen::Node invalid_64bit = Unexpected64bitArgument();
323 
324     const uint32_t upper = SECCOMP_ARG_MSB_IDX(argno);
325     const uint32_t lower = SECCOMP_ARG_LSB_IDX(argno);
326 
327     if (sizeof(void*) == 4) {
328       // On 32-bit platforms, the upper 32-bits should always be 0:
329       //   LDW  [upper]
330       //   JEQ  0, passed, invalid
331       return gen_.MakeInstruction(
332           BPF_LD + BPF_W + BPF_ABS,
333           upper,
334           gen_.MakeInstruction(
335               BPF_JMP + BPF_JEQ + BPF_K, 0, passed, invalid_64bit));
336     }
337 
338     // On 64-bit platforms, the upper 32-bits may be 0 or ~0; but we only allow
339     // ~0 if the sign bit of the lower 32-bits is set too:
340     //   LDW  [upper]
341     //   JEQ  0, passed, (next)
342     //   JEQ  ~0, (next), invalid
343     //   LDW  [lower]
344     //   JSET (1<<31), passed, invalid
345     //
346     // TODO(mdempsky): The JSET instruction could perhaps jump to passed->next
347     // instead, as the first instruction of passed should be "LDW [lower]".
348     return gen_.MakeInstruction(
349         BPF_LD + BPF_W + BPF_ABS,
350         upper,
351         gen_.MakeInstruction(
352             BPF_JMP + BPF_JEQ + BPF_K,
353             0,
354             passed,
355             gen_.MakeInstruction(
356                 BPF_JMP + BPF_JEQ + BPF_K,
357                 std::numeric_limits<uint32_t>::max(),
358                 gen_.MakeInstruction(
359                     BPF_LD + BPF_W + BPF_ABS,
360                     lower,
361                     gen_.MakeInstruction(BPF_JMP + BPF_JSET + BPF_K,
362                                          1U << 31,
363                                          passed,
364                                          invalid_64bit)),
365                 invalid_64bit)));
366   }
367 
368   const uint32_t idx = (half == ArgHalf::UPPER) ? SECCOMP_ARG_MSB_IDX(argno)
369                                                 : SECCOMP_ARG_LSB_IDX(argno);
370   const uint32_t mask = (half == ArgHalf::UPPER) ? full_mask >> 32 : full_mask;
371   const uint32_t value =
372       (half == ArgHalf::UPPER) ? full_value >> 32 : full_value;
373 
374   // Emit a suitable instruction sequence for (arg & mask) == value.
375 
376   // For (arg & 0) == 0, just return passed.
377   if (mask == 0) {
378     CHECK_EQ(0U, value);
379     return passed;
380   }
381 
382   // For (arg & ~0) == value, emit:
383   //   LDW  [idx]
384   //   JEQ  value, passed, failed
385   if (mask == std::numeric_limits<uint32_t>::max()) {
386     return gen_.MakeInstruction(
387         BPF_LD + BPF_W + BPF_ABS,
388         idx,
389         gen_.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, value, passed, failed));
390   }
391 
392   // For (arg & mask) == 0, emit:
393   //   LDW  [idx]
394   //   JSET mask, failed, passed
395   // (Note: failed and passed are intentionally swapped.)
396   if (value == 0) {
397     return gen_.MakeInstruction(
398         BPF_LD + BPF_W + BPF_ABS,
399         idx,
400         gen_.MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, mask, failed, passed));
401   }
402 
403   // For (arg & x) == x where x is a single-bit value, emit:
404   //   LDW  [idx]
405   //   JSET mask, passed, failed
406   if (mask == value && HasExactlyOneBit(mask)) {
407     return gen_.MakeInstruction(
408         BPF_LD + BPF_W + BPF_ABS,
409         idx,
410         gen_.MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, mask, passed, failed));
411   }
412 
413   // Generic fallback:
414   //   LDW  [idx]
415   //   AND  mask
416   //   JEQ  value, passed, failed
417   return gen_.MakeInstruction(
418       BPF_LD + BPF_W + BPF_ABS,
419       idx,
420       gen_.MakeInstruction(
421           BPF_ALU + BPF_AND + BPF_K,
422           mask,
423           gen_.MakeInstruction(
424               BPF_JMP + BPF_JEQ + BPF_K, value, passed, failed)));
425 }
426 
Unexpected64bitArgument()427 CodeGen::Node PolicyCompiler::Unexpected64bitArgument() {
428   return CompileResult(panic_func_("Unexpected 64bit argument detected"));
429 }
430 
Return(uint32_t ret)431 CodeGen::Node PolicyCompiler::Return(uint32_t ret) {
432   if (has_unsafe_traps_ && (ret & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
433     // When inside an UnsafeTrap() callback, we want to allow all system calls.
434     // This means, we must conditionally disable the sandbox -- and that's not
435     // something that kernel-side BPF filters can do, as they cannot inspect
436     // any state other than the syscall arguments.
437     // But if we redirect all error handlers to user-space, then we can easily
438     // make this decision.
439     // The performance penalty for this extra round-trip to user-space is not
440     // actually that bad, as we only ever pay it for denied system calls; and a
441     // typical program has very few of these.
442     return Trap(ReturnErrno, reinterpret_cast<void*>(ret & SECCOMP_RET_DATA),
443                 true);
444   }
445 
446   return gen_.MakeInstruction(BPF_RET + BPF_K, ret);
447 }
448 
Trap(TrapRegistry::TrapFnc fnc,const void * aux,bool safe)449 CodeGen::Node PolicyCompiler::Trap(TrapRegistry::TrapFnc fnc,
450                                    const void* aux,
451                                    bool safe) {
452   uint16_t trap_id = registry_->Add(fnc, aux, safe);
453   return gen_.MakeInstruction(BPF_RET + BPF_K, SECCOMP_RET_TRAP + trap_id);
454 }
455 
IsRequiredForUnsafeTrap(int sysno)456 bool PolicyCompiler::IsRequiredForUnsafeTrap(int sysno) {
457   for (size_t i = 0; i < arraysize(kSyscallsRequiredForUnsafeTraps); ++i) {
458     if (sysno == kSyscallsRequiredForUnsafeTraps[i]) {
459       return true;
460     }
461   }
462   return false;
463 }
464 
465 }  // namespace bpf_dsl
466 }  // namespace sandbox
467