1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
6 
7 #include <errno.h>
8 #include <stdint.h>
9 #include <sys/prctl.h>
10 #include <sys/types.h>
11 #include <unistd.h>
12 
13 #include "base/compiler_specific.h"
14 #include "base/files/scoped_file.h"
15 #include "base/logging.h"
16 #include "base/macros.h"
17 #include "base/memory/scoped_ptr.h"
18 #include "base/posix/eintr_wrapper.h"
19 #include "sandbox/linux/bpf_dsl/bpf_dsl.h"
20 #include "sandbox/linux/bpf_dsl/codegen.h"
21 #include "sandbox/linux/bpf_dsl/policy.h"
22 #include "sandbox/linux/bpf_dsl/policy_compiler.h"
23 #include "sandbox/linux/bpf_dsl/seccomp_macros.h"
24 #include "sandbox/linux/bpf_dsl/syscall_set.h"
25 #include "sandbox/linux/seccomp-bpf/die.h"
26 #include "sandbox/linux/seccomp-bpf/syscall.h"
27 #include "sandbox/linux/seccomp-bpf/trap.h"
28 #include "sandbox/linux/services/proc_util.h"
29 #include "sandbox/linux/services/syscall_wrappers.h"
30 #include "sandbox/linux/services/thread_helpers.h"
31 #include "sandbox/linux/system_headers/linux_filter.h"
32 #include "sandbox/linux/system_headers/linux_seccomp.h"
33 #include "sandbox/linux/system_headers/linux_syscalls.h"
34 #include "third_party/valgrind/valgrind.h"
35 
36 namespace sandbox {
37 
38 namespace {
39 
IsRunningOnValgrind()40 bool IsRunningOnValgrind() { return RUNNING_ON_VALGRIND; }
41 
IsSingleThreaded(int proc_fd)42 bool IsSingleThreaded(int proc_fd) {
43   return ThreadHelpers::IsSingleThreaded(proc_fd);
44 }
45 
46 // Check if the kernel supports seccomp-filter (a.k.a. seccomp mode 2) via
47 // prctl().
KernelSupportsSeccompBPF()48 bool KernelSupportsSeccompBPF() {
49   errno = 0;
50   const int rv = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, nullptr);
51 
52   if (rv == -1 && EFAULT == errno) {
53     return true;
54   }
55   return false;
56 }
57 
// Detects kernels carrying LG's buggy sys_set_media_ext syscall, which was
// assigned the same syscall number as seccomp. Returns true when the current
// kernel appears to have that bug. Only Android builds perform the probe; all
// other builds report false.
//
// To stay compatible with future versions of seccomp, the probe passes bogus
// flags that the kernel is unlikely to ever assign a meaning to. A normal
// kernel answers with -EINVAL, whereas a buggy LG kernel returns 1.
bool KernelHasLGBug() {
#if defined(OS_ANDROID)
  // sys_set_media interprets this value as NULL, which should be a safe
  // (non-crashing) way of invoking it. A genuine seccomp syscall interprets
  // it as SECCOMP_SET_MODE_STRICT.
  const unsigned int operation = 0;
  // Arbitrary bit pattern that no real seccomp flag set is expected to match.
  const unsigned int flags = 0xf7a46a5c;
  const int rv = sys_seccomp(operation, flags, nullptr);
  // A genuine kernel fails the call: ideally with -EINVAL (rv == -1 and
  // errno == EINVAL), but at the very least with some error (rv == -1).
  // Anything else means the code that handled the syscall was not the real
  // seccomp implementation.
  return rv != -1;
#else
  return false;
#endif  // defined(OS_ANDROID)
}
84 
85 // Check if the kernel supports seccomp-filter via the seccomp system call
86 // and the TSYNC feature to enable seccomp on all threads.
KernelSupportsSeccompTsync()87 bool KernelSupportsSeccompTsync() {
88   if (KernelHasLGBug()) {
89     return false;
90   }
91 
92   errno = 0;
93   const int rv =
94       sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, nullptr);
95 
96   if (rv == -1 && errno == EFAULT) {
97     return true;
98   } else {
99     // TODO(jln): turn these into DCHECK after 417888 is considered fixed.
100     CHECK_EQ(-1, rv);
101     CHECK(ENOSYS == errno || EINVAL == errno);
102     return false;
103   }
104 }
105 
EscapePC()106 uint64_t EscapePC() {
107   intptr_t rv = Syscall::Call(-1);
108   if (rv == -1 && errno == ENOSYS) {
109     return 0;
110   }
111   return static_cast<uint64_t>(static_cast<uintptr_t>(rv));
112 }
113 
SandboxPanicTrap(const struct arch_seccomp_data &,void * aux)114 intptr_t SandboxPanicTrap(const struct arch_seccomp_data&, void* aux) {
115   SANDBOX_DIE(static_cast<const char*>(aux));
116 }
117 
SandboxPanic(const char * error)118 bpf_dsl::ResultExpr SandboxPanic(const char* error) {
119   return bpf_dsl::Trap(SandboxPanicTrap, error);
120 }
121 
122 }  // namespace
123 
SandboxBPF(bpf_dsl::Policy * policy)124 SandboxBPF::SandboxBPF(bpf_dsl::Policy* policy)
125     : proc_fd_(), sandbox_has_started_(false), policy_(policy) {
126 }
127 
~SandboxBPF()128 SandboxBPF::~SandboxBPF() {
129 }
130 
131 // static
SupportsSeccompSandbox(SeccompLevel level)132 bool SandboxBPF::SupportsSeccompSandbox(SeccompLevel level) {
133   // Never pretend to support seccomp with Valgrind, as it
134   // throws the tool off.
135   if (IsRunningOnValgrind()) {
136     return false;
137   }
138 
139   switch (level) {
140     case SeccompLevel::SINGLE_THREADED:
141       return KernelSupportsSeccompBPF();
142     case SeccompLevel::MULTI_THREADED:
143       return KernelSupportsSeccompTsync();
144   }
145   NOTREACHED();
146   return false;
147 }
148 
StartSandbox(SeccompLevel seccomp_level)149 bool SandboxBPF::StartSandbox(SeccompLevel seccomp_level) {
150   DCHECK(policy_);
151   CHECK(seccomp_level == SeccompLevel::SINGLE_THREADED ||
152         seccomp_level == SeccompLevel::MULTI_THREADED);
153 
154   if (sandbox_has_started_) {
155     SANDBOX_DIE(
156         "Cannot repeatedly start sandbox. Create a separate Sandbox "
157         "object instead.");
158     return false;
159   }
160 
161   if (!proc_fd_.is_valid()) {
162     SetProcFd(ProcUtil::OpenProc());
163   }
164 
165   const bool supports_tsync = KernelSupportsSeccompTsync();
166 
167   if (seccomp_level == SeccompLevel::SINGLE_THREADED) {
168     // Wait for /proc/self/task/ to update if needed and assert the
169     // process is single threaded.
170     ThreadHelpers::AssertSingleThreaded(proc_fd_.get());
171   } else if (seccomp_level == SeccompLevel::MULTI_THREADED) {
172     if (IsSingleThreaded(proc_fd_.get())) {
173       SANDBOX_DIE("Cannot start sandbox; "
174                   "process may be single-threaded when reported as not");
175       return false;
176     }
177     if (!supports_tsync) {
178       SANDBOX_DIE("Cannot start sandbox; kernel does not support synchronizing "
179                   "filters for a threadgroup");
180       return false;
181     }
182   }
183 
184   // We no longer need access to any files in /proc. We want to do this
185   // before installing the filters, just in case that our policy denies
186   // close().
187   if (proc_fd_.is_valid()) {
188     proc_fd_.reset();
189   }
190 
191   // Install the filters.
192   InstallFilter(supports_tsync ||
193                 seccomp_level == SeccompLevel::MULTI_THREADED);
194 
195   return true;
196 }
197 
SetProcFd(base::ScopedFD proc_fd)198 void SandboxBPF::SetProcFd(base::ScopedFD proc_fd) {
199   proc_fd_.swap(proc_fd);
200 }
201 
202 // static
IsValidSyscallNumber(int sysnum)203 bool SandboxBPF::IsValidSyscallNumber(int sysnum) {
204   return SyscallSet::IsValid(sysnum);
205 }
206 
207 // static
IsRequiredForUnsafeTrap(int sysno)208 bool SandboxBPF::IsRequiredForUnsafeTrap(int sysno) {
209   return bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap(sysno);
210 }
211 
212 // static
ForwardSyscall(const struct arch_seccomp_data & args)213 intptr_t SandboxBPF::ForwardSyscall(const struct arch_seccomp_data& args) {
214   return Syscall::Call(
215       args.nr, static_cast<intptr_t>(args.args[0]),
216       static_cast<intptr_t>(args.args[1]), static_cast<intptr_t>(args.args[2]),
217       static_cast<intptr_t>(args.args[3]), static_cast<intptr_t>(args.args[4]),
218       static_cast<intptr_t>(args.args[5]));
219 }
220 
AssembleFilter()221 CodeGen::Program SandboxBPF::AssembleFilter() {
222   DCHECK(policy_);
223 
224   bpf_dsl::PolicyCompiler compiler(policy_.get(), Trap::Registry());
225   if (Trap::SandboxDebuggingAllowedByUser()) {
226     compiler.DangerousSetEscapePC(EscapePC());
227   }
228   compiler.SetPanicFunc(SandboxPanic);
229   return compiler.Compile();
230 }
231 
InstallFilter(bool must_sync_threads)232 void SandboxBPF::InstallFilter(bool must_sync_threads) {
233   // We want to be very careful in not imposing any requirements on the
234   // policies that are set with SetSandboxPolicy(). This means, as soon as
235   // the sandbox is active, we shouldn't be relying on libraries that could
236   // be making system calls. This, for example, means we should avoid
237   // using the heap and we should avoid using STL functions.
238   // Temporarily copy the contents of the "program" vector into a
239   // stack-allocated array; and then explicitly destroy that object.
240   // This makes sure we don't ex- or implicitly call new/delete after we
241   // installed the BPF filter program in the kernel. Depending on the
242   // system memory allocator that is in effect, these operators can result
243   // in system calls to things like munmap() or brk().
244   CodeGen::Program program = AssembleFilter();
245 
246   struct sock_filter bpf[program.size()];
247   const struct sock_fprog prog = {static_cast<unsigned short>(program.size()),
248                                   bpf};
249   memcpy(bpf, &program[0], sizeof(bpf));
250   CodeGen::Program().swap(program);  // vector swap trick
251 
252   // Make an attempt to release memory that is no longer needed here, rather
253   // than in the destructor. Try to avoid as much as possible to presume of
254   // what will be possible to do in the new (sandboxed) execution environment.
255   policy_.reset();
256 
257   if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
258     SANDBOX_DIE("Kernel refuses to enable no-new-privs");
259   }
260 
261   // Install BPF filter program. If the thread state indicates multi-threading
262   // support, then the kernel hass the seccomp system call. Otherwise, fall
263   // back on prctl, which requires the process to be single-threaded.
264   if (must_sync_threads) {
265     int rv =
266         sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &prog);
267     if (rv) {
268       SANDBOX_DIE(
269           "Kernel refuses to turn on and synchronize threads for BPF filters");
270     }
271   } else {
272     if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
273       SANDBOX_DIE("Kernel refuses to turn on BPF filters");
274     }
275   }
276 
277   sandbox_has_started_ = true;
278 }
279 
280 }  // namespace sandbox
281