1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
6 
7 #include <errno.h>
8 #include <stdint.h>
9 #include <sys/prctl.h>
10 #include <sys/types.h>
11 #include <unistd.h>
12 
13 #include "base/compiler_specific.h"
14 #include "base/files/scoped_file.h"
15 #include "base/logging.h"
16 #include "base/macros.h"
17 #include "base/posix/eintr_wrapper.h"
18 #include "base/third_party/valgrind/valgrind.h"
19 #include "sandbox/linux/bpf_dsl/bpf_dsl.h"
20 #include "sandbox/linux/bpf_dsl/codegen.h"
21 #include "sandbox/linux/bpf_dsl/policy.h"
22 #include "sandbox/linux/bpf_dsl/policy_compiler.h"
23 #include "sandbox/linux/bpf_dsl/seccomp_macros.h"
24 #include "sandbox/linux/bpf_dsl/syscall_set.h"
25 #include "sandbox/linux/seccomp-bpf/die.h"
26 #include "sandbox/linux/seccomp-bpf/syscall.h"
27 #include "sandbox/linux/seccomp-bpf/trap.h"
28 #include "sandbox/linux/services/proc_util.h"
29 #include "sandbox/linux/services/syscall_wrappers.h"
30 #include "sandbox/linux/services/thread_helpers.h"
31 #include "sandbox/linux/system_headers/linux_filter.h"
32 #include "sandbox/linux/system_headers/linux_seccomp.h"
33 #include "sandbox/linux/system_headers/linux_syscalls.h"
34 
35 namespace sandbox {
36 
37 namespace {
38 
IsRunningOnValgrind()39 bool IsRunningOnValgrind() { return RUNNING_ON_VALGRIND; }
40 
IsSingleThreaded(int proc_fd)41 bool IsSingleThreaded(int proc_fd) {
42   return ThreadHelpers::IsSingleThreaded(proc_fd);
43 }
44 
// Probes for seccomp-filter (a.k.a. seccomp mode 2) support through prctl().
//
// The probe deliberately passes a null filter pointer, so the call is
// expected to fail. A failure with EFAULT is treated as evidence that the
// kernel understood the request; anything else is reported as "unsupported".
bool KernelSupportsSeccompBPF() {
  errno = 0;
  const int result = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, nullptr);
  return result == -1 && errno == EFAULT;
}
56 
// Detects kernels carrying LG's buggy sys_set_media_ext syscall, which was
// given the same syscall number as seccomp. Returns true when the current
// kernel has that buggy syscall.
//
// To stay compatible with future seccomp versions, the probe uses bogus flags
// that the kernel is unlikely to ever assign a meaning to. A normal kernel
// answers with -EINVAL, whereas a buggy LG kernel would return 1.
//
// The probe is only compiled in on Android; elsewhere this always returns
// false.
bool KernelHasLGBug() {
#if defined(OS_ANDROID)
  // The buggy syscall interprets this value as NULL, which should be a safe
  // (non-crashing) way to invoke it. A genuine seccomp syscall interprets it
  // as SECCOMP_SET_MODE_STRICT.
  const unsigned int kOperation = 0;
  // Chosen by fair dice roll. Guaranteed to be random.
  const unsigned int kBogusFlags = 0xf7a46a5c;

  // A genuine kernel reports an error here (rv == -1, normally with errno set
  // to EINVAL). Any other outcome means that whatever handled the syscall was
  // not the real seccomp.
  const int rv = sys_seccomp(kOperation, kBogusFlags, nullptr);
  return rv != -1;
#else
  return false;
#endif  // defined(OS_ANDROID)
}
83 
84 // Check if the kernel supports seccomp-filter via the seccomp system call
85 // and the TSYNC feature to enable seccomp on all threads.
KernelSupportsSeccompTsync()86 bool KernelSupportsSeccompTsync() {
87   if (KernelHasLGBug()) {
88     return false;
89   }
90 
91   errno = 0;
92   const int rv =
93       sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, nullptr);
94 
95   if (rv == -1 && errno == EFAULT) {
96     return true;
97   } else {
98     // TODO(jln): turn these into DCHECK after 417888 is considered fixed.
99     CHECK_EQ(-1, rv);
100     CHECK(ENOSYS == errno || EINVAL == errno);
101     return false;
102   }
103 }
104 
EscapePC()105 uint64_t EscapePC() {
106   intptr_t rv = Syscall::Call(-1);
107   if (rv == -1 && errno == ENOSYS) {
108     return 0;
109   }
110   return static_cast<uint64_t>(static_cast<uintptr_t>(rv));
111 }
112 
SandboxPanicTrap(const struct arch_seccomp_data &,void * aux)113 intptr_t SandboxPanicTrap(const struct arch_seccomp_data&, void* aux) {
114   SANDBOX_DIE(static_cast<const char*>(aux));
115 }
116 
SandboxPanic(const char * error)117 bpf_dsl::ResultExpr SandboxPanic(const char* error) {
118   return bpf_dsl::Trap(SandboxPanicTrap, error);
119 }
120 
121 }  // namespace
122 
SandboxBPF(bpf_dsl::Policy * policy)123 SandboxBPF::SandboxBPF(bpf_dsl::Policy* policy)
124     : proc_fd_(), sandbox_has_started_(false), policy_(policy) {
125 }
126 
~SandboxBPF()127 SandboxBPF::~SandboxBPF() {
128 }
129 
130 // static
SupportsSeccompSandbox(SeccompLevel level)131 bool SandboxBPF::SupportsSeccompSandbox(SeccompLevel level) {
132   // Never pretend to support seccomp with Valgrind, as it
133   // throws the tool off.
134   if (IsRunningOnValgrind()) {
135     return false;
136   }
137 
138   switch (level) {
139     case SeccompLevel::SINGLE_THREADED:
140       return KernelSupportsSeccompBPF();
141     case SeccompLevel::MULTI_THREADED:
142       return KernelSupportsSeccompTsync();
143   }
144   NOTREACHED();
145   return false;
146 }
147 
StartSandbox(SeccompLevel seccomp_level)148 bool SandboxBPF::StartSandbox(SeccompLevel seccomp_level) {
149   DCHECK(policy_);
150   CHECK(seccomp_level == SeccompLevel::SINGLE_THREADED ||
151         seccomp_level == SeccompLevel::MULTI_THREADED);
152 
153   if (sandbox_has_started_) {
154     SANDBOX_DIE(
155         "Cannot repeatedly start sandbox. Create a separate Sandbox "
156         "object instead.");
157     return false;
158   }
159 
160   if (!proc_fd_.is_valid()) {
161     SetProcFd(ProcUtil::OpenProc());
162   }
163 
164   const bool supports_tsync = KernelSupportsSeccompTsync();
165 
166   if (seccomp_level == SeccompLevel::SINGLE_THREADED) {
167     // Wait for /proc/self/task/ to update if needed and assert the
168     // process is single threaded.
169     ThreadHelpers::AssertSingleThreaded(proc_fd_.get());
170   } else if (seccomp_level == SeccompLevel::MULTI_THREADED) {
171     if (IsSingleThreaded(proc_fd_.get())) {
172       SANDBOX_DIE("Cannot start sandbox; "
173                   "process may be single-threaded when reported as not");
174       return false;
175     }
176     if (!supports_tsync) {
177       SANDBOX_DIE("Cannot start sandbox; kernel does not support synchronizing "
178                   "filters for a threadgroup");
179       return false;
180     }
181   }
182 
183   // We no longer need access to any files in /proc. We want to do this
184   // before installing the filters, just in case that our policy denies
185   // close().
186   if (proc_fd_.is_valid()) {
187     proc_fd_.reset();
188   }
189 
190   // Install the filters.
191   InstallFilter(supports_tsync ||
192                 seccomp_level == SeccompLevel::MULTI_THREADED);
193 
194   return true;
195 }
196 
SetProcFd(base::ScopedFD proc_fd)197 void SandboxBPF::SetProcFd(base::ScopedFD proc_fd) {
198   proc_fd_.swap(proc_fd);
199 }
200 
201 // static
IsValidSyscallNumber(int sysnum)202 bool SandboxBPF::IsValidSyscallNumber(int sysnum) {
203   return SyscallSet::IsValid(sysnum);
204 }
205 
206 // static
IsRequiredForUnsafeTrap(int sysno)207 bool SandboxBPF::IsRequiredForUnsafeTrap(int sysno) {
208   return bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap(sysno);
209 }
210 
211 // static
ForwardSyscall(const struct arch_seccomp_data & args)212 intptr_t SandboxBPF::ForwardSyscall(const struct arch_seccomp_data& args) {
213   return Syscall::Call(
214       args.nr, static_cast<intptr_t>(args.args[0]),
215       static_cast<intptr_t>(args.args[1]), static_cast<intptr_t>(args.args[2]),
216       static_cast<intptr_t>(args.args[3]), static_cast<intptr_t>(args.args[4]),
217       static_cast<intptr_t>(args.args[5]));
218 }
219 
AssembleFilter()220 CodeGen::Program SandboxBPF::AssembleFilter() {
221   DCHECK(policy_);
222 
223   bpf_dsl::PolicyCompiler compiler(policy_.get(), Trap::Registry());
224   if (Trap::SandboxDebuggingAllowedByUser()) {
225     compiler.DangerousSetEscapePC(EscapePC());
226   }
227   compiler.SetPanicFunc(SandboxPanic);
228   return compiler.Compile();
229 }
230 
InstallFilter(bool must_sync_threads)231 void SandboxBPF::InstallFilter(bool must_sync_threads) {
232   // We want to be very careful in not imposing any requirements on the
233   // policies that are set with SetSandboxPolicy(). This means, as soon as
234   // the sandbox is active, we shouldn't be relying on libraries that could
235   // be making system calls. This, for example, means we should avoid
236   // using the heap and we should avoid using STL functions.
237   // Temporarily copy the contents of the "program" vector into a
238   // stack-allocated array; and then explicitly destroy that object.
239   // This makes sure we don't ex- or implicitly call new/delete after we
240   // installed the BPF filter program in the kernel. Depending on the
241   // system memory allocator that is in effect, these operators can result
242   // in system calls to things like munmap() or brk().
243   CodeGen::Program program = AssembleFilter();
244 
245   struct sock_filter bpf[program.size()];
246   const struct sock_fprog prog = {static_cast<unsigned short>(program.size()),
247                                   bpf};
248   memcpy(bpf, &program[0], sizeof(bpf));
249   CodeGen::Program().swap(program);  // vector swap trick
250 
251   // Make an attempt to release memory that is no longer needed here, rather
252   // than in the destructor. Try to avoid as much as possible to presume of
253   // what will be possible to do in the new (sandboxed) execution environment.
254   policy_.reset();
255 
256   if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
257     SANDBOX_DIE("Kernel refuses to enable no-new-privs");
258   }
259 
260   // Install BPF filter program. If the thread state indicates multi-threading
261   // support, then the kernel hass the seccomp system call. Otherwise, fall
262   // back on prctl, which requires the process to be single-threaded.
263   if (must_sync_threads) {
264     int rv =
265         sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &prog);
266     if (rv) {
267       SANDBOX_DIE(
268           "Kernel refuses to turn on and synchronize threads for BPF filters");
269     }
270   } else {
271     if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
272       SANDBOX_DIE("Kernel refuses to turn on BPF filters");
273     }
274   }
275 
276   sandbox_has_started_ = true;
277 }
278 
279 }  // namespace sandbox
280