1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
6
7 #include <errno.h>
8 #include <stdint.h>
9 #include <sys/prctl.h>
10 #include <sys/types.h>
11 #include <unistd.h>
12
13 #include "base/compiler_specific.h"
14 #include "base/files/scoped_file.h"
15 #include "base/logging.h"
16 #include "base/macros.h"
17 #include "base/memory/scoped_ptr.h"
18 #include "base/posix/eintr_wrapper.h"
19 #include "sandbox/linux/bpf_dsl/bpf_dsl.h"
20 #include "sandbox/linux/bpf_dsl/codegen.h"
21 #include "sandbox/linux/bpf_dsl/policy.h"
22 #include "sandbox/linux/bpf_dsl/policy_compiler.h"
23 #include "sandbox/linux/bpf_dsl/seccomp_macros.h"
24 #include "sandbox/linux/bpf_dsl/syscall_set.h"
25 #include "sandbox/linux/seccomp-bpf/die.h"
26 #include "sandbox/linux/seccomp-bpf/syscall.h"
27 #include "sandbox/linux/seccomp-bpf/trap.h"
28 #include "sandbox/linux/services/proc_util.h"
29 #include "sandbox/linux/services/syscall_wrappers.h"
30 #include "sandbox/linux/services/thread_helpers.h"
31 #include "sandbox/linux/system_headers/linux_filter.h"
32 #include "sandbox/linux/system_headers/linux_seccomp.h"
33 #include "sandbox/linux/system_headers/linux_syscalls.h"
34 #include "third_party/valgrind/valgrind.h"
35
36 namespace sandbox {
37
38 namespace {
39
IsRunningOnValgrind()40 bool IsRunningOnValgrind() { return RUNNING_ON_VALGRIND; }
41
IsSingleThreaded(int proc_fd)42 bool IsSingleThreaded(int proc_fd) {
43 return ThreadHelpers::IsSingleThreaded(proc_fd);
44 }
45
46 // Check if the kernel supports seccomp-filter (a.k.a. seccomp mode 2) via
47 // prctl().
KernelSupportsSeccompBPF()48 bool KernelSupportsSeccompBPF() {
49 errno = 0;
50 const int rv = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, nullptr);
51
52 if (rv == -1 && EFAULT == errno) {
53 return true;
54 }
55 return false;
56 }
57
// LG introduced a buggy syscall, sys_set_media_ext, that shares its number
// with seccomp. Returns true if the running kernel has this buggy syscall.
// The probe is only compiled on Android; every other build returns false
// without issuing a syscall.
//
// To keep working with upcoming versions of seccomp, the probe passes bogus
// flags that the kernel is unlikely to ever give a meaning to. A normal kernel
// would return -EINVAL, but a buggy LG kernel would return 1.
bool KernelHasLGBug() {
#if defined(OS_ANDROID)
  // sys_set_media will see this as NULL, which should be a safe (non-crashing)
  // way to invoke it. A genuine seccomp syscall will see it as
  // SECCOMP_SET_MODE_STRICT.
  const unsigned int probe_operation = 0;
  // Chosen by fair dice roll. Guaranteed to be random.
  const unsigned int bogus_flags = 0xf7a46a5c;
  // A genuine kernel would return -EINVAL (seen here as -1 with errno set to
  // EINVAL), or at the very least some other error (also seen as -1). Any
  // other result means that whatever received the syscall was not the real
  // seccomp.
  return sys_seccomp(probe_operation, bogus_flags, nullptr) != -1;
#else
  return false;
#endif  // defined(OS_ANDROID)
}
84
85 // Check if the kernel supports seccomp-filter via the seccomp system call
86 // and the TSYNC feature to enable seccomp on all threads.
KernelSupportsSeccompTsync()87 bool KernelSupportsSeccompTsync() {
88 if (KernelHasLGBug()) {
89 return false;
90 }
91
92 errno = 0;
93 const int rv =
94 sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, nullptr);
95
96 if (rv == -1 && errno == EFAULT) {
97 return true;
98 } else {
99 // TODO(jln): turn these into DCHECK after 417888 is considered fixed.
100 CHECK_EQ(-1, rv);
101 CHECK(ENOSYS == errno || EINVAL == errno);
102 return false;
103 }
104 }
105
EscapePC()106 uint64_t EscapePC() {
107 intptr_t rv = Syscall::Call(-1);
108 if (rv == -1 && errno == ENOSYS) {
109 return 0;
110 }
111 return static_cast<uint64_t>(static_cast<uintptr_t>(rv));
112 }
113
SandboxPanicTrap(const struct arch_seccomp_data &,void * aux)114 intptr_t SandboxPanicTrap(const struct arch_seccomp_data&, void* aux) {
115 SANDBOX_DIE(static_cast<const char*>(aux));
116 }
117
SandboxPanic(const char * error)118 bpf_dsl::ResultExpr SandboxPanic(const char* error) {
119 return bpf_dsl::Trap(SandboxPanicTrap, error);
120 }
121
122 } // namespace
123
SandboxBPF(bpf_dsl::Policy * policy)124 SandboxBPF::SandboxBPF(bpf_dsl::Policy* policy)
125 : proc_fd_(), sandbox_has_started_(false), policy_(policy) {
126 }
127
~SandboxBPF()128 SandboxBPF::~SandboxBPF() {
129 }
130
131 // static
SupportsSeccompSandbox(SeccompLevel level)132 bool SandboxBPF::SupportsSeccompSandbox(SeccompLevel level) {
133 // Never pretend to support seccomp with Valgrind, as it
134 // throws the tool off.
135 if (IsRunningOnValgrind()) {
136 return false;
137 }
138
139 switch (level) {
140 case SeccompLevel::SINGLE_THREADED:
141 return KernelSupportsSeccompBPF();
142 case SeccompLevel::MULTI_THREADED:
143 return KernelSupportsSeccompTsync();
144 }
145 NOTREACHED();
146 return false;
147 }
148
StartSandbox(SeccompLevel seccomp_level)149 bool SandboxBPF::StartSandbox(SeccompLevel seccomp_level) {
150 DCHECK(policy_);
151 CHECK(seccomp_level == SeccompLevel::SINGLE_THREADED ||
152 seccomp_level == SeccompLevel::MULTI_THREADED);
153
154 if (sandbox_has_started_) {
155 SANDBOX_DIE(
156 "Cannot repeatedly start sandbox. Create a separate Sandbox "
157 "object instead.");
158 return false;
159 }
160
161 if (!proc_fd_.is_valid()) {
162 SetProcFd(ProcUtil::OpenProc());
163 }
164
165 const bool supports_tsync = KernelSupportsSeccompTsync();
166
167 if (seccomp_level == SeccompLevel::SINGLE_THREADED) {
168 // Wait for /proc/self/task/ to update if needed and assert the
169 // process is single threaded.
170 ThreadHelpers::AssertSingleThreaded(proc_fd_.get());
171 } else if (seccomp_level == SeccompLevel::MULTI_THREADED) {
172 if (IsSingleThreaded(proc_fd_.get())) {
173 SANDBOX_DIE("Cannot start sandbox; "
174 "process may be single-threaded when reported as not");
175 return false;
176 }
177 if (!supports_tsync) {
178 SANDBOX_DIE("Cannot start sandbox; kernel does not support synchronizing "
179 "filters for a threadgroup");
180 return false;
181 }
182 }
183
184 // We no longer need access to any files in /proc. We want to do this
185 // before installing the filters, just in case that our policy denies
186 // close().
187 if (proc_fd_.is_valid()) {
188 proc_fd_.reset();
189 }
190
191 // Install the filters.
192 InstallFilter(supports_tsync ||
193 seccomp_level == SeccompLevel::MULTI_THREADED);
194
195 return true;
196 }
197
SetProcFd(base::ScopedFD proc_fd)198 void SandboxBPF::SetProcFd(base::ScopedFD proc_fd) {
199 proc_fd_.swap(proc_fd);
200 }
201
202 // static
IsValidSyscallNumber(int sysnum)203 bool SandboxBPF::IsValidSyscallNumber(int sysnum) {
204 return SyscallSet::IsValid(sysnum);
205 }
206
207 // static
IsRequiredForUnsafeTrap(int sysno)208 bool SandboxBPF::IsRequiredForUnsafeTrap(int sysno) {
209 return bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap(sysno);
210 }
211
212 // static
ForwardSyscall(const struct arch_seccomp_data & args)213 intptr_t SandboxBPF::ForwardSyscall(const struct arch_seccomp_data& args) {
214 return Syscall::Call(
215 args.nr, static_cast<intptr_t>(args.args[0]),
216 static_cast<intptr_t>(args.args[1]), static_cast<intptr_t>(args.args[2]),
217 static_cast<intptr_t>(args.args[3]), static_cast<intptr_t>(args.args[4]),
218 static_cast<intptr_t>(args.args[5]));
219 }
220
AssembleFilter()221 CodeGen::Program SandboxBPF::AssembleFilter() {
222 DCHECK(policy_);
223
224 bpf_dsl::PolicyCompiler compiler(policy_.get(), Trap::Registry());
225 if (Trap::SandboxDebuggingAllowedByUser()) {
226 compiler.DangerousSetEscapePC(EscapePC());
227 }
228 compiler.SetPanicFunc(SandboxPanic);
229 return compiler.Compile();
230 }
231
InstallFilter(bool must_sync_threads)232 void SandboxBPF::InstallFilter(bool must_sync_threads) {
233 // We want to be very careful in not imposing any requirements on the
234 // policies that are set with SetSandboxPolicy(). This means, as soon as
235 // the sandbox is active, we shouldn't be relying on libraries that could
236 // be making system calls. This, for example, means we should avoid
237 // using the heap and we should avoid using STL functions.
238 // Temporarily copy the contents of the "program" vector into a
239 // stack-allocated array; and then explicitly destroy that object.
240 // This makes sure we don't ex- or implicitly call new/delete after we
241 // installed the BPF filter program in the kernel. Depending on the
242 // system memory allocator that is in effect, these operators can result
243 // in system calls to things like munmap() or brk().
244 CodeGen::Program program = AssembleFilter();
245
246 struct sock_filter bpf[program.size()];
247 const struct sock_fprog prog = {static_cast<unsigned short>(program.size()),
248 bpf};
249 memcpy(bpf, &program[0], sizeof(bpf));
250 CodeGen::Program().swap(program); // vector swap trick
251
252 // Make an attempt to release memory that is no longer needed here, rather
253 // than in the destructor. Try to avoid as much as possible to presume of
254 // what will be possible to do in the new (sandboxed) execution environment.
255 policy_.reset();
256
257 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
258 SANDBOX_DIE("Kernel refuses to enable no-new-privs");
259 }
260
261 // Install BPF filter program. If the thread state indicates multi-threading
262 // support, then the kernel hass the seccomp system call. Otherwise, fall
263 // back on prctl, which requires the process to be single-threaded.
264 if (must_sync_threads) {
265 int rv =
266 sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &prog);
267 if (rv) {
268 SANDBOX_DIE(
269 "Kernel refuses to turn on and synchronize threads for BPF filters");
270 }
271 } else {
272 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
273 SANDBOX_DIE("Kernel refuses to turn on BPF filters");
274 }
275 }
276
277 sandbox_has_started_ = true;
278 }
279
280 } // namespace sandbox
281