1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "sandbox/linux/seccomp-bpf/syscall.h"
6 
7 #include <errno.h>
8 #include <stdint.h>
9 
10 #include "base/logging.h"
11 #include "sandbox/linux/bpf_dsl/seccomp_macros.h"
12 
13 namespace sandbox {
14 
15 namespace {
16 
17 #if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \
18     defined(ARCH_CPU_MIPS_FAMILY)
19 // Number that's not currently used by any Linux kernel ABIs.
20 const int kInvalidSyscallNumber = 0x351d3;
21 #else
22 #error Unrecognized architecture
23 #endif
24 
// File-scope assembly that defines SyscallAsm, the routine Syscall::Call()
// uses to enter the kernel. Every architecture variant below follows the
// same outline:
//   * If the incoming system call number is negative, no system call is
//     made; the address of label "2" (the instruction right after the
//     kernel-entry instruction) is returned instead.
//   * Otherwise the arguments are loaded from the caller-supplied array into
//     the registers (and, on MIPS, stack slots) used for the system call,
//     the kernel is entered, and the value left in the result register is
//     returned to the caller.
asm(// We need to be able to tell the kernel exactly where we made a
    // system call. The C++ compiler likes to sometimes clone or
    // inline code, which would inadvertently end up duplicating
    // the entry point.
    // "gcc" can suppress code duplication with suitable function
    // attributes, but "clang" doesn't have this ability.
    // The "clang" developer mailing list suggested that the correct
    // and portable solution is a file-scope assembly block.
    // N.B. We do mark our code as a proper function so that backtraces
    // work correctly. But we make absolutely no attempt to use the
    // ABI's calling conventions for passing arguments. We will only
    // ever be called from assembly code and thus can pick more
    // suitable calling conventions.
#if defined(__i386__)
    // Register convention used here: "%eax" carries the system call number
    // in and the result out; "%edi" points to the array of six arguments.
    ".text\n"
    ".align 16, 0x90\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.cfi_startproc\n"
    // Check if "%eax" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "int $0x80". This address can be
    // used as a marker that BPF code inspects.
    "test %eax, %eax\n"
    "jge  1f\n"
    // Always, make sure that our code is position-independent, or
    // address space randomization might not work on i386. This means,
    // we can't use "lea", but instead have to rely on "call/pop".
    "call 0f;   .cfi_adjust_cfa_offset  4\n"
    "0:pop  %eax; .cfi_adjust_cfa_offset -4\n"
    // "%eax" now holds the address of label 0; adding the distance to
    // label 2 yields the marker address.
    "addl $2f-0b, %eax\n"
    "ret\n"
    // Save registers that we don't want to clobber. On i386, we need to
    // save relatively aggressively, as there are a couple of registers
    // that are used internally (e.g. %ebx for position-independent
    // code, and %ebp for the frame pointer), and as we need to keep at
    // least a few registers available for the register allocator.
    "1:push %esi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset esi, 0\n"
    "push %edi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset edi, 0\n"
    "push %ebx; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebx, 0\n"
    "push %ebp; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebp, 0\n"
    // Copy entries from the array holding the arguments into the
    // correct CPU registers. "%edi" is loaded last because it is also
    // the base pointer for all of the other loads.
    "movl  0(%edi), %ebx\n"
    "movl  4(%edi), %ecx\n"
    "movl  8(%edi), %edx\n"
    "movl 12(%edi), %esi\n"
    "movl 20(%edi), %ebp\n"
    "movl 16(%edi), %edi\n"
    // Enter the kernel.
    "int  $0x80\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:"
    // Restore any clobbered registers that we didn't declare to the
    // compiler.
    "pop  %ebp; .cfi_restore ebp; .cfi_adjust_cfa_offset -4\n"
    "pop  %ebx; .cfi_restore ebx; .cfi_adjust_cfa_offset -4\n"
    "pop  %edi; .cfi_restore edi; .cfi_adjust_cfa_offset -4\n"
    "pop  %esi; .cfi_restore esi; .cfi_adjust_cfa_offset -4\n"
    "ret\n"
    ".cfi_endproc\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__x86_64__)
    // Register convention used here: "%rdi" carries the system call number
    // and "%rsi" points to the array of six arguments; the result is
    // returned in "%rax".
    ".text\n"
    ".align 16, 0x90\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.cfi_startproc\n"
    // Check if "%rdi" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "syscall". This address can be
    // used as a marker that BPF code inspects.
    "test %rdi, %rdi\n"
    "jge  1f\n"
    // Always make sure that our code is position-independent, or the
    // linker will throw a hissy fit on x86-64.
    "lea 2f(%rip), %rax\n"
    "ret\n"
    // Now we load the registers used to pass arguments to the system
    // call: system call number in %rax, and arguments in %rdi, %rsi,
    // %rdx, %r10, %r8, %r9. Note: These are all caller-save registers
    // (only %rbx, %rbp, %rsp, and %r12-%r15 are callee-save), so no
    // need to worry here about spilling registers or CFI directives.
    // "%rsi" is loaded last because it is also the base pointer for all
    // of the other loads.
    "1:movq %rdi, %rax\n"
    "movq  0(%rsi), %rdi\n"
    "movq 16(%rsi), %rdx\n"
    "movq 24(%rsi), %r10\n"
    "movq 32(%rsi), %r8\n"
    "movq 40(%rsi), %r9\n"
    "movq  8(%rsi), %rsi\n"
    // Enter the kernel.
    "syscall\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:ret\n"
    ".cfi_endproc\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__arm__)
    // Throughout this file, we use the same mode (ARM vs. thumb)
    // that the C++ compiler uses. This means, when transferring control
    // from C++ to assembly code, we do not need to switch modes (e.g.
    // by using the "bx" instruction). It also means that our assembly
    // code should not be invoked directly from code that lives in
    // other compilation units, as we don't bother implementing thumb
    // interworking. That's OK, as we don't make any of the assembly
    // symbols public. They are all local to this file.
    //
    // Register convention used here: "r0" carries the system call number
    // in and the result out; "r6" points to the array of six arguments.
    ".text\n"
    ".align 2\n"
    ".type SyscallAsm, %function\n"
#if defined(__thumb__)
    ".thumb_func\n"
#else
    ".arm\n"
#endif
    "SyscallAsm:\n"
#if !defined(__native_client_nonsfi__)
    // The .fnstart and .fnend pseudo operations create an unwind table.
    // They also create a reference to the symbol __aeabi_unwind_cpp_pr0,
    // which is not provided by the PNaCl toolchain. Disable them there.
    ".fnstart\n"
#endif
    "@ args = 0, pretend = 0, frame = 8\n"
    "@ frame_needed = 1, uses_anonymous_args = 0\n"
#if defined(__thumb__)
    // Thumb: save r7 and the return address on the stack.
    ".cfi_startproc\n"
    "push {r7, lr}\n"
    ".save {r7, lr}\n"
    ".cfi_offset 14, -4\n"
    ".cfi_offset  7, -8\n"
    ".cfi_def_cfa_offset 8\n"
#else
    // ARM: save fp and the return address, then set up fp.
    "stmfd sp!, {fp, lr}\n"
    "add fp, sp, #4\n"
#endif
    // Check if "r0" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "swi 0". This address can be
    // used as a marker that BPF code inspects.
    "cmp r0, #0\n"
    "bge 1f\n"
    "adr r0, 2f\n"
    "b   2f\n"
    // We declared (almost) all clobbered registers to the compiler. On
    // ARM there is no particular register pressure. So, we can go
    // ahead and directly copy the entries from the arguments array
    // into the appropriate CPU registers. The system call number is
    // moved from "r0" into "r7" before "r0" receives the first argument.
    "1:ldr r5, [r6, #20]\n"
    "ldr r4, [r6, #16]\n"
    "ldr r3, [r6, #12]\n"
    "ldr r2, [r6, #8]\n"
    "ldr r1, [r6, #4]\n"
    "mov r7, r0\n"
    "ldr r0, [r6, #0]\n"
    // Enter the kernel
    "swi 0\n"
// Restore the frame pointer. Also restore the program counter from
// the link register; this makes us return to the caller.
// Label 2 is the "magic" return address computed by "adr r0, 2f" above.
#if defined(__thumb__)
    "2:pop {r7, pc}\n"
    ".cfi_endproc\n"
#else
    "2:ldmfd sp!, {fp, pc}\n"
#endif
#if !defined(__native_client_nonsfi__)
    // Do not use .fnstart and .fnend for PNaCl toolchain. See above comment,
    // for more details.
    ".fnend\n"
#endif
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__mips__)
    // Register convention used here: "v0" carries the system call number
    // in and the result out; "a0" points to the array of eight arguments;
    // "t9" must hold the address of SyscallAsm (it is consumed by
    // ".cpload $t9").
    ".text\n"
    ".option pic2\n"
    ".align 4\n"
    ".global SyscallAsm\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.ent SyscallAsm\n"
    ".frame  $sp, 40, $ra\n"
    ".set   push\n"
    ".set   noreorder\n"
    ".cpload $t9\n"
    // Reserve a 40-byte frame and save the return address in it.
    "addiu  $sp, $sp, -40\n"
    "sw     $ra, 36($sp)\n"
    // Check if "v0" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "syscall". This address can be
    // used as a marker that BPF code inspects.
    "bgez   $v0, 1f\n"
    " nop\n"
    // This is equivalent to "la $v0, 2f".
    // LA macro has to be avoided since LLVM-AS has issue with LA in PIC mode
    // https://llvm.org/bugs/show_bug.cgi?id=27644
    "lw     $v0, %got(2f)($gp)\n"
    "addiu  $v0, $v0, %lo(2f)\n"
    "b      2f\n"
    " nop\n"
    // On MIPS first four arguments go to registers a0 - a3 and any
    // argument after that goes to stack. We can go ahead and directly
    // copy the entries from the arguments array into the appropriate
    // CPU registers and on the stack.
    // Array entries 4..7 are copied to stack offsets 16..28 first, using
    // a1-a3 and t0 as scratch registers; entries 0..3 are then loaded into
    // a0-a3, with "a0" last because it is also the base pointer.
    "1:lw     $a3, 28($a0)\n"
    "lw     $a2, 24($a0)\n"
    "lw     $a1, 20($a0)\n"
    "lw     $t0, 16($a0)\n"
    "sw     $a3, 28($sp)\n"
    "sw     $a2, 24($sp)\n"
    "sw     $a1, 20($sp)\n"
    "sw     $t0, 16($sp)\n"
    "lw     $a3, 12($a0)\n"
    "lw     $a2, 8($a0)\n"
    "lw     $a1, 4($a0)\n"
    "lw     $a0, 0($a0)\n"
    // Enter the kernel
    "syscall\n"
    // This is our "magic" return address that the BPF filter sees.
    // Restore the return address from the stack.
    "2:lw     $ra, 36($sp)\n"
    "jr     $ra\n"
    " addiu  $sp, $sp, 40\n"
    ".set    pop\n"
    ".end    SyscallAsm\n"
    ".size   SyscallAsm,.-SyscallAsm\n"
#elif defined(__aarch64__)
    // Register convention used here: "x0" carries the system call number
    // in and the result out; "x6" points to the array of six arguments.
    ".text\n"
    ".align 2\n"
    ".type SyscallAsm, %function\n"
    "SyscallAsm:\n"
    ".cfi_startproc\n"
    // Check if "x0" is negative. If so, do not attempt to make a
    // system call. Instead, return the address of label 2, i.e. the
    // instruction that follows "svc 0". Note that "cmp" updates the
    // condition flags.
    "cmp x0, #0\n"
    "b.ge 1f\n"
    "adr x0,2f\n"
    "b 2f\n"
    // Copy the entries of the arguments array into x0-x5. The system call
    // number is moved from "x0" into "x8" before "x0" receives the first
    // argument.
    "1:ldr x5, [x6, #40]\n"
    "ldr x4, [x6, #32]\n"
    "ldr x3, [x6, #24]\n"
    "ldr x2, [x6, #16]\n"
    "ldr x1, [x6, #8]\n"
    "mov x8, x0\n"
    "ldr x0, [x6, #0]\n"
    // Enter the kernel
    "svc 0\n"
    // Label 2 is the address computed by "adr x0,2f" above.
    "2:ret\n"
    ".cfi_endproc\n"
    ".size SyscallAsm, .-SyscallAsm\n"
#endif
    );  // asm
267 
// C-linkage prototype for the assembly routine SyscallAsm. It is only
// declared on the architectures whose C++ code names the symbol directly:
// x86-64 calls it like a regular function, and MIPS takes its address. The
// array bound documents how many argument slots the routine reads.
#if defined(__x86_64__)
extern "C" intptr_t SyscallAsm(intptr_t nr, const intptr_t args[6]);
#elif defined(__mips__)
extern "C" intptr_t SyscallAsm(intptr_t nr, const intptr_t args[8]);
#endif
277 
278 }  // namespace
279 
InvalidCall()280 intptr_t Syscall::InvalidCall() {
281   // Explicitly pass eight zero arguments just in case.
282   return Call(kInvalidSyscallNumber, 0, 0, 0, 0, 0, 0, 0, 0);
283 }
284 
Call(int nr,intptr_t p0,intptr_t p1,intptr_t p2,intptr_t p3,intptr_t p4,intptr_t p5,intptr_t p6,intptr_t p7)285 intptr_t Syscall::Call(int nr,
286                        intptr_t p0,
287                        intptr_t p1,
288                        intptr_t p2,
289                        intptr_t p3,
290                        intptr_t p4,
291                        intptr_t p5,
292                        intptr_t p6,
293                        intptr_t p7) {
294   // We rely on "intptr_t" to be the exact size as a "void *". This is
295   // typically true, but just in case, we add a check. The language
296   // specification allows platforms some leeway in cases, where
297   // "sizeof(void *)" is not the same as "sizeof(void (*)())". We expect
298   // that this would only be an issue for IA64, which we are currently not
299   // planning on supporting. And it is even possible that this would work
300   // on IA64, but for lack of actual hardware, I cannot test.
301   static_assert(sizeof(void*) == sizeof(intptr_t),
302                 "pointer types and intptr_t must be exactly the same size");
303 
304   // TODO(nedeljko): Enable use of more than six parameters on architectures
305   //                 where that makes sense.
306 #if defined(__mips__)
307   const intptr_t args[8] = {p0, p1, p2, p3, p4, p5, p6, p7};
308 #else
309   DCHECK_EQ(p6, 0) << " Support for syscalls with more than six arguments not "
310                       "added for this architecture";
311   DCHECK_EQ(p7, 0) << " Support for syscalls with more than six arguments not "
312                       "added for this architecture";
313   const intptr_t args[6] = {p0, p1, p2, p3, p4, p5};
314 #endif  // defined(__mips__)
315 
316 // Invoke our file-scope assembly code. The constraints have been picked
317 // carefully to match what the rest of the assembly code expects in input,
318 // output, and clobbered registers.
319 #if defined(__i386__)
320   intptr_t ret = nr;
321   asm volatile(
322       "call SyscallAsm\n"
323       // N.B. These are not the calling conventions normally used by the ABI.
324       : "=a"(ret)
325       : "0"(ret), "D"(args)
326       : "cc", "esp", "memory", "ecx", "edx");
327 #elif defined(__x86_64__)
328   intptr_t ret = SyscallAsm(nr, args);
329 #elif defined(__arm__)
330   intptr_t ret;
331   {
332     register intptr_t inout __asm__("r0") = nr;
333     register const intptr_t* data __asm__("r6") = args;
334     asm volatile(
335         "bl SyscallAsm\n"
336         // N.B. These are not the calling conventions normally used by the ABI.
337         : "=r"(inout)
338         : "0"(inout), "r"(data)
339         : "cc",
340           "lr",
341           "memory",
342           "r1",
343           "r2",
344           "r3",
345           "r4",
346           "r5"
347 #if !defined(__thumb__)
348           // In thumb mode, we cannot use "r7" as a general purpose register, as
349           // it is our frame pointer. We have to manually manage and preserve
350           // it.
351           // In ARM mode, we have a dedicated frame pointer register and "r7" is
352           // thus available as a general purpose register. We don't preserve it,
353           // but instead mark it as clobbered.
354           ,
355           "r7"
356 #endif  // !defined(__thumb__)
357         );
358     ret = inout;
359   }
360 #elif defined(__mips__)
361   int err_status;
362   intptr_t ret = Syscall::SandboxSyscallRaw(nr, args, &err_status);
363 
364   if (err_status) {
365     // On error, MIPS returns errno from syscall instead of -errno.
366     // The purpose of this negation is for SandboxSyscall() to behave
367     // more like it would on other architectures.
368     ret = -ret;
369   }
370 #elif defined(__aarch64__)
371   intptr_t ret;
372   {
373     register intptr_t inout __asm__("x0") = nr;
374     register const intptr_t* data __asm__("x6") = args;
375     asm volatile("bl SyscallAsm\n"
376                  : "=r"(inout)
377                  : "0"(inout), "r"(data)
378                  : "memory", "x1", "x2", "x3", "x4", "x5", "x8", "x30");
379     ret = inout;
380   }
381 
382 #else
383 #error "Unimplemented architecture"
384 #endif
385   return ret;
386 }
387 
PutValueInUcontext(intptr_t ret_val,ucontext_t * ctx)388 void Syscall::PutValueInUcontext(intptr_t ret_val, ucontext_t* ctx) {
389 #if defined(__mips__)
390   // Mips ABI states that on error a3 CPU register has non zero value and if
391   // there is no error, it should be zero.
392   if (ret_val <= -1 && ret_val >= -4095) {
393     // |ret_val| followes the Syscall::Call() convention of being -errno on
394     // errors. In order to write correct value to return register this sign
395     // needs to be changed back.
396     ret_val = -ret_val;
397     SECCOMP_PARM4(ctx) = 1;
398   } else
399     SECCOMP_PARM4(ctx) = 0;
400 #endif
401   SECCOMP_RESULT(ctx) = static_cast<greg_t>(ret_val);
402 }
403 
#if defined(__mips__)
// Calls the assembly routine SyscallAsm using its private MIPS register
// convention: the system call number goes in v0, the argument array pointer
// in a0 and the address of SyscallAsm itself in t9.
//
// Returns the value SyscallAsm leaves in v0 and stores the final value of
// the a3 register in |*err_ret| (non-zero when the call reported an error).
// No sign adjustment is applied to the returned value here; see
// Syscall::Call().
intptr_t Syscall::SandboxSyscallRaw(int nr,
                                    const intptr_t* args,
                                    intptr_t* err_ret) {
  register intptr_t ret __asm__("v0") = nr;
  register intptr_t syscallasm __asm__("t9") =
      reinterpret_cast<intptr_t>(&SyscallAsm);
  // a3 register becomes non zero on error. It is passed as a read-write
  // operand so that it is guaranteed to hold zero on entry: when |nr| is
  // negative SyscallAsm returns without executing "syscall" and never
  // writes a3, so an output-only operand would report stale contents.
  register intptr_t err_stat __asm__("a3") = 0;
  // SyscallAsm overwrites a0 with the first argument, so the pointer is a
  // read-write operand as well; its value after the call is not used.
  register const intptr_t* data __asm__("a0") = args;
  asm volatile(
      "jalr $t9\n"
      " nop\n"
      : "+r"(ret), "+r"(err_stat), "+r"(data)
      : "r"(syscallasm)
      // a1, a2 and t0 are loaded by SyscallAsm while it shuffles the
      // arguments, and "jalr" overwrites ra. Listing them keeps the compiler
      // from holding live values (presumably including |err_ret|) in those
      // registers across the call.
      // NOTE(review): the kernel may clobber further temporaries across
      // "syscall"; confirm against the Linux MIPS syscall ABI.
      : "memory", "ra", "a1", "a2", "t0");

  // Set an error status so it can be used outside of this function
  *err_ret = err_stat;

  return ret;
}
#endif  // defined(__mips__)
432 
433 }  // namespace sandbox
434