1 /*
2 * Copyright (C) 2008 The Android Open Source Project
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in
12 * the documentation and/or other materials provided with the
13 * distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include "debuggerd/handler.h"
30
31 #include <errno.h>
32 #include <fcntl.h>
33 #include <inttypes.h>
34 #include <linux/futex.h>
35 #include <pthread.h>
36 #include <sched.h>
37 #include <signal.h>
38 #include <stddef.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <sys/capability.h>
43 #include <sys/mman.h>
44 #include <sys/prctl.h>
45 #include <sys/socket.h>
46 #include <sys/syscall.h>
47 #include <sys/un.h>
48 #include <sys/wait.h>
49 #include <unistd.h>
50
51 #include "private/bionic_futex.h"
52 #include "private/libc_logging.h"
53
54 // see man(2) prctl, specifically the section about PR_GET_NAME
55 #define MAX_TASK_NAME_LEN (16)
56
57 #if defined(__LP64__)
58 #define CRASH_DUMP_NAME "crash_dump64"
59 #else
60 #define CRASH_DUMP_NAME "crash_dump32"
61 #endif
62
63 #define CRASH_DUMP_PATH "/system/bin/" CRASH_DUMP_NAME
64
65 // Wrappers that directly invoke the respective syscalls, in case the cached values are invalid.
66 #pragma GCC poison getpid gettid
// Returns the process id obtained directly from the kernel with a raw
// getpid syscall instead of going through the libc wrapper.
static pid_t __getpid() {
  pid_t kernel_pid = syscall(__NR_getpid);
  return kernel_pid;
}
70
// Returns the calling thread's id obtained directly from the kernel with a
// raw gettid syscall instead of going through the libc wrapper.
static pid_t __gettid() {
  pid_t kernel_tid = syscall(__NR_gettid);
  return kernel_tid;
}
74
// Scope guard for errno: the value errno holds when the guard is constructed
// is written back to errno when the guard is destroyed.
class ErrnoRestorer {
 public:
  ErrnoRestorer() {
    errno_at_entry_ = errno;
  }

  ~ErrnoRestorer() {
    errno = errno_at_entry_;
  }

 private:
  // Snapshot of errno taken by the constructor.
  int errno_at_entry_;
};
87
88 extern "C" void debuggerd_fallback_handler(siginfo_t*, ucontext_t*, void*);
89
90 static debuggerd_callbacks_t g_callbacks;
91
92 // Mutex to ensure only one crashing thread dumps itself.
93 static pthread_mutex_t crash_mutex = PTHREAD_MUTEX_INITIALIZER;
94
95 // Don't use __libc_fatal because it exits via abort, which might put us back into a signal handler.
fatal(const char * fmt,...)96 static void __noreturn __printflike(1, 2) fatal(const char* fmt, ...) {
97 va_list args;
98 va_start(args, fmt);
99 __libc_format_log_va_list(ANDROID_LOG_FATAL, "libc", fmt, args);
100 _exit(1);
101 }
102
fatal_errno(const char * fmt,...)103 static void __noreturn __printflike(1, 2) fatal_errno(const char* fmt, ...) {
104 int err = errno;
105 va_list args;
106 va_start(args, fmt);
107
108 char buf[4096];
109 __libc_format_buffer_va_list(buf, sizeof(buf), fmt, args);
110 fatal("%s: %s", buf, strerror(err));
111 }
112
113 /*
114 * Writes a summary of the signal to the log file. We do this so that, if
115 * for some reason we're not able to contact debuggerd, there is still some
116 * indication of the failure in the log.
117 *
118 * We could be here as a result of native heap corruption, or while a
119 * mutex is being held, so we don't want to use any libc functions that
120 * could allocate memory or hold a lock.
121 */
log_signal_summary(int signum,const siginfo_t * info)122 static void log_signal_summary(int signum, const siginfo_t* info) {
123 char thread_name[MAX_TASK_NAME_LEN + 1]; // one more for termination
124 if (prctl(PR_GET_NAME, reinterpret_cast<unsigned long>(thread_name), 0, 0, 0) != 0) {
125 strcpy(thread_name, "<name unknown>");
126 } else {
127 // short names are null terminated by prctl, but the man page
128 // implies that 16 byte names are not.
129 thread_name[MAX_TASK_NAME_LEN] = 0;
130 }
131
132 if (signum == DEBUGGER_SIGNAL) {
133 __libc_format_log(ANDROID_LOG_INFO, "libc", "Requested dump for tid %d (%s)", __gettid(),
134 thread_name);
135 return;
136 }
137
138 const char* signal_name = "???";
139 bool has_address = false;
140 switch (signum) {
141 case SIGABRT:
142 signal_name = "SIGABRT";
143 break;
144 case SIGBUS:
145 signal_name = "SIGBUS";
146 has_address = true;
147 break;
148 case SIGFPE:
149 signal_name = "SIGFPE";
150 has_address = true;
151 break;
152 case SIGILL:
153 signal_name = "SIGILL";
154 has_address = true;
155 break;
156 case SIGSEGV:
157 signal_name = "SIGSEGV";
158 has_address = true;
159 break;
160 #if defined(SIGSTKFLT)
161 case SIGSTKFLT:
162 signal_name = "SIGSTKFLT";
163 break;
164 #endif
165 case SIGSYS:
166 signal_name = "SIGSYS";
167 break;
168 case SIGTRAP:
169 signal_name = "SIGTRAP";
170 break;
171 }
172
173 // "info" will be null if the siginfo_t information was not available.
174 // Many signals don't have an address or a code.
175 char code_desc[32]; // ", code -6"
176 char addr_desc[32]; // ", fault addr 0x1234"
177 addr_desc[0] = code_desc[0] = 0;
178 if (info != nullptr) {
179 __libc_format_buffer(code_desc, sizeof(code_desc), ", code %d", info->si_code);
180 if (has_address) {
181 __libc_format_buffer(addr_desc, sizeof(addr_desc), ", fault addr %p", info->si_addr);
182 }
183 }
184
185 __libc_format_log(ANDROID_LOG_FATAL, "libc", "Fatal signal %d (%s)%s%s in tid %d (%s)", signum,
186 signal_name, code_desc, addr_desc, __gettid(), thread_name);
187 }
188
189 /*
190 * Returns true if the handler for signal "signum" has SA_SIGINFO set.
191 */
have_siginfo(int signum)192 static bool have_siginfo(int signum) {
193 struct sigaction old_action;
194 if (sigaction(signum, nullptr, &old_action) < 0) {
195 __libc_format_log(ANDROID_LOG_WARN, "libc", "Failed testing for SA_SIGINFO: %s",
196 strerror(errno));
197 return false;
198 }
199 return (old_action.sa_flags & SA_SIGINFO) != 0;
200 }
201
raise_caps()202 static void raise_caps() {
203 // Raise CapInh to match CapPrm, so that we can set the ambient bits.
204 __user_cap_header_struct capheader;
205 memset(&capheader, 0, sizeof(capheader));
206 capheader.version = _LINUX_CAPABILITY_VERSION_3;
207 capheader.pid = 0;
208
209 __user_cap_data_struct capdata[2];
210 if (capget(&capheader, &capdata[0]) == -1) {
211 fatal_errno("capget failed");
212 }
213
214 if (capdata[0].permitted != capdata[0].inheritable ||
215 capdata[1].permitted != capdata[1].inheritable) {
216 capdata[0].inheritable = capdata[0].permitted;
217 capdata[1].inheritable = capdata[1].permitted;
218
219 if (capset(&capheader, &capdata[0]) == -1) {
220 __libc_format_log(ANDROID_LOG_ERROR, "libc", "capset failed: %s", strerror(errno));
221 }
222 }
223
224 // Set the ambient capability bits so that crash_dump gets all of our caps and can ptrace us.
225 uint64_t capmask = capdata[0].inheritable;
226 capmask |= static_cast<uint64_t>(capdata[1].inheritable) << 32;
227 for (unsigned long i = 0; i < 64; ++i) {
228 if (capmask & (1ULL << i)) {
229 if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, i, 0, 0) != 0) {
230 __libc_format_log(ANDROID_LOG_ERROR, "libc", "failed to raise ambient capability %lu: %s",
231 i, strerror(errno));
232 }
233 }
234 }
235 }
236
// State shared between the crashing thread's signal handler and the
// pseudothread it spawns to launch crash_dump.
struct debugger_thread_info {
  // Set to true by the pseudothread once the crash_dump helper reports success.
  bool crash_dump_started;

  // Tid of the thread that is running the signal handler.
  pid_t crashing_tid;

  // Tid of the pseudothread. Starts at -1, is written through clone's child
  // tid pointer, and is the word the handler futex-waits on.
  pid_t pseudothread_tid;

  // Signal number that was delivered to the handler.
  int signal_number;

  // siginfo for the signal (the handler synthesizes one when none is available).
  siginfo_t* info;
};
244
245 // Logging and contacting debuggerd requires free file descriptors, which we might not have.
246 // Work around this by spawning a "thread" that shares its parent's address space, but not its file
247 // descriptor table, so that we can close random file descriptors without affecting the original
248 // process. Note that this doesn't go through pthread_create, so TLS is shared with the spawning
249 // process.
250 static void* pseudothread_stack;
251
debuggerd_dispatch_pseudothread(void * arg)252 static int debuggerd_dispatch_pseudothread(void* arg) {
253 debugger_thread_info* thread_info = static_cast<debugger_thread_info*>(arg);
254
255 for (int i = 0; i < 1024; ++i) {
256 close(i);
257 }
258
259 int devnull = TEMP_FAILURE_RETRY(open("/dev/null", O_RDWR));
260
261 // devnull will be 0.
262 TEMP_FAILURE_RETRY(dup2(devnull, STDOUT_FILENO));
263 TEMP_FAILURE_RETRY(dup2(devnull, STDERR_FILENO));
264
265 int pipefds[2];
266 if (pipe(pipefds) != 0) {
267 fatal_errno("failed to create pipe");
268 }
269
270 // Don't use fork(2) to avoid calling pthread_atfork handlers.
271 int forkpid = clone(nullptr, nullptr, 0, nullptr);
272 if (forkpid == -1) {
273 __libc_format_log(ANDROID_LOG_FATAL, "libc", "failed to fork in debuggerd signal handler: %s",
274 strerror(errno));
275 } else if (forkpid == 0) {
276 TEMP_FAILURE_RETRY(dup2(pipefds[1], STDOUT_FILENO));
277 close(pipefds[0]);
278 close(pipefds[1]);
279
280 raise_caps();
281
282 char main_tid[10];
283 char pseudothread_tid[10];
284 __libc_format_buffer(main_tid, sizeof(main_tid), "%d", thread_info->crashing_tid);
285 __libc_format_buffer(pseudothread_tid, sizeof(pseudothread_tid), "%d", thread_info->pseudothread_tid);
286
287 execl(CRASH_DUMP_PATH, CRASH_DUMP_NAME, main_tid, pseudothread_tid, nullptr);
288
289 fatal_errno("exec failed");
290 } else {
291 close(pipefds[1]);
292 char buf[4];
293 ssize_t rc = TEMP_FAILURE_RETRY(read(pipefds[0], &buf, sizeof(buf)));
294 if (rc == -1) {
295 __libc_format_log(ANDROID_LOG_FATAL, "libc", "read of IPC pipe failed: %s", strerror(errno));
296 } else if (rc == 0) {
297 __libc_format_log(ANDROID_LOG_FATAL, "libc", "crash_dump helper failed to exec");
298 } else if (rc != 1) {
299 __libc_format_log(ANDROID_LOG_FATAL, "libc",
300 "read of IPC pipe returned unexpected value: %zd", rc);
301 } else {
302 if (buf[0] != '\1') {
303 __libc_format_log(ANDROID_LOG_FATAL, "libc", "crash_dump helper reported failure");
304 } else {
305 thread_info->crash_dump_started = true;
306 }
307 }
308 close(pipefds[0]);
309
310 // Don't leave a zombie child.
311 int status;
312 if (TEMP_FAILURE_RETRY(waitpid(forkpid, &status, 0)) == -1) {
313 __libc_format_log(ANDROID_LOG_FATAL, "libc", "failed to wait for crash_dump helper: %s",
314 strerror(errno));
315 } else if (WIFSTOPPED(status) || WIFSIGNALED(status)) {
316 __libc_format_log(ANDROID_LOG_FATAL, "libc", "crash_dump helper crashed or stopped");
317 thread_info->crash_dump_started = false;
318 }
319 }
320
321 syscall(__NR_exit, 0);
322 return 0;
323 }
324
resend_signal(siginfo_t * info,bool crash_dump_started)325 static void resend_signal(siginfo_t* info, bool crash_dump_started) {
326 // Signals can either be fatal or nonfatal.
327 // For fatal signals, crash_dump will send us the signal we crashed with
328 // before resuming us, so that processes using waitpid on us will see that we
329 // exited with the correct exit status (e.g. so that sh will report
330 // "Segmentation fault" instead of "Killed"). For this to work, we need
331 // to deregister our signal handler for that signal before continuing.
332 if (info->si_signo != DEBUGGER_SIGNAL) {
333 signal(info->si_signo, SIG_DFL);
334 }
335
336 // We need to return from our signal handler so that crash_dump can see the
337 // signal via ptrace and dump the thread that crashed. However, returning
338 // does not guarantee that the signal will be thrown again, even for SIGSEGV
339 // and friends, since the signal could have been sent manually. We blocked
340 // all signals when registering the handler, so resending the signal (using
341 // rt_tgsigqueueinfo(2) to preserve SA_SIGINFO) will cause it to be delivered
342 // when our signal handler returns.
343 if (crash_dump_started || info->si_signo != DEBUGGER_SIGNAL) {
344 int rc = syscall(SYS_rt_tgsigqueueinfo, __getpid(), __gettid(), info->si_signo, info);
345 if (rc != 0) {
346 fatal_errno("failed to resend signal during crash");
347 }
348 }
349 }
350
351 // Handler that does crash dumping by forking and doing the processing in the child.
352 // Do this by ptracing the relevant thread, and then execing debuggerd to do the actual dump.
debuggerd_signal_handler(int signal_number,siginfo_t * info,void * context)353 static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void* context) {
354 // Make sure we don't change the value of errno, in case a signal comes in between the process
355 // making a syscall and checking errno.
356 ErrnoRestorer restorer;
357
358 // It's possible somebody cleared the SA_SIGINFO flag, which would mean
359 // our "info" arg holds an undefined value.
360 if (!have_siginfo(signal_number)) {
361 info = nullptr;
362 }
363
364 struct siginfo si = {};
365 if (!info) {
366 memset(&si, 0, sizeof(si));
367 si.si_signo = signal_number;
368 si.si_code = SI_USER;
369 si.si_pid = __getpid();
370 si.si_uid = getuid();
371 info = &si;
372 } else if (info->si_code >= 0 || info->si_code == SI_TKILL) {
373 // rt_tgsigqueueinfo(2)'s documentation appears to be incorrect on kernels
374 // that contain commit 66dd34a (3.9+). The manpage claims to only allow
375 // negative si_code values that are not SI_TKILL, but 66dd34a changed the
376 // check to allow all si_code values in calls coming from inside the house.
377 }
378
379 void* abort_message = nullptr;
380 if (g_callbacks.get_abort_message) {
381 abort_message = g_callbacks.get_abort_message();
382 }
383
384 if (prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) == 1) {
385 // This check might be racy if another thread sets NO_NEW_PRIVS, but this should be unlikely,
386 // you can only set NO_NEW_PRIVS to 1, and the effect should be at worst a single missing
387 // ANR trace.
388 debuggerd_fallback_handler(info, static_cast<ucontext_t*>(context), abort_message);
389 resend_signal(info, false);
390 return;
391 }
392
393 // Only allow one thread to handle a signal at a time.
394 int ret = pthread_mutex_lock(&crash_mutex);
395 if (ret != 0) {
396 __libc_format_log(ANDROID_LOG_INFO, "libc", "pthread_mutex_lock failed: %s", strerror(ret));
397 return;
398 }
399
400 log_signal_summary(signal_number, info);
401
402 // If this was a fatal crash, populate si_value with the abort message address if possible.
403 // Note that applications can set an abort message without aborting.
404 if (abort_message && signal_number != DEBUGGER_SIGNAL) {
405 info->si_value.sival_ptr = abort_message;
406 }
407
408 debugger_thread_info thread_info = {
409 .crash_dump_started = false,
410 .pseudothread_tid = -1,
411 .crashing_tid = __gettid(),
412 .signal_number = signal_number,
413 .info = info
414 };
415
416 // Set PR_SET_DUMPABLE to 1, so that crash_dump can ptrace us.
417 int orig_dumpable = prctl(PR_GET_DUMPABLE);
418 if (prctl(PR_SET_DUMPABLE, 1) != 0) {
419 fatal_errno("failed to set dumpable");
420 }
421
422 // Essentially pthread_create without CLONE_FILES (see debuggerd_dispatch_pseudothread).
423 pid_t child_pid =
424 clone(debuggerd_dispatch_pseudothread, pseudothread_stack,
425 CLONE_THREAD | CLONE_SIGHAND | CLONE_VM | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID,
426 &thread_info, nullptr, nullptr, &thread_info.pseudothread_tid);
427 if (child_pid == -1) {
428 fatal_errno("failed to spawn debuggerd dispatch thread");
429 }
430
431 // Wait for the child to start...
432 __futex_wait(&thread_info.pseudothread_tid, -1, nullptr);
433
434 // and then wait for it to finish.
435 __futex_wait(&thread_info.pseudothread_tid, child_pid, nullptr);
436
437 // Restore PR_SET_DUMPABLE to its original value.
438 if (prctl(PR_SET_DUMPABLE, orig_dumpable) != 0) {
439 fatal_errno("failed to restore dumpable");
440 }
441
442 // Signals can either be fatal or nonfatal.
443 // For fatal signals, crash_dump will PTRACE_CONT us with the signal we
444 // crashed with, so that processes using waitpid on us will see that we
445 // exited with the correct exit status (e.g. so that sh will report
446 // "Segmentation fault" instead of "Killed"). For this to work, we need
447 // to deregister our signal handler for that signal before continuing.
448 if (signal_number != DEBUGGER_SIGNAL) {
449 signal(signal_number, SIG_DFL);
450 }
451
452 resend_signal(info, thread_info.crash_dump_started);
453 if (info->si_signo == DEBUGGER_SIGNAL) {
454 // If the signal is fatal, don't unlock the mutex to prevent other crashing threads from
455 // starting to dump right before our death.
456 pthread_mutex_unlock(&crash_mutex);
457 }
458 }
459
debuggerd_init(debuggerd_callbacks_t * callbacks)460 void debuggerd_init(debuggerd_callbacks_t* callbacks) {
461 if (callbacks) {
462 g_callbacks = *callbacks;
463 }
464
465 void* thread_stack_allocation =
466 mmap(nullptr, PAGE_SIZE * 3, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
467 if (thread_stack_allocation == MAP_FAILED) {
468 fatal_errno("failed to allocate debuggerd thread stack");
469 }
470
471 char* stack = static_cast<char*>(thread_stack_allocation) + PAGE_SIZE;
472 if (mprotect(stack, PAGE_SIZE, PROT_READ | PROT_WRITE) != 0) {
473 fatal_errno("failed to mprotect debuggerd thread stack");
474 }
475
476 // Stack grows negatively, set it to the last byte in the page...
477 stack = (stack + PAGE_SIZE - 1);
478 // and align it.
479 stack -= 15;
480 pseudothread_stack = stack;
481
482 struct sigaction action;
483 memset(&action, 0, sizeof(action));
484 sigfillset(&action.sa_mask);
485 action.sa_sigaction = debuggerd_signal_handler;
486 action.sa_flags = SA_RESTART | SA_SIGINFO;
487
488 // Use the alternate signal stack if available so we can catch stack overflows.
489 action.sa_flags |= SA_ONSTACK;
490 debuggerd_register_handlers(&action);
491 }
492