1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *  * Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  *  * Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in
12  *    the documentation and/or other materials provided with the
13  *    distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include "debuggerd/handler.h"
30 
31 #include <errno.h>
32 #include <fcntl.h>
33 #include <inttypes.h>
34 #include <linux/futex.h>
35 #include <pthread.h>
36 #include <sched.h>
37 #include <signal.h>
38 #include <stddef.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <sys/capability.h>
43 #include <sys/mman.h>
44 #include <sys/prctl.h>
45 #include <sys/socket.h>
46 #include <sys/syscall.h>
47 #include <sys/un.h>
48 #include <sys/wait.h>
49 #include <unistd.h>
50 
51 #include "private/bionic_futex.h"
52 #include "private/libc_logging.h"
53 
// Maximum length of a task name as returned by PR_GET_NAME.
// See prctl(2), specifically the section about PR_GET_NAME.
#define MAX_TASK_NAME_LEN (16)

// Name of the crash_dump helper binary: the 64-bit variant when built for an LP64 target, the
// 32-bit variant otherwise.
#if defined(__LP64__)
#define CRASH_DUMP_NAME "crash_dump64"
#else
#define CRASH_DUMP_NAME "crash_dump32"
#endif

// Absolute path of the helper binary that is exec'ed to perform the dump.
#define CRASH_DUMP_PATH "/system/bin/" CRASH_DUMP_NAME

// Wrappers that directly invoke the respective syscalls, in case the cached values are invalid.
// The libc entry points are poisoned so that any later use of them in this file fails to compile.
#pragma GCC poison getpid gettid
// Asks the kernel for the process ID directly instead of relying on a value libc may have cached.
static pid_t __getpid() {
  const pid_t pid = syscall(__NR_getpid);
  return pid;
}
70 
// Asks the kernel for the calling thread's ID directly instead of relying on a value libc may
// have cached.
static pid_t __gettid() {
  const pid_t tid = syscall(__NR_gettid);
  return tid;
}
74 
// Scope guard for errno: remembers the value errno had when the guard was constructed and writes
// it back when the guard is destroyed, undoing any change made in between.
class ErrnoRestorer {
  // Value of errno captured at construction time.
  int saved_errno_;

 public:
  ErrnoRestorer() {
    saved_errno_ = errno;
  }

  ~ErrnoRestorer() {
    errno = saved_errno_;
  }
};
87 
// Fallback dump routine, defined outside this file. The signal handler calls it instead of
// spawning crash_dump when PR_GET_NO_NEW_PRIVS reports 1.
extern "C" void debuggerd_fallback_handler(siginfo_t*, ucontext_t*, void*);

// Callbacks copied from the argument of debuggerd_init; stays zero-initialized when none is given.
static debuggerd_callbacks_t g_callbacks;

// Mutex to ensure only one crashing thread dumps itself.
static pthread_mutex_t crash_mutex = PTHREAD_MUTEX_INITIALIZER;
94 
95 // Don't use __libc_fatal because it exits via abort, which might put us back into a signal handler.
fatal(const char * fmt,...)96 static void __noreturn __printflike(1, 2) fatal(const char* fmt, ...) {
97   va_list args;
98   va_start(args, fmt);
99   __libc_format_log_va_list(ANDROID_LOG_FATAL, "libc", fmt, args);
100   _exit(1);
101 }
102 
fatal_errno(const char * fmt,...)103 static void __noreturn __printflike(1, 2) fatal_errno(const char* fmt, ...) {
104   int err = errno;
105   va_list args;
106   va_start(args, fmt);
107 
108   char buf[4096];
109   __libc_format_buffer_va_list(buf, sizeof(buf), fmt, args);
110   fatal("%s: %s", buf, strerror(err));
111 }
112 
113 /*
114  * Writes a summary of the signal to the log file.  We do this so that, if
115  * for some reason we're not able to contact debuggerd, there is still some
116  * indication of the failure in the log.
117  *
118  * We could be here as a result of native heap corruption, or while a
119  * mutex is being held, so we don't want to use any libc functions that
120  * could allocate memory or hold a lock.
121  */
log_signal_summary(int signum,const siginfo_t * info)122 static void log_signal_summary(int signum, const siginfo_t* info) {
123   char thread_name[MAX_TASK_NAME_LEN + 1];  // one more for termination
124   if (prctl(PR_GET_NAME, reinterpret_cast<unsigned long>(thread_name), 0, 0, 0) != 0) {
125     strcpy(thread_name, "<name unknown>");
126   } else {
127     // short names are null terminated by prctl, but the man page
128     // implies that 16 byte names are not.
129     thread_name[MAX_TASK_NAME_LEN] = 0;
130   }
131 
132   if (signum == DEBUGGER_SIGNAL) {
133     __libc_format_log(ANDROID_LOG_INFO, "libc", "Requested dump for tid %d (%s)", __gettid(),
134                       thread_name);
135     return;
136   }
137 
138   const char* signal_name = "???";
139   bool has_address = false;
140   switch (signum) {
141     case SIGABRT:
142       signal_name = "SIGABRT";
143       break;
144     case SIGBUS:
145       signal_name = "SIGBUS";
146       has_address = true;
147       break;
148     case SIGFPE:
149       signal_name = "SIGFPE";
150       has_address = true;
151       break;
152     case SIGILL:
153       signal_name = "SIGILL";
154       has_address = true;
155       break;
156     case SIGSEGV:
157       signal_name = "SIGSEGV";
158       has_address = true;
159       break;
160 #if defined(SIGSTKFLT)
161     case SIGSTKFLT:
162       signal_name = "SIGSTKFLT";
163       break;
164 #endif
165     case SIGSYS:
166       signal_name = "SIGSYS";
167       break;
168     case SIGTRAP:
169       signal_name = "SIGTRAP";
170       break;
171   }
172 
173   // "info" will be null if the siginfo_t information was not available.
174   // Many signals don't have an address or a code.
175   char code_desc[32];  // ", code -6"
176   char addr_desc[32];  // ", fault addr 0x1234"
177   addr_desc[0] = code_desc[0] = 0;
178   if (info != nullptr) {
179     __libc_format_buffer(code_desc, sizeof(code_desc), ", code %d", info->si_code);
180     if (has_address) {
181       __libc_format_buffer(addr_desc, sizeof(addr_desc), ", fault addr %p", info->si_addr);
182     }
183   }
184 
185   __libc_format_log(ANDROID_LOG_FATAL, "libc", "Fatal signal %d (%s)%s%s in tid %d (%s)", signum,
186                     signal_name, code_desc, addr_desc, __gettid(), thread_name);
187 }
188 
189 /*
190  * Returns true if the handler for signal "signum" has SA_SIGINFO set.
191  */
have_siginfo(int signum)192 static bool have_siginfo(int signum) {
193   struct sigaction old_action;
194   if (sigaction(signum, nullptr, &old_action) < 0) {
195     __libc_format_log(ANDROID_LOG_WARN, "libc", "Failed testing for SA_SIGINFO: %s",
196                       strerror(errno));
197     return false;
198   }
199   return (old_action.sa_flags & SA_SIGINFO) != 0;
200 }
201 
raise_caps()202 static void raise_caps() {
203   // Raise CapInh to match CapPrm, so that we can set the ambient bits.
204   __user_cap_header_struct capheader;
205   memset(&capheader, 0, sizeof(capheader));
206   capheader.version = _LINUX_CAPABILITY_VERSION_3;
207   capheader.pid = 0;
208 
209   __user_cap_data_struct capdata[2];
210   if (capget(&capheader, &capdata[0]) == -1) {
211     fatal_errno("capget failed");
212   }
213 
214   if (capdata[0].permitted != capdata[0].inheritable ||
215       capdata[1].permitted != capdata[1].inheritable) {
216     capdata[0].inheritable = capdata[0].permitted;
217     capdata[1].inheritable = capdata[1].permitted;
218 
219     if (capset(&capheader, &capdata[0]) == -1) {
220       __libc_format_log(ANDROID_LOG_ERROR, "libc", "capset failed: %s", strerror(errno));
221     }
222   }
223 
224   // Set the ambient capability bits so that crash_dump gets all of our caps and can ptrace us.
225   uint64_t capmask = capdata[0].inheritable;
226   capmask |= static_cast<uint64_t>(capdata[1].inheritable) << 32;
227   for (unsigned long i = 0; i < 64; ++i) {
228     if (capmask & (1ULL << i)) {
229       if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, i, 0, 0) != 0) {
230         __libc_format_log(ANDROID_LOG_ERROR, "libc", "failed to raise ambient capability %lu: %s",
231                           i, strerror(errno));
232       }
233     }
234   }
235 }
236 
// State shared between the signal handler and the pseudothread it spawns. The handler fills it in
// and passes its address as the pseudothread's argument.
struct debugger_thread_info {
  // Set to true by the pseudothread once the helper reports success over the pipe; reset to false
  // if the helper is later found to have crashed or stopped.
  bool crash_dump_started;
  // Thread ID of the thread running the signal handler; passed to crash_dump as an argument.
  pid_t crashing_tid;
  // Initialized to -1 and handed to clone() as the child-tid address; the handler futex-waits on
  // it. Also passed to crash_dump as an argument.
  pid_t pseudothread_tid;
  // Number of the signal being handled.
  int signal_number;
  // siginfo describing the signal being handled.
  siginfo_t* info;
};
244 
// Logging and contacting debuggerd requires free file descriptors, which we might not have.
// Work around this by spawning a "thread" that shares its parent's address space, but not its file
// descriptor table, so that we can close random file descriptors without affecting the original
// process. Note that this doesn't go through pthread_create, so TLS is shared with the spawning
// process.
//
// Stack pointer handed to clone() for that thread; assigned once in debuggerd_init.
static void* pseudothread_stack;
251 
debuggerd_dispatch_pseudothread(void * arg)252 static int debuggerd_dispatch_pseudothread(void* arg) {
253   debugger_thread_info* thread_info = static_cast<debugger_thread_info*>(arg);
254 
255   for (int i = 0; i < 1024; ++i) {
256     close(i);
257   }
258 
259   int devnull = TEMP_FAILURE_RETRY(open("/dev/null", O_RDWR));
260 
261   // devnull will be 0.
262   TEMP_FAILURE_RETRY(dup2(devnull, STDOUT_FILENO));
263   TEMP_FAILURE_RETRY(dup2(devnull, STDERR_FILENO));
264 
265   int pipefds[2];
266   if (pipe(pipefds) != 0) {
267     fatal_errno("failed to create pipe");
268   }
269 
270   // Don't use fork(2) to avoid calling pthread_atfork handlers.
271   int forkpid = clone(nullptr, nullptr, 0, nullptr);
272   if (forkpid == -1) {
273     __libc_format_log(ANDROID_LOG_FATAL, "libc", "failed to fork in debuggerd signal handler: %s",
274                       strerror(errno));
275   } else if (forkpid == 0) {
276     TEMP_FAILURE_RETRY(dup2(pipefds[1], STDOUT_FILENO));
277     close(pipefds[0]);
278     close(pipefds[1]);
279 
280     raise_caps();
281 
282     char main_tid[10];
283     char pseudothread_tid[10];
284     __libc_format_buffer(main_tid, sizeof(main_tid), "%d", thread_info->crashing_tid);
285     __libc_format_buffer(pseudothread_tid, sizeof(pseudothread_tid), "%d", thread_info->pseudothread_tid);
286 
287     execl(CRASH_DUMP_PATH, CRASH_DUMP_NAME, main_tid, pseudothread_tid, nullptr);
288 
289     fatal_errno("exec failed");
290   } else {
291     close(pipefds[1]);
292     char buf[4];
293     ssize_t rc = TEMP_FAILURE_RETRY(read(pipefds[0], &buf, sizeof(buf)));
294     if (rc == -1) {
295       __libc_format_log(ANDROID_LOG_FATAL, "libc", "read of IPC pipe failed: %s", strerror(errno));
296     } else if (rc == 0) {
297       __libc_format_log(ANDROID_LOG_FATAL, "libc", "crash_dump helper failed to exec");
298     } else if (rc != 1) {
299       __libc_format_log(ANDROID_LOG_FATAL, "libc",
300                         "read of IPC pipe returned unexpected value: %zd", rc);
301     } else {
302       if (buf[0] != '\1') {
303         __libc_format_log(ANDROID_LOG_FATAL, "libc", "crash_dump helper reported failure");
304       } else {
305         thread_info->crash_dump_started = true;
306       }
307     }
308     close(pipefds[0]);
309 
310     // Don't leave a zombie child.
311     int status;
312     if (TEMP_FAILURE_RETRY(waitpid(forkpid, &status, 0)) == -1) {
313       __libc_format_log(ANDROID_LOG_FATAL, "libc", "failed to wait for crash_dump helper: %s",
314                         strerror(errno));
315     } else if (WIFSTOPPED(status) || WIFSIGNALED(status)) {
316       __libc_format_log(ANDROID_LOG_FATAL, "libc", "crash_dump helper crashed or stopped");
317       thread_info->crash_dump_started = false;
318     }
319   }
320 
321   syscall(__NR_exit, 0);
322   return 0;
323 }
324 
resend_signal(siginfo_t * info,bool crash_dump_started)325 static void resend_signal(siginfo_t* info, bool crash_dump_started) {
326   // Signals can either be fatal or nonfatal.
327   // For fatal signals, crash_dump will send us the signal we crashed with
328   // before resuming us, so that processes using waitpid on us will see that we
329   // exited with the correct exit status (e.g. so that sh will report
330   // "Segmentation fault" instead of "Killed"). For this to work, we need
331   // to deregister our signal handler for that signal before continuing.
332   if (info->si_signo != DEBUGGER_SIGNAL) {
333     signal(info->si_signo, SIG_DFL);
334   }
335 
336   // We need to return from our signal handler so that crash_dump can see the
337   // signal via ptrace and dump the thread that crashed. However, returning
338   // does not guarantee that the signal will be thrown again, even for SIGSEGV
339   // and friends, since the signal could have been sent manually. We blocked
340   // all signals when registering the handler, so resending the signal (using
341   // rt_tgsigqueueinfo(2) to preserve SA_SIGINFO) will cause it to be delivered
342   // when our signal handler returns.
343   if (crash_dump_started || info->si_signo != DEBUGGER_SIGNAL) {
344     int rc = syscall(SYS_rt_tgsigqueueinfo, __getpid(), __gettid(), info->si_signo, info);
345     if (rc != 0) {
346       fatal_errno("failed to resend signal during crash");
347     }
348   }
349 }
350 
351 // Handler that does crash dumping by forking and doing the processing in the child.
352 // Do this by ptracing the relevant thread, and then execing debuggerd to do the actual dump.
debuggerd_signal_handler(int signal_number,siginfo_t * info,void * context)353 static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void* context) {
354   // Make sure we don't change the value of errno, in case a signal comes in between the process
355   // making a syscall and checking errno.
356   ErrnoRestorer restorer;
357 
358   // It's possible somebody cleared the SA_SIGINFO flag, which would mean
359   // our "info" arg holds an undefined value.
360   if (!have_siginfo(signal_number)) {
361     info = nullptr;
362   }
363 
364   struct siginfo si = {};
365   if (!info) {
366     memset(&si, 0, sizeof(si));
367     si.si_signo = signal_number;
368     si.si_code = SI_USER;
369     si.si_pid = __getpid();
370     si.si_uid = getuid();
371     info = &si;
372   } else if (info->si_code >= 0 || info->si_code == SI_TKILL) {
373     // rt_tgsigqueueinfo(2)'s documentation appears to be incorrect on kernels
374     // that contain commit 66dd34a (3.9+). The manpage claims to only allow
375     // negative si_code values that are not SI_TKILL, but 66dd34a changed the
376     // check to allow all si_code values in calls coming from inside the house.
377   }
378 
379   void* abort_message = nullptr;
380   if (g_callbacks.get_abort_message) {
381     abort_message = g_callbacks.get_abort_message();
382   }
383 
384   if (prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) == 1) {
385     // This check might be racy if another thread sets NO_NEW_PRIVS, but this should be unlikely,
386     // you can only set NO_NEW_PRIVS to 1, and the effect should be at worst a single missing
387     // ANR trace.
388     debuggerd_fallback_handler(info, static_cast<ucontext_t*>(context), abort_message);
389     resend_signal(info, false);
390     return;
391   }
392 
393   // Only allow one thread to handle a signal at a time.
394   int ret = pthread_mutex_lock(&crash_mutex);
395   if (ret != 0) {
396     __libc_format_log(ANDROID_LOG_INFO, "libc", "pthread_mutex_lock failed: %s", strerror(ret));
397     return;
398   }
399 
400   log_signal_summary(signal_number, info);
401 
402   // If this was a fatal crash, populate si_value with the abort message address if possible.
403   // Note that applications can set an abort message without aborting.
404   if (abort_message && signal_number != DEBUGGER_SIGNAL) {
405     info->si_value.sival_ptr = abort_message;
406   }
407 
408   debugger_thread_info thread_info = {
409     .crash_dump_started = false,
410     .pseudothread_tid = -1,
411     .crashing_tid = __gettid(),
412     .signal_number = signal_number,
413     .info = info
414   };
415 
416   // Set PR_SET_DUMPABLE to 1, so that crash_dump can ptrace us.
417   int orig_dumpable = prctl(PR_GET_DUMPABLE);
418   if (prctl(PR_SET_DUMPABLE, 1) != 0) {
419     fatal_errno("failed to set dumpable");
420   }
421 
422   // Essentially pthread_create without CLONE_FILES (see debuggerd_dispatch_pseudothread).
423   pid_t child_pid =
424     clone(debuggerd_dispatch_pseudothread, pseudothread_stack,
425           CLONE_THREAD | CLONE_SIGHAND | CLONE_VM | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID,
426           &thread_info, nullptr, nullptr, &thread_info.pseudothread_tid);
427   if (child_pid == -1) {
428     fatal_errno("failed to spawn debuggerd dispatch thread");
429   }
430 
431   // Wait for the child to start...
432   __futex_wait(&thread_info.pseudothread_tid, -1, nullptr);
433 
434   // and then wait for it to finish.
435   __futex_wait(&thread_info.pseudothread_tid, child_pid, nullptr);
436 
437   // Restore PR_SET_DUMPABLE to its original value.
438   if (prctl(PR_SET_DUMPABLE, orig_dumpable) != 0) {
439     fatal_errno("failed to restore dumpable");
440   }
441 
442   // Signals can either be fatal or nonfatal.
443   // For fatal signals, crash_dump will PTRACE_CONT us with the signal we
444   // crashed with, so that processes using waitpid on us will see that we
445   // exited with the correct exit status (e.g. so that sh will report
446   // "Segmentation fault" instead of "Killed"). For this to work, we need
447   // to deregister our signal handler for that signal before continuing.
448   if (signal_number != DEBUGGER_SIGNAL) {
449     signal(signal_number, SIG_DFL);
450   }
451 
452   resend_signal(info, thread_info.crash_dump_started);
453   if (info->si_signo == DEBUGGER_SIGNAL) {
454     // If the signal is fatal, don't unlock the mutex to prevent other crashing threads from
455     // starting to dump right before our death.
456     pthread_mutex_unlock(&crash_mutex);
457   }
458 }
459 
debuggerd_init(debuggerd_callbacks_t * callbacks)460 void debuggerd_init(debuggerd_callbacks_t* callbacks) {
461   if (callbacks) {
462     g_callbacks = *callbacks;
463   }
464 
465   void* thread_stack_allocation =
466     mmap(nullptr, PAGE_SIZE * 3, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
467   if (thread_stack_allocation == MAP_FAILED) {
468     fatal_errno("failed to allocate debuggerd thread stack");
469   }
470 
471   char* stack = static_cast<char*>(thread_stack_allocation) + PAGE_SIZE;
472   if (mprotect(stack, PAGE_SIZE, PROT_READ | PROT_WRITE) != 0) {
473     fatal_errno("failed to mprotect debuggerd thread stack");
474   }
475 
476   // Stack grows negatively, set it to the last byte in the page...
477   stack = (stack + PAGE_SIZE - 1);
478   // and align it.
479   stack -= 15;
480   pseudothread_stack = stack;
481 
482   struct sigaction action;
483   memset(&action, 0, sizeof(action));
484   sigfillset(&action.sa_mask);
485   action.sa_sigaction = debuggerd_signal_handler;
486   action.sa_flags = SA_RESTART | SA_SIGINFO;
487 
488   // Use the alternate signal stack if available so we can catch stack overflows.
489   action.sa_flags |= SA_ONSTACK;
490   debuggerd_register_handlers(&action);
491 }
492