1 /*
2  * Copyright (c) 2015, The Android Open Source Project
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *   * Redistributions of source code must retain the above copyright
9  *     notice, this list of conditions and the following disclaimer.
10  *   * Redistributions in binary form must reproduce the above copyright
11  *     notice, this list of conditions and the following disclaimer
12  *     in the documentation and/or other materials provided with the
13  *     distribution.
14  *   * Neither the name of Google, Inc. nor the names of its contributors
15  *     may be used to endorse or promote products derived from this
16  *     software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
25  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <binder/IBinder.h>
33 #include <binder/IServiceManager.h>
34 #include <binder/Parcel.h>
35 
36 #include <ctime>
37 #include <cutils/properties.h>
38 #include <signal.h>
39 #include <stdbool.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 
44 #include <sys/resource.h>
45 #include <sys/stat.h>
46 #include <sys/time.h>
47 #include <sys/types.h>
48 #include <unistd.h>
49 
50 #include <utils/Log.h>
51 #include <utils/String8.h>
52 #include <utils/Trace.h>
53 #include <zlib.h>
54 
55 using namespace android;
56 
57 #ifdef LOG_TAG
58 #undef LOG_TAG
59 #endif
60 
61 #define LOG_TAG "anrdaemon"
62 
63 static const int check_period = 1;              // in sec
64 static const int tracing_check_period = 500000; // in micro sec
65 static const int cpu_stat_entries = 7;          // number of cpu stat entries
66 static const int min_buffer_size = 16;
67 static const int max_buffer_size = 2048;
68 static const char *min_buffer_size_str = "16";
69 static const char *max_buffer_size_str = "2048";
70 
71 typedef struct cpu_stat {
72     unsigned long utime, ntime, stime, itime;
73     unsigned long iowtime, irqtime, sirqtime, steal;
74     unsigned long total;
75 } cpu_stat_t;
76 
77 /*
78  * Logging on/off threshold.
79  * Uint: 0.01%; default to 99.90% cpu.
80  */
81 static int idle_threshold = 10;
82 
83 static bool quit = false;
84 static bool suspend= false;
85 static bool err = false;
86 static char err_msg[100];
87 static bool tracing = false;
88 
89 static const char *buf_size_kb = "2048";
90 static const char *apps = "";
91 static uint64_t tag = 0;
92 
93 static cpu_stat_t new_cpu;
94 static cpu_stat_t old_cpu;
95 
96 /* Log certain kernel activity when enabled */
97 static bool log_sched = false;
98 static bool log_stack = false;
99 static bool log_irq   = false;
100 static bool log_sync  = false;
101 static bool log_workq = false;
102 
103 /* Paths for debugfs controls*/
104 static const char* dfs_trace_output_path =
105     "/d/tracing/trace";
106 static const char* dfs_irq_path =
107     "/d/tracing/events/irq/enable";
108 static const char* dfs_sync_path =
109     "/d/tracing/events/sync/enable";
110 static const char* dfs_workq_path =
111     "/d/tracing/events/workqueue/enable";
112 static const char* dfs_stack_path =
113     "/d/tracing/options/stacktrace";
114 static const char* dfs_sched_switch_path =
115     "/d/tracing/events/sched/sched_switch/enable";
116 static const char* dfs_sched_wakeup_path =
117     "/d/tracing/events/sched/sched_wakeup/enable";
118 static const char* dfs_control_path =
119     "/d/tracing/tracing_on";
120 static const char* dfs_buffer_size_path =
121     "/d/tracing/buffer_size_kb";
122 static const char* dfs_tags_property = "debug.atrace.tags.enableflags";
123 static const char* dfs_apps_property = "debug.atrace.app_cmdlines";
124 
125 /*
126  * Read accumulated cpu data from /proc/stat
127  */
get_cpu_stat(cpu_stat_t * cpu)128 static void get_cpu_stat(cpu_stat_t *cpu) {
129     FILE *fp = NULL;
130     const char *params = "cpu  %lu %lu %lu %lu %lu %lu %lu %*d %*d %*d\n";
131 
132     if ((fp = fopen("/proc/stat", "r")) == NULL) {
133         err = true;
134         sprintf(err_msg, "can't read from /proc/stat with errno %d", errno);
135     } else {
136         if (fscanf(fp, params, &cpu->utime, &cpu->ntime,
137                 &cpu->stime, &cpu->itime, &cpu->iowtime, &cpu->irqtime,
138                 &cpu->sirqtime) != cpu_stat_entries) {
139             /*
140              * If failed in getting status, new_cpu won't be updated and
141              * is_heavy_loaded() will return false.
142              */
143             ALOGE("Error in getting cpu status. Skipping this check.");
144             return;
145         }
146 
147         cpu->total = cpu->utime + cpu->ntime + cpu->stime + cpu->itime
148             + cpu->iowtime + cpu->irqtime + cpu->sirqtime;
149 
150         fclose(fp);
151     }
152 }
153 
154 /*
155  * Calculate cpu usage in the past interval.
156  * If tracing is on, increase the idle threshold by 1.00% so that we do not
157  * turn on and off tracing frequently whe the cpu load is right close to
158  * threshold.
159  */
is_heavy_load(void)160 static bool is_heavy_load(void) {
161     unsigned long diff_idle, diff_total;
162     int threshold = idle_threshold + (tracing?100:0);
163     get_cpu_stat(&new_cpu);
164     diff_idle = new_cpu.itime - old_cpu.itime;
165     diff_total = new_cpu.total - old_cpu.total;
166     old_cpu = new_cpu;
167     return (diff_idle * 10000 < diff_total * threshold);
168 }
169 
170 /*
171  * Force the userland processes to refresh their property for logging.
172  */
dfs_poke_binder(void)173 static void dfs_poke_binder(void) {
174     sp<IServiceManager> sm = defaultServiceManager();
175     Vector<String16> services = sm->listServices();
176     for (size_t i = 0; i < services.size(); i++) {
177         sp<IBinder> obj = sm->checkService(services[i]);
178         if (obj != NULL) {
179             Parcel data;
180             obj->transact(IBinder::SYSPROPS_TRANSACTION, data, NULL, 0);
181         }
182     }
183 }
184 
185 /*
186  * Enable/disable a debugfs property by writing 0/1 to its path.
187  */
dfs_enable(bool enable,const char * path)188 static int dfs_enable(bool enable, const char* path) {
189     int fd = open(path, O_WRONLY);
190     if (fd == -1) {
191         err = true;
192         sprintf(err_msg, "Can't open %s. Error: %d", path, errno);
193         return -1;
194     }
195     const char* control = (enable?"1":"0");
196     ssize_t len = strlen(control);
197     int max_try = 10; // Fail if write was interrupted for 10 times
198     while (write(fd, control, len) != len) {
199         if (errno == EINTR && max_try-- > 0) {
200             usleep(100);
201             continue;
202         }
203 
204         err = true;
205         sprintf(err_msg, "Error %d in writing to %s.", errno, path);
206     }
207     close(fd);
208     return (err?-1:0);
209 }
210 
211 /*
212  * Set the userland tracing properties.
213  */
dfs_set_property(uint64_t mtag,const char * mapp,bool enable)214 static void dfs_set_property(uint64_t mtag, const char* mapp, bool enable) {
215     char buf[64];
216     snprintf(buf, 64, "%#" PRIx64, mtag);
217     if (property_set(dfs_tags_property, buf) < 0) {
218         err = true;
219         sprintf(err_msg, "Failed to set debug tags system properties.");
220     }
221 
222     if (strlen(mapp) > 0
223             && property_set(dfs_apps_property, mapp) < 0) {
224         err = true;
225         sprintf(err_msg, "Failed to set debug applications.");
226     }
227 
228     if (log_sched) {
229         dfs_enable(enable, dfs_sched_switch_path);
230         dfs_enable(enable, dfs_sched_wakeup_path);
231     }
232     if (log_stack) {
233         dfs_enable(enable, dfs_stack_path);
234     }
235     if (log_irq) {
236         dfs_enable(enable, dfs_irq_path);
237     }
238     if (log_sync) {
239         dfs_enable(enable, dfs_sync_path);
240     }
241     if (log_workq) {
242         dfs_enable(enable, dfs_workq_path);
243     }
244 }
245 
246 /*
247  * Start logging when cpu usage is high. Meanwhile, moniter the cpu usage and
248  * stop logging when it drops down.
249  */
start_tracing(void)250 static void start_tracing(void) {
251     ALOGD("High cpu usage, start logging.");
252 
253     if (dfs_enable(true, dfs_control_path) != 0) {
254         ALOGE("Failed to start tracing.");
255         return;
256     }
257     tracing = true;
258 
259     /* Stop logging when cpu usage drops or the daemon is suspended.*/
260     do {
261         usleep(tracing_check_period);
262     } while (!suspend && is_heavy_load());
263 
264     if (dfs_enable(false, dfs_control_path) != 0) {
265         ALOGE("Failed to stop tracing.");
266     }
267 
268     ALOGD("Usage back to low, stop logging.");
269     tracing = false;
270 }
271 
272 /*
273  * Set the tracing log buffer size.
274  * Note the actual buffer size will be buf_size_kb * number of cores.
275  * E.g. for dory, the total buffer size is buf_size_kb * 4.
276  */
set_tracing_buffer_size(void)277 static int set_tracing_buffer_size(void) {
278     int fd = open(dfs_buffer_size_path, O_WRONLY);
279     if (fd == -1) {
280         err = true;
281         sprintf(err_msg, "Can't open atrace buffer size file under /d/tracing.");
282         return -1;
283     }
284     ssize_t len = strlen(buf_size_kb);
285     if (write(fd, buf_size_kb, len) != len) {
286         err = true;
287         sprintf(err_msg, "Error in writing to atrace buffer size file.");
288     }
289     close(fd);
290     return (err?-1:0);
291 
292 }
293 
294 /*
295  * Main loop to moniter the cpu usage and decided whether to start logging.
296  */
start(void)297 static void start(void) {
298     if ((set_tracing_buffer_size()) != 0)
299         return;
300 
301     dfs_set_property(tag, apps, true);
302     dfs_poke_binder();
303 
304     get_cpu_stat(&old_cpu);
305     sleep(check_period);
306 
307     while (!quit && !err) {
308         if (!suspend && is_heavy_load()) {
309             /*
310              * Increase process priority to make sure we can stop logging when
311              * necessary and do not overwrite the buffer
312              */
313             setpriority(PRIO_PROCESS, 0, -20);
314             start_tracing();
315             setpriority(PRIO_PROCESS, 0, 0);
316         }
317         sleep(check_period);
318     }
319     return;
320 }
321 
322 /*
323  * Dump the log in a compressed format for systrace to visualize.
324  */
dump_trace()325 static void dump_trace()
326 {
327     int remain_attempts = 5;
328     suspend = true;
329     while (tracing) {
330         ALOGI("Waiting logging to stop.");
331         usleep(tracing_check_period);
332         remain_attempts--;
333         if (remain_attempts == 0) {
334             ALOGE("Can't stop logging after 5 attempts. Dump aborted.");
335             return;
336         }
337     }
338 
339     /*
340      * Create /sdcard/ANRdaemon/ if it doesn't exist
341      */
342     struct stat st;
343     if (stat("/sdcard/ANRdaemon", &st) == -1) {
344         ALOGI("Creating /sdcard/ANRdaemon/");
345         int err = mkdir("/sdcard/ANRdaemon", 0700);
346         if (err != 0)
347             ALOGI("Creating /sdcard/ANRdaemon/ failed with %s", strerror(err));
348     }
349 
350     /*
351      * Create a dump file "dump_of_anrdaemon.<current_time>" under /sdcard/ANRdaemon/
352      */
353     time_t now = time(0);
354     struct tm  tstruct;
355     char time_buf[80];
356     char path_buf[200];
357     const char* header = " done\nTRACE:\n";
358     ssize_t header_len = strlen(header);
359     tstruct = *localtime(&now);
360     strftime(time_buf, sizeof(time_buf), "%Y-%m-%d.%X", &tstruct);
361     sprintf(path_buf, "/sdcard/ANRdaemon/dump_of_anrdaemon.%s", time_buf);
362     int output_fd = creat(path_buf, S_IRWXU);
363     if (output_fd == -1) {
364         ALOGE("Failed to create %s. Dump aborted.", path_buf);
365         return;
366     }
367 
368     if (write(output_fd, header, strlen(header)) != header_len) {
369         ALOGE("Failed to write the header.");
370         close(output_fd);
371         return;
372     }
373 
374     int trace_fd = open(dfs_trace_output_path, O_RDWR);
375     if (trace_fd == -1) {
376         ALOGE("Failed to open %s. Dump aborted.", dfs_trace_output_path);
377         close(output_fd);
378         return;
379     }
380 
381     z_stream zs;
382     uint8_t *in, *out;
383     int result, flush;
384 
385     memset(&zs, 0, sizeof(zs));
386     result = deflateInit(&zs, Z_DEFAULT_COMPRESSION);
387     if (result != Z_OK) {
388         ALOGE("error initializing zlib: %d\n", result);
389         close(trace_fd);
390         close(output_fd);
391         return;
392     }
393 
394     const size_t bufSize = 64*1024;
395     in = (uint8_t*)malloc(bufSize);
396     out = (uint8_t*)malloc(bufSize);
397     flush = Z_NO_FLUSH;
398 
399     zs.next_out = out;
400     zs.avail_out = bufSize;
401 
402     do {
403         if (zs.avail_in == 0) {
404             result = read(trace_fd, in, bufSize);
405             if (result < 0) {
406                 ALOGE("error reading trace: %s", strerror(errno));
407                 result = Z_STREAM_END;
408                 break;
409             } else if (result == 0) {
410                 flush = Z_FINISH;
411             } else {
412                 zs.next_in = in;
413                 zs.avail_in = result;
414             }
415         }
416 
417         if (zs.avail_out == 0) {
418             result = write(output_fd, out, bufSize);
419             if ((size_t)result < bufSize) {
420                 ALOGE("error writing deflated trace: %s", strerror(errno));
421                 result = Z_STREAM_END;
422                 zs.avail_out = bufSize;
423                 break;
424             }
425             zs.next_out = out;
426             zs.avail_out = bufSize;
427         }
428 
429     } while ((result = deflate(&zs, flush)) == Z_OK);
430 
431     if (result != Z_STREAM_END) {
432         ALOGE("error deflating trace: %s\n", zs.msg);
433     }
434 
435     if (zs.avail_out < bufSize) {
436         size_t bytes = bufSize - zs.avail_out;
437         result = write(output_fd, out, bytes);
438         if ((size_t)result < bytes) {
439             ALOGE("error writing deflated trace: %s", strerror(errno));
440         }
441     }
442 
443     result = deflateEnd(&zs);
444     if (result != Z_OK) {
445         ALOGE("error cleaning up zlib: %d\n", result);
446     }
447 
448     free(in);
449     free(out);
450 
451     close(trace_fd);
452     close(output_fd);
453 
454     suspend = false;
455     ALOGI("Finished dump. Output file stored at: %s", path_buf);
456 }
457 
handle_signal(int signo)458 static void handle_signal(int signo)
459 {
460     switch (signo) {
461         case SIGQUIT:
462             suspend = true;
463             quit = true;
464             break;
465         case SIGSTOP:
466             suspend = true;
467             break;
468         case SIGCONT:
469             suspend = false;
470             break;
471         case SIGUSR1:
472             dump_trace();
473     }
474 }
475 
476 /*
477  * Set the signal handler:
478  * SIGQUIT: Reset debugfs and tracing property and terminate the daemon.
479  * SIGSTOP: Stop logging and suspend the daemon.
480  * SIGCONT: Resume the daemon as normal.
481  * SIGUSR1: Dump the logging to a compressed format for systrace to visualize.
482  */
register_sighandler(void)483 static void register_sighandler(void)
484 {
485     struct sigaction sa;
486     sigset_t block_mask;
487 
488     sigemptyset(&block_mask);
489     sigaddset (&block_mask, SIGQUIT);
490     sigaddset (&block_mask, SIGSTOP);
491     sigaddset (&block_mask, SIGCONT);
492     sigaddset (&block_mask, SIGUSR1);
493 
494     sa.sa_flags = 0;
495     sa.sa_mask = block_mask;
496     sa.sa_handler = handle_signal;
497     sigaction(SIGQUIT, &sa, NULL);
498     sigaction(SIGSTOP, &sa, NULL);
499     sigaction(SIGCONT, &sa, NULL);
500     sigaction(SIGUSR1, &sa, NULL);
501 }
502 
show_help(void)503 static void show_help(void) {
504 
505     fprintf(stderr, "usage: ANRdaemon [options] [categoris...]\n");
506     fprintf(stdout, "Options includes:\n"
507                     "   -a appname  enable app-level tracing for a comma "
508                        "separated list of cmdlines\n"
509                     "   -t N        cpu threshold for logging to start "
510                         "(uint = 0.01%%, min = 5000, max = 9999, default = 9990)\n"
511                     "   -s N        use a trace buffer size of N KB "
512                         "default to 2048KB\n"
513                     "   -h          show helps\n");
514     fprintf(stdout, "Categoris includes:\n"
515                     "   am         - activity manager\n"
516                     "   sm         - sync manager\n"
517                     "   input      - input\n"
518                     "   dalvik     - dalvik VM\n"
519                     "   audio      - Audio\n"
520                     "   gfx        - Graphics\n"
521                     "   rs         - RenderScript\n"
522                     "   hal        - Hardware Modules\n"
523                     "   irq        - kernel irq events\n"
524                     "   sched      - kernel scheduler activity\n"
525                     "   stack      - kernel stack\n"
526                     "   sync       - kernel sync activity\n"
527                     "   workq      - kernel work queues\n");
528     fprintf(stdout, "Control includes:\n"
529                     "   SIGQUIT: terminate the process\n"
530                     "   SIGSTOP: suspend all function of the daemon\n"
531                     "   SIGCONT: resume the normal function\n"
532                     "   SIGUSR1: dump the current logging in a compressed form\n");
533     exit(0);
534 }
535 
get_options(int argc,char * argv[])536 static int get_options(int argc, char *argv[]) {
537     int opt = 0;
538     int threshold;
539     while ((opt = getopt(argc, argv, "a:s:t:h")) >= 0) {
540         switch(opt) {
541             case 'a':
542                 apps = optarg;
543                 break;
544             case 's':
545                 if (atoi(optarg) > max_buffer_size)
546                     buf_size_kb = max_buffer_size_str;
547                 else if (atoi(optarg) < min_buffer_size)
548                     buf_size_kb = min_buffer_size_str;
549                 else
550                     buf_size_kb = optarg;
551                 break;
552             case 't':
553                 threshold = atoi(optarg);
554                 if (threshold > 9999 || threshold < 5000) {
555                     fprintf(stderr, "logging threshold should be 5000-9999\n");
556                     return 1;
557                 }
558                 idle_threshold = 10000 - threshold;
559                 break;
560             case 'h':
561                 show_help();
562                 break;
563             default:
564                 fprintf(stderr, "Error in getting options.\n"
565                         "run \"%s -h\" for usage.\n", argv[0]);
566                 return 1;
567         }
568     }
569 
570     for (int i = optind; i < argc; i++) {
571         if (strcmp(argv[i], "am") == 0) {
572             tag |= ATRACE_TAG_ACTIVITY_MANAGER;
573         } else if (strcmp(argv[i], "input") == 0) {
574             tag |= ATRACE_TAG_INPUT;
575         } else if (strcmp(argv[i], "sm") == 0) {
576             tag |= ATRACE_TAG_SYNC_MANAGER;
577         } else if (strcmp(argv[i], "dalvik") == 0) {
578             tag |= ATRACE_TAG_DALVIK;
579         } else if (strcmp(argv[i], "gfx") == 0) {
580             tag |= ATRACE_TAG_GRAPHICS;
581         } else if (strcmp(argv[i], "audio") == 0) {
582             tag |= ATRACE_TAG_AUDIO;
583         } else if (strcmp(argv[i], "hal") == 0) {
584             tag |= ATRACE_TAG_HAL;
585         } else if (strcmp(argv[i], "rs") == 0) {
586             tag |= ATRACE_TAG_RS;
587         } else if (strcmp(argv[i], "sched") == 0) {
588             log_sched = true;
589         } else if (strcmp(argv[i], "stack") == 0) {
590             log_stack = true;
591         } else if (strcmp(argv[i], "workq") == 0) {
592             log_workq = true;
593         } else if (strcmp(argv[i], "irq") == 0) {
594             log_irq = true;
595         } else if (strcmp(argv[i], "sync") == 0) {
596             log_sync = true;
597         } else {
598             fprintf(stderr, "invalid category: %s\n"
599                     "run \"%s -h\" for usage.\n", argv[i], argv[0]);
600             return 1;
601         }
602     }
603 
604     /* If nothing is enabled, don't run */
605     if (!tag && !log_sched && !log_stack && !log_workq && !log_irq && !log_sync) {
606         ALOGE("Specify at least one category to trace.");
607         return 1;
608     }
609 
610     return 0;
611 }
612 
main(int argc,char * argv[])613 int main(int argc, char *argv[])
614 {
615     if(get_options(argc, argv) != 0)
616         return 1;
617 
618     if (daemon(0, 0) != 0)
619         return 1;
620 
621     register_sighandler();
622 
623     /* Clear any the trace log file by overwrite it with a new file */
624     int fd = creat(dfs_trace_output_path, 0);
625     if (fd == -1) {
626         ALOGE("Faield to open and cleaup previous log");
627         return 1;
628     }
629     close(fd);
630 
631     ALOGI("ANRdaemon starting");
632     start();
633 
634     if (err)
635         ALOGE("ANRdaemon stopped due to Error: %s", err_msg);
636 
637     ALOGI("ANRdaemon terminated.");
638 
639     return (err?1:0);
640 }
641