1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <time.h>
18 #include <pthread.h>
19 #include <sys/timerfd.h>
20 #include <inttypes.h>
21 #include <sys/stat.h>
22 
23 #include <algorithm>
24 #include <list>
25 #include <memory>
26 #include <set>
27 #include <string>
28 #include <vector>
29 
30 #define LOG_TAG "AnrTimerService"
31 #define ATRACE_TAG ATRACE_TAG_ACTIVITY_MANAGER
32 #define ANR_TIMER_TRACK "AnrTimerTrack"
33 
34 #include <jni.h>
35 #include <nativehelper/JNIHelp.h>
36 #include "android_runtime/AndroidRuntime.h"
37 #include "core_jni_helpers.h"
38 
39 #include <processgroup/processgroup.h>
40 #include <utils/Log.h>
41 #include <utils/Mutex.h>
42 #include <utils/Timers.h>
43 #include <utils/Trace.h>
44 
45 #include <android-base/logging.h>
46 #include <android-base/stringprintf.h>
47 #include <android-base/unique_fd.h>
48 
49 using ::android::base::StringPrintf;
50 
51 
52 // Native support is unavailable on WIN32 platforms.  This macro preemptively disables it.
53 #ifdef _WIN32
54 #define NATIVE_SUPPORT 0
55 #else
56 #define NATIVE_SUPPORT 1
57 #endif
58 
59 namespace android {
60 
61 // using namespace android;
62 
63 // Almost nothing in this module needs to be in the android namespace.
64 namespace {
65 
66 // If not on a Posix system, create stub timerfd methods.  These are defined to allow
67 // compilation.  They are not functional.  Also, they do not leak outside this compilation unit.
68 #ifdef _WIN32
timer_create()69 int timer_create() {
70   return -1;
71 }
timer_settime(int,int,void const *,void *)72 int timer_settime(int, int, void const *, void *) {
73   return -1;
74 }
75 #else
76 int timer_create() {
77   return timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
78 }
79 int timer_settime(int fd, int flags, const struct itimerspec *new_value,
80                   struct itimerspec *_Nullable old_value) {
81   return timerfd_settime(fd, flags, new_value, old_value);
82 }
83 #endif
84 
// Compile-time switch for the debug-only per-timer log messages.  It is normally false so the
// guarded statements are not included in the image.  Set it to true in a unit test image to
// debug test failures.
constexpr bool DEBUG_TIMER = false;

// Compile-time switch for log messages that debug the timer thread itself.
constexpr bool DEBUG_TICKER = false;

// Compile-time switch for error logging.  Enabled.
constexpr bool DEBUG_ERROR = true;
95 
96 // Return the current time in nanoseconds.  This time is relative to system boot.
now()97 nsecs_t now() {
98     return systemTime(SYSTEM_TIME_MONOTONIC);
99 }
100 
// Probe for a process by stat'ing its /proc/<pid> entry.  Returns true when the entry is
// found.  A false result only means the stat() call failed, so the caller cannot know
// whether the process exists.
bool processExists(pid_t pid) {
    char procDir[PATH_MAX];
    snprintf(procDir, sizeof(procDir), "/proc/%d", pid);
    struct stat info;
    if (stat(procDir, &info) != 0) {
        return false;
    }
    return true;
}
108 
// Return the name of the process whose pid is the input.  The name is the leading,
// null-terminated portion of /proc/<pid>/cmdline (at most PATH_MAX-1 bytes are read).  If the
// file cannot be opened, the name will be "notfound".  If reading fails, the name will be
// "err: <reason>".
std::string getProcessName(pid_t pid) {
    char buffer[PATH_MAX];
    snprintf(buffer, sizeof(buffer), "/proc/%d/cmdline", pid);
    const int fd = ::open(buffer, O_RDONLY);
    if (fd < 0) {
        return "notfound";
    }

    // Read until end-of-file, an error, or a full buffer.  One byte is always held back for
    // the terminating null.
    size_t pos = 0;
    ssize_t result = 0;
    while (pos < sizeof(buffer) - 1) {
        result = ::read(fd, buffer + pos, (sizeof(buffer) - pos) - 1);
        if (result <= 0) {
            break;
        }
        // Advance past the bytes just read.  Without this, every read lands at the start of
        // the buffer and the function returns an empty string.
        pos += static_cast<size_t>(result);
    }
    // Capture the read error before close() has a chance to overwrite errno.
    const int readErrno = errno;
    ::close(fd);

    if (result < 0) {
        snprintf(buffer, sizeof(buffer), "err: %s", strerror(readErrno));
    } else {
        buffer[pos] = 0;
    }
    return std::string(buffer);
}
136 
137 /**
138  * This class encapsulates the anr timer service.  The service manages a list of individual
139  * timers.  A timer is either Running or Expired.  Once started, a timer may be canceled or
140  * accepted.  Both actions collect statistics about the timer and then delete it.  An expired
141  * timer may also be discarded, which deletes the timer without collecting any statistics.
142  *
143  * All public methods in this class are thread-safe.
144  */
145 class AnrTimerService {
146   private:
147     class ProcessStats;
148     class Timer;
149 
150   public:
151 
152     // The class that actually runs the clock.
153     class Ticker;
154 
155     // A timer is identified by a timer_id_t.  Timer IDs are unique in the moment.
156     using timer_id_t = uint32_t;
157 
158     // A manifest constant.  No timer is ever created with this ID.
159     static const timer_id_t NOTIMER = 0;
160 
161     // A notifier is called with a timer ID, the timer's tag, and the client's cookie.  The pid
162     // and uid that were originally assigned to the timer are passed as well.  The elapsed time
163     // is the time since the timer was scheduled.
164     using notifier_t = bool (*)(timer_id_t, int pid, int uid, nsecs_t elapsed,
165                                 void* cookie, jweak object);
166 
167     enum Status {
168         Invalid,
169         Running,
170         Expired,
171         Canceled
172     };
173 
174     /**
175      * Create a timer service.  The service is initialized with a name used for logging.  The
176      * constructor is also given the notifier callback, and two cookies for the callback: the
177      * traditional void* and Java object pointer.  The remaining parameters are
178      * configuration options.
179      */
180     AnrTimerService(char const* label, notifier_t notifier, void* cookie, jweak jtimer, Ticker*,
181                     bool extend, bool freeze);
182 
183     // Delete the service and clean up memory.
184     ~AnrTimerService();
185 
186     // Start a timer and return the associated timer ID.  It does not matter if the same pid/uid
187     // are already in the running list.  Once start() is called, one of cancel(), accept(), or
188     // discard() must be called to clean up the internal data structures.
189     timer_id_t start(int pid, int uid, nsecs_t timeout);
190 
191     // Cancel a timer and remove it from all lists.  This is called when the event being timed
192     // has occurred.  If the timer was Running, the function returns true.  The other
193     // possibilities are that the timer was Expired or non-existent; in both cases, the function
194     // returns false.
195     bool cancel(timer_id_t timerId);
196 
197     // Accept a timer.  This is called when the upper layers accept that a timer has expired.
198     // If the timer was Expired and its process was frozen, the timer is pushed to the expired
199     // list and 'true' is returned.  Otherwise the function returns false.
200     bool accept(timer_id_t timerId);
201 
202     // Discard a timer without collecting any statistics.  This is called when the upper layers
203     // recognize that a timer expired but decide the expiration is not significant.  If the
204     // timer was Expired, the function returns true.  The other possibilities are tha the timer
205     // was Running or non-existing; in both cases, the function returns false.
206     bool discard(timer_id_t timerId);
207 
208     // A timer has expired.
209     void expire(timer_id_t);
210 
211     // Release a timer.  The timer must be in the expired list.
212     bool release(timer_id_t);
213 
214     // Return the Java object associated with this instance.
jtimer() const215     jweak jtimer() const {
216         return notifierObject_;
217     }
218 
219     // Return the per-instance statistics.
220     std::vector<std::string> getDump() const;
221 
222   private:
223     // The service cannot be copied.
224     AnrTimerService(AnrTimerService const&) = delete;
225 
226     // Insert a timer into the running list.  The lock must be held by the caller.
227     void insertLocked(const Timer&);
228 
229     // Remove a timer from the lists and return it. The lock must be held by the caller.
230     Timer removeLocked(timer_id_t timerId);
231 
232     // Add a timer to the expired list.
233     void addExpiredLocked(Timer const&);
234 
235     // Scrub the expired list by removing all entries for non-existent processes.  The expired
236     // lock must be held by the caller.
237     void scrubExpiredLocked();
238 
239     // Return a string representation of a status value.
240     static const char* statusString(Status);
241 
242     // The name of this service, for logging.
243     std::string const label_;
244 
245     // The callback that is invoked when a timer expires.
246     notifier_t const notifier_;
247 
248     // The two cookies passed to the notifier.
249     void* notifierCookie_;
250     jweak notifierObject_;
251 
252     // True if extensions can be granted to expired timers.
253     const bool extend_;
254 
255     // True if the service should freeze anr'ed processes.
256     const bool freeze_;
257 
258     // The global lock
259     mutable Mutex lock_;
260 
261     // The list of all timers that are still running.  This is sorted by ID for fast lookup.
262     std::set<Timer> running_;
263 
264     // The list of all expired timers that are awaiting release.
265     std::set<Timer> expired_;
266 
267     // The maximum number of active timers.
268     size_t maxRunning_;
269 
270     // Simple counters
271     struct Counters {
272         // The number of timers started, canceled, accepted, discarded, and expired.
273         size_t started;
274         size_t canceled;
275         size_t accepted;
276         size_t discarded;
277         size_t expired;
278         size_t extended;
279         size_t released;
280 
281         // The number of times there were zero active timers.
282         size_t drained;
283 
284         // The number of times a protocol error was seen.
285         size_t error;
286     };
287 
288     Counters counters_;
289 
290     // The clock used by this AnrTimerService.
291     Ticker *ticker_;
292 };
293 
294 class AnrTimerService::ProcessStats {
295   public:
296     nsecs_t cpu_time;
297     nsecs_t cpu_delay;
298 
ProcessStats()299     ProcessStats() :
300             cpu_time(0),
301             cpu_delay(0) {
302     }
303 
304     // Collect all statistics for a process.  Return true if the fill succeeded and false if it
305     // did not.  If there is any problem, the statistics are zeroed.
fill(int pid)306     bool fill(int pid) {
307         cpu_time = 0;
308         cpu_delay = 0;
309 
310         char path[PATH_MAX];
311         snprintf(path, sizeof(path), "/proc/%u/schedstat", pid);
312         ::android::base::unique_fd fd(open(path, O_RDONLY | O_CLOEXEC));
313         if (!fd.ok()) {
314             return false;
315         }
316         char buffer[128];
317         ssize_t len = read(fd, buffer, sizeof(buffer));
318         if (len <= 0) {
319             return false;
320         }
321         if (len >= sizeof(buffer)) {
322             ALOGE("proc file too big: %s", path);
323             return false;
324         }
325         buffer[len] = 0;
326         unsigned long t1;
327         unsigned long t2;
328         if (sscanf(buffer, "%lu %lu", &t1, &t2) != 2) {
329             return false;
330         }
331         cpu_time = t1;
332         cpu_delay = t2;
333         return true;
334     }
335 };
336 
337 class AnrTimerService::Timer {
338   public:
339     // A unique ID assigned when the Timer is created.
340     timer_id_t const id;
341 
342     // The creation parameters.  The timeout is the original, relative timeout.
343     int const pid;
344     int const uid;
345     nsecs_t const timeout;
346     bool const extend;
347     bool const freeze;
348 
349     // The state of this timer.
350     Status status;
351 
352     // The time at which the timer was started.
353     nsecs_t started;
354 
355     // The scheduled timeout.  This is an absolute time.  It may be extended.
356     nsecs_t scheduled;
357 
358     // True if this timer has been extended.
359     bool extended;
360 
361     // True if the process has been frozen.
362     bool frozen;
363 
364     // Bookkeeping for extensions.  The initial state of the process.  This is collected only if
365     // the timer is extensible.
366     ProcessStats initial;
367 
368     // The default constructor is used to create timers that are Invalid, representing the "not
369     // found" condition when a collection is searched.
Timer()370     Timer() :
371             id(NOTIMER),
372             pid(0),
373             uid(0),
374             timeout(0),
375             extend(false),
376             freeze(false),
377             status(Invalid),
378             started(0),
379             scheduled(0),
380             extended(false),
381             frozen(false) {
382     }
383 
384     // This constructor creates a timer with the specified id.  This can be used as the argument
385     // to find().
Timer(timer_id_t id)386     Timer(timer_id_t id) :
387             id(id),
388             pid(0),
389             uid(0),
390             timeout(0),
391             extend(false),
392             freeze(false),
393             status(Invalid),
394             started(0),
395             scheduled(0),
396             extended(false),
397             frozen(false) {
398     }
399 
400     // Create a new timer.  This starts the timer.
Timer(int pid,int uid,nsecs_t timeout,bool extend,bool freeze)401     Timer(int pid, int uid, nsecs_t timeout, bool extend, bool freeze) :
402             id(nextId()),
403             pid(pid),
404             uid(uid),
405             timeout(timeout),
406             extend(extend),
407             freeze(pid != 0 && freeze),
408             status(Running),
409             started(now()),
410             scheduled(started + timeout),
411             extended(false),
412             frozen(false) {
413         if (extend && pid != 0) {
414             initial.fill(pid);
415         }
416         // A zero-pid is odd but it means the upper layers will never ANR the process.  Freezing
417         // is always disabled.  (It won't work anyway, but disabling it avoids error messages.)
418         ALOGI_IF(DEBUG_ERROR && pid == 0, "error: zero-pid %s", toString().c_str());
419     }
420 
421     // Start a timer.  This interface exists to generate log messages, if enabled.
start()422     void start() {
423         event("start", /* verbose= */ true);
424     }
425 
426     // Cancel a timer.
cancel()427     void cancel() {
428         ALOGW_IF(DEBUG_ERROR && status != Running, "error: canceling %s", toString().c_str());
429         status = Canceled;
430         event("cancel");
431     }
432 
433     // Expire a timer. Return true if the timer is expired and false otherwise.  The function
434     // returns false if the timer is eligible for extension.  If the function returns false, the
435     // scheduled time is updated.
expire()436     bool expire() {
437         nsecs_t extension = 0;
438         if (extend && !extended) {
439             // Only one extension is permitted.
440             extended = true;
441             ProcessStats current;
442             current.fill(pid);
443             extension = current.cpu_delay - initial.cpu_delay;
444             if (extension < 0) extension = 0;
445             if (extension > timeout) extension = timeout;
446         }
447         if (extension == 0) {
448             status = Expired;
449             maybeFreezeProcess();
450             event("expire");
451         } else {
452             scheduled += extension;
453             event("extend");
454         }
455         return status == Expired;
456     }
457 
458     // Accept a timeout.  This does nothing other than log the state machine change.
accept()459     void accept() {
460         event("accept");
461     }
462 
463     // Discard a timeout.
discard()464     void discard() {
465         maybeUnfreezeProcess();
466         status = Canceled;
467         event("discard");
468     }
469 
470     // Release the timer.
release()471     void release() {
472         // If timer represents a frozen process, unfreeze it at this time.
473         maybeUnfreezeProcess();
474         event("release");
475     }
476 
477     // Return true if this timer corresponds to a running process.
alive() const478     bool alive() const {
479         return processExists(pid);
480     }
481 
482     // Timers are sorted by id, which is unique.  This provides fast lookups.
operator <(Timer const & r) const483     bool operator<(Timer const &r) const {
484         return id < r.id;
485     }
486 
operator ==(timer_id_t r) const487     bool operator==(timer_id_t r) const {
488         return id == r;
489     }
490 
toString() const491     std::string toString() const {
492         return StringPrintf("id=%d pid=%d uid=%d status=%s",
493                             id, pid, uid, statusString(status));
494     }
495 
toString(nsecs_t now) const496     std::string toString(nsecs_t now) const {
497         uint32_t ms = nanoseconds_to_milliseconds(now - scheduled);
498         return StringPrintf("id=%d pid=%d uid=%d status=%s scheduled=%ums",
499                             id, pid, uid, statusString(status), -ms);
500     }
501 
maxId()502     static int maxId() {
503         return idGen;
504     }
505 
506   private:
507     /**
508      * Collect the name of the process.
509      */
getName() const510     std::string getName() const {
511         return getProcessName(pid);
512     }
513 
514     /**
515      * Freeze the process identified here.  Failures are not logged, as they are primarily due
516      * to a process having died (therefore failed to respond).
517      */
maybeFreezeProcess()518     void maybeFreezeProcess() {
519         if (!freeze || !alive()) return;
520 
521         // Construct a unique event ID.  The id*2 spans from the beginning of the freeze to the
522         // end of the freeze.  The id*2+1 spans the period inside the freeze/unfreeze
523         // operations.
524         const uint32_t cookie = id << 1;
525 
526         char tag[PATH_MAX];
527         snprintf(tag, sizeof(tag), "freeze(pid=%d,uid=%d)", pid, uid);
528         ATRACE_ASYNC_FOR_TRACK_BEGIN(ANR_TIMER_TRACK, tag, cookie);
529         if (SetProcessProfiles(uid, pid, {"Frozen"})) {
530             ALOGI("freeze %s name=%s", toString().c_str(), getName().c_str());
531             frozen = true;
532             ATRACE_ASYNC_FOR_TRACK_BEGIN(ANR_TIMER_TRACK, "frozen", cookie+1);
533         } else {
534             ALOGE("error: freezing %s name=%s error=%s",
535                   toString().c_str(), getName().c_str(), strerror(errno));
536             ATRACE_ASYNC_FOR_TRACK_END(ANR_TIMER_TRACK, cookie);
537         }
538     }
539 
maybeUnfreezeProcess()540     void maybeUnfreezeProcess() {
541         if (!freeze || !frozen) return;
542 
543         // See maybeFreezeProcess for an explanation of the cookie.
544         const uint32_t cookie = id << 1;
545 
546         ATRACE_ASYNC_FOR_TRACK_END(ANR_TIMER_TRACK, cookie+1);
547         if (SetProcessProfiles(uid, pid, {"Unfrozen"})) {
548             ALOGI("unfreeze %s name=%s", toString().c_str(), getName().c_str());
549             frozen = false;
550         } else {
551             ALOGE("error: unfreezing %s name=%s error=%s",
552                   toString().c_str(), getName().c_str(), strerror(errno));
553         }
554         ATRACE_ASYNC_FOR_TRACK_END(ANR_TIMER_TRACK, cookie);
555     }
556 
557     // Get the next free ID.  NOTIMER is never returned.
nextId()558     static timer_id_t nextId() {
559         timer_id_t id = idGen.fetch_add(1);
560         while (id == NOTIMER) {
561             id = idGen.fetch_add(1);
562         }
563         return id;
564     }
565 
566     // Log an event, non-verbose.
event(char const * tag)567     void event(char const* tag) {
568         event(tag, false);
569     }
570 
571     // Log an event, guarded by the debug flag.
event(char const * tag,bool verbose)572     void event(char const* tag, bool verbose) {
573         if (verbose) {
574             char name[PATH_MAX];
575             ALOGI_IF(DEBUG_TIMER, "event %s %s name=%s",
576                      tag, toString().c_str(), getName().c_str());
577         } else {
578             ALOGI_IF(DEBUG_TIMER, "event %s id=%u", tag, id);
579         }
580     }
581 
582     // IDs start at 1.  A zero ID is invalid.
583     static std::atomic<timer_id_t> idGen;
584 };
585 
586 // IDs start at 1.
587 std::atomic<AnrTimerService::timer_id_t> AnrTimerService::Timer::idGen(1);
588 
589 /**
590  * Manage a set of timers and notify clients when there is a timeout.
591  */
592 class AnrTimerService::Ticker {
593   private:
594     struct Entry {
595         const nsecs_t scheduled;
596         const timer_id_t id;
597         AnrTimerService* const service;
598 
Entryandroid::__anoncbc99b2b0111::AnrTimerService::Ticker::Entry599         Entry(nsecs_t scheduled, timer_id_t id, AnrTimerService* service) :
600                 scheduled(scheduled), id(id), service(service) {};
601 
operator <android::__anoncbc99b2b0111::AnrTimerService::Ticker::Entry602         bool operator<(const Entry &r) const {
603             return scheduled == r.scheduled ? id < r.id : scheduled < r.scheduled;
604         }
605     };
606 
607   public:
608 
609     // Construct the ticker.  This creates the timerfd file descriptor and starts the monitor
610     // thread.  The monitor thread is given a unique name.
Ticker()611     Ticker() :
612             id_(idGen_.fetch_add(1))
613     {
614         timerFd_ = timer_create();
615         if (timerFd_ < 0) {
616             ALOGE("failed to create timerFd: %s", strerror(errno));
617             return;
618         }
619 
620         if (pthread_create(&watcher_, 0, run, this) != 0) {
621             ALOGE("failed to start thread: %s", strerror(errno));
622             watcher_ = 0;
623             ::close(timerFd_);
624             return;
625         }
626 
627         // 16 is a magic number from the kernel.  Thread names may not be longer than this many
628         // bytes, including the terminating null.  The snprintf() method will truncate properly.
629         char name[16];
630         snprintf(name, sizeof(name), "AnrTimerService");
631         pthread_setname_np(watcher_, name);
632 
633         ready_ = true;
634     }
635 
~Ticker()636     ~Ticker() {
637         // Closing the file descriptor will close the monitor process, if any.
638         if (timerFd_ >= 0) ::close(timerFd_);
639         timerFd_ = -1;
640         watcher_ = 0;
641     }
642 
643     // Insert a timer.  Unless canceled, the timer will expire at the scheduled time.  If it
644     // expires, the service will be notified with the id.
insert(nsecs_t scheduled,timer_id_t id,AnrTimerService * service)645     void insert(nsecs_t scheduled, timer_id_t id, AnrTimerService *service) {
646         Entry e(scheduled, id, service);
647         AutoMutex _l(lock_);
648         timer_id_t front = headTimerId();
649         running_.insert(e);
650         if (front != headTimerId()) restartLocked();
651         maxRunning_ = std::max(maxRunning_, running_.size());
652     }
653 
654     // Remove a timer.  The timer is identified by its scheduled timeout and id.  Technically,
655     // the id is sufficient (because timer IDs are unique) but using the timeout is more
656     // efficient.
remove(nsecs_t scheduled,timer_id_t id)657     void remove(nsecs_t scheduled, timer_id_t id) {
658         Entry key(scheduled, id, 0);
659         AutoMutex _l(lock_);
660         timer_id_t front = headTimerId();
661         auto found = running_.find(key);
662         if (found != running_.end()) running_.erase(found);
663         if (running_.empty()) drained_++;
664     }
665 
666     // Remove every timer associated with the service.
remove(AnrTimerService const * service)667     void remove(AnrTimerService const* service) {
668         AutoMutex _l(lock_);
669         timer_id_t front = headTimerId();
670         for (auto i = running_.begin(); i != running_.end(); ) {
671             if (i->service == service) {
672                 i = running_.erase(i);
673             } else {
674                 i++;
675             }
676         }
677     }
678 
679     // The unique ID of this particular ticker. Used for debug and logging.
id() const680     size_t id() const {
681         return id_;
682     }
683 
684     // Return the number of timers still running.
running() const685     size_t running() const {
686         AutoMutex _l(lock_);
687         return running_.size();
688     }
689 
690     // Return the high-water mark of timers running.
maxRunning() const691     size_t maxRunning() const {
692         AutoMutex _l(lock_);
693         return maxRunning_;
694     }
695 
696   private:
697 
698     // Return the head of the running list.  The lock must be held by the caller.
headTimerId()699     timer_id_t headTimerId() {
700         return running_.empty() ? NOTIMER : running_.cbegin()->id;
701     }
702 
703     // A simple wrapper that meets the requirements of pthread_create.
run(void * arg)704     static void* run(void* arg) {
705         reinterpret_cast<Ticker*>(arg)->monitor();
706         ALOGI_IF(DEBUG_TICKER, "monitor exited");
707         return 0;
708     }
709 
710     // Loop (almost) forever.  Whenever the timerfd expires, expire as many entries as
711     // possible.  The loop terminates when the read fails; this generally indicates that the
712     // file descriptor has been closed and the thread can exit.
monitor()713     void monitor() {
714         uint64_t token = 0;
715         while (read(timerFd_, &token, sizeof(token)) == sizeof(token)) {
716             // Move expired timers into the local ready list.  This is done inside
717             // the lock.  Then, outside the lock, expire them.
718             nsecs_t current = now();
719             std::vector<Entry> ready;
720             {
721                 AutoMutex _l(lock_);
722                 while (!running_.empty()) {
723                     Entry timer = *(running_.begin());
724                     if (timer.scheduled <= current) {
725                         ready.push_back(timer);
726                         running_.erase(running_.cbegin());
727                     } else {
728                         break;
729                     }
730                 }
731                 restartLocked();
732             }
733             // Call the notifiers outside the lock.  Calling the notifiers with the lock held
734             // can lead to deadlock, if the Java-side handler also takes a lock.  Note that the
735             // timerfd is already running.
736             for (auto i = ready.begin(); i != ready.end(); i++) {
737                 Entry e = *i;
738                 e.service->expire(e.id);
739             }
740         }
741     }
742 
743     // Restart the ticker.  The caller must be holding the lock.  This method updates the
744     // timerFd_ to expire at the time of the first Entry in the running list.  This method does
745     // not check to see if the currently programmed expiration time is different from the
746     // scheduled expiration time of the first entry.
restartLocked()747     void restartLocked() {
748         if (!running_.empty()) {
749             Entry const x = *(running_.cbegin());
750             nsecs_t delay = x.scheduled - now();
751             // Force a minimum timeout of 10ns.
752             if (delay < 10) delay = 10;
753             time_t sec = nanoseconds_to_seconds(delay);
754             time_t ns = delay - seconds_to_nanoseconds(sec);
755             struct itimerspec setting = {
756                 .it_interval = { 0, 0 },
757                 .it_value = { sec, ns },
758             };
759             timer_settime(timerFd_, 0, &setting, nullptr);
760             restarted_++;
761             ALOGI_IF(DEBUG_TICKER, "restarted timerfd for %ld.%09ld", sec, ns);
762         } else {
763             const struct itimerspec setting = {
764                 .it_interval = { 0, 0 },
765                 .it_value = { 0, 0 },
766             };
767             timer_settime(timerFd_, 0, &setting, nullptr);
768             drained_++;
769             ALOGI_IF(DEBUG_TICKER, "drained timer list");
770         }
771     }
772 
773     // The usual lock.
774     mutable Mutex lock_;
775 
776     // True if the object was initialized properly.  Android does not support throwing C++
777     // exceptions, so clients should check this flag after constructing the object.  This is
778     // effectively const after the instance has been created.
779     bool ready_ = false;
780 
781     // The file descriptor of the timer.
782     int timerFd_ = -1;
783 
784     // The thread that monitors the timer.
785     pthread_t watcher_ = 0;
786 
787     // The number of times the timer was restarted.
788     size_t restarted_ = 0;
789 
790     // The number of times the timer list was exhausted.
791     size_t drained_ = 0;
792 
793     // The highwater mark of timers that are running.
794     size_t maxRunning_ = 0;
795 
796     // The list of timers that are scheduled.  This set is sorted by timeout and then by timer
797     // ID.  A set is sufficient (as opposed to a multiset) because timer IDs are unique.
798     std::set<Entry> running_;
799 
800     // A unique ID assigned to this instance.
801     const size_t id_;
802 
803     // The ID generator.
804     static std::atomic<size_t> idGen_;
805 };
806 
807 std::atomic<size_t> AnrTimerService::Ticker::idGen_;
808 
809 
AnrTimerService(char const * label,notifier_t notifier,void * cookie,jweak jtimer,Ticker * ticker,bool extend,bool freeze)810 AnrTimerService::AnrTimerService(char const* label, notifier_t notifier, void* cookie,
811             jweak jtimer, Ticker* ticker, bool extend, bool freeze) :
812         label_(label),
813         notifier_(notifier),
814         notifierCookie_(cookie),
815         notifierObject_(jtimer),
816         extend_(extend),
817         freeze_(freeze),
818         ticker_(ticker) {
819 
820     // Zero the statistics
821     maxRunning_ = 0;
822     memset(&counters_, 0, sizeof(counters_));
823 
824     ALOGI_IF(DEBUG_TIMER, "initialized %s", label);
825 }
826 
~AnrTimerService()827 AnrTimerService::~AnrTimerService() {
828     AutoMutex _l(lock_);
829     ticker_->remove(this);
830 }
831 
statusString(Status s)832 const char* AnrTimerService::statusString(Status s) {
833     switch (s) {
834         case Invalid: return "invalid";
835         case Running: return "running";
836         case Expired: return "expired";
837         case Canceled: return "canceled";
838     }
839     return "unknown";
840 }
841 
start(int pid,int uid,nsecs_t timeout)842 AnrTimerService::timer_id_t AnrTimerService::start(int pid, int uid, nsecs_t timeout) {
843     AutoMutex _l(lock_);
844     Timer t(pid, uid, timeout, extend_, freeze_);
845     insertLocked(t);
846     t.start();
847     counters_.started++;
848     return t.id;
849 }
850 
cancel(timer_id_t timerId)851 bool AnrTimerService::cancel(timer_id_t timerId) {
852     if (timerId == NOTIMER) return false;
853     AutoMutex _l(lock_);
854     Timer timer = removeLocked(timerId);
855 
856     bool result = timer.status == Running;
857     if (timer.status != Invalid) {
858         timer.cancel();
859     } else {
860         counters_.error++;
861     }
862     counters_.canceled++;
863     return result;
864 }
865 
accept(timer_id_t timerId)866 bool AnrTimerService::accept(timer_id_t timerId) {
867     if (timerId == NOTIMER) return false;
868     AutoMutex _l(lock_);
869     Timer timer = removeLocked(timerId);
870 
871     bool result = false;
872     if (timer.status == Expired) {
873         timer.accept();
874         if (timer.frozen) {
875             addExpiredLocked(timer);
876             result = true;
877         }
878     } else {
879         counters_.error++;
880     }
881     counters_.accepted++;
882     return result;
883 }
884 
discard(timer_id_t timerId)885 bool AnrTimerService::discard(timer_id_t timerId) {
886     if (timerId == NOTIMER) return false;
887     AutoMutex _l(lock_);
888     Timer timer = removeLocked(timerId);
889 
890     bool result = timer.status == Expired;
891     if (timer.status == Expired) {
892         timer.discard();
893     } else {
894         counters_.error++;
895     }
896     counters_.discarded++;
897     return result;
898 }
899 
release(timer_id_t id)900 bool AnrTimerService::release(timer_id_t id) {
901     if (id == NOTIMER) return true;
902 
903     Timer key(id);
904     bool okay = false;
905     AutoMutex _l(lock_);
906     std::set<Timer>::iterator found = expired_.find(key);
907     if (found != expired_.end()) {
908         Timer t = *found;
909         t.release();
910         counters_.released++;
911         expired_.erase(found);
912         okay = true;
913     } else {
914         ALOGI_IF(DEBUG_ERROR, "error: unable to release (%u)", id);
915         counters_.error++;
916     }
917     scrubExpiredLocked();
918     return okay;
919 }
920 
// Add a timer to the expired list.  The list is scrubbed first, so entries that are no longer
// alive are dropped before the new one is inserted.  The "Locked" suffix indicates that the
// caller is expected to hold the service lock; accept() does so.
void AnrTimerService::addExpiredLocked(Timer const& timer) {
    scrubExpiredLocked();
    expired_.insert(timer);
}
925 
scrubExpiredLocked()926 void AnrTimerService::scrubExpiredLocked() {
927     for (auto i = expired_.begin(); i != expired_.end(); ) {
928         if (!i->alive()) {
929             i = expired_.erase(i);
930         } else {
931             i++;
932         }
933     }
934 }
935 
936 // Hold the lock in order to manage the running list.
937 // the listener.
expire(timer_id_t timerId)938 void AnrTimerService::expire(timer_id_t timerId) {
939     // Save the timer attributes for the notification
940     int pid = 0;
941     int uid = 0;
942     nsecs_t elapsed = 0;
943     bool expired = false;
944     {
945         AutoMutex _l(lock_);
946         Timer t = removeLocked(timerId);
947         expired = t.expire();
948         if (t.status == Invalid) {
949             ALOGW_IF(DEBUG_ERROR, "error: expired invalid timer %u", timerId);
950             return;
951         } else {
952             // The timer is either Running (because it was extended) or expired (and is awaiting an
953             // accept or discard).
954             insertLocked(t);
955         }
956         pid = t.pid;
957         uid = t.uid;
958         elapsed = now() - t.started;
959     }
960 
961     if (expired) {
962         counters_.expired++;
963     } else {
964         counters_.extended++;
965     }
966 
967     // Deliver the notification outside of the lock.
968     if (expired) {
969         if (!notifier_(timerId, pid, uid, elapsed, notifierCookie_, notifierObject_)) {
970             AutoMutex _l(lock_);
971             // Notification failed, which means the listener will never call accept() or
972             // discard().  Do not reinsert the timer.
973             discard(timerId);
974         }
975     }
976 }
977 
insertLocked(const Timer & t)978 void AnrTimerService::insertLocked(const Timer& t) {
979     running_.insert(t);
980     if (t.status == Running) {
981         // Only forward running timers to the ticker.  Expired timers are handled separately.
982         ticker_->insert(t.scheduled, t.id, this);
983     }
984     maxRunning_ = std::max(maxRunning_, running_.size());
985 }
986 
removeLocked(timer_id_t timerId)987 AnrTimerService::Timer AnrTimerService::removeLocked(timer_id_t timerId) {
988     Timer key(timerId);
989     auto found = running_.find(key);
990     if (found != running_.end()) {
991         Timer result = *found;
992         running_.erase(found);
993         ticker_->remove(result.scheduled, result.id);
994         if (running_.size() == 0) counters_.drained++;
995         return result;
996     }
997     return Timer();
998 }
999 
getDump() const1000 std::vector<std::string> AnrTimerService::getDump() const {
1001     std::vector<std::string> r;
1002     AutoMutex _l(lock_);
1003     r.push_back(StringPrintf("started:%zu canceled:%zu accepted:%zu discarded:%zu expired:%zu",
1004                              counters_.started,
1005                              counters_.canceled,
1006                              counters_.accepted,
1007                              counters_.discarded,
1008                              counters_.expired));
1009     r.push_back(StringPrintf("extended:%zu drained:%zu error:%zu running:%zu maxRunning:%zu",
1010                              counters_.extended,
1011                              counters_.drained,
1012                              counters_.error,
1013                              running_.size(),
1014                              maxRunning_));
1015     r.push_back(StringPrintf("released:%zu releasing:%zu",
1016                              counters_.released,
1017                              expired_.size()));
1018     r.push_back(StringPrintf("ticker:%zu ticking:%zu maxTicking:%zu",
1019                              ticker_->id(),
1020                              ticker_->running(),
1021                              ticker_->maxRunning()));
1022     return r;
1023 }
1024 
/**
 * True if the native methods are supported in this process.  The flag starts out false and is
 * assigned by register_android_server_utils_AnrTimer().  Every JNI entry point in this file
 * checks the flag first and returns a neutral value when it is false.
 */
bool nativeSupportEnabled = false;
1030 
/**
 * Singleton/globals for the anr timer.  AnrArgs holds what is needed to deliver an expiration
 * to Java (the class, the method ID of the callback, and the JavaVM) together with the shared
 * Ticker*.  The JNI layer creates a single Ticker for all operational AnrTimers.  The Ticker is
 * created when the first AnrTimer is created; this means that the Ticker is only created if
 * native anr timers are used.  No use count is kept in this structure.
 */
// Held by anrNotify(), anrTimerCreate() and anrTimerClose().
static Mutex gAnrLock;
struct AnrArgs {
    // A global reference to the Java AnrTimer class.
    jclass clazz = NULL;
    // The Java method that is called when a timer expires.
    jmethodID func = NULL;
    // The VM to which notifying threads attach themselves.
    JavaVM* vm = NULL;
    // The ticker shared by all services; null until the first service is created.
    AnrTimerService::Ticker* ticker = nullptr;
};
static AnrArgs gAnrArgs;
1045 
1046 // The cookie is the address of the AnrArgs object to which the notification should be sent.
anrNotify(AnrTimerService::timer_id_t timerId,int pid,int uid,nsecs_t elapsed,void * cookie,jweak jtimer)1047 static bool anrNotify(AnrTimerService::timer_id_t timerId, int pid, int uid, nsecs_t elapsed,
1048                       void* cookie, jweak jtimer) {
1049     AutoMutex _l(gAnrLock);
1050     AnrArgs* target = reinterpret_cast<AnrArgs* >(cookie);
1051     JNIEnv *env;
1052     if (target->vm->AttachCurrentThread(&env, 0) != JNI_OK) {
1053         ALOGE("failed to attach thread to JavaVM");
1054         return false;
1055     }
1056     jboolean r = false;
1057     jobject timer = env->NewGlobalRef(jtimer);
1058     if (timer != nullptr) {
1059         // Convert the elsapsed time from ns (native) to ms (Java)
1060         r = env->CallBooleanMethod(timer, target->func, timerId, pid, uid, ns2ms(elapsed));
1061         env->DeleteGlobalRef(timer);
1062     }
1063     target->vm->DetachCurrentThread();
1064     return r;
1065 }
1066 
anrTimerSupported(JNIEnv * env,jclass)1067 jboolean anrTimerSupported(JNIEnv* env, jclass) {
1068     return nativeSupportEnabled;
1069 }
1070 
anrTimerCreate(JNIEnv * env,jobject jtimer,jstring jname,jboolean extend,jboolean freeze)1071 jlong anrTimerCreate(JNIEnv* env, jobject jtimer, jstring jname,
1072                      jboolean extend, jboolean freeze) {
1073     if (!nativeSupportEnabled) return 0;
1074     AutoMutex _l(gAnrLock);
1075     if (gAnrArgs.ticker == nullptr) {
1076         gAnrArgs.ticker = new AnrTimerService::Ticker();
1077     }
1078 
1079     ScopedUtfChars name(env, jname);
1080     jobject timer = env->NewWeakGlobalRef(jtimer);
1081     AnrTimerService* service = new AnrTimerService(name.c_str(),
1082             anrNotify, &gAnrArgs, timer, gAnrArgs.ticker, extend, freeze);
1083     return reinterpret_cast<jlong>(service);
1084 }
1085 
// Convert a handle received from Java back into a service pointer.  This is the inverse of the
// cast that anrTimerCreate() applies to its return value.  No validation is performed.
AnrTimerService *toService(jlong pointer) {
    return reinterpret_cast<AnrTimerService*>(pointer);
}
1089 
anrTimerClose(JNIEnv * env,jclass,jlong ptr)1090 jint anrTimerClose(JNIEnv* env, jclass, jlong ptr) {
1091     if (!nativeSupportEnabled) return -1;
1092     if (ptr == 0) return -1;
1093     AutoMutex _l(gAnrLock);
1094     AnrTimerService *s = toService(ptr);
1095     env->DeleteWeakGlobalRef(s->jtimer());
1096     delete s;
1097     return 0;
1098 }
1099 
anrTimerStart(JNIEnv * env,jclass,jlong ptr,jint pid,jint uid,jlong timeout)1100 jint anrTimerStart(JNIEnv* env, jclass, jlong ptr, jint pid, jint uid, jlong timeout) {
1101     if (!nativeSupportEnabled) return 0;
1102     // On the Java side, timeouts are expressed in milliseconds and must be converted to
1103     // nanoseconds before being passed to the library code.
1104     return toService(ptr)->start(pid, uid, milliseconds_to_nanoseconds(timeout));
1105 }
1106 
anrTimerCancel(JNIEnv * env,jclass,jlong ptr,jint timerId)1107 jboolean anrTimerCancel(JNIEnv* env, jclass, jlong ptr, jint timerId) {
1108     if (!nativeSupportEnabled) return false;
1109     return toService(ptr)->cancel(timerId);
1110 }
1111 
anrTimerAccept(JNIEnv * env,jclass,jlong ptr,jint timerId)1112 jboolean anrTimerAccept(JNIEnv* env, jclass, jlong ptr, jint timerId) {
1113     if (!nativeSupportEnabled) return false;
1114     return toService(ptr)->accept(timerId);
1115 }
1116 
anrTimerDiscard(JNIEnv * env,jclass,jlong ptr,jint timerId)1117 jboolean anrTimerDiscard(JNIEnv* env, jclass, jlong ptr, jint timerId) {
1118     if (!nativeSupportEnabled) return false;
1119     return toService(ptr)->discard(timerId);
1120 }
1121 
anrTimerRelease(JNIEnv * env,jclass,jlong ptr,jint timerId)1122 jboolean anrTimerRelease(JNIEnv* env, jclass, jlong ptr, jint timerId) {
1123     if (!nativeSupportEnabled) return false;
1124     return toService(ptr)->release(timerId);
1125 }
1126 
anrTimerDump(JNIEnv * env,jclass,jlong ptr)1127 jobjectArray anrTimerDump(JNIEnv *env, jclass, jlong ptr) {
1128     if (!nativeSupportEnabled) return nullptr;
1129     std::vector<std::string> stats = toService(ptr)->getDump();
1130     jclass sclass = env->FindClass("java/lang/String");
1131     jobjectArray r = env->NewObjectArray(stats.size(), sclass, nullptr);
1132     for (size_t i = 0; i < stats.size(); i++) {
1133         env->SetObjectArrayElement(r, i, env->NewStringUTF(stats[i].c_str()));
1134     }
1135     return r;
1136 }
1137 
1138 static const JNINativeMethod methods[] = {
1139     {"nativeAnrTimerSupported", "()Z",  (void*) anrTimerSupported},
1140     {"nativeAnrTimerCreate",   "(Ljava/lang/String;ZZ)J", (void*) anrTimerCreate},
1141     {"nativeAnrTimerClose",    "(J)I",     (void*) anrTimerClose},
1142     {"nativeAnrTimerStart",    "(JIIJ)I",  (void*) anrTimerStart},
1143     {"nativeAnrTimerCancel",   "(JI)Z",    (void*) anrTimerCancel},
1144     {"nativeAnrTimerAccept",   "(JI)Z",    (void*) anrTimerAccept},
1145     {"nativeAnrTimerDiscard",  "(JI)Z",    (void*) anrTimerDiscard},
1146     {"nativeAnrTimerRelease",  "(JI)Z",    (void*) anrTimerRelease},
1147     {"nativeAnrTimerDump",     "(J)[Ljava/lang/String;", (void*) anrTimerDump},
1148 };
1149 
1150 } // anonymous namespace
1151 
register_android_server_utils_AnrTimer(JNIEnv * env)1152 int register_android_server_utils_AnrTimer(JNIEnv* env)
1153 {
1154     static const char *className = "com/android/server/utils/AnrTimer";
1155     jniRegisterNativeMethods(env, className, methods, NELEM(methods));
1156 
1157     nativeSupportEnabled = NATIVE_SUPPORT;
1158 
1159     // Do not perform any further initialization if native support is not enabled.
1160     if (!nativeSupportEnabled) return 0;
1161 
1162     jclass service = FindClassOrDie(env, className);
1163     gAnrArgs.clazz = MakeGlobalRefOrDie(env, service);
1164     gAnrArgs.func = env->GetMethodID(gAnrArgs.clazz, "expire", "(IIIJ)Z");
1165     env->GetJavaVM(&gAnrArgs.vm);
1166 
1167     return 0;
1168 }
1169 
1170 } // namespace android
1171