1 /*
2 * Copyright (C) 2023 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <time.h>
18 #include <pthread.h>
19 #include <sys/timerfd.h>
20 #include <inttypes.h>
21 #include <sys/stat.h>
22
23 #include <algorithm>
24 #include <list>
25 #include <memory>
26 #include <set>
27 #include <string>
28 #include <vector>
29
30 #define LOG_TAG "AnrTimerService"
31 #define ATRACE_TAG ATRACE_TAG_ACTIVITY_MANAGER
32 #define ANR_TIMER_TRACK "AnrTimerTrack"
33
34 #include <jni.h>
35 #include <nativehelper/JNIHelp.h>
36 #include "android_runtime/AndroidRuntime.h"
37 #include "core_jni_helpers.h"
38
39 #include <processgroup/processgroup.h>
40 #include <utils/Log.h>
41 #include <utils/Mutex.h>
42 #include <utils/Timers.h>
43 #include <utils/Trace.h>
44
45 #include <android-base/logging.h>
46 #include <android-base/stringprintf.h>
47 #include <android-base/unique_fd.h>
48
49 using ::android::base::StringPrintf;
50
51
52 // Native support is unavailable on WIN32 platforms. This macro preemptively disables it.
53 #ifdef _WIN32
54 #define NATIVE_SUPPORT 0
55 #else
56 #define NATIVE_SUPPORT 1
57 #endif
58
59 namespace android {
60
61 // using namespace android;
62
63 // Almost nothing in this module needs to be in the android namespace.
64 namespace {
65
66 // If not on a Posix system, create stub timerfd methods. These are defined to allow
67 // compilation. They are not functional. Also, they do not leak outside this compilation unit.
68 #ifdef _WIN32
timer_create()69 int timer_create() {
70 return -1;
71 }
timer_settime(int,int,void const *,void *)72 int timer_settime(int, int, void const *, void *) {
73 return -1;
74 }
75 #else
76 int timer_create() {
77 return timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
78 }
79 int timer_settime(int fd, int flags, const struct itimerspec *new_value,
80 struct itimerspec *_Nullable old_value) {
81 return timerfd_settime(fd, flags, new_value, old_value);
82 }
83 #endif
84
// Gate for per-timer debug log messages.  It is normally false so that the guarded log
// statements can be dropped from the image.  Set it true in a unit test image to debug test
// failures.
constexpr bool DEBUG_TIMER = false;

// Gate for log messages that debug the timer thread itself.
constexpr bool DEBUG_TICKER = false;

// Gate for error logging.
constexpr bool DEBUG_ERROR = true;
95
96 // Return the current time in nanoseconds. This time is relative to system boot.
now()97 nsecs_t now() {
98 return systemTime(SYSTEM_TIME_MONOTONIC);
99 }
100
// Report whether /proc/<pid> can be stat()ed.  True means the process exists; false means
// that its existence could not be confirmed.
bool processExists(pid_t pid) {
    const std::string procDir = "/proc/" + std::to_string(pid);
    struct stat info;
    return stat(procDir.c_str(), &info) == 0;
}
108
// Return the name of the process whose pid is the input.  The name is the first
// NUL-terminated string found in /proc/<pid>/cmdline.  If that file cannot be opened, the
// name will be "notfound".  If reading the file fails, the name will be "err: <reason>".
std::string getProcessName(pid_t pid) {
    char buffer[PATH_MAX];
    snprintf(buffer, sizeof(buffer), "/proc/%d/cmdline", pid);
    int fd = ::open(buffer, O_RDONLY | O_CLOEXEC);
    if (fd < 0) {
        return "notfound";
    }

    // Read until end-of-file, an error, or the buffer is full.  One byte is always reserved
    // for the terminator.
    size_t pos = 0;
    ssize_t result = 0;
    while (pos < sizeof(buffer) - 1) {
        result = ::read(fd, buffer + pos, (sizeof(buffer) - pos) - 1);
        if (result <= 0) {
            break;
        }
        // Advance past the bytes just read.  Without this, every read overwrites the start
        // of the buffer and the terminator below is written at offset zero.
        pos += static_cast<size_t>(result);
    }
    // Capture errno before close() has a chance to overwrite it.
    const int readErrno = errno;
    ::close(fd);

    if (result < 0) {
        snprintf(buffer, sizeof(buffer), "err: %s", strerror(readErrno));
    } else {
        buffer[pos] = 0;
    }
    return std::string(buffer);
}
136
/**
 * This class encapsulates the anr timer service.  The service manages a list of individual
 * timers.  A timer is either Running or Expired.  Once started, a timer may be canceled or
 * accepted.  Both actions collect statistics about the timer and then delete it.  An expired
 * timer may also be discarded, which deletes the timer without collecting any statistics.
 *
 * All public methods in this class are thread-safe.
 */
class AnrTimerService {
  private:
    // Helper classes; both are defined later in this file.
    class ProcessStats;
    class Timer;

  public:

    // The class that actually runs the clock.
    class Ticker;

    // A timer is identified by a timer_id_t.  Timer IDs are unique in the moment.
    using timer_id_t = uint32_t;

    // A manifest constant.  No timer is ever created with this ID.
    static const timer_id_t NOTIMER = 0;

    // A notifier is called with a timer ID, the pid and uid that were originally assigned to
    // the timer, the elapsed time, and the two client cookies (the void* and the Java
    // object).  The elapsed time is the time since the timer was started.  The service
    // treats a false return value as a failed delivery and discards the timer.
    using notifier_t = bool (*)(timer_id_t, int pid, int uid, nsecs_t elapsed,
                                void* cookie, jweak object);

    // The states of a timer.  Invalid marks a timer that was not found in a collection.
    enum Status {
        Invalid,
        Running,
        Expired,
        Canceled
    };

    /**
     * Create a timer service.  The service is initialized with a name used for logging.  The
     * constructor is also given the notifier callback, and two cookies for the callback: the
     * traditional void* and Java object pointer.  The Ticker is the clock that schedules the
     * timers of this service.  The remaining parameters are configuration options.
     */
    AnrTimerService(char const* label, notifier_t notifier, void* cookie, jweak jtimer, Ticker*,
                    bool extend, bool freeze);

    // Delete the service and clean up memory.
    ~AnrTimerService();

    // Start a timer and return the associated timer ID.  It does not matter if the same pid/uid
    // are already in the running list.  Once start() is called, one of cancel(), accept(), or
    // discard() must be called to clean up the internal data structures.
    timer_id_t start(int pid, int uid, nsecs_t timeout);

    // Cancel a timer and remove it from all lists.  This is called when the event being timed
    // has occurred.  If the timer was Running, the function returns true.  The other
    // possibilities are that the timer was Expired or non-existent; in both cases, the function
    // returns false.
    bool cancel(timer_id_t timerId);

    // Accept a timer.  This is called when the upper layers accept that a timer has expired.
    // If the timer was Expired and its process was frozen, the timer is pushed to the expired
    // list and 'true' is returned.  Otherwise the function returns false.
    bool accept(timer_id_t timerId);

    // Discard a timer without collecting any statistics.  This is called when the upper layers
    // recognize that a timer expired but decide the expiration is not significant.  If the
    // timer was Expired, the function returns true.  The other possibilities are that the timer
    // was Running or non-existent; in both cases, the function returns false.
    bool discard(timer_id_t timerId);

    // A timer has expired.  This is invoked by the Ticker with the ID of the timer.
    void expire(timer_id_t);

    // Release a timer.  The timer must be in the expired list.  The function returns true if
    // the timer was found and released (or if the ID is NOTIMER) and false otherwise.
    bool release(timer_id_t);

    // Return the Java object associated with this instance.
    jweak jtimer() const {
        return notifierObject_;
    }

    // Return the per-instance statistics, formatted as lines of text.
    std::vector<std::string> getDump() const;

  private:
    // The service cannot be copied.
    AnrTimerService(AnrTimerService const&) = delete;

    // Insert a timer into the running list.  The lock must be held by the caller.
    void insertLocked(const Timer&);

    // Remove a timer from the lists and return it.  If the timer is not found, an Invalid
    // timer is returned.  The lock must be held by the caller.
    Timer removeLocked(timer_id_t timerId);

    // Add a timer to the expired list.  The lock must be held by the caller.
    void addExpiredLocked(Timer const&);

    // Scrub the expired list by removing all entries for non-existent processes.  The expired
    // lock must be held by the caller.
    void scrubExpiredLocked();

    // Return a string representation of a status value.
    static const char* statusString(Status);

    // The name of this service, for logging.
    std::string const label_;

    // The callback that is invoked when a timer expires.
    notifier_t const notifier_;

    // The two cookies passed to the notifier.
    void* notifierCookie_;
    jweak notifierObject_;

    // True if extensions can be granted to expired timers.
    const bool extend_;

    // True if the service should freeze anr'ed processes.
    const bool freeze_;

    // The global lock
    mutable Mutex lock_;

    // The list of all timers that are still running.  This is sorted by ID for fast lookup.
    std::set<Timer> running_;

    // The list of all expired timers that are awaiting release.
    std::set<Timer> expired_;

    // The maximum number of active timers.
    size_t maxRunning_;

    // Simple counters
    struct Counters {
        // The number of timers started, canceled, accepted, discarded, and expired.
        size_t started;
        size_t canceled;
        size_t accepted;
        size_t discarded;
        size_t expired;
        // The number of timers that were extended and the number that were released.
        size_t extended;
        size_t released;

        // The number of times there were zero active timers.
        size_t drained;

        // The number of times a protocol error was seen.
        size_t error;
    };

    Counters counters_;

    // The clock used by this AnrTimerService.
    Ticker *ticker_;
};
293
294 class AnrTimerService::ProcessStats {
295 public:
296 nsecs_t cpu_time;
297 nsecs_t cpu_delay;
298
ProcessStats()299 ProcessStats() :
300 cpu_time(0),
301 cpu_delay(0) {
302 }
303
304 // Collect all statistics for a process. Return true if the fill succeeded and false if it
305 // did not. If there is any problem, the statistics are zeroed.
fill(int pid)306 bool fill(int pid) {
307 cpu_time = 0;
308 cpu_delay = 0;
309
310 char path[PATH_MAX];
311 snprintf(path, sizeof(path), "/proc/%u/schedstat", pid);
312 ::android::base::unique_fd fd(open(path, O_RDONLY | O_CLOEXEC));
313 if (!fd.ok()) {
314 return false;
315 }
316 char buffer[128];
317 ssize_t len = read(fd, buffer, sizeof(buffer));
318 if (len <= 0) {
319 return false;
320 }
321 if (len >= sizeof(buffer)) {
322 ALOGE("proc file too big: %s", path);
323 return false;
324 }
325 buffer[len] = 0;
326 unsigned long t1;
327 unsigned long t2;
328 if (sscanf(buffer, "%lu %lu", &t1, &t2) != 2) {
329 return false;
330 }
331 cpu_time = t1;
332 cpu_delay = t2;
333 return true;
334 }
335 };
336
337 class AnrTimerService::Timer {
338 public:
339 // A unique ID assigned when the Timer is created.
340 timer_id_t const id;
341
342 // The creation parameters. The timeout is the original, relative timeout.
343 int const pid;
344 int const uid;
345 nsecs_t const timeout;
346 bool const extend;
347 bool const freeze;
348
349 // The state of this timer.
350 Status status;
351
352 // The time at which the timer was started.
353 nsecs_t started;
354
355 // The scheduled timeout. This is an absolute time. It may be extended.
356 nsecs_t scheduled;
357
358 // True if this timer has been extended.
359 bool extended;
360
361 // True if the process has been frozen.
362 bool frozen;
363
364 // Bookkeeping for extensions. The initial state of the process. This is collected only if
365 // the timer is extensible.
366 ProcessStats initial;
367
368 // The default constructor is used to create timers that are Invalid, representing the "not
369 // found" condition when a collection is searched.
Timer()370 Timer() :
371 id(NOTIMER),
372 pid(0),
373 uid(0),
374 timeout(0),
375 extend(false),
376 freeze(false),
377 status(Invalid),
378 started(0),
379 scheduled(0),
380 extended(false),
381 frozen(false) {
382 }
383
384 // This constructor creates a timer with the specified id. This can be used as the argument
385 // to find().
Timer(timer_id_t id)386 Timer(timer_id_t id) :
387 id(id),
388 pid(0),
389 uid(0),
390 timeout(0),
391 extend(false),
392 freeze(false),
393 status(Invalid),
394 started(0),
395 scheduled(0),
396 extended(false),
397 frozen(false) {
398 }
399
400 // Create a new timer. This starts the timer.
Timer(int pid,int uid,nsecs_t timeout,bool extend,bool freeze)401 Timer(int pid, int uid, nsecs_t timeout, bool extend, bool freeze) :
402 id(nextId()),
403 pid(pid),
404 uid(uid),
405 timeout(timeout),
406 extend(extend),
407 freeze(pid != 0 && freeze),
408 status(Running),
409 started(now()),
410 scheduled(started + timeout),
411 extended(false),
412 frozen(false) {
413 if (extend && pid != 0) {
414 initial.fill(pid);
415 }
416 // A zero-pid is odd but it means the upper layers will never ANR the process. Freezing
417 // is always disabled. (It won't work anyway, but disabling it avoids error messages.)
418 ALOGI_IF(DEBUG_ERROR && pid == 0, "error: zero-pid %s", toString().c_str());
419 }
420
421 // Start a timer. This interface exists to generate log messages, if enabled.
start()422 void start() {
423 event("start", /* verbose= */ true);
424 }
425
426 // Cancel a timer.
cancel()427 void cancel() {
428 ALOGW_IF(DEBUG_ERROR && status != Running, "error: canceling %s", toString().c_str());
429 status = Canceled;
430 event("cancel");
431 }
432
433 // Expire a timer. Return true if the timer is expired and false otherwise. The function
434 // returns false if the timer is eligible for extension. If the function returns false, the
435 // scheduled time is updated.
expire()436 bool expire() {
437 nsecs_t extension = 0;
438 if (extend && !extended) {
439 // Only one extension is permitted.
440 extended = true;
441 ProcessStats current;
442 current.fill(pid);
443 extension = current.cpu_delay - initial.cpu_delay;
444 if (extension < 0) extension = 0;
445 if (extension > timeout) extension = timeout;
446 }
447 if (extension == 0) {
448 status = Expired;
449 maybeFreezeProcess();
450 event("expire");
451 } else {
452 scheduled += extension;
453 event("extend");
454 }
455 return status == Expired;
456 }
457
458 // Accept a timeout. This does nothing other than log the state machine change.
accept()459 void accept() {
460 event("accept");
461 }
462
463 // Discard a timeout.
discard()464 void discard() {
465 maybeUnfreezeProcess();
466 status = Canceled;
467 event("discard");
468 }
469
470 // Release the timer.
release()471 void release() {
472 // If timer represents a frozen process, unfreeze it at this time.
473 maybeUnfreezeProcess();
474 event("release");
475 }
476
477 // Return true if this timer corresponds to a running process.
alive() const478 bool alive() const {
479 return processExists(pid);
480 }
481
482 // Timers are sorted by id, which is unique. This provides fast lookups.
operator <(Timer const & r) const483 bool operator<(Timer const &r) const {
484 return id < r.id;
485 }
486
operator ==(timer_id_t r) const487 bool operator==(timer_id_t r) const {
488 return id == r;
489 }
490
toString() const491 std::string toString() const {
492 return StringPrintf("id=%d pid=%d uid=%d status=%s",
493 id, pid, uid, statusString(status));
494 }
495
toString(nsecs_t now) const496 std::string toString(nsecs_t now) const {
497 uint32_t ms = nanoseconds_to_milliseconds(now - scheduled);
498 return StringPrintf("id=%d pid=%d uid=%d status=%s scheduled=%ums",
499 id, pid, uid, statusString(status), -ms);
500 }
501
maxId()502 static int maxId() {
503 return idGen;
504 }
505
506 private:
507 /**
508 * Collect the name of the process.
509 */
getName() const510 std::string getName() const {
511 return getProcessName(pid);
512 }
513
514 /**
515 * Freeze the process identified here. Failures are not logged, as they are primarily due
516 * to a process having died (therefore failed to respond).
517 */
maybeFreezeProcess()518 void maybeFreezeProcess() {
519 if (!freeze || !alive()) return;
520
521 // Construct a unique event ID. The id*2 spans from the beginning of the freeze to the
522 // end of the freeze. The id*2+1 spans the period inside the freeze/unfreeze
523 // operations.
524 const uint32_t cookie = id << 1;
525
526 char tag[PATH_MAX];
527 snprintf(tag, sizeof(tag), "freeze(pid=%d,uid=%d)", pid, uid);
528 ATRACE_ASYNC_FOR_TRACK_BEGIN(ANR_TIMER_TRACK, tag, cookie);
529 if (SetProcessProfiles(uid, pid, {"Frozen"})) {
530 ALOGI("freeze %s name=%s", toString().c_str(), getName().c_str());
531 frozen = true;
532 ATRACE_ASYNC_FOR_TRACK_BEGIN(ANR_TIMER_TRACK, "frozen", cookie+1);
533 } else {
534 ALOGE("error: freezing %s name=%s error=%s",
535 toString().c_str(), getName().c_str(), strerror(errno));
536 ATRACE_ASYNC_FOR_TRACK_END(ANR_TIMER_TRACK, cookie);
537 }
538 }
539
maybeUnfreezeProcess()540 void maybeUnfreezeProcess() {
541 if (!freeze || !frozen) return;
542
543 // See maybeFreezeProcess for an explanation of the cookie.
544 const uint32_t cookie = id << 1;
545
546 ATRACE_ASYNC_FOR_TRACK_END(ANR_TIMER_TRACK, cookie+1);
547 if (SetProcessProfiles(uid, pid, {"Unfrozen"})) {
548 ALOGI("unfreeze %s name=%s", toString().c_str(), getName().c_str());
549 frozen = false;
550 } else {
551 ALOGE("error: unfreezing %s name=%s error=%s",
552 toString().c_str(), getName().c_str(), strerror(errno));
553 }
554 ATRACE_ASYNC_FOR_TRACK_END(ANR_TIMER_TRACK, cookie);
555 }
556
557 // Get the next free ID. NOTIMER is never returned.
nextId()558 static timer_id_t nextId() {
559 timer_id_t id = idGen.fetch_add(1);
560 while (id == NOTIMER) {
561 id = idGen.fetch_add(1);
562 }
563 return id;
564 }
565
566 // Log an event, non-verbose.
event(char const * tag)567 void event(char const* tag) {
568 event(tag, false);
569 }
570
571 // Log an event, guarded by the debug flag.
event(char const * tag,bool verbose)572 void event(char const* tag, bool verbose) {
573 if (verbose) {
574 char name[PATH_MAX];
575 ALOGI_IF(DEBUG_TIMER, "event %s %s name=%s",
576 tag, toString().c_str(), getName().c_str());
577 } else {
578 ALOGI_IF(DEBUG_TIMER, "event %s id=%u", tag, id);
579 }
580 }
581
582 // IDs start at 1. A zero ID is invalid.
583 static std::atomic<timer_id_t> idGen;
584 };
585
586 // IDs start at 1.
587 std::atomic<AnrTimerService::timer_id_t> AnrTimerService::Timer::idGen(1);
588
589 /**
590 * Manage a set of timers and notify clients when there is a timeout.
591 */
592 class AnrTimerService::Ticker {
593 private:
594 struct Entry {
595 const nsecs_t scheduled;
596 const timer_id_t id;
597 AnrTimerService* const service;
598
Entryandroid::__anoncbc99b2b0111::AnrTimerService::Ticker::Entry599 Entry(nsecs_t scheduled, timer_id_t id, AnrTimerService* service) :
600 scheduled(scheduled), id(id), service(service) {};
601
operator <android::__anoncbc99b2b0111::AnrTimerService::Ticker::Entry602 bool operator<(const Entry &r) const {
603 return scheduled == r.scheduled ? id < r.id : scheduled < r.scheduled;
604 }
605 };
606
607 public:
608
609 // Construct the ticker. This creates the timerfd file descriptor and starts the monitor
610 // thread. The monitor thread is given a unique name.
Ticker()611 Ticker() :
612 id_(idGen_.fetch_add(1))
613 {
614 timerFd_ = timer_create();
615 if (timerFd_ < 0) {
616 ALOGE("failed to create timerFd: %s", strerror(errno));
617 return;
618 }
619
620 if (pthread_create(&watcher_, 0, run, this) != 0) {
621 ALOGE("failed to start thread: %s", strerror(errno));
622 watcher_ = 0;
623 ::close(timerFd_);
624 return;
625 }
626
627 // 16 is a magic number from the kernel. Thread names may not be longer than this many
628 // bytes, including the terminating null. The snprintf() method will truncate properly.
629 char name[16];
630 snprintf(name, sizeof(name), "AnrTimerService");
631 pthread_setname_np(watcher_, name);
632
633 ready_ = true;
634 }
635
~Ticker()636 ~Ticker() {
637 // Closing the file descriptor will close the monitor process, if any.
638 if (timerFd_ >= 0) ::close(timerFd_);
639 timerFd_ = -1;
640 watcher_ = 0;
641 }
642
643 // Insert a timer. Unless canceled, the timer will expire at the scheduled time. If it
644 // expires, the service will be notified with the id.
insert(nsecs_t scheduled,timer_id_t id,AnrTimerService * service)645 void insert(nsecs_t scheduled, timer_id_t id, AnrTimerService *service) {
646 Entry e(scheduled, id, service);
647 AutoMutex _l(lock_);
648 timer_id_t front = headTimerId();
649 running_.insert(e);
650 if (front != headTimerId()) restartLocked();
651 maxRunning_ = std::max(maxRunning_, running_.size());
652 }
653
654 // Remove a timer. The timer is identified by its scheduled timeout and id. Technically,
655 // the id is sufficient (because timer IDs are unique) but using the timeout is more
656 // efficient.
remove(nsecs_t scheduled,timer_id_t id)657 void remove(nsecs_t scheduled, timer_id_t id) {
658 Entry key(scheduled, id, 0);
659 AutoMutex _l(lock_);
660 timer_id_t front = headTimerId();
661 auto found = running_.find(key);
662 if (found != running_.end()) running_.erase(found);
663 if (running_.empty()) drained_++;
664 }
665
666 // Remove every timer associated with the service.
remove(AnrTimerService const * service)667 void remove(AnrTimerService const* service) {
668 AutoMutex _l(lock_);
669 timer_id_t front = headTimerId();
670 for (auto i = running_.begin(); i != running_.end(); ) {
671 if (i->service == service) {
672 i = running_.erase(i);
673 } else {
674 i++;
675 }
676 }
677 }
678
679 // The unique ID of this particular ticker. Used for debug and logging.
id() const680 size_t id() const {
681 return id_;
682 }
683
684 // Return the number of timers still running.
running() const685 size_t running() const {
686 AutoMutex _l(lock_);
687 return running_.size();
688 }
689
690 // Return the high-water mark of timers running.
maxRunning() const691 size_t maxRunning() const {
692 AutoMutex _l(lock_);
693 return maxRunning_;
694 }
695
696 private:
697
698 // Return the head of the running list. The lock must be held by the caller.
headTimerId()699 timer_id_t headTimerId() {
700 return running_.empty() ? NOTIMER : running_.cbegin()->id;
701 }
702
703 // A simple wrapper that meets the requirements of pthread_create.
run(void * arg)704 static void* run(void* arg) {
705 reinterpret_cast<Ticker*>(arg)->monitor();
706 ALOGI_IF(DEBUG_TICKER, "monitor exited");
707 return 0;
708 }
709
710 // Loop (almost) forever. Whenever the timerfd expires, expire as many entries as
711 // possible. The loop terminates when the read fails; this generally indicates that the
712 // file descriptor has been closed and the thread can exit.
monitor()713 void monitor() {
714 uint64_t token = 0;
715 while (read(timerFd_, &token, sizeof(token)) == sizeof(token)) {
716 // Move expired timers into the local ready list. This is done inside
717 // the lock. Then, outside the lock, expire them.
718 nsecs_t current = now();
719 std::vector<Entry> ready;
720 {
721 AutoMutex _l(lock_);
722 while (!running_.empty()) {
723 Entry timer = *(running_.begin());
724 if (timer.scheduled <= current) {
725 ready.push_back(timer);
726 running_.erase(running_.cbegin());
727 } else {
728 break;
729 }
730 }
731 restartLocked();
732 }
733 // Call the notifiers outside the lock. Calling the notifiers with the lock held
734 // can lead to deadlock, if the Java-side handler also takes a lock. Note that the
735 // timerfd is already running.
736 for (auto i = ready.begin(); i != ready.end(); i++) {
737 Entry e = *i;
738 e.service->expire(e.id);
739 }
740 }
741 }
742
743 // Restart the ticker. The caller must be holding the lock. This method updates the
744 // timerFd_ to expire at the time of the first Entry in the running list. This method does
745 // not check to see if the currently programmed expiration time is different from the
746 // scheduled expiration time of the first entry.
restartLocked()747 void restartLocked() {
748 if (!running_.empty()) {
749 Entry const x = *(running_.cbegin());
750 nsecs_t delay = x.scheduled - now();
751 // Force a minimum timeout of 10ns.
752 if (delay < 10) delay = 10;
753 time_t sec = nanoseconds_to_seconds(delay);
754 time_t ns = delay - seconds_to_nanoseconds(sec);
755 struct itimerspec setting = {
756 .it_interval = { 0, 0 },
757 .it_value = { sec, ns },
758 };
759 timer_settime(timerFd_, 0, &setting, nullptr);
760 restarted_++;
761 ALOGI_IF(DEBUG_TICKER, "restarted timerfd for %ld.%09ld", sec, ns);
762 } else {
763 const struct itimerspec setting = {
764 .it_interval = { 0, 0 },
765 .it_value = { 0, 0 },
766 };
767 timer_settime(timerFd_, 0, &setting, nullptr);
768 drained_++;
769 ALOGI_IF(DEBUG_TICKER, "drained timer list");
770 }
771 }
772
773 // The usual lock.
774 mutable Mutex lock_;
775
776 // True if the object was initialized properly. Android does not support throwing C++
777 // exceptions, so clients should check this flag after constructing the object. This is
778 // effectively const after the instance has been created.
779 bool ready_ = false;
780
781 // The file descriptor of the timer.
782 int timerFd_ = -1;
783
784 // The thread that monitors the timer.
785 pthread_t watcher_ = 0;
786
787 // The number of times the timer was restarted.
788 size_t restarted_ = 0;
789
790 // The number of times the timer list was exhausted.
791 size_t drained_ = 0;
792
793 // The highwater mark of timers that are running.
794 size_t maxRunning_ = 0;
795
796 // The list of timers that are scheduled. This set is sorted by timeout and then by timer
797 // ID. A set is sufficient (as opposed to a multiset) because timer IDs are unique.
798 std::set<Entry> running_;
799
800 // A unique ID assigned to this instance.
801 const size_t id_;
802
803 // The ID generator.
804 static std::atomic<size_t> idGen_;
805 };
806
807 std::atomic<size_t> AnrTimerService::Ticker::idGen_;
808
809
AnrTimerService(char const * label,notifier_t notifier,void * cookie,jweak jtimer,Ticker * ticker,bool extend,bool freeze)810 AnrTimerService::AnrTimerService(char const* label, notifier_t notifier, void* cookie,
811 jweak jtimer, Ticker* ticker, bool extend, bool freeze) :
812 label_(label),
813 notifier_(notifier),
814 notifierCookie_(cookie),
815 notifierObject_(jtimer),
816 extend_(extend),
817 freeze_(freeze),
818 ticker_(ticker) {
819
820 // Zero the statistics
821 maxRunning_ = 0;
822 memset(&counters_, 0, sizeof(counters_));
823
824 ALOGI_IF(DEBUG_TIMER, "initialized %s", label);
825 }
826
~AnrTimerService()827 AnrTimerService::~AnrTimerService() {
828 AutoMutex _l(lock_);
829 ticker_->remove(this);
830 }
831
statusString(Status s)832 const char* AnrTimerService::statusString(Status s) {
833 switch (s) {
834 case Invalid: return "invalid";
835 case Running: return "running";
836 case Expired: return "expired";
837 case Canceled: return "canceled";
838 }
839 return "unknown";
840 }
841
start(int pid,int uid,nsecs_t timeout)842 AnrTimerService::timer_id_t AnrTimerService::start(int pid, int uid, nsecs_t timeout) {
843 AutoMutex _l(lock_);
844 Timer t(pid, uid, timeout, extend_, freeze_);
845 insertLocked(t);
846 t.start();
847 counters_.started++;
848 return t.id;
849 }
850
cancel(timer_id_t timerId)851 bool AnrTimerService::cancel(timer_id_t timerId) {
852 if (timerId == NOTIMER) return false;
853 AutoMutex _l(lock_);
854 Timer timer = removeLocked(timerId);
855
856 bool result = timer.status == Running;
857 if (timer.status != Invalid) {
858 timer.cancel();
859 } else {
860 counters_.error++;
861 }
862 counters_.canceled++;
863 return result;
864 }
865
accept(timer_id_t timerId)866 bool AnrTimerService::accept(timer_id_t timerId) {
867 if (timerId == NOTIMER) return false;
868 AutoMutex _l(lock_);
869 Timer timer = removeLocked(timerId);
870
871 bool result = false;
872 if (timer.status == Expired) {
873 timer.accept();
874 if (timer.frozen) {
875 addExpiredLocked(timer);
876 result = true;
877 }
878 } else {
879 counters_.error++;
880 }
881 counters_.accepted++;
882 return result;
883 }
884
discard(timer_id_t timerId)885 bool AnrTimerService::discard(timer_id_t timerId) {
886 if (timerId == NOTIMER) return false;
887 AutoMutex _l(lock_);
888 Timer timer = removeLocked(timerId);
889
890 bool result = timer.status == Expired;
891 if (timer.status == Expired) {
892 timer.discard();
893 } else {
894 counters_.error++;
895 }
896 counters_.discarded++;
897 return result;
898 }
899
release(timer_id_t id)900 bool AnrTimerService::release(timer_id_t id) {
901 if (id == NOTIMER) return true;
902
903 Timer key(id);
904 bool okay = false;
905 AutoMutex _l(lock_);
906 std::set<Timer>::iterator found = expired_.find(key);
907 if (found != expired_.end()) {
908 Timer t = *found;
909 t.release();
910 counters_.released++;
911 expired_.erase(found);
912 okay = true;
913 } else {
914 ALOGI_IF(DEBUG_ERROR, "error: unable to release (%u)", id);
915 counters_.error++;
916 }
917 scrubExpiredLocked();
918 return okay;
919 }
920
addExpiredLocked(Timer const & timer)921 void AnrTimerService::addExpiredLocked(Timer const& timer) {
922 scrubExpiredLocked();
923 expired_.insert(timer);
924 }
925
scrubExpiredLocked()926 void AnrTimerService::scrubExpiredLocked() {
927 for (auto i = expired_.begin(); i != expired_.end(); ) {
928 if (!i->alive()) {
929 i = expired_.erase(i);
930 } else {
931 i++;
932 }
933 }
934 }
935
936 // Hold the lock in order to manage the running list.
937 // the listener.
expire(timer_id_t timerId)938 void AnrTimerService::expire(timer_id_t timerId) {
939 // Save the timer attributes for the notification
940 int pid = 0;
941 int uid = 0;
942 nsecs_t elapsed = 0;
943 bool expired = false;
944 {
945 AutoMutex _l(lock_);
946 Timer t = removeLocked(timerId);
947 expired = t.expire();
948 if (t.status == Invalid) {
949 ALOGW_IF(DEBUG_ERROR, "error: expired invalid timer %u", timerId);
950 return;
951 } else {
952 // The timer is either Running (because it was extended) or expired (and is awaiting an
953 // accept or discard).
954 insertLocked(t);
955 }
956 pid = t.pid;
957 uid = t.uid;
958 elapsed = now() - t.started;
959 }
960
961 if (expired) {
962 counters_.expired++;
963 } else {
964 counters_.extended++;
965 }
966
967 // Deliver the notification outside of the lock.
968 if (expired) {
969 if (!notifier_(timerId, pid, uid, elapsed, notifierCookie_, notifierObject_)) {
970 AutoMutex _l(lock_);
971 // Notification failed, which means the listener will never call accept() or
972 // discard(). Do not reinsert the timer.
973 discard(timerId);
974 }
975 }
976 }
977
insertLocked(const Timer & t)978 void AnrTimerService::insertLocked(const Timer& t) {
979 running_.insert(t);
980 if (t.status == Running) {
981 // Only forward running timers to the ticker. Expired timers are handled separately.
982 ticker_->insert(t.scheduled, t.id, this);
983 }
984 maxRunning_ = std::max(maxRunning_, running_.size());
985 }
986
removeLocked(timer_id_t timerId)987 AnrTimerService::Timer AnrTimerService::removeLocked(timer_id_t timerId) {
988 Timer key(timerId);
989 auto found = running_.find(key);
990 if (found != running_.end()) {
991 Timer result = *found;
992 running_.erase(found);
993 ticker_->remove(result.scheduled, result.id);
994 if (running_.size() == 0) counters_.drained++;
995 return result;
996 }
997 return Timer();
998 }
999
getDump() const1000 std::vector<std::string> AnrTimerService::getDump() const {
1001 std::vector<std::string> r;
1002 AutoMutex _l(lock_);
1003 r.push_back(StringPrintf("started:%zu canceled:%zu accepted:%zu discarded:%zu expired:%zu",
1004 counters_.started,
1005 counters_.canceled,
1006 counters_.accepted,
1007 counters_.discarded,
1008 counters_.expired));
1009 r.push_back(StringPrintf("extended:%zu drained:%zu error:%zu running:%zu maxRunning:%zu",
1010 counters_.extended,
1011 counters_.drained,
1012 counters_.error,
1013 running_.size(),
1014 maxRunning_));
1015 r.push_back(StringPrintf("released:%zu releasing:%zu",
1016 counters_.released,
1017 expired_.size()));
1018 r.push_back(StringPrintf("ticker:%zu ticking:%zu maxTicking:%zu",
1019 ticker_->id(),
1020 ticker_->running(),
1021 ticker_->maxRunning()));
1022 return r;
1023 }
1024
/**
 * Whether the native AnrTimer methods are usable in this process.  The flag starts out false
 * and is assigned from NATIVE_SUPPORT when the JNI methods are registered; every JNI entry
 * point consults it before touching native state.
 */
bool nativeSupportEnabled{false};
1030
1031 /**
1032 * Singleton/globals for the anr timer. Among other things, this includes a Ticker* and a use
1033 * count. The JNI layer creates a single Ticker for all operational AnrTimers. The Ticker is
1034 * created when the first AnrTimer is created; this means that the Ticker is only created if
1035 * native anr timers are used.
1036 */
1037 static Mutex gAnrLock;
1038 struct AnrArgs {
1039 jclass clazz = NULL;
1040 jmethodID func = NULL;
1041 JavaVM* vm = NULL;
1042 AnrTimerService::Ticker* ticker = nullptr;
1043 };
1044 static AnrArgs gAnrArgs;
1045
1046 // The cookie is the address of the AnrArgs object to which the notification should be sent.
anrNotify(AnrTimerService::timer_id_t timerId,int pid,int uid,nsecs_t elapsed,void * cookie,jweak jtimer)1047 static bool anrNotify(AnrTimerService::timer_id_t timerId, int pid, int uid, nsecs_t elapsed,
1048 void* cookie, jweak jtimer) {
1049 AutoMutex _l(gAnrLock);
1050 AnrArgs* target = reinterpret_cast<AnrArgs* >(cookie);
1051 JNIEnv *env;
1052 if (target->vm->AttachCurrentThread(&env, 0) != JNI_OK) {
1053 ALOGE("failed to attach thread to JavaVM");
1054 return false;
1055 }
1056 jboolean r = false;
1057 jobject timer = env->NewGlobalRef(jtimer);
1058 if (timer != nullptr) {
1059 // Convert the elsapsed time from ns (native) to ms (Java)
1060 r = env->CallBooleanMethod(timer, target->func, timerId, pid, uid, ns2ms(elapsed));
1061 env->DeleteGlobalRef(timer);
1062 }
1063 target->vm->DetachCurrentThread();
1064 return r;
1065 }
1066
anrTimerSupported(JNIEnv * env,jclass)1067 jboolean anrTimerSupported(JNIEnv* env, jclass) {
1068 return nativeSupportEnabled;
1069 }
1070
anrTimerCreate(JNIEnv * env,jobject jtimer,jstring jname,jboolean extend,jboolean freeze)1071 jlong anrTimerCreate(JNIEnv* env, jobject jtimer, jstring jname,
1072 jboolean extend, jboolean freeze) {
1073 if (!nativeSupportEnabled) return 0;
1074 AutoMutex _l(gAnrLock);
1075 if (gAnrArgs.ticker == nullptr) {
1076 gAnrArgs.ticker = new AnrTimerService::Ticker();
1077 }
1078
1079 ScopedUtfChars name(env, jname);
1080 jobject timer = env->NewWeakGlobalRef(jtimer);
1081 AnrTimerService* service = new AnrTimerService(name.c_str(),
1082 anrNotify, &gAnrArgs, timer, gAnrArgs.ticker, extend, freeze);
1083 return reinterpret_cast<jlong>(service);
1084 }
1085
toService(jlong pointer)1086 AnrTimerService *toService(jlong pointer) {
1087 return reinterpret_cast<AnrTimerService*>(pointer);
1088 }
1089
anrTimerClose(JNIEnv * env,jclass,jlong ptr)1090 jint anrTimerClose(JNIEnv* env, jclass, jlong ptr) {
1091 if (!nativeSupportEnabled) return -1;
1092 if (ptr == 0) return -1;
1093 AutoMutex _l(gAnrLock);
1094 AnrTimerService *s = toService(ptr);
1095 env->DeleteWeakGlobalRef(s->jtimer());
1096 delete s;
1097 return 0;
1098 }
1099
anrTimerStart(JNIEnv * env,jclass,jlong ptr,jint pid,jint uid,jlong timeout)1100 jint anrTimerStart(JNIEnv* env, jclass, jlong ptr, jint pid, jint uid, jlong timeout) {
1101 if (!nativeSupportEnabled) return 0;
1102 // On the Java side, timeouts are expressed in milliseconds and must be converted to
1103 // nanoseconds before being passed to the library code.
1104 return toService(ptr)->start(pid, uid, milliseconds_to_nanoseconds(timeout));
1105 }
1106
anrTimerCancel(JNIEnv * env,jclass,jlong ptr,jint timerId)1107 jboolean anrTimerCancel(JNIEnv* env, jclass, jlong ptr, jint timerId) {
1108 if (!nativeSupportEnabled) return false;
1109 return toService(ptr)->cancel(timerId);
1110 }
1111
anrTimerAccept(JNIEnv * env,jclass,jlong ptr,jint timerId)1112 jboolean anrTimerAccept(JNIEnv* env, jclass, jlong ptr, jint timerId) {
1113 if (!nativeSupportEnabled) return false;
1114 return toService(ptr)->accept(timerId);
1115 }
1116
anrTimerDiscard(JNIEnv * env,jclass,jlong ptr,jint timerId)1117 jboolean anrTimerDiscard(JNIEnv* env, jclass, jlong ptr, jint timerId) {
1118 if (!nativeSupportEnabled) return false;
1119 return toService(ptr)->discard(timerId);
1120 }
1121
anrTimerRelease(JNIEnv * env,jclass,jlong ptr,jint timerId)1122 jboolean anrTimerRelease(JNIEnv* env, jclass, jlong ptr, jint timerId) {
1123 if (!nativeSupportEnabled) return false;
1124 return toService(ptr)->release(timerId);
1125 }
1126
anrTimerDump(JNIEnv * env,jclass,jlong ptr)1127 jobjectArray anrTimerDump(JNIEnv *env, jclass, jlong ptr) {
1128 if (!nativeSupportEnabled) return nullptr;
1129 std::vector<std::string> stats = toService(ptr)->getDump();
1130 jclass sclass = env->FindClass("java/lang/String");
1131 jobjectArray r = env->NewObjectArray(stats.size(), sclass, nullptr);
1132 for (size_t i = 0; i < stats.size(); i++) {
1133 env->SetObjectArrayElement(r, i, env->NewStringUTF(stats[i].c_str()));
1134 }
1135 return r;
1136 }
1137
1138 static const JNINativeMethod methods[] = {
1139 {"nativeAnrTimerSupported", "()Z", (void*) anrTimerSupported},
1140 {"nativeAnrTimerCreate", "(Ljava/lang/String;ZZ)J", (void*) anrTimerCreate},
1141 {"nativeAnrTimerClose", "(J)I", (void*) anrTimerClose},
1142 {"nativeAnrTimerStart", "(JIIJ)I", (void*) anrTimerStart},
1143 {"nativeAnrTimerCancel", "(JI)Z", (void*) anrTimerCancel},
1144 {"nativeAnrTimerAccept", "(JI)Z", (void*) anrTimerAccept},
1145 {"nativeAnrTimerDiscard", "(JI)Z", (void*) anrTimerDiscard},
1146 {"nativeAnrTimerRelease", "(JI)Z", (void*) anrTimerRelease},
1147 {"nativeAnrTimerDump", "(J)[Ljava/lang/String;", (void*) anrTimerDump},
1148 };
1149
1150 } // anonymous namespace
1151
register_android_server_utils_AnrTimer(JNIEnv * env)1152 int register_android_server_utils_AnrTimer(JNIEnv* env)
1153 {
1154 static const char *className = "com/android/server/utils/AnrTimer";
1155 jniRegisterNativeMethods(env, className, methods, NELEM(methods));
1156
1157 nativeSupportEnabled = NATIVE_SUPPORT;
1158
1159 // Do not perform any further initialization if native support is not enabled.
1160 if (!nativeSupportEnabled) return 0;
1161
1162 jclass service = FindClassOrDie(env, className);
1163 gAnrArgs.clazz = MakeGlobalRefOrDie(env, service);
1164 gAnrArgs.func = env->GetMethodID(gAnrArgs.clazz, "expire", "(IIIJ)Z");
1165 env->GetJavaVM(&gAnrArgs.vm);
1166
1167 return 0;
1168 }
1169
1170 } // namespace android
1171