1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.server;
18 
19 import android.app.IActivityController;
20 import android.content.BroadcastReceiver;
21 import android.content.Context;
22 import android.content.Intent;
23 import android.content.IntentFilter;
24 import android.hidl.manager.V1_0.IServiceManager;
25 import android.os.Binder;
26 import android.os.Build;
27 import android.os.Debug;
28 import android.os.Handler;
29 import android.os.IPowerManager;
30 import android.os.Looper;
31 import android.os.Process;
32 import android.os.RemoteException;
33 import android.os.ServiceManager;
34 import android.os.SystemClock;
35 import android.system.ErrnoException;
36 import android.system.Os;
37 import android.system.OsConstants;
38 import android.system.StructRlimit;
39 import android.util.EventLog;
40 import android.util.Log;
41 import android.util.Slog;
42 import android.util.StatsLog;
43 
44 import com.android.internal.os.ZygoteConnectionConstants;
45 import com.android.server.am.ActivityManagerService;
46 import com.android.server.wm.SurfaceAnimationThread;
47 
48 import java.io.File;
49 import java.io.FileWriter;
50 import java.io.IOException;
51 import java.nio.charset.StandardCharsets;
52 import java.nio.file.Files;
53 import java.nio.file.Path;
54 import java.nio.file.Paths;
55 import java.util.ArrayList;
56 import java.util.Arrays;
57 import java.util.Collections;
58 import java.util.HashSet;
59 import java.util.List;
60 
/**
 * Periodically runs its monitors and handler checks, killing this process if they do not
 * return within their timeout.
 */
62 public class Watchdog extends Thread {
63     static final String TAG = "Watchdog";
64 
65     /** Debug flag. */
66     public static final boolean DEBUG = false;
67 
68     // Set this to true to use debug default values.
69     static final boolean DB = false;
70 
71     // Note 1: Do not lower this value below thirty seconds without tightening the invoke-with
72     //         timeout in com.android.internal.os.ZygoteConnection, or wrapped applications
73     //         can trigger the watchdog.
74     // Note 2: The debug value is already below the wait time in ZygoteConnection. Wrapped
75     //         applications may not work with a debug build. CTS will fail.
76     static final long DEFAULT_TIMEOUT = DB ? 10*1000 : 60*1000;
77     static final long CHECK_INTERVAL = DEFAULT_TIMEOUT / 2;
78 
79     // These are temporally ordered: larger values as lateness increases
80     static final int COMPLETED = 0;
81     static final int WAITING = 1;
82     static final int WAITED_HALF = 2;
83     static final int OVERDUE = 3;
84 
85     // Which native processes to dump into dropbox's stack traces
86     public static final String[] NATIVE_STACKS_OF_INTEREST = new String[] {
87         "/system/bin/audioserver",
88         "/system/bin/cameraserver",
89         "/system/bin/drmserver",
90         "/system/bin/mediadrmserver",
91         "/system/bin/mediaserver",
92         "/system/bin/sdcard",
93         "/system/bin/surfaceflinger",
94         "/system/bin/vold",
95         "media.extractor", // system/bin/mediaextractor
96         "media.metrics", // system/bin/mediametrics
97         "media.codec", // vendor/bin/hw/android.hardware.media.omx@1.0-service
98         "media.swcodec", // /apex/com.android.media.swcodec/bin/mediaswcodec
99         "com.android.bluetooth",  // Bluetooth service
100         "/system/bin/statsd",  // Stats daemon
101     };
102 
103     public static final List<String> HAL_INTERFACES_OF_INTEREST = Arrays.asList(
104             "android.hardware.audio@2.0::IDevicesFactory",
105             "android.hardware.audio@4.0::IDevicesFactory",
106             "android.hardware.bluetooth@1.0::IBluetoothHci",
107             "android.hardware.camera.provider@2.4::ICameraProvider",
108             "android.hardware.graphics.allocator@2.0::IAllocator",
109             "android.hardware.graphics.composer@2.1::IComposer",
110             "android.hardware.health@2.0::IHealth",
111             "android.hardware.media.c2@1.0::IComponentStore",
112             "android.hardware.media.omx@1.0::IOmx",
113             "android.hardware.media.omx@1.0::IOmxStore",
114             "android.hardware.sensors@1.0::ISensors",
115             "android.hardware.vr@1.0::IVr",
116             "android.hardware.biometrics.face@1.0::IBiometricsFace"
117     );
118 
119     static Watchdog sWatchdog;
120 
    /* Checkers for every watched handler thread; the constructor adds mMonitorChecker first. */
122     final ArrayList<HandlerChecker> mHandlerCheckers = new ArrayList<>();
123     final HandlerChecker mMonitorChecker;
124     ActivityManagerService mActivity;
125 
126     int mPhonePid;
127     IActivityController mController;
128     boolean mAllowRestart = true;
129     final OpenFdMonitor mOpenFdMonitor;
130 
131     /**
132      * Used for checking status of handle threads and scheduling monitor callbacks.
133      */
134     public final class HandlerChecker implements Runnable {
135         private final Handler mHandler;
136         private final String mName;
137         private final long mWaitMax;
138         private final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
139         private final ArrayList<Monitor> mMonitorQueue = new ArrayList<Monitor>();
140         private boolean mCompleted;
141         private Monitor mCurrentMonitor;
142         private long mStartTime;
143         private int mPauseCount;
144 
HandlerChecker(Handler handler, String name, long waitMaxMillis)145         HandlerChecker(Handler handler, String name, long waitMaxMillis) {
146             mHandler = handler;
147             mName = name;
148             mWaitMax = waitMaxMillis;
149             mCompleted = true;
150         }
151 
addMonitorLocked(Monitor monitor)152         void addMonitorLocked(Monitor monitor) {
153             // We don't want to update mMonitors when the Handler is in the middle of checking
154             // all monitors. We will update mMonitors on the next schedule if it is safe
155             mMonitorQueue.add(monitor);
156         }
157 
scheduleCheckLocked()158         public void scheduleCheckLocked() {
159             if (mCompleted) {
160                 // Safe to update monitors in queue, Handler is not in the middle of work
161                 mMonitors.addAll(mMonitorQueue);
162                 mMonitorQueue.clear();
163             }
164             if ((mMonitors.size() == 0 && mHandler.getLooper().getQueue().isPolling())
165                     || (mPauseCount > 0)) {
166                 // Don't schedule until after resume OR
167                 // If the target looper has recently been polling, then
168                 // there is no reason to enqueue our checker on it since that
169                 // is as good as it not being deadlocked.  This avoid having
170                 // to do a context switch to check the thread. Note that we
171                 // only do this if we have no monitors since those would need to
172                 // be executed at this point.
173                 mCompleted = true;
174                 return;
175             }
176             if (!mCompleted) {
177                 // we already have a check in flight, so no need
178                 return;
179             }
180 
181             mCompleted = false;
182             mCurrentMonitor = null;
183             mStartTime = SystemClock.uptimeMillis();
184             mHandler.postAtFrontOfQueue(this);
185         }
186 
isOverdueLocked()187         boolean isOverdueLocked() {
188             return (!mCompleted) && (SystemClock.uptimeMillis() > mStartTime + mWaitMax);
189         }
190 
getCompletionStateLocked()191         public int getCompletionStateLocked() {
192             if (mCompleted) {
193                 return COMPLETED;
194             } else {
195                 long latency = SystemClock.uptimeMillis() - mStartTime;
196                 if (latency < mWaitMax/2) {
197                     return WAITING;
198                 } else if (latency < mWaitMax) {
199                     return WAITED_HALF;
200                 }
201             }
202             return OVERDUE;
203         }
204 
getThread()205         public Thread getThread() {
206             return mHandler.getLooper().getThread();
207         }
208 
getName()209         public String getName() {
210             return mName;
211         }
212 
describeBlockedStateLocked()213         String describeBlockedStateLocked() {
214             if (mCurrentMonitor == null) {
215                 return "Blocked in handler on " + mName + " (" + getThread().getName() + ")";
216             } else {
217                 return "Blocked in monitor " + mCurrentMonitor.getClass().getName()
218                         + " on " + mName + " (" + getThread().getName() + ")";
219             }
220         }
221 
222         @Override
run()223         public void run() {
224             // Once we get here, we ensure that mMonitors does not change even if we call
225             // #addMonitorLocked because we first add the new monitors to mMonitorQueue and
226             // move them to mMonitors on the next schedule when mCompleted is true, at which
227             // point we have completed execution of this method.
228             final int size = mMonitors.size();
229             for (int i = 0 ; i < size ; i++) {
230                 synchronized (Watchdog.this) {
231                     mCurrentMonitor = mMonitors.get(i);
232                 }
233                 mCurrentMonitor.monitor();
234             }
235 
236             synchronized (Watchdog.this) {
237                 mCompleted = true;
238                 mCurrentMonitor = null;
239             }
240         }
241 
242         /** Pause the HandlerChecker. */
pauseLocked(String reason)243         public void pauseLocked(String reason) {
244             mPauseCount++;
245             // Mark as completed, because there's a chance we called this after the watchog
246             // thread loop called Object#wait after 'WAITED_HALF'. In that case we want to ensure
247             // the next call to #getCompletionStateLocked for this checker returns 'COMPLETED'
248             mCompleted = true;
249             Slog.i(TAG, "Pausing HandlerChecker: " + mName + " for reason: "
250                     + reason + ". Pause count: " + mPauseCount);
251         }
252 
253         /** Resume the HandlerChecker from the last {@link #pauseLocked}. */
resumeLocked(String reason)254         public void resumeLocked(String reason) {
255             if (mPauseCount > 0) {
256                 mPauseCount--;
257                 Slog.i(TAG, "Resuming HandlerChecker: " + mName + " for reason: "
258                         + reason + ". Pause count: " + mPauseCount);
259             } else {
260                 Slog.wtf(TAG, "Already resumed HandlerChecker: " + mName);
261             }
262         }
263     }
264 
265     final class RebootRequestReceiver extends BroadcastReceiver {
266         @Override
onReceive(Context c, Intent intent)267         public void onReceive(Context c, Intent intent) {
268             if (intent.getIntExtra("nowait", 0) != 0) {
269                 rebootSystem("Received ACTION_REBOOT broadcast");
270                 return;
271             }
272             Slog.w(TAG, "Unsupported ACTION_REBOOT broadcast: " + intent);
273         }
274     }
275 
276     /** Monitor for checking the availability of binder threads. The monitor will block until
277      * there is a binder thread available to process in coming IPCs to make sure other processes
278      * can still communicate with the service.
279      */
280     private static final class BinderThreadMonitor implements Watchdog.Monitor {
281         @Override
monitor()282         public void monitor() {
283             Binder.blockUntilThreadAvailable();
284         }
285     }
286 
    /** Callback that a {@link HandlerChecker} runs on its handler thread during every check. */
    public interface Monitor {
        /**
         * Called once per check. The owning checker is not marked completed until every one of
         * its monitors has returned from this method.
         */
        void monitor();
    }
290 
getInstance()291     public static Watchdog getInstance() {
292         if (sWatchdog == null) {
293             sWatchdog = new Watchdog();
294         }
295 
296         return sWatchdog;
297     }
298 
Watchdog()299     private Watchdog() {
300         super("watchdog");
301         // Initialize handler checkers for each common thread we want to check.  Note
302         // that we are not currently checking the background thread, since it can
303         // potentially hold longer running operations with no guarantees about the timeliness
304         // of operations there.
305 
306         // The shared foreground thread is the main checker.  It is where we
307         // will also dispatch monitor checks and do other work.
308         mMonitorChecker = new HandlerChecker(FgThread.getHandler(),
309                 "foreground thread", DEFAULT_TIMEOUT);
310         mHandlerCheckers.add(mMonitorChecker);
311         // Add checker for main thread.  We only do a quick check since there
312         // can be UI running on the thread.
313         mHandlerCheckers.add(new HandlerChecker(new Handler(Looper.getMainLooper()),
314                 "main thread", DEFAULT_TIMEOUT));
315         // Add checker for shared UI thread.
316         mHandlerCheckers.add(new HandlerChecker(UiThread.getHandler(),
317                 "ui thread", DEFAULT_TIMEOUT));
318         // And also check IO thread.
319         mHandlerCheckers.add(new HandlerChecker(IoThread.getHandler(),
320                 "i/o thread", DEFAULT_TIMEOUT));
321         // And the display thread.
322         mHandlerCheckers.add(new HandlerChecker(DisplayThread.getHandler(),
323                 "display thread", DEFAULT_TIMEOUT));
324         // And the animation thread.
325         mHandlerCheckers.add(new HandlerChecker(AnimationThread.getHandler(),
326                 "animation thread", DEFAULT_TIMEOUT));
327         // And the surface animation thread.
328         mHandlerCheckers.add(new HandlerChecker(SurfaceAnimationThread.getHandler(),
329                 "surface animation thread", DEFAULT_TIMEOUT));
330 
331         // Initialize monitor for Binder threads.
332         addMonitor(new BinderThreadMonitor());
333 
334         mOpenFdMonitor = OpenFdMonitor.create();
335 
336         // See the notes on DEFAULT_TIMEOUT.
337         assert DB ||
338                 DEFAULT_TIMEOUT > ZygoteConnectionConstants.WRAPPED_PID_TIMEOUT_MILLIS;
339     }
340 
341     /**
342      * Registers a {@link BroadcastReceiver} to listen to reboot broadcasts and trigger reboot.
343      * Should be called during boot after the ActivityManagerService is up and registered
344      * as a system service so it can handle registration of a {@link BroadcastReceiver}.
345      */
init(Context context, ActivityManagerService activity)346     public void init(Context context, ActivityManagerService activity) {
347         mActivity = activity;
348         context.registerReceiver(new RebootRequestReceiver(),
349                 new IntentFilter(Intent.ACTION_REBOOT),
350                 android.Manifest.permission.REBOOT, null);
351     }
352 
processStarted(String name, int pid)353     public void processStarted(String name, int pid) {
354         synchronized (this) {
355             if ("com.android.phone".equals(name)) {
356                 mPhonePid = pid;
357             }
358         }
359     }
360 
    /**
     * Sets the controller that {@link #run()} notifies, via
     * {@code systemNotResponding}, before deciding whether to kill the process.
     *
     * @param controller the controller to notify; {@link #run()} skips the notification when
     *     it is null
     */
    public void setActivityController(IActivityController controller) {
        synchronized (this) {
            mController = controller;
        }
    }
366 
    /**
     * Sets whether {@link #run()} may kill the system process when a hang is detected. When
     * false, the hang is still reported but the process is left running.
     */
    public void setAllowRestart(boolean allowRestart) {
        synchronized (this) {
            mAllowRestart = allowRestart;
        }
    }
372 
    /**
     * Registers {@code monitor} on the main monitor checker ({@code mMonitorChecker}), which the
     * constructor attaches to the foreground thread.
     */
    public void addMonitor(Monitor monitor) {
        synchronized (this) {
            mMonitorChecker.addMonitorLocked(monitor);
        }
    }
378 
addThread(Handler thread)379     public void addThread(Handler thread) {
380         addThread(thread, DEFAULT_TIMEOUT);
381     }
382 
addThread(Handler thread, long timeoutMillis)383     public void addThread(Handler thread, long timeoutMillis) {
384         synchronized (this) {
385             final String name = thread.getLooper().getThread().getName();
386             mHandlerCheckers.add(new HandlerChecker(thread, name, timeoutMillis));
387         }
388     }
389 
390     /**
391      * Pauses Watchdog action for the currently running thread. Useful before executing long running
392      * operations that could falsely trigger the watchdog. Each call to this will require a matching
393      * call to {@link #resumeWatchingCurrentThread}.
394      *
395      * <p>If the current thread has not been added to the Watchdog, this call is a no-op.
396      *
397      * <p>If the Watchdog is already paused for the current thread, this call adds
398      * adds another pause and will require an additional {@link #resumeCurrentThread} to resume.
399      *
400      * <p>Note: Use with care, as any deadlocks on the current thread will be undetected until all
401      * pauses have been resumed.
402      */
pauseWatchingCurrentThread(String reason)403     public void pauseWatchingCurrentThread(String reason) {
404         synchronized (this) {
405             for (HandlerChecker hc : mHandlerCheckers) {
406                 if (Thread.currentThread().equals(hc.getThread())) {
407                     hc.pauseLocked(reason);
408                 }
409             }
410         }
411     }
412 
413     /**
414      * Resumes the last pause from {@link #pauseWatchingCurrentThread} for the currently running
415      * thread.
416      *
417      * <p>If the current thread has not been added to the Watchdog, this call is a no-op.
418      *
419      * <p>If the Watchdog action for the current thread is already resumed, this call logs a wtf.
420      *
421      * <p>If all pauses have been resumed, the Watchdog action is finally resumed, otherwise,
422      * the Watchdog action for the current thread remains paused until resume is called at least
423      * as many times as the calls to pause.
424      */
resumeWatchingCurrentThread(String reason)425     public void resumeWatchingCurrentThread(String reason) {
426         synchronized (this) {
427             for (HandlerChecker hc : mHandlerCheckers) {
428                 if (Thread.currentThread().equals(hc.getThread())) {
429                     hc.resumeLocked(reason);
430                 }
431             }
432         }
433     }
434 
435     /**
436      * Perform a full reboot of the system.
437      */
rebootSystem(String reason)438     void rebootSystem(String reason) {
439         Slog.i(TAG, "Rebooting system because: " + reason);
440         IPowerManager pms = (IPowerManager)ServiceManager.getService(Context.POWER_SERVICE);
441         try {
442             pms.reboot(false, reason, false);
443         } catch (RemoteException ex) {
444         }
445     }
446 
evaluateCheckerCompletionLocked()447     private int evaluateCheckerCompletionLocked() {
448         int state = COMPLETED;
449         for (int i=0; i<mHandlerCheckers.size(); i++) {
450             HandlerChecker hc = mHandlerCheckers.get(i);
451             state = Math.max(state, hc.getCompletionStateLocked());
452         }
453         return state;
454     }
455 
getBlockedCheckersLocked()456     private ArrayList<HandlerChecker> getBlockedCheckersLocked() {
457         ArrayList<HandlerChecker> checkers = new ArrayList<HandlerChecker>();
458         for (int i=0; i<mHandlerCheckers.size(); i++) {
459             HandlerChecker hc = mHandlerCheckers.get(i);
460             if (hc.isOverdueLocked()) {
461                 checkers.add(hc);
462             }
463         }
464         return checkers;
465     }
466 
describeCheckersLocked(List<HandlerChecker> checkers)467     private String describeCheckersLocked(List<HandlerChecker> checkers) {
468         StringBuilder builder = new StringBuilder(128);
469         for (int i=0; i<checkers.size(); i++) {
470             if (builder.length() > 0) {
471                 builder.append(", ");
472             }
473             builder.append(checkers.get(i).describeBlockedStateLocked());
474         }
475         return builder.toString();
476     }
477 
getInterestingHalPids()478     private static ArrayList<Integer> getInterestingHalPids() {
479         try {
480             IServiceManager serviceManager = IServiceManager.getService();
481             ArrayList<IServiceManager.InstanceDebugInfo> dump =
482                     serviceManager.debugDump();
483             HashSet<Integer> pids = new HashSet<>();
484             for (IServiceManager.InstanceDebugInfo info : dump) {
485                 if (info.pid == IServiceManager.PidConstant.NO_PID) {
486                     continue;
487                 }
488 
489                 if (!HAL_INTERFACES_OF_INTEREST.contains(info.interfaceName)) {
490                     continue;
491                 }
492 
493                 pids.add(info.pid);
494             }
495             return new ArrayList<Integer>(pids);
496         } catch (RemoteException e) {
497             return new ArrayList<Integer>();
498         }
499     }
500 
getInterestingNativePids()501     static ArrayList<Integer> getInterestingNativePids() {
502         ArrayList<Integer> pids = getInterestingHalPids();
503 
504         int[] nativePids = Process.getPidsForCommands(NATIVE_STACKS_OF_INTEREST);
505         if (nativePids != null) {
506             pids.ensureCapacity(pids.size() + nativePids.length);
507             for (int i : nativePids) {
508                 pids.add(i);
509             }
510         }
511 
512         return pids;
513     }
514 
515     @Override
run()516     public void run() {
517         boolean waitedHalf = false;
518         while (true) {
519             final List<HandlerChecker> blockedCheckers;
520             final String subject;
521             final boolean allowRestart;
522             int debuggerWasConnected = 0;
523             synchronized (this) {
524                 long timeout = CHECK_INTERVAL;
525                 // Make sure we (re)spin the checkers that have become idle within
526                 // this wait-and-check interval
527                 for (int i=0; i<mHandlerCheckers.size(); i++) {
528                     HandlerChecker hc = mHandlerCheckers.get(i);
529                     hc.scheduleCheckLocked();
530                 }
531 
532                 if (debuggerWasConnected > 0) {
533                     debuggerWasConnected--;
534                 }
535 
536                 // NOTE: We use uptimeMillis() here because we do not want to increment the time we
537                 // wait while asleep. If the device is asleep then the thing that we are waiting
538                 // to timeout on is asleep as well and won't have a chance to run, causing a false
539                 // positive on when to kill things.
540                 long start = SystemClock.uptimeMillis();
541                 while (timeout > 0) {
542                     if (Debug.isDebuggerConnected()) {
543                         debuggerWasConnected = 2;
544                     }
545                     try {
546                         wait(timeout);
547                         // Note: mHandlerCheckers and mMonitorChecker may have changed after waiting
548                     } catch (InterruptedException e) {
549                         Log.wtf(TAG, e);
550                     }
551                     if (Debug.isDebuggerConnected()) {
552                         debuggerWasConnected = 2;
553                     }
554                     timeout = CHECK_INTERVAL - (SystemClock.uptimeMillis() - start);
555                 }
556 
557                 boolean fdLimitTriggered = false;
558                 if (mOpenFdMonitor != null) {
559                     fdLimitTriggered = mOpenFdMonitor.monitor();
560                 }
561 
562                 if (!fdLimitTriggered) {
563                     final int waitState = evaluateCheckerCompletionLocked();
564                     if (waitState == COMPLETED) {
565                         // The monitors have returned; reset
566                         waitedHalf = false;
567                         continue;
568                     } else if (waitState == WAITING) {
569                         // still waiting but within their configured intervals; back off and recheck
570                         continue;
571                     } else if (waitState == WAITED_HALF) {
572                         if (!waitedHalf) {
573                             Slog.i(TAG, "WAITED_HALF");
574                             // We've waited half the deadlock-detection interval.  Pull a stack
575                             // trace and wait another half.
576                             ArrayList<Integer> pids = new ArrayList<Integer>();
577                             pids.add(Process.myPid());
578                             ActivityManagerService.dumpStackTraces(pids, null, null,
579                                 getInterestingNativePids());
580                             waitedHalf = true;
581                         }
582                         continue;
583                     }
584 
585                     // something is overdue!
586                     blockedCheckers = getBlockedCheckersLocked();
587                     subject = describeCheckersLocked(blockedCheckers);
588                 } else {
589                     blockedCheckers = Collections.emptyList();
590                     subject = "Open FD high water mark reached";
591                 }
592                 allowRestart = mAllowRestart;
593             }
594 
595             // If we got here, that means that the system is most likely hung.
596             // First collect stack traces from all threads of the system process.
597             // Then kill this process so that the system will restart.
598             EventLog.writeEvent(EventLogTags.WATCHDOG, subject);
599 
600             ArrayList<Integer> pids = new ArrayList<>();
601             pids.add(Process.myPid());
602             if (mPhonePid > 0) pids.add(mPhonePid);
603 
604             final File stack = ActivityManagerService.dumpStackTraces(
605                     pids, null, null, getInterestingNativePids());
606 
607             // Give some extra time to make sure the stack traces get written.
608             // The system's been hanging for a minute, another second or two won't hurt much.
609             SystemClock.sleep(5000);
610 
611             // Trigger the kernel to dump all blocked threads, and backtraces on all CPUs to the kernel log
612             doSysRq('w');
613             doSysRq('l');
614 
615             // Try to add the error to the dropbox, but assuming that the ActivityManager
616             // itself may be deadlocked.  (which has happened, causing this statement to
617             // deadlock and the watchdog as a whole to be ineffective)
618             Thread dropboxThread = new Thread("watchdogWriteToDropbox") {
619                     public void run() {
620                         // If a watched thread hangs before init() is called, we don't have a
621                         // valid mActivity. So we can't log the error to dropbox.
622                         if (mActivity != null) {
623                             mActivity.addErrorToDropBox(
624                                     "watchdog", null, "system_server", null, null, null,
625                                     subject, null, stack, null);
626                         }
627                         StatsLog.write(StatsLog.SYSTEM_SERVER_WATCHDOG_OCCURRED, subject);
628                     }
629                 };
630             dropboxThread.start();
631             try {
632                 dropboxThread.join(2000);  // wait up to 2 seconds for it to return.
633             } catch (InterruptedException ignored) {}
634 
635             IActivityController controller;
636             synchronized (this) {
637                 controller = mController;
638             }
639             if (controller != null) {
640                 Slog.i(TAG, "Reporting stuck state to activity controller");
641                 try {
642                     Binder.setDumpDisabled("Service dumps disabled due to hung system process.");
643                     // 1 = keep waiting, -1 = kill system
644                     int res = controller.systemNotResponding(subject);
645                     if (res >= 0) {
646                         Slog.i(TAG, "Activity controller requested to coninue to wait");
647                         waitedHalf = false;
648                         continue;
649                     }
650                 } catch (RemoteException e) {
651                 }
652             }
653 
654             // Only kill the process if the debugger is not attached.
655             if (Debug.isDebuggerConnected()) {
656                 debuggerWasConnected = 2;
657             }
658             if (debuggerWasConnected >= 2) {
659                 Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process");
660             } else if (debuggerWasConnected > 0) {
661                 Slog.w(TAG, "Debugger was connected: Watchdog is *not* killing the system process");
662             } else if (!allowRestart) {
663                 Slog.w(TAG, "Restart not allowed: Watchdog is *not* killing the system process");
664             } else {
665                 Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + subject);
666                 WatchdogDiagnostics.diagnoseCheckers(blockedCheckers);
667                 Slog.w(TAG, "*** GOODBYE!");
668                 Process.killProcess(Process.myPid());
669                 System.exit(10);
670             }
671 
672             waitedHalf = false;
673         }
674     }
675 
doSysRq(char c)676     private void doSysRq(char c) {
677         try {
678             FileWriter sysrq_trigger = new FileWriter("/proc/sysrq-trigger");
679             sysrq_trigger.write(c);
680             sysrq_trigger.close();
681         } catch (IOException e) {
682             Slog.w(TAG, "Failed to write to /proc/sysrq-trigger", e);
683         }
684     }
685 
686     public static final class OpenFdMonitor {
687         /**
688          * Number of FDs below the soft limit that we trigger a runtime restart at. This was
689          * chosen arbitrarily, but will need to be at least 6 in order to have a sufficient number
690          * of FDs in reserve to complete a dump.
691          */
692         private static final int FD_HIGH_WATER_MARK = 12;
693 
694         private final File mDumpDir;
695         private final File mFdHighWaterMark;
696 
create()697         public static OpenFdMonitor create() {
698             // Only run the FD monitor on debuggable builds (such as userdebug and eng builds).
699             if (!Build.IS_DEBUGGABLE) {
700                 return null;
701             }
702 
703             final StructRlimit rlimit;
704             try {
705                 rlimit = android.system.Os.getrlimit(OsConstants.RLIMIT_NOFILE);
706             } catch (ErrnoException errno) {
707                 Slog.w(TAG, "Error thrown from getrlimit(RLIMIT_NOFILE)", errno);
708                 return null;
709             }
710 
711             // The assumption we're making here is that FD numbers are allocated (more or less)
712             // sequentially, which is currently (and historically) true since open is currently
713             // specified to always return the lowest-numbered non-open file descriptor for the
714             // current process.
715             //
716             // We do this to avoid having to enumerate the contents of /proc/self/fd in order to
717             // count the number of descriptors open in the process.
718             final File fdThreshold = new File("/proc/self/fd/" + (rlimit.rlim_cur - FD_HIGH_WATER_MARK));
719             return new OpenFdMonitor(new File("/data/anr"), fdThreshold);
720         }
721 
OpenFdMonitor(File dumpDir, File fdThreshold)722         OpenFdMonitor(File dumpDir, File fdThreshold) {
723             mDumpDir = dumpDir;
724             mFdHighWaterMark = fdThreshold;
725         }
726 
727         /**
728          * Dumps open file descriptors and their full paths to a temporary file in {@code mDumpDir}.
729          */
dumpOpenDescriptors()730         private void dumpOpenDescriptors() {
731             // We cannot exec lsof to get more info about open file descriptors because a newly
732             // forked process will not have the permissions to readlink. Instead list all open
733             // descriptors from /proc/pid/fd and resolve them.
734             List<String> dumpInfo = new ArrayList<>();
735             String fdDirPath = String.format("/proc/%d/fd/", Process.myPid());
736             File[] fds = new File(fdDirPath).listFiles();
737             if (fds == null) {
738                 dumpInfo.add("Unable to list " + fdDirPath);
739             } else {
740                 for (File f : fds) {
741                     String fdSymLink = f.getAbsolutePath();
742                     String resolvedPath = "";
743                     try {
744                         resolvedPath = Os.readlink(fdSymLink);
745                     } catch (ErrnoException ex) {
746                         resolvedPath = ex.getMessage();
747                     }
748                     dumpInfo.add(fdSymLink + "\t" + resolvedPath);
749                 }
750             }
751 
752             // Dump the fds & paths to a temp file.
753             try {
754                 File dumpFile = File.createTempFile("anr_fd_", "", mDumpDir);
755                 Path out = Paths.get(dumpFile.getAbsolutePath());
756                 Files.write(out, dumpInfo, StandardCharsets.UTF_8);
757             } catch (IOException ex) {
758                 Slog.w(TAG, "Unable to write open descriptors to file: " + ex);
759             }
760         }
761 
762         /**
763          * @return {@code true} if the high water mark was breached and a dump was written,
764          *     {@code false} otherwise.
765          */
monitor()766         public boolean monitor() {
767             if (mFdHighWaterMark.exists()) {
768                 dumpOpenDescriptors();
769                 return true;
770             }
771 
772             return false;
773         }
774     }
775 }
776