1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.server;
18 
19 import static com.android.server.pm.PackageManagerServiceUtils.logCriticalInfo;
20 
21 import android.content.ContentResolver;
22 import android.content.Context;
23 import android.os.Build;
24 import android.os.Environment;
25 import android.os.FileUtils;
26 import android.os.RecoverySystem;
27 import android.os.SystemClock;
28 import android.os.SystemProperties;
29 import android.os.UserHandle;
30 import android.provider.Settings;
31 import android.text.format.DateUtils;
32 import android.util.ExceptionUtils;
33 import android.util.Log;
34 import android.util.MathUtils;
35 import android.util.Slog;
36 import android.util.SparseArray;
37 
38 import com.android.internal.util.ArrayUtils;
39 import com.android.server.pm.PackageManagerService;
40 
41 import java.io.File;
42 
43 /**
44  * Utilities to help rescue the system from crash loops. Callers are expected to
45  * report boot events and persistent app crashes, and if they happen frequently
46  * enough this class will slowly escalate through several rescue operations
47  * before finally rebooting and prompting the user if they want to wipe data as
48  * a last resort.
49  *
50  * @hide
51  */
52 public class RescueParty {
53     private static final String TAG = "RescueParty";
54 
55     private static final String PROP_ENABLE_RESCUE = "persist.sys.enable_rescue";
56     private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue";
57     private static final String PROP_RESCUE_LEVEL = "sys.rescue_level";
58     private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count";
59     private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start";
60     private static final String PROP_VIRTUAL_DEVICE = "ro.hardware.virtual_device";
61 
62     private static final int LEVEL_NONE = 0;
63     private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1;
64     private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2;
65     private static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3;
66     private static final int LEVEL_FACTORY_RESET = 4;
67 
68     /** Threshold for boot loops */
69     private static final Threshold sBoot = new BootThreshold();
70     /** Threshold for app crash loops */
71     private static SparseArray<Threshold> sApps = new SparseArray<>();
72 
isDisabled()73     private static boolean isDisabled() {
74         // Check if we're explicitly enabled for testing
75         if (SystemProperties.getBoolean(PROP_ENABLE_RESCUE, false)) {
76             return false;
77         }
78 
79         // We're disabled on all engineering devices
80         if (Build.IS_ENG) {
81             Slog.v(TAG, "Disabled because of eng build");
82             return true;
83         }
84 
85         // We're disabled on userdebug devices connected over USB, since that's
86         // a decent signal that someone is actively trying to debug the device,
87         // or that it's in a lab environment.
88         if (Build.IS_USERDEBUG && isUsbActive()) {
89             Slog.v(TAG, "Disabled because of active USB connection");
90             return true;
91         }
92 
93         // One last-ditch check
94         if (SystemProperties.getBoolean(PROP_DISABLE_RESCUE, false)) {
95             Slog.v(TAG, "Disabled because of manual property");
96             return true;
97         }
98 
99         return false;
100     }
101 
102     /**
103      * Take note of a boot event. If we notice too many of these events
104      * happening in rapid succession, we'll send out a rescue party.
105      */
noteBoot(Context context)106     public static void noteBoot(Context context) {
107         if (isDisabled()) return;
108         if (sBoot.incrementAndTest()) {
109             sBoot.reset();
110             incrementRescueLevel(sBoot.uid);
111             executeRescueLevel(context);
112         }
113     }
114 
115     /**
116      * Take note of a persistent app crash. If we notice too many of these
117      * events happening in rapid succession, we'll send out a rescue party.
118      */
notePersistentAppCrash(Context context, int uid)119     public static void notePersistentAppCrash(Context context, int uid) {
120         if (isDisabled()) return;
121         Threshold t = sApps.get(uid);
122         if (t == null) {
123             t = new AppThreshold(uid);
124             sApps.put(uid, t);
125         }
126         if (t.incrementAndTest()) {
127             t.reset();
128             incrementRescueLevel(t.uid);
129             executeRescueLevel(context);
130         }
131     }
132 
133     /**
134      * Check if we're currently attempting to reboot for a factory reset.
135      */
isAttemptingFactoryReset()136     public static boolean isAttemptingFactoryReset() {
137         return SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) == LEVEL_FACTORY_RESET;
138     }
139 
140     /**
141      * Escalate to the next rescue level. After incrementing the level you'll
142      * probably want to call {@link #executeRescueLevel(Context)}.
143      */
incrementRescueLevel(int triggerUid)144     private static void incrementRescueLevel(int triggerUid) {
145         final int level = MathUtils.constrain(
146                 SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1,
147                 LEVEL_NONE, LEVEL_FACTORY_RESET);
148         SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level));
149 
150         EventLogTags.writeRescueLevel(level, triggerUid);
151         logCriticalInfo(Log.WARN, "Incremented rescue level to "
152                 + levelToString(level) + " triggered by UID " + triggerUid);
153     }
154 
155     /**
156      * Called when {@code SettingsProvider} has been published, which is a good
157      * opportunity to reset any settings depending on our rescue level.
158      */
onSettingsProviderPublished(Context context)159     public static void onSettingsProviderPublished(Context context) {
160         executeRescueLevel(context);
161     }
162 
executeRescueLevel(Context context)163     private static void executeRescueLevel(Context context) {
164         final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE);
165         if (level == LEVEL_NONE) return;
166 
167         Slog.w(TAG, "Attempting rescue level " + levelToString(level));
168         try {
169             executeRescueLevelInternal(context, level);
170             EventLogTags.writeRescueSuccess(level);
171             logCriticalInfo(Log.DEBUG,
172                     "Finished rescue level " + levelToString(level));
173         } catch (Throwable t) {
174             final String msg = ExceptionUtils.getCompleteMessage(t);
175             EventLogTags.writeRescueFailure(level, msg);
176             logCriticalInfo(Log.ERROR,
177                     "Failed rescue level " + levelToString(level) + ": " + msg);
178         }
179     }
180 
executeRescueLevelInternal(Context context, int level)181     private static void executeRescueLevelInternal(Context context, int level) throws Exception {
182         switch (level) {
183             case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS:
184                 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS);
185                 break;
186             case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES:
187                 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_CHANGES);
188                 break;
189             case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS:
190                 resetAllSettings(context, Settings.RESET_MODE_TRUSTED_DEFAULTS);
191                 break;
192             case LEVEL_FACTORY_RESET:
193                 RecoverySystem.rebootPromptAndWipeUserData(context, TAG);
194                 break;
195         }
196     }
197 
resetAllSettings(Context context, int mode)198     private static void resetAllSettings(Context context, int mode) throws Exception {
199         // Try our best to reset all settings possible, and once finished
200         // rethrow any exception that we encountered
201         Exception res = null;
202         final ContentResolver resolver = context.getContentResolver();
203         try {
204             Settings.Global.resetToDefaultsAsUser(resolver, null, mode, UserHandle.USER_SYSTEM);
205         } catch (Throwable t) {
206             res = new RuntimeException("Failed to reset global settings", t);
207         }
208         for (int userId : getAllUserIds()) {
209             try {
210                 Settings.Secure.resetToDefaultsAsUser(resolver, null, mode, userId);
211             } catch (Throwable t) {
212                 res = new RuntimeException("Failed to reset secure settings for " + userId, t);
213             }
214         }
215         if (res != null) {
216             throw res;
217         }
218     }
219 
220     /**
221      * Threshold that can be triggered if a number of events occur within a
222      * window of time.
223      */
224     private abstract static class Threshold {
getCount()225         public abstract int getCount();
setCount(int count)226         public abstract void setCount(int count);
getStart()227         public abstract long getStart();
setStart(long start)228         public abstract void setStart(long start);
229 
230         private final int uid;
231         private final int triggerCount;
232         private final long triggerWindow;
233 
Threshold(int uid, int triggerCount, long triggerWindow)234         public Threshold(int uid, int triggerCount, long triggerWindow) {
235             this.uid = uid;
236             this.triggerCount = triggerCount;
237             this.triggerWindow = triggerWindow;
238         }
239 
reset()240         public void reset() {
241             setCount(0);
242             setStart(0);
243         }
244 
245         /**
246          * @return if this threshold has been triggered
247          */
incrementAndTest()248         public boolean incrementAndTest() {
249             final long now = SystemClock.elapsedRealtime();
250             final long window = now - getStart();
251             if (window > triggerWindow) {
252                 setCount(1);
253                 setStart(now);
254                 return false;
255             } else {
256                 int count = getCount() + 1;
257                 setCount(count);
258                 EventLogTags.writeRescueNote(uid, count, window);
259                 Slog.w(TAG, "Noticed " + count + " events for UID " + uid + " in last "
260                         + (window / 1000) + " sec");
261                 return (count >= triggerCount);
262             }
263         }
264     }
265 
266     /**
267      * Specialization of {@link Threshold} for monitoring boot events. It stores
268      * counters in system properties for robustness.
269      */
270     private static class BootThreshold extends Threshold {
BootThreshold()271         public BootThreshold() {
272             // We're interested in 5 events in any 300 second period; this
273             // window is super relaxed because booting can take a long time if
274             // forced to dexopt things.
275             super(android.os.Process.ROOT_UID, 5, 300 * DateUtils.SECOND_IN_MILLIS);
276         }
277 
278         @Override
getCount()279         public int getCount() {
280             return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0);
281         }
282 
283         @Override
setCount(int count)284         public void setCount(int count) {
285             SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count));
286         }
287 
288         @Override
getStart()289         public long getStart() {
290             return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0);
291         }
292 
293         @Override
setStart(long start)294         public void setStart(long start) {
295             SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(start));
296         }
297     }
298 
299     /**
300      * Specialization of {@link Threshold} for monitoring app crashes. It stores
301      * counters in memory.
302      */
303     private static class AppThreshold extends Threshold {
304         private int count;
305         private long start;
306 
AppThreshold(int uid)307         public AppThreshold(int uid) {
308             // We're interested in 5 events in any 30 second period; apps crash
309             // pretty quickly so we can keep a tight leash on them.
310             super(uid, 5, 30 * DateUtils.SECOND_IN_MILLIS);
311         }
312 
getCount()313         @Override public int getCount() { return count; }
setCount(int count)314         @Override public void setCount(int count) { this.count = count; }
getStart()315         @Override public long getStart() { return start; }
setStart(long start)316         @Override public void setStart(long start) { this.start = start; }
317     }
318 
getAllUserIds()319     private static int[] getAllUserIds() {
320         int[] userIds = { UserHandle.USER_SYSTEM };
321         try {
322             for (File file : FileUtils.listFilesOrEmpty(Environment.getDataSystemDeDirectory())) {
323                 try {
324                     final int userId = Integer.parseInt(file.getName());
325                     if (userId != UserHandle.USER_SYSTEM) {
326                         userIds = ArrayUtils.appendInt(userIds, userId);
327                     }
328                 } catch (NumberFormatException ignored) {
329                 }
330             }
331         } catch (Throwable t) {
332             Slog.w(TAG, "Trouble discovering users", t);
333         }
334         return userIds;
335     }
336 
337     /**
338      * Hacky test to check if the device has an active USB connection, which is
339      * a good proxy for someone doing local development work.
340      */
isUsbActive()341     private static boolean isUsbActive() {
342         if (SystemProperties.getBoolean(PROP_VIRTUAL_DEVICE, false)) {
343             Slog.v(TAG, "Assuming virtual device is connected over USB");
344             return true;
345         }
346         try {
347             final String state = FileUtils
348                     .readTextFile(new File("/sys/class/android_usb/android0/state"), 128, "");
349             return "CONFIGURED".equals(state.trim());
350         } catch (Throwable t) {
351             Slog.w(TAG, "Failed to determine if device was on USB", t);
352             return false;
353         }
354     }
355 
levelToString(int level)356     private static String levelToString(int level) {
357         switch (level) {
358             case LEVEL_NONE: return "NONE";
359             case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: return "RESET_SETTINGS_UNTRUSTED_DEFAULTS";
360             case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: return "RESET_SETTINGS_UNTRUSTED_CHANGES";
361             case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: return "RESET_SETTINGS_TRUSTED_DEFAULTS";
362             case LEVEL_FACTORY_RESET: return "FACTORY_RESET";
363             default: return Integer.toString(level);
364         }
365     }
366 }
367