1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.server; 18 19 import static com.android.server.pm.PackageManagerServiceUtils.logCriticalInfo; 20 21 import android.content.ContentResolver; 22 import android.content.Context; 23 import android.os.Build; 24 import android.os.Environment; 25 import android.os.FileUtils; 26 import android.os.RecoverySystem; 27 import android.os.SystemClock; 28 import android.os.SystemProperties; 29 import android.os.UserHandle; 30 import android.provider.Settings; 31 import android.text.format.DateUtils; 32 import android.util.ExceptionUtils; 33 import android.util.Log; 34 import android.util.MathUtils; 35 import android.util.Slog; 36 import android.util.SparseArray; 37 38 import com.android.internal.util.ArrayUtils; 39 import com.android.server.pm.PackageManagerService; 40 41 import java.io.File; 42 43 /** 44 * Utilities to help rescue the system from crash loops. Callers are expected to 45 * report boot events and persistent app crashes, and if they happen frequently 46 * enough this class will slowly escalate through several rescue operations 47 * before finally rebooting and prompting the user if they want to wipe data as 48 * a last resort. 49 * 50 * @hide 51 */ 52 public class RescueParty { 53 private static final String TAG = "RescueParty"; 54 55 private static final String PROP_ENABLE_RESCUE = "persist.sys.enable_rescue"; 56 private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue"; 57 private static final String PROP_RESCUE_LEVEL = "sys.rescue_level"; 58 private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count"; 59 private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start"; 60 private static final String PROP_VIRTUAL_DEVICE = "ro.hardware.virtual_device"; 61 62 private static final int LEVEL_NONE = 0; 63 private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1; 64 private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2; 65 private static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3; 66 private static final int LEVEL_FACTORY_RESET = 4; 67 68 /** Threshold for boot loops */ 69 private static final Threshold sBoot = new BootThreshold(); 70 /** Threshold for app crash loops */ 71 private static SparseArray<Threshold> sApps = new SparseArray<>(); 72 isDisabled()73 private static boolean isDisabled() { 74 // Check if we're explicitly enabled for testing 75 if (SystemProperties.getBoolean(PROP_ENABLE_RESCUE, false)) { 76 return false; 77 } 78 79 // We're disabled on all engineering devices 80 if (Build.IS_ENG) { 81 Slog.v(TAG, "Disabled because of eng build"); 82 return true; 83 } 84 85 // We're disabled on userdebug devices connected over USB, since that's 86 // a decent signal that someone is actively trying to debug the device, 87 // or that it's in a lab environment. 88 if (Build.IS_USERDEBUG && isUsbActive()) { 89 Slog.v(TAG, "Disabled because of active USB connection"); 90 return true; 91 } 92 93 // One last-ditch check 94 if (SystemProperties.getBoolean(PROP_DISABLE_RESCUE, false)) { 95 Slog.v(TAG, "Disabled because of manual property"); 96 return true; 97 } 98 99 return false; 100 } 101 102 /** 103 * Take note of a boot event. If we notice too many of these events 104 * happening in rapid succession, we'll send out a rescue party. 105 */ noteBoot(Context context)106 public static void noteBoot(Context context) { 107 if (isDisabled()) return; 108 if (sBoot.incrementAndTest()) { 109 sBoot.reset(); 110 incrementRescueLevel(sBoot.uid); 111 executeRescueLevel(context); 112 } 113 } 114 115 /** 116 * Take note of a persistent app crash. If we notice too many of these 117 * events happening in rapid succession, we'll send out a rescue party. 118 */ notePersistentAppCrash(Context context, int uid)119 public static void notePersistentAppCrash(Context context, int uid) { 120 if (isDisabled()) return; 121 Threshold t = sApps.get(uid); 122 if (t == null) { 123 t = new AppThreshold(uid); 124 sApps.put(uid, t); 125 } 126 if (t.incrementAndTest()) { 127 t.reset(); 128 incrementRescueLevel(t.uid); 129 executeRescueLevel(context); 130 } 131 } 132 133 /** 134 * Check if we're currently attempting to reboot for a factory reset. 135 */ isAttemptingFactoryReset()136 public static boolean isAttemptingFactoryReset() { 137 return SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) == LEVEL_FACTORY_RESET; 138 } 139 140 /** 141 * Escalate to the next rescue level. After incrementing the level you'll 142 * probably want to call {@link #executeRescueLevel(Context)}. 143 */ incrementRescueLevel(int triggerUid)144 private static void incrementRescueLevel(int triggerUid) { 145 final int level = MathUtils.constrain( 146 SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1, 147 LEVEL_NONE, LEVEL_FACTORY_RESET); 148 SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level)); 149 150 EventLogTags.writeRescueLevel(level, triggerUid); 151 logCriticalInfo(Log.WARN, "Incremented rescue level to " 152 + levelToString(level) + " triggered by UID " + triggerUid); 153 } 154 155 /** 156 * Called when {@code SettingsProvider} has been published, which is a good 157 * opportunity to reset any settings depending on our rescue level. 158 */ onSettingsProviderPublished(Context context)159 public static void onSettingsProviderPublished(Context context) { 160 executeRescueLevel(context); 161 } 162 executeRescueLevel(Context context)163 private static void executeRescueLevel(Context context) { 164 final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE); 165 if (level == LEVEL_NONE) return; 166 167 Slog.w(TAG, "Attempting rescue level " + levelToString(level)); 168 try { 169 executeRescueLevelInternal(context, level); 170 EventLogTags.writeRescueSuccess(level); 171 logCriticalInfo(Log.DEBUG, 172 "Finished rescue level " + levelToString(level)); 173 } catch (Throwable t) { 174 final String msg = ExceptionUtils.getCompleteMessage(t); 175 EventLogTags.writeRescueFailure(level, msg); 176 logCriticalInfo(Log.ERROR, 177 "Failed rescue level " + levelToString(level) + ": " + msg); 178 } 179 } 180 executeRescueLevelInternal(Context context, int level)181 private static void executeRescueLevelInternal(Context context, int level) throws Exception { 182 switch (level) { 183 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: 184 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS); 185 break; 186 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: 187 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_CHANGES); 188 break; 189 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: 190 resetAllSettings(context, Settings.RESET_MODE_TRUSTED_DEFAULTS); 191 break; 192 case LEVEL_FACTORY_RESET: 193 RecoverySystem.rebootPromptAndWipeUserData(context, TAG); 194 break; 195 } 196 } 197 resetAllSettings(Context context, int mode)198 private static void resetAllSettings(Context context, int mode) throws Exception { 199 // Try our best to reset all settings possible, and once finished 200 // rethrow any exception that we encountered 201 Exception res = null; 202 final ContentResolver resolver = context.getContentResolver(); 203 try { 204 Settings.Global.resetToDefaultsAsUser(resolver, null, mode, UserHandle.USER_SYSTEM); 205 } catch (Throwable t) { 206 res = new RuntimeException("Failed to reset global settings", t); 207 } 208 for (int userId : getAllUserIds()) { 209 try { 210 Settings.Secure.resetToDefaultsAsUser(resolver, null, mode, userId); 211 } catch (Throwable t) { 212 res = new RuntimeException("Failed to reset secure settings for " + userId, t); 213 } 214 } 215 if (res != null) { 216 throw res; 217 } 218 } 219 220 /** 221 * Threshold that can be triggered if a number of events occur within a 222 * window of time. 223 */ 224 private abstract static class Threshold { getCount()225 public abstract int getCount(); setCount(int count)226 public abstract void setCount(int count); getStart()227 public abstract long getStart(); setStart(long start)228 public abstract void setStart(long start); 229 230 private final int uid; 231 private final int triggerCount; 232 private final long triggerWindow; 233 Threshold(int uid, int triggerCount, long triggerWindow)234 public Threshold(int uid, int triggerCount, long triggerWindow) { 235 this.uid = uid; 236 this.triggerCount = triggerCount; 237 this.triggerWindow = triggerWindow; 238 } 239 reset()240 public void reset() { 241 setCount(0); 242 setStart(0); 243 } 244 245 /** 246 * @return if this threshold has been triggered 247 */ incrementAndTest()248 public boolean incrementAndTest() { 249 final long now = SystemClock.elapsedRealtime(); 250 final long window = now - getStart(); 251 if (window > triggerWindow) { 252 setCount(1); 253 setStart(now); 254 return false; 255 } else { 256 int count = getCount() + 1; 257 setCount(count); 258 EventLogTags.writeRescueNote(uid, count, window); 259 Slog.w(TAG, "Noticed " + count + " events for UID " + uid + " in last " 260 + (window / 1000) + " sec"); 261 return (count >= triggerCount); 262 } 263 } 264 } 265 266 /** 267 * Specialization of {@link Threshold} for monitoring boot events. It stores 268 * counters in system properties for robustness. 269 */ 270 private static class BootThreshold extends Threshold { BootThreshold()271 public BootThreshold() { 272 // We're interested in 5 events in any 300 second period; this 273 // window is super relaxed because booting can take a long time if 274 // forced to dexopt things. 275 super(android.os.Process.ROOT_UID, 5, 300 * DateUtils.SECOND_IN_MILLIS); 276 } 277 278 @Override getCount()279 public int getCount() { 280 return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0); 281 } 282 283 @Override setCount(int count)284 public void setCount(int count) { 285 SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count)); 286 } 287 288 @Override getStart()289 public long getStart() { 290 return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0); 291 } 292 293 @Override setStart(long start)294 public void setStart(long start) { 295 SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(start)); 296 } 297 } 298 299 /** 300 * Specialization of {@link Threshold} for monitoring app crashes. It stores 301 * counters in memory. 302 */ 303 private static class AppThreshold extends Threshold { 304 private int count; 305 private long start; 306 AppThreshold(int uid)307 public AppThreshold(int uid) { 308 // We're interested in 5 events in any 30 second period; apps crash 309 // pretty quickly so we can keep a tight leash on them. 310 super(uid, 5, 30 * DateUtils.SECOND_IN_MILLIS); 311 } 312 getCount()313 @Override public int getCount() { return count; } setCount(int count)314 @Override public void setCount(int count) { this.count = count; } getStart()315 @Override public long getStart() { return start; } setStart(long start)316 @Override public void setStart(long start) { this.start = start; } 317 } 318 getAllUserIds()319 private static int[] getAllUserIds() { 320 int[] userIds = { UserHandle.USER_SYSTEM }; 321 try { 322 for (File file : FileUtils.listFilesOrEmpty(Environment.getDataSystemDeDirectory())) { 323 try { 324 final int userId = Integer.parseInt(file.getName()); 325 if (userId != UserHandle.USER_SYSTEM) { 326 userIds = ArrayUtils.appendInt(userIds, userId); 327 } 328 } catch (NumberFormatException ignored) { 329 } 330 } 331 } catch (Throwable t) { 332 Slog.w(TAG, "Trouble discovering users", t); 333 } 334 return userIds; 335 } 336 337 /** 338 * Hacky test to check if the device has an active USB connection, which is 339 * a good proxy for someone doing local development work. 340 */ isUsbActive()341 private static boolean isUsbActive() { 342 if (SystemProperties.getBoolean(PROP_VIRTUAL_DEVICE, false)) { 343 Slog.v(TAG, "Assuming virtual device is connected over USB"); 344 return true; 345 } 346 try { 347 final String state = FileUtils 348 .readTextFile(new File("/sys/class/android_usb/android0/state"), 128, ""); 349 return "CONFIGURED".equals(state.trim()); 350 } catch (Throwable t) { 351 Slog.w(TAG, "Failed to determine if device was on USB", t); 352 return false; 353 } 354 } 355 levelToString(int level)356 private static String levelToString(int level) { 357 switch (level) { 358 case LEVEL_NONE: return "NONE"; 359 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: return "RESET_SETTINGS_UNTRUSTED_DEFAULTS"; 360 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: return "RESET_SETTINGS_UNTRUSTED_CHANGES"; 361 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: return "RESET_SETTINGS_TRUSTED_DEFAULTS"; 362 case LEVEL_FACTORY_RESET: return "FACTORY_RESET"; 363 default: return Integer.toString(level); 364 } 365 } 366 } 367