1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.server; 18 19 import android.content.ContentResolver; 20 import android.content.Context; 21 import android.os.Build; 22 import android.os.Environment; 23 import android.os.FileUtils; 24 import android.os.RecoverySystem; 25 import android.os.SystemClock; 26 import android.os.SystemProperties; 27 import android.os.UserHandle; 28 import android.provider.Settings; 29 import android.text.format.DateUtils; 30 import android.util.ExceptionUtils; 31 import android.util.Log; 32 import android.util.MathUtils; 33 import android.util.Slog; 34 import android.util.SparseArray; 35 36 import com.android.internal.util.ArrayUtils; 37 import com.android.server.pm.PackageManagerService; 38 39 import java.io.File; 40 41 /** 42 * Utilities to help rescue the system from crash loops. Callers are expected to 43 * report boot events and persistent app crashes, and if they happen frequently 44 * enough this class will slowly escalate through several rescue operations 45 * before finally rebooting and prompting the user if they want to wipe data as 46 * a last resort. 47 * 48 * @hide 49 */ 50 public class RescueParty { 51 private static final String TAG = "RescueParty"; 52 53 private static final String PROP_ENABLE_RESCUE = "persist.sys.enable_rescue"; 54 private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue"; 55 private static final String PROP_RESCUE_LEVEL = "sys.rescue_level"; 56 private static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count"; 57 private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start"; 58 59 private static final int LEVEL_NONE = 0; 60 private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1; 61 private static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2; 62 private static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3; 63 private static final int LEVEL_FACTORY_RESET = 4; 64 65 /** Threshold for boot loops */ 66 private static final Threshold sBoot = new BootThreshold(); 67 /** Threshold for app crash loops */ 68 private static SparseArray<Threshold> sApps = new SparseArray<>(); 69 isDisabled()70 private static boolean isDisabled() { 71 // Check if we're explicitly enabled for testing 72 if (SystemProperties.getBoolean(PROP_ENABLE_RESCUE, false)) { 73 return false; 74 } 75 76 // We're disabled on all engineering devices 77 if (Build.IS_ENG) { 78 Slog.v(TAG, "Disabled because of eng build"); 79 return true; 80 } 81 82 // We're disabled on userdebug devices connected over USB, since that's 83 // a decent signal that someone is actively trying to debug the device, 84 // or that it's in a lab environment. 85 if (Build.IS_USERDEBUG && isUsbActive()) { 86 Slog.v(TAG, "Disabled because of active USB connection"); 87 return true; 88 } 89 90 // One last-ditch check 91 if (SystemProperties.getBoolean(PROP_DISABLE_RESCUE, false)) { 92 Slog.v(TAG, "Disabled because of manual property"); 93 return true; 94 } 95 96 return false; 97 } 98 99 /** 100 * Take note of a boot event. If we notice too many of these events 101 * happening in rapid succession, we'll send out a rescue party. 102 */ noteBoot(Context context)103 public static void noteBoot(Context context) { 104 if (isDisabled()) return; 105 if (sBoot.incrementAndTest()) { 106 sBoot.reset(); 107 incrementRescueLevel(sBoot.uid); 108 executeRescueLevel(context); 109 } 110 } 111 112 /** 113 * Take note of a persistent app crash. If we notice too many of these 114 * events happening in rapid succession, we'll send out a rescue party. 115 */ notePersistentAppCrash(Context context, int uid)116 public static void notePersistentAppCrash(Context context, int uid) { 117 if (isDisabled()) return; 118 Threshold t = sApps.get(uid); 119 if (t == null) { 120 t = new AppThreshold(uid); 121 sApps.put(uid, t); 122 } 123 if (t.incrementAndTest()) { 124 t.reset(); 125 incrementRescueLevel(t.uid); 126 executeRescueLevel(context); 127 } 128 } 129 130 /** 131 * Check if we're currently attempting to reboot for a factory reset. 132 */ isAttemptingFactoryReset()133 public static boolean isAttemptingFactoryReset() { 134 return SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) == LEVEL_FACTORY_RESET; 135 } 136 137 /** 138 * Escalate to the next rescue level. After incrementing the level you'll 139 * probably want to call {@link #executeRescueLevel(Context)}. 140 */ incrementRescueLevel(int triggerUid)141 private static void incrementRescueLevel(int triggerUid) { 142 final int level = MathUtils.constrain( 143 SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE) + 1, 144 LEVEL_NONE, LEVEL_FACTORY_RESET); 145 SystemProperties.set(PROP_RESCUE_LEVEL, Integer.toString(level)); 146 147 EventLogTags.writeRescueLevel(level, triggerUid); 148 PackageManagerService.logCriticalInfo(Log.WARN, "Incremented rescue level to " 149 + levelToString(level) + " triggered by UID " + triggerUid); 150 } 151 152 /** 153 * Called when {@code SettingsProvider} has been published, which is a good 154 * opportunity to reset any settings depending on our rescue level. 155 */ onSettingsProviderPublished(Context context)156 public static void onSettingsProviderPublished(Context context) { 157 executeRescueLevel(context); 158 } 159 executeRescueLevel(Context context)160 private static void executeRescueLevel(Context context) { 161 final int level = SystemProperties.getInt(PROP_RESCUE_LEVEL, LEVEL_NONE); 162 if (level == LEVEL_NONE) return; 163 164 Slog.w(TAG, "Attempting rescue level " + levelToString(level)); 165 try { 166 executeRescueLevelInternal(context, level); 167 EventLogTags.writeRescueSuccess(level); 168 PackageManagerService.logCriticalInfo(Log.DEBUG, 169 "Finished rescue level " + levelToString(level)); 170 } catch (Throwable t) { 171 final String msg = ExceptionUtils.getCompleteMessage(t); 172 EventLogTags.writeRescueFailure(level, msg); 173 PackageManagerService.logCriticalInfo(Log.ERROR, 174 "Failed rescue level " + levelToString(level) + ": " + msg); 175 } 176 } 177 executeRescueLevelInternal(Context context, int level)178 private static void executeRescueLevelInternal(Context context, int level) throws Exception { 179 switch (level) { 180 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: 181 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_DEFAULTS); 182 break; 183 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: 184 resetAllSettings(context, Settings.RESET_MODE_UNTRUSTED_CHANGES); 185 break; 186 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: 187 resetAllSettings(context, Settings.RESET_MODE_TRUSTED_DEFAULTS); 188 break; 189 case LEVEL_FACTORY_RESET: 190 RecoverySystem.rebootPromptAndWipeUserData(context, TAG); 191 break; 192 } 193 } 194 resetAllSettings(Context context, int mode)195 private static void resetAllSettings(Context context, int mode) throws Exception { 196 // Try our best to reset all settings possible, and once finished 197 // rethrow any exception that we encountered 198 Exception res = null; 199 final ContentResolver resolver = context.getContentResolver(); 200 try { 201 Settings.Global.resetToDefaultsAsUser(resolver, null, mode, UserHandle.USER_SYSTEM); 202 } catch (Throwable t) { 203 res = new RuntimeException("Failed to reset global settings", t); 204 } 205 for (int userId : getAllUserIds()) { 206 try { 207 Settings.Secure.resetToDefaultsAsUser(resolver, null, mode, userId); 208 } catch (Throwable t) { 209 res = new RuntimeException("Failed to reset secure settings for " + userId, t); 210 } 211 } 212 if (res != null) { 213 throw res; 214 } 215 } 216 217 /** 218 * Threshold that can be triggered if a number of events occur within a 219 * window of time. 220 */ 221 private abstract static class Threshold { getCount()222 public abstract int getCount(); setCount(int count)223 public abstract void setCount(int count); getStart()224 public abstract long getStart(); setStart(long start)225 public abstract void setStart(long start); 226 227 private final int uid; 228 private final int triggerCount; 229 private final long triggerWindow; 230 Threshold(int uid, int triggerCount, long triggerWindow)231 public Threshold(int uid, int triggerCount, long triggerWindow) { 232 this.uid = uid; 233 this.triggerCount = triggerCount; 234 this.triggerWindow = triggerWindow; 235 } 236 reset()237 public void reset() { 238 setCount(0); 239 setStart(0); 240 } 241 242 /** 243 * @return if this threshold has been triggered 244 */ incrementAndTest()245 public boolean incrementAndTest() { 246 final long now = SystemClock.elapsedRealtime(); 247 final long window = now - getStart(); 248 if (window > triggerWindow) { 249 setCount(1); 250 setStart(now); 251 return false; 252 } else { 253 int count = getCount() + 1; 254 setCount(count); 255 EventLogTags.writeRescueNote(uid, count, window); 256 Slog.w(TAG, "Noticed " + count + " events for UID " + uid + " in last " 257 + (window / 1000) + " sec"); 258 return (count >= triggerCount); 259 } 260 } 261 } 262 263 /** 264 * Specialization of {@link Threshold} for monitoring boot events. It stores 265 * counters in system properties for robustness. 266 */ 267 private static class BootThreshold extends Threshold { BootThreshold()268 public BootThreshold() { 269 // We're interested in 5 events in any 300 second period; this 270 // window is super relaxed because booting can take a long time if 271 // forced to dexopt things. 272 super(android.os.Process.ROOT_UID, 5, 300 * DateUtils.SECOND_IN_MILLIS); 273 } 274 275 @Override getCount()276 public int getCount() { 277 return SystemProperties.getInt(PROP_RESCUE_BOOT_COUNT, 0); 278 } 279 280 @Override setCount(int count)281 public void setCount(int count) { 282 SystemProperties.set(PROP_RESCUE_BOOT_COUNT, Integer.toString(count)); 283 } 284 285 @Override getStart()286 public long getStart() { 287 return SystemProperties.getLong(PROP_RESCUE_BOOT_START, 0); 288 } 289 290 @Override setStart(long start)291 public void setStart(long start) { 292 SystemProperties.set(PROP_RESCUE_BOOT_START, Long.toString(start)); 293 } 294 } 295 296 /** 297 * Specialization of {@link Threshold} for monitoring app crashes. It stores 298 * counters in memory. 299 */ 300 private static class AppThreshold extends Threshold { 301 private int count; 302 private long start; 303 AppThreshold(int uid)304 public AppThreshold(int uid) { 305 // We're interested in 5 events in any 30 second period; apps crash 306 // pretty quickly so we can keep a tight leash on them. 307 super(uid, 5, 30 * DateUtils.SECOND_IN_MILLIS); 308 } 309 getCount()310 @Override public int getCount() { return count; } setCount(int count)311 @Override public void setCount(int count) { this.count = count; } getStart()312 @Override public long getStart() { return start; } setStart(long start)313 @Override public void setStart(long start) { this.start = start; } 314 } 315 getAllUserIds()316 private static int[] getAllUserIds() { 317 int[] userIds = { UserHandle.USER_SYSTEM }; 318 try { 319 for (File file : FileUtils.listFilesOrEmpty(Environment.getDataSystemDeDirectory())) { 320 try { 321 final int userId = Integer.parseInt(file.getName()); 322 if (userId != UserHandle.USER_SYSTEM) { 323 userIds = ArrayUtils.appendInt(userIds, userId); 324 } 325 } catch (NumberFormatException ignored) { 326 } 327 } 328 } catch (Throwable t) { 329 Slog.w(TAG, "Trouble discovering users", t); 330 } 331 return userIds; 332 } 333 334 /** 335 * Hacky test to check if the device has an active USB connection, which is 336 * a good proxy for someone doing local development work. 337 */ isUsbActive()338 private static boolean isUsbActive() { 339 try { 340 final String state = FileUtils 341 .readTextFile(new File("/sys/class/android_usb/android0/state"), 128, ""); 342 return "CONFIGURED".equals(state.trim()); 343 } catch (Throwable t) { 344 Slog.w(TAG, "Failed to determine if device was on USB", t); 345 return false; 346 } 347 } 348 levelToString(int level)349 private static String levelToString(int level) { 350 switch (level) { 351 case LEVEL_NONE: return "NONE"; 352 case LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS: return "RESET_SETTINGS_UNTRUSTED_DEFAULTS"; 353 case LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES: return "RESET_SETTINGS_UNTRUSTED_CHANGES"; 354 case LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS: return "RESET_SETTINGS_TRUSTED_DEFAULTS"; 355 case LEVEL_FACTORY_RESET: return "FACTORY_RESET"; 356 default: return Integer.toString(level); 357 } 358 } 359 } 360