1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.server; 18 19 import static android.service.watchdog.ExplicitHealthCheckService.PackageConfig; 20 21 import static java.lang.annotation.RetentionPolicy.SOURCE; 22 23 import android.annotation.IntDef; 24 import android.annotation.Nullable; 25 import android.content.Context; 26 import android.content.pm.PackageInfo; 27 import android.content.pm.PackageManager; 28 import android.content.pm.VersionedPackage; 29 import android.crashrecovery.flags.Flags; 30 import android.net.ConnectivityModuleConnector; 31 import android.os.Environment; 32 import android.os.Handler; 33 import android.os.Looper; 34 import android.os.Process; 35 import android.os.SystemProperties; 36 import android.provider.DeviceConfig; 37 import android.sysprop.CrashRecoveryProperties; 38 import android.text.TextUtils; 39 import android.util.ArrayMap; 40 import android.util.ArraySet; 41 import android.util.AtomicFile; 42 import android.util.LongArrayQueue; 43 import android.util.Slog; 44 import android.util.Xml; 45 46 import com.android.internal.annotations.GuardedBy; 47 import com.android.internal.annotations.VisibleForTesting; 48 import com.android.internal.os.BackgroundThread; 49 import com.android.internal.util.IndentingPrintWriter; 50 import com.android.internal.util.XmlUtils; 51 import com.android.modules.utils.TypedXmlPullParser; 52 import 
com.android.modules.utils.TypedXmlSerializer; 53 54 import libcore.io.IoUtils; 55 56 import org.xmlpull.v1.XmlPullParserException; 57 58 import java.io.BufferedReader; 59 import java.io.BufferedWriter; 60 import java.io.File; 61 import java.io.FileInputStream; 62 import java.io.FileNotFoundException; 63 import java.io.FileOutputStream; 64 import java.io.FileReader; 65 import java.io.FileWriter; 66 import java.io.IOException; 67 import java.io.InputStream; 68 import java.io.ObjectInputStream; 69 import java.io.ObjectOutputStream; 70 import java.lang.annotation.Retention; 71 import java.lang.annotation.RetentionPolicy; 72 import java.util.ArrayList; 73 import java.util.Collections; 74 import java.util.HashMap; 75 import java.util.Iterator; 76 import java.util.List; 77 import java.util.Map; 78 import java.util.NoSuchElementException; 79 import java.util.Set; 80 import java.util.concurrent.TimeUnit; 81 82 /** 83 * Monitors the health of packages on the system and notifies interested observers when packages 84 * fail. On failure, the registered observer with the least user impacting mitigation will 85 * be notified. 
*/
public class PackageWatchdog {
    private static final String TAG = "PackageWatchdog";

    // Names of the tunable watchdog properties. The code that reads them is outside this section.
    static final String PROPERTY_WATCHDOG_TRIGGER_DURATION_MILLIS =
            "watchdog_trigger_failure_duration_millis";
    static final String PROPERTY_WATCHDOG_TRIGGER_FAILURE_COUNT =
            "watchdog_trigger_failure_count";
    static final String PROPERTY_WATCHDOG_EXPLICIT_HEALTH_CHECK_ENABLED =
            "watchdog_explicit_health_check_enabled";

    // TODO: make the following values configurable via DeviceConfig
    // Delay used when checkAndMitigateNativeCrashes() re-posts itself.
    private static final long NATIVE_CRASH_POLLING_INTERVAL_MILLIS =
            TimeUnit.SECONDS.toMillis(30);
    // Initial value of mNumberOfNativeCrashPollsRemaining.
    private static final long NUMBER_OF_NATIVE_CRASH_POLLS = 10;


    // Failure reasons reported to observers; see @FailureReasons.
    public static final int FAILURE_REASON_UNKNOWN = 0;
    public static final int FAILURE_REASON_NATIVE_CRASH = 1;
    public static final int FAILURE_REASON_EXPLICIT_HEALTH_CHECK = 2;
    public static final int FAILURE_REASON_APP_CRASH = 3;
    public static final int FAILURE_REASON_APP_NOT_RESPONDING = 4;
    public static final int FAILURE_REASON_BOOT_LOOP = 5;

    @IntDef(prefix = { "FAILURE_REASON_" }, value = {
            FAILURE_REASON_UNKNOWN,
            FAILURE_REASON_NATIVE_CRASH,
            FAILURE_REASON_EXPLICIT_HEALTH_CHECK,
            FAILURE_REASON_APP_CRASH,
            FAILURE_REASON_APP_NOT_RESPONDING,
            FAILURE_REASON_BOOT_LOOP
    })
    @Retention(RetentionPolicy.SOURCE)
    public @interface FailureReasons {}

    // Duration to count package failures before it resets to 0
    @VisibleForTesting
    static final int DEFAULT_TRIGGER_FAILURE_DURATION_MS =
            (int) TimeUnit.MINUTES.toMillis(1);
    // Number of package failures within the duration above before we notify observers
    @VisibleForTesting
    static final int DEFAULT_TRIGGER_FAILURE_COUNT = 5;
    // Monitoring duration used by startObservingHealth() when the caller passes a duration < 1
    @VisibleForTesting
    static final long DEFAULT_OBSERVING_DURATION_MS = TimeUnit.DAYS.toMillis(2);
    // Sliding window for tracking how many mitigation calls were made for a package.
    @VisibleForTesting
    static final long DEFAULT_DEESCALATION_WINDOW_MS = TimeUnit.HOURS.toMillis(1);
    // Whether explicit health checks are enabled or not
    private static final boolean DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED = true;

    // Arguments used to construct mBootThreshold.
    @VisibleForTesting
    static final int DEFAULT_BOOT_LOOP_TRIGGER_COUNT = 5;

    static final long DEFAULT_BOOT_LOOP_TRIGGER_WINDOW_MS = TimeUnit.MINUTES.toMillis(10);

    // Time needed to apply mitigation
    // (system property name; read by getMitigationWindowMs() with the default below)
    private static final String MITIGATION_WINDOW_MS =
            "persist.device_config.configuration.mitigation_window_ms";
    @VisibleForTesting
    static final long DEFAULT_MITIGATION_WINDOW_MS = TimeUnit.SECONDS.toMillis(5);

    // Threshold level at which or above user might experience significant disruption.
    // (system property name; read by getUserImpactLevelLimit() with the default below)
    private static final String MAJOR_USER_IMPACT_LEVEL_THRESHOLD =
            "persist.device_config.configuration.major_user_impact_level_threshold";
    private static final int DEFAULT_MAJOR_USER_IMPACT_LEVEL_THRESHOLD =
            PackageHealthObserverImpact.USER_IMPACT_LEVEL_71;

    // Decremented on every run of checkAndMitigateNativeCrashes(); polling stops at 0.
    private long mNumberOfNativeCrashPollsRemaining;

    // Tag and attribute names of the persisted XML state.
    private static final int DB_VERSION = 1;
    private static final String TAG_PACKAGE_WATCHDOG = "package-watchdog";
    private static final String TAG_PACKAGE = "package";
    private static final String TAG_OBSERVER = "observer";
    private static final String ATTR_VERSION = "version";
    private static final String ATTR_NAME = "name";
    private static final String ATTR_DURATION = "duration";
    private static final String ATTR_EXPLICIT_HEALTH_CHECK_DURATION = "health-check-duration";
    private static final String ATTR_PASSED_HEALTH_CHECK = "passed-health-check";
    private static final String ATTR_MITIGATION_CALLS = "mitigation-calls";
    private static final String ATTR_MITIGATION_COUNT = "mitigation-count";

    // A file containing information about the current mitigation count in the case of a boot loop.
    // This allows boot loop information to persist in the case of an fs-checkpoint being
    // aborted.
    private static final String METADATA_FILE = "/metadata/watchdog/mitigation_count.txt";

    // Singleton instance; assigned by the injectable constructor.
    @GuardedBy("PackageWatchdog.class")
    private static PackageWatchdog sPackageWatchdog;

    private final Object mLock = new Object();
    // System server context
    private final Context mContext;
    // Handler to run short running tasks
    private final Handler mShortTaskHandler;
    // Handler for processing IO and long running tasks
    private final Handler mLongTaskHandler;
    // Contains (observer-name -> observer-handle) that have ever been registered from
    // previous boots. Observers with all packages expired are periodically pruned.
    // It is saved to disk on system shutdown and repopulated on startup so it survives reboots.
    @GuardedBy("mLock")
    private final ArrayMap<String, ObserverInternal> mAllObservers = new ArrayMap<>();
    // File containing the XML data of monitored packages /data/system/package-watchdog.xml
    private final AtomicFile mPolicyFile;
    private final ExplicitHealthCheckController mHealthCheckController;
    private final ConnectivityModuleConnector mConnectivityModuleConnector;
    // Pre-built Runnables so the same instance can be posted to and removed from a Handler.
    private final Runnable mSyncRequests = this::syncRequests;
    private final Runnable mSyncStateWithScheduledReason = this::syncStateWithScheduledReason;
    private final Runnable mSaveToFile = this::saveToFile;
    private final SystemClock mSystemClock;
    private final BootThreshold mBootThreshold;
    private final DeviceConfig.OnPropertiesChangedListener
            mOnPropertyChangedListener = this::onPropertyChanged;

    // The set of packages that have been synced with the ExplicitHealthCheckController
    @GuardedBy("mLock")
    private Set<String> mRequestedHealthCheckPackages = new ArraySet<>();
    // Set to true by onPackagesReady(); syncRequests() does nothing until then.
    @GuardedBy("mLock")
    private boolean mIsPackagesReady;
    // Flag to control whether explicit health checks are supported or not
    @GuardedBy("mLock")
    private boolean mIsHealthCheckEnabled = DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED;
    @GuardedBy("mLock")
    private int mTriggerFailureDurationMs = DEFAULT_TRIGGER_FAILURE_DURATION_MS;
    @GuardedBy("mLock")
    private int mTriggerFailureCount = DEFAULT_TRIGGER_FAILURE_COUNT;
    // SystemClock#uptimeMillis when we last executed #syncState
    // 0 if no prune is scheduled.
    @GuardedBy("mLock")
    private long mUptimeAtLastStateSync;
    // If true, sync explicit health check packages with the ExplicitHealthCheckController.
    @GuardedBy("mLock")
    private boolean mSyncRequired = false;

    // Uptime recorded by maybeExecute() just before it runs a mitigation. The large negative
    // initial value keeps the first failure outside the default mitigation window.
    @GuardedBy("mLock")
    private long mLastMitigation = -1000000;

    /** Source of the uptime used by this class; passed to the constructor so tests can fake it. */
    @FunctionalInterface
    @VisibleForTesting
    interface SystemClock {
        long uptimeMillis();
    }

    /**
     * Builds the production instance: the policy file is {@code system/package-watchdog.xml}
     * under the data directory, short tasks run on the calling thread's looper and long tasks
     * on the {@link BackgroundThread} handler.
     */
    private PackageWatchdog(Context context) {
        // Needs to be constructed inline
        this(context, new AtomicFile(
                        new File(new File(Environment.getDataDirectory(), "system"),
                                "package-watchdog.xml")),
                new Handler(Looper.myLooper()), BackgroundThread.getHandler(),
                new ExplicitHealthCheckController(context),
                ConnectivityModuleConnector.getInstance(),
                android.os.SystemClock::uptimeMillis);
    }

    /**
     * Creates a PackageWatchdog that allows injecting dependencies.
241 */ 242 @VisibleForTesting PackageWatchdog(Context context, AtomicFile policyFile, Handler shortTaskHandler, Handler longTaskHandler, ExplicitHealthCheckController controller, ConnectivityModuleConnector connectivityModuleConnector, SystemClock clock)243 PackageWatchdog(Context context, AtomicFile policyFile, Handler shortTaskHandler, 244 Handler longTaskHandler, ExplicitHealthCheckController controller, 245 ConnectivityModuleConnector connectivityModuleConnector, SystemClock clock) { 246 mContext = context; 247 mPolicyFile = policyFile; 248 mShortTaskHandler = shortTaskHandler; 249 mLongTaskHandler = longTaskHandler; 250 mHealthCheckController = controller; 251 mConnectivityModuleConnector = connectivityModuleConnector; 252 mSystemClock = clock; 253 mNumberOfNativeCrashPollsRemaining = NUMBER_OF_NATIVE_CRASH_POLLS; 254 mBootThreshold = new BootThreshold(DEFAULT_BOOT_LOOP_TRIGGER_COUNT, 255 DEFAULT_BOOT_LOOP_TRIGGER_WINDOW_MS); 256 257 loadFromFile(); 258 sPackageWatchdog = this; 259 } 260 261 /** Creates or gets singleton instance of PackageWatchdog. */ getInstance(Context context)262 public static PackageWatchdog getInstance(Context context) { 263 synchronized (PackageWatchdog.class) { 264 if (sPackageWatchdog == null) { 265 new PackageWatchdog(context); 266 } 267 return sPackageWatchdog; 268 } 269 } 270 271 /** 272 * Called during boot to notify when packages are ready on the device so we can start 273 * binding. 274 */ onPackagesReady()275 public void onPackagesReady() { 276 synchronized (mLock) { 277 mIsPackagesReady = true; 278 mHealthCheckController.setCallbacks(packageName -> onHealthCheckPassed(packageName), 279 packages -> onSupportedPackages(packages), 280 this::onSyncRequestNotified); 281 setPropertyChangedListenerLocked(); 282 updateConfigs(); 283 registerConnectivityModuleHealthListener(); 284 } 285 } 286 287 /** 288 * Registers {@code observer} to listen for package failures. 
Add a new ObserverInternal for 289 * this observer if it does not already exist. 290 * 291 * <p>Observers are expected to call this on boot. It does not specify any packages but 292 * it will resume observing any packages requested from a previous boot. 293 */ registerHealthObserver(PackageHealthObserver observer)294 public void registerHealthObserver(PackageHealthObserver observer) { 295 synchronized (mLock) { 296 ObserverInternal internalObserver = mAllObservers.get(observer.getName()); 297 if (internalObserver != null) { 298 internalObserver.registeredObserver = observer; 299 } else { 300 internalObserver = new ObserverInternal(observer.getName(), new ArrayList<>()); 301 internalObserver.registeredObserver = observer; 302 mAllObservers.put(observer.getName(), internalObserver); 303 syncState("added new observer"); 304 } 305 } 306 } 307 308 /** 309 * Starts observing the health of the {@code packages} for {@code observer} and notifies 310 * {@code observer} of any package failures within the monitoring duration. 311 * 312 * <p>If monitoring a package supporting explicit health check, at the end of the monitoring 313 * duration if {@link #onHealthCheckPassed} was never called, 314 * {@link PackageHealthObserver#execute} will be called as if the package failed. 315 * 316 * <p>If {@code observer} is already monitoring a package in {@code packageNames}, 317 * the monitoring window of that package will be reset to {@code durationMs} and the health 318 * check state will be reset to a default depending on if the package is contained in 319 * {@link mPackagesWithExplicitHealthCheckEnabled}. 320 * 321 * <p>If {@code packageNames} is empty, this will be a no-op. 322 * 323 * <p>If {@code durationMs} is less than 1, a default monitoring duration 324 * {@link #DEFAULT_OBSERVING_DURATION_MS} will be used. 
325 */ startObservingHealth(PackageHealthObserver observer, List<String> packageNames, long durationMs)326 public void startObservingHealth(PackageHealthObserver observer, List<String> packageNames, 327 long durationMs) { 328 if (packageNames.isEmpty()) { 329 Slog.wtf(TAG, "No packages to observe, " + observer.getName()); 330 return; 331 } 332 if (durationMs < 1) { 333 Slog.wtf(TAG, "Invalid duration " + durationMs + "ms for observer " 334 + observer.getName() + ". Not observing packages " + packageNames); 335 durationMs = DEFAULT_OBSERVING_DURATION_MS; 336 } 337 338 List<MonitoredPackage> packages = new ArrayList<>(); 339 for (int i = 0; i < packageNames.size(); i++) { 340 // Health checks not available yet so health check state will start INACTIVE 341 MonitoredPackage pkg = newMonitoredPackage(packageNames.get(i), durationMs, false); 342 if (pkg != null) { 343 packages.add(pkg); 344 } else { 345 Slog.w(TAG, "Failed to create MonitoredPackage for pkg=" + packageNames.get(i)); 346 } 347 } 348 349 if (packages.isEmpty()) { 350 return; 351 } 352 353 // Sync before we add the new packages to the observers. This will #pruneObservers, 354 // causing any elapsed time to be deducted from all existing packages before we add new 355 // packages. This maintains the invariant that the elapsed time for ALL (new and existing) 356 // packages is the same. 
357 mLongTaskHandler.post(() -> { 358 syncState("observing new packages"); 359 360 synchronized (mLock) { 361 ObserverInternal oldObserver = mAllObservers.get(observer.getName()); 362 if (oldObserver == null) { 363 Slog.d(TAG, observer.getName() + " started monitoring health " 364 + "of packages " + packageNames); 365 mAllObservers.put(observer.getName(), 366 new ObserverInternal(observer.getName(), packages)); 367 } else { 368 Slog.d(TAG, observer.getName() + " added the following " 369 + "packages to monitor " + packageNames); 370 oldObserver.updatePackagesLocked(packages); 371 } 372 } 373 374 // Register observer in case not already registered 375 registerHealthObserver(observer); 376 377 // Sync after we add the new packages to the observers. We may have received packges 378 // requiring an earlier schedule than we are currently scheduled for. 379 syncState("updated observers"); 380 }); 381 382 } 383 384 /** 385 * Unregisters {@code observer} from listening to package failure. 386 * Additionally, this stops observing any packages that may have previously been observed 387 * even from a previous boot. 388 */ unregisterHealthObserver(PackageHealthObserver observer)389 public void unregisterHealthObserver(PackageHealthObserver observer) { 390 mLongTaskHandler.post(() -> { 391 synchronized (mLock) { 392 mAllObservers.remove(observer.getName()); 393 } 394 syncState("unregistering observer: " + observer.getName()); 395 }); 396 } 397 398 /** 399 * Called when a process fails due to a crash, ANR or explicit health check. 400 * 401 * <p>For each package contained in the process, one registered observer with the least user 402 * impact will be notified for mitigation. 403 * 404 * <p>This method could be called frequently if there is a severe problem on the device. 
405 */ onPackageFailure(List<VersionedPackage> packages, @FailureReasons int failureReason)406 public void onPackageFailure(List<VersionedPackage> packages, 407 @FailureReasons int failureReason) { 408 if (packages == null) { 409 Slog.w(TAG, "Could not resolve a list of failing packages"); 410 return; 411 } 412 synchronized (mLock) { 413 final long now = mSystemClock.uptimeMillis(); 414 if (Flags.recoverabilityDetection()) { 415 if (now >= mLastMitigation 416 && (now - mLastMitigation) < getMitigationWindowMs()) { 417 Slog.i(TAG, "Skipping onPackageFailure mitigation"); 418 return; 419 } 420 } 421 } 422 mLongTaskHandler.post(() -> { 423 synchronized (mLock) { 424 if (mAllObservers.isEmpty()) { 425 return; 426 } 427 boolean requiresImmediateAction = (failureReason == FAILURE_REASON_NATIVE_CRASH 428 || failureReason == FAILURE_REASON_EXPLICIT_HEALTH_CHECK); 429 if (requiresImmediateAction) { 430 handleFailureImmediately(packages, failureReason); 431 } else { 432 for (int pIndex = 0; pIndex < packages.size(); pIndex++) { 433 VersionedPackage versionedPackage = packages.get(pIndex); 434 // Observer that will receive failure for versionedPackage 435 PackageHealthObserver currentObserverToNotify = null; 436 int currentObserverImpact = Integer.MAX_VALUE; 437 MonitoredPackage currentMonitoredPackage = null; 438 439 // Find observer with least user impact 440 for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) { 441 ObserverInternal observer = mAllObservers.valueAt(oIndex); 442 PackageHealthObserver registeredObserver = observer.registeredObserver; 443 if (registeredObserver != null 444 && observer.onPackageFailureLocked( 445 versionedPackage.getPackageName())) { 446 MonitoredPackage p = observer.getMonitoredPackage( 447 versionedPackage.getPackageName()); 448 int mitigationCount = 1; 449 if (p != null) { 450 mitigationCount = p.getMitigationCountLocked() + 1; 451 } 452 int impact = registeredObserver.onHealthCheckFailed( 453 versionedPackage, failureReason, 
mitigationCount); 454 if (impact != PackageHealthObserverImpact.USER_IMPACT_LEVEL_0 455 && impact < currentObserverImpact) { 456 currentObserverToNotify = registeredObserver; 457 currentObserverImpact = impact; 458 currentMonitoredPackage = p; 459 } 460 } 461 } 462 463 // Execute action with least user impact 464 if (currentObserverToNotify != null) { 465 int mitigationCount = 1; 466 if (currentMonitoredPackage != null) { 467 currentMonitoredPackage.noteMitigationCallLocked(); 468 mitigationCount = 469 currentMonitoredPackage.getMitigationCountLocked(); 470 } 471 if (Flags.recoverabilityDetection()) { 472 maybeExecute(currentObserverToNotify, versionedPackage, 473 failureReason, currentObserverImpact, mitigationCount); 474 } else { 475 currentObserverToNotify.execute(versionedPackage, 476 failureReason, mitigationCount); 477 } 478 } 479 } 480 } 481 } 482 }); 483 } 484 485 /** 486 * For native crashes or explicit health check failures, call directly into each observer to 487 * mitigate the error without going through failure threshold logic. 488 */ handleFailureImmediately(List<VersionedPackage> packages, @FailureReasons int failureReason)489 private void handleFailureImmediately(List<VersionedPackage> packages, 490 @FailureReasons int failureReason) { 491 VersionedPackage failingPackage = packages.size() > 0 ? 
packages.get(0) : null; 492 PackageHealthObserver currentObserverToNotify = null; 493 int currentObserverImpact = Integer.MAX_VALUE; 494 for (ObserverInternal observer: mAllObservers.values()) { 495 PackageHealthObserver registeredObserver = observer.registeredObserver; 496 if (registeredObserver != null) { 497 int impact = registeredObserver.onHealthCheckFailed( 498 failingPackage, failureReason, 1); 499 if (impact != PackageHealthObserverImpact.USER_IMPACT_LEVEL_0 500 && impact < currentObserverImpact) { 501 currentObserverToNotify = registeredObserver; 502 currentObserverImpact = impact; 503 } 504 } 505 } 506 if (currentObserverToNotify != null) { 507 if (Flags.recoverabilityDetection()) { 508 maybeExecute(currentObserverToNotify, failingPackage, failureReason, 509 currentObserverImpact, /*mitigationCount=*/ 1); 510 } else { 511 currentObserverToNotify.execute(failingPackage, failureReason, 1); 512 } 513 } 514 } 515 maybeExecute(PackageHealthObserver currentObserverToNotify, VersionedPackage versionedPackage, @FailureReasons int failureReason, int currentObserverImpact, int mitigationCount)516 private void maybeExecute(PackageHealthObserver currentObserverToNotify, 517 VersionedPackage versionedPackage, 518 @FailureReasons int failureReason, 519 int currentObserverImpact, 520 int mitigationCount) { 521 if (currentObserverImpact < getUserImpactLevelLimit()) { 522 synchronized (mLock) { 523 mLastMitigation = mSystemClock.uptimeMillis(); 524 } 525 currentObserverToNotify.execute(versionedPackage, failureReason, mitigationCount); 526 } 527 } 528 getMitigationWindowMs()529 private long getMitigationWindowMs() { 530 return SystemProperties.getLong(MITIGATION_WINDOW_MS, DEFAULT_MITIGATION_WINDOW_MS); 531 } 532 533 534 /** 535 * Called when the system server boots. If the system server is detected to be in a boot loop, 536 * query each observer and perform the mitigation action with the lowest user impact. 
537 * 538 * Note: PackageWatchdog considers system_server restart loop as bootloop. Full reboots 539 * are not counted in bootloop. 540 */ 541 @SuppressWarnings("GuardedBy") noteBoot()542 public void noteBoot() { 543 synchronized (mLock) { 544 // if boot count has reached threshold, start mitigation. 545 // We wait until threshold number of restarts only for the first time. Perform 546 // mitigations for every restart after that. 547 boolean mitigate = mBootThreshold.incrementAndTest(); 548 if (mitigate) { 549 if (!Flags.recoverabilityDetection()) { 550 mBootThreshold.reset(); 551 } 552 int mitigationCount = mBootThreshold.getMitigationCount() + 1; 553 PackageHealthObserver currentObserverToNotify = null; 554 ObserverInternal currentObserverInternal = null; 555 int currentObserverImpact = Integer.MAX_VALUE; 556 for (int i = 0; i < mAllObservers.size(); i++) { 557 final ObserverInternal observer = mAllObservers.valueAt(i); 558 PackageHealthObserver registeredObserver = observer.registeredObserver; 559 if (registeredObserver != null) { 560 int impact = Flags.recoverabilityDetection() 561 ? 
registeredObserver.onBootLoop( 562 observer.getBootMitigationCount() + 1) 563 : registeredObserver.onBootLoop(mitigationCount); 564 if (impact != PackageHealthObserverImpact.USER_IMPACT_LEVEL_0 565 && impact < currentObserverImpact) { 566 currentObserverToNotify = registeredObserver; 567 currentObserverInternal = observer; 568 currentObserverImpact = impact; 569 } 570 } 571 } 572 if (currentObserverToNotify != null) { 573 if (Flags.recoverabilityDetection()) { 574 int currentObserverMitigationCount = 575 currentObserverInternal.getBootMitigationCount() + 1; 576 currentObserverInternal.setBootMitigationCount( 577 currentObserverMitigationCount); 578 saveAllObserversBootMitigationCountToMetadata(METADATA_FILE); 579 currentObserverToNotify.executeBootLoopMitigation( 580 currentObserverMitigationCount); 581 } else { 582 mBootThreshold.setMitigationCount(mitigationCount); 583 mBootThreshold.saveMitigationCountToMetadata(); 584 currentObserverToNotify.executeBootLoopMitigation(mitigationCount); 585 } 586 } 587 } 588 } 589 } 590 591 // TODO(b/120598832): Optimize write? Maybe only write a separate smaller file? Also 592 // avoid holding lock? 593 // This currently adds about 7ms extra to shutdown thread 594 /** Writes the package information to file during shutdown. */ writeNow()595 public void writeNow() { 596 synchronized (mLock) { 597 // Must only run synchronous tasks as this runs on the ShutdownThread and no other 598 // thread is guaranteed to run during shutdown. 599 if (!mAllObservers.isEmpty()) { 600 mLongTaskHandler.removeCallbacks(mSaveToFile); 601 pruneObserversLocked(); 602 saveToFile(); 603 Slog.i(TAG, "Last write to update package durations"); 604 } 605 } 606 } 607 608 /** 609 * Enables or disables explicit health checks. 610 * <p> If explicit health checks are enabled, the health check service is started. 611 * <p> If explicit health checks are disabled, pending explicit health check requests are 612 * passed and the health check service is stopped. 
613 */ setExplicitHealthCheckEnabled(boolean enabled)614 private void setExplicitHealthCheckEnabled(boolean enabled) { 615 synchronized (mLock) { 616 mIsHealthCheckEnabled = enabled; 617 mHealthCheckController.setEnabled(enabled); 618 mSyncRequired = true; 619 // Prune to update internal state whenever health check is enabled/disabled 620 syncState("health check state " + (enabled ? "enabled" : "disabled")); 621 } 622 } 623 624 /** 625 * This method should be only called on mShortTaskHandler, since it modifies 626 * {@link #mNumberOfNativeCrashPollsRemaining}. 627 */ checkAndMitigateNativeCrashes()628 private void checkAndMitigateNativeCrashes() { 629 mNumberOfNativeCrashPollsRemaining--; 630 // Check if native watchdog reported a crash 631 if ("1".equals(SystemProperties.get("sys.init.updatable_crashing"))) { 632 // We rollback all available low impact rollbacks when crash is unattributable 633 onPackageFailure(Collections.EMPTY_LIST, FAILURE_REASON_NATIVE_CRASH); 634 // we stop polling after an attempt to execute rollback, regardless of whether the 635 // attempt succeeds or not 636 } else { 637 if (mNumberOfNativeCrashPollsRemaining > 0) { 638 mShortTaskHandler.postDelayed(() -> checkAndMitigateNativeCrashes(), 639 NATIVE_CRASH_POLLING_INTERVAL_MILLIS); 640 } 641 } 642 } 643 644 /** 645 * Since this method can eventually trigger a rollback, it should be called 646 * only once boot has completed {@code onBootCompleted} and not earlier, because the install 647 * session must be entirely completed before we try to rollback. 
648 */ scheduleCheckAndMitigateNativeCrashes()649 public void scheduleCheckAndMitigateNativeCrashes() { 650 Slog.i(TAG, "Scheduling " + mNumberOfNativeCrashPollsRemaining + " polls to check " 651 + "and mitigate native crashes"); 652 mShortTaskHandler.post(()->checkAndMitigateNativeCrashes()); 653 } 654 getUserImpactLevelLimit()655 private int getUserImpactLevelLimit() { 656 return SystemProperties.getInt(MAJOR_USER_IMPACT_LEVEL_THRESHOLD, 657 DEFAULT_MAJOR_USER_IMPACT_LEVEL_THRESHOLD); 658 } 659 660 /** Possible severity values of the user impact of a {@link PackageHealthObserver#execute}. */ 661 @Retention(SOURCE) 662 @IntDef(value = {PackageHealthObserverImpact.USER_IMPACT_LEVEL_0, 663 PackageHealthObserverImpact.USER_IMPACT_LEVEL_10, 664 PackageHealthObserverImpact.USER_IMPACT_LEVEL_20, 665 PackageHealthObserverImpact.USER_IMPACT_LEVEL_30, 666 PackageHealthObserverImpact.USER_IMPACT_LEVEL_40, 667 PackageHealthObserverImpact.USER_IMPACT_LEVEL_50, 668 PackageHealthObserverImpact.USER_IMPACT_LEVEL_70, 669 PackageHealthObserverImpact.USER_IMPACT_LEVEL_71, 670 PackageHealthObserverImpact.USER_IMPACT_LEVEL_75, 671 PackageHealthObserverImpact.USER_IMPACT_LEVEL_80, 672 PackageHealthObserverImpact.USER_IMPACT_LEVEL_90, 673 PackageHealthObserverImpact.USER_IMPACT_LEVEL_100}) 674 public @interface PackageHealthObserverImpact { 675 /** No action to take. */ 676 int USER_IMPACT_LEVEL_0 = 0; 677 /* Action has low user impact, user of a device will barely notice. */ 678 int USER_IMPACT_LEVEL_10 = 10; 679 /* Actions having medium user impact, user of a device will likely notice. */ 680 int USER_IMPACT_LEVEL_20 = 20; 681 int USER_IMPACT_LEVEL_30 = 30; 682 int USER_IMPACT_LEVEL_40 = 40; 683 int USER_IMPACT_LEVEL_50 = 50; 684 int USER_IMPACT_LEVEL_70 = 70; 685 /* Action has high user impact, a last resort, user of a device will be very frustrated. 
*/ 686 int USER_IMPACT_LEVEL_71 = 71; 687 int USER_IMPACT_LEVEL_75 = 75; 688 int USER_IMPACT_LEVEL_80 = 80; 689 int USER_IMPACT_LEVEL_90 = 90; 690 int USER_IMPACT_LEVEL_100 = 100; 691 } 692 693 /** Register instances of this interface to receive notifications on package failure. */ 694 public interface PackageHealthObserver { 695 /** 696 * Called when health check fails for the {@code versionedPackage}. 697 * 698 * @param versionedPackage the package that is failing. This may be null if a native 699 * service is crashing. 700 * @param failureReason the type of failure that is occurring. 701 * @param mitigationCount the number of times mitigation has been called for this package 702 * (including this time). 703 * 704 * 705 * @return any one of {@link PackageHealthObserverImpact} to express the impact 706 * to the user on {@link #execute} 707 */ onHealthCheckFailed( @ullable VersionedPackage versionedPackage, @FailureReasons int failureReason, int mitigationCount)708 @PackageHealthObserverImpact int onHealthCheckFailed( 709 @Nullable VersionedPackage versionedPackage, 710 @FailureReasons int failureReason, 711 int mitigationCount); 712 713 /** 714 * Executes mitigation for {@link #onHealthCheckFailed}. 715 * 716 * @param versionedPackage the package that is failing. This may be null if a native 717 * service is crashing. 718 * @param failureReason the type of failure that is occurring. 719 * @param mitigationCount the number of times mitigation has been called for this package 720 * (including this time). 
721 * @return {@code true} if action was executed successfully, {@code false} otherwise 722 */ execute(@ullable VersionedPackage versionedPackage, @FailureReasons int failureReason, int mitigationCount)723 boolean execute(@Nullable VersionedPackage versionedPackage, 724 @FailureReasons int failureReason, int mitigationCount); 725 726 727 /** 728 * Called when the system server has booted several times within a window of time, defined 729 * by {@link #mBootThreshold} 730 * 731 * @param mitigationCount the number of times mitigation has been attempted for this 732 * boot loop (including this time). 733 */ onBootLoop(int mitigationCount)734 default @PackageHealthObserverImpact int onBootLoop(int mitigationCount) { 735 return PackageHealthObserverImpact.USER_IMPACT_LEVEL_0; 736 } 737 738 /** 739 * Executes mitigation for {@link #onBootLoop} 740 * @param mitigationCount the number of times mitigation has been attempted for this 741 * boot loop (including this time). 742 */ executeBootLoopMitigation(int mitigationCount)743 default boolean executeBootLoopMitigation(int mitigationCount) { 744 return false; 745 } 746 747 // TODO(b/120598832): Ensure uniqueness? 748 /** 749 * Identifier for the observer, should not change across device updates otherwise the 750 * watchdog may drop observing packages with the old name. 751 */ getName()752 String getName(); 753 754 /** 755 * An observer will not be pruned if this is set, even if the observer is not explicitly 756 * monitoring any packages. 757 */ isPersistent()758 default boolean isPersistent() { 759 return false; 760 } 761 762 /** 763 * Returns {@code true} if this observer wishes to observe the given package, {@code false} 764 * otherwise 765 * 766 * <p> A persistent observer may choose to start observing certain failing packages, even if 767 * it has not explicitly asked to watch the package with {@link #startObservingHealth}. 
768 */ mayObservePackage(String packageName)769 default boolean mayObservePackage(String packageName) { 770 return false; 771 } 772 } 773 774 @VisibleForTesting getTriggerFailureCount()775 long getTriggerFailureCount() { 776 synchronized (mLock) { 777 return mTriggerFailureCount; 778 } 779 } 780 781 @VisibleForTesting getTriggerFailureDurationMs()782 long getTriggerFailureDurationMs() { 783 synchronized (mLock) { 784 return mTriggerFailureDurationMs; 785 } 786 } 787 788 /** 789 * Serializes and syncs health check requests with the {@link ExplicitHealthCheckController}. 790 */ syncRequestsAsync()791 private void syncRequestsAsync() { 792 mShortTaskHandler.removeCallbacks(mSyncRequests); 793 mShortTaskHandler.post(mSyncRequests); 794 } 795 796 /** 797 * Syncs health check requests with the {@link ExplicitHealthCheckController}. 798 * Calls to this must be serialized. 799 * 800 * @see #syncRequestsAsync 801 */ syncRequests()802 private void syncRequests() { 803 boolean syncRequired = false; 804 synchronized (mLock) { 805 if (mIsPackagesReady) { 806 Set<String> packages = getPackagesPendingHealthChecksLocked(); 807 if (mSyncRequired || !packages.equals(mRequestedHealthCheckPackages) 808 || packages.isEmpty()) { 809 syncRequired = true; 810 mRequestedHealthCheckPackages = packages; 811 } 812 } // else, we will sync requests when packages become ready 813 } 814 815 // Call outside lock to avoid holding lock when calling into the controller. 816 if (syncRequired) { 817 Slog.i(TAG, "Syncing health check requests for packages: " 818 + mRequestedHealthCheckPackages); 819 mHealthCheckController.syncRequests(mRequestedHealthCheckPackages); 820 mSyncRequired = false; 821 } 822 } 823 824 /** 825 * Updates the observers monitoring {@code packageName} that explicit health check has passed. 
826 * 827 * <p> This update is strictly for registered observers at the time of the call 828 * Observers that register after this signal will have no knowledge of prior signals and will 829 * effectively behave as if the explicit health check hasn't passed for {@code packageName}. 830 * 831 * <p> {@code packageName} can still be considered failed if reported by 832 * {@link #onPackageFailureLocked} before the package expires. 833 * 834 * <p> Triggered by components outside the system server when they are fully functional after an 835 * update. 836 */ onHealthCheckPassed(String packageName)837 private void onHealthCheckPassed(String packageName) { 838 Slog.i(TAG, "Health check passed for package: " + packageName); 839 boolean isStateChanged = false; 840 841 synchronized (mLock) { 842 for (int observerIdx = 0; observerIdx < mAllObservers.size(); observerIdx++) { 843 ObserverInternal observer = mAllObservers.valueAt(observerIdx); 844 MonitoredPackage monitoredPackage = observer.getMonitoredPackage(packageName); 845 846 if (monitoredPackage != null) { 847 int oldState = monitoredPackage.getHealthCheckStateLocked(); 848 int newState = monitoredPackage.tryPassHealthCheckLocked(); 849 isStateChanged |= oldState != newState; 850 } 851 } 852 } 853 854 if (isStateChanged) { 855 syncState("health check passed for " + packageName); 856 } 857 } 858 onSupportedPackages(List<PackageConfig> supportedPackages)859 private void onSupportedPackages(List<PackageConfig> supportedPackages) { 860 boolean isStateChanged = false; 861 862 Map<String, Long> supportedPackageTimeouts = new ArrayMap<>(); 863 Iterator<PackageConfig> it = supportedPackages.iterator(); 864 while (it.hasNext()) { 865 PackageConfig info = it.next(); 866 supportedPackageTimeouts.put(info.getPackageName(), info.getHealthCheckTimeoutMillis()); 867 } 868 869 synchronized (mLock) { 870 Slog.d(TAG, "Received supported packages " + supportedPackages); 871 Iterator<ObserverInternal> oit = mAllObservers.values().iterator(); 
872 while (oit.hasNext()) { 873 Iterator<MonitoredPackage> pit = oit.next().getMonitoredPackages() 874 .values().iterator(); 875 while (pit.hasNext()) { 876 MonitoredPackage monitoredPackage = pit.next(); 877 String packageName = monitoredPackage.getName(); 878 int oldState = monitoredPackage.getHealthCheckStateLocked(); 879 int newState; 880 881 if (supportedPackageTimeouts.containsKey(packageName)) { 882 // Supported packages become ACTIVE if currently INACTIVE 883 newState = monitoredPackage.setHealthCheckActiveLocked( 884 supportedPackageTimeouts.get(packageName)); 885 } else { 886 // Unsupported packages are marked as PASSED unless already FAILED 887 newState = monitoredPackage.tryPassHealthCheckLocked(); 888 } 889 isStateChanged |= oldState != newState; 890 } 891 } 892 } 893 894 if (isStateChanged) { 895 syncState("updated health check supported packages " + supportedPackages); 896 } 897 } 898 onSyncRequestNotified()899 private void onSyncRequestNotified() { 900 synchronized (mLock) { 901 mSyncRequired = true; 902 syncRequestsAsync(); 903 } 904 } 905 906 @GuardedBy("mLock") getPackagesPendingHealthChecksLocked()907 private Set<String> getPackagesPendingHealthChecksLocked() { 908 Set<String> packages = new ArraySet<>(); 909 Iterator<ObserverInternal> oit = mAllObservers.values().iterator(); 910 while (oit.hasNext()) { 911 ObserverInternal observer = oit.next(); 912 Iterator<MonitoredPackage> pit = 913 observer.getMonitoredPackages().values().iterator(); 914 while (pit.hasNext()) { 915 MonitoredPackage monitoredPackage = pit.next(); 916 String packageName = monitoredPackage.getName(); 917 if (monitoredPackage.isPendingHealthChecksLocked()) { 918 packages.add(packageName); 919 } 920 } 921 } 922 return packages; 923 } 924 925 /** 926 * Syncs the state of the observers. 927 * 928 * <p> Prunes all observers, saves new state to disk, syncs health check requests with the 929 * health check service and schedules the next state sync. 
930 */ syncState(String reason)931 private void syncState(String reason) { 932 synchronized (mLock) { 933 Slog.i(TAG, "Syncing state, reason: " + reason); 934 pruneObserversLocked(); 935 936 saveToFileAsync(); 937 syncRequestsAsync(); 938 939 // Done syncing state, schedule the next state sync 940 scheduleNextSyncStateLocked(); 941 } 942 } 943 syncStateWithScheduledReason()944 private void syncStateWithScheduledReason() { 945 syncState("scheduled"); 946 } 947 948 @GuardedBy("mLock") scheduleNextSyncStateLocked()949 private void scheduleNextSyncStateLocked() { 950 long durationMs = getNextStateSyncMillisLocked(); 951 mShortTaskHandler.removeCallbacks(mSyncStateWithScheduledReason); 952 if (durationMs == Long.MAX_VALUE) { 953 Slog.i(TAG, "Cancelling state sync, nothing to sync"); 954 mUptimeAtLastStateSync = 0; 955 } else { 956 mUptimeAtLastStateSync = mSystemClock.uptimeMillis(); 957 mShortTaskHandler.postDelayed(mSyncStateWithScheduledReason, durationMs); 958 } 959 } 960 961 /** 962 * Returns the next duration in millis to sync the watchdog state. 963 * 964 * @returns Long#MAX_VALUE if there are no observed packages. 965 */ 966 @GuardedBy("mLock") getNextStateSyncMillisLocked()967 private long getNextStateSyncMillisLocked() { 968 long shortestDurationMs = Long.MAX_VALUE; 969 for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) { 970 ArrayMap<String, MonitoredPackage> packages = mAllObservers.valueAt(oIndex) 971 .getMonitoredPackages(); 972 for (int pIndex = 0; pIndex < packages.size(); pIndex++) { 973 MonitoredPackage mp = packages.valueAt(pIndex); 974 long duration = mp.getShortestScheduleDurationMsLocked(); 975 if (duration < shortestDurationMs) { 976 shortestDurationMs = duration; 977 } 978 } 979 } 980 return shortestDurationMs; 981 } 982 983 /** 984 * Removes {@code elapsedMs} milliseconds from all durations on monitored packages 985 * and updates other internal state. 
986 */ 987 @GuardedBy("mLock") pruneObserversLocked()988 private void pruneObserversLocked() { 989 long elapsedMs = mUptimeAtLastStateSync == 0 990 ? 0 : mSystemClock.uptimeMillis() - mUptimeAtLastStateSync; 991 if (elapsedMs <= 0) { 992 Slog.i(TAG, "Not pruning observers, elapsed time: " + elapsedMs + "ms"); 993 return; 994 } 995 996 Iterator<ObserverInternal> it = mAllObservers.values().iterator(); 997 while (it.hasNext()) { 998 ObserverInternal observer = it.next(); 999 Set<MonitoredPackage> failedPackages = 1000 observer.prunePackagesLocked(elapsedMs); 1001 if (!failedPackages.isEmpty()) { 1002 onHealthCheckFailed(observer, failedPackages); 1003 } 1004 if (observer.getMonitoredPackages().isEmpty() && (observer.registeredObserver == null 1005 || !observer.registeredObserver.isPersistent())) { 1006 Slog.i(TAG, "Discarding observer " + observer.name + ". All packages expired"); 1007 it.remove(); 1008 } 1009 } 1010 } 1011 onHealthCheckFailed(ObserverInternal observer, Set<MonitoredPackage> failedPackages)1012 private void onHealthCheckFailed(ObserverInternal observer, 1013 Set<MonitoredPackage> failedPackages) { 1014 mLongTaskHandler.post(() -> { 1015 synchronized (mLock) { 1016 PackageHealthObserver registeredObserver = observer.registeredObserver; 1017 if (registeredObserver != null) { 1018 Iterator<MonitoredPackage> it = failedPackages.iterator(); 1019 while (it.hasNext()) { 1020 VersionedPackage versionedPkg = getVersionedPackage(it.next().getName()); 1021 if (versionedPkg != null) { 1022 Slog.i(TAG, 1023 "Explicit health check failed for package " + versionedPkg); 1024 registeredObserver.execute(versionedPkg, 1025 PackageWatchdog.FAILURE_REASON_EXPLICIT_HEALTH_CHECK, 1); 1026 } 1027 } 1028 } 1029 } 1030 }); 1031 } 1032 1033 /** 1034 * Gets PackageInfo for the given package. Matches any user and apex. 1035 * 1036 * @throws PackageManager.NameNotFoundException if no such package is installed. 
1037 */ getPackageInfo(String packageName)1038 private PackageInfo getPackageInfo(String packageName) 1039 throws PackageManager.NameNotFoundException { 1040 PackageManager pm = mContext.getPackageManager(); 1041 try { 1042 // The MATCH_ANY_USER flag doesn't mix well with the MATCH_APEX 1043 // flag, so make two separate attempts to get the package info. 1044 // We don't need both flags at the same time because we assume 1045 // apex files are always installed for all users. 1046 return pm.getPackageInfo(packageName, PackageManager.MATCH_ANY_USER); 1047 } catch (PackageManager.NameNotFoundException e) { 1048 return pm.getPackageInfo(packageName, PackageManager.MATCH_APEX); 1049 } 1050 } 1051 1052 @Nullable getVersionedPackage(String packageName)1053 private VersionedPackage getVersionedPackage(String packageName) { 1054 final PackageManager pm = mContext.getPackageManager(); 1055 if (pm == null || TextUtils.isEmpty(packageName)) { 1056 return null; 1057 } 1058 try { 1059 final long versionCode = getPackageInfo(packageName).getLongVersionCode(); 1060 return new VersionedPackage(packageName, versionCode); 1061 } catch (PackageManager.NameNotFoundException e) { 1062 return null; 1063 } 1064 } 1065 1066 /** 1067 * Loads mAllObservers from file. 1068 * 1069 * <p>Note that this is <b>not</b> thread safe and should only called be called 1070 * from the constructor. 
1071 */ loadFromFile()1072 private void loadFromFile() { 1073 InputStream infile = null; 1074 mAllObservers.clear(); 1075 try { 1076 infile = mPolicyFile.openRead(); 1077 final TypedXmlPullParser parser = Xml.resolvePullParser(infile); 1078 XmlUtils.beginDocument(parser, TAG_PACKAGE_WATCHDOG); 1079 int outerDepth = parser.getDepth(); 1080 while (XmlUtils.nextElementWithin(parser, outerDepth)) { 1081 ObserverInternal observer = ObserverInternal.read(parser, this); 1082 if (observer != null) { 1083 mAllObservers.put(observer.name, observer); 1084 } 1085 } 1086 } catch (FileNotFoundException e) { 1087 // Nothing to monitor 1088 } catch (IOException | NumberFormatException | XmlPullParserException e) { 1089 Slog.wtf(TAG, "Unable to read monitored packages, deleting file", e); 1090 mPolicyFile.delete(); 1091 } finally { 1092 IoUtils.closeQuietly(infile); 1093 } 1094 } 1095 onPropertyChanged(DeviceConfig.Properties properties)1096 private void onPropertyChanged(DeviceConfig.Properties properties) { 1097 try { 1098 updateConfigs(); 1099 } catch (Exception ignore) { 1100 Slog.w(TAG, "Failed to reload device config changes"); 1101 } 1102 } 1103 1104 /** Adds a {@link DeviceConfig#OnPropertiesChangedListener}. */ setPropertyChangedListenerLocked()1105 private void setPropertyChangedListenerLocked() { 1106 DeviceConfig.addOnPropertiesChangedListener( 1107 DeviceConfig.NAMESPACE_ROLLBACK, 1108 mContext.getMainExecutor(), 1109 mOnPropertyChangedListener); 1110 } 1111 1112 @VisibleForTesting removePropertyChangedListener()1113 void removePropertyChangedListener() { 1114 DeviceConfig.removeOnPropertiesChangedListener(mOnPropertyChangedListener); 1115 } 1116 1117 /** 1118 * Health check is enabled or disabled after reading the flags 1119 * from DeviceConfig. 
1120 */ 1121 @VisibleForTesting updateConfigs()1122 void updateConfigs() { 1123 synchronized (mLock) { 1124 mTriggerFailureCount = DeviceConfig.getInt( 1125 DeviceConfig.NAMESPACE_ROLLBACK, 1126 PROPERTY_WATCHDOG_TRIGGER_FAILURE_COUNT, 1127 DEFAULT_TRIGGER_FAILURE_COUNT); 1128 if (mTriggerFailureCount <= 0) { 1129 mTriggerFailureCount = DEFAULT_TRIGGER_FAILURE_COUNT; 1130 } 1131 1132 mTriggerFailureDurationMs = DeviceConfig.getInt( 1133 DeviceConfig.NAMESPACE_ROLLBACK, 1134 PROPERTY_WATCHDOG_TRIGGER_DURATION_MILLIS, 1135 DEFAULT_TRIGGER_FAILURE_DURATION_MS); 1136 if (mTriggerFailureDurationMs <= 0) { 1137 mTriggerFailureDurationMs = DEFAULT_TRIGGER_FAILURE_DURATION_MS; 1138 } 1139 1140 setExplicitHealthCheckEnabled(DeviceConfig.getBoolean( 1141 DeviceConfig.NAMESPACE_ROLLBACK, 1142 PROPERTY_WATCHDOG_EXPLICIT_HEALTH_CHECK_ENABLED, 1143 DEFAULT_EXPLICIT_HEALTH_CHECK_ENABLED)); 1144 } 1145 } 1146 registerConnectivityModuleHealthListener()1147 private void registerConnectivityModuleHealthListener() { 1148 // TODO: have an internal method to trigger a rollback by reporting high severity errors, 1149 // and rely on ActivityManager to inform the watchdog of severe network stack crashes 1150 // instead of having this listener in parallel. 1151 mConnectivityModuleConnector.registerHealthListener( 1152 packageName -> { 1153 final VersionedPackage pkg = getVersionedPackage(packageName); 1154 if (pkg == null) { 1155 Slog.wtf(TAG, "NetworkStack failed but could not find its package"); 1156 return; 1157 } 1158 final List<VersionedPackage> pkgList = Collections.singletonList(pkg); 1159 onPackageFailure(pkgList, FAILURE_REASON_EXPLICIT_HEALTH_CHECK); 1160 }); 1161 } 1162 1163 /** 1164 * Persists mAllObservers to file. Threshold information is ignored. 
1165 */ saveToFile()1166 private boolean saveToFile() { 1167 Slog.i(TAG, "Saving observer state to file"); 1168 synchronized (mLock) { 1169 FileOutputStream stream; 1170 try { 1171 stream = mPolicyFile.startWrite(); 1172 } catch (IOException e) { 1173 Slog.w(TAG, "Cannot update monitored packages", e); 1174 return false; 1175 } 1176 1177 try { 1178 TypedXmlSerializer out = Xml.resolveSerializer(stream); 1179 out.startDocument(null, true); 1180 out.startTag(null, TAG_PACKAGE_WATCHDOG); 1181 out.attributeInt(null, ATTR_VERSION, DB_VERSION); 1182 for (int oIndex = 0; oIndex < mAllObservers.size(); oIndex++) { 1183 mAllObservers.valueAt(oIndex).writeLocked(out); 1184 } 1185 out.endTag(null, TAG_PACKAGE_WATCHDOG); 1186 out.endDocument(); 1187 mPolicyFile.finishWrite(stream); 1188 return true; 1189 } catch (IOException e) { 1190 Slog.w(TAG, "Failed to save monitored packages, restoring backup", e); 1191 mPolicyFile.failWrite(stream); 1192 return false; 1193 } finally { 1194 IoUtils.closeQuietly(stream); 1195 } 1196 } 1197 } 1198 saveToFileAsync()1199 private void saveToFileAsync() { 1200 if (!mLongTaskHandler.hasCallbacks(mSaveToFile)) { 1201 mLongTaskHandler.post(mSaveToFile); 1202 } 1203 } 1204 1205 /** Convert a {@code LongArrayQueue} to a String of comma-separated values. */ longArrayQueueToString(LongArrayQueue queue)1206 public static String longArrayQueueToString(LongArrayQueue queue) { 1207 if (queue.size() > 0) { 1208 StringBuilder sb = new StringBuilder(); 1209 sb.append(queue.get(0)); 1210 for (int i = 1; i < queue.size(); i++) { 1211 sb.append(","); 1212 sb.append(queue.get(i)); 1213 } 1214 return sb.toString(); 1215 } 1216 return ""; 1217 } 1218 1219 /** Parse a comma-separated String of longs into a LongArrayQueue. 
*/ parseLongArrayQueue(String commaSeparatedValues)1220 public static LongArrayQueue parseLongArrayQueue(String commaSeparatedValues) { 1221 LongArrayQueue result = new LongArrayQueue(); 1222 if (!TextUtils.isEmpty(commaSeparatedValues)) { 1223 String[] values = commaSeparatedValues.split(","); 1224 for (String value : values) { 1225 result.addLast(Long.parseLong(value)); 1226 } 1227 } 1228 return result; 1229 } 1230 1231 1232 /** Dump status of every observer in mAllObservers. */ dump(IndentingPrintWriter pw)1233 public void dump(IndentingPrintWriter pw) { 1234 pw.println("Package Watchdog status"); 1235 pw.increaseIndent(); 1236 synchronized (mLock) { 1237 for (String observerName : mAllObservers.keySet()) { 1238 pw.println("Observer name: " + observerName); 1239 pw.increaseIndent(); 1240 ObserverInternal observerInternal = mAllObservers.get(observerName); 1241 observerInternal.dump(pw); 1242 pw.decreaseIndent(); 1243 } 1244 } 1245 } 1246 1247 @VisibleForTesting 1248 @GuardedBy("mLock") registerObserverInternal(ObserverInternal observerInternal)1249 void registerObserverInternal(ObserverInternal observerInternal) { 1250 mAllObservers.put(observerInternal.name, observerInternal); 1251 } 1252 1253 /** 1254 * Represents an observer monitoring a set of packages along with the failure thresholds for 1255 * each package. 1256 * 1257 * <p> Note, the PackageWatchdog#mLock must always be held when reading or writing 1258 * instances of this class. 
1259 */ 1260 static class ObserverInternal { 1261 public final String name; 1262 @GuardedBy("mLock") 1263 private final ArrayMap<String, MonitoredPackage> mPackages = new ArrayMap<>(); 1264 @Nullable 1265 @GuardedBy("mLock") 1266 public PackageHealthObserver registeredObserver; 1267 private int mMitigationCount; 1268 ObserverInternal(String name, List<MonitoredPackage> packages)1269 ObserverInternal(String name, List<MonitoredPackage> packages) { 1270 this(name, packages, /*mitigationCount=*/ 0); 1271 } 1272 ObserverInternal(String name, List<MonitoredPackage> packages, int mitigationCount)1273 ObserverInternal(String name, List<MonitoredPackage> packages, int mitigationCount) { 1274 this.name = name; 1275 updatePackagesLocked(packages); 1276 this.mMitigationCount = mitigationCount; 1277 } 1278 1279 /** 1280 * Writes important {@link MonitoredPackage} details for this observer to file. 1281 * Does not persist any package failure thresholds. 1282 */ 1283 @GuardedBy("mLock") writeLocked(TypedXmlSerializer out)1284 public boolean writeLocked(TypedXmlSerializer out) { 1285 try { 1286 out.startTag(null, TAG_OBSERVER); 1287 out.attribute(null, ATTR_NAME, name); 1288 if (Flags.recoverabilityDetection()) { 1289 out.attributeInt(null, ATTR_MITIGATION_COUNT, mMitigationCount); 1290 } 1291 for (int i = 0; i < mPackages.size(); i++) { 1292 MonitoredPackage p = mPackages.valueAt(i); 1293 p.writeLocked(out); 1294 } 1295 out.endTag(null, TAG_OBSERVER); 1296 return true; 1297 } catch (IOException e) { 1298 Slog.w(TAG, "Cannot save observer", e); 1299 return false; 1300 } 1301 } 1302 getBootMitigationCount()1303 public int getBootMitigationCount() { 1304 return mMitigationCount; 1305 } 1306 setBootMitigationCount(int mitigationCount)1307 public void setBootMitigationCount(int mitigationCount) { 1308 mMitigationCount = mitigationCount; 1309 } 1310 1311 @GuardedBy("mLock") updatePackagesLocked(List<MonitoredPackage> packages)1312 public void 
updatePackagesLocked(List<MonitoredPackage> packages) { 1313 for (int pIndex = 0; pIndex < packages.size(); pIndex++) { 1314 MonitoredPackage p = packages.get(pIndex); 1315 MonitoredPackage existingPackage = getMonitoredPackage(p.getName()); 1316 if (existingPackage != null) { 1317 existingPackage.updateHealthCheckDuration(p.mDurationMs); 1318 } else { 1319 putMonitoredPackage(p); 1320 } 1321 } 1322 } 1323 1324 /** 1325 * Reduces the monitoring durations of all packages observed by this observer by 1326 * {@code elapsedMs}. If any duration is less than 0, the package is removed from 1327 * observation. If any health check duration is less than 0, the health check result 1328 * is evaluated. 1329 * 1330 * @return a {@link Set} of packages that were removed from the observer without explicit 1331 * health check passing, or an empty list if no package expired for which an explicit health 1332 * check was still pending 1333 */ 1334 @GuardedBy("mLock") prunePackagesLocked(long elapsedMs)1335 private Set<MonitoredPackage> prunePackagesLocked(long elapsedMs) { 1336 Set<MonitoredPackage> failedPackages = new ArraySet<>(); 1337 Iterator<MonitoredPackage> it = mPackages.values().iterator(); 1338 while (it.hasNext()) { 1339 MonitoredPackage p = it.next(); 1340 int oldState = p.getHealthCheckStateLocked(); 1341 int newState = p.handleElapsedTimeLocked(elapsedMs); 1342 if (oldState != HealthCheckState.FAILED 1343 && newState == HealthCheckState.FAILED) { 1344 Slog.i(TAG, "Package " + p.getName() + " failed health check"); 1345 failedPackages.add(p); 1346 } 1347 if (p.isExpiredLocked()) { 1348 it.remove(); 1349 } 1350 } 1351 return failedPackages; 1352 } 1353 1354 /** 1355 * Increments failure counts of {@code packageName}. 
1356 * @returns {@code true} if failure threshold is exceeded, {@code false} otherwise 1357 */ 1358 @GuardedBy("mLock") onPackageFailureLocked(String packageName)1359 public boolean onPackageFailureLocked(String packageName) { 1360 if (getMonitoredPackage(packageName) == null && registeredObserver.isPersistent() 1361 && registeredObserver.mayObservePackage(packageName)) { 1362 putMonitoredPackage(sPackageWatchdog.newMonitoredPackage( 1363 packageName, DEFAULT_OBSERVING_DURATION_MS, false)); 1364 } 1365 MonitoredPackage p = getMonitoredPackage(packageName); 1366 if (p != null) { 1367 return p.onFailureLocked(); 1368 } 1369 return false; 1370 } 1371 1372 /** 1373 * Returns the map of packages monitored by this observer. 1374 * 1375 * @return a mapping of package names to {@link MonitoredPackage} objects. 1376 */ 1377 @GuardedBy("mLock") getMonitoredPackages()1378 public ArrayMap<String, MonitoredPackage> getMonitoredPackages() { 1379 return mPackages; 1380 } 1381 1382 /** 1383 * Returns the {@link MonitoredPackage} associated with a given package name if the 1384 * package is being monitored by this observer. 1385 * 1386 * @param packageName: the name of the package. 1387 * @return the {@link MonitoredPackage} object associated with the package name if one 1388 * exists, {@code null} otherwise. 1389 */ 1390 @GuardedBy("mLock") 1391 @Nullable getMonitoredPackage(String packageName)1392 public MonitoredPackage getMonitoredPackage(String packageName) { 1393 return mPackages.get(packageName); 1394 } 1395 1396 /** 1397 * Associates a {@link MonitoredPackage} with the observer. 1398 * 1399 * @param p: the {@link MonitoredPackage} to store. 1400 */ 1401 @GuardedBy("mLock") putMonitoredPackage(MonitoredPackage p)1402 public void putMonitoredPackage(MonitoredPackage p) { 1403 mPackages.put(p.getName(), p); 1404 } 1405 1406 /** 1407 * Returns one ObserverInternal from the {@code parser} and advances its state. 1408 * 1409 * <p>Note that this method is <b>not</b> thread safe. 
It should only be called from 1410 * #loadFromFile which in turn is only called on construction of the 1411 * singleton PackageWatchdog. 1412 **/ read(TypedXmlPullParser parser, PackageWatchdog watchdog)1413 public static ObserverInternal read(TypedXmlPullParser parser, PackageWatchdog watchdog) { 1414 String observerName = null; 1415 int observerMitigationCount = 0; 1416 if (TAG_OBSERVER.equals(parser.getName())) { 1417 observerName = parser.getAttributeValue(null, ATTR_NAME); 1418 if (TextUtils.isEmpty(observerName)) { 1419 Slog.wtf(TAG, "Unable to read observer name"); 1420 return null; 1421 } 1422 } 1423 List<MonitoredPackage> packages = new ArrayList<>(); 1424 int innerDepth = parser.getDepth(); 1425 try { 1426 if (Flags.recoverabilityDetection()) { 1427 try { 1428 observerMitigationCount = 1429 parser.getAttributeInt(null, ATTR_MITIGATION_COUNT); 1430 } catch (XmlPullParserException e) { 1431 Slog.i( 1432 TAG, 1433 "ObserverInternal mitigation count was not present."); 1434 } 1435 } 1436 while (XmlUtils.nextElementWithin(parser, innerDepth)) { 1437 if (TAG_PACKAGE.equals(parser.getName())) { 1438 try { 1439 MonitoredPackage pkg = watchdog.parseMonitoredPackage(parser); 1440 if (pkg != null) { 1441 packages.add(pkg); 1442 } 1443 } catch (NumberFormatException e) { 1444 Slog.wtf(TAG, "Skipping package for observer " + observerName, e); 1445 continue; 1446 } 1447 } 1448 } 1449 } catch (XmlPullParserException | IOException e) { 1450 Slog.wtf(TAG, "Unable to read observer " + observerName, e); 1451 return null; 1452 } 1453 if (packages.isEmpty()) { 1454 return null; 1455 } 1456 return new ObserverInternal(observerName, packages, observerMitigationCount); 1457 } 1458 1459 /** Dumps information about this observer and the packages it watches. 
*/ dump(IndentingPrintWriter pw)1460 public void dump(IndentingPrintWriter pw) { 1461 boolean isPersistent = registeredObserver != null && registeredObserver.isPersistent(); 1462 pw.println("Persistent: " + isPersistent); 1463 for (String packageName : mPackages.keySet()) { 1464 MonitoredPackage p = getMonitoredPackage(packageName); 1465 pw.println(packageName + ": "); 1466 pw.increaseIndent(); 1467 pw.println("# Failures: " + p.mFailureHistory.size()); 1468 pw.println("Monitoring duration remaining: " + p.mDurationMs + "ms"); 1469 pw.println("Explicit health check duration: " + p.mHealthCheckDurationMs + "ms"); 1470 pw.println("Health check state: " + p.toString(p.mHealthCheckState)); 1471 pw.decreaseIndent(); 1472 } 1473 } 1474 } 1475 1476 @Retention(SOURCE) 1477 @IntDef(value = { 1478 HealthCheckState.ACTIVE, 1479 HealthCheckState.INACTIVE, 1480 HealthCheckState.PASSED, 1481 HealthCheckState.FAILED}) 1482 public @interface HealthCheckState { 1483 // The package has not passed health check but has requested a health check 1484 int ACTIVE = 0; 1485 // The package has not passed health check and has not requested a health check 1486 int INACTIVE = 1; 1487 // The package has passed health check 1488 int PASSED = 2; 1489 // The package has failed health check 1490 int FAILED = 3; 1491 } 1492 newMonitoredPackage( String name, long durationMs, boolean hasPassedHealthCheck)1493 MonitoredPackage newMonitoredPackage( 1494 String name, long durationMs, boolean hasPassedHealthCheck) { 1495 return newMonitoredPackage(name, durationMs, Long.MAX_VALUE, hasPassedHealthCheck, 1496 new LongArrayQueue()); 1497 } 1498 newMonitoredPackage(String name, long durationMs, long healthCheckDurationMs, boolean hasPassedHealthCheck, LongArrayQueue mitigationCalls)1499 MonitoredPackage newMonitoredPackage(String name, long durationMs, long healthCheckDurationMs, 1500 boolean hasPassedHealthCheck, LongArrayQueue mitigationCalls) { 1501 return new MonitoredPackage(name, durationMs, 
healthCheckDurationMs, 1502 hasPassedHealthCheck, mitigationCalls); 1503 } 1504 parseMonitoredPackage(TypedXmlPullParser parser)1505 MonitoredPackage parseMonitoredPackage(TypedXmlPullParser parser) 1506 throws XmlPullParserException { 1507 String packageName = parser.getAttributeValue(null, ATTR_NAME); 1508 long duration = parser.getAttributeLong(null, ATTR_DURATION); 1509 long healthCheckDuration = parser.getAttributeLong(null, 1510 ATTR_EXPLICIT_HEALTH_CHECK_DURATION); 1511 boolean hasPassedHealthCheck = parser.getAttributeBoolean(null, ATTR_PASSED_HEALTH_CHECK); 1512 LongArrayQueue mitigationCalls = parseLongArrayQueue( 1513 parser.getAttributeValue(null, ATTR_MITIGATION_CALLS)); 1514 return newMonitoredPackage(packageName, 1515 duration, healthCheckDuration, hasPassedHealthCheck, mitigationCalls); 1516 } 1517 1518 /** 1519 * Represents a package and its health check state along with the time 1520 * it should be monitored for. 1521 * 1522 * <p> Note, the PackageWatchdog#mLock must always be held when reading or writing 1523 * instances of this class. 1524 */ 1525 class MonitoredPackage { 1526 private final String mPackageName; 1527 // Times when package failures happen sorted in ascending order 1528 @GuardedBy("mLock") 1529 private final LongArrayQueue mFailureHistory = new LongArrayQueue(); 1530 // Times when an observer was called to mitigate this package's failure. Sorted in 1531 // ascending order. 1532 @GuardedBy("mLock") 1533 private final LongArrayQueue mMitigationCalls; 1534 // One of STATE_[ACTIVE|INACTIVE|PASSED|FAILED]. Updated on construction and after 1535 // methods that could change the health check state: handleElapsedTimeLocked and 1536 // tryPassHealthCheckLocked 1537 private int mHealthCheckState = HealthCheckState.INACTIVE; 1538 // Whether an explicit health check has passed. 
1539 // This value in addition with mHealthCheckDurationMs determines the health check state 1540 // of the package, see #getHealthCheckStateLocked 1541 @GuardedBy("mLock") 1542 private boolean mHasPassedHealthCheck; 1543 // System uptime duration to monitor package. 1544 @GuardedBy("mLock") 1545 private long mDurationMs; 1546 // System uptime duration to check the result of an explicit health check 1547 // Initially, MAX_VALUE until we get a value from the health check service 1548 // and request health checks. 1549 // This value in addition with mHasPassedHealthCheck determines the health check state 1550 // of the package, see #getHealthCheckStateLocked 1551 @GuardedBy("mLock") 1552 private long mHealthCheckDurationMs = Long.MAX_VALUE; 1553 MonitoredPackage(String packageName, long durationMs, long healthCheckDurationMs, boolean hasPassedHealthCheck, LongArrayQueue mitigationCalls)1554 MonitoredPackage(String packageName, long durationMs, 1555 long healthCheckDurationMs, boolean hasPassedHealthCheck, 1556 LongArrayQueue mitigationCalls) { 1557 mPackageName = packageName; 1558 mDurationMs = durationMs; 1559 mHealthCheckDurationMs = healthCheckDurationMs; 1560 mHasPassedHealthCheck = hasPassedHealthCheck; 1561 mMitigationCalls = mitigationCalls; 1562 updateHealthCheckStateLocked(); 1563 } 1564 1565 /** Writes the salient fields to disk using {@code out}. 
*/ 1566 @GuardedBy("mLock") writeLocked(TypedXmlSerializer out)1567 public void writeLocked(TypedXmlSerializer out) throws IOException { 1568 out.startTag(null, TAG_PACKAGE); 1569 out.attribute(null, ATTR_NAME, getName()); 1570 out.attributeLong(null, ATTR_DURATION, mDurationMs); 1571 out.attributeLong(null, ATTR_EXPLICIT_HEALTH_CHECK_DURATION, mHealthCheckDurationMs); 1572 out.attributeBoolean(null, ATTR_PASSED_HEALTH_CHECK, mHasPassedHealthCheck); 1573 LongArrayQueue normalizedCalls = normalizeMitigationCalls(); 1574 out.attribute(null, ATTR_MITIGATION_CALLS, longArrayQueueToString(normalizedCalls)); 1575 out.endTag(null, TAG_PACKAGE); 1576 } 1577 1578 /** 1579 * Increment package failures or resets failure count depending on the last package failure. 1580 * 1581 * @return {@code true} if failure count exceeds a threshold, {@code false} otherwise 1582 */ 1583 @GuardedBy("mLock") onFailureLocked()1584 public boolean onFailureLocked() { 1585 // Sliding window algorithm: find out if there exists a window containing failures >= 1586 // mTriggerFailureCount. 1587 final long now = mSystemClock.uptimeMillis(); 1588 mFailureHistory.addLast(now); 1589 while (now - mFailureHistory.peekFirst() > mTriggerFailureDurationMs) { 1590 // Prune values falling out of the window 1591 mFailureHistory.removeFirst(); 1592 } 1593 boolean failed = mFailureHistory.size() >= mTriggerFailureCount; 1594 if (failed) { 1595 mFailureHistory.clear(); 1596 } 1597 return failed; 1598 } 1599 1600 /** 1601 * Notes the timestamp of a mitigation call into the observer. 1602 */ 1603 @GuardedBy("mLock") noteMitigationCallLocked()1604 public void noteMitigationCallLocked() { 1605 mMitigationCalls.addLast(mSystemClock.uptimeMillis()); 1606 } 1607 1608 /** 1609 * Prunes any mitigation calls outside of the de-escalation window, and returns the 1610 * number of calls that are in the window afterwards. 1611 * 1612 * @return the number of mitigation calls made in the de-escalation window. 
1613 */ 1614 @GuardedBy("mLock") getMitigationCountLocked()1615 public int getMitigationCountLocked() { 1616 try { 1617 final long now = mSystemClock.uptimeMillis(); 1618 while (now - mMitigationCalls.peekFirst() > DEFAULT_DEESCALATION_WINDOW_MS) { 1619 mMitigationCalls.removeFirst(); 1620 } 1621 } catch (NoSuchElementException ignore) { 1622 } 1623 1624 return mMitigationCalls.size(); 1625 } 1626 1627 /** 1628 * Before writing to disk, make the mitigation call timestamps relative to the current 1629 * system uptime. This is because they need to be relative to the uptime which will reset 1630 * at the next boot. 1631 * 1632 * @return a LongArrayQueue of the mitigation calls relative to the current system uptime. 1633 */ 1634 @GuardedBy("mLock") normalizeMitigationCalls()1635 public LongArrayQueue normalizeMitigationCalls() { 1636 LongArrayQueue normalized = new LongArrayQueue(); 1637 final long now = mSystemClock.uptimeMillis(); 1638 for (int i = 0; i < mMitigationCalls.size(); i++) { 1639 normalized.addLast(mMitigationCalls.get(i) - now); 1640 } 1641 return normalized; 1642 } 1643 1644 /** 1645 * Sets the initial health check duration. 1646 * 1647 * @return the new health check state 1648 */ 1649 @GuardedBy("mLock") setHealthCheckActiveLocked(long initialHealthCheckDurationMs)1650 public int setHealthCheckActiveLocked(long initialHealthCheckDurationMs) { 1651 if (initialHealthCheckDurationMs <= 0) { 1652 Slog.wtf(TAG, "Cannot set non-positive health check duration " 1653 + initialHealthCheckDurationMs + "ms for package " + getName() 1654 + ". Using total duration " + mDurationMs + "ms instead"); 1655 initialHealthCheckDurationMs = mDurationMs; 1656 } 1657 if (mHealthCheckState == HealthCheckState.INACTIVE) { 1658 // Transitions to ACTIVE 1659 mHealthCheckDurationMs = initialHealthCheckDurationMs; 1660 } 1661 return updateHealthCheckStateLocked(); 1662 } 1663 1664 /** 1665 * Updates the monitoring durations of the package. 
1666 * 1667 * @return the new health check state 1668 */ 1669 @GuardedBy("mLock") handleElapsedTimeLocked(long elapsedMs)1670 public int handleElapsedTimeLocked(long elapsedMs) { 1671 if (elapsedMs <= 0) { 1672 Slog.w(TAG, "Cannot handle non-positive elapsed time for package " + getName()); 1673 return mHealthCheckState; 1674 } 1675 // Transitions to FAILED if now <= 0 and health check not passed 1676 mDurationMs -= elapsedMs; 1677 if (mHealthCheckState == HealthCheckState.ACTIVE) { 1678 // We only update health check durations if we have #setHealthCheckActiveLocked 1679 // This ensures we don't leave the INACTIVE state for an unexpected elapsed time 1680 // Transitions to FAILED if now <= 0 and health check not passed 1681 mHealthCheckDurationMs -= elapsedMs; 1682 } 1683 return updateHealthCheckStateLocked(); 1684 } 1685 1686 /** Explicitly update the monitoring duration of the package. */ 1687 @GuardedBy("mLock") updateHealthCheckDuration(long newDurationMs)1688 public void updateHealthCheckDuration(long newDurationMs) { 1689 mDurationMs = newDurationMs; 1690 } 1691 1692 /** 1693 * Marks the health check as passed and transitions to {@link HealthCheckState.PASSED} 1694 * if not yet {@link HealthCheckState.FAILED}. 1695 * 1696 * @return the new {@link HealthCheckState health check state} 1697 */ 1698 @GuardedBy("mLock") 1699 @HealthCheckState tryPassHealthCheckLocked()1700 public int tryPassHealthCheckLocked() { 1701 if (mHealthCheckState != HealthCheckState.FAILED) { 1702 // FAILED is a final state so only pass if we haven't failed 1703 // Transition to PASSED 1704 mHasPassedHealthCheck = true; 1705 } 1706 return updateHealthCheckStateLocked(); 1707 } 1708 1709 /** Returns the monitored package name. */ getName()1710 private String getName() { 1711 return mPackageName; 1712 } 1713 1714 /** 1715 * Returns the current {@link HealthCheckState health check state}. 
1716 */ 1717 @GuardedBy("mLock") 1718 @HealthCheckState getHealthCheckStateLocked()1719 public int getHealthCheckStateLocked() { 1720 return mHealthCheckState; 1721 } 1722 1723 /** 1724 * Returns the shortest duration before the package should be scheduled for a prune. 1725 * 1726 * @return the duration or {@link Long#MAX_VALUE} if the package should not be scheduled 1727 */ 1728 @GuardedBy("mLock") getShortestScheduleDurationMsLocked()1729 public long getShortestScheduleDurationMsLocked() { 1730 // Consider health check duration only if #isPendingHealthChecksLocked is true 1731 return Math.min(toPositive(mDurationMs), 1732 isPendingHealthChecksLocked() 1733 ? toPositive(mHealthCheckDurationMs) : Long.MAX_VALUE); 1734 } 1735 1736 /** 1737 * Returns {@code true} if the total duration left to monitor the package is less than or 1738 * equal to 0 {@code false} otherwise. 1739 */ 1740 @GuardedBy("mLock") isExpiredLocked()1741 public boolean isExpiredLocked() { 1742 return mDurationMs <= 0; 1743 } 1744 1745 /** 1746 * Returns {@code true} if the package, {@link #getName} is expecting health check results 1747 * {@code false} otherwise. 1748 */ 1749 @GuardedBy("mLock") isPendingHealthChecksLocked()1750 public boolean isPendingHealthChecksLocked() { 1751 return mHealthCheckState == HealthCheckState.ACTIVE 1752 || mHealthCheckState == HealthCheckState.INACTIVE; 1753 } 1754 1755 /** 1756 * Updates the health check state based on {@link #mHasPassedHealthCheck} 1757 * and {@link #mHealthCheckDurationMs}. 
1758 * 1759 * @return the new {@link HealthCheckState health check state} 1760 */ 1761 @GuardedBy("mLock") 1762 @HealthCheckState updateHealthCheckStateLocked()1763 private int updateHealthCheckStateLocked() { 1764 int oldState = mHealthCheckState; 1765 if (mHasPassedHealthCheck) { 1766 // Set final state first to avoid ambiguity 1767 mHealthCheckState = HealthCheckState.PASSED; 1768 } else if (mHealthCheckDurationMs <= 0 || mDurationMs <= 0) { 1769 // Set final state first to avoid ambiguity 1770 mHealthCheckState = HealthCheckState.FAILED; 1771 } else if (mHealthCheckDurationMs == Long.MAX_VALUE) { 1772 mHealthCheckState = HealthCheckState.INACTIVE; 1773 } else { 1774 mHealthCheckState = HealthCheckState.ACTIVE; 1775 } 1776 1777 if (oldState != mHealthCheckState) { 1778 Slog.i(TAG, "Updated health check state for package " + getName() + ": " 1779 + toString(oldState) + " -> " + toString(mHealthCheckState)); 1780 } 1781 return mHealthCheckState; 1782 } 1783 1784 /** Returns a {@link String} representation of the current health check state. */ toString(@ealthCheckState int state)1785 private String toString(@HealthCheckState int state) { 1786 switch (state) { 1787 case HealthCheckState.ACTIVE: 1788 return "ACTIVE"; 1789 case HealthCheckState.INACTIVE: 1790 return "INACTIVE"; 1791 case HealthCheckState.PASSED: 1792 return "PASSED"; 1793 case HealthCheckState.FAILED: 1794 return "FAILED"; 1795 default: 1796 return "UNKNOWN"; 1797 } 1798 } 1799 1800 /** Returns {@code value} if it is greater than 0 or {@link Long#MAX_VALUE} otherwise. */ toPositive(long value)1801 private long toPositive(long value) { 1802 return value > 0 ? value : Long.MAX_VALUE; 1803 } 1804 1805 /** Compares the equality of this object with another {@link MonitoredPackage}. 
*/ 1806 @VisibleForTesting isEqualTo(MonitoredPackage pkg)1807 boolean isEqualTo(MonitoredPackage pkg) { 1808 return (getName().equals(pkg.getName())) 1809 && mDurationMs == pkg.mDurationMs 1810 && mHasPassedHealthCheck == pkg.mHasPassedHealthCheck 1811 && mHealthCheckDurationMs == pkg.mHealthCheckDurationMs 1812 && (mMitigationCalls.toString()).equals(pkg.mMitigationCalls.toString()); 1813 } 1814 } 1815 1816 @GuardedBy("mLock") 1817 @SuppressWarnings("GuardedBy") saveAllObserversBootMitigationCountToMetadata(String filePath)1818 void saveAllObserversBootMitigationCountToMetadata(String filePath) { 1819 HashMap<String, Integer> bootMitigationCounts = new HashMap<>(); 1820 for (int i = 0; i < mAllObservers.size(); i++) { 1821 final ObserverInternal observer = mAllObservers.valueAt(i); 1822 bootMitigationCounts.put(observer.name, observer.getBootMitigationCount()); 1823 } 1824 1825 try { 1826 FileOutputStream fileStream = new FileOutputStream(new File(filePath)); 1827 ObjectOutputStream objectStream = new ObjectOutputStream(fileStream); 1828 objectStream.writeObject(bootMitigationCounts); 1829 objectStream.flush(); 1830 objectStream.close(); 1831 fileStream.close(); 1832 } catch (Exception e) { 1833 Slog.i(TAG, "Could not save observers metadata to file: " + e); 1834 } 1835 } 1836 1837 /** 1838 * Handles the thresholding logic for system server boots. 
*/
class BootThreshold {

    // Number of boots within mTriggerWindow at which incrementAndTest() reports a boot loop.
    private final int mBootTriggerCount;
    // Length of the boot counting window, compared against uptimeMillis() differences.
    private final long mTriggerWindow;

    /**
     * @param bootTriggerCount boot count at which the device is considered boot looping
     * @param triggerWindow length of the window in which boots are counted
     */
    BootThreshold(int bootTriggerCount, long triggerWindow) {
        this.mBootTriggerCount = bootTriggerCount;
        this.mTriggerWindow = triggerWindow;
    }

    /** Resets the window start and the boot count to 0. */
    public void reset() {
        setStart(0);
        setCount(0);
    }

    /** Returns the stored boot count, or 0 if none is stored. */
    protected int getCount() {
        return CrashRecoveryProperties.rescueBootCount().orElse(0);
    }

    /** Stores {@code count} as the boot count. */
    protected void setCount(int count) {
        CrashRecoveryProperties.rescueBootCount(count);
    }

    /** Returns the stored start of the boot counting window, or 0 if none is stored. */
    public long getStart() {
        return CrashRecoveryProperties.rescueBootStart().orElse(0L);
    }

    /** Returns the stored boot mitigation count, or 0 if none is stored. */
    public int getMitigationCount() {
        return CrashRecoveryProperties.bootMitigationCount().orElse(0);
    }

    /** Stores the start of the boot counting window, clamped by {@link #getStartTime}. */
    public void setStart(long start) {
        CrashRecoveryProperties.rescueBootStart(getStartTime(start));
    }

    /** Stores the start of the mitigation window, clamped by {@link #getStartTime}. */
    public void setMitigationStart(long start) {
        CrashRecoveryProperties.bootMitigationStart(getStartTime(start));
    }

    /** Returns the stored start of the mitigation window, or 0 if none is stored. */
    public long getMitigationStart() {
        return CrashRecoveryProperties.bootMitigationStart().orElse(0L);
    }

    /** Stores {@code count} as the boot mitigation count. */
    public void setMitigationCount(int count) {
        CrashRecoveryProperties.bootMitigationCount(count);
    }

    /** Clamps {@code amount} into the inclusive range [{@code low}, {@code high}]. */
    private static long constrain(long amount, long low, long high) {
        return amount < low ? low : (amount > high ? high : amount);
    }

    /** Returns {@code start} clamped into the range [0, current uptime]. */
    public long getStartTime(long start) {
        final long now = mSystemClock.uptimeMillis();
        return constrain(start, 0, now);
    }

    /**
     * Writes the current boot mitigation count to {@code METADATA_FILE} as text.
     * Failures are logged and otherwise ignored.
     */
    public void saveMitigationCountToMetadata() {
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(METADATA_FILE))) {
            writer.write(String.valueOf(getMitigationCount()));
        } catch (Exception e) {
            Slog.e(TAG, "Could not save metadata to file: " + e);
        }
    }

    /**
     * If {@code METADATA_FILE} exists, restores the boot mitigation count from its first
     * line and deletes the file. On any failure (including an unparsable count) the
     * failure is logged and the file is left in place.
     */
    public void readMitigationCountFromMetadataIfNecessary() {
        File bootPropsFile = new File(METADATA_FILE);
        if (bootPropsFile.exists()) {
            try (BufferedReader reader = new BufferedReader(new FileReader(METADATA_FILE))) {
                String mitigationCount = reader.readLine();
                setMitigationCount(Integer.parseInt(mitigationCount));
                bootPropsFile.delete();
            } catch (Exception e) {
                Slog.i(TAG, "Could not read metadata file: " + e);
            }
        }
    }


    /**
     * Increments the boot counter, and returns whether the device is bootlooping.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Restore mitigation counts from {@code METADATA_FILE} (per observer when the
     *       recoverability detection flag is on, a single count otherwise).</li>
     *   <li>If the stored window start lies after the current uptime, restart both the boot
     *       and mitigation windows now.</li>
     *   <li>If the mitigation window is older than {@code DEFAULT_DEESCALATION_WINDOW_MS},
     *       restart it and reset the mitigation count(s).</li>
     *   <li>If the boot window has elapsed, start a new one with a count of 1 and report no
     *       boot loop; otherwise increment the count and compare it to the threshold.</li>
     * </ol>
     *
     * @return {@code true} if the boot count within the window reached the trigger count,
     *         or, with recoverability detection enabled, if a mitigation was already
     *         performed and this is at least the second boot in the window
     */
    @GuardedBy("mLock")
    public boolean incrementAndTest() {
        if (Flags.recoverabilityDetection()) {
            readAllObserversBootMitigationCountIfNecessary(METADATA_FILE);
        } else {
            readMitigationCountFromMetadataIfNecessary();
        }

        final long now = mSystemClock.uptimeMillis();
        if (now - getStart() < 0) {
            Slog.e(TAG, "Window was less than zero. Resetting start to current time.");
            setStart(now);
            setMitigationStart(now);
        }
        if (now - getMitigationStart() > DEFAULT_DEESCALATION_WINDOW_MS) {
            setMitigationStart(now);
            if (Flags.recoverabilityDetection()) {
                resetAllObserversBootMitigationCount();
            } else {
                setMitigationCount(0);
            }
        }
        final long window = now - getStart();
        if (window >= mTriggerWindow) {
            // The previous window expired: this boot is the first of a new window.
            setCount(1);
            setStart(now);
            return false;
        } else {
            int count = getCount() + 1;
            setCount(count);
            EventLogTags.writeRescueNote(Process.ROOT_UID, count, window);
            if (Flags.recoverabilityDetection()) {
                // After a reboot (e.g. by WARM_REBOOT or mainline rollback) we apply
                // mitigations without waiting for DEFAULT_BOOT_LOOP_TRIGGER_COUNT.
                return (count >= mBootTriggerCount)
                        || (performedMitigationsDuringWindow() && count > 1);
            }
            return count >= mBootTriggerCount;
        }
    }

    /** Returns {@code true} if any observer has a boot mitigation count above 0. */
    @GuardedBy("mLock")
    private boolean performedMitigationsDuringWindow() {
        for (ObserverInternal observerInternal: mAllObservers.values()) {
            if (observerInternal.getBootMitigationCount() > 0) {
                return true;
            }
        }
        return false;
    }

    /** Sets every observer's boot mitigation count to 0 and persists the result. */
    @GuardedBy("mLock")
    private void resetAllObserversBootMitigationCount() {
        for (int i = 0; i < mAllObservers.size(); i++) {
            final ObserverInternal observer = mAllObservers.valueAt(i);
            observer.setBootMitigationCount(0);
        }
        saveAllObserversBootMitigationCountToMetadata(METADATA_FILE);
    }

    /**
     * If {@code filePath} exists, reads a serialized map of observer name to boot
     * mitigation count from it and applies the count to every registered observer that
     * has an entry. Observers without an entry are left unchanged.
     *
     * <p>This is best effort: any failure while reading or applying the counts is logged
     * and otherwise ignored.
     *
     * @param filePath path of the metadata file to read
     */
    @GuardedBy("mLock")
    @SuppressWarnings("GuardedBy")
    void readAllObserversBootMitigationCountIfNecessary(String filePath) {
        File metadataFile = new File(filePath);
        if (metadataFile.exists()) {
            // try-with-resources closes both streams even when readObject() or the cast
            // throws; closing them manually after the read leaked them on failure.
            try (FileInputStream fileStream = new FileInputStream(metadataFile);
                    ObjectInputStream objectStream = new ObjectInputStream(fileStream)) {
                HashMap<String, Integer> bootMitigationCounts =
                        (HashMap<String, Integer>) objectStream.readObject();

                for (int i = 0; i < mAllObservers.size(); i++) {
                    final ObserverInternal observer = mAllObservers.valueAt(i);
                    if (bootMitigationCounts.containsKey(observer.name)) {
                        observer.setBootMitigationCount(
                                bootMitigationCounts.get(observer.name));
                    }
                }
            } catch (Exception e) {
                Slog.i(TAG, "Could not read observer metadata file: " + e);
            }
        }
    }

}
}