/*
 * Copyright (c) 2020, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef CPP_WATCHDOG_SERVER_SRC_WATCHDOGPERFSERVICE_H_
#define CPP_WATCHDOG_SERVER_SRC_WATCHDOGPERFSERVICE_H_

#include "LooperWrapper.h"
#include "ProcDiskStatsCollector.h"
#include "ProcStatCollector.h"
#include "UidStatsCollector.h"
#include "WatchdogServiceHelper.h"

#include <WatchdogProperties.sysprop.h>
#include <aidl/android/automotive/watchdog/internal/PackageIoOveruseStats.h>
#include <aidl/android/automotive/watchdog/internal/ResourceStats.h>
#include <aidl/android/automotive/watchdog/internal/UserState.h>
#include <android-base/chrono_utils.h>
#include <android-base/result.h>
#include <android/util/ProtoOutputStream.h>
#include <cutils/multiuser.h>
#include <gtest/gtest_prod.h>
#include <utils/Errors.h>
#include <utils/Looper.h>
#include <utils/Mutex.h>
#include <utils/RefBase.h>
#include <utils/String16.h>
#include <utils/StrongPointer.h>
#include <utils/Vector.h>

#include <time.h>

#include <chrono>  // NOLINT(build/c++11)
#include <cstdint>
#include <functional>
#include <string>
#include <thread>  // NOLINT(build/c++11)
#include <tuple>
#include <unordered_set>
#include <vector>

namespace android {
namespace automotive {
namespace watchdog {

// Forward declaration for testing use only.
54 namespace internal { 55 56 class WatchdogPerfServicePeer; 57 58 } // namespace internal 59 60 constexpr std::chrono::seconds kDefaultPostSystemEventDurationSec = 30s; 61 constexpr std::chrono::seconds kDefaultWakeUpEventDurationSec = 30s; 62 constexpr std::chrono::seconds kDefaultUserSwitchTimeoutSec = 30s; 63 constexpr std::chrono::nanoseconds kPrevUnsentResourceStatsMaxDurationNs = 10min; 64 constexpr const char* kStartCustomCollectionFlag = "--start_perf"; 65 constexpr const char* kEndCustomCollectionFlag = "--stop_perf"; 66 constexpr const char* kIntervalFlag = "--interval"; 67 constexpr const char* kMaxDurationFlag = "--max_duration"; 68 constexpr const char* kFilterPackagesFlag = "--filter_packages"; 69 70 enum SystemState { 71 NORMAL_MODE = 0, 72 GARAGE_MODE = 1, 73 }; 74 75 using time_point_millis = 76 std::chrono::time_point<std::chrono::system_clock, std::chrono::milliseconds>; 77 78 /** 79 * DataProcessor defines methods that must be implemented in order to process the data collected 80 * by |WatchdogPerfService|. 81 */ 82 class DataProcessorInterface : virtual public android::RefBase { 83 public: 84 struct CollectionIntervals { 85 std::chrono::milliseconds mBoottimeIntervalMillis = std::chrono::milliseconds(0); 86 std::chrono::milliseconds mPeriodicIntervalMillis = std::chrono::milliseconds(0); 87 std::chrono::milliseconds mUserSwitchIntervalMillis = std::chrono::milliseconds(0); 88 std::chrono::milliseconds mWakeUpIntervalMillis = std::chrono::milliseconds(0); 89 std::chrono::milliseconds mCustomIntervalMillis = std::chrono::milliseconds(0); 90 }; DataProcessorInterface()91 DataProcessorInterface() {} ~DataProcessorInterface()92 virtual ~DataProcessorInterface() {} 93 // Returns the name of the data processor. 94 virtual std::string name() const = 0; 95 // Callback to initialize the data processor. 96 virtual android::base::Result<void> init() = 0; 97 // Callback to terminate the data processor. 
98 virtual void terminate() = 0; 99 // Callback to perform actions (such as clearing stats from previous system startup events) 100 // before starting boot-time or wake-up collections. 101 virtual android::base::Result<void> onSystemStartup() = 0; 102 // Callback to perform actions once CarWatchdogService is registered. 103 virtual void onCarWatchdogServiceRegistered() = 0; 104 // Callback to process the data collected during boot-time. 105 virtual android::base::Result<void> onBoottimeCollection( 106 time_point_millis time, 107 const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 108 const android::wp<ProcStatCollectorInterface>& procStatCollector, 109 aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) = 0; 110 // Callback to process the data collected during a wake-up event. 111 virtual android::base::Result<void> onWakeUpCollection( 112 time_point_millis time, 113 const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 114 const android::wp<ProcStatCollectorInterface>& procStatCollector) = 0; 115 // Callback to process the data collected periodically post boot complete. 116 virtual android::base::Result<void> onPeriodicCollection( 117 time_point_millis time, SystemState systemState, 118 const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 119 const android::wp<ProcStatCollectorInterface>& procStatCollector, 120 aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) = 0; 121 // Callback to process the data collected during user switch. 122 virtual android::base::Result<void> onUserSwitchCollection( 123 time_point_millis time, userid_t from, userid_t to, 124 const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 125 const android::wp<ProcStatCollectorInterface>& procStatCollector) = 0; 126 127 /** 128 * Callback to process the data collected on custom collection and filter the results only to 129 * the specified |filterPackages|. 
130 */ 131 virtual android::base::Result<void> onCustomCollection( 132 time_point_millis time, SystemState systemState, 133 const std::unordered_set<std::string>& filterPackages, 134 const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 135 const android::wp<ProcStatCollectorInterface>& procStatCollector, 136 aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) = 0; 137 /** 138 * Callback to periodically monitor the collected data and trigger the given |alertHandler| 139 * on detecting resource overuse. 140 */ 141 virtual android::base::Result<void> onPeriodicMonitor( 142 time_t time, const android::wp<ProcDiskStatsCollectorInterface>& procDiskStatsCollector, 143 const std::function<void()>& alertHandler) = 0; 144 // Callback to dump system event data and periodically collected data. 145 virtual android::base::Result<void> onDump(int fd) const = 0; 146 // Callback to dump system event data and periodically collected data in proto format. 147 virtual android::base::Result<void> onDumpProto( 148 const CollectionIntervals& collectionIntervals, 149 android::util::ProtoOutputStream& outProto) const = 0; 150 /** 151 * Callback to dump the custom collected data. When fd == -1, clear the custom collection cache. 152 */ 153 virtual android::base::Result<void> onCustomCollectionDump(int fd) = 0; 154 }; 155 156 enum EventType { 157 // WatchdogPerfService's state. 158 INIT = 0, 159 TERMINATED, 160 161 // Collection events. 162 BOOT_TIME_COLLECTION, 163 PERIODIC_COLLECTION, 164 USER_SWITCH_COLLECTION, 165 WAKE_UP_COLLECTION, 166 CUSTOM_COLLECTION, 167 168 // Monitor event. 169 PERIODIC_MONITOR, 170 171 LAST_EVENT, 172 }; 173 174 enum SwitchMessage { 175 /** 176 * On receiving this message, collect the last boot-time record and start periodic collection 177 * and monitor. 
178 */ 179 END_BOOTTIME_COLLECTION = EventType::LAST_EVENT + 1, 180 181 /** 182 * On receiving this message, collect the last user switch record and start periodic collection 183 * and monitor. 184 */ 185 END_USER_SWITCH_COLLECTION, 186 187 /** 188 * On receiving this message, collect the last wake up record and start periodic collection and 189 * monitor. 190 */ 191 END_WAKE_UP_COLLECTION, 192 193 /** 194 * On receiving this message, ends custom collection, discard collected data and start periodic 195 * collection and monitor. 196 */ 197 END_CUSTOM_COLLECTION, 198 199 LAST_SWITCH_MSG, 200 }; 201 202 enum TaskMessage { 203 // On receiving this message, send the cached resource stats to CarWatchdogService. 204 SEND_RESOURCE_STATS = SwitchMessage::LAST_SWITCH_MSG + 1, 205 }; 206 207 /** 208 * WatchdogPerfServiceInterface collects performance data during boot-time, user switch, system wake 209 * up and periodically post system events. It exposes APIs that the main thread and binder service 210 * can call to start a collection, switch the collection type, and generate collection dumps. 211 */ 212 class WatchdogPerfServiceInterface : virtual public MessageHandler { 213 public: 214 // Register a data processor to process the data collected by |WatchdogPerfService|. 215 virtual android::base::Result<void> registerDataProcessor( 216 android::sp<DataProcessorInterface> processor) = 0; 217 /** 218 * Starts the boot-time collection in the looper handler on a new thread and returns 219 * immediately. Must be called only once. Otherwise, returns an error. 220 */ 221 virtual android::base::Result<void> start() = 0; 222 // Terminates the collection thread and returns. 223 virtual void terminate() = 0; 224 // Sets the system state. 225 virtual void setSystemState(SystemState systemState) = 0; 226 // Handles unsent resource stats. 
227 virtual void onCarWatchdogServiceRegistered() = 0; 228 // Ends the boot-time collection by switching to periodic collection after the post event 229 // duration. 230 virtual android::base::Result<void> onBootFinished() = 0; 231 // Starts and ends the user switch collection depending on the user states received. 232 virtual android::base::Result<void> onUserStateChange( 233 userid_t userId, 234 const aidl::android::automotive::watchdog::internal::UserState& userState) = 0; 235 // Starts wake-up collection. Any running collection is stopped, except for custom collections. 236 virtual android::base::Result<void> onSuspendExit() = 0; 237 // Called on shutdown enter, suspend enter and hibernation enter. 238 virtual android::base::Result<void> onShutdownEnter() = 0; 239 240 /** 241 * Depending on the arguments, it either: 242 * 1. Starts a custom collection. 243 * 2. Or ends the current custom collection and dumps the collected data. 244 * Returns any error observed during the dump generation. 245 */ 246 virtual android::base::Result<void> onCustomCollection(int fd, const char** args, 247 uint32_t numArgs) = 0; 248 // Generates a dump from the system events and periodic collection events. 249 virtual android::base::Result<void> onDump(int fd) const = 0; 250 // Generates a proto dump from system events and periodic collection events. 251 virtual android::base::Result<void> onDumpProto( 252 android::util::ProtoOutputStream& outProto) const = 0; 253 // Dumps the help text. 
254 virtual bool dumpHelpText(int fd) const = 0; 255 }; 256 257 class WatchdogPerfService final : public WatchdogPerfServiceInterface { 258 public: WatchdogPerfService(const android::sp<WatchdogServiceHelperInterface> & watchdogServiceHelper,const std::function<int64_t ()> & getElapsedTimeSinceBootMsFunc)259 WatchdogPerfService(const android::sp<WatchdogServiceHelperInterface>& watchdogServiceHelper, 260 const std::function<int64_t()>& getElapsedTimeSinceBootMsFunc) : 261 kGetElapsedTimeSinceBootMillisFunc(std::move(getElapsedTimeSinceBootMsFunc)), 262 mPostSystemEventDurationNs(std::chrono::duration_cast<std::chrono::nanoseconds>( 263 std::chrono::seconds(sysprop::postSystemEventDuration().value_or( 264 kDefaultPostSystemEventDurationSec.count())))), 265 mWakeUpDurationNs(std::chrono::duration_cast<std::chrono::nanoseconds>( 266 std::chrono::seconds(sysprop::wakeUpEventDuration().value_or( 267 kDefaultWakeUpEventDurationSec.count())))), 268 mUserSwitchTimeoutNs(std::chrono::duration_cast<std::chrono::nanoseconds>( 269 std::chrono::seconds(sysprop::userSwitchTimeout().value_or( 270 kDefaultUserSwitchTimeoutSec.count())))), 271 mHandlerLooper(android::sp<LooperWrapper>::make()), 272 mSystemState(NORMAL_MODE), 273 mBoottimeCollection({}), 274 mPeriodicCollection({}), 275 mUserSwitchCollection({}), 276 mCustomCollection({}), 277 mPeriodicMonitor({}), 278 mUnsentResourceStats({}), 279 mLastCollectionTimeMillis(0), 280 mCurrCollectionEvent(EventType::INIT), 281 mUidStatsCollector(android::sp<UidStatsCollector>::make()), 282 mProcStatCollector(android::sp<ProcStatCollector>::make()), 283 mProcDiskStatsCollector(android::sp<ProcDiskStatsCollector>::make()), 284 mDataProcessors({}), 285 mWatchdogServiceHelper(watchdogServiceHelper) {} 286 287 android::base::Result<void> registerDataProcessor( 288 android::sp<DataProcessorInterface> processor) override; 289 290 android::base::Result<void> start() override; 291 292 void terminate() override; 293 294 void 
setSystemState(SystemState systemState) override; 295 296 void onCarWatchdogServiceRegistered() override; 297 298 android::base::Result<void> onBootFinished() override; 299 300 android::base::Result<void> onUserStateChange( 301 userid_t userId, 302 const aidl::android::automotive::watchdog::internal::UserState& userState) override; 303 304 android::base::Result<void> onSuspendExit() override; 305 306 android::base::Result<void> onShutdownEnter() override; 307 308 android::base::Result<void> onCustomCollection(int fd, const char** args, 309 uint32_t numArgs) override; 310 311 android::base::Result<void> onDump(int fd) const override; 312 android::base::Result<void> onDumpProto( 313 android::util::ProtoOutputStream& outProto) const override; 314 315 bool dumpHelpText(int fd) const override; 316 317 private: 318 struct EventMetadata { 319 // Collection or monitor event. 320 EventType eventType = EventType::LAST_EVENT; 321 // Interval between subsequent events. 322 std::chrono::nanoseconds pollingIntervalNs = 0ns; 323 // Used to calculate the uptime for next event. 324 nsecs_t lastPollUptimeNs = 0; 325 // Filter the results only to the specified packages. 326 std::unordered_set<std::string> filterPackages; 327 328 std::string toString() const; 329 }; 330 331 struct UserSwitchEventMetadata : WatchdogPerfService::EventMetadata { 332 // User id of user being switched from. 333 userid_t from = 0; 334 // User id of user being switched to. 335 userid_t to = 0; 336 }; 337 338 // Dumps the collectors' status when they are disabled. 339 android::base::Result<void> dumpCollectorsStatusLocked(int fd) const; 340 341 /** 342 * Starts a custom collection on the looper handler, temporarily stops the periodic collection 343 * (won't discard the collected data), and returns immediately. Returns any error observed 344 * during this process. 345 * The custom collection happens once every |interval| seconds. 
When the |maxDuration| is 346 * reached, the looper receives a message to end the collection, discards the collected data, 347 * and starts the periodic collection. This is needed to ensure the custom collection doesn't 348 * run forever when a subsequent |endCustomCollection| call is not received. 349 * When |kFilterPackagesFlag| value specified, the results are filtered only to the specified 350 * package names. 351 */ 352 android::base::Result<void> startCustomCollection( 353 std::chrono::nanoseconds interval, std::chrono::nanoseconds maxDuration, 354 const std::unordered_set<std::string>& filterPackages); 355 356 /** 357 * Ends the current custom collection, generates a dump, sends a looper message to start the 358 * periodic collection, and returns immediately. Returns an error when there is no custom 359 * collection running or when a dump couldn't be generated from the custom collection. 360 */ 361 android::base::Result<void> endCustomCollection(int fd); 362 363 // Start a user switch collection. 364 android::base::Result<void> startUserSwitchCollection(); 365 366 // Switch to periodic collection and periodic monitor. 367 void switchToPeriodicLocked(bool startNow); 368 369 // Handles the messages received by the lopper. 370 void handleMessage(const Message& message) override; 371 372 // Processes the collection events received by |handleMessage|. 373 android::base::Result<void> processCollectionEvent(EventMetadata* metadata); 374 375 // Collects/processes the performance data for the current collection event. 376 android::base::Result<void> collectLocked(EventMetadata* metadata); 377 378 // Processes the monitor events received by |handleMessage|. 379 android::base::Result<void> processMonitorEvent(EventMetadata* metadata); 380 381 // Sends the unsent resource stats. 382 android::base::Result<void> sendResourceStats(); 383 384 // Notifies all registered data processors that either boot-time or wake-up collection will 385 // start. 
Individual implementations of data processors may clear stats collected during 386 // previous system startup events. 387 android::base::Result<void> notifySystemStartUpLocked(); 388 389 // Caches resource stats that have not been sent to CarWatchdogService. 390 void cacheUnsentResourceStatsLocked( 391 aidl::android::automotive::watchdog::internal::ResourceStats resourceStats); 392 393 /** 394 * Returns the metadata for the current collection based on |mCurrCollectionEvent|. Returns 395 * nullptr on invalid collection event. 396 */ 397 EventMetadata* getCurrentCollectionMetadataLocked(); 398 399 std::function<int64_t()> kGetElapsedTimeSinceBootMillisFunc; 400 401 // Duration to extend a system event collection after the final signal is received. 402 std::chrono::nanoseconds mPostSystemEventDurationNs; 403 404 // Duration of the wake-up collection event. 405 std::chrono::nanoseconds mWakeUpDurationNs; 406 407 // Timeout duration for user switch collection in case final signal isn't received. 408 std::chrono::nanoseconds mUserSwitchTimeoutNs; 409 410 // Thread on which the actual collection happens. 411 std::thread mCollectionThread; 412 413 // Makes sure only one collection is running at any given time. 414 mutable Mutex mMutex; 415 416 // Handler looper to execute different collection events on the collection thread. 417 android::sp<LooperWrapper> mHandlerLooper GUARDED_BY(mMutex); 418 419 // Current system state. 420 SystemState mSystemState GUARDED_BY(mMutex); 421 422 // Info for the |EventType::BOOT_TIME_COLLECTION| collection event. 423 EventMetadata mBoottimeCollection GUARDED_BY(mMutex); 424 425 // Info for the |EventType::PERIODIC_COLLECTION| collection event. 426 EventMetadata mPeriodicCollection GUARDED_BY(mMutex); 427 428 // Info for the |EventType::USER_SWITCH_COLLECTION| collection event. 429 UserSwitchEventMetadata mUserSwitchCollection GUARDED_BY(mMutex); 430 431 // Info for the |EventType::WAKE_UP_COLLECTION| collection event. 
432 EventMetadata mWakeUpCollection GUARDED_BY(mMutex); 433 434 // Info for the |EventType::CUSTOM_COLLECTION| collection event. The info is cleared at the end 435 // of every custom collection. 436 EventMetadata mCustomCollection GUARDED_BY(mMutex); 437 438 // Info for the |EventType::PERIODIC_MONITOR| monitor event. 439 EventMetadata mPeriodicMonitor GUARDED_BY(mMutex); 440 441 // Cache of resource stats that have not been sent to CarWatchdogService. 442 std::vector<std::tuple<nsecs_t, aidl::android::automotive::watchdog::internal::ResourceStats>> 443 mUnsentResourceStats GUARDED_BY(mMutex); 444 445 // Tracks the latest collection time since boot in millis. 446 int64_t mLastCollectionTimeMillis GUARDED_BY(mMutex); 447 448 // Tracks either the WatchdogPerfService's state or current collection event. Updated on 449 // |start|, |onBootFinished|, |onUserStateChange|, |startCustomCollection|, 450 // |endCustomCollection|, and |terminate|. 451 EventType mCurrCollectionEvent GUARDED_BY(mMutex); 452 453 // Collector for UID process and I/O stats. 454 android::sp<UidStatsCollectorInterface> mUidStatsCollector GUARDED_BY(mMutex); 455 456 // Collector/parser for `/proc/stat`. 457 android::sp<ProcStatCollectorInterface> mProcStatCollector GUARDED_BY(mMutex); 458 459 // Collector/parser for `/proc/diskstats` file. 460 android::sp<ProcDiskStatsCollectorInterface> mProcDiskStatsCollector GUARDED_BY(mMutex); 461 462 // Data processors for the collected performance data. 463 std::vector<android::sp<DataProcessorInterface>> mDataProcessors GUARDED_BY(mMutex); 464 465 // Helper to communicate with the CarWatchdogService. 466 android::sp<WatchdogServiceHelperInterface> mWatchdogServiceHelper GUARDED_BY(mMutex); 467 468 // For unit tests. 
469 friend class internal::WatchdogPerfServicePeer; 470 FRIEND_TEST(WatchdogPerfServiceTest, TestServiceStartAndTerminate); 471 }; 472 473 } // namespace watchdog 474 } // namespace automotive 475 } // namespace android 476 477 #endif // CPP_WATCHDOG_SERVER_SRC_WATCHDOGPERFSERVICE_H_ 478