/**
 * Copyright (c) 2020, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef CPP_WATCHDOG_SERVER_SRC_PERFORMANCEPROFILER_H_
#define CPP_WATCHDOG_SERVER_SRC_PERFORMANCEPROFILER_H_

#include "PressureMonitor.h"
#include "ProcDiskStatsCollector.h"
#include "ProcStatCollector.h"
#include "UidStatsCollector.h"
#include "WatchdogPerfService.h"

#include <android-base/chrono_utils.h>
#include <android-base/result.h>
#include <android/util/ProtoOutputStream.h>
#include <cutils/multiuser.h>
#include <gtest/gtest_prod.h>
#include <meminfo/procmeminfo.h>
#include <utils/Errors.h>
#include <utils/Mutex.h>
#include <utils/RefBase.h>
#include <utils/SystemClock.h>

#include <android_car_feature.h>

#include <chrono>
#include <cstdint>
#include <ctime>
#include <functional>
#include <limits>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <variant>
#include <vector>

namespace android {
namespace automotive {
namespace watchdog {

// Number of periodic collection records to cache in memory.
constexpr int32_t kDefaultPeriodicCollectionBufferSize = 180;
constexpr const char kEmptyCollectionMessage[] = "No collection recorded\n";

// Forward declaration for testing use only.
namespace internal {

class PerformanceProfilerPeer;

}  // namespace internal

// Below classes, structs and enums should be used only by the implementation and unit tests.
61 enum ProcStatType { 62 IO_BLOCKED_TASKS_COUNT = 0, 63 MAJOR_FAULTS, 64 CPU_TIME, 65 MEMORY_STATS, 66 PROC_STAT_TYPES, 67 }; 68 69 // UserPackageStats represents the user package performance stats. 70 class UserPackageStats { 71 public: 72 // TODO(b/332773702): Rename nested structs 73 // first-level IoStatsView, ProcSingleStatsView, and ProcCpuStatsView renames to Uid*Stats 74 // second-level ProcessValue and ProcessCpuValue renames to Process*Stats 75 struct IoStatsView { 76 int64_t bytes[UID_STATES] = {0}; 77 int64_t fsync[UID_STATES] = {0}; 78 totalBytesIoStatsView79 int64_t totalBytes() const { 80 return std::numeric_limits<int64_t>::max() - bytes[UidState::FOREGROUND] > 81 bytes[UidState::BACKGROUND] 82 ? bytes[UidState::FOREGROUND] + bytes[UidState::BACKGROUND] 83 : std::numeric_limits<int64_t>::max(); 84 } 85 }; 86 struct ProcSingleStatsView { 87 uint64_t value = 0; 88 struct ProcessValue { 89 std::string comm = ""; 90 uint64_t value = 0; 91 }; 92 std::vector<ProcessValue> topNProcesses = {}; 93 }; 94 struct ProcCpuStatsView { 95 int64_t cpuTimeMillis = 0; 96 int64_t cpuCycles = 0; 97 struct ProcessCpuValue { 98 int32_t pid = -1; 99 std::string comm = ""; 100 int64_t cpuTimeMillis = 0; 101 int64_t cpuCycles = 0; 102 }; 103 std::vector<ProcessCpuValue> topNProcesses = {}; 104 }; 105 struct MemoryStats { 106 uint64_t rssKb = 0; 107 uint64_t pssKb = 0; 108 uint64_t ussKb = 0; 109 uint64_t swapPssKb = 0; 110 }; 111 struct UidMemoryStats { 112 MemoryStats memoryStats; 113 bool isSmapsRollupSupported; 114 struct ProcessMemoryStats { 115 std::string comm = ""; 116 MemoryStats memoryStats; 117 }; 118 std::vector<ProcessMemoryStats> topNProcesses = {}; 119 }; 120 121 UserPackageStats(MetricType metricType, const UidStats& uidStats); 122 UserPackageStats(ProcStatType procStatType, const UidStats& uidStats, int topNProcessCount, 123 bool isSmapsRollupSupported); 124 125 // Class must be DefaultInsertable for std::vector<T>::resize to work UserPackageStats()126 
UserPackageStats() : uid(0), genericPackageName("") {} 127 // For unit test case only UserPackageStats(uid_t uid,std::string genericPackageName,std::variant<std::monostate,IoStatsView,ProcSingleStatsView,ProcCpuStatsView,UidMemoryStats> statsView)128 UserPackageStats(uid_t uid, std::string genericPackageName, 129 std::variant<std::monostate, IoStatsView, ProcSingleStatsView, 130 ProcCpuStatsView, UidMemoryStats> 131 statsView) : 132 uid(uid), 133 genericPackageName(std::move(genericPackageName)), 134 statsView(std::move(statsView)) {} 135 136 // Returns the primary value of the current StatsView. If the variant has value 137 // |std::monostate|, returns 0. 138 // 139 // This value should be used to sort the StatsViews. 140 uint64_t getValue() const; 141 std::string toString(MetricType metricsType, const int64_t totalIoStats[][UID_STATES]) const; 142 std::string toString(int64_t totalValue) const; 143 std::string toString(int64_t totalRssKb, int64_t totalPssKb) const; 144 145 uid_t uid; 146 std::string genericPackageName; 147 std::variant<std::monostate, IoStatsView, ProcSingleStatsView, ProcCpuStatsView, UidMemoryStats> 148 statsView; 149 150 private: 151 void cacheTopNProcessSingleStats( 152 ProcStatType procStatType, const UidStats& uidStats, int topNProcessCount, 153 std::vector<UserPackageStats::ProcSingleStatsView::ProcessValue>* topNProcesses); 154 void cacheTopNProcessCpuStats( 155 const UidStats& uidStats, int topNProcessCount, 156 std::vector<UserPackageStats::ProcCpuStatsView::ProcessCpuValue>* topNProcesses); 157 void cacheTopNProcessMemStats( 158 const UidStats& uidStats, int topNProcessCount, bool isSmapsRollupSupported, 159 std::vector<UserPackageStats::UidMemoryStats::ProcessMemoryStats>* topNProcesses); 160 }; 161 162 /** 163 * User package summary performance stats collected from the `/proc/uid_io/stats`, 164 * `/proc/[pid]/stat`, `/proc/[pid]/task/[tid]/stat`, and /proc/[pid]/status` files. 
165 */ 166 struct UserPackageSummaryStats { 167 std::vector<UserPackageStats> topNCpuTimes = {}; 168 std::vector<UserPackageStats> topNIoReads = {}; 169 std::vector<UserPackageStats> topNIoWrites = {}; 170 std::vector<UserPackageStats> topNIoBlocked = {}; 171 std::vector<UserPackageStats> topNMajorFaults = {}; 172 std::vector<UserPackageStats> topNMemStats = {}; 173 int64_t totalIoStats[METRIC_TYPES][UID_STATES] = {{0}}; 174 std::unordered_map<uid_t, uint64_t> taskCountByUid = {}; 175 // TODO(b/337115923): Clean up below duplicate fields and report `totalMajorFaults`, 176 // `totalRssKb`, `totalPssKb`, and `majorFaultsPercentChange` as part of `SystemSummaryStats`. 177 int64_t totalCpuTimeMillis = 0; 178 uint64_t totalCpuCycles = 0; 179 uint64_t totalMajorFaults = 0; 180 uint64_t totalRssKb = 0; 181 uint64_t totalPssKb = 0; 182 // Percentage of increase/decrease in the major page faults since last collection. 183 double majorFaultsPercentChange = 0.0; 184 std::string toString() const; 185 }; 186 187 // TODO(b/268402964): Calculate the total CPU cycles using the per-UID BPF tool. 188 // System performance stats collected from the `/proc/stats` file. 189 struct SystemSummaryStats { 190 int64_t cpuIoWaitTimeMillis = 0; 191 int64_t cpuIdleTimeMillis = 0; 192 int64_t totalCpuTimeMillis = 0; 193 uint64_t totalCpuCycles = 0; 194 uint64_t contextSwitchesCount = 0; 195 uint32_t ioBlockedProcessCount = 0; 196 uint32_t totalProcessCount = 0; 197 std::string toString() const; 198 }; 199 200 // Performance record collected during a sampling/collection period. 201 struct PerfStatsRecord { 202 time_point_millis collectionTimeMillis; 203 SystemSummaryStats systemSummaryStats; 204 UserPackageSummaryStats userPackageSummaryStats; 205 std::unordered_map<PressureMonitorInterface::PressureLevel, std::chrono::milliseconds> 206 memoryPressureLevelDurations; 207 std::string toString() const; 208 }; 209 210 // Group of performance records collected for a collection event. 
211 struct CollectionInfo { 212 size_t maxCacheSize = 0; // Maximum cache size for the collection. 213 std::vector<PerfStatsRecord> records; // Cache of collected performance records. 214 std::string toString() const; 215 }; 216 217 // Group of performance records collected for a user switch collection event. 218 struct UserSwitchCollectionInfo : CollectionInfo { 219 userid_t from = 0; 220 userid_t to = 0; 221 }; 222 223 // PerformanceProfiler implements the I/O performance data collection module. 224 class PerformanceProfiler final : 225 public DataProcessorInterface, 226 public PressureMonitorInterface::PressureChangeCallbackInterface { 227 public: 228 PerformanceProfiler( 229 const android::sp<PressureMonitorInterface>& pressureMonitor, 230 const std::function<int64_t()>& getElapsedTimeSinceBootMillisFunc = &elapsedRealtime) : kPressureMonitor(pressureMonitor)231 kPressureMonitor(pressureMonitor), 232 kGetElapsedTimeSinceBootMillisFunc(getElapsedTimeSinceBootMillisFunc), 233 mTopNStatsPerCategory(0), 234 mTopNStatsPerSubcategory(0), 235 mMaxUserSwitchEvents(0), 236 mSystemEventDataCacheDurationSec(0), 237 // TODO(b/333722043): Once carwatchdogd has sys_ptrace capability, set 238 // mIsSmapsRollupSupported field from `android::meminfo::IsSmapsRollupSupported()`. 239 // Disabling smaps_rollup support because this file cannot be read without sys_ptrace 240 // capability. 
241 mIsSmapsRollupSupported(false), 242 mIsMemoryProfilingEnabled(android::car::feature::car_watchdog_memory_profiling()), 243 mBoottimeCollection({}), 244 mPeriodicCollection({}), 245 mUserSwitchCollections({}), 246 mWakeUpCollection({}), 247 mCustomCollection({}), 248 mLastMajorFaults(0), 249 mDoSendResourceUsageStats(false), 250 mMemoryPressureLevelDeltaInfo(PressureLevelDeltaInfo(getElapsedTimeSinceBootMillisFunc)) { 251 } 252 ~PerformanceProfiler()253 ~PerformanceProfiler() { terminate(); } 254 name()255 std::string name() const override { return "PerformanceProfiler"; } 256 257 // Implements DataProcessorInterface. 258 android::base::Result<void> onSystemStartup() override; 259 260 void onCarWatchdogServiceRegistered() override; 261 262 android::base::Result<void> onBoottimeCollection( 263 time_point_millis time, 264 const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 265 const android::wp<ProcStatCollectorInterface>& procStatCollector, 266 aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) override; 267 268 android::base::Result<void> onWakeUpCollection( 269 time_point_millis time, 270 const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 271 const android::wp<ProcStatCollectorInterface>& procStatCollector) override; 272 273 android::base::Result<void> onPeriodicCollection( 274 time_point_millis time, SystemState systemState, 275 const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 276 const android::wp<ProcStatCollectorInterface>& procStatCollector, 277 aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) override; 278 279 android::base::Result<void> onUserSwitchCollection( 280 time_point_millis time, userid_t from, userid_t to, 281 const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 282 const android::wp<ProcStatCollectorInterface>& procStatCollector) override; 283 284 android::base::Result<void> onCustomCollection( 285 time_point_millis time, SystemState 
systemState, 286 const std::unordered_set<std::string>& filterPackages, 287 const android::wp<UidStatsCollectorInterface>& uidStatsCollector, 288 const android::wp<ProcStatCollectorInterface>& procStatCollector, 289 aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) override; 290 onPeriodicMonitor(time_t time,const android::wp<ProcDiskStatsCollectorInterface> & procDiskStatsCollector,const std::function<void ()> & alertHandler)291 android::base::Result<void> onPeriodicMonitor( 292 [[maybe_unused]] time_t time, 293 [[maybe_unused]] const android::wp<ProcDiskStatsCollectorInterface>& 294 procDiskStatsCollector, 295 [[maybe_unused]] const std::function<void()>& alertHandler) override { 296 // No monitoring done here as this DataProcessor only collects I/O performance records. 297 return {}; 298 } 299 300 android::base::Result<void> onDump(int fd) const override; 301 302 android::base::Result<void> onDumpProto( 303 const CollectionIntervals& collectionIntervals, 304 android::util::ProtoOutputStream& outProto) const override; 305 306 android::base::Result<void> onCustomCollectionDump(int fd) override; 307 308 void onPressureChanged(PressureMonitorInterface::PressureLevel) override; 309 310 protected: 311 android::base::Result<void> init(); 312 313 // Clears in-memory cache. 
314 void terminate(); 315 316 private: 317 class PressureLevelDeltaInfo { 318 public: PressureLevelDeltaInfo(const std::function<int64_t ()> & getElapsedTimeSinceBootMillisFunc)319 explicit PressureLevelDeltaInfo( 320 const std::function<int64_t()>& getElapsedTimeSinceBootMillisFunc) : 321 kGetElapsedTimeSinceBootMillisFunc(getElapsedTimeSinceBootMillisFunc), 322 mLatestPressureLevel(PressureMonitorInterface::PRESSURE_LEVEL_NONE), 323 mLatestPressureLevelElapsedRealtimeMillis(getElapsedTimeSinceBootMillisFunc()) {} 324 325 // Calculates the duration for the previously reported pressure level, updates it in 326 // mPressureLevelDurations, and sets the latest pressure level and its elapsed realtime. 327 void setLatestPressureLevelLocked(PressureMonitorInterface::PressureLevel pressureLevel); 328 329 // Returns the latest pressure stats and flushes stats to mPressureLevelDurations. 330 std::unordered_map<PressureMonitorInterface::PressureLevel, std::chrono::milliseconds> 331 onCollectionLocked(); 332 333 private: 334 // Updated by test for mocking elapsed time. 335 const std::function<int64_t()> kGetElapsedTimeSinceBootMillisFunc; 336 337 // Latest pressure level reported by the PressureMonitor. 338 PressureMonitorInterface::PressureLevel mLatestPressureLevel; 339 340 // Time when the latest pressure level was recorded. Used to calculate 341 // pressureLevelDurations. 342 int64_t mLatestPressureLevelElapsedRealtimeMillis = 0; 343 344 // Duration spent in different pressure levels since the last poll. 345 std::unordered_map<PressureMonitorInterface::PressureLevel, std::chrono::milliseconds> 346 mPressureLevelDurations = {}; 347 }; 348 349 // Processes the collected data. 
350 android::base::Result<void> processLocked( 351 time_point_millis time, SystemState systemState, 352 const std::unordered_set<std::string>& filterPackages, 353 const android::sp<UidStatsCollectorInterface>& uidStatsCollector, 354 const android::sp<ProcStatCollectorInterface>& procStatCollector, 355 CollectionInfo* collectionInfo, 356 aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats); 357 358 // Processes per-UID performance data. 359 void processUidStatsLocked( 360 bool isGarageModeActive, int64_t totalCpuTimeMillis, 361 const std::unordered_set<std::string>& filterPackages, 362 const android::sp<UidStatsCollectorInterface>& uidStatsCollector, 363 std::vector<aidl::android::automotive::watchdog::internal::UidResourceUsageStats>* 364 uidResourceUsageStats, 365 UserPackageSummaryStats* userPackageSummaryStats); 366 367 // Processes system performance data from the `/proc/stats` file. 368 void processProcStatLocked(const android::sp<ProcStatCollectorInterface>& procStatCollector, 369 SystemSummaryStats* systemSummaryStats) const; 370 371 // Dump the user switch collection 372 android::base::Result<void> onUserSwitchCollectionDump(int fd) const; 373 374 void clearExpiredSystemEventCollections(time_point_millis time); 375 376 void dumpStatsRecordsProto(const CollectionInfo& collection, 377 android::util::ProtoOutputStream& outProto) const; 378 379 void dumpPackageCpuStatsProto(const std::vector<UserPackageStats>& userPackageStats, 380 android::util::ProtoOutputStream& outProto) const; 381 382 void dumpPackageStorageIoStatsProto(const std::vector<UserPackageStats>& userPackageStats, 383 const uint64_t storageStatsFieldId, 384 android::util::ProtoOutputStream& outProto) const; 385 386 void dumpPackageTaskStateStatsProto(const std::vector<UserPackageStats>& userPackageStats, 387 const std::unordered_map<uid_t, uint64_t>& taskCountByUid, 388 android::util::ProtoOutputStream& outProto) const; 389 390 void dumpPackageMajorPageFaultsProto(const 
std::vector<UserPackageStats>& userPackageStats, 391 android::util::ProtoOutputStream& outProto) const; 392 393 // Pressure monitor instance. 394 const android::sp<PressureMonitorInterface> kPressureMonitor; 395 396 // Updated by test for mocking elapsed time. 397 const std::function<int64_t()> kGetElapsedTimeSinceBootMillisFunc; 398 399 // Top N per-UID stats per category. 400 int mTopNStatsPerCategory; 401 402 // Top N per-process stats per subcategory. 403 int mTopNStatsPerSubcategory; 404 405 // Max amount of user switch events cached in |mUserSwitchCollections|. 406 size_t mMaxUserSwitchEvents; 407 408 // Amount of seconds before a system event's cache is cleared. 409 std::chrono::seconds mSystemEventDataCacheDurationSec; 410 411 // Smaps rollup is supported by kernel or not. 412 bool mIsSmapsRollupSupported; 413 414 // Memory Profiling feature flag is enabled or not. 415 bool mIsMemoryProfilingEnabled; 416 417 // Makes sure only one collection is running at any given time. 418 mutable Mutex mMutex; 419 420 // Info for the boot-time collection event. The cache is persisted until system shutdown/reboot 421 // or a wake-up collection occurs. 422 CollectionInfo mBoottimeCollection GUARDED_BY(mMutex); 423 424 // Info for the periodic collection event. The cache size is limited by 425 // |ro.carwatchdog.periodic_collection_buffer_size|. 426 CollectionInfo mPeriodicCollection GUARDED_BY(mMutex); 427 428 // Cache for user switch collection events. Events are cached from oldest to newest. 429 std::vector<UserSwitchCollectionInfo> mUserSwitchCollections GUARDED_BY(mMutex); 430 431 // Info for the wake-up collection event. Only the latest wake-up collection is cached. 432 CollectionInfo mWakeUpCollection GUARDED_BY(mMutex); 433 434 // Info for the custom collection event. The info is cleared at the end of every custom 435 // collection. 436 CollectionInfo mCustomCollection GUARDED_BY(mMutex); 437 438 // Major faults delta from last collection. 
Useful when calculating the percentage change in 439 // major faults since last collection. 440 uint64_t mLastMajorFaults GUARDED_BY(mMutex); 441 442 // Enables the sending of resource usage stats to CarService. 443 bool mDoSendResourceUsageStats GUARDED_BY(mMutex); 444 445 // Aggregated pressure level changes occurred since the last collection. 446 PressureLevelDeltaInfo mMemoryPressureLevelDeltaInfo GUARDED_BY(mMutex); 447 448 friend class WatchdogPerfService; 449 450 // For unit tests. 451 friend class internal::PerformanceProfilerPeer; 452 }; 453 454 } // namespace watchdog 455 } // namespace automotive 456 } // namespace android 457 458 #endif // CPP_WATCHDOG_SERVER_SRC_PERFORMANCEPROFILER_H_ 459