1 /**
2 * Copyright (c) 2020, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef WATCHDOG_SERVER_SRC_IOPERFCOLLECTION_H_
18 #define WATCHDOG_SERVER_SRC_IOPERFCOLLECTION_H_
19
20 #include <android-base/chrono_utils.h>
21 #include <android-base/result.h>
22 #include <android/content/pm/IPackageManagerNative.h>
23 #include <cutils/multiuser.h>
24 #include <gtest/gtest_prod.h>
25 #include <time.h>
26 #include <utils/Errors.h>
27 #include <utils/Looper.h>
28 #include <utils/Mutex.h>
29 #include <utils/String16.h>
30 #include <utils/StrongPointer.h>
31 #include <utils/Vector.h>
32
33 #include <string>
34 #include <thread>
35 #include <unordered_map>
36 #include <unordered_set>
37 #include <vector>
38
39 #include "LooperWrapper.h"
40 #include "ProcPidStat.h"
41 #include "ProcStat.h"
42 #include "UidIoStats.h"
43
44 namespace android {
45 namespace automotive {
46 namespace watchdog {
47
// Flag strings associated with custom collection requests: starting and ending a custom
// collection, plus the optional interval, maximum duration, and package filter values.
constexpr const char* kStartCustomCollectionFlag{"--start_io"};
constexpr const char* kEndCustomCollectionFlag{"--stop_io"};
constexpr const char* kIntervalFlag{"--interval"};
constexpr const char* kMaxDurationFlag{"--max_duration"};
constexpr const char* kFilterPackagesFlag{"--filter_packages"};
53
54 // Performance data collected from the `/proc/uid_io/stats` file.
55 struct UidIoPerfData {
56 struct Stats {
57 userid_t userId = 0;
58 std::string packageName;
59 uint64_t bytes[UID_STATES];
60 uint64_t fsync[UID_STATES];
61 };
62 std::vector<Stats> topNReads = {};
63 std::vector<Stats> topNWrites = {};
64 uint64_t total[METRIC_TYPES][UID_STATES] = {{0}};
65 };
66
67 std::string toString(const UidIoPerfData& perfData);
68
// System-wide performance data collected from the `/proc/stat` file. Every counter starts at
// zero.
struct SystemIoPerfData {
    // CPU I/O wait time and total CPU time.
    uint64_t cpuIoWaitTime{0};
    uint64_t totalCpuTime{0};
    // Number of I/O blocked processes and total number of processes.
    uint32_t ioBlockedProcessesCnt{0};
    uint32_t totalProcessesCnt{0};
};

// Returns a string representation of |perfData|.
std::string toString(const SystemIoPerfData& perfData);
78
79 // Performance data collected from the `/proc/[pid]/stat` and `/proc/[pid]/task/[tid]/stat` files.
80 struct ProcessIoPerfData {
81 struct UidStats {
82 userid_t userId = 0;
83 std::string packageName;
84 uint64_t count = 0;
85 struct ProcessStats {
86 std::string comm = "";
87 uint64_t count = 0;
88 };
89 std::vector<ProcessStats> topNProcesses = {};
90 };
91 std::vector<UidStats> topNIoBlockedUids = {};
92 // Total # of tasks owned by each UID in |topNIoBlockedUids|.
93 std::vector<uint64_t> topNIoBlockedUidsTotalTaskCnt = {};
94 std::vector<UidStats> topNMajorFaultUids = {};
95 uint64_t totalMajorFaults = 0;
96 // Percentage of increase/decrease in the major page faults since last collection.
97 double majorFaultsPercentChange = 0.0;
98 };
99
100 std::string toString(const ProcessIoPerfData& data);
101
102 struct IoPerfRecord {
103 time_t time; // Collection time.
104 UidIoPerfData uidIoPerfData;
105 SystemIoPerfData systemIoPerfData;
106 ProcessIoPerfData processIoPerfData;
107 };
108
109 std::string toString(const IoPerfRecord& record);
110
111 struct CollectionInfo {
112 std::chrono::nanoseconds interval = 0ns; // Collection interval between subsequent collections.
113 size_t maxCacheSize = 0; // Maximum cache size for the collection.
114 std::unordered_set<std::string> filterPackages; // Filter the output only to the specified
115 // packages.
116 nsecs_t lastCollectionUptime = 0; // Used to calculate the uptime for next collection.
117 std::vector<IoPerfRecord> records; // Cache of collected performance records.
118 };
119
120 std::string toString(const CollectionInfo& collectionInfo);
121
// Collection events tracked by the collection module. |LAST_EVENT| is a sentinel whose value is
// used as the base for the |SwitchEvent| values.
enum CollectionEvent {
    INIT = 0,
    BOOT_TIME = 1,
    PERIODIC = 2,
    CUSTOM = 3,
    TERMINATED = 4,
    LAST_EVENT = 5,
};
130
131 enum SwitchEvent {
132 // Ends boot-time collection by collecting the last boot-time record and switching the
133 // collection event to periodic collection.
134 END_BOOTTIME_COLLECTION = CollectionEvent::LAST_EVENT + 1,
135 // Ends custom collection, discards collected data and starts periodic collection.
136 END_CUSTOM_COLLECTION
137 };
138
toString(CollectionEvent event)139 static inline std::string toString(CollectionEvent event) {
140 switch (event) {
141 case CollectionEvent::INIT:
142 return "INIT";
143 case CollectionEvent::BOOT_TIME:
144 return "BOOT_TIME";
145 case CollectionEvent::PERIODIC:
146 return "PERIODIC";
147 case CollectionEvent::CUSTOM:
148 return "CUSTOM";
149 case CollectionEvent::TERMINATED:
150 return "TERMINATED";
151 default:
152 return "INVALID";
153 }
154 }
155
156 // IoPerfCollection implements the I/O performance data collection module of the CarWatchDog
157 // service. It exposes APIs that the CarWatchDog main thread and binder service can call to start
158 // a collection, update the collection type, and generate collection dumps.
159 class IoPerfCollection : public MessageHandler {
160 public:
IoPerfCollection()161 IoPerfCollection() :
162 mHandlerLooper(new LooperWrapper()),
163 mBoottimeCollection({}),
164 mPeriodicCollection({}),
165 mCustomCollection({}),
166 mCurrCollectionEvent(CollectionEvent::INIT),
167 mUidToPackageNameMapping({}),
168 mUidIoStats(new UidIoStats()),
169 mProcStat(new ProcStat()),
170 mProcPidStat(new ProcPidStat()),
171 mLastMajorFaults(0) {}
172
~IoPerfCollection()173 ~IoPerfCollection() { terminate(); }
174
175 // Starts the boot-time collection in the looper handler on a collection thread and returns
176 // immediately. Must be called only once. Otherwise, returns an error.
177 android::base::Result<void> start();
178
179 // Terminates the collection thread and returns.
180 void terminate();
181
182 // Ends the boot-time collection, caches boot-time perf records, sends message to the looper to
183 // begin the periodic collection, and returns immediately.
184 virtual android::base::Result<void> onBootFinished();
185
186 // Depending the arguments, it either:
187 // 1. Generates a dump from the boot-time and periodic collection events.
188 // 2. Starts custom collection.
189 // 3. Ends custom collection and dumps the collected data.
190 // Returns any error observed during the dump generation.
191 virtual android::base::Result<void> dump(int fd, const Vector<String16>& args);
192
193 // Dumps the help text.
194 bool dumpHelpText(int fd);
195
196 private:
197 // Generates a dump from the boot-time and periodic collection events.
198 android::base::Result<void> dumpCollection(int fd);
199
200 // Dumps the collectors' status when they are disabled.
201 android::base::Result<void> dumpCollectorsStatusLocked(int fd);
202
203 // Starts a custom collection on the looper handler, temporarily stops the periodic collection
204 // (won't discard the collected data), and returns immediately. Returns any error observed
205 // during this process. The custom collection happens once every |interval| seconds. When the
206 // |maxDuration| is reached, the looper receives a message to end the collection, discards the
207 // collected data, and starts the periodic collection. This is needed to ensure the custom
208 // collection doesn't run forever when a subsequent |endCustomCollection| call is not received.
209 // When |kFilterPackagesFlag| value is provided, the results are filtered only to the specified
210 // package names.
211 android::base::Result<void> startCustomCollection(
212 std::chrono::nanoseconds interval, std::chrono::nanoseconds maxDuration,
213 const std::unordered_set<std::string>& filterPackages);
214
215 // Ends the current custom collection, generates a dump, sends message to looper to start the
216 // periodic collection, and returns immediately. Returns an error when there is no custom
217 // collection running or when a dump couldn't be generated from the custom collection.
218 android::base::Result<void> endCustomCollection(int fd);
219
220 // Handles the messages received by the lopper.
221 void handleMessage(const Message& message) override;
222
223 // Processes the events received by |handleMessage|.
224 android::base::Result<void> processCollectionEvent(CollectionEvent event, CollectionInfo* info);
225
226 // Collects/stores the performance data for the current collection event.
227 android::base::Result<void> collectLocked(CollectionInfo* collectionInfo);
228
229 // Collects performance data from the `/proc/uid_io/stats` file.
230 android::base::Result<void> collectUidIoPerfDataLocked(const CollectionInfo& collectionInfo,
231 UidIoPerfData* uidIoPerfData);
232
233 // Collects performance data from the `/proc/stats` file.
234 android::base::Result<void> collectSystemIoPerfDataLocked(SystemIoPerfData* systemIoPerfData);
235
236 // Collects performance data from the `/proc/[pid]/stat` and
237 // `/proc/[pid]/task/[tid]/stat` files.
238 android::base::Result<void> collectProcessIoPerfDataLocked(
239 const CollectionInfo& collectionInfo, ProcessIoPerfData* processIoPerfData);
240
241 // Updates the |mUidToPackageNameMapping| for the given |uids|.
242 android::base::Result<void> updateUidToPackageNameMapping(
243 const std::unordered_set<uint32_t>& uids);
244
245 // Retrieves package manager from the default service manager.
246 android::base::Result<void> retrievePackageManager();
247
248 // Top N per-UID stats per category.
249 int mTopNStatsPerCategory;
250
251 // Top N per-process stats per subcategory.
252 int mTopNStatsPerSubcategory;
253
254 // Thread on which the actual collection happens.
255 std::thread mCollectionThread;
256
257 // Makes sure only one collection is running at any given time.
258 Mutex mMutex;
259
260 // Handler lopper to execute different collection events on the collection thread.
261 android::sp<LooperWrapper> mHandlerLooper GUARDED_BY(mMutex);
262
263 // Info for the |CollectionEvent::BOOT_TIME| collection event. The cache is persisted until
264 // system shutdown/reboot.
265 CollectionInfo mBoottimeCollection GUARDED_BY(mMutex);
266
267 // Info for the |CollectionEvent::PERIODIC| collection event. The cache size is limited by
268 // |ro.carwatchdog.periodic_collection_buffer_size|.
269 CollectionInfo mPeriodicCollection GUARDED_BY(mMutex);
270
271 // Info for the |CollectionEvent::CUSTOM| collection event. The info is cleared at the end of
272 // every custom collection.
273 CollectionInfo mCustomCollection GUARDED_BY(mMutex);
274
275 // Tracks the current collection event. Updated on |start|, |onBootComplete|,
276 // |startCustomCollection| and |endCustomCollection|.
277 CollectionEvent mCurrCollectionEvent GUARDED_BY(mMutex);
278
279 // Cache of uid to package name mapping.
280 std::unordered_map<uint64_t, std::string> mUidToPackageNameMapping GUARDED_BY(mMutex);
281
282 // Collector/parser for `/proc/uid_io/stats`.
283 android::sp<UidIoStats> mUidIoStats GUARDED_BY(mMutex);
284
285 // Collector/parser for `/proc/stat`.
286 android::sp<ProcStat> mProcStat GUARDED_BY(mMutex);
287
288 // Collector/parser for `/proc/PID/*` stat files.
289 android::sp<ProcPidStat> mProcPidStat GUARDED_BY(mMutex);
290
291 // Major faults delta from last collection. Useful when calculating the percentage change in
292 // major faults since last collection.
293 uint64_t mLastMajorFaults GUARDED_BY(mMutex);
294
295 // To get the package names from app uids.
296 android::sp<android::content::pm::IPackageManagerNative> mPackageManager GUARDED_BY(mMutex);
297
298 FRIEND_TEST(IoPerfCollectionTest, TestCollectionStartAndTerminate);
299 FRIEND_TEST(IoPerfCollectionTest, TestValidCollectionSequence);
300 FRIEND_TEST(IoPerfCollectionTest, TestCollectionTerminatesOnZeroEnabledCollectors);
301 FRIEND_TEST(IoPerfCollectionTest, TestCollectionTerminatesOnError);
302 FRIEND_TEST(IoPerfCollectionTest, TestCustomCollectionTerminatesAfterMaxDuration);
303 FRIEND_TEST(IoPerfCollectionTest, TestValidUidIoStatFile);
304 FRIEND_TEST(IoPerfCollectionTest, TestUidIOStatsLessThanTopNStatsLimit);
305 FRIEND_TEST(IoPerfCollectionTest, TestProcUidIoStatsContentsFromDevice);
306 FRIEND_TEST(IoPerfCollectionTest, TestValidProcStatFile);
307 FRIEND_TEST(IoPerfCollectionTest, TestValidProcPidContents);
308 FRIEND_TEST(IoPerfCollectionTest, TestProcPidContentsLessThanTopNStatsLimit);
309 FRIEND_TEST(IoPerfCollectionTest, TestCustomCollectionFiltersPackageNames);
310 };
311
312 } // namespace watchdog
313 } // namespace automotive
314 } // namespace android
315
316 #endif // WATCHDOG_SERVER_SRC_IOPERFCOLLECTION_H_
317