1 /**
2  * Copyright (c) 2020, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef WATCHDOG_SERVER_SRC_IOPERFCOLLECTION_H_
18 #define WATCHDOG_SERVER_SRC_IOPERFCOLLECTION_H_
19 
20 #include <android-base/chrono_utils.h>
21 #include <android-base/result.h>
22 #include <android/content/pm/IPackageManagerNative.h>
23 #include <cutils/multiuser.h>
24 #include <gtest/gtest_prod.h>
25 #include <time.h>
26 #include <utils/Errors.h>
27 #include <utils/Looper.h>
28 #include <utils/Mutex.h>
29 #include <utils/String16.h>
30 #include <utils/StrongPointer.h>
31 #include <utils/Vector.h>
32 
33 #include <string>
34 #include <thread>
35 #include <unordered_map>
36 #include <unordered_set>
37 #include <vector>
38 
39 #include "LooperWrapper.h"
40 #include "ProcPidStat.h"
41 #include "ProcStat.h"
42 #include "UidIoStats.h"
43 
44 namespace android {
45 namespace automotive {
46 namespace watchdog {
47 
// Flag strings used by this module. The code that parses them is not in this header; the names
// suggest they select and parameterize custom collection via |IoPerfCollection::dump| arguments
// (TODO confirm against the implementation).
constexpr const char* kStartCustomCollectionFlag{"--start_io"};
constexpr const char* kEndCustomCollectionFlag{"--stop_io"};
constexpr const char* kIntervalFlag{"--interval"};
constexpr const char* kMaxDurationFlag{"--max_duration"};
constexpr const char* kFilterPackagesFlag{"--filter_packages"};
53 
54 // Performance data collected from the `/proc/uid_io/stats` file.
55 struct UidIoPerfData {
56     struct Stats {
57         userid_t userId = 0;
58         std::string packageName;
59         uint64_t bytes[UID_STATES];
60         uint64_t fsync[UID_STATES];
61     };
62     std::vector<Stats> topNReads = {};
63     std::vector<Stats> topNWrites = {};
64     uint64_t total[METRIC_TYPES][UID_STATES] = {{0}};
65 };
66 
// Returns a std::string built from |perfData|. Declaration only; the definition is not in this
// header (presumably a human-readable rendering used in dumps -- confirm in the implementation).
std::string toString(const UidIoPerfData& perfData);
68 
// Performance data collected from the `/proc/stat` file. Every counter defaults to zero.
struct SystemIoPerfData {
    uint64_t cpuIoWaitTime{0};
    uint64_t totalCpuTime{0};
    uint32_t ioBlockedProcessesCnt{0};
    uint32_t totalProcessesCnt{0};
};
76 
// Returns a std::string built from |perfData|. Declaration only; the definition is not in this
// header (presumably a human-readable rendering used in dumps -- confirm in the implementation).
std::string toString(const SystemIoPerfData& perfData);
78 
79 // Performance data collected from the `/proc/[pid]/stat` and `/proc/[pid]/task/[tid]/stat` files.
80 struct ProcessIoPerfData {
81     struct UidStats {
82         userid_t userId = 0;
83         std::string packageName;
84         uint64_t count = 0;
85         struct ProcessStats {
86             std::string comm = "";
87             uint64_t count = 0;
88         };
89         std::vector<ProcessStats> topNProcesses = {};
90     };
91     std::vector<UidStats> topNIoBlockedUids = {};
92     // Total # of tasks owned by each UID in |topNIoBlockedUids|.
93     std::vector<uint64_t> topNIoBlockedUidsTotalTaskCnt = {};
94     std::vector<UidStats> topNMajorFaultUids = {};
95     uint64_t totalMajorFaults = 0;
96     // Percentage of increase/decrease in the major page faults since last collection.
97     double majorFaultsPercentChange = 0.0;
98 };
99 
// Returns a std::string built from |data|. Declaration only; the definition is not in this
// header (presumably a human-readable rendering used in dumps -- confirm in the implementation).
std::string toString(const ProcessIoPerfData& data);
101 
102 struct IoPerfRecord {
103     time_t time;  // Collection time.
104     UidIoPerfData uidIoPerfData;
105     SystemIoPerfData systemIoPerfData;
106     ProcessIoPerfData processIoPerfData;
107 };
108 
// Returns a std::string built from |record|. Declaration only; the definition is not in this
// header (presumably a human-readable rendering used in dumps -- confirm in the implementation).
std::string toString(const IoPerfRecord& record);
110 
111 struct CollectionInfo {
112     std::chrono::nanoseconds interval = 0ns;  // Collection interval between subsequent collections.
113     size_t maxCacheSize = 0;                  // Maximum cache size for the collection.
114     std::unordered_set<std::string> filterPackages;  // Filter the output only to the specified
115                                                      // packages.
116     nsecs_t lastCollectionUptime = 0;         // Used to calculate the uptime for next collection.
117     std::vector<IoPerfRecord> records;        // Cache of collected performance records.
118 };
119 
// Returns a std::string built from |collectionInfo|. Declaration only; the definition is not in
// this header (presumably a human-readable rendering -- confirm in the implementation).
std::string toString(const CollectionInfo& collectionInfo);
121 
// Collection events tracked by |IoPerfCollection|. The numeric values are contiguous starting at
// zero; |LAST_EVENT| is a sentinel one past the last real event and is used as the base value for
// |SwitchEvent|.
enum CollectionEvent {
    INIT = 0,
    BOOT_TIME = 1,
    PERIODIC = 2,
    CUSTOM = 3,
    TERMINATED = 4,
    LAST_EVENT = 5,
};
130 
131 enum SwitchEvent {
132     // Ends boot-time collection by collecting the last boot-time record and switching the
133     // collection event to periodic collection.
134     END_BOOTTIME_COLLECTION = CollectionEvent::LAST_EVENT + 1,
135     // Ends custom collection, discards collected data and starts periodic collection.
136     END_CUSTOM_COLLECTION
137 };
138 
toString(CollectionEvent event)139 static inline std::string toString(CollectionEvent event) {
140     switch (event) {
141         case CollectionEvent::INIT:
142             return "INIT";
143         case CollectionEvent::BOOT_TIME:
144             return "BOOT_TIME";
145         case CollectionEvent::PERIODIC:
146             return "PERIODIC";
147         case CollectionEvent::CUSTOM:
148             return "CUSTOM";
149         case CollectionEvent::TERMINATED:
150             return "TERMINATED";
151         default:
152             return "INVALID";
153     }
154 }
155 
156 // IoPerfCollection implements the I/O performance data collection module of the CarWatchDog
157 // service. It exposes APIs that the CarWatchDog main thread and binder service can call to start
158 // a collection, update the collection type, and generate collection dumps.
159 class IoPerfCollection : public MessageHandler {
160 public:
IoPerfCollection()161     IoPerfCollection() :
162           mHandlerLooper(new LooperWrapper()),
163           mBoottimeCollection({}),
164           mPeriodicCollection({}),
165           mCustomCollection({}),
166           mCurrCollectionEvent(CollectionEvent::INIT),
167           mUidToPackageNameMapping({}),
168           mUidIoStats(new UidIoStats()),
169           mProcStat(new ProcStat()),
170           mProcPidStat(new ProcPidStat()),
171           mLastMajorFaults(0) {}
172 
~IoPerfCollection()173     ~IoPerfCollection() { terminate(); }
174 
175     // Starts the boot-time collection in the looper handler on a collection thread and returns
176     // immediately. Must be called only once. Otherwise, returns an error.
177     android::base::Result<void> start();
178 
179     // Terminates the collection thread and returns.
180     void terminate();
181 
182     // Ends the boot-time collection, caches boot-time perf records, sends message to the looper to
183     // begin the periodic collection, and returns immediately.
184     virtual android::base::Result<void> onBootFinished();
185 
186     // Depending the arguments, it either:
187     // 1. Generates a dump from the boot-time and periodic collection events.
188     // 2. Starts custom collection.
189     // 3. Ends custom collection and dumps the collected data.
190     // Returns any error observed during the dump generation.
191     virtual android::base::Result<void> dump(int fd, const Vector<String16>& args);
192 
193     // Dumps the help text.
194     bool dumpHelpText(int fd);
195 
196 private:
197     // Generates a dump from the boot-time and periodic collection events.
198     android::base::Result<void> dumpCollection(int fd);
199 
200     // Dumps the collectors' status when they are disabled.
201     android::base::Result<void> dumpCollectorsStatusLocked(int fd);
202 
203     // Starts a custom collection on the looper handler, temporarily stops the periodic collection
204     // (won't discard the collected data), and returns immediately. Returns any error observed
205     // during this process. The custom collection happens once every |interval| seconds. When the
206     // |maxDuration| is reached, the looper receives a message to end the collection, discards the
207     // collected data, and starts the periodic collection. This is needed to ensure the custom
208     // collection doesn't run forever when a subsequent |endCustomCollection| call is not received.
209     // When |kFilterPackagesFlag| value is provided, the results are filtered only to the specified
210     // package names.
211     android::base::Result<void> startCustomCollection(
212             std::chrono::nanoseconds interval, std::chrono::nanoseconds maxDuration,
213             const std::unordered_set<std::string>& filterPackages);
214 
215     // Ends the current custom collection, generates a dump, sends message to looper to start the
216     // periodic collection, and returns immediately. Returns an error when there is no custom
217     // collection running or when a dump couldn't be generated from the custom collection.
218     android::base::Result<void> endCustomCollection(int fd);
219 
220     // Handles the messages received by the lopper.
221     void handleMessage(const Message& message) override;
222 
223     // Processes the events received by |handleMessage|.
224     android::base::Result<void> processCollectionEvent(CollectionEvent event, CollectionInfo* info);
225 
226     // Collects/stores the performance data for the current collection event.
227     android::base::Result<void> collectLocked(CollectionInfo* collectionInfo);
228 
229     // Collects performance data from the `/proc/uid_io/stats` file.
230     android::base::Result<void> collectUidIoPerfDataLocked(const CollectionInfo& collectionInfo,
231                                                            UidIoPerfData* uidIoPerfData);
232 
233     // Collects performance data from the `/proc/stats` file.
234     android::base::Result<void> collectSystemIoPerfDataLocked(SystemIoPerfData* systemIoPerfData);
235 
236     // Collects performance data from the `/proc/[pid]/stat` and
237     // `/proc/[pid]/task/[tid]/stat` files.
238     android::base::Result<void> collectProcessIoPerfDataLocked(
239             const CollectionInfo& collectionInfo, ProcessIoPerfData* processIoPerfData);
240 
241     // Updates the |mUidToPackageNameMapping| for the given |uids|.
242     android::base::Result<void> updateUidToPackageNameMapping(
243             const std::unordered_set<uint32_t>& uids);
244 
245     // Retrieves package manager from the default service manager.
246     android::base::Result<void> retrievePackageManager();
247 
248     // Top N per-UID stats per category.
249     int mTopNStatsPerCategory;
250 
251     // Top N per-process stats per subcategory.
252     int mTopNStatsPerSubcategory;
253 
254     // Thread on which the actual collection happens.
255     std::thread mCollectionThread;
256 
257     // Makes sure only one collection is running at any given time.
258     Mutex mMutex;
259 
260     // Handler lopper to execute different collection events on the collection thread.
261     android::sp<LooperWrapper> mHandlerLooper GUARDED_BY(mMutex);
262 
263     // Info for the |CollectionEvent::BOOT_TIME| collection event. The cache is persisted until
264     // system shutdown/reboot.
265     CollectionInfo mBoottimeCollection GUARDED_BY(mMutex);
266 
267     // Info for the |CollectionEvent::PERIODIC| collection event. The cache size is limited by
268     // |ro.carwatchdog.periodic_collection_buffer_size|.
269     CollectionInfo mPeriodicCollection GUARDED_BY(mMutex);
270 
271     // Info for the |CollectionEvent::CUSTOM| collection event. The info is cleared at the end of
272     // every custom collection.
273     CollectionInfo mCustomCollection GUARDED_BY(mMutex);
274 
275     // Tracks the current collection event. Updated on |start|, |onBootComplete|,
276     // |startCustomCollection| and |endCustomCollection|.
277     CollectionEvent mCurrCollectionEvent GUARDED_BY(mMutex);
278 
279     // Cache of uid to package name mapping.
280     std::unordered_map<uint64_t, std::string> mUidToPackageNameMapping GUARDED_BY(mMutex);
281 
282     // Collector/parser for `/proc/uid_io/stats`.
283     android::sp<UidIoStats> mUidIoStats GUARDED_BY(mMutex);
284 
285     // Collector/parser for `/proc/stat`.
286     android::sp<ProcStat> mProcStat GUARDED_BY(mMutex);
287 
288     // Collector/parser for `/proc/PID/*` stat files.
289     android::sp<ProcPidStat> mProcPidStat GUARDED_BY(mMutex);
290 
291     // Major faults delta from last collection. Useful when calculating the percentage change in
292     // major faults since last collection.
293     uint64_t mLastMajorFaults GUARDED_BY(mMutex);
294 
295     // To get the package names from app uids.
296     android::sp<android::content::pm::IPackageManagerNative> mPackageManager GUARDED_BY(mMutex);
297 
298     FRIEND_TEST(IoPerfCollectionTest, TestCollectionStartAndTerminate);
299     FRIEND_TEST(IoPerfCollectionTest, TestValidCollectionSequence);
300     FRIEND_TEST(IoPerfCollectionTest, TestCollectionTerminatesOnZeroEnabledCollectors);
301     FRIEND_TEST(IoPerfCollectionTest, TestCollectionTerminatesOnError);
302     FRIEND_TEST(IoPerfCollectionTest, TestCustomCollectionTerminatesAfterMaxDuration);
303     FRIEND_TEST(IoPerfCollectionTest, TestValidUidIoStatFile);
304     FRIEND_TEST(IoPerfCollectionTest, TestUidIOStatsLessThanTopNStatsLimit);
305     FRIEND_TEST(IoPerfCollectionTest, TestProcUidIoStatsContentsFromDevice);
306     FRIEND_TEST(IoPerfCollectionTest, TestValidProcStatFile);
307     FRIEND_TEST(IoPerfCollectionTest, TestValidProcPidContents);
308     FRIEND_TEST(IoPerfCollectionTest, TestProcPidContentsLessThanTopNStatsLimit);
309     FRIEND_TEST(IoPerfCollectionTest, TestCustomCollectionFiltersPackageNames);
310 };
311 
312 }  // namespace watchdog
313 }  // namespace automotive
314 }  // namespace android
315 
316 #endif  //  WATCHDOG_SERVER_SRC_IOPERFCOLLECTION_H_
317