1 /*
2  * Copyright (c) 2020, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef CPP_WATCHDOG_SERVER_SRC_WATCHDOGPERFSERVICE_H_
18 #define CPP_WATCHDOG_SERVER_SRC_WATCHDOGPERFSERVICE_H_
19 
20 #include "LooperWrapper.h"
21 #include "ProcDiskStatsCollector.h"
22 #include "ProcStatCollector.h"
23 #include "UidStatsCollector.h"
24 #include "WatchdogServiceHelper.h"
25 
26 #include <WatchdogProperties.sysprop.h>
27 #include <aidl/android/automotive/watchdog/internal/PackageIoOveruseStats.h>
28 #include <aidl/android/automotive/watchdog/internal/ResourceStats.h>
29 #include <aidl/android/automotive/watchdog/internal/UserState.h>
30 #include <android-base/chrono_utils.h>
31 #include <android-base/result.h>
32 #include <android/util/ProtoOutputStream.h>
33 #include <cutils/multiuser.h>
34 #include <gtest/gtest_prod.h>
35 #include <utils/Errors.h>
36 #include <utils/Looper.h>
37 #include <utils/Mutex.h>
38 #include <utils/RefBase.h>
39 #include <utils/String16.h>
40 #include <utils/StrongPointer.h>
41 #include <utils/Vector.h>
42 
43 #include <time.h>
44 
45 #include <string>
46 #include <thread>  // NOLINT(build/c++11)
47 #include <unordered_set>
48 
49 namespace android {
50 namespace automotive {
51 namespace watchdog {
52 
// Test-only peer class. It is forward declared here so that WatchdogPerfService can name it in a
// friend declaration.
namespace internal {
class WatchdogPerfServicePeer;
}  // namespace internal
59 
// Constants shared by WatchdogPerfService and its clients.
//
// They are declared |inline| so that all translation units including this header share a single
// definition. The durations are spelled with explicit std::chrono types so that this header does
// not rely on a using-directive for std::chrono_literals leaking from another header.

// Fallback used by WatchdogPerfService when the post system event duration sysprop has no value.
inline constexpr std::chrono::seconds kDefaultPostSystemEventDurationSec{30};
// Fallback used by WatchdogPerfService when the wake-up event duration sysprop has no value.
inline constexpr std::chrono::seconds kDefaultWakeUpEventDurationSec{30};
// Fallback used by WatchdogPerfService when the user switch timeout sysprop has no value.
inline constexpr std::chrono::seconds kDefaultUserSwitchTimeoutSec{30};
// 10 minutes expressed in nanoseconds.
// NOTE(review): presumably the maximum age of cached unsent resource stats - confirm against the
// implementation.
inline constexpr std::chrono::nanoseconds kPrevUnsentResourceStatsMaxDurationNs =
        std::chrono::minutes{10};

// Argument flags.
// NOTE(review): presumably parsed by |WatchdogPerfServiceInterface::onCustomCollection| - confirm
// against the implementation.
inline constexpr const char* kStartCustomCollectionFlag = "--start_perf";
inline constexpr const char* kEndCustomCollectionFlag = "--stop_perf";
inline constexpr const char* kIntervalFlag = "--interval";
inline constexpr const char* kMaxDurationFlag = "--max_duration";
inline constexpr const char* kFilterPackagesFlag = "--filter_packages";
69 
// System state accepted by |WatchdogPerfServiceInterface::setSystemState| and by the periodic and
// custom collection callbacks of |DataProcessorInterface|.
enum SystemState {
    NORMAL_MODE = 0,  // State that WatchdogPerfService is constructed with.
    GARAGE_MODE = 1,
};
74 
// Time point on std::chrono::system_clock with millisecond resolution. This is the timestamp type
// received by the collection callbacks of |DataProcessorInterface|.
using time_point_millis = std::chrono::time_point<
        /* Clock= */ std::chrono::system_clock, /* Duration= */ std::chrono::milliseconds>;
77 
78 /**
79  * DataProcessor defines methods that must be implemented in order to process the data collected
80  * by |WatchdogPerfService|.
81  */
82 class DataProcessorInterface : virtual public android::RefBase {
83 public:
84     struct CollectionIntervals {
85         std::chrono::milliseconds mBoottimeIntervalMillis = std::chrono::milliseconds(0);
86         std::chrono::milliseconds mPeriodicIntervalMillis = std::chrono::milliseconds(0);
87         std::chrono::milliseconds mUserSwitchIntervalMillis = std::chrono::milliseconds(0);
88         std::chrono::milliseconds mWakeUpIntervalMillis = std::chrono::milliseconds(0);
89         std::chrono::milliseconds mCustomIntervalMillis = std::chrono::milliseconds(0);
90     };
DataProcessorInterface()91     DataProcessorInterface() {}
~DataProcessorInterface()92     virtual ~DataProcessorInterface() {}
93     // Returns the name of the data processor.
94     virtual std::string name() const = 0;
95     // Callback to initialize the data processor.
96     virtual android::base::Result<void> init() = 0;
97     // Callback to terminate the data processor.
98     virtual void terminate() = 0;
99     // Callback to perform actions (such as clearing stats from previous system startup events)
100     // before starting boot-time or wake-up collections.
101     virtual android::base::Result<void> onSystemStartup() = 0;
102     // Callback to perform actions once CarWatchdogService is registered.
103     virtual void onCarWatchdogServiceRegistered() = 0;
104     // Callback to process the data collected during boot-time.
105     virtual android::base::Result<void> onBoottimeCollection(
106             time_point_millis time,
107             const android::wp<UidStatsCollectorInterface>& uidStatsCollector,
108             const android::wp<ProcStatCollectorInterface>& procStatCollector,
109             aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) = 0;
110     // Callback to process the data collected during a wake-up event.
111     virtual android::base::Result<void> onWakeUpCollection(
112             time_point_millis time,
113             const android::wp<UidStatsCollectorInterface>& uidStatsCollector,
114             const android::wp<ProcStatCollectorInterface>& procStatCollector) = 0;
115     // Callback to process the data collected periodically post boot complete.
116     virtual android::base::Result<void> onPeriodicCollection(
117             time_point_millis time, SystemState systemState,
118             const android::wp<UidStatsCollectorInterface>& uidStatsCollector,
119             const android::wp<ProcStatCollectorInterface>& procStatCollector,
120             aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) = 0;
121     // Callback to process the data collected during user switch.
122     virtual android::base::Result<void> onUserSwitchCollection(
123             time_point_millis time, userid_t from, userid_t to,
124             const android::wp<UidStatsCollectorInterface>& uidStatsCollector,
125             const android::wp<ProcStatCollectorInterface>& procStatCollector) = 0;
126 
127     /**
128      * Callback to process the data collected on custom collection and filter the results only to
129      * the specified |filterPackages|.
130      */
131     virtual android::base::Result<void> onCustomCollection(
132             time_point_millis time, SystemState systemState,
133             const std::unordered_set<std::string>& filterPackages,
134             const android::wp<UidStatsCollectorInterface>& uidStatsCollector,
135             const android::wp<ProcStatCollectorInterface>& procStatCollector,
136             aidl::android::automotive::watchdog::internal::ResourceStats* resourceStats) = 0;
137     /**
138      * Callback to periodically monitor the collected data and trigger the given |alertHandler|
139      * on detecting resource overuse.
140      */
141     virtual android::base::Result<void> onPeriodicMonitor(
142             time_t time, const android::wp<ProcDiskStatsCollectorInterface>& procDiskStatsCollector,
143             const std::function<void()>& alertHandler) = 0;
144     // Callback to dump system event data and periodically collected data.
145     virtual android::base::Result<void> onDump(int fd) const = 0;
146     // Callback to dump system event data and periodically collected data in proto format.
147     virtual android::base::Result<void> onDumpProto(
148             const CollectionIntervals& collectionIntervals,
149             android::util::ProtoOutputStream& outProto) const = 0;
150     /**
151      * Callback to dump the custom collected data. When fd == -1, clear the custom collection cache.
152      */
153     virtual android::base::Result<void> onCustomCollectionDump(int fd) = 0;
154 };
155 
/**
 * Service states and events tracked by WatchdogPerfService. Enumerators are numbered
 * consecutively starting at zero. |LAST_EVENT| must remain the final enumerator because the
 * |SwitchMessage| values are numbered relative to it.
 */
enum EventType {
    // States of the WatchdogPerfService itself.
    INIT = 0,
    TERMINATED,

    // Events during which performance data is collected.
    BOOT_TIME_COLLECTION,
    PERIODIC_COLLECTION,
    USER_SWITCH_COLLECTION,
    WAKE_UP_COLLECTION,
    CUSTOM_COLLECTION,

    // Event during which the collected data is monitored.
    PERIODIC_MONITOR,

    // Sentinel marking the end of the event range.
    LAST_EVENT,
};
173 
174 enum SwitchMessage {
175     /**
176      * On receiving this message, collect the last boot-time record and start periodic collection
177      * and monitor.
178      */
179     END_BOOTTIME_COLLECTION = EventType::LAST_EVENT + 1,
180 
181     /**
182      * On receiving this message, collect the last user switch record and start periodic collection
183      * and monitor.
184      */
185     END_USER_SWITCH_COLLECTION,
186 
187     /**
188      * On receiving this message, collect the last wake up record and start periodic collection and
189      * monitor.
190      */
191     END_WAKE_UP_COLLECTION,
192 
193     /**
194      * On receiving this message, ends custom collection, discard collected data and start periodic
195      * collection and monitor.
196      */
197     END_CUSTOM_COLLECTION,
198 
199     LAST_SWITCH_MSG,
200 };
201 
202 enum TaskMessage {
203     // On receiving this message, send the cached resource stats to CarWatchdogService.
204     SEND_RESOURCE_STATS = SwitchMessage::LAST_SWITCH_MSG + 1,
205 };
206 
207 /**
208  * WatchdogPerfServiceInterface collects performance data during boot-time, user switch, system wake
209  * up and periodically post system events. It exposes APIs that the main thread and binder service
210  * can call to start a collection, switch the collection type, and generate collection dumps.
211  */
212 class WatchdogPerfServiceInterface : virtual public MessageHandler {
213 public:
214     // Register a data processor to process the data collected by |WatchdogPerfService|.
215     virtual android::base::Result<void> registerDataProcessor(
216             android::sp<DataProcessorInterface> processor) = 0;
217     /**
218      * Starts the boot-time collection in the looper handler on a new thread and returns
219      * immediately. Must be called only once. Otherwise, returns an error.
220      */
221     virtual android::base::Result<void> start() = 0;
222     // Terminates the collection thread and returns.
223     virtual void terminate() = 0;
224     // Sets the system state.
225     virtual void setSystemState(SystemState systemState) = 0;
226     // Handles unsent resource stats.
227     virtual void onCarWatchdogServiceRegistered() = 0;
228     // Ends the boot-time collection by switching to periodic collection after the post event
229     // duration.
230     virtual android::base::Result<void> onBootFinished() = 0;
231     // Starts and ends the user switch collection depending on the user states received.
232     virtual android::base::Result<void> onUserStateChange(
233             userid_t userId,
234             const aidl::android::automotive::watchdog::internal::UserState& userState) = 0;
235     // Starts wake-up collection. Any running collection is stopped, except for custom collections.
236     virtual android::base::Result<void> onSuspendExit() = 0;
237     // Called on shutdown enter, suspend enter and hibernation enter.
238     virtual android::base::Result<void> onShutdownEnter() = 0;
239 
240     /**
241      * Depending on the arguments, it either:
242      * 1. Starts a custom collection.
243      * 2. Or ends the current custom collection and dumps the collected data.
244      * Returns any error observed during the dump generation.
245      */
246     virtual android::base::Result<void> onCustomCollection(int fd, const char** args,
247                                                            uint32_t numArgs) = 0;
248     // Generates a dump from the system events and periodic collection events.
249     virtual android::base::Result<void> onDump(int fd) const = 0;
250     // Generates a proto dump from system events and periodic collection events.
251     virtual android::base::Result<void> onDumpProto(
252             android::util::ProtoOutputStream& outProto) const = 0;
253     // Dumps the help text.
254     virtual bool dumpHelpText(int fd) const = 0;
255 };
256 
257 class WatchdogPerfService final : public WatchdogPerfServiceInterface {
258 public:
WatchdogPerfService(const android::sp<WatchdogServiceHelperInterface> & watchdogServiceHelper,const std::function<int64_t ()> & getElapsedTimeSinceBootMsFunc)259     WatchdogPerfService(const android::sp<WatchdogServiceHelperInterface>& watchdogServiceHelper,
260                         const std::function<int64_t()>& getElapsedTimeSinceBootMsFunc) :
261           kGetElapsedTimeSinceBootMillisFunc(std::move(getElapsedTimeSinceBootMsFunc)),
262           mPostSystemEventDurationNs(std::chrono::duration_cast<std::chrono::nanoseconds>(
263                   std::chrono::seconds(sysprop::postSystemEventDuration().value_or(
264                           kDefaultPostSystemEventDurationSec.count())))),
265           mWakeUpDurationNs(std::chrono::duration_cast<std::chrono::nanoseconds>(
266                   std::chrono::seconds(sysprop::wakeUpEventDuration().value_or(
267                           kDefaultWakeUpEventDurationSec.count())))),
268           mUserSwitchTimeoutNs(std::chrono::duration_cast<std::chrono::nanoseconds>(
269                   std::chrono::seconds(sysprop::userSwitchTimeout().value_or(
270                           kDefaultUserSwitchTimeoutSec.count())))),
271           mHandlerLooper(android::sp<LooperWrapper>::make()),
272           mSystemState(NORMAL_MODE),
273           mBoottimeCollection({}),
274           mPeriodicCollection({}),
275           mUserSwitchCollection({}),
276           mCustomCollection({}),
277           mPeriodicMonitor({}),
278           mUnsentResourceStats({}),
279           mLastCollectionTimeMillis(0),
280           mCurrCollectionEvent(EventType::INIT),
281           mUidStatsCollector(android::sp<UidStatsCollector>::make()),
282           mProcStatCollector(android::sp<ProcStatCollector>::make()),
283           mProcDiskStatsCollector(android::sp<ProcDiskStatsCollector>::make()),
284           mDataProcessors({}),
285           mWatchdogServiceHelper(watchdogServiceHelper) {}
286 
287     android::base::Result<void> registerDataProcessor(
288             android::sp<DataProcessorInterface> processor) override;
289 
290     android::base::Result<void> start() override;
291 
292     void terminate() override;
293 
294     void setSystemState(SystemState systemState) override;
295 
296     void onCarWatchdogServiceRegistered() override;
297 
298     android::base::Result<void> onBootFinished() override;
299 
300     android::base::Result<void> onUserStateChange(
301             userid_t userId,
302             const aidl::android::automotive::watchdog::internal::UserState& userState) override;
303 
304     android::base::Result<void> onSuspendExit() override;
305 
306     android::base::Result<void> onShutdownEnter() override;
307 
308     android::base::Result<void> onCustomCollection(int fd, const char** args,
309                                                    uint32_t numArgs) override;
310 
311     android::base::Result<void> onDump(int fd) const override;
312     android::base::Result<void> onDumpProto(
313             android::util::ProtoOutputStream& outProto) const override;
314 
315     bool dumpHelpText(int fd) const override;
316 
317 private:
318     struct EventMetadata {
319         // Collection or monitor event.
320         EventType eventType = EventType::LAST_EVENT;
321         // Interval between subsequent events.
322         std::chrono::nanoseconds pollingIntervalNs = 0ns;
323         // Used to calculate the uptime for next event.
324         nsecs_t lastPollUptimeNs = 0;
325         // Filter the results only to the specified packages.
326         std::unordered_set<std::string> filterPackages;
327 
328         std::string toString() const;
329     };
330 
331     struct UserSwitchEventMetadata : WatchdogPerfService::EventMetadata {
332         // User id of user being switched from.
333         userid_t from = 0;
334         // User id of user being switched to.
335         userid_t to = 0;
336     };
337 
338     // Dumps the collectors' status when they are disabled.
339     android::base::Result<void> dumpCollectorsStatusLocked(int fd) const;
340 
341     /**
342      * Starts a custom collection on the looper handler, temporarily stops the periodic collection
343      * (won't discard the collected data), and returns immediately. Returns any error observed
344      * during this process.
345      * The custom collection happens once every |interval| seconds. When the |maxDuration| is
346      * reached, the looper receives a message to end the collection, discards the collected data,
347      * and starts the periodic collection. This is needed to ensure the custom collection doesn't
348      * run forever when a subsequent |endCustomCollection| call is not received.
349      * When |kFilterPackagesFlag| value specified, the results are filtered only to the specified
350      * package names.
351      */
352     android::base::Result<void> startCustomCollection(
353             std::chrono::nanoseconds interval, std::chrono::nanoseconds maxDuration,
354             const std::unordered_set<std::string>& filterPackages);
355 
356     /**
357      * Ends the current custom collection, generates a dump, sends a looper message to start the
358      * periodic collection, and returns immediately. Returns an error when there is no custom
359      * collection running or when a dump couldn't be generated from the custom collection.
360      */
361     android::base::Result<void> endCustomCollection(int fd);
362 
363     // Start a user switch collection.
364     android::base::Result<void> startUserSwitchCollection();
365 
366     // Switch to periodic collection and periodic monitor.
367     void switchToPeriodicLocked(bool startNow);
368 
369     // Handles the messages received by the lopper.
370     void handleMessage(const Message& message) override;
371 
372     // Processes the collection events received by |handleMessage|.
373     android::base::Result<void> processCollectionEvent(EventMetadata* metadata);
374 
375     // Collects/processes the performance data for the current collection event.
376     android::base::Result<void> collectLocked(EventMetadata* metadata);
377 
378     // Processes the monitor events received by |handleMessage|.
379     android::base::Result<void> processMonitorEvent(EventMetadata* metadata);
380 
381     // Sends the unsent resource stats.
382     android::base::Result<void> sendResourceStats();
383 
384     // Notifies all registered data processors that either boot-time or wake-up collection will
385     // start. Individual implementations of data processors may clear stats collected during
386     // previous system startup events.
387     android::base::Result<void> notifySystemStartUpLocked();
388 
389     // Caches resource stats that have not been sent to CarWatchdogService.
390     void cacheUnsentResourceStatsLocked(
391             aidl::android::automotive::watchdog::internal::ResourceStats resourceStats);
392 
393     /**
394      * Returns the metadata for the current collection based on |mCurrCollectionEvent|. Returns
395      * nullptr on invalid collection event.
396      */
397     EventMetadata* getCurrentCollectionMetadataLocked();
398 
399     std::function<int64_t()> kGetElapsedTimeSinceBootMillisFunc;
400 
401     // Duration to extend a system event collection after the final signal is received.
402     std::chrono::nanoseconds mPostSystemEventDurationNs;
403 
404     // Duration of the wake-up collection event.
405     std::chrono::nanoseconds mWakeUpDurationNs;
406 
407     // Timeout duration for user switch collection in case final signal isn't received.
408     std::chrono::nanoseconds mUserSwitchTimeoutNs;
409 
410     // Thread on which the actual collection happens.
411     std::thread mCollectionThread;
412 
413     // Makes sure only one collection is running at any given time.
414     mutable Mutex mMutex;
415 
416     // Handler looper to execute different collection events on the collection thread.
417     android::sp<LooperWrapper> mHandlerLooper GUARDED_BY(mMutex);
418 
419     // Current system state.
420     SystemState mSystemState GUARDED_BY(mMutex);
421 
422     // Info for the |EventType::BOOT_TIME_COLLECTION| collection event.
423     EventMetadata mBoottimeCollection GUARDED_BY(mMutex);
424 
425     // Info for the |EventType::PERIODIC_COLLECTION| collection event.
426     EventMetadata mPeriodicCollection GUARDED_BY(mMutex);
427 
428     // Info for the |EventType::USER_SWITCH_COLLECTION| collection event.
429     UserSwitchEventMetadata mUserSwitchCollection GUARDED_BY(mMutex);
430 
431     // Info for the |EventType::WAKE_UP_COLLECTION| collection event.
432     EventMetadata mWakeUpCollection GUARDED_BY(mMutex);
433 
434     // Info for the |EventType::CUSTOM_COLLECTION| collection event. The info is cleared at the end
435     // of every custom collection.
436     EventMetadata mCustomCollection GUARDED_BY(mMutex);
437 
438     // Info for the |EventType::PERIODIC_MONITOR| monitor event.
439     EventMetadata mPeriodicMonitor GUARDED_BY(mMutex);
440 
441     // Cache of resource stats that have not been sent to CarWatchdogService.
442     std::vector<std::tuple<nsecs_t, aidl::android::automotive::watchdog::internal::ResourceStats>>
443             mUnsentResourceStats GUARDED_BY(mMutex);
444 
445     // Tracks the latest collection time since boot in millis.
446     int64_t mLastCollectionTimeMillis GUARDED_BY(mMutex);
447 
448     // Tracks either the WatchdogPerfService's state or current collection event. Updated on
449     // |start|, |onBootFinished|, |onUserStateChange|, |startCustomCollection|,
450     // |endCustomCollection|, and |terminate|.
451     EventType mCurrCollectionEvent GUARDED_BY(mMutex);
452 
453     // Collector for UID process and I/O stats.
454     android::sp<UidStatsCollectorInterface> mUidStatsCollector GUARDED_BY(mMutex);
455 
456     // Collector/parser for `/proc/stat`.
457     android::sp<ProcStatCollectorInterface> mProcStatCollector GUARDED_BY(mMutex);
458 
459     // Collector/parser for `/proc/diskstats` file.
460     android::sp<ProcDiskStatsCollectorInterface> mProcDiskStatsCollector GUARDED_BY(mMutex);
461 
462     // Data processors for the collected performance data.
463     std::vector<android::sp<DataProcessorInterface>> mDataProcessors GUARDED_BY(mMutex);
464 
465     // Helper to communicate with the CarWatchdogService.
466     android::sp<WatchdogServiceHelperInterface> mWatchdogServiceHelper GUARDED_BY(mMutex);
467 
468     // For unit tests.
469     friend class internal::WatchdogPerfServicePeer;
470     FRIEND_TEST(WatchdogPerfServiceTest, TestServiceStartAndTerminate);
471 };
472 
473 }  // namespace watchdog
474 }  // namespace automotive
475 }  // namespace android
476 
477 #endif  //  CPP_WATCHDOG_SERVER_SRC_WATCHDOGPERFSERVICE_H_
478