1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef METRIC_PRODUCER_H
18 #define METRIC_PRODUCER_H
19 
20 #include <shared_mutex>
21 
22 #include <frameworks/base/cmds/statsd/src/active_config_list.pb.h>
23 #include "HashableDimensionKey.h"
24 #include "anomaly/AnomalyTracker.h"
25 #include "condition/ConditionWizard.h"
26 #include "config/ConfigKey.h"
27 #include "matchers/matcher_util.h"
28 #include "packages/PackageInfoListener.h"
29 
30 #include <log/logprint.h>
31 #include <utils/RefBase.h>
32 #include <unordered_map>
33 
34 namespace android {
35 namespace os {
36 namespace statsd {
37 
// Keep this in sync with DumpReportReason enum in stats_log.proto.
// The numeric values are spelled out explicitly because they have to match the proto enum;
// do not renumber or reuse them.
enum DumpReportReason {
    DEVICE_SHUTDOWN = 1,
    CONFIG_UPDATED = 2,
    CONFIG_REMOVED = 3,
    GET_DATA_CALLED = 4,
    ADB_DUMP = 5,
    CONFIG_RESET = 6,
    STATSCOMPANION_DIED = 7,
    TERMINATION_SIGNAL_RECEIVED = 8
};
49 
// If the metric has no activation requirement, it will be active once the metric producer is
// created.
// If the metric needs to be activated by atoms, the metric producer will start in the kNotActive
// state, turn to kActive or kActiveOnBoot when the activation event arrives, and become
// kNotActive again when it reaches the duration limit (timebomb). If the activation event arrives
// again, before or after it expires, the metric producer will be re-activated and the ttl will be
// reset.
enum ActivationState {
    kNotActive = 0,
    kActive = 1,
    kActiveOnBoot = 2,
};
61 
// How much time a dump is allowed to take.
enum DumpLatency {
    // In some cases, we only have a short time range to do the dump, e.g. statsd is being killed.
    // We might not be able to return all the data in this mode. For instance, pull metrics might
    // need to be pulled when the current bucket is requested.
    // NOTE(review): this comment previously read "might be able to", which contradicts the
    // example that follows it -- confirm the intended wording.
    FAST = 1,
    // In other cases, it is fine for a dump to take more than a few milliseconds, e.g. config
    // updates.
    NO_TIME_CONSTRAINTS = 2
};
71 
72 // A MetricProducer is responsible for compute one single metrics, creating stats log report, and
73 // writing the report to dropbox. MetricProducers should respond to package changes as required in
74 // PackageInfoListener, but if none of the metrics are slicing by package name, then the update can
75 // be a no-op.
76 class MetricProducer : public virtual PackageInfoListener {
77 public:
MetricProducer(const int64_t & metricId,const ConfigKey & key,const int64_t timeBaseNs,const int conditionIndex,const sp<ConditionWizard> & wizard)78     MetricProducer(const int64_t& metricId, const ConfigKey& key, const int64_t timeBaseNs,
79                    const int conditionIndex, const sp<ConditionWizard>& wizard)
80         : mMetricId(metricId),
81           mConfigKey(key),
82           mTimeBaseNs(timeBaseNs),
83           mCurrentBucketStartTimeNs(timeBaseNs),
84           mCurrentBucketNum(0),
85           mCondition(initialCondition(conditionIndex)),
86           mConditionSliced(false),
87           mWizard(wizard),
88           mConditionTrackerIndex(conditionIndex),
89           mContainANYPositionInDimensionsInWhat(false),
90           mSliceByPositionALL(false),
91           mSameConditionDimensionsInTracker(false),
92           mHasLinksToAllConditionDimensionsInTracker(false),
93           mIsActive(true) {
94     }
95 
~MetricProducer()96     virtual ~MetricProducer(){};
97 
initialCondition(const int conditionIndex)98     ConditionState initialCondition(const int conditionIndex) const {
99         return conditionIndex >= 0 ? ConditionState::kUnknown : ConditionState::kTrue;
100     }
101 
102     /**
103      * Forces this metric to split into a partial bucket right now. If we're past a full bucket, we
104      * first call the standard flushing code to flush up to the latest full bucket. Then we call
105      * the flush again when the end timestamp is forced to be now, and then after flushing, update
106      * the start timestamp to be now.
107      */
notifyAppUpgrade(const int64_t & eventTimeNs,const string & apk,const int uid,const int64_t version)108     void notifyAppUpgrade(const int64_t& eventTimeNs, const string& apk, const int uid,
109                           const int64_t version) override {
110         std::lock_guard<std::mutex> lock(mMutex);
111 
112         if (eventTimeNs > getCurrentBucketEndTimeNs()) {
113             // Flush full buckets on the normal path up to the latest bucket boundary.
114             flushIfNeededLocked(eventTimeNs);
115         }
116         // Now flush a partial bucket.
117         flushCurrentBucketLocked(eventTimeNs, eventTimeNs);
118         // Don't update the current bucket number so that the anomaly tracker knows this bucket
119         // is a partial bucket and can merge it with the previous bucket.
120     };
121 
notifyAppRemoved(const int64_t & eventTimeNs,const string & apk,const int uid)122     void notifyAppRemoved(const int64_t& eventTimeNs, const string& apk, const int uid) override{
123         // Force buckets to split on removal also.
124         notifyAppUpgrade(eventTimeNs, apk, uid, 0);
125     };
126 
onUidMapReceived(const int64_t & eventTimeNs)127     void onUidMapReceived(const int64_t& eventTimeNs) override{
128             // Purposefully don't flush partial buckets on a new snapshot.
129             // This occurs if a new user is added/removed or statsd crashes.
130     };
131 
132     // Consume the parsed stats log entry that already matched the "what" of the metric.
onMatchedLogEvent(const size_t matcherIndex,const LogEvent & event)133     void onMatchedLogEvent(const size_t matcherIndex, const LogEvent& event) {
134         std::lock_guard<std::mutex> lock(mMutex);
135         onMatchedLogEventLocked(matcherIndex, event);
136     }
137 
onConditionChanged(const bool condition,const int64_t eventTime)138     void onConditionChanged(const bool condition, const int64_t eventTime) {
139         std::lock_guard<std::mutex> lock(mMutex);
140         onConditionChangedLocked(condition, eventTime);
141     }
142 
onSlicedConditionMayChange(bool overallCondition,const int64_t eventTime)143     void onSlicedConditionMayChange(bool overallCondition, const int64_t eventTime) {
144         std::lock_guard<std::mutex> lock(mMutex);
145         onSlicedConditionMayChangeLocked(overallCondition, eventTime);
146     }
147 
isConditionSliced()148     bool isConditionSliced() const {
149         std::lock_guard<std::mutex> lock(mMutex);
150         return mConditionSliced;
151     };
152 
153     // Output the metrics data to [protoOutput]. All metrics reports end with the same timestamp.
154     // This method clears all the past buckets.
onDumpReport(const int64_t dumpTimeNs,const bool include_current_partial_bucket,const bool erase_data,const DumpLatency dumpLatency,std::set<string> * str_set,android::util::ProtoOutputStream * protoOutput)155     void onDumpReport(const int64_t dumpTimeNs,
156                       const bool include_current_partial_bucket,
157                       const bool erase_data,
158                       const DumpLatency dumpLatency,
159                       std::set<string> *str_set,
160                       android::util::ProtoOutputStream* protoOutput) {
161         std::lock_guard<std::mutex> lock(mMutex);
162         return onDumpReportLocked(dumpTimeNs, include_current_partial_bucket, erase_data,
163                 dumpLatency, str_set, protoOutput);
164     }
165 
clearPastBuckets(const int64_t dumpTimeNs)166     void clearPastBuckets(const int64_t dumpTimeNs) {
167         std::lock_guard<std::mutex> lock(mMutex);
168         return clearPastBucketsLocked(dumpTimeNs);
169     }
170 
dumpStates(FILE * out,bool verbose)171     void dumpStates(FILE* out, bool verbose) const {
172         std::lock_guard<std::mutex> lock(mMutex);
173         dumpStatesLocked(out, verbose);
174     }
175 
176     // Returns the memory in bytes currently used to store this metric's data. Does not change
177     // state.
byteSize()178     size_t byteSize() const {
179         std::lock_guard<std::mutex> lock(mMutex);
180         return byteSizeLocked();
181     }
182 
183     /* If alert is valid, adds an AnomalyTracker and returns it. If invalid, returns nullptr. */
addAnomalyTracker(const Alert & alert,const sp<AlarmMonitor> & anomalyAlarmMonitor)184     virtual sp<AnomalyTracker> addAnomalyTracker(const Alert &alert,
185                                                  const sp<AlarmMonitor>& anomalyAlarmMonitor) {
186         std::lock_guard<std::mutex> lock(mMutex);
187         sp<AnomalyTracker> anomalyTracker = new AnomalyTracker(alert, mConfigKey);
188         if (anomalyTracker != nullptr) {
189             mAnomalyTrackers.push_back(anomalyTracker);
190         }
191         return anomalyTracker;
192     }
193 
getBuckeSizeInNs()194     int64_t getBuckeSizeInNs() const {
195         std::lock_guard<std::mutex> lock(mMutex);
196         return mBucketSizeNs;
197     }
198 
199     // Only needed for unit-testing to override guardrail.
setBucketSize(int64_t bucketSize)200     void setBucketSize(int64_t bucketSize) {
201         mBucketSizeNs = bucketSize;
202     }
203 
getMetricId()204     inline const int64_t& getMetricId() const {
205         return mMetricId;
206     }
207 
loadActiveMetric(const ActiveMetric & activeMetric,int64_t currentTimeNs)208     void loadActiveMetric(const ActiveMetric& activeMetric, int64_t currentTimeNs) {
209         std::lock_guard<std::mutex> lock(mMutex);
210         loadActiveMetricLocked(activeMetric, currentTimeNs);
211     }
212 
213     // Let MetricProducer drop in-memory data to save memory.
214     // We still need to keep future data valid and anomaly tracking work, which means we will
215     // have to flush old data, informing anomaly trackers then safely drop old data.
216     // We still keep current bucket data for future metrics' validity.
dropData(const int64_t dropTimeNs)217     void dropData(const int64_t dropTimeNs) {
218         std::lock_guard<std::mutex> lock(mMutex);
219         dropDataLocked(dropTimeNs);
220     }
221 
222     // For test only.
getCurrentBucketNum()223     inline int64_t getCurrentBucketNum() const {
224         return mCurrentBucketNum;
225     }
226 
activate(int activationTrackerIndex,int64_t elapsedTimestampNs)227     void activate(int activationTrackerIndex, int64_t elapsedTimestampNs) {
228         std::lock_guard<std::mutex> lock(mMutex);
229         activateLocked(activationTrackerIndex, elapsedTimestampNs);
230     }
231 
cancelEventActivation(int deactivationTrackerIndex)232     void cancelEventActivation(int deactivationTrackerIndex) {
233         std::lock_guard<std::mutex> lock(mMutex);
234         cancelEventActivationLocked(deactivationTrackerIndex);
235     }
236 
isActive()237     bool isActive() const {
238         std::lock_guard<std::mutex> lock(mMutex);
239         return isActiveLocked();
240     }
241 
242     void addActivation(int activationTrackerIndex, const ActivationType& activationType,
243             int64_t ttl_seconds, int deactivationTrackerIndex = -1);
244 
prepareFirstBucket()245     void prepareFirstBucket() {
246         std::lock_guard<std::mutex> lock(mMutex);
247         prepareFirstBucketLocked();
248     }
249 
250     void flushIfExpire(int64_t elapsedTimestampNs);
251 
252     void writeActiveMetricToProtoOutputStream(
253             int64_t currentTimeNs, const DumpReportReason reason, ProtoOutputStream* proto);
254 protected:
255     virtual void onConditionChangedLocked(const bool condition, const int64_t eventTime) = 0;
256     virtual void onSlicedConditionMayChangeLocked(bool overallCondition,
257                                                   const int64_t eventTime) = 0;
258     virtual void onDumpReportLocked(const int64_t dumpTimeNs,
259                                     const bool include_current_partial_bucket,
260                                     const bool erase_data,
261                                     const DumpLatency dumpLatency,
262                                     std::set<string> *str_set,
263                                     android::util::ProtoOutputStream* protoOutput) = 0;
264     virtual void clearPastBucketsLocked(const int64_t dumpTimeNs) = 0;
265     virtual size_t byteSizeLocked() const = 0;
266     virtual void dumpStatesLocked(FILE* out, bool verbose) const = 0;
267 
268     bool evaluateActiveStateLocked(int64_t elapsedTimestampNs);
269 
270     void activateLocked(int activationTrackerIndex, int64_t elapsedTimestampNs);
271     void cancelEventActivationLocked(int deactivationTrackerIndex);
272 
isActiveLocked()273     inline bool isActiveLocked() const {
274         return mIsActive;
275     }
276 
277     void loadActiveMetricLocked(const ActiveMetric& activeMetric, int64_t currentTimeNs);
278 
prepareFirstBucketLocked()279     virtual void prepareFirstBucketLocked() {};
280     /**
281      * Flushes the current bucket if the eventTime is after the current bucket's end time. This will
282        also flush the current partial bucket in memory.
283      */
flushIfNeededLocked(const int64_t & eventTime)284     virtual void flushIfNeededLocked(const int64_t& eventTime){};
285 
286     /**
287      * Flushes all the data including the current partial bucket.
288      */
flushLocked(const int64_t & eventTimeNs)289     virtual void flushLocked(const int64_t& eventTimeNs) {
290         flushIfNeededLocked(eventTimeNs);
291         flushCurrentBucketLocked(eventTimeNs, eventTimeNs);
292     };
293 
294     /**
295      * For metrics that aggregate (ie, every metric producer except for EventMetricProducer),
296      * we need to be able to flush the current buckets on demand (ie, end the current bucket and
297      * start new bucket). If this function is called when eventTimeNs is greater than the current
298      * bucket's end timestamp, than we flush up to the end of the latest full bucket; otherwise,
299      * we assume that we want to flush a partial bucket. The bucket start timestamp and bucket
300      * number are not changed by this function. This method should only be called by
301      * flushIfNeededLocked or flushLocked or the app upgrade handler; the caller MUST update the
302      * bucket timestamp and bucket number as needed.
303      */
flushCurrentBucketLocked(const int64_t & eventTimeNs,const int64_t & nextBucketStartTimeNs)304     virtual void flushCurrentBucketLocked(const int64_t& eventTimeNs,
305                                           const int64_t& nextBucketStartTimeNs) {};
306 
onActiveStateChangedLocked(const int64_t & eventTimeNs)307     virtual void onActiveStateChangedLocked(const int64_t& eventTimeNs) {
308         if (!mIsActive) {
309             flushLocked(eventTimeNs);
310         }
311     }
312 
313     // Convenience to compute the current bucket's end time, which is always aligned with the
314     // start time of the metric.
getCurrentBucketEndTimeNs()315     int64_t getCurrentBucketEndTimeNs() const {
316         return mTimeBaseNs + (mCurrentBucketNum + 1) * mBucketSizeNs;
317     }
318 
getBucketNumFromEndTimeNs(const int64_t endNs)319     int64_t getBucketNumFromEndTimeNs(const int64_t endNs) {
320         return (endNs - mTimeBaseNs) / mBucketSizeNs - 1;
321     }
322 
323     virtual void dropDataLocked(const int64_t dropTimeNs) = 0;
324 
325     const int64_t mMetricId;
326 
327     const ConfigKey mConfigKey;
328 
329     // The time when this metric producer was first created. The end time for the current bucket
330     // can be computed from this based on mCurrentBucketNum.
331     int64_t mTimeBaseNs;
332 
333     // Start time may not be aligned with the start of statsd if there is an app upgrade in the
334     // middle of a bucket.
335     int64_t mCurrentBucketStartTimeNs;
336 
337     // Used by anomaly detector to track which bucket we are in. This is not sent with the produced
338     // report.
339     int64_t mCurrentBucketNum;
340 
341     int64_t mBucketSizeNs;
342 
343     ConditionState mCondition;
344 
345     bool mConditionSliced;
346 
347     sp<ConditionWizard> mWizard;
348 
349     int mConditionTrackerIndex;
350 
351     vector<Matcher> mDimensionsInWhat;       // The dimensions_in_what defined in statsd_config
352     vector<Matcher> mDimensionsInCondition;  // The dimensions_in_condition defined in statsd_config
353 
354     bool mContainANYPositionInDimensionsInWhat;
355     bool mSliceByPositionALL;
356 
357     // True iff the condition dimensions equal to the sliced dimensions in the simple condition
358     // tracker. This field is always false for combinational condition trackers.
359     bool mSameConditionDimensionsInTracker;
360 
361     // True iff the metric to condition links cover all dimension fields in the condition tracker.
362     // This field is always false for combinational condition trackers.
363     bool mHasLinksToAllConditionDimensionsInTracker;
364 
365     std::vector<Metric2Condition> mMetric2ConditionLinks;
366 
367     std::vector<sp<AnomalyTracker>> mAnomalyTrackers;
368 
369     /*
370      * Individual metrics can implement their own business logic here. All pre-processing is done.
371      *
372      * [matcherIndex]: the index of the matcher which matched this event. This is interesting to
373      *                 DurationMetric, because it has start/stop/stop_all 3 matchers.
374      * [eventKey]: the extracted dimension key for the final output. if the metric doesn't have
375      *             dimensions, it will be DEFAULT_DIMENSION_KEY
376      * [conditionKey]: the keys of conditions which should be used to query the condition for this
377      *                 target event (from MetricConditionLink). This is passed to individual metrics
378      *                 because DurationMetric needs it to be cached.
379      * [condition]: whether condition is met. If condition is sliced, this is the result coming from
380      *              query with ConditionWizard; If condition is not sliced, this is the
381      *              nonSlicedCondition.
382      * [event]: the log event, just in case the metric needs its data, e.g., EventMetric.
383      */
384     virtual void onMatchedLogEventInternalLocked(
385             const size_t matcherIndex, const MetricDimensionKey& eventKey,
386             const ConditionKey& conditionKey, bool condition,
387             const LogEvent& event) = 0;
388 
389     // Consume the parsed stats log entry that already matched the "what" of the metric.
390     virtual void onMatchedLogEventLocked(const size_t matcherIndex, const LogEvent& event);
391 
392     mutable std::mutex mMutex;
393 
394     struct Activation {
ActivationActivation395         Activation(const ActivationType& activationType, const int64_t ttlNs)
396             : ttl_ns(ttlNs),
397               start_ns(0),
398               state(ActivationState::kNotActive),
399               activationType(activationType) {}
400 
401         const int64_t ttl_ns;
402         int64_t start_ns;
403         ActivationState state;
404         const ActivationType activationType;
405     };
406     // When the metric producer has multiple activations, these activations are ORed to determine
407     // whether the metric producer is ready to generate metrics.
408     std::unordered_map<int, std::shared_ptr<Activation>> mEventActivationMap;
409 
410     // Maps index of atom matcher for deactivation to a list of Activation structs.
411     std::unordered_map<int, std::vector<std::shared_ptr<Activation>>> mEventDeactivationMap;
412 
413     bool mIsActive;
414 
415     FRIEND_TEST(DurationMetricE2eTest, TestOneBucket);
416     FRIEND_TEST(DurationMetricE2eTest, TestTwoBuckets);
417     FRIEND_TEST(DurationMetricE2eTest, TestWithActivation);
418     FRIEND_TEST(DurationMetricE2eTest, TestWithCondition);
419     FRIEND_TEST(DurationMetricE2eTest, TestWithSlicedCondition);
420     FRIEND_TEST(DurationMetricE2eTest, TestWithActivationAndSlicedCondition);
421 
422     FRIEND_TEST(MetricActivationE2eTest, TestCountMetric);
423     FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithOneDeactivation);
424     FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithTwoDeactivations);
425     FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithSameDeactivation);
426     FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithTwoMetricsTwoDeactivations);
427 
428     FRIEND_TEST(StatsLogProcessorTest, TestActiveConfigMetricDiskWriteRead);
429     FRIEND_TEST(StatsLogProcessorTest, TestActivationOnBoot);
430     FRIEND_TEST(StatsLogProcessorTest, TestActivationOnBootMultipleActivations);
431     FRIEND_TEST(StatsLogProcessorTest,
432             TestActivationOnBootMultipleActivationsDifferentActivationTypes);
433     FRIEND_TEST(StatsLogProcessorTest, TestActivationsPersistAcrossSystemServerRestart);
434 };
435 
436 }  // namespace statsd
437 }  // namespace os
438 }  // namespace android
439 #endif  // METRIC_PRODUCER_H
440