1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef METRIC_PRODUCER_H
18 #define METRIC_PRODUCER_H
19 
20 #include <frameworks/base/cmds/statsd/src/active_config_list.pb.h>
21 #include <utils/RefBase.h>
22 
23 #include <unordered_map>
24 
25 #include "HashableDimensionKey.h"
26 #include "anomaly/AnomalyTracker.h"
27 #include "condition/ConditionWizard.h"
28 #include "config/ConfigKey.h"
29 #include "matchers/matcher_util.h"
30 #include "packages/PackageInfoListener.h"
31 #include "state/StateListener.h"
32 #include "state/StateManager.h"
33 
34 namespace android {
35 namespace os {
36 namespace statsd {
37 
// Identifies why a dump report is being produced.
// The numeric values must be kept in sync with the DumpReportReason enum in stats_log.proto.
enum DumpReportReason {
    DEVICE_SHUTDOWN             = 1,
    CONFIG_UPDATED              = 2,
    CONFIG_REMOVED              = 3,
    GET_DATA_CALLED             = 4,
    ADB_DUMP                    = 5,
    CONFIG_RESET                = 6,
    STATSCOMPANION_DIED         = 7,
    TERMINATION_SIGNAL_RECEIVED = 8,
};
49 
// Activation life cycle of a metric producer.
//
// - A metric without any activation requirement is active as soon as its metric producer is
//   created.
// - A metric that must be activated by atoms starts out as kNotActive. When the activation event
//   arrives it moves to kActive or kActiveOnBoot, and it falls back to kNotActive once the
//   duration limit (timebomb) is reached.
// - If the activation event arrives again, whether before or after the expiry, the producer is
//   re-activated and its ttl is reset.
enum ActivationState {
    kNotActive    = 0,
    kActive       = 1,
    kActiveOnBoot = 2,
};
61 
// Time budget available to a dump request.
enum DumpLatency {
    // Only a short time range is available for the dump, e.g. statsd is being killed.
    // We might not be able to return all the data in this mode. For instance, pull metrics might
    // need to be pulled when the current bucket is requested.
    FAST                = 1,
    // It is fine for the dump to take more than a few milliseconds, e.g. config updates.
    NO_TIME_CONSTRAINTS = 2,
};
71 
// Identifies why a bucket was dropped or skipped.
// The numeric values must be kept in sync with the BucketDropReason enum in stats_log.proto.
enum BucketDropReason {
    // For ValueMetric, a bucket is dropped during a dump report request iff all of these hold:
    // the current bucket should be included, a pull is needed (pulled metric and condition is
    // true), and we are under fast time constraints.
    DUMP_REPORT_REQUESTED       = 1,
    EVENT_IN_WRONG_BUCKET       = 2,
    CONDITION_UNKNOWN           = 3,
    PULL_FAILED                 = 4,
    PULL_DELAYED                = 5,
    DIMENSION_GUARDRAIL_REACHED = 6,
    MULTIPLE_BUCKETS_SKIPPED    = 7,
    // The bucket is dropped, although this is not an invalid bucket case.
    BUCKET_TOO_SMALL            = 8,
    // The bucket is skipped, although this is not an invalid bucket case.
    NO_DATA                     = 9,
};
89 
90 struct Activation {
ActivationActivation91     Activation(const ActivationType& activationType, const int64_t ttlNs)
92         : ttl_ns(ttlNs),
93           start_ns(0),
94           state(ActivationState::kNotActive),
95           activationType(activationType) {}
96 
97     const int64_t ttl_ns;
98     int64_t start_ns;
99     ActivationState state;
100     const ActivationType activationType;
101 };
102 
103 struct DropEvent {
104     // Reason for dropping the bucket and/or marking the bucket invalid.
105     BucketDropReason reason;
106     // The timestamp of the drop event.
107     int64_t dropTimeNs;
108 };
109 
110 struct SkippedBucket {
111     // Start time of the dropped bucket.
112     int64_t bucketStartTimeNs;
113     // End time of the dropped bucket.
114     int64_t bucketEndTimeNs;
115     // List of events that invalidated this bucket.
116     std::vector<DropEvent> dropEvents;
117 
resetSkippedBucket118     void reset() {
119         bucketStartTimeNs = 0;
120         bucketEndTimeNs = 0;
121         dropEvents.clear();
122     }
123 };
124 
125 // A MetricProducer is responsible for compute one single metrics, creating stats log report, and
126 // writing the report to dropbox. MetricProducers should respond to package changes as required in
127 // PackageInfoListener, but if none of the metrics are slicing by package name, then the update can
128 // be a no-op.
129 class MetricProducer : public virtual android::RefBase, public virtual StateListener {
130 public:
131     MetricProducer(const int64_t& metricId, const ConfigKey& key, const int64_t timeBaseNs,
132                    const int conditionIndex, const vector<ConditionState>& initialConditionCache,
133                    const sp<ConditionWizard>& wizard,
134                    const std::unordered_map<int, std::shared_ptr<Activation>>& eventActivationMap,
135                    const std::unordered_map<int, std::vector<std::shared_ptr<Activation>>>&
136                            eventDeactivationMap,
137                    const vector<int>& slicedStateAtoms,
138                    const unordered_map<int, unordered_map<int, int64_t>>& stateGroupMap);
139 
~MetricProducer()140     virtual ~MetricProducer(){};
141 
initialCondition(const int conditionIndex,const vector<ConditionState> & initialConditionCache)142     ConditionState initialCondition(const int conditionIndex,
143                                     const vector<ConditionState>& initialConditionCache) const {
144         return conditionIndex >= 0 ? initialConditionCache[conditionIndex] : ConditionState::kTrue;
145     }
146 
147     /**
148      * Force a partial bucket split on app upgrade
149      */
notifyAppUpgrade(const int64_t & eventTimeNs)150     virtual void notifyAppUpgrade(const int64_t& eventTimeNs) {
151         std::lock_guard<std::mutex> lock(mMutex);
152         flushLocked(eventTimeNs);
153     };
154 
notifyAppRemoved(const int64_t & eventTimeNs)155     void notifyAppRemoved(const int64_t& eventTimeNs) {
156         // Force buckets to split on removal also.
157         notifyAppUpgrade(eventTimeNs);
158     };
159 
160     /**
161      * Force a partial bucket split on boot complete.
162      */
onStatsdInitCompleted(const int64_t & eventTimeNs)163     virtual void onStatsdInitCompleted(const int64_t& eventTimeNs) {
164         std::lock_guard<std::mutex> lock(mMutex);
165         flushLocked(eventTimeNs);
166     }
167     // Consume the parsed stats log entry that already matched the "what" of the metric.
onMatchedLogEvent(const size_t matcherIndex,const LogEvent & event)168     void onMatchedLogEvent(const size_t matcherIndex, const LogEvent& event) {
169         std::lock_guard<std::mutex> lock(mMutex);
170         onMatchedLogEventLocked(matcherIndex, event);
171     }
172 
onConditionChanged(const bool condition,const int64_t eventTime)173     void onConditionChanged(const bool condition, const int64_t eventTime) {
174         std::lock_guard<std::mutex> lock(mMutex);
175         onConditionChangedLocked(condition, eventTime);
176     }
177 
onSlicedConditionMayChange(bool overallCondition,const int64_t eventTime)178     void onSlicedConditionMayChange(bool overallCondition, const int64_t eventTime) {
179         std::lock_guard<std::mutex> lock(mMutex);
180         onSlicedConditionMayChangeLocked(overallCondition, eventTime);
181     }
182 
isConditionSliced()183     bool isConditionSliced() const {
184         std::lock_guard<std::mutex> lock(mMutex);
185         return mConditionSliced;
186     };
187 
onStateChanged(const int64_t eventTimeNs,const int32_t atomId,const HashableDimensionKey & primaryKey,const FieldValue & oldState,const FieldValue & newState)188     void onStateChanged(const int64_t eventTimeNs, const int32_t atomId,
189                         const HashableDimensionKey& primaryKey, const FieldValue& oldState,
190                         const FieldValue& newState){};
191 
192     // Output the metrics data to [protoOutput]. All metrics reports end with the same timestamp.
193     // This method clears all the past buckets.
onDumpReport(const int64_t dumpTimeNs,const bool include_current_partial_bucket,const bool erase_data,const DumpLatency dumpLatency,std::set<string> * str_set,android::util::ProtoOutputStream * protoOutput)194     void onDumpReport(const int64_t dumpTimeNs,
195                       const bool include_current_partial_bucket,
196                       const bool erase_data,
197                       const DumpLatency dumpLatency,
198                       std::set<string> *str_set,
199                       android::util::ProtoOutputStream* protoOutput) {
200         std::lock_guard<std::mutex> lock(mMutex);
201         return onDumpReportLocked(dumpTimeNs, include_current_partial_bucket, erase_data,
202                 dumpLatency, str_set, protoOutput);
203     }
204 
clearPastBuckets(const int64_t dumpTimeNs)205     void clearPastBuckets(const int64_t dumpTimeNs) {
206         std::lock_guard<std::mutex> lock(mMutex);
207         return clearPastBucketsLocked(dumpTimeNs);
208     }
209 
prepareFirstBucket()210     void prepareFirstBucket() {
211         std::lock_guard<std::mutex> lock(mMutex);
212         prepareFirstBucketLocked();
213     }
214 
215     // Returns the memory in bytes currently used to store this metric's data. Does not change
216     // state.
byteSize()217     size_t byteSize() const {
218         std::lock_guard<std::mutex> lock(mMutex);
219         return byteSizeLocked();
220     }
221 
dumpStates(FILE * out,bool verbose)222     void dumpStates(FILE* out, bool verbose) const {
223         std::lock_guard<std::mutex> lock(mMutex);
224         dumpStatesLocked(out, verbose);
225     }
226 
227     // Let MetricProducer drop in-memory data to save memory.
228     // We still need to keep future data valid and anomaly tracking work, which means we will
229     // have to flush old data, informing anomaly trackers then safely drop old data.
230     // We still keep current bucket data for future metrics' validity.
dropData(const int64_t dropTimeNs)231     void dropData(const int64_t dropTimeNs) {
232         std::lock_guard<std::mutex> lock(mMutex);
233         dropDataLocked(dropTimeNs);
234     }
235 
loadActiveMetric(const ActiveMetric & activeMetric,int64_t currentTimeNs)236     void loadActiveMetric(const ActiveMetric& activeMetric, int64_t currentTimeNs) {
237         std::lock_guard<std::mutex> lock(mMutex);
238         loadActiveMetricLocked(activeMetric, currentTimeNs);
239     }
240 
activate(int activationTrackerIndex,int64_t elapsedTimestampNs)241     void activate(int activationTrackerIndex, int64_t elapsedTimestampNs) {
242         std::lock_guard<std::mutex> lock(mMutex);
243         activateLocked(activationTrackerIndex, elapsedTimestampNs);
244     }
245 
cancelEventActivation(int deactivationTrackerIndex)246     void cancelEventActivation(int deactivationTrackerIndex) {
247         std::lock_guard<std::mutex> lock(mMutex);
248         cancelEventActivationLocked(deactivationTrackerIndex);
249     }
250 
isActive()251     bool isActive() const {
252         std::lock_guard<std::mutex> lock(mMutex);
253         return isActiveLocked();
254     }
255 
256     void flushIfExpire(int64_t elapsedTimestampNs);
257 
258     void writeActiveMetricToProtoOutputStream(
259             int64_t currentTimeNs, const DumpReportReason reason, ProtoOutputStream* proto);
260 
261     // Start: getters/setters
getMetricId()262     inline const int64_t& getMetricId() const {
263         return mMetricId;
264     }
265 
266     // For test only.
getCurrentBucketNum()267     inline int64_t getCurrentBucketNum() const {
268         return mCurrentBucketNum;
269     }
270 
getBucketSizeInNs()271     int64_t getBucketSizeInNs() const {
272         std::lock_guard<std::mutex> lock(mMutex);
273         return mBucketSizeNs;
274     }
275 
getSlicedStateAtoms()276     inline const std::vector<int> getSlicedStateAtoms() {
277         std::lock_guard<std::mutex> lock(mMutex);
278         return mSlicedStateAtoms;
279     }
280 
281     /* If alert is valid, adds an AnomalyTracker and returns it. If invalid, returns nullptr. */
addAnomalyTracker(const Alert & alert,const sp<AlarmMonitor> & anomalyAlarmMonitor)282     virtual sp<AnomalyTracker> addAnomalyTracker(const Alert &alert,
283                                                  const sp<AlarmMonitor>& anomalyAlarmMonitor) {
284         std::lock_guard<std::mutex> lock(mMutex);
285         sp<AnomalyTracker> anomalyTracker = new AnomalyTracker(alert, mConfigKey);
286         if (anomalyTracker != nullptr) {
287             mAnomalyTrackers.push_back(anomalyTracker);
288         }
289         return anomalyTracker;
290     }
291     // End: getters/setters
292 protected:
293     /**
294      * Flushes the current bucket if the eventTime is after the current bucket's end time.
295      */
flushIfNeededLocked(const int64_t & eventTime)296     virtual void flushIfNeededLocked(const int64_t& eventTime){};
297 
298     /**
299      * For metrics that aggregate (ie, every metric producer except for EventMetricProducer),
300      * we need to be able to flush the current buckets on demand (ie, end the current bucket and
301      * start new bucket). If this function is called when eventTimeNs is greater than the current
302      * bucket's end timestamp, than we flush up to the end of the latest full bucket; otherwise,
303      * we assume that we want to flush a partial bucket. The bucket start timestamp and bucket
304      * number are not changed by this function. This method should only be called by
305      * flushIfNeededLocked or flushLocked or the app upgrade handler; the caller MUST update the
306      * bucket timestamp and bucket number as needed.
307      */
flushCurrentBucketLocked(const int64_t & eventTimeNs,const int64_t & nextBucketStartTimeNs)308     virtual void flushCurrentBucketLocked(const int64_t& eventTimeNs,
309                                           const int64_t& nextBucketStartTimeNs) {};
310 
311     /**
312      * Flushes all the data including the current partial bucket.
313      */
flushLocked(const int64_t & eventTimeNs)314     virtual void flushLocked(const int64_t& eventTimeNs) {
315         flushIfNeededLocked(eventTimeNs);
316         flushCurrentBucketLocked(eventTimeNs, eventTimeNs);
317     };
318 
319     /*
320      * Individual metrics can implement their own business logic here. All pre-processing is done.
321      *
322      * [matcherIndex]: the index of the matcher which matched this event. This is interesting to
323      *                 DurationMetric, because it has start/stop/stop_all 3 matchers.
324      * [eventKey]: the extracted dimension key for the final output. if the metric doesn't have
325      *             dimensions, it will be DEFAULT_DIMENSION_KEY
326      * [conditionKey]: the keys of conditions which should be used to query the condition for this
327      *                 target event (from MetricConditionLink). This is passed to individual metrics
328      *                 because DurationMetric needs it to be cached.
329      * [condition]: whether condition is met. If condition is sliced, this is the result coming from
330      *              query with ConditionWizard; If condition is not sliced, this is the
331      *              nonSlicedCondition.
332      * [event]: the log event, just in case the metric needs its data, e.g., EventMetric.
333      */
334     virtual void onMatchedLogEventInternalLocked(
335             const size_t matcherIndex, const MetricDimensionKey& eventKey,
336             const ConditionKey& conditionKey, bool condition, const LogEvent& event,
337             const map<int, HashableDimensionKey>& statePrimaryKeys) = 0;
338 
339     // Consume the parsed stats log entry that already matched the "what" of the metric.
340     virtual void onMatchedLogEventLocked(const size_t matcherIndex, const LogEvent& event);
341     virtual void onConditionChangedLocked(const bool condition, const int64_t eventTime) = 0;
342     virtual void onSlicedConditionMayChangeLocked(bool overallCondition,
343                                                   const int64_t eventTime) = 0;
344     virtual void onDumpReportLocked(const int64_t dumpTimeNs,
345                                     const bool include_current_partial_bucket,
346                                     const bool erase_data,
347                                     const DumpLatency dumpLatency,
348                                     std::set<string> *str_set,
349                                     android::util::ProtoOutputStream* protoOutput) = 0;
350     virtual void clearPastBucketsLocked(const int64_t dumpTimeNs) = 0;
prepareFirstBucketLocked()351     virtual void prepareFirstBucketLocked(){};
352     virtual size_t byteSizeLocked() const = 0;
353     virtual void dumpStatesLocked(FILE* out, bool verbose) const = 0;
354     virtual void dropDataLocked(const int64_t dropTimeNs) = 0;
355     void loadActiveMetricLocked(const ActiveMetric& activeMetric, int64_t currentTimeNs);
356     void activateLocked(int activationTrackerIndex, int64_t elapsedTimestampNs);
357     void cancelEventActivationLocked(int deactivationTrackerIndex);
358 
359     bool evaluateActiveStateLocked(int64_t elapsedTimestampNs);
360 
onActiveStateChangedLocked(const int64_t & eventTimeNs)361     virtual void onActiveStateChangedLocked(const int64_t& eventTimeNs) {
362         if (!mIsActive) {
363             flushLocked(eventTimeNs);
364         }
365     }
366 
isActiveLocked()367     inline bool isActiveLocked() const {
368         return mIsActive;
369     }
370 
371     // Convenience to compute the current bucket's end time, which is always aligned with the
372     // start time of the metric.
getCurrentBucketEndTimeNs()373     int64_t getCurrentBucketEndTimeNs() const {
374         return mTimeBaseNs + (mCurrentBucketNum + 1) * mBucketSizeNs;
375     }
376 
getBucketNumFromEndTimeNs(const int64_t endNs)377     int64_t getBucketNumFromEndTimeNs(const int64_t endNs) {
378         return (endNs - mTimeBaseNs) / mBucketSizeNs - 1;
379     }
380 
381     // Query StateManager for original state value using the queryKey.
382     // The field and value are output.
383     void queryStateValue(const int32_t atomId, const HashableDimensionKey& queryKey,
384                          FieldValue* value);
385 
386     // If a state map exists for the given atom, replace the original state
387     // value with the group id mapped to the value.
388     // If no state map exists, keep the original state value.
389     void mapStateValue(const int32_t atomId, FieldValue* value);
390 
391     // Returns a HashableDimensionKey with unknown state value for each state
392     // atom.
393     HashableDimensionKey getUnknownStateKey();
394 
395     DropEvent buildDropEvent(const int64_t dropTimeNs, const BucketDropReason reason);
396 
397     // Returns true if the number of drop events in the current bucket has
398     // exceeded the maximum number allowed, which is currently capped at 10.
399     bool maxDropEventsReached();
400 
401     const int64_t mMetricId;
402 
403     const ConfigKey mConfigKey;
404 
405     // The time when this metric producer was first created. The end time for the current bucket
406     // can be computed from this based on mCurrentBucketNum.
407     int64_t mTimeBaseNs;
408 
409     // Start time may not be aligned with the start of statsd if there is an app upgrade in the
410     // middle of a bucket.
411     int64_t mCurrentBucketStartTimeNs;
412 
413     // Used by anomaly detector to track which bucket we are in. This is not sent with the produced
414     // report.
415     int64_t mCurrentBucketNum;
416 
417     int64_t mBucketSizeNs;
418 
419     ConditionState mCondition;
420 
421     int mConditionTrackerIndex;
422 
423     bool mConditionSliced;
424 
425     sp<ConditionWizard> mWizard;
426 
427     bool mContainANYPositionInDimensionsInWhat;
428 
429     bool mSliceByPositionALL;
430 
431     vector<Matcher> mDimensionsInWhat;  // The dimensions_in_what defined in statsd_config
432 
433     // True iff the metric to condition links cover all dimension fields in the condition tracker.
434     // This field is always false for combinational condition trackers.
435     bool mHasLinksToAllConditionDimensionsInTracker;
436 
437     std::vector<Metric2Condition> mMetric2ConditionLinks;
438 
439     std::vector<sp<AnomalyTracker>> mAnomalyTrackers;
440 
441     mutable std::mutex mMutex;
442 
443     // When the metric producer has multiple activations, these activations are ORed to determine
444     // whether the metric producer is ready to generate metrics.
445     std::unordered_map<int, std::shared_ptr<Activation>> mEventActivationMap;
446 
447     // Maps index of atom matcher for deactivation to a list of Activation structs.
448     std::unordered_map<int, std::vector<std::shared_ptr<Activation>>> mEventDeactivationMap;
449 
450     bool mIsActive;
451 
452     // The slice_by_state atom ids defined in statsd_config.
453     const std::vector<int32_t> mSlicedStateAtoms;
454 
455     // Maps atom ids and state values to group_ids (<atom_id, <value, group_id>>).
456     const std::unordered_map<int32_t, std::unordered_map<int, int64_t>> mStateGroupMap;
457 
458     // MetricStateLinks defined in statsd_config that link fields in the state
459     // atom to fields in the "what" atom.
460     std::vector<Metric2State> mMetric2StateLinks;
461 
462     SkippedBucket mCurrentSkippedBucket;
463     // Buckets that were invalidated and had their data dropped.
464     std::vector<SkippedBucket> mSkippedBuckets;
465 
466     FRIEND_TEST(CountMetricE2eTest, TestSlicedState);
467     FRIEND_TEST(CountMetricE2eTest, TestSlicedStateWithMap);
468     FRIEND_TEST(CountMetricE2eTest, TestMultipleSlicedStates);
469     FRIEND_TEST(CountMetricE2eTest, TestSlicedStateWithPrimaryFields);
470     FRIEND_TEST(CountMetricE2eTest, TestInitialConditionChanges);
471 
472     FRIEND_TEST(DurationMetricE2eTest, TestOneBucket);
473     FRIEND_TEST(DurationMetricE2eTest, TestTwoBuckets);
474     FRIEND_TEST(DurationMetricE2eTest, TestWithActivation);
475     FRIEND_TEST(DurationMetricE2eTest, TestWithCondition);
476     FRIEND_TEST(DurationMetricE2eTest, TestWithSlicedCondition);
477     FRIEND_TEST(DurationMetricE2eTest, TestWithActivationAndSlicedCondition);
478     FRIEND_TEST(DurationMetricE2eTest, TestWithSlicedState);
479     FRIEND_TEST(DurationMetricE2eTest, TestWithConditionAndSlicedState);
480     FRIEND_TEST(DurationMetricE2eTest, TestWithSlicedStateMapped);
481     FRIEND_TEST(DurationMetricE2eTest, TestSlicedStatePrimaryFieldsNotSubsetDimInWhat);
482     FRIEND_TEST(DurationMetricE2eTest, TestWithSlicedStatePrimaryFieldsSubset);
483 
484     FRIEND_TEST(MetricActivationE2eTest, TestCountMetric);
485     FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithOneDeactivation);
486     FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithTwoDeactivations);
487     FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithSameDeactivation);
488     FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithTwoMetricsTwoDeactivations);
489 
490     FRIEND_TEST(StatsLogProcessorTest, TestActiveConfigMetricDiskWriteRead);
491     FRIEND_TEST(StatsLogProcessorTest, TestActivationOnBoot);
492     FRIEND_TEST(StatsLogProcessorTest, TestActivationOnBootMultipleActivations);
493     FRIEND_TEST(StatsLogProcessorTest,
494             TestActivationOnBootMultipleActivationsDifferentActivationTypes);
495     FRIEND_TEST(StatsLogProcessorTest, TestActivationsPersistAcrossSystemServerRestart);
496 
497     FRIEND_TEST(ValueMetricE2eTest, TestInitWithSlicedState);
498     FRIEND_TEST(ValueMetricE2eTest, TestInitWithSlicedState_WithDimensions);
499     FRIEND_TEST(ValueMetricE2eTest, TestInitWithSlicedState_WithIncorrectDimensions);
500     FRIEND_TEST(ValueMetricE2eTest, TestInitialConditionChanges);
501 
502     FRIEND_TEST(MetricsManagerTest, TestInitialConditions);
503 };
504 
505 }  // namespace statsd
506 }  // namespace os
507 }  // namespace android
508 #endif  // METRIC_PRODUCER_H
509