1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #define DEBUG false
17 #include "Log.h"
18 #include "OringDurationTracker.h"
19 #include "guardrail/StatsdStats.h"
20 
21 namespace android {
22 namespace os {
23 namespace statsd {
24 
25 using std::pair;
26 
OringDurationTracker(const ConfigKey & key,const int64_t & id,const MetricDimensionKey & eventKey,sp<ConditionWizard> wizard,int conditionIndex,bool nesting,int64_t currentBucketStartNs,int64_t currentBucketNum,int64_t startTimeNs,int64_t bucketSizeNs,bool conditionSliced,bool fullLink,const vector<sp<DurationAnomalyTracker>> & anomalyTrackers)27 OringDurationTracker::OringDurationTracker(
28         const ConfigKey& key, const int64_t& id, const MetricDimensionKey& eventKey,
29         sp<ConditionWizard> wizard, int conditionIndex, bool nesting, int64_t currentBucketStartNs,
30         int64_t currentBucketNum, int64_t startTimeNs, int64_t bucketSizeNs, bool conditionSliced,
31         bool fullLink, const vector<sp<DurationAnomalyTracker>>& anomalyTrackers)
32     : DurationTracker(key, id, eventKey, wizard, conditionIndex, nesting, currentBucketStartNs,
33                       currentBucketNum, startTimeNs, bucketSizeNs, conditionSliced, fullLink,
34                       anomalyTrackers),
35       mStarted(),
36       mPaused() {
37     mLastStartTime = 0;
38 }
39 
hitGuardRail(const HashableDimensionKey & newKey)40 bool OringDurationTracker::hitGuardRail(const HashableDimensionKey& newKey) {
41     // ===========GuardRail==============
42     // 1. Report the tuple count if the tuple count > soft limit
43     if (mConditionKeyMap.find(newKey) != mConditionKeyMap.end()) {
44         return false;
45     }
46     if (mConditionKeyMap.size() > StatsdStats::kDimensionKeySizeSoftLimit - 1) {
47         size_t newTupleCount = mConditionKeyMap.size() + 1;
48         StatsdStats::getInstance().noteMetricDimensionSize(mConfigKey, mTrackerId, newTupleCount);
49         // 2. Don't add more tuples, we are above the allowed threshold. Drop the data.
50         if (newTupleCount > StatsdStats::kDimensionKeySizeHardLimit) {
51             ALOGE("OringDurTracker %lld dropping data for dimension key %s",
52                 (long long)mTrackerId, newKey.toString().c_str());
53             return true;
54         }
55     }
56     return false;
57 }
58 
noteStart(const HashableDimensionKey & key,bool condition,const int64_t eventTime,const ConditionKey & conditionKey)59 void OringDurationTracker::noteStart(const HashableDimensionKey& key, bool condition,
60                                      const int64_t eventTime, const ConditionKey& conditionKey) {
61     if (hitGuardRail(key)) {
62         return;
63     }
64     if (condition) {
65         if (mStarted.size() == 0) {
66             mLastStartTime = eventTime;
67             VLOG("record first start....");
68             startAnomalyAlarm(eventTime);
69         }
70         mStarted[key]++;
71     } else {
72         mPaused[key]++;
73     }
74 
75     if (mConditionSliced && mConditionKeyMap.find(key) == mConditionKeyMap.end()) {
76         mConditionKeyMap[key] = conditionKey;
77     }
78     VLOG("Oring: %s start, condition %d", key.toString().c_str(), condition);
79 }
80 
noteStop(const HashableDimensionKey & key,const int64_t timestamp,const bool stopAll)81 void OringDurationTracker::noteStop(const HashableDimensionKey& key, const int64_t timestamp,
82                                     const bool stopAll) {
83     VLOG("Oring: %s stop", key.toString().c_str());
84     auto it = mStarted.find(key);
85     if (it != mStarted.end()) {
86         (it->second)--;
87         if (stopAll || !mNested || it->second <= 0) {
88             mStarted.erase(it);
89             mConditionKeyMap.erase(key);
90         }
91         if (mStarted.empty()) {
92             mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
93                     (timestamp - mLastStartTime);
94             detectAndDeclareAnomaly(
95                     timestamp, mCurrentBucketNum,
96                     getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration());
97             VLOG("record duration %lld, total duration %lld for state key %s",
98                  (long long)timestamp - mLastStartTime, (long long)getCurrentStateKeyDuration(),
99                  mEventKey.getStateValuesKey().toString().c_str());
100         }
101     }
102 
103     auto pausedIt = mPaused.find(key);
104     if (pausedIt != mPaused.end()) {
105         (pausedIt->second)--;
106         if (stopAll || !mNested || pausedIt->second <= 0) {
107             mPaused.erase(pausedIt);
108             mConditionKeyMap.erase(key);
109         }
110     }
111     if (mStarted.empty()) {
112         stopAnomalyAlarm(timestamp);
113     }
114 }
115 
noteStopAll(const int64_t timestamp)116 void OringDurationTracker::noteStopAll(const int64_t timestamp) {
117     if (!mStarted.empty()) {
118         mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
119                 (timestamp - mLastStartTime);
120         VLOG("Oring Stop all: record duration %lld, total duration %lld for state key %s",
121              (long long)timestamp - mLastStartTime, (long long)getCurrentStateKeyDuration(),
122              mEventKey.getStateValuesKey().toString().c_str());
123         detectAndDeclareAnomaly(
124                 timestamp, mCurrentBucketNum,
125                 getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration());
126     }
127 
128     stopAnomalyAlarm(timestamp);
129     mStarted.clear();
130     mPaused.clear();
131     mConditionKeyMap.clear();
132 }
133 
flushCurrentBucket(const int64_t & eventTimeNs,std::unordered_map<MetricDimensionKey,std::vector<DurationBucket>> * output)134 bool OringDurationTracker::flushCurrentBucket(
135         const int64_t& eventTimeNs,
136         std::unordered_map<MetricDimensionKey, std::vector<DurationBucket>>* output) {
137     VLOG("OringDurationTracker Flushing.............");
138 
139     // Note that we have to mimic the bucket time changes we do in the
140     // MetricProducer#notifyAppUpgrade.
141 
142     int numBucketsForward = 0;
143     int64_t fullBucketEnd = getCurrentBucketEndTimeNs();
144     int64_t currentBucketEndTimeNs;
145 
146     if (eventTimeNs >= fullBucketEnd) {
147         numBucketsForward = 1 + (eventTimeNs - fullBucketEnd) / mBucketSizeNs;
148         currentBucketEndTimeNs = fullBucketEnd;
149     } else {
150         // This must be a partial bucket.
151         currentBucketEndTimeNs = eventTimeNs;
152     }
153 
154     // Process the current bucket.
155     if (mStarted.size() > 0) {
156         // Calculate the duration for the current state key.
157         mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
158                 (currentBucketEndTimeNs - mLastStartTime);
159     }
160     // Store DurationBucket info for each whatKey, stateKey pair.
161     // Note: The whatKey stored in mEventKey is constant for each DurationTracker, while the
162     // stateKey stored in mEventKey is only the current stateKey. mStateKeyDurationMap is used to
163     // store durations for each stateKey, so we need to flush the bucket by creating a
164     // DurationBucket for each stateKey.
165     for (auto& durationIt : mStateKeyDurationMap) {
166         if (durationIt.second.mDuration > 0) {
167             DurationBucket current_info;
168             current_info.mBucketStartNs = mCurrentBucketStartTimeNs;
169             current_info.mBucketEndNs = currentBucketEndTimeNs;
170             current_info.mDuration = durationIt.second.mDuration;
171             (*output)[MetricDimensionKey(mEventKey.getDimensionKeyInWhat(), durationIt.first)]
172                     .push_back(current_info);
173 
174             durationIt.second.mDurationFullBucket += durationIt.second.mDuration;
175             VLOG("  duration: %lld", (long long)current_info.mDuration);
176         }
177 
178         if (eventTimeNs > fullBucketEnd) {
179             // End of full bucket, can send to anomaly tracker now.
180             addPastBucketToAnomalyTrackers(
181                     MetricDimensionKey(mEventKey.getDimensionKeyInWhat(), durationIt.first),
182                     getCurrentStateKeyFullBucketDuration(), mCurrentBucketNum);
183             durationIt.second.mDurationFullBucket = 0;
184         }
185         durationIt.second.mDuration = 0;
186     }
187 
188     if (mStarted.size() > 0) {
189         for (int i = 1; i < numBucketsForward; i++) {
190             DurationBucket info;
191             info.mBucketStartNs = fullBucketEnd + mBucketSizeNs * (i - 1);
192             info.mBucketEndNs = info.mBucketStartNs + mBucketSizeNs;
193             info.mDuration = mBucketSizeNs;
194             // Full duration buckets are attributed to the current stateKey.
195             (*output)[mEventKey].push_back(info);
196             // Safe to send these buckets to anomaly tracker since they must be full buckets.
197             // If it's a partial bucket, numBucketsForward would be 0.
198             addPastBucketToAnomalyTrackers(mEventKey, info.mDuration, mCurrentBucketNum + i);
199             VLOG("  add filling bucket with duration %lld", (long long)info.mDuration);
200         }
201     } else {
202         if (numBucketsForward >= 2) {
203             addPastBucketToAnomalyTrackers(mEventKey, 0, mCurrentBucketNum + numBucketsForward - 1);
204         }
205     }
206 
207     if (numBucketsForward > 0) {
208         mCurrentBucketStartTimeNs = fullBucketEnd + (numBucketsForward - 1) * mBucketSizeNs;
209         mCurrentBucketNum += numBucketsForward;
210     } else {  // We must be forming a partial bucket.
211         mCurrentBucketStartTimeNs = eventTimeNs;
212     }
213     mLastStartTime = mCurrentBucketStartTimeNs;
214 
215     // if all stopped, then tell owner it's safe to remove this tracker.
216     return mStarted.empty() && mPaused.empty();
217 }
218 
flushIfNeeded(int64_t eventTimeNs,unordered_map<MetricDimensionKey,vector<DurationBucket>> * output)219 bool OringDurationTracker::flushIfNeeded(
220         int64_t eventTimeNs, unordered_map<MetricDimensionKey, vector<DurationBucket>>* output) {
221     if (eventTimeNs < getCurrentBucketEndTimeNs()) {
222         return false;
223     }
224     return flushCurrentBucket(eventTimeNs, output);
225 }
226 
onSlicedConditionMayChange(bool overallCondition,const int64_t timestamp)227 void OringDurationTracker::onSlicedConditionMayChange(bool overallCondition,
228                                                       const int64_t timestamp) {
229     vector<pair<HashableDimensionKey, int>> startedToPaused;
230     vector<pair<HashableDimensionKey, int>> pausedToStarted;
231     if (!mStarted.empty()) {
232         for (auto it = mStarted.begin(); it != mStarted.end();) {
233             const auto& key = it->first;
234             const auto& condIt = mConditionKeyMap.find(key);
235             if (condIt == mConditionKeyMap.end()) {
236                 VLOG("Key %s dont have condition key", key.toString().c_str());
237                 ++it;
238                 continue;
239             }
240             ConditionState conditionState =
241                 mWizard->query(mConditionTrackerIndex, condIt->second,
242                                !mHasLinksToAllConditionDimensionsInTracker);
243             if (conditionState != ConditionState::kTrue) {
244                 startedToPaused.push_back(*it);
245                 it = mStarted.erase(it);
246                 VLOG("Key %s started -> paused", key.toString().c_str());
247             } else {
248                 ++it;
249             }
250         }
251 
252         if (mStarted.empty()) {
253             mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
254                     (timestamp - mLastStartTime);
255             VLOG("record duration %lld, total duration %lld for state key %s",
256                  (long long)(timestamp - mLastStartTime), (long long)getCurrentStateKeyDuration(),
257                  mEventKey.getStateValuesKey().toString().c_str());
258             detectAndDeclareAnomaly(
259                     timestamp, mCurrentBucketNum,
260                     getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration());
261         }
262     }
263 
264     if (!mPaused.empty()) {
265         for (auto it = mPaused.begin(); it != mPaused.end();) {
266             const auto& key = it->first;
267             if (mConditionKeyMap.find(key) == mConditionKeyMap.end()) {
268                 VLOG("Key %s dont have condition key", key.toString().c_str());
269                 ++it;
270                 continue;
271             }
272             ConditionState conditionState =
273                 mWizard->query(mConditionTrackerIndex, mConditionKeyMap[key],
274                                !mHasLinksToAllConditionDimensionsInTracker);
275             if (conditionState == ConditionState::kTrue) {
276                 pausedToStarted.push_back(*it);
277                 it = mPaused.erase(it);
278                 VLOG("Key %s paused -> started", key.toString().c_str());
279             } else {
280                 ++it;
281             }
282         }
283 
284         if (mStarted.empty() && pausedToStarted.size() > 0) {
285             mLastStartTime = timestamp;
286         }
287     }
288 
289     if (mStarted.empty() && !pausedToStarted.empty()) {
290         startAnomalyAlarm(timestamp);
291     }
292     mStarted.insert(pausedToStarted.begin(), pausedToStarted.end());
293     mPaused.insert(startedToPaused.begin(), startedToPaused.end());
294 
295     if (mStarted.empty()) {
296         stopAnomalyAlarm(timestamp);
297     }
298 }
299 
onConditionChanged(bool condition,const int64_t timestamp)300 void OringDurationTracker::onConditionChanged(bool condition, const int64_t timestamp) {
301     if (condition) {
302         if (!mPaused.empty()) {
303             VLOG("Condition true, all started");
304             if (mStarted.empty()) {
305                 mLastStartTime = timestamp;
306             }
307             if (mStarted.empty() && !mPaused.empty()) {
308                 startAnomalyAlarm(timestamp);
309             }
310             mStarted.insert(mPaused.begin(), mPaused.end());
311             mPaused.clear();
312         }
313     } else {
314         if (!mStarted.empty()) {
315             VLOG("Condition false, all paused");
316             mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration +=
317                     (timestamp - mLastStartTime);
318             mPaused.insert(mStarted.begin(), mStarted.end());
319             mStarted.clear();
320             detectAndDeclareAnomaly(
321                     timestamp, mCurrentBucketNum,
322                     getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration());
323         }
324     }
325     if (mStarted.empty()) {
326         stopAnomalyAlarm(timestamp);
327     }
328 }
329 
onStateChanged(const int64_t timestamp,const int32_t atomId,const FieldValue & newState)330 void OringDurationTracker::onStateChanged(const int64_t timestamp, const int32_t atomId,
331                                           const FieldValue& newState) {
332     // Nothing needs to be done on a state change if we have not seen a start
333     // event, the metric is currently not active, or condition is false.
334     // For these cases, no keys are being tracked in mStarted, so update
335     // the current state key and return.
336     if (mStarted.empty()) {
337         updateCurrentStateKey(atomId, newState);
338         return;
339     }
340     // Add the current duration length to the previous state key and then update
341     // the last start time and current state key.
342     mStateKeyDurationMap[mEventKey.getStateValuesKey()].mDuration += (timestamp - mLastStartTime);
343     mLastStartTime = timestamp;
344     updateCurrentStateKey(atomId, newState);
345 }
346 
predictAnomalyTimestampNs(const DurationAnomalyTracker & anomalyTracker,const int64_t eventTimestampNs) const347 int64_t OringDurationTracker::predictAnomalyTimestampNs(
348         const DurationAnomalyTracker& anomalyTracker, const int64_t eventTimestampNs) const {
349 
350     // The anomaly threshold.
351     const int64_t thresholdNs = anomalyTracker.getAnomalyThreshold();
352 
353     // The timestamp of the current bucket end.
354     const int64_t currentBucketEndNs = getCurrentBucketEndTimeNs();
355 
356     // The past duration ns for the current bucket of the current stateKey.
357     int64_t currentStateBucketPastNs =
358             getCurrentStateKeyDuration() + getCurrentStateKeyFullBucketDuration();
359 
360     // As we move into the future, old buckets get overwritten (so their old data is erased).
361     // Sum of past durations. Will change as we overwrite old buckets.
362     int64_t pastNs = currentStateBucketPastNs + anomalyTracker.getSumOverPastBuckets(mEventKey);
363 
364     // The refractory period end timestamp for dimension mEventKey.
365     const int64_t refractoryPeriodEndNs =
366             anomalyTracker.getRefractoryPeriodEndsSec(mEventKey) * NS_PER_SEC;
367 
368     // The anomaly should happen when accumulated wakelock duration is above the threshold and
369     // not within the refractory period.
370     int64_t anomalyTimestampNs =
371         std::max(eventTimestampNs + thresholdNs - pastNs, refractoryPeriodEndNs);
372     // If the predicted the anomaly timestamp is within the current bucket, return it directly.
373     if (anomalyTimestampNs <= currentBucketEndNs) {
374         return std::max(eventTimestampNs, anomalyTimestampNs);
375     }
376 
377     // Remove the old bucket.
378     if (anomalyTracker.getNumOfPastBuckets() > 0) {
379         pastNs -= anomalyTracker.getPastBucketValue(
380                             mEventKey,
381                             mCurrentBucketNum - anomalyTracker.getNumOfPastBuckets());
382         // Add the remaining of the current bucket to the accumulated wakelock duration.
383         pastNs += (currentBucketEndNs - eventTimestampNs);
384     } else {
385         // The anomaly depends on only one bucket.
386         pastNs = 0;
387     }
388 
389     // The anomaly will not happen in the current bucket. We need to iterate over the future buckets
390     // to predict the accumulated wakelock duration and determine the anomaly timestamp accordingly.
391     for (int futureBucketIdx = 1; futureBucketIdx <= anomalyTracker.getNumOfPastBuckets() + 1;
392             futureBucketIdx++) {
393         // The alarm candidate timestamp should meet two requirements:
394         // 1. the accumulated wakelock duration is above the threshold.
395         // 2. it is not within the refractory period.
396         // 3. the alarm timestamp falls in this bucket. Otherwise we need to flush the past buckets,
397         //    find the new alarm candidate timestamp and check these requirements again.
398         const int64_t bucketEndNs = currentBucketEndNs + futureBucketIdx * mBucketSizeNs;
399         int64_t anomalyTimestampNs =
400             std::max(bucketEndNs - mBucketSizeNs + thresholdNs - pastNs, refractoryPeriodEndNs);
401         if (anomalyTimestampNs <= bucketEndNs) {
402             return anomalyTimestampNs;
403         }
404         if (anomalyTracker.getNumOfPastBuckets() <= 0) {
405             continue;
406         }
407 
408         // No valid alarm timestamp is found in this bucket. The clock moves to the end of the
409         // bucket. Update the pastNs.
410         pastNs += mBucketSizeNs;
411         // 1. If the oldest past bucket is still in the past bucket window, we could fetch the past
412         // bucket and erase it from pastNs.
413         // 2. If the oldest past bucket is the current bucket, we should compute the
414         //   wakelock duration in the current bucket and erase it from pastNs.
415         // 3. Otherwise all othe past buckets are ancient.
416         if (futureBucketIdx < anomalyTracker.getNumOfPastBuckets()) {
417             pastNs -= anomalyTracker.getPastBucketValue(
418                     mEventKey,
419                     mCurrentBucketNum - anomalyTracker.getNumOfPastBuckets() + futureBucketIdx);
420         } else if (futureBucketIdx == anomalyTracker.getNumOfPastBuckets()) {
421             pastNs -= (currentStateBucketPastNs + (currentBucketEndNs - eventTimestampNs));
422         }
423     }
424 
425     return std::max(eventTimestampNs + thresholdNs, refractoryPeriodEndNs);
426 }
427 
dumpStates(FILE * out,bool verbose) const428 void OringDurationTracker::dumpStates(FILE* out, bool verbose) const {
429     fprintf(out, "\t\t started count %lu\n", (unsigned long)mStarted.size());
430     fprintf(out, "\t\t paused count %lu\n", (unsigned long)mPaused.size());
431     fprintf(out, "\t\t current duration %lld\n", (long long)getCurrentStateKeyDuration());
432 }
433 
getCurrentStateKeyDuration() const434 int64_t OringDurationTracker::getCurrentStateKeyDuration() const {
435     auto it = mStateKeyDurationMap.find(mEventKey.getStateValuesKey());
436     if (it == mStateKeyDurationMap.end()) {
437         return 0;
438     } else {
439         return it->second.mDuration;
440     }
441 }
442 
getCurrentStateKeyFullBucketDuration() const443 int64_t OringDurationTracker::getCurrentStateKeyFullBucketDuration() const {
444     auto it = mStateKeyDurationMap.find(mEventKey.getStateValuesKey());
445     if (it == mStateKeyDurationMap.end()) {
446         return 0;
447     } else {
448         return it->second.mDurationFullBucket;
449     }
450 }
451 
updateCurrentStateKey(const int32_t atomId,const FieldValue & newState)452 void OringDurationTracker::updateCurrentStateKey(const int32_t atomId, const FieldValue& newState) {
453     HashableDimensionKey* stateValuesKey = mEventKey.getMutableStateValuesKey();
454     for (size_t i = 0; i < stateValuesKey->getValues().size(); i++) {
455         if (stateValuesKey->getValues()[i].mField.getTag() == atomId) {
456             stateValuesKey->mutableValue(i)->mValue = newState.mValue;
457         }
458     }
459 }
460 
461 }  // namespace statsd
462 }  // namespace os
463 }  // namespace android
464