1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define DEBUG false  // STOPSHIP if true
18 #include "Log.h"
19 
20 #include "AnomalyTracker.h"
21 #include "external/Perfetto.h"
22 #include "guardrail/StatsdStats.h"
23 #include "metadata_util.h"
24 #include "stats_log_util.h"
25 #include "subscriber_util.h"
26 #include "subscriber/IncidentdReporter.h"
27 #include "subscriber/SubscriberReporter.h"
28 
29 #include <inttypes.h>
30 #include <statslog_statsd.h>
31 #include <time.h>
32 
33 namespace android {
34 namespace os {
35 namespace statsd {
36 
AnomalyTracker(const Alert & alert,const ConfigKey & configKey)37 AnomalyTracker::AnomalyTracker(const Alert& alert, const ConfigKey& configKey)
38         : mAlert(alert), mConfigKey(configKey), mNumOfPastBuckets(mAlert.num_buckets() - 1) {
39     VLOG("AnomalyTracker() called");
40     if (mAlert.num_buckets() <= 0) {
41         ALOGE("Cannot create AnomalyTracker with %lld buckets", (long long)mAlert.num_buckets());
42         return;
43     }
44     if (!mAlert.has_trigger_if_sum_gt()) {
45         ALOGE("Cannot create AnomalyTracker without threshold");
46         return;
47     }
48     resetStorage();  // initialization
49 }
50 
~AnomalyTracker()51 AnomalyTracker::~AnomalyTracker() {
52     VLOG("~AnomalyTracker() called");
53 }
54 
resetStorage()55 void AnomalyTracker::resetStorage() {
56     VLOG("resetStorage() called.");
57     mPastBuckets.clear();
58     // Excludes the current bucket.
59     mPastBuckets.resize(mNumOfPastBuckets);
60     mSumOverPastBuckets.clear();
61 }
62 
index(int64_t bucketNum) const63 size_t AnomalyTracker::index(int64_t bucketNum) const {
64     if (bucketNum < 0) {
65         ALOGE("index() was passed a negative bucket number (%lld)!", (long long)bucketNum);
66     }
67     return bucketNum % mNumOfPastBuckets;
68 }
69 
advanceMostRecentBucketTo(const int64_t & bucketNum)70 void AnomalyTracker::advanceMostRecentBucketTo(const int64_t& bucketNum) {
71     VLOG("advanceMostRecentBucketTo() called.");
72     if (mNumOfPastBuckets <= 0) {
73         return;
74     }
75     if (bucketNum <= mMostRecentBucketNum) {
76         ALOGW("Cannot advance buckets backwards (bucketNum=%lld but mMostRecentBucketNum=%lld)",
77               (long long)bucketNum, (long long)mMostRecentBucketNum);
78         return;
79     }
80     // If in the future (i.e. buckets are ancient), just empty out all past info.
81     if (bucketNum >= mMostRecentBucketNum + mNumOfPastBuckets) {
82         resetStorage();
83         mMostRecentBucketNum = bucketNum;
84         return;
85     }
86 
87     // Clear out space by emptying out old mPastBuckets[i] values and update mSumOverPastBuckets.
88     for (int64_t i = mMostRecentBucketNum + 1; i <= bucketNum; i++) {
89         const int idx = index(i);
90         subtractBucketFromSum(mPastBuckets[idx]);
91         mPastBuckets[idx] = nullptr;  // release (but not clear) the old bucket.
92     }
93     mMostRecentBucketNum = bucketNum;
94 }
95 
addPastBucket(const MetricDimensionKey & key,const int64_t & bucketValue,const int64_t & bucketNum)96 void AnomalyTracker::addPastBucket(const MetricDimensionKey& key,
97                                    const int64_t& bucketValue,
98                                    const int64_t& bucketNum) {
99     VLOG("addPastBucket(bucketValue) called.");
100     if (mNumOfPastBuckets == 0 ||
101         bucketNum < 0 || bucketNum <= mMostRecentBucketNum - mNumOfPastBuckets) {
102         return;
103     }
104 
105     const int bucketIndex = index(bucketNum);
106     if (bucketNum <= mMostRecentBucketNum && (mPastBuckets[bucketIndex] != nullptr)) {
107         // We need to insert into an already existing past bucket.
108         std::shared_ptr<DimToValMap>& bucket = mPastBuckets[bucketIndex];
109         auto itr = bucket->find(key);
110         if (itr != bucket->end()) {
111             // Old entry already exists; update it.
112             subtractValueFromSum(key, itr->second);
113             itr->second = bucketValue;
114         } else {
115             bucket->insert({key, bucketValue});
116         }
117         mSumOverPastBuckets[key] += bucketValue;
118     } else {
119         // Bucket does not exist yet (in future or was never made), so we must make it.
120         std::shared_ptr<DimToValMap> bucket = std::make_shared<DimToValMap>();
121         bucket->insert({key, bucketValue});
122         addPastBucket(bucket, bucketNum);
123     }
124 }
125 
addPastBucket(std::shared_ptr<DimToValMap> bucket,const int64_t & bucketNum)126 void AnomalyTracker::addPastBucket(std::shared_ptr<DimToValMap> bucket,
127                                    const int64_t& bucketNum) {
128     VLOG("addPastBucket(bucket) called.");
129     if (mNumOfPastBuckets == 0 ||
130             bucketNum < 0 || bucketNum <= mMostRecentBucketNum - mNumOfPastBuckets) {
131         return;
132     }
133 
134     if (bucketNum <= mMostRecentBucketNum) {
135         // We are updating an old bucket, not adding a new one.
136         subtractBucketFromSum(mPastBuckets[index(bucketNum)]);
137     } else {
138         // Clear space for the new bucket to be at bucketNum.
139         advanceMostRecentBucketTo(bucketNum);
140     }
141     mPastBuckets[index(bucketNum)] = bucket;
142     addBucketToSum(bucket);
143 }
144 
subtractBucketFromSum(const shared_ptr<DimToValMap> & bucket)145 void AnomalyTracker::subtractBucketFromSum(const shared_ptr<DimToValMap>& bucket) {
146     if (bucket == nullptr) {
147         return;
148     }
149     for (const auto& keyValuePair : *bucket) {
150         subtractValueFromSum(keyValuePair.first, keyValuePair.second);
151     }
152 }
153 
154 
subtractValueFromSum(const MetricDimensionKey & key,const int64_t & bucketValue)155 void AnomalyTracker::subtractValueFromSum(const MetricDimensionKey& key,
156                                           const int64_t& bucketValue) {
157     auto itr = mSumOverPastBuckets.find(key);
158     if (itr == mSumOverPastBuckets.end()) {
159         return;
160     }
161     itr->second -= bucketValue;
162     if (itr->second == 0) {
163         mSumOverPastBuckets.erase(itr);
164     }
165 }
166 
addBucketToSum(const shared_ptr<DimToValMap> & bucket)167 void AnomalyTracker::addBucketToSum(const shared_ptr<DimToValMap>& bucket) {
168     if (bucket == nullptr) {
169         return;
170     }
171     // For each dimension present in the bucket, add its value to its corresponding sum.
172     for (const auto& keyValuePair : *bucket) {
173         mSumOverPastBuckets[keyValuePair.first] += keyValuePair.second;
174     }
175 }
176 
getPastBucketValue(const MetricDimensionKey & key,const int64_t & bucketNum) const177 int64_t AnomalyTracker::getPastBucketValue(const MetricDimensionKey& key,
178                                            const int64_t& bucketNum) const {
179     if (bucketNum < 0 || mMostRecentBucketNum < 0
180             || bucketNum <= mMostRecentBucketNum - mNumOfPastBuckets
181             || bucketNum > mMostRecentBucketNum) {
182         return 0;
183     }
184 
185     const auto& bucket = mPastBuckets[index(bucketNum)];
186     if (bucket == nullptr) {
187         return 0;
188     }
189     const auto& itr = bucket->find(key);
190     return itr == bucket->end() ? 0 : itr->second;
191 }
192 
getSumOverPastBuckets(const MetricDimensionKey & key) const193 int64_t AnomalyTracker::getSumOverPastBuckets(const MetricDimensionKey& key) const {
194     const auto& itr = mSumOverPastBuckets.find(key);
195     if (itr != mSumOverPastBuckets.end()) {
196         return itr->second;
197     }
198     return 0;
199 }
200 
detectAnomaly(const int64_t & currentBucketNum,const MetricDimensionKey & key,const int64_t & currentBucketValue)201 bool AnomalyTracker::detectAnomaly(const int64_t& currentBucketNum,
202                                    const MetricDimensionKey& key,
203                                    const int64_t& currentBucketValue) {
204 
205     // currentBucketNum should be the next bucket after pastBuckets. If not, advance so that it is.
206     if (currentBucketNum > mMostRecentBucketNum + 1) {
207         advanceMostRecentBucketTo(currentBucketNum - 1);
208     }
209     return mAlert.has_trigger_if_sum_gt() &&
210            getSumOverPastBuckets(key) + currentBucketValue > mAlert.trigger_if_sum_gt();
211 }
212 
declareAnomaly(const int64_t & timestampNs,int64_t metricId,const MetricDimensionKey & key,int64_t metricValue)213 void AnomalyTracker::declareAnomaly(const int64_t& timestampNs, int64_t metricId,
214                                     const MetricDimensionKey& key, int64_t metricValue) {
215     // TODO(b/110563466): Why receive timestamp? RefractoryPeriod should always be based on
216     // real time right now.
217     if (isInRefractoryPeriod(timestampNs, key)) {
218         VLOG("Skipping anomaly declaration since within refractory period");
219         return;
220     }
221     if (mAlert.has_refractory_period_secs()) {
222         mRefractoryPeriodEndsSec[key] = ((timestampNs + NS_PER_SEC - 1) / NS_PER_SEC) // round up
223                                         + mAlert.refractory_period_secs();
224         // TODO(b/110563466): If we had access to the bucket_size_millis, consider
225         // calling resetStorage()
226         // if (mAlert.refractory_period_secs() > mNumOfPastBuckets * bucketSizeNs) {resetStorage();}
227     }
228 
229     if (!mSubscriptions.empty()) {
230         ALOGI("An anomaly (%" PRId64 ") %s has occurred! Informing subscribers.",
231                 mAlert.id(), key.toString().c_str());
232         informSubscribers(key, metricId, metricValue);
233     } else {
234         ALOGI("An anomaly has occurred! (But no subscriber for that alert.)");
235     }
236 
237     StatsdStats::getInstance().noteAnomalyDeclared(mConfigKey, mAlert.id());
238 
239     // TODO(b/110564268): This should also take in the const MetricDimensionKey& key?
240     util::stats_write(util::ANOMALY_DETECTED, mConfigKey.GetUid(),
241                       mConfigKey.GetId(), mAlert.id());
242 }
243 
detectAndDeclareAnomaly(const int64_t & timestampNs,const int64_t & currBucketNum,int64_t metricId,const MetricDimensionKey & key,const int64_t & currentBucketValue)244 void AnomalyTracker::detectAndDeclareAnomaly(const int64_t& timestampNs,
245                                              const int64_t& currBucketNum, int64_t metricId,
246                                              const MetricDimensionKey& key,
247                                              const int64_t& currentBucketValue) {
248     if (detectAnomaly(currBucketNum, key, currentBucketValue)) {
249         declareAnomaly(timestampNs, metricId, key, currentBucketValue);
250     }
251 }
252 
isInRefractoryPeriod(const int64_t & timestampNs,const MetricDimensionKey & key) const253 bool AnomalyTracker::isInRefractoryPeriod(const int64_t& timestampNs,
254                                           const MetricDimensionKey& key) const {
255     const auto& it = mRefractoryPeriodEndsSec.find(key);
256     if (it != mRefractoryPeriodEndsSec.end()) {
257         return timestampNs < (it->second *  (int64_t)NS_PER_SEC);
258     }
259     return false;
260 }
261 
informSubscribers(const MetricDimensionKey & key,int64_t metric_id,int64_t metricValue)262 void AnomalyTracker::informSubscribers(const MetricDimensionKey& key, int64_t metric_id,
263                                        int64_t metricValue) {
264     triggerSubscribers(mAlert.id(), metric_id, key, metricValue, mConfigKey, mSubscriptions);
265 }
266 
writeAlertMetadataToProto(int64_t currentWallClockTimeNs,int64_t systemElapsedTimeNs,metadata::AlertMetadata * alertMetadata)267 bool AnomalyTracker::writeAlertMetadataToProto(int64_t currentWallClockTimeNs,
268                                                int64_t systemElapsedTimeNs,
269                                                metadata::AlertMetadata* alertMetadata) {
270     bool metadataWritten = false;
271 
272     if (mRefractoryPeriodEndsSec.empty()) {
273         return false;
274     }
275 
276     for (const auto& it: mRefractoryPeriodEndsSec) {
277         // Do not write the timestamp to disk if it has already expired
278         if (it.second < systemElapsedTimeNs / NS_PER_SEC) {
279             continue;
280         }
281 
282         metadataWritten = true;
283         if (alertMetadata->alert_dim_keyed_data_size() == 0) {
284             alertMetadata->set_alert_id(mAlert.id());
285         }
286 
287         metadata::AlertDimensionKeyedData* keyedData = alertMetadata->add_alert_dim_keyed_data();
288         // We convert and write the refractory_end_sec to wall clock time because we do not know
289         // when statsd will start again.
290         int32_t refractoryEndWallClockSec = (int32_t) ((currentWallClockTimeNs / NS_PER_SEC) +
291                 (it.second - systemElapsedTimeNs / NS_PER_SEC));
292 
293         keyedData->set_last_refractory_ends_sec(refractoryEndWallClockSec);
294         writeMetricDimensionKeyToMetadataDimensionKey(
295                 it.first, keyedData->mutable_dimension_key());
296     }
297 
298     return metadataWritten;
299 }
300 
loadAlertMetadata(const metadata::AlertMetadata & alertMetadata,int64_t currentWallClockTimeNs,int64_t systemElapsedTimeNs)301 void AnomalyTracker::loadAlertMetadata(
302         const metadata::AlertMetadata& alertMetadata,
303         int64_t currentWallClockTimeNs,
304         int64_t systemElapsedTimeNs) {
305     for (const metadata::AlertDimensionKeyedData& keyedData :
306             alertMetadata.alert_dim_keyed_data()) {
307         if ((uint64_t) keyedData.last_refractory_ends_sec() < currentWallClockTimeNs / NS_PER_SEC) {
308             // Do not update the timestamp if it has already expired.
309             continue;
310         }
311         MetricDimensionKey metricKey = loadMetricDimensionKeyFromProto(
312                 keyedData.dimension_key());
313         int32_t refractoryPeriodEndsSec = (int32_t) keyedData.last_refractory_ends_sec() -
314                 currentWallClockTimeNs / NS_PER_SEC + systemElapsedTimeNs / NS_PER_SEC;
315         mRefractoryPeriodEndsSec[metricKey] = refractoryPeriodEndsSec;
316     }
317 }
318 
319 }  // namespace statsd
320 }  // namespace os
321 }  // namespace android
322