1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define STATSD_DEBUG false  // STOPSHIP if true
18 #include "Log.h"
19 
20 #include "AnomalyTracker.h"
21 #include "external/Perfetto.h"
22 #include "guardrail/StatsdStats.h"
23 #include "metadata_util.h"
24 #include "stats_log_util.h"
25 #include "subscriber_util.h"
26 #include "subscriber/IncidentdReporter.h"
27 #include "subscriber/SubscriberReporter.h"
28 
29 #include <inttypes.h>
30 #include <statslog_statsd.h>
31 #include <time.h>
32 
33 namespace android {
34 namespace os {
35 namespace statsd {
36 
AnomalyTracker(const Alert & alert,const ConfigKey & configKey)37 AnomalyTracker::AnomalyTracker(const Alert& alert, const ConfigKey& configKey)
38         : mAlert(alert), mConfigKey(configKey), mNumOfPastBuckets(mAlert.num_buckets() - 1) {
39     VLOG("AnomalyTracker() called");
40     resetStorage();  // initialization
41 }
42 
~AnomalyTracker()43 AnomalyTracker::~AnomalyTracker() {
44     VLOG("~AnomalyTracker() called");
45 }
46 
onConfigUpdated()47 void AnomalyTracker::onConfigUpdated() {
48     mSubscriptions.clear();
49 }
50 
resetStorage()51 void AnomalyTracker::resetStorage() {
52     VLOG("resetStorage() called.");
53     mPastBuckets.clear();
54     // Excludes the current bucket.
55     mPastBuckets.resize(mNumOfPastBuckets);
56     mSumOverPastBuckets.clear();
57 }
58 
index(int64_t bucketNum) const59 size_t AnomalyTracker::index(int64_t bucketNum) const {
60     if (bucketNum < 0) {
61         ALOGE("index() was passed a negative bucket number (%lld)!", (long long)bucketNum);
62     }
63     return bucketNum % mNumOfPastBuckets;
64 }
65 
advanceMostRecentBucketTo(const int64_t bucketNum)66 void AnomalyTracker::advanceMostRecentBucketTo(const int64_t bucketNum) {
67     VLOG("advanceMostRecentBucketTo() called.");
68     if (mNumOfPastBuckets <= 0) {
69         return;
70     }
71     if (bucketNum <= mMostRecentBucketNum) {
72         ALOGW("Cannot advance buckets backwards (bucketNum=%lld but mMostRecentBucketNum=%lld)",
73               (long long)bucketNum, (long long)mMostRecentBucketNum);
74         return;
75     }
76     // If in the future (i.e. buckets are ancient), just empty out all past info.
77     if (bucketNum >= mMostRecentBucketNum + mNumOfPastBuckets) {
78         resetStorage();
79         mMostRecentBucketNum = bucketNum;
80         return;
81     }
82 
83     // Clear out space by emptying out old mPastBuckets[i] values and update mSumOverPastBuckets.
84     for (int64_t i = mMostRecentBucketNum + 1; i <= bucketNum; i++) {
85         const int idx = index(i);
86         subtractBucketFromSum(mPastBuckets[idx]);
87         mPastBuckets[idx] = nullptr;  // release (but not clear) the old bucket.
88     }
89     mMostRecentBucketNum = bucketNum;
90 }
91 
addPastBucket(const MetricDimensionKey & key,const int64_t bucketValue,const int64_t bucketNum)92 void AnomalyTracker::addPastBucket(const MetricDimensionKey& key, const int64_t bucketValue,
93                                    const int64_t bucketNum) {
94     VLOG("addPastBucket(bucketValue) called.");
95     if (mNumOfPastBuckets == 0 ||
96         bucketNum < 0 || bucketNum <= mMostRecentBucketNum - mNumOfPastBuckets) {
97         return;
98     }
99 
100     const int bucketIndex = index(bucketNum);
101     if (bucketNum <= mMostRecentBucketNum && (mPastBuckets[bucketIndex] != nullptr)) {
102         // We need to insert into an already existing past bucket.
103         std::shared_ptr<DimToValMap>& bucket = mPastBuckets[bucketIndex];
104         auto itr = bucket->find(key);
105         if (itr != bucket->end()) {
106             // Old entry already exists; update it.
107             subtractValueFromSum(key, itr->second);
108             itr->second = bucketValue;
109         } else {
110             bucket->insert({key, bucketValue});
111         }
112         mSumOverPastBuckets[key] += bucketValue;
113     } else {
114         // Bucket does not exist yet (in future or was never made), so we must make it.
115         std::shared_ptr<DimToValMap> bucket = std::make_shared<DimToValMap>();
116         bucket->insert({key, bucketValue});
117         addPastBucket(bucket, bucketNum);
118     }
119 }
120 
addPastBucket(const std::shared_ptr<DimToValMap> & bucket,const int64_t bucketNum)121 void AnomalyTracker::addPastBucket(const std::shared_ptr<DimToValMap>& bucket,
122                                    const int64_t bucketNum) {
123     VLOG("addPastBucket(bucket) called.");
124     if (mNumOfPastBuckets == 0 ||
125             bucketNum < 0 || bucketNum <= mMostRecentBucketNum - mNumOfPastBuckets) {
126         return;
127     }
128 
129     if (bucketNum <= mMostRecentBucketNum) {
130         // We are updating an old bucket, not adding a new one.
131         subtractBucketFromSum(mPastBuckets[index(bucketNum)]);
132     } else {
133         // Clear space for the new bucket to be at bucketNum.
134         advanceMostRecentBucketTo(bucketNum);
135     }
136     mPastBuckets[index(bucketNum)] = bucket;
137     addBucketToSum(bucket);
138 }
139 
subtractBucketFromSum(const shared_ptr<DimToValMap> & bucket)140 void AnomalyTracker::subtractBucketFromSum(const shared_ptr<DimToValMap>& bucket) {
141     if (bucket == nullptr) {
142         return;
143     }
144     for (const auto& keyValuePair : *bucket) {
145         subtractValueFromSum(keyValuePair.first, keyValuePair.second);
146     }
147 }
148 
subtractValueFromSum(const MetricDimensionKey & key,const int64_t bucketValue)149 void AnomalyTracker::subtractValueFromSum(const MetricDimensionKey& key,
150                                           const int64_t bucketValue) {
151     auto itr = mSumOverPastBuckets.find(key);
152     if (itr == mSumOverPastBuckets.end()) {
153         return;
154     }
155     itr->second -= bucketValue;
156     if (itr->second == 0) {
157         mSumOverPastBuckets.erase(itr);
158     }
159 }
160 
addBucketToSum(const shared_ptr<DimToValMap> & bucket)161 void AnomalyTracker::addBucketToSum(const shared_ptr<DimToValMap>& bucket) {
162     if (bucket == nullptr) {
163         return;
164     }
165     // For each dimension present in the bucket, add its value to its corresponding sum.
166     for (const auto& keyValuePair : *bucket) {
167         mSumOverPastBuckets[keyValuePair.first] += keyValuePair.second;
168     }
169 }
170 
getPastBucketValue(const MetricDimensionKey & key,const int64_t bucketNum) const171 int64_t AnomalyTracker::getPastBucketValue(const MetricDimensionKey& key,
172                                            const int64_t bucketNum) const {
173     if (bucketNum < 0 || mMostRecentBucketNum < 0
174             || bucketNum <= mMostRecentBucketNum - mNumOfPastBuckets
175             || bucketNum > mMostRecentBucketNum) {
176         return 0;
177     }
178 
179     const auto& bucket = mPastBuckets[index(bucketNum)];
180     if (bucket == nullptr) {
181         return 0;
182     }
183     const auto& itr = bucket->find(key);
184     return itr == bucket->end() ? 0 : itr->second;
185 }
186 
getSumOverPastBuckets(const MetricDimensionKey & key) const187 int64_t AnomalyTracker::getSumOverPastBuckets(const MetricDimensionKey& key) const {
188     const auto& itr = mSumOverPastBuckets.find(key);
189     if (itr != mSumOverPastBuckets.end()) {
190         return itr->second;
191     }
192     return 0;
193 }
194 
detectAnomaly(const int64_t currentBucketNum,const MetricDimensionKey & key,const int64_t currentBucketValue)195 bool AnomalyTracker::detectAnomaly(const int64_t currentBucketNum, const MetricDimensionKey& key,
196                                    const int64_t currentBucketValue) {
197     // currentBucketNum should be the next bucket after pastBuckets. If not, advance so that it is.
198     if (currentBucketNum > mMostRecentBucketNum + 1) {
199         advanceMostRecentBucketTo(currentBucketNum - 1);
200     }
201     return mAlert.has_trigger_if_sum_gt() &&
202            getSumOverPastBuckets(key) + currentBucketValue > mAlert.trigger_if_sum_gt();
203 }
204 
declareAnomaly(const int64_t timestampNs,int64_t metricId,const MetricDimensionKey & key,int64_t metricValue)205 void AnomalyTracker::declareAnomaly(const int64_t timestampNs, int64_t metricId,
206                                     const MetricDimensionKey& key, int64_t metricValue) {
207     // TODO(b/110563466): Why receive timestamp? RefractoryPeriod should always be based on
208     // real time right now.
209     if (isInRefractoryPeriod(timestampNs, key)) {
210         VLOG("Skipping anomaly declaration since within refractory period");
211         return;
212     }
213 
214     // TODO(b/110564268): This should also take in the const MetricDimensionKey& key?
215     util::stats_write(util::ANOMALY_DETECTED, mConfigKey.GetUid(), mConfigKey.GetId(), mAlert.id());
216 
217     if (mAlert.probability_of_informing() < 1 &&
218         ((float)rand() / (float)RAND_MAX) >= mAlert.probability_of_informing()) {
219         // Note that due to float imprecision, 0.0 and 1.0 might not truly mean never/always.
220         // The config writer was advised to use -0.1 and 1.1 for never/always.
221         ALOGI("Fate decided that an alert will not trigger subscribers or start the refactory "
222               "period countdown.");
223         return;
224     }
225 
226     if (mAlert.has_refractory_period_secs()) {
227         mRefractoryPeriodEndsSec[key] = ((timestampNs + NS_PER_SEC - 1) / NS_PER_SEC) // round up
228                                         + mAlert.refractory_period_secs();
229         // TODO(b/110563466): If we had access to the bucket_size_millis, consider
230         // calling resetStorage()
231         // if (mAlert.refractory_period_secs() > mNumOfPastBuckets * bucketSizeNs) {resetStorage();}
232     }
233 
234     if (!mSubscriptions.empty()) {
235         ALOGI("An anomaly (%" PRId64 ") %s has occurred! Informing subscribers.",
236                 mAlert.id(), key.toString().c_str());
237         informSubscribers(key, metricId, metricValue);
238     } else {
239         ALOGI("An anomaly has occurred! (But no subscriber for that alert.)");
240     }
241 
242     StatsdStats::getInstance().noteAnomalyDeclared(mConfigKey, mAlert.id());
243 }
244 
detectAndDeclareAnomaly(const int64_t timestampNs,const int64_t currBucketNum,int64_t metricId,const MetricDimensionKey & key,const int64_t currentBucketValue)245 void AnomalyTracker::detectAndDeclareAnomaly(const int64_t timestampNs, const int64_t currBucketNum,
246                                              int64_t metricId, const MetricDimensionKey& key,
247                                              const int64_t currentBucketValue) {
248     if (detectAnomaly(currBucketNum, key, currentBucketValue)) {
249         declareAnomaly(timestampNs, metricId, key, currentBucketValue);
250     }
251 }
252 
isInRefractoryPeriod(const int64_t timestampNs,const MetricDimensionKey & key) const253 bool AnomalyTracker::isInRefractoryPeriod(const int64_t timestampNs,
254                                           const MetricDimensionKey& key) const {
255     const auto& it = mRefractoryPeriodEndsSec.find(key);
256     if (it != mRefractoryPeriodEndsSec.end()) {
257         return timestampNs < (it->second *  (int64_t)NS_PER_SEC);
258     }
259     return false;
260 }
261 
getProtoHash() const262 std::pair<optional<InvalidConfigReason>, uint64_t> AnomalyTracker::getProtoHash() const {
263     string serializedAlert;
264     if (!mAlert.SerializeToString(&serializedAlert)) {
265         ALOGW("Unable to serialize alert %lld", (long long)mAlert.id());
266         return {createInvalidConfigReasonWithAlert(INVALID_CONFIG_REASON_ALERT_SERIALIZATION_FAILED,
267                                                    mAlert.metric_id(), mAlert.id()),
268                 0};
269     }
270     return {nullopt, Hash64(serializedAlert)};
271 }
272 
informSubscribers(const MetricDimensionKey & key,int64_t metric_id,int64_t metricValue)273 void AnomalyTracker::informSubscribers(const MetricDimensionKey& key, int64_t metric_id,
274                                        int64_t metricValue) {
275     triggerSubscribers(mAlert.id(), metric_id, key, metricValue, mConfigKey, mSubscriptions);
276 }
277 
writeAlertMetadataToProto(int64_t currentWallClockTimeNs,int64_t systemElapsedTimeNs,metadata::AlertMetadata * alertMetadata)278 bool AnomalyTracker::writeAlertMetadataToProto(int64_t currentWallClockTimeNs,
279                                                int64_t systemElapsedTimeNs,
280                                                metadata::AlertMetadata* alertMetadata) {
281     bool metadataWritten = false;
282 
283     if (mRefractoryPeriodEndsSec.empty()) {
284         return false;
285     }
286 
287     for (const auto& it: mRefractoryPeriodEndsSec) {
288         // Do not write the timestamp to disk if it has already expired
289         if (it.second < systemElapsedTimeNs / NS_PER_SEC) {
290             continue;
291         }
292 
293         metadataWritten = true;
294         if (alertMetadata->alert_dim_keyed_data_size() == 0) {
295             alertMetadata->set_alert_id(mAlert.id());
296         }
297 
298         metadata::AlertDimensionKeyedData* keyedData = alertMetadata->add_alert_dim_keyed_data();
299         // We convert and write the refractory_end_sec to wall clock time because we do not know
300         // when statsd will start again.
301         int32_t refractoryEndWallClockSec = (int32_t) ((currentWallClockTimeNs / NS_PER_SEC) +
302                 (it.second - systemElapsedTimeNs / NS_PER_SEC));
303 
304         keyedData->set_last_refractory_ends_sec(refractoryEndWallClockSec);
305         writeMetricDimensionKeyToMetadataDimensionKey(
306                 it.first, keyedData->mutable_dimension_key());
307     }
308 
309     return metadataWritten;
310 }
311 
loadAlertMetadata(const metadata::AlertMetadata & alertMetadata,int64_t currentWallClockTimeNs,int64_t systemElapsedTimeNs)312 void AnomalyTracker::loadAlertMetadata(
313         const metadata::AlertMetadata& alertMetadata,
314         int64_t currentWallClockTimeNs,
315         int64_t systemElapsedTimeNs) {
316     for (const metadata::AlertDimensionKeyedData& keyedData :
317             alertMetadata.alert_dim_keyed_data()) {
318         if ((uint64_t) keyedData.last_refractory_ends_sec() < currentWallClockTimeNs / NS_PER_SEC) {
319             // Do not update the timestamp if it has already expired.
320             continue;
321         }
322         MetricDimensionKey metricKey = loadMetricDimensionKeyFromProto(
323                 keyedData.dimension_key());
324         int32_t refractoryPeriodEndsSec = (int32_t) keyedData.last_refractory_ends_sec() -
325                 currentWallClockTimeNs / NS_PER_SEC + systemElapsedTimeNs / NS_PER_SEC;
326         mRefractoryPeriodEndsSec[metricKey] = refractoryPeriodEndsSec;
327     }
328 }
329 
330 }  // namespace statsd
331 }  // namespace os
332 }  // namespace android
333