1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <gtest/gtest_prod.h>
20 #include <stdlib.h>
21 #include <utils/RefBase.h>
22 
23 #include "AlarmMonitor.h"
24 #include "config/ConfigKey.h"
25 #include "guardrail/StatsdStats.h"
26 #include "hash.h"
27 #include "src/statsd_config.pb.h"    // Alert
28 #include "src/statsd_metadata.pb.h"  // AlertMetadata
29 #include "stats_util.h"              // HashableDimensionKey and DimToValMap
30 
31 namespace android {
32 namespace os {
33 namespace statsd {
34 
35 using std::optional;
36 using std::shared_ptr;
37 
38 // Does NOT allow negative values.
39 class AnomalyTracker : public virtual RefBase {
40 public:
41     AnomalyTracker(const Alert& alert, const ConfigKey& configKey);
42 
43     virtual ~AnomalyTracker();
44 
45     // Reset appropriate state on a config update. Clear subscriptions so they can be reset.
46     void onConfigUpdated();
47 
48     // Add subscriptions that depend on this alert.
addSubscription(const Subscription & subscription)49     void addSubscription(const Subscription& subscription) {
50         mSubscriptions.push_back(subscription);
51     }
52 
53     // Adds a bucket for the given bucketNum (index starting at 0).
54     // If a bucket for bucketNum already exists, it will be replaced.
55     // Also, advances to bucketNum (if not in the past), effectively filling any intervening
56     // buckets with 0s.
57     void addPastBucket(const std::shared_ptr<DimToValMap>& bucket, const int64_t bucketNum);
58 
59     // Inserts (or replaces) the bucket entry for the given bucketNum at the given key to be the
60     // given bucketValue. If the bucket does not exist, it will be created.
61     // Also, advances to bucketNum (if not in the past), effectively filling any intervening
62     // buckets with 0s.
63     void addPastBucket(const MetricDimensionKey& key, int64_t bucketValue, int64_t bucketNum);
64 
65     // Returns true if, based on past buckets plus the new currentBucketValue (which generally
66     // represents the partially-filled current bucket), an anomaly has happened.
67     // Also advances to currBucketNum-1.
68     bool detectAnomaly(int64_t currBucketNum, const MetricDimensionKey& key,
69                        int64_t currentBucketValue);
70 
71     // Informs incidentd about the detected alert.
72     void declareAnomaly(int64_t timestampNs, int64_t metricId, const MetricDimensionKey& key,
73                         int64_t metricValue);
74 
75     // Detects if, based on past buckets plus the new currentBucketValue (which generally
76     // represents the partially-filled current bucket), an anomaly has happened, and if so,
77     // declares an anomaly and informs relevant subscribers.
78     // Also advances to currBucketNum-1.
79     void detectAndDeclareAnomaly(int64_t timestampNs, int64_t currBucketNum, int64_t metricId,
80                                  const MetricDimensionKey& key, int64_t currentBucketValue);
81 
82     // Init the AlarmMonitor which is shared across anomaly trackers.
setAlarmMonitor(const sp<AlarmMonitor> & alarmMonitor)83     virtual void setAlarmMonitor(const sp<AlarmMonitor>& alarmMonitor) {
84         return; // Base AnomalyTracker class has no need for the AlarmMonitor.
85     }
86 
87     // Returns the sum of all past bucket values for the given dimension key.
88     int64_t getSumOverPastBuckets(const MetricDimensionKey& key) const;
89 
90     // Returns the value for a past bucket, or 0 if that bucket doesn't exist.
91     int64_t getPastBucketValue(const MetricDimensionKey& key, int64_t bucketNum) const;
92 
93     // Returns the anomaly threshold set in the configuration.
getAnomalyThreshold()94     inline int64_t getAnomalyThreshold() const {
95         return mAlert.trigger_if_sum_gt();
96     }
97 
98     // Returns the refractory period ending timestamp (in seconds) for the given key.
99     // Before this moment, any detected anomaly will be ignored.
100     // If there is no stored refractory period ending timestamp, returns 0.
getRefractoryPeriodEndsSec(const MetricDimensionKey & key)101     uint32_t getRefractoryPeriodEndsSec(const MetricDimensionKey& key) const {
102         const auto& it = mRefractoryPeriodEndsSec.find(key);
103         return it != mRefractoryPeriodEndsSec.end() ? it->second : 0;
104     }
105 
106     // Returns the (constant) number of past buckets this anomaly tracker can store.
getNumOfPastBuckets()107     inline int getNumOfPastBuckets() const {
108         return mNumOfPastBuckets;
109     }
110 
111     std::pair<optional<InvalidConfigReason>, uint64_t> getProtoHash() const;
112 
113     // Sets an alarm for the given timestamp.
114     // Replaces previous alarm if one already exists.
startAlarm(const MetricDimensionKey & dimensionKey,int64_t eventTime)115     virtual void startAlarm(const MetricDimensionKey& dimensionKey, int64_t eventTime) {
116         return;  // The base AnomalyTracker class doesn't have alarms.
117     }
118 
119     // Stops the alarm.
120     // If it should have already fired, but hasn't yet (e.g. because the AlarmManager is delayed),
121     // declare the anomaly now.
stopAlarm(const MetricDimensionKey & dimensionKey,int64_t timestampNs)122     virtual void stopAlarm(const MetricDimensionKey& dimensionKey, int64_t timestampNs) {
123         return;  // The base AnomalyTracker class doesn't have alarms.
124     }
125 
126     // Stop all the alarms owned by this tracker. Does not declare any anomalies.
cancelAllAlarms()127     virtual void cancelAllAlarms() {
128         return;  // The base AnomalyTracker class doesn't have alarms.
129     }
130 
131     // Declares an anomaly for each alarm in firedAlarms that belongs to this AnomalyTracker,
132     // and removes it from firedAlarms. Does NOT remove the alarm from the AlarmMonitor.
informAlarmsFired(int64_t timestampNs,unordered_set<sp<const InternalAlarm>,SpHash<InternalAlarm>> & firedAlarms)133     virtual void informAlarmsFired(
134             int64_t timestampNs,
135             unordered_set<sp<const InternalAlarm>, SpHash<InternalAlarm>>& firedAlarms) {
136         return; // The base AnomalyTracker class doesn't have alarms.
137     }
138 
139     // Writes metadata of the alert (refractory_period_end_sec) to AlertMetadata.
140     // Returns true if at least one element is written to alertMetadata.
141     bool writeAlertMetadataToProto(
142             int64_t currentWallClockTimeNs,
143             int64_t systemElapsedTimeNs, metadata::AlertMetadata* alertMetadata);
144 
145     void loadAlertMetadata(
146             const metadata::AlertMetadata& alertMetadata,
147             int64_t currentWallClockTimeNs,
148             int64_t systemElapsedTimeNs);
149 
150 protected:
151     // For testing only.
152     // Returns the alarm timestamp in seconds for the query dimension if it exists. Otherwise
153     // returns 0.
getAlarmTimestampSec(const MetricDimensionKey & dimensionKey)154     virtual uint32_t getAlarmTimestampSec(const MetricDimensionKey& dimensionKey) const {
155         return 0;   // The base AnomalyTracker class doesn't have alarms.
156     }
157 
158     // statsd_config.proto Alert message that defines this tracker.
159     const Alert mAlert;
160 
161     // The subscriptions that depend on this alert.
162     std::vector<Subscription> mSubscriptions;
163 
164     // A reference to the Alert's config key.
165     const ConfigKey mConfigKey;
166 
167     // Number of past buckets. One less than the total number of buckets needed
168     // for the anomaly detection (since the current bucket is not in the past).
169     const int mNumOfPastBuckets;
170 
171     // Values for each of the past mNumOfPastBuckets buckets. Always of size mNumOfPastBuckets.
172     // mPastBuckets[i] can be null, meaning that no data is present in that bucket.
173     std::vector<shared_ptr<DimToValMap>> mPastBuckets;
174 
175     // Cached sum over all existing buckets in mPastBuckets.
176     // Its buckets never contain entries of 0.
177     DimToValMap mSumOverPastBuckets;
178 
179     // The bucket number of the last added bucket.
180     int64_t mMostRecentBucketNum = -1;
181 
182     // Map from each dimension to the timestamp that its refractory period (if this anomaly was
183     // declared for that dimension) ends, in seconds. From this moment and onwards, anomalies
184     // can be declared again.
185     // Entries may be, but are not guaranteed to be, removed after the period is finished.
186     std::unordered_map<MetricDimensionKey, uint32_t> mRefractoryPeriodEndsSec;
187 
188     // Advances mMostRecentBucketNum to bucketNum, deleting any data that is now too old.
189     // Specifically, since it is now too old, removes the data for
190     //   [mMostRecentBucketNum - mNumOfPastBuckets + 1, bucketNum - mNumOfPastBuckets].
191     void advanceMostRecentBucketTo(int64_t bucketNum);
192 
193     // Add the information in the given bucket to mSumOverPastBuckets.
194     void addBucketToSum(const shared_ptr<DimToValMap>& bucket);
195 
196     // Subtract the information in the given bucket from mSumOverPastBuckets
197     // and remove any items with value 0.
198     void subtractBucketFromSum(const shared_ptr<DimToValMap>& bucket);
199 
200     // From mSumOverPastBuckets[key], subtracts bucketValue, removing it if it is now 0.
201     void subtractValueFromSum(const MetricDimensionKey& key, int64_t bucketValue);
202 
203     // Returns true if in the refractory period, else false.
204     bool isInRefractoryPeriod(int64_t timestampNs, const MetricDimensionKey& key) const;
205 
206     // Calculates the corresponding bucket index within the circular array.
207     // Requires bucketNum >= 0.
208     size_t index(int64_t bucketNum) const;
209 
210     // Resets all bucket data. For use when all the data gets stale.
211     virtual void resetStorage();
212 
213     // Informs the subscribers (incidentd, perfetto, broadcasts, etc) that an anomaly has occurred.
214     void informSubscribers(const MetricDimensionKey& key, int64_t metricId, int64_t metricValue);
215 
216     FRIEND_TEST(AnomalyTrackerTest, TestConsecutiveBuckets);
217     FRIEND_TEST(AnomalyTrackerTest, TestSparseBuckets);
218     FRIEND_TEST(CountMetricProducerTest, TestAnomalyDetectionUnSliced);
219     FRIEND_TEST(AnomalyDurationDetectionE2eTest, TestDurationMetric_SUM_single_bucket);
220     FRIEND_TEST(AnomalyDurationDetectionE2eTest, TestDurationMetric_SUM_partial_bucket);
221     FRIEND_TEST(AnomalyDurationDetectionE2eTest, TestDurationMetric_SUM_multiple_buckets);
222     FRIEND_TEST(AnomalyDurationDetectionE2eTest, TestDurationMetric_SUM_long_refractory_period);
223 
224     FRIEND_TEST(ConfigUpdateTest, TestUpdateAlerts);
225 };
226 
227 }  // namespace statsd
228 }  // namespace os
229 }  // namespace android
230