1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #pragma once 18 19 #include <gtest/gtest_prod.h> 20 #include <stdlib.h> 21 #include <utils/RefBase.h> 22 23 #include "AlarmMonitor.h" 24 #include "config/ConfigKey.h" 25 #include "guardrail/StatsdStats.h" 26 #include "hash.h" 27 #include "src/statsd_config.pb.h" // Alert 28 #include "src/statsd_metadata.pb.h" // AlertMetadata 29 #include "stats_util.h" // HashableDimensionKey and DimToValMap 30 31 namespace android { 32 namespace os { 33 namespace statsd { 34 35 using std::optional; 36 using std::shared_ptr; 37 38 // Does NOT allow negative values. 39 class AnomalyTracker : public virtual RefBase { 40 public: 41 AnomalyTracker(const Alert& alert, const ConfigKey& configKey); 42 43 virtual ~AnomalyTracker(); 44 45 // Reset appropriate state on a config update. Clear subscriptions so they can be reset. 46 void onConfigUpdated(); 47 48 // Add subscriptions that depend on this alert. addSubscription(const Subscription & subscription)49 void addSubscription(const Subscription& subscription) { 50 mSubscriptions.push_back(subscription); 51 } 52 53 // Adds a bucket for the given bucketNum (index starting at 0). 54 // If a bucket for bucketNum already exists, it will be replaced. 55 // Also, advances to bucketNum (if not in the past), effectively filling any intervening 56 // buckets with 0s. 57 void addPastBucket(const std::shared_ptr<DimToValMap>& bucket, const int64_t bucketNum); 58 59 // Inserts (or replaces) the bucket entry for the given bucketNum at the given key to be the 60 // given bucketValue. If the bucket does not exist, it will be created. 61 // Also, advances to bucketNum (if not in the past), effectively filling any intervening 62 // buckets with 0s. 63 void addPastBucket(const MetricDimensionKey& key, int64_t bucketValue, int64_t bucketNum); 64 65 // Returns true if, based on past buckets plus the new currentBucketValue (which generally 66 // represents the partially-filled current bucket), an anomaly has happened. 67 // Also advances to currBucketNum-1. 68 bool detectAnomaly(int64_t currBucketNum, const MetricDimensionKey& key, 69 int64_t currentBucketValue); 70 71 // Informs incidentd about the detected alert. 72 void declareAnomaly(int64_t timestampNs, int64_t metricId, const MetricDimensionKey& key, 73 int64_t metricValue); 74 75 // Detects if, based on past buckets plus the new currentBucketValue (which generally 76 // represents the partially-filled current bucket), an anomaly has happened, and if so, 77 // declares an anomaly and informs relevant subscribers. 78 // Also advances to currBucketNum-1. 79 void detectAndDeclareAnomaly(int64_t timestampNs, int64_t currBucketNum, int64_t metricId, 80 const MetricDimensionKey& key, int64_t currentBucketValue); 81 82 // Init the AlarmMonitor which is shared across anomaly trackers. setAlarmMonitor(const sp<AlarmMonitor> & alarmMonitor)83 virtual void setAlarmMonitor(const sp<AlarmMonitor>& alarmMonitor) { 84 return; // Base AnomalyTracker class has no need for the AlarmMonitor. 85 } 86 87 // Returns the sum of all past bucket values for the given dimension key. 88 int64_t getSumOverPastBuckets(const MetricDimensionKey& key) const; 89 90 // Returns the value for a past bucket, or 0 if that bucket doesn't exist. 91 int64_t getPastBucketValue(const MetricDimensionKey& key, int64_t bucketNum) const; 92 93 // Returns the anomaly threshold set in the configuration. getAnomalyThreshold()94 inline int64_t getAnomalyThreshold() const { 95 return mAlert.trigger_if_sum_gt(); 96 } 97 98 // Returns the refractory period ending timestamp (in seconds) for the given key. 99 // Before this moment, any detected anomaly will be ignored. 100 // If there is no stored refractory period ending timestamp, returns 0. getRefractoryPeriodEndsSec(const MetricDimensionKey & key)101 uint32_t getRefractoryPeriodEndsSec(const MetricDimensionKey& key) const { 102 const auto& it = mRefractoryPeriodEndsSec.find(key); 103 return it != mRefractoryPeriodEndsSec.end() ? it->second : 0; 104 } 105 106 // Returns the (constant) number of past buckets this anomaly tracker can store. getNumOfPastBuckets()107 inline int getNumOfPastBuckets() const { 108 return mNumOfPastBuckets; 109 } 110 111 std::pair<optional<InvalidConfigReason>, uint64_t> getProtoHash() const; 112 113 // Sets an alarm for the given timestamp. 114 // Replaces previous alarm if one already exists. startAlarm(const MetricDimensionKey & dimensionKey,int64_t eventTime)115 virtual void startAlarm(const MetricDimensionKey& dimensionKey, int64_t eventTime) { 116 return; // The base AnomalyTracker class doesn't have alarms. 117 } 118 119 // Stops the alarm. 120 // If it should have already fired, but hasn't yet (e.g. because the AlarmManager is delayed), 121 // declare the anomaly now. stopAlarm(const MetricDimensionKey & dimensionKey,int64_t timestampNs)122 virtual void stopAlarm(const MetricDimensionKey& dimensionKey, int64_t timestampNs) { 123 return; // The base AnomalyTracker class doesn't have alarms. 124 } 125 126 // Stop all the alarms owned by this tracker. Does not declare any anomalies. cancelAllAlarms()127 virtual void cancelAllAlarms() { 128 return; // The base AnomalyTracker class doesn't have alarms. 129 } 130 131 // Declares an anomaly for each alarm in firedAlarms that belongs to this AnomalyTracker, 132 // and removes it from firedAlarms. Does NOT remove the alarm from the AlarmMonitor. informAlarmsFired(int64_t timestampNs,unordered_set<sp<const InternalAlarm>,SpHash<InternalAlarm>> & firedAlarms)133 virtual void informAlarmsFired( 134 int64_t timestampNs, 135 unordered_set<sp<const InternalAlarm>, SpHash<InternalAlarm>>& firedAlarms) { 136 return; // The base AnomalyTracker class doesn't have alarms. 137 } 138 139 // Writes metadata of the alert (refractory_period_end_sec) to AlertMetadata. 140 // Returns true if at least one element is written to alertMetadata. 141 bool writeAlertMetadataToProto( 142 int64_t currentWallClockTimeNs, 143 int64_t systemElapsedTimeNs, metadata::AlertMetadata* alertMetadata); 144 145 void loadAlertMetadata( 146 const metadata::AlertMetadata& alertMetadata, 147 int64_t currentWallClockTimeNs, 148 int64_t systemElapsedTimeNs); 149 150 protected: 151 // For testing only. 152 // Returns the alarm timestamp in seconds for the query dimension if it exists. Otherwise 153 // returns 0. getAlarmTimestampSec(const MetricDimensionKey & dimensionKey)154 virtual uint32_t getAlarmTimestampSec(const MetricDimensionKey& dimensionKey) const { 155 return 0; // The base AnomalyTracker class doesn't have alarms. 156 } 157 158 // statsd_config.proto Alert message that defines this tracker. 159 const Alert mAlert; 160 161 // The subscriptions that depend on this alert. 162 std::vector<Subscription> mSubscriptions; 163 164 // A reference to the Alert's config key. 165 const ConfigKey mConfigKey; 166 167 // Number of past buckets. One less than the total number of buckets needed 168 // for the anomaly detection (since the current bucket is not in the past). 169 const int mNumOfPastBuckets; 170 171 // Values for each of the past mNumOfPastBuckets buckets. Always of size mNumOfPastBuckets. 172 // mPastBuckets[i] can be null, meaning that no data is present in that bucket. 173 std::vector<shared_ptr<DimToValMap>> mPastBuckets; 174 175 // Cached sum over all existing buckets in mPastBuckets. 176 // Its buckets never contain entries of 0. 177 DimToValMap mSumOverPastBuckets; 178 179 // The bucket number of the last added bucket. 180 int64_t mMostRecentBucketNum = -1; 181 182 // Map from each dimension to the timestamp that its refractory period (if this anomaly was 183 // declared for that dimension) ends, in seconds. From this moment and onwards, anomalies 184 // can be declared again. 185 // Entries may be, but are not guaranteed to be, removed after the period is finished. 186 std::unordered_map<MetricDimensionKey, uint32_t> mRefractoryPeriodEndsSec; 187 188 // Advances mMostRecentBucketNum to bucketNum, deleting any data that is now too old. 189 // Specifically, since it is now too old, removes the data for 190 // [mMostRecentBucketNum - mNumOfPastBuckets + 1, bucketNum - mNumOfPastBuckets]. 191 void advanceMostRecentBucketTo(int64_t bucketNum); 192 193 // Add the information in the given bucket to mSumOverPastBuckets. 194 void addBucketToSum(const shared_ptr<DimToValMap>& bucket); 195 196 // Subtract the information in the given bucket from mSumOverPastBuckets 197 // and remove any items with value 0. 198 void subtractBucketFromSum(const shared_ptr<DimToValMap>& bucket); 199 200 // From mSumOverPastBuckets[key], subtracts bucketValue, removing it if it is now 0. 201 void subtractValueFromSum(const MetricDimensionKey& key, int64_t bucketValue); 202 203 // Returns true if in the refractory period, else false. 204 bool isInRefractoryPeriod(int64_t timestampNs, const MetricDimensionKey& key) const; 205 206 // Calculates the corresponding bucket index within the circular array. 207 // Requires bucketNum >= 0. 208 size_t index(int64_t bucketNum) const; 209 210 // Resets all bucket data. For use when all the data gets stale. 211 virtual void resetStorage(); 212 213 // Informs the subscribers (incidentd, perfetto, broadcasts, etc) that an anomaly has occurred. 214 void informSubscribers(const MetricDimensionKey& key, int64_t metricId, int64_t metricValue); 215 216 FRIEND_TEST(AnomalyTrackerTest, TestConsecutiveBuckets); 217 FRIEND_TEST(AnomalyTrackerTest, TestSparseBuckets); 218 FRIEND_TEST(CountMetricProducerTest, TestAnomalyDetectionUnSliced); 219 FRIEND_TEST(AnomalyDurationDetectionE2eTest, TestDurationMetric_SUM_single_bucket); 220 FRIEND_TEST(AnomalyDurationDetectionE2eTest, TestDurationMetric_SUM_partial_bucket); 221 FRIEND_TEST(AnomalyDurationDetectionE2eTest, TestDurationMetric_SUM_multiple_buckets); 222 FRIEND_TEST(AnomalyDurationDetectionE2eTest, TestDurationMetric_SUM_long_refractory_period); 223 224 FRIEND_TEST(ConfigUpdateTest, TestUpdateAlerts); 225 }; 226 227 } // namespace statsd 228 } // namespace os 229 } // namespace android 230