1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef METRIC_PRODUCER_H 18 #define METRIC_PRODUCER_H 19 20 #include <frameworks/base/cmds/statsd/src/active_config_list.pb.h> 21 #include <utils/RefBase.h> 22 23 #include <unordered_map> 24 25 #include "HashableDimensionKey.h" 26 #include "anomaly/AnomalyTracker.h" 27 #include "condition/ConditionWizard.h" 28 #include "config/ConfigKey.h" 29 #include "matchers/matcher_util.h" 30 #include "packages/PackageInfoListener.h" 31 #include "state/StateListener.h" 32 #include "state/StateManager.h" 33 34 namespace android { 35 namespace os { 36 namespace statsd { 37 38 // Keep this in sync with DumpReportReason enum in stats_log.proto 39 enum DumpReportReason { 40 DEVICE_SHUTDOWN = 1, 41 CONFIG_UPDATED = 2, 42 CONFIG_REMOVED = 3, 43 GET_DATA_CALLED = 4, 44 ADB_DUMP = 5, 45 CONFIG_RESET = 6, 46 STATSCOMPANION_DIED = 7, 47 TERMINATION_SIGNAL_RECEIVED = 8 48 }; 49 50 // If the metric has no activation requirement, it will be active once the metric producer is 51 // created. 52 // If the metric needs to be activated by atoms, the metric producer will start 53 // with kNotActive state, turn to kActive or kActiveOnBoot when the activation event arrives, become 54 // kNotActive when it reaches the duration limit (timebomb). If the activation event arrives again 55 // before or after it expires, the event producer will be re-activated and ttl will be reset. 56 enum ActivationState { 57 kNotActive = 0, 58 kActive = 1, 59 kActiveOnBoot = 2, 60 }; 61 62 enum DumpLatency { 63 // In some cases, we only have a short time range to do the dump, e.g. statsd is being killed. 64 // We might be able to return all the data in this mode. For instance, pull metrics might need 65 // to be pulled when the current bucket is requested. 66 FAST = 1, 67 // In other cases, it is fine for a dump to take more than a few milliseconds, e.g. config 68 // updates. 69 NO_TIME_CONSTRAINTS = 2 70 }; 71 72 // Keep this in sync with BucketDropReason enum in stats_log.proto 73 enum BucketDropReason { 74 // For ValueMetric, a bucket is dropped during a dump report request iff 75 // current bucket should be included, a pull is needed (pulled metric and 76 // condition is true), and we are under fast time constraints. 77 DUMP_REPORT_REQUESTED = 1, 78 EVENT_IN_WRONG_BUCKET = 2, 79 CONDITION_UNKNOWN = 3, 80 PULL_FAILED = 4, 81 PULL_DELAYED = 5, 82 DIMENSION_GUARDRAIL_REACHED = 6, 83 MULTIPLE_BUCKETS_SKIPPED = 7, 84 // Not an invalid bucket case, but the bucket is dropped. 85 BUCKET_TOO_SMALL = 8, 86 // Not an invalid bucket case, but the bucket is skipped. 87 NO_DATA = 9 88 }; 89 90 struct Activation { ActivationActivation91 Activation(const ActivationType& activationType, const int64_t ttlNs) 92 : ttl_ns(ttlNs), 93 start_ns(0), 94 state(ActivationState::kNotActive), 95 activationType(activationType) {} 96 97 const int64_t ttl_ns; 98 int64_t start_ns; 99 ActivationState state; 100 const ActivationType activationType; 101 }; 102 103 struct DropEvent { 104 // Reason for dropping the bucket and/or marking the bucket invalid. 105 BucketDropReason reason; 106 // The timestamp of the drop event. 107 int64_t dropTimeNs; 108 }; 109 110 struct SkippedBucket { 111 // Start time of the dropped bucket. 112 int64_t bucketStartTimeNs; 113 // End time of the dropped bucket. 114 int64_t bucketEndTimeNs; 115 // List of events that invalidated this bucket. 116 std::vector<DropEvent> dropEvents; 117 resetSkippedBucket118 void reset() { 119 bucketStartTimeNs = 0; 120 bucketEndTimeNs = 0; 121 dropEvents.clear(); 122 } 123 }; 124 125 // A MetricProducer is responsible for compute one single metrics, creating stats log report, and 126 // writing the report to dropbox. MetricProducers should respond to package changes as required in 127 // PackageInfoListener, but if none of the metrics are slicing by package name, then the update can 128 // be a no-op. 129 class MetricProducer : public virtual android::RefBase, public virtual StateListener { 130 public: 131 MetricProducer(const int64_t& metricId, const ConfigKey& key, const int64_t timeBaseNs, 132 const int conditionIndex, const vector<ConditionState>& initialConditionCache, 133 const sp<ConditionWizard>& wizard, 134 const std::unordered_map<int, std::shared_ptr<Activation>>& eventActivationMap, 135 const std::unordered_map<int, std::vector<std::shared_ptr<Activation>>>& 136 eventDeactivationMap, 137 const vector<int>& slicedStateAtoms, 138 const unordered_map<int, unordered_map<int, int64_t>>& stateGroupMap); 139 ~MetricProducer()140 virtual ~MetricProducer(){}; 141 initialCondition(const int conditionIndex,const vector<ConditionState> & initialConditionCache)142 ConditionState initialCondition(const int conditionIndex, 143 const vector<ConditionState>& initialConditionCache) const { 144 return conditionIndex >= 0 ? initialConditionCache[conditionIndex] : ConditionState::kTrue; 145 } 146 147 /** 148 * Force a partial bucket split on app upgrade 149 */ notifyAppUpgrade(const int64_t & eventTimeNs)150 virtual void notifyAppUpgrade(const int64_t& eventTimeNs) { 151 std::lock_guard<std::mutex> lock(mMutex); 152 flushLocked(eventTimeNs); 153 }; 154 notifyAppRemoved(const int64_t & eventTimeNs)155 void notifyAppRemoved(const int64_t& eventTimeNs) { 156 // Force buckets to split on removal also. 157 notifyAppUpgrade(eventTimeNs); 158 }; 159 160 /** 161 * Force a partial bucket split on boot complete. 162 */ onStatsdInitCompleted(const int64_t & eventTimeNs)163 virtual void onStatsdInitCompleted(const int64_t& eventTimeNs) { 164 std::lock_guard<std::mutex> lock(mMutex); 165 flushLocked(eventTimeNs); 166 } 167 // Consume the parsed stats log entry that already matched the "what" of the metric. onMatchedLogEvent(const size_t matcherIndex,const LogEvent & event)168 void onMatchedLogEvent(const size_t matcherIndex, const LogEvent& event) { 169 std::lock_guard<std::mutex> lock(mMutex); 170 onMatchedLogEventLocked(matcherIndex, event); 171 } 172 onConditionChanged(const bool condition,const int64_t eventTime)173 void onConditionChanged(const bool condition, const int64_t eventTime) { 174 std::lock_guard<std::mutex> lock(mMutex); 175 onConditionChangedLocked(condition, eventTime); 176 } 177 onSlicedConditionMayChange(bool overallCondition,const int64_t eventTime)178 void onSlicedConditionMayChange(bool overallCondition, const int64_t eventTime) { 179 std::lock_guard<std::mutex> lock(mMutex); 180 onSlicedConditionMayChangeLocked(overallCondition, eventTime); 181 } 182 isConditionSliced()183 bool isConditionSliced() const { 184 std::lock_guard<std::mutex> lock(mMutex); 185 return mConditionSliced; 186 }; 187 onStateChanged(const int64_t eventTimeNs,const int32_t atomId,const HashableDimensionKey & primaryKey,const FieldValue & oldState,const FieldValue & newState)188 void onStateChanged(const int64_t eventTimeNs, const int32_t atomId, 189 const HashableDimensionKey& primaryKey, const FieldValue& oldState, 190 const FieldValue& newState){}; 191 192 // Output the metrics data to [protoOutput]. All metrics reports end with the same timestamp. 193 // This method clears all the past buckets. onDumpReport(const int64_t dumpTimeNs,const bool include_current_partial_bucket,const bool erase_data,const DumpLatency dumpLatency,std::set<string> * str_set,android::util::ProtoOutputStream * protoOutput)194 void onDumpReport(const int64_t dumpTimeNs, 195 const bool include_current_partial_bucket, 196 const bool erase_data, 197 const DumpLatency dumpLatency, 198 std::set<string> *str_set, 199 android::util::ProtoOutputStream* protoOutput) { 200 std::lock_guard<std::mutex> lock(mMutex); 201 return onDumpReportLocked(dumpTimeNs, include_current_partial_bucket, erase_data, 202 dumpLatency, str_set, protoOutput); 203 } 204 clearPastBuckets(const int64_t dumpTimeNs)205 void clearPastBuckets(const int64_t dumpTimeNs) { 206 std::lock_guard<std::mutex> lock(mMutex); 207 return clearPastBucketsLocked(dumpTimeNs); 208 } 209 prepareFirstBucket()210 void prepareFirstBucket() { 211 std::lock_guard<std::mutex> lock(mMutex); 212 prepareFirstBucketLocked(); 213 } 214 215 // Returns the memory in bytes currently used to store this metric's data. Does not change 216 // state. byteSize()217 size_t byteSize() const { 218 std::lock_guard<std::mutex> lock(mMutex); 219 return byteSizeLocked(); 220 } 221 dumpStates(FILE * out,bool verbose)222 void dumpStates(FILE* out, bool verbose) const { 223 std::lock_guard<std::mutex> lock(mMutex); 224 dumpStatesLocked(out, verbose); 225 } 226 227 // Let MetricProducer drop in-memory data to save memory. 228 // We still need to keep future data valid and anomaly tracking work, which means we will 229 // have to flush old data, informing anomaly trackers then safely drop old data. 230 // We still keep current bucket data for future metrics' validity. dropData(const int64_t dropTimeNs)231 void dropData(const int64_t dropTimeNs) { 232 std::lock_guard<std::mutex> lock(mMutex); 233 dropDataLocked(dropTimeNs); 234 } 235 loadActiveMetric(const ActiveMetric & activeMetric,int64_t currentTimeNs)236 void loadActiveMetric(const ActiveMetric& activeMetric, int64_t currentTimeNs) { 237 std::lock_guard<std::mutex> lock(mMutex); 238 loadActiveMetricLocked(activeMetric, currentTimeNs); 239 } 240 activate(int activationTrackerIndex,int64_t elapsedTimestampNs)241 void activate(int activationTrackerIndex, int64_t elapsedTimestampNs) { 242 std::lock_guard<std::mutex> lock(mMutex); 243 activateLocked(activationTrackerIndex, elapsedTimestampNs); 244 } 245 cancelEventActivation(int deactivationTrackerIndex)246 void cancelEventActivation(int deactivationTrackerIndex) { 247 std::lock_guard<std::mutex> lock(mMutex); 248 cancelEventActivationLocked(deactivationTrackerIndex); 249 } 250 isActive()251 bool isActive() const { 252 std::lock_guard<std::mutex> lock(mMutex); 253 return isActiveLocked(); 254 } 255 256 void flushIfExpire(int64_t elapsedTimestampNs); 257 258 void writeActiveMetricToProtoOutputStream( 259 int64_t currentTimeNs, const DumpReportReason reason, ProtoOutputStream* proto); 260 261 // Start: getters/setters getMetricId()262 inline const int64_t& getMetricId() const { 263 return mMetricId; 264 } 265 266 // For test only. getCurrentBucketNum()267 inline int64_t getCurrentBucketNum() const { 268 return mCurrentBucketNum; 269 } 270 getBucketSizeInNs()271 int64_t getBucketSizeInNs() const { 272 std::lock_guard<std::mutex> lock(mMutex); 273 return mBucketSizeNs; 274 } 275 getSlicedStateAtoms()276 inline const std::vector<int> getSlicedStateAtoms() { 277 std::lock_guard<std::mutex> lock(mMutex); 278 return mSlicedStateAtoms; 279 } 280 281 /* If alert is valid, adds an AnomalyTracker and returns it. If invalid, returns nullptr. */ addAnomalyTracker(const Alert & alert,const sp<AlarmMonitor> & anomalyAlarmMonitor)282 virtual sp<AnomalyTracker> addAnomalyTracker(const Alert &alert, 283 const sp<AlarmMonitor>& anomalyAlarmMonitor) { 284 std::lock_guard<std::mutex> lock(mMutex); 285 sp<AnomalyTracker> anomalyTracker = new AnomalyTracker(alert, mConfigKey); 286 if (anomalyTracker != nullptr) { 287 mAnomalyTrackers.push_back(anomalyTracker); 288 } 289 return anomalyTracker; 290 } 291 // End: getters/setters 292 protected: 293 /** 294 * Flushes the current bucket if the eventTime is after the current bucket's end time. 295 */ flushIfNeededLocked(const int64_t & eventTime)296 virtual void flushIfNeededLocked(const int64_t& eventTime){}; 297 298 /** 299 * For metrics that aggregate (ie, every metric producer except for EventMetricProducer), 300 * we need to be able to flush the current buckets on demand (ie, end the current bucket and 301 * start new bucket). If this function is called when eventTimeNs is greater than the current 302 * bucket's end timestamp, than we flush up to the end of the latest full bucket; otherwise, 303 * we assume that we want to flush a partial bucket. The bucket start timestamp and bucket 304 * number are not changed by this function. This method should only be called by 305 * flushIfNeededLocked or flushLocked or the app upgrade handler; the caller MUST update the 306 * bucket timestamp and bucket number as needed. 307 */ flushCurrentBucketLocked(const int64_t & eventTimeNs,const int64_t & nextBucketStartTimeNs)308 virtual void flushCurrentBucketLocked(const int64_t& eventTimeNs, 309 const int64_t& nextBucketStartTimeNs) {}; 310 311 /** 312 * Flushes all the data including the current partial bucket. 313 */ flushLocked(const int64_t & eventTimeNs)314 virtual void flushLocked(const int64_t& eventTimeNs) { 315 flushIfNeededLocked(eventTimeNs); 316 flushCurrentBucketLocked(eventTimeNs, eventTimeNs); 317 }; 318 319 /* 320 * Individual metrics can implement their own business logic here. All pre-processing is done. 321 * 322 * [matcherIndex]: the index of the matcher which matched this event. This is interesting to 323 * DurationMetric, because it has start/stop/stop_all 3 matchers. 324 * [eventKey]: the extracted dimension key for the final output. if the metric doesn't have 325 * dimensions, it will be DEFAULT_DIMENSION_KEY 326 * [conditionKey]: the keys of conditions which should be used to query the condition for this 327 * target event (from MetricConditionLink). This is passed to individual metrics 328 * because DurationMetric needs it to be cached. 329 * [condition]: whether condition is met. If condition is sliced, this is the result coming from 330 * query with ConditionWizard; If condition is not sliced, this is the 331 * nonSlicedCondition. 332 * [event]: the log event, just in case the metric needs its data, e.g., EventMetric. 333 */ 334 virtual void onMatchedLogEventInternalLocked( 335 const size_t matcherIndex, const MetricDimensionKey& eventKey, 336 const ConditionKey& conditionKey, bool condition, const LogEvent& event, 337 const map<int, HashableDimensionKey>& statePrimaryKeys) = 0; 338 339 // Consume the parsed stats log entry that already matched the "what" of the metric. 340 virtual void onMatchedLogEventLocked(const size_t matcherIndex, const LogEvent& event); 341 virtual void onConditionChangedLocked(const bool condition, const int64_t eventTime) = 0; 342 virtual void onSlicedConditionMayChangeLocked(bool overallCondition, 343 const int64_t eventTime) = 0; 344 virtual void onDumpReportLocked(const int64_t dumpTimeNs, 345 const bool include_current_partial_bucket, 346 const bool erase_data, 347 const DumpLatency dumpLatency, 348 std::set<string> *str_set, 349 android::util::ProtoOutputStream* protoOutput) = 0; 350 virtual void clearPastBucketsLocked(const int64_t dumpTimeNs) = 0; prepareFirstBucketLocked()351 virtual void prepareFirstBucketLocked(){}; 352 virtual size_t byteSizeLocked() const = 0; 353 virtual void dumpStatesLocked(FILE* out, bool verbose) const = 0; 354 virtual void dropDataLocked(const int64_t dropTimeNs) = 0; 355 void loadActiveMetricLocked(const ActiveMetric& activeMetric, int64_t currentTimeNs); 356 void activateLocked(int activationTrackerIndex, int64_t elapsedTimestampNs); 357 void cancelEventActivationLocked(int deactivationTrackerIndex); 358 359 bool evaluateActiveStateLocked(int64_t elapsedTimestampNs); 360 onActiveStateChangedLocked(const int64_t & eventTimeNs)361 virtual void onActiveStateChangedLocked(const int64_t& eventTimeNs) { 362 if (!mIsActive) { 363 flushLocked(eventTimeNs); 364 } 365 } 366 isActiveLocked()367 inline bool isActiveLocked() const { 368 return mIsActive; 369 } 370 371 // Convenience to compute the current bucket's end time, which is always aligned with the 372 // start time of the metric. getCurrentBucketEndTimeNs()373 int64_t getCurrentBucketEndTimeNs() const { 374 return mTimeBaseNs + (mCurrentBucketNum + 1) * mBucketSizeNs; 375 } 376 getBucketNumFromEndTimeNs(const int64_t endNs)377 int64_t getBucketNumFromEndTimeNs(const int64_t endNs) { 378 return (endNs - mTimeBaseNs) / mBucketSizeNs - 1; 379 } 380 381 // Query StateManager for original state value using the queryKey. 382 // The field and value are output. 383 void queryStateValue(const int32_t atomId, const HashableDimensionKey& queryKey, 384 FieldValue* value); 385 386 // If a state map exists for the given atom, replace the original state 387 // value with the group id mapped to the value. 388 // If no state map exists, keep the original state value. 389 void mapStateValue(const int32_t atomId, FieldValue* value); 390 391 // Returns a HashableDimensionKey with unknown state value for each state 392 // atom. 393 HashableDimensionKey getUnknownStateKey(); 394 395 DropEvent buildDropEvent(const int64_t dropTimeNs, const BucketDropReason reason); 396 397 // Returns true if the number of drop events in the current bucket has 398 // exceeded the maximum number allowed, which is currently capped at 10. 399 bool maxDropEventsReached(); 400 401 const int64_t mMetricId; 402 403 const ConfigKey mConfigKey; 404 405 // The time when this metric producer was first created. The end time for the current bucket 406 // can be computed from this based on mCurrentBucketNum. 407 int64_t mTimeBaseNs; 408 409 // Start time may not be aligned with the start of statsd if there is an app upgrade in the 410 // middle of a bucket. 411 int64_t mCurrentBucketStartTimeNs; 412 413 // Used by anomaly detector to track which bucket we are in. This is not sent with the produced 414 // report. 415 int64_t mCurrentBucketNum; 416 417 int64_t mBucketSizeNs; 418 419 ConditionState mCondition; 420 421 int mConditionTrackerIndex; 422 423 bool mConditionSliced; 424 425 sp<ConditionWizard> mWizard; 426 427 bool mContainANYPositionInDimensionsInWhat; 428 429 bool mSliceByPositionALL; 430 431 vector<Matcher> mDimensionsInWhat; // The dimensions_in_what defined in statsd_config 432 433 // True iff the metric to condition links cover all dimension fields in the condition tracker. 434 // This field is always false for combinational condition trackers. 435 bool mHasLinksToAllConditionDimensionsInTracker; 436 437 std::vector<Metric2Condition> mMetric2ConditionLinks; 438 439 std::vector<sp<AnomalyTracker>> mAnomalyTrackers; 440 441 mutable std::mutex mMutex; 442 443 // When the metric producer has multiple activations, these activations are ORed to determine 444 // whether the metric producer is ready to generate metrics. 445 std::unordered_map<int, std::shared_ptr<Activation>> mEventActivationMap; 446 447 // Maps index of atom matcher for deactivation to a list of Activation structs. 448 std::unordered_map<int, std::vector<std::shared_ptr<Activation>>> mEventDeactivationMap; 449 450 bool mIsActive; 451 452 // The slice_by_state atom ids defined in statsd_config. 453 const std::vector<int32_t> mSlicedStateAtoms; 454 455 // Maps atom ids and state values to group_ids (<atom_id, <value, group_id>>). 456 const std::unordered_map<int32_t, std::unordered_map<int, int64_t>> mStateGroupMap; 457 458 // MetricStateLinks defined in statsd_config that link fields in the state 459 // atom to fields in the "what" atom. 460 std::vector<Metric2State> mMetric2StateLinks; 461 462 SkippedBucket mCurrentSkippedBucket; 463 // Buckets that were invalidated and had their data dropped. 464 std::vector<SkippedBucket> mSkippedBuckets; 465 466 FRIEND_TEST(CountMetricE2eTest, TestSlicedState); 467 FRIEND_TEST(CountMetricE2eTest, TestSlicedStateWithMap); 468 FRIEND_TEST(CountMetricE2eTest, TestMultipleSlicedStates); 469 FRIEND_TEST(CountMetricE2eTest, TestSlicedStateWithPrimaryFields); 470 FRIEND_TEST(CountMetricE2eTest, TestInitialConditionChanges); 471 472 FRIEND_TEST(DurationMetricE2eTest, TestOneBucket); 473 FRIEND_TEST(DurationMetricE2eTest, TestTwoBuckets); 474 FRIEND_TEST(DurationMetricE2eTest, TestWithActivation); 475 FRIEND_TEST(DurationMetricE2eTest, TestWithCondition); 476 FRIEND_TEST(DurationMetricE2eTest, TestWithSlicedCondition); 477 FRIEND_TEST(DurationMetricE2eTest, TestWithActivationAndSlicedCondition); 478 FRIEND_TEST(DurationMetricE2eTest, TestWithSlicedState); 479 FRIEND_TEST(DurationMetricE2eTest, TestWithConditionAndSlicedState); 480 FRIEND_TEST(DurationMetricE2eTest, TestWithSlicedStateMapped); 481 FRIEND_TEST(DurationMetricE2eTest, TestSlicedStatePrimaryFieldsNotSubsetDimInWhat); 482 FRIEND_TEST(DurationMetricE2eTest, TestWithSlicedStatePrimaryFieldsSubset); 483 484 FRIEND_TEST(MetricActivationE2eTest, TestCountMetric); 485 FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithOneDeactivation); 486 FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithTwoDeactivations); 487 FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithSameDeactivation); 488 FRIEND_TEST(MetricActivationE2eTest, TestCountMetricWithTwoMetricsTwoDeactivations); 489 490 FRIEND_TEST(StatsLogProcessorTest, TestActiveConfigMetricDiskWriteRead); 491 FRIEND_TEST(StatsLogProcessorTest, TestActivationOnBoot); 492 FRIEND_TEST(StatsLogProcessorTest, TestActivationOnBootMultipleActivations); 493 FRIEND_TEST(StatsLogProcessorTest, 494 TestActivationOnBootMultipleActivationsDifferentActivationTypes); 495 FRIEND_TEST(StatsLogProcessorTest, TestActivationsPersistAcrossSystemServerRestart); 496 497 FRIEND_TEST(ValueMetricE2eTest, TestInitWithSlicedState); 498 FRIEND_TEST(ValueMetricE2eTest, TestInitWithSlicedState_WithDimensions); 499 FRIEND_TEST(ValueMetricE2eTest, TestInitWithSlicedState_WithIncorrectDimensions); 500 FRIEND_TEST(ValueMetricE2eTest, TestInitialConditionChanges); 501 502 FRIEND_TEST(MetricsManagerTest, TestInitialConditions); 503 }; 504 505 } // namespace statsd 506 } // namespace os 507 } // namespace android 508 #endif // METRIC_PRODUCER_H 509