1 /* 2 * Copyright 2017, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #pragma once 17 18 #include <gtest/gtest_prod.h> 19 #include <log/log_time.h> 20 #include <src/guardrail/stats_log_enums.pb.h> 21 22 #include <list> 23 #include <mutex> 24 #include <string> 25 #include <unordered_map> 26 #include <vector> 27 28 #include "config/ConfigKey.h" 29 #include "logd/logevent_util.h" 30 31 namespace android { 32 namespace os { 33 namespace statsd { 34 35 struct InvalidConfigReason { 36 InvalidConfigReasonEnum reason; 37 std::optional<int64_t> metricId; 38 std::optional<int64_t> stateId; 39 std::optional<int64_t> alertId; 40 std::optional<int64_t> alarmId; 41 std::optional<int64_t> subscriptionId; 42 std::vector<int64_t> matcherIds; 43 std::vector<int64_t> conditionIds; InvalidConfigReasonInvalidConfigReason44 InvalidConfigReason(){}; InvalidConfigReasonInvalidConfigReason45 InvalidConfigReason(InvalidConfigReasonEnum reason) : reason(reason){}; InvalidConfigReasonInvalidConfigReason46 InvalidConfigReason(InvalidConfigReasonEnum reason, int64_t metricId) 47 : reason(reason), metricId(metricId){}; 48 bool operator==(const InvalidConfigReason& other) const { 49 return (this->reason == other.reason) && (this->metricId == other.metricId) && 50 (this->stateId == other.stateId) && (this->alertId == other.alertId) && 51 (this->alarmId == other.alarmId) && (this->subscriptionId == other.subscriptionId) && 52 
(this->matcherIds == other.matcherIds) && (this->conditionIds == other.conditionIds); 53 } 54 }; 55 56 typedef struct { 57 int64_t insertError = 0; 58 int64_t tableCreationError = 0; 59 int64_t tableDeletionError = 0; 60 std::list<int64_t> flushLatencyNs; 61 int64_t categoryChangedCount = 0; 62 } RestrictedMetricStats; 63 64 struct DumpReportStats { DumpReportStatsDumpReportStats65 DumpReportStats(int32_t dumpReportSec, int32_t dumpReportSize, int32_t reportNumber) 66 : mDumpReportTimeSec(dumpReportSec), 67 mDumpReportSizeBytes(dumpReportSize), 68 mDumpReportNumber(reportNumber) { 69 } 70 int32_t mDumpReportTimeSec = 0; 71 int32_t mDumpReportSizeBytes = 0; 72 int32_t mDumpReportNumber = 0; 73 }; 74 75 struct ConfigStats { 76 int32_t uid; 77 int64_t id; 78 int32_t creation_time_sec; 79 int32_t deletion_time_sec = 0; 80 int32_t reset_time_sec = 0; 81 int32_t metric_count; 82 int32_t condition_count; 83 int32_t matcher_count; 84 int32_t alert_count; 85 bool is_valid; 86 bool device_info_table_creation_failed = false; 87 int32_t db_corrupted_count = 0; 88 int32_t db_deletion_stat_failed = 0; 89 int32_t db_deletion_size_exceeded_limit = 0; 90 int32_t db_deletion_config_invalid = 0; 91 int32_t db_deletion_too_old = 0; 92 int32_t db_deletion_config_removed = 0; 93 int32_t db_deletion_config_updated = 0; 94 // Stores the number of ConfigMetadataProvider promotion failures 95 int32_t config_metadata_provider_promote_failure = 0; 96 97 // Stores reasons for why config is valid or not 98 std::optional<InvalidConfigReason> reason; 99 100 std::list<int32_t> broadcast_sent_time_sec; 101 102 // Times at which this config is activated. 103 std::list<int32_t> activation_time_sec; 104 105 // Times at which this config is deactivated. 106 std::list<int32_t> deactivation_time_sec; 107 108 std::list<int32_t> data_drop_time_sec; 109 // Number of bytes dropped at corresponding time. 
110 std::list<int64_t> data_drop_bytes; 111 112 std::list<DumpReportStats> dump_report_stats; 113 114 // Stores how many times a matcher have been matched. The map size is capped by kMaxConfigCount. 115 std::map<const int64_t, int> matcher_stats; 116 117 // Stores the number of output tuple of condition trackers when it's bigger than 118 // kDimensionKeySizeSoftLimit. When you see the number is kDimensionKeySizeHardLimit +1, 119 // it means some data has been dropped. The map size is capped by kMaxConfigCount. 120 std::map<const int64_t, int> condition_stats; 121 122 // Stores the number of output tuple of metric producers when it's bigger than 123 // kDimensionKeySizeSoftLimit. When you see the number is kDimensionKeySizeHardLimit +1, 124 // it means some data has been dropped. The map size is capped by kMaxConfigCount. 125 std::map<const int64_t, int> metric_stats; 126 127 // Stores the max number of output tuple of dimensions in condition across dimensions in what 128 // when it's bigger than kDimensionKeySizeSoftLimit. When you see the number is 129 // kDimensionKeySizeHardLimit +1, it means some data has been dropped. The map size is capped by 130 // kMaxConfigCount. 131 std::map<const int64_t, int> metric_dimension_in_condition_stats; 132 133 // Stores the number of times an anomaly detection alert has been declared. 134 // The map size is capped by kMaxConfigCount. 135 std::map<const int64_t, int> alert_stats; 136 137 // Stores the config ID for each sub-config used. 138 std::list<std::pair<const int64_t, const int32_t>> annotations; 139 140 // Maps metric ID of restricted metric to its stats. 141 std::map<int64_t, RestrictedMetricStats> restricted_metric_stats; 142 143 std::list<int64_t> total_flush_latency_ns; 144 145 // Stores the last 20 timestamps for computing sqlite db size. 146 std::list<int64_t> total_db_size_timestamps; 147 148 // Stores the last 20 sizes of the sqlite db. 
149 std::list<int64_t> total_db_sizes; 150 }; 151 152 struct UidMapStats { 153 int32_t changes = 0; 154 int32_t bytes_used = 0; 155 int32_t dropped_changes = 0; 156 int32_t deleted_apps = 0; 157 }; 158 159 struct SubscriptionStats { 160 int32_t pushed_atom_count = 0; 161 int32_t pulled_atom_count = 0; 162 int32_t start_time_sec = 0; 163 int32_t end_time_sec = 0; 164 int32_t flush_count = 0; 165 }; 166 167 // Keeps track of stats of statsd. 168 // Single instance shared across the process. All public methods are thread safe. 169 class StatsdStats { 170 public: 171 static StatsdStats& getInstance(); ~StatsdStats()172 ~StatsdStats(){}; 173 174 const static int kDimensionKeySizeSoftLimit = 500; 175 static constexpr int kDimensionKeySizeHardLimit = 800; 176 static constexpr int kDimensionKeySizeHardLimitMin = 800; 177 static constexpr int kDimensionKeySizeHardLimitMax = 3000; 178 179 // Per atom dimension key size limit 180 static const std::map<int, std::pair<size_t, size_t>> kAtomDimensionKeySizeLimitMap; 181 182 const static int kMaxConfigCountPerUid = 20; 183 const static int kMaxAlertCountPerConfig = 200; 184 const static int kMaxConditionCountPerConfig = 500; 185 const static int kMaxMetricCountPerConfig = 3000; 186 const static int kMaxMatcherCountPerConfig = 3500; 187 188 // The max number of old config stats we keep. 189 const static int kMaxIceBoxSize = 20; 190 191 const static int kMaxLoggerErrors = 20; 192 193 const static int kMaxSystemServerRestarts = 20; 194 195 const static int kMaxTimestampCount = 20; 196 197 const static int kMaxLogSourceCount = 150; 198 199 const static int kMaxPullAtomPackages = 100; 200 201 const static int kMaxRestrictedMetricQueryCount = 20; 202 203 const static int kMaxRestrictedMetricFlushLatencyCount = 20; 204 205 const static int kMaxRestrictedConfigFlushLatencyCount = 20; 206 207 const static int kMaxRestrictedConfigDbSizeCount = 20; 208 209 // Max memory allowed for storing metrics per configuration. 
If this limit is exceeded, statsd 210 // drops the metrics data in memory. 211 static const size_t kDefaultMaxMetricsBytesPerConfig = 2 * 1024 * 1024; 212 213 // Hard limit for custom memory allowed for storing metrics per configuration. 214 static const size_t kHardMaxMetricsBytesPerConfig = 20 * 1024 * 1024; 215 216 // Max memory allowed for storing metrics per configuration before triggering a intent to fetch 217 // data. 218 static const size_t kHardMaxTriggerGetDataBytes = 10 * 1024 * 1024; 219 220 // Soft memory limit per configuration. Once this limit is exceeded, we begin notifying the 221 // data subscriber that it's time to call getData. 222 static const size_t kDefaultBytesPerConfigTriggerGetData = 192 * 1024; 223 224 // Soft memory limit per restricted configuration. Once this limit is exceeded, 225 // we begin flush in-memory restricted metrics to database. 226 static const size_t kBytesPerRestrictedConfigTriggerFlush = 25 * 1024; 227 228 // Cap the UID map's memory usage to this. This should be fairly high since the UID information 229 // is critical for understanding the metrics. 230 const static size_t kMaxBytesUsedUidMap = 50 * 1024; 231 232 // The number of deleted apps that are stored in the uid map. 233 const static int kMaxDeletedAppsInUidMap = 100; 234 235 /* Minimum period between two broadcasts in nanoseconds. */ 236 static const int64_t kMinBroadcastPeriodNs = 60 * NS_PER_SEC; 237 238 /* Min period between two checks of byte size per config key in nanoseconds. */ 239 static const int64_t kMinByteSizeCheckPeriodNs = 1 * 60 * NS_PER_SEC; 240 241 // Min period between two checks of byte size per config key in nanoseconds for V2 memory 242 // calculations. 243 static const int64_t kMinByteSizeV2CheckPeriodNs = 5 * 60 * NS_PER_SEC; 244 245 /* Min period between two checks of restricted metrics TTLs. */ 246 static const int64_t kMinTtlCheckPeriodNs = 60 * 60 * NS_PER_SEC; 247 248 /* Min period between two flush operations of restricted metrics. 
*/ 249 static const int64_t kMinFlushRestrictedPeriodNs = 60 * 60 * NS_PER_SEC; 250 251 /* Min period between two db guardrail check operations of restricted metrics. */ 252 static const int64_t kMinDbGuardrailEnforcementPeriodNs = 60 * 60 * NS_PER_SEC; 253 254 /* Minimum period between two activation broadcasts in nanoseconds. */ 255 static const int64_t kMinActivationBroadcastPeriodNs = 10 * NS_PER_SEC; 256 257 // Maximum age (30 days) that files on disk can exist in seconds. 258 static const int kMaxAgeSecond = 60 * 60 * 24 * 30; 259 260 // Maximum age (2 days) that local history files on disk can exist in seconds. 261 static const int kMaxLocalHistoryAgeSecond = 60 * 60 * 24 * 2; 262 263 // Maximum number of files (1000) that can be in stats directory on disk. 264 static const int kMaxFileNumber = 1000; 265 266 // Maximum size of all files that can be written to stats directory on disk. 267 static const int kMaxFileSize = 50 * 1024 * 1024; 268 269 // How long to try to clear puller cache from last time 270 static const long kPullerCacheClearIntervalSec = 1; 271 272 // Max time to do a pull. 273 static const int64_t kPullMaxDelayNs = 30 * NS_PER_SEC; 274 275 // Maximum number of pushed atoms statsd stats will track above kMaxPushedAtomId. 276 static const int kMaxNonPlatformPushedAtoms = 600; 277 278 // Maximum number of pushed atoms error statsd stats will track. 279 static const int kMaxPushedAtomErrorStatsSize = 100; 280 281 // Maximum number of socket loss stats to track. 282 static const int kMaxSocketLossStatsSize = 50; 283 284 // Maximum atom id value that we consider a platform pushed atom. 285 // This should be updated once highest pushed atom id in atoms.proto approaches this value. 286 static const int kMaxPushedAtomId = 1500; 287 288 // Atom id that is the start of the pulled atoms. 289 static const int kPullAtomStartTag = 10000; 290 291 // Atom id that is the start of vendor atoms. 
292 static const int kVendorAtomStartTag = 100000; 293 294 // Vendor pulled atom start id. 295 static const int32_t kVendorPulledAtomStartTag = 150000; 296 297 // Beginning of range for timestamp truncation. 298 static const int32_t kTimestampTruncationStartTag = 300000; 299 300 // End of range for timestamp truncation. 301 static const int32_t kTimestampTruncationEndTag = 304999; 302 303 // Max accepted atom id. 304 static const int32_t kMaxAtomTag = 200000; 305 306 static const int64_t kInt64Max = 0x7fffffffffffffffLL; 307 308 static const int32_t kMaxLoggedBucketDropEvents = 10; 309 310 static const int32_t kNumBinsInSocketBatchReadHistogram = 30; 311 static const int32_t kLargeBatchReadThreshold = 1000; 312 static const int32_t kMaxLargeBatchReadSize = 20; 313 static const int32_t kMaxLargeBatchReadAtomThreshold = 50; 314 315 /** 316 * Report a new config has been received and report the static stats about the config. 317 * 318 * The static stats include: the count of metrics, conditions, matchers, and alerts. 319 * If the config is not valid, this config stats will be put into icebox immediately. 320 */ 321 void noteConfigReceived(const ConfigKey& key, int metricsCount, int conditionsCount, 322 int matchersCount, int alertCount, 323 const std::list<std::pair<const int64_t, const int32_t>>& annotations, 324 const std::optional<InvalidConfigReason>& reason); 325 /** 326 * Report a config has been removed. 327 */ 328 void noteConfigRemoved(const ConfigKey& key); 329 /** 330 * Report a config has been reset when ttl expires. 331 */ 332 void noteConfigReset(const ConfigKey& key); 333 334 /** 335 * Report a broadcast has been sent to a config owner to collect the data. 336 */ 337 void noteBroadcastSent(const ConfigKey& key); 338 339 /** 340 * Report that a config has become activated or deactivated. 341 * This can be different from whether or not a broadcast is sent if the 342 * guardrail prevented the broadcast from being sent. 
343 */ 344 void noteActiveStatusChanged(const ConfigKey& key, bool activate); 345 346 /** 347 * Report a config's metrics data has been dropped. 348 */ 349 void noteDataDropped(const ConfigKey& key, const size_t totalBytes); 350 351 /** 352 * Report metrics data report has been sent. 353 * 354 * The report may be requested via StatsManager API, or through adb cmd. 355 */ 356 void noteMetricsReportSent(const ConfigKey& key, const size_t numBytes, 357 const int32_t reportNumber); 358 359 /** 360 * Report failure in creating the device info metadata table for restricted configs. 361 */ 362 void noteDeviceInfoTableCreationFailed(const ConfigKey& key); 363 364 /** 365 * Report db corruption for restricted configs. 366 */ 367 void noteDbCorrupted(const ConfigKey& key); 368 369 /** 370 * Report db exceeded the size limit for restricted configs. 371 */ 372 void noteDbSizeExceeded(const ConfigKey& key); 373 374 /** 375 * Report db size check with stat for restricted configs failed. 376 */ 377 void noteDbStatFailed(const ConfigKey& key); 378 379 /** 380 * Report restricted config is invalid. 381 */ 382 void noteDbConfigInvalid(const ConfigKey& key); 383 384 /** 385 * Report db is too old for restricted configs. 386 */ 387 void noteDbTooOld(const ConfigKey& key); 388 389 /** 390 * Report db was deleted due to config removal. 391 */ 392 void noteDbDeletionConfigRemoved(const ConfigKey& key); 393 394 /** 395 * Report db was deleted due to config update. 396 */ 397 void noteDbDeletionConfigUpdated(const ConfigKey& key); 398 399 /** 400 * Reports that the promotion for ConfigMetadataProvider failed. 401 */ 402 void noteConfigMetadataProviderPromotionFailed(const ConfigKey& key); 403 404 /** 405 * Report the size of output tuple of a condition. 406 * 407 * Note: only report when the condition has an output dimension, and the tuple 408 * count > kDimensionKeySizeSoftLimit. 409 * 410 * [key]: The config key that this condition belongs to. 411 * [id]: The id of the condition. 
412 * [size]: The output tuple size. 413 */ 414 void noteConditionDimensionSize(const ConfigKey& key, int64_t id, int size); 415 416 /** 417 * Report the size of output tuple of a metric. 418 * 419 * Note: only report when the metric has an output dimension, and the tuple 420 * count > kDimensionKeySizeSoftLimit. 421 * 422 * [key]: The config key that this metric belongs to. 423 * [id]: The id of the metric. 424 * [size]: The output tuple size. 425 */ 426 void noteMetricDimensionSize(const ConfigKey& key, int64_t id, int size); 427 428 /** 429 * Report the max size of output tuple of dimension in condition across dimensions in what. 430 * 431 * Note: only report when the metric has an output dimension in condition, and the max tuple 432 * count > kDimensionKeySizeSoftLimit. 433 * 434 * [key]: The config key that this metric belongs to. 435 * [id]: The id of the metric. 436 * [size]: The output tuple size. 437 */ 438 void noteMetricDimensionInConditionSize(const ConfigKey& key, int64_t id, int size); 439 440 /** 441 * Report a matcher has been matched. 442 * 443 * [key]: The config key that this matcher belongs to. 444 * [id]: The id of the matcher. 445 */ 446 void noteMatcherMatched(const ConfigKey& key, int64_t id); 447 448 /** 449 * Report that an anomaly detection alert has been declared. 450 * 451 * [key]: The config key that this alert belongs to. 452 * [id]: The id of the alert. 453 */ 454 void noteAnomalyDeclared(const ConfigKey& key, int64_t id); 455 456 /** 457 * Report an atom event has been logged. 458 */ 459 void noteAtomLogged(int atomId, int32_t timeSec, bool isSkipped); 460 461 /** 462 * Report that statsd modified the anomaly alarm registered with StatsCompanionService. 463 */ 464 void noteRegisteredAnomalyAlarmChanged(); 465 466 /** 467 * Report that statsd modified the periodic alarm registered with StatsCompanionService. 
468 */ 469 void noteRegisteredPeriodicAlarmChanged(); 470 471 /** 472 * Records the number of delta entries that are being dropped from the uid map. 473 */ 474 void noteUidMapDropped(int deltas); 475 476 /** 477 * Records that an app was deleted (from statsd's map). 478 */ 479 void noteUidMapAppDeletionDropped(); 480 481 /** 482 * Updates the number of changes currently stored in the uid map. 483 */ 484 void setUidMapChanges(int changes); 485 void setCurrentUidMapMemory(int bytes); 486 487 /* 488 * Updates minimum interval between pulls for an pulled atom. 489 */ 490 void updateMinPullIntervalSec(int pullAtomId, long intervalSec); 491 492 /* 493 * Notes an atom is pulled. 494 */ 495 void notePull(int pullAtomId); 496 497 /* 498 * Notes an atom is served from puller cache. 499 */ 500 void notePullFromCache(int pullAtomId); 501 502 /* 503 * Notify data error for pulled atom. 504 */ 505 void notePullDataError(int pullAtomId); 506 507 /* 508 * Records time for actual pulling, not including those served from cache and not including 509 * statsd processing delays. 510 */ 511 void notePullTime(int pullAtomId, int64_t pullTimeNs); 512 513 /* 514 * Records pull delay for a pulled atom, including those served from cache and including statsd 515 * processing delays. 516 */ 517 void notePullDelay(int pullAtomId, int64_t pullDelayNs); 518 519 /* 520 * Records pull exceeds timeout for the puller. 521 */ 522 void notePullTimeout(int pullAtomId, int64_t pullUptimeMillis, int64_t pullElapsedMillis); 523 524 /* 525 * Records pull exceeds max delay for a metric. 526 */ 527 void notePullExceedMaxDelay(int pullAtomId); 528 529 /* 530 * Records when system server restarts. 531 */ 532 void noteSystemServerRestart(int32_t timeSec); 533 534 /** 535 * Records statsd skipped an event. 536 */ 537 void noteLogLost(int32_t wallClockTimeSec, int32_t count, int32_t lastError, 538 int32_t lastAtomTag, int32_t uid, int32_t pid); 539 540 /** 541 * Records that the pull of an atom has failed. 
Eg, if the client indicated the pull failed, if 542 * the pull timed out, or if the outgoing binder call failed. 543 * This count will only increment if the puller was actually invoked. 544 * 545 * It does not include a pull not occurring due to not finding the appropriate 546 * puller. These cases are covered in other counts. 547 */ 548 void notePullFailed(int atomId); 549 550 /** 551 * Records that the pull of an atom has failed due to not having a uid provider. 552 */ 553 void notePullUidProviderNotFound(int atomId); 554 555 /** 556 * Records that the pull of an atom has failed due not finding a puller registered by a 557 * trusted uid. 558 */ 559 void notePullerNotFound(int atomId); 560 561 /** 562 * Records that the pull has failed due to the outgoing binder call failing. 563 */ 564 void notePullBinderCallFailed(int atomId); 565 566 /** 567 * A pull with no data occurred 568 */ 569 void noteEmptyData(int atomId); 570 571 /** 572 * Records that a puller callback for the given atomId was registered or unregistered. 573 * 574 * @param registered True if the callback was registered, false if was unregistered. 575 */ 576 void notePullerCallbackRegistrationChanged(int atomId, bool registered); 577 578 /** 579 * Hard limit was reached in the cardinality of an atom 580 */ 581 void noteHardDimensionLimitReached(int64_t metricId); 582 583 /** 584 * A log event was too late, arrived in the wrong bucket and was skipped 585 */ 586 void noteLateLogEventSkipped(int64_t metricId); 587 588 /** 589 * Buckets were skipped as time elapsed without any data for them 590 */ 591 void noteSkippedForwardBuckets(int64_t metricId); 592 593 /** 594 * An unsupported value type was received 595 */ 596 void noteBadValueType(int64_t metricId); 597 598 /** 599 * Buckets were dropped due to reclaim memory. 
600 */ 601 void noteBucketDropped(int64_t metricId); 602 603 /** 604 * A condition change was too late, arrived in the wrong bucket and was skipped 605 */ 606 void noteConditionChangeInNextBucket(int64_t metricId); 607 608 /** 609 * A bucket has been tagged as invalid. 610 */ 611 void noteInvalidatedBucket(int64_t metricId); 612 613 /** 614 * Tracks the total number of buckets (include skipped/invalid buckets). 615 */ 616 void noteBucketCount(int64_t metricId); 617 618 /** 619 * For pulls at bucket boundaries, it represents the misalignment between the real timestamp and 620 * the end of the bucket. 621 */ 622 void noteBucketBoundaryDelayNs(int64_t metricId, int64_t timeDelayNs); 623 624 /** 625 * Number of buckets with unknown condition. 626 */ 627 void noteBucketUnknownCondition(int64_t metricId); 628 629 /* Reports one event id has been dropped due to queue overflow, and the oldest event timestamp 630 * in the queue */ 631 void noteEventQueueOverflow(int64_t oldestEventTimestampNs, int32_t atomId, bool isSkipped); 632 633 /* Notes queue max size seen so far and associated timestamp */ 634 void noteEventQueueSize(int32_t size, int64_t eventTimestampNs); 635 636 /** 637 * Reports that the activation broadcast guardrail was hit for this uid. Namely, the broadcast 638 * should have been sent, but instead was skipped due to hitting the guardrail. 639 */ 640 void noteActivationBroadcastGuardrailHit(const int uid); 641 642 /** 643 * Reports that an atom is erroneous or cannot be parsed successfully by 644 * statsd. An atom tag of 0 indicates that the client did not supply the 645 * atom id within the encoding. 646 * 647 * For pushed atoms only, this call should be preceded by a call to 648 * noteAtomLogged. 
649 */ 650 void noteAtomError(int atomTag, bool pull = false); 651 652 /** Report query of restricted metric succeed **/ 653 void noteQueryRestrictedMetricSucceed(const int64_t configId, const string& configPackage, 654 const std::optional<int32_t> configUid, 655 const int32_t callingUid, int64_t queryLatencyNs); 656 657 /** Report query of restricted metric failed **/ 658 void noteQueryRestrictedMetricFailed(const int64_t configId, const string& configPackage, 659 const std::optional<int32_t> configUid, 660 const int32_t callingUid, const InvalidQueryReason reason); 661 662 /** Report query of restricted metric failed along with an error string **/ 663 void noteQueryRestrictedMetricFailed(const int64_t configId, const string& configPackage, 664 const std::optional<int32_t> configUid, 665 const int32_t callingUid, const InvalidQueryReason reason, 666 const string& error); 667 668 // Reports that a restricted metric fails to be inserted to database. 669 void noteRestrictedMetricInsertError(const ConfigKey& configKey, int64_t metricId); 670 671 // Reports that a restricted metric fails to create table in database. 672 void noteRestrictedMetricTableCreationError(const ConfigKey& configKey, int64_t metricId); 673 674 // Reports that a restricted metric fails to delete table in database. 675 void noteRestrictedMetricTableDeletionError(const ConfigKey& configKey, int64_t metricId); 676 677 // Reports the time it takes for a restricted metric to flush the data to the database. 678 void noteRestrictedMetricFlushLatency(const ConfigKey& configKey, int64_t metricId, 679 const int64_t flushLatencyNs); 680 681 // Reports that a restricted metric had a category change. 682 void noteRestrictedMetricCategoryChanged(const ConfigKey& configKey, int64_t metricId); 683 684 // Reports the time is takes to flush a restricted config to the database. 
685 void noteRestrictedConfigFlushLatency(const ConfigKey& configKey, 686 const int64_t totalFlushLatencyNs); 687 688 // Reports the size of the internal sqlite db. 689 void noteRestrictedConfigDbSize(const ConfigKey& configKey, int64_t elapsedTimeNs, 690 const int64_t dbSize); 691 692 /** 693 * Records libstatssocket was not able to write into socket. 694 */ 695 void noteAtomSocketLoss(const SocketLossInfo& lossInfo); 696 697 /** 698 * Report a new subscription has started and report the static stats about the subscription 699 * config. 700 * 701 * The static stats include: the count of pushed atoms and pulled atoms. 702 */ 703 void noteSubscriptionStarted(int subId, int32_t pushedAtomCount, int32_t pulledAtomCount); 704 705 /** 706 * Report an existing subscription has ended. 707 */ 708 void noteSubscriptionEnded(int subId); 709 710 /** 711 * Report an existing subscription was flushed. 712 */ 713 void noteSubscriptionFlushed(int subId); 714 715 /** 716 * Report an atom was pulled for a subscription. 717 */ 718 void noteSubscriptionAtomPulled(int atomId); 719 720 /** 721 * Report subscriber pull thread wakeup. 722 */ 723 void noteSubscriptionPullThreadWakeup(); 724 725 void noteBatchSocketRead(int32_t size, int64_t lastReadTimeNs, int64_t currReadTimeNs, 726 int64_t minAtomReadTimeNs, int64_t maxAtomReadTimeNs, 727 const std::unordered_map<int32_t, int32_t>& atomCounts); 728 729 /** 730 * Reset the historical stats. Including all stats in icebox, and the tracked stats about 731 * metrics, matchers, and atoms. The active configs will be kept and StatsdStats will continue 732 * to collect stats after reset() has been called. 733 */ 734 void reset(); 735 736 /** 737 * Output the stats in protobuf binary format to [buffer]. 738 * 739 * [reset]: whether to clear the historical stats after the call. 740 */ 741 void dumpStats(std::vector<uint8_t>* buffer, bool reset); 742 743 /** 744 * Output statsd stats in human readable format to [out] file descriptor. 
745 */ 746 void dumpStats(int outFd) const; 747 748 /** 749 * Returns true if dimension guardrail has been hit since boot for given metric. 750 */ 751 bool hasHitDimensionGuardrail(int64_t metricId) const; 752 753 /** 754 * Return soft and hard atom key dimension size limits as an std::pair. 755 */ 756 static std::pair<size_t, size_t> getAtomDimensionKeySizeLimits(int atomId, 757 size_t defaultHardLimit); 758 clampDimensionKeySizeLimit(int dimLimit)759 inline static int clampDimensionKeySizeLimit(int dimLimit) { 760 return std::clamp(dimLimit, kDimensionKeySizeHardLimitMin, kDimensionKeySizeHardLimitMax); 761 } 762 763 /** 764 * Return the unique identifier for the statsd stats report. This id is 765 * reset on boot. 766 */ getStatsdStatsId()767 inline int32_t getStatsdStatsId() const { 768 return mStatsdStatsId; 769 } 770 771 /** 772 * Returns true if there is recorded event queue overflow 773 */ 774 bool hasEventQueueOverflow() const; 775 776 typedef std::vector<std::pair<int32_t, int32_t>> QueueOverflowAtomsStats; 777 QueueOverflowAtomsStats getQueueOverflowAtomsStats() const; 778 779 /** 780 * Returns true if there is recorded socket loss 781 */ 782 bool hasSocketLoss() const; 783 784 typedef struct PullTimeoutMetadata { 785 int64_t pullTimeoutUptimeMillis; 786 int64_t pullTimeoutElapsedMillis; PullTimeoutMetadataPullTimeoutMetadata787 PullTimeoutMetadata(int64_t uptimeMillis, int64_t elapsedMillis) 788 : pullTimeoutUptimeMillis(uptimeMillis), 789 pullTimeoutElapsedMillis(elapsedMillis) { /* do nothing */ 790 } 791 } PullTimeoutMetadata; 792 793 typedef struct { 794 long totalPull = 0; 795 long totalPullFromCache = 0; 796 long minPullIntervalSec = LONG_MAX; 797 int64_t avgPullTimeNs = 0; 798 int64_t maxPullTimeNs = 0; 799 long numPullTime = 0; 800 int64_t avgPullDelayNs = 0; 801 int64_t maxPullDelayNs = 0; 802 long numPullDelay = 0; 803 long dataError = 0; 804 long pullTimeout = 0; 805 long pullExceedMaxDelay = 0; 806 long pullFailed = 0; 807 long 
pullUidProviderNotFound = 0; 808 long pullerNotFound = 0; 809 long emptyData = 0; 810 long registeredCount = 0; 811 long unregisteredCount = 0; 812 int32_t atomErrorCount = 0; 813 long binderCallFailCount = 0; 814 std::list<PullTimeoutMetadata> pullTimeoutMetadata; 815 int32_t subscriptionPullCount = 0; 816 } PulledAtomStats; 817 818 typedef struct { 819 long hardDimensionLimitReached = 0; 820 long lateLogEventSkipped = 0; 821 long skippedForwardBuckets = 0; 822 long badValueType = 0; 823 long conditionChangeInNextBucket = 0; 824 long invalidatedBucket = 0; 825 long bucketDropped = 0; 826 int64_t minBucketBoundaryDelayNs = 0; 827 int64_t maxBucketBoundaryDelayNs = 0; 828 long bucketUnknownCondition = 0; 829 long bucketCount = 0; 830 } AtomMetricStats; 831 832 private: 833 StatsdStats(); 834 835 mutable std::mutex mLock; 836 837 int32_t mStartTimeSec; 838 839 // Random id set using rand() during the initialization. Used to uniquely 840 // identify a session. This is more reliable than mStartTimeSec due to the 841 // unreliable nature of wall clock times. 842 const int32_t mStatsdStatsId; 843 844 // Track the number of dropped entries used by the uid map. 845 UidMapStats mUidMapStats; 846 847 // The stats about the configs that are still in use. 848 // The map size is capped by kMaxConfigCount. 849 std::map<const ConfigKey, std::shared_ptr<ConfigStats>> mConfigStats; 850 851 // Stores the stats for the configs that are no longer in use. 852 // The size of the vector is capped by kMaxIceBoxSize. 853 std::list<std::shared_ptr<ConfigStats>> mIceBox; 854 855 // Stores the number of times a pushed atom is logged and skipped (if skipped). 856 // The size of the vector is the largest pushed atom id in atoms.proto + 1. Atoms 857 // out of that range will be put in mNonPlatformPushedAtomStats. 858 // This is a vector, not a map because it will be accessed A LOT -- for each stats log. 
859 struct PushedAtomStats { 860 int logCount = 0; 861 int skipCount = 0; 862 }; 863 864 std::vector<PushedAtomStats> mPushedAtomStats; 865 866 // Stores the number of times a pushed atom is logged and skipped for atom ids above 867 // kMaxPushedAtomId. The max size of the map is kMaxNonPlatformPushedAtoms. 868 std::unordered_map<int, PushedAtomStats> mNonPlatformPushedAtomStats; 869 870 // Stores the number of times a pushed atom is dropped due to queue overflow event. 871 // We do not expect it will happen too often so the map is preferable vs pre-allocated vector 872 // The max size of the map is kMaxPushedAtomId + kMaxNonPlatformPushedAtoms. 873 std::unordered_map<int, int> mPushedAtomDropsStats; 874 875 // Maps PullAtomId to its stats. The size is capped by the puller atom counts. 876 std::map<int, PulledAtomStats> mPulledAtomStats; 877 878 // Stores the number of times a pushed atom was logged erroneously. The 879 // corresponding counts for pulled atoms are stored in PulledAtomStats. 880 // The max size of this map is kMaxPushedAtomErrorStatsSize. 881 std::map<int, int> mPushedAtomErrorStats; 882 883 // Stores the number of times a pushed atom was lost due to socket error. 884 // Represents counter per uid per tag per error with indication when the loss event was observed 885 // first & last time. 
886 struct SocketLossStats { SocketLossStatsSocketLossStats887 SocketLossStats(int32_t uid, int64_t firstLossTsNanos, int64_t lastLossTsNanos) 888 : mUid(uid), mFirstLossTsNanos(firstLossTsNanos), mLastLossTsNanos(lastLossTsNanos) { 889 } 890 891 int32_t mUid; 892 int64_t mFirstLossTsNanos; 893 int64_t mLastLossTsNanos; 894 // atom loss count per error, atom id 895 struct AtomLossInfo { AtomLossInfoSocketLossStats::AtomLossInfo896 AtomLossInfo(int32_t atomId, int32_t error, int32_t count) 897 : mAtomId(atomId), mError(error), mCount(count) { 898 } 899 int mAtomId; 900 int mError; 901 int mCount; 902 }; 903 std::vector<AtomLossInfo> mLossCountPerErrorAtomId; 904 }; 905 // The max size of this list is kMaxSocketLossStatsSize. 906 std::list<SocketLossStats> mSocketLossStats; 907 908 // Stores the number of times a pushed atom loss info was dropped from the stats 909 // on libstatssocket side due to guardrail hit. 910 // Represents counter per uid. 911 // The max size of this map is kMaxSocketLossStatsSize. 912 std::map<int32_t, int32_t> mSocketLossStatsOverflowCounters; 913 914 // Maps metric ID to its stats. The size is capped by the number of metrics. 915 std::map<int64_t, AtomMetricStats> mAtomMetricStats; 916 917 // Maps uids to times when the activation changed broadcast not sent due to hitting the 918 // guardrail. The size is capped by the number of configs, and up to 20 times per uid. 
919 std::map<int, std::list<int32_t>> mActivationBroadcastGuardrailStats; 920 921 struct LogLossStats { LogLossStatsLogLossStats922 LogLossStats(int32_t sec, int32_t count, int32_t error, int32_t tag, int32_t uid, 923 int32_t pid) 924 : mWallClockSec(sec), 925 mCount(count), 926 mLastError(error), 927 mLastTag(tag), 928 mUid(uid), 929 mPid(pid) { 930 } 931 int32_t mWallClockSec; 932 int32_t mCount; 933 // error code defined in linux/errno.h 934 int32_t mLastError; 935 int32_t mLastTag; 936 int32_t mUid; 937 int32_t mPid; 938 }; 939 940 // Max of {(now - oldestEventTimestamp) when overflow happens}. 941 // This number is helpful to understand how SLOW statsd can be. 942 int64_t mMaxQueueHistoryNs = 0; 943 944 // Min of {(now - oldestEventTimestamp) when overflow happens}. 945 // This number is helpful to understand how FAST the events floods to statsd. 946 int64_t mMinQueueHistoryNs = kInt64Max; 947 948 // Total number of events that are lost due to queue overflow. 949 int32_t mOverflowCount = 0; 950 951 // Max number of events stored into the queue seen so far. 952 int32_t mEventQueueMaxSizeObserved = 0; 953 954 // Event timestamp for associated max size hit. 955 int64_t mEventQueueMaxSizeObservedElapsedNanos = 0; 956 957 // Timestamps when we detect log loss, and the number of logs lost. 
958 std::list<LogLossStats> mLogLossStats; 959 960 std::list<int32_t> mSystemServerRestartSec; 961 962 std::vector<int64_t> mSocketBatchReadHistogram; 963 964 // Stores stats about large socket batch reads 965 struct LargeBatchSocketReadStats { LargeBatchSocketReadStatsLargeBatchSocketReadStats966 LargeBatchSocketReadStats(int32_t size, int64_t lastReadTimeNs, int64_t currReadTimeNs, 967 int64_t minAtomReadTimeNs, int64_t maxAtomReadTimeNs, 968 const std::unordered_map<int32_t, int32_t>& atomCounts) 969 : mSize(size), 970 mLastReadTimeNs(lastReadTimeNs), 971 mCurrReadTimeNs(currReadTimeNs), 972 mMinAtomReadTimeNs(minAtomReadTimeNs), 973 mMaxAtomReadTimeNs(maxAtomReadTimeNs), 974 mCommonAtomCounts(atomCounts) { 975 } 976 977 int32_t mSize; 978 // The elapsed time of the previous and current read times. 979 int64_t mLastReadTimeNs; 980 int64_t mCurrReadTimeNs; 981 // The min and max times of the LogEvents processed in the batch 982 int64_t mMinAtomReadTimeNs; 983 int64_t mMaxAtomReadTimeNs; 984 // Map of atom id to count for atoms logged more than kMaxLargeBatchReadAtomThreshold times. 985 std::unordered_map<int32_t, int32_t> mCommonAtomCounts; 986 }; 987 // The max size of this list is kMaxSocketLossStatsSize. 
988 std::list<LargeBatchSocketReadStats> mLargeBatchSocketReadStats; 989 990 struct RestrictedMetricQueryStats { RestrictedMetricQueryStatsRestrictedMetricQueryStats991 RestrictedMetricQueryStats(int32_t callingUid, int64_t configId, 992 const string& configPackage, std::optional<int32_t> configUid, 993 int64_t queryTimeNs, 994 std::optional<InvalidQueryReason> invalidQueryReason, 995 const string& error, std::optional<int64_t> queryLatencyNs) 996 : mCallingUid(callingUid), 997 mConfigId(configId), 998 mConfigPackage(configPackage), 999 mConfigUid(configUid), 1000 mQueryWallTimeNs(queryTimeNs), 1001 mInvalidQueryReason(invalidQueryReason), 1002 mError(error), 1003 mQueryLatencyNs(queryLatencyNs) { 1004 mHasError = invalidQueryReason.has_value(); 1005 } 1006 int32_t mCallingUid; 1007 int64_t mConfigId; 1008 string mConfigPackage; 1009 std::optional<int32_t> mConfigUid; 1010 int64_t mQueryWallTimeNs; 1011 std::optional<InvalidQueryReason> mInvalidQueryReason; 1012 bool mHasError; 1013 string mError; 1014 std::optional<int64_t> mQueryLatencyNs; 1015 }; 1016 std::list<RestrictedMetricQueryStats> mRestrictedMetricQueryStats; 1017 1018 void noteQueryRestrictedMetricFailedLocked(const int64_t configId, const string& configPackage, 1019 const std::optional<int32_t> configUid, 1020 const int32_t callingUid, 1021 const InvalidQueryReason reason, 1022 const string& error); 1023 1024 int32_t mSubscriptionPullThreadWakeupCount = 0; 1025 1026 // Maps Subscription ID to the corresponding SubscriptionStats struct object. 1027 // Size of this map is capped by ShellSubscriber::kMaxSubscriptions. 1028 std::map<int32_t, SubscriptionStats> mSubscriptionStats; 1029 1030 // Stores the number of times statsd modified the anomaly alarm registered with 1031 // StatsCompanionService. 
1032 int mAnomalyAlarmRegisteredStats = 0; 1033 1034 // Stores the number of times statsd registers the periodic alarm changes 1035 int mPeriodicAlarmRegisteredStats = 0; 1036 1037 void noteConfigResetInternalLocked(const ConfigKey& key); 1038 1039 void noteConfigRemovedInternalLocked(const ConfigKey& key); 1040 1041 void resetInternalLocked(); 1042 1043 void noteAtomLoggedLocked(int atomId, bool isSkipped); 1044 1045 void noteAtomDroppedLocked(int atomId); 1046 1047 void noteDataDropped(const ConfigKey& key, const size_t totalBytes, int32_t timeSec); 1048 1049 void noteMetricsReportSent(const ConfigKey& key, const size_t numBytes, int32_t timeSec, 1050 const int32_t reportNumber); 1051 1052 void noteBroadcastSent(const ConfigKey& key, int32_t timeSec); 1053 1054 void noteActiveStatusChanged(const ConfigKey& key, bool activate, int32_t timeSec); 1055 1056 void noteActivationBroadcastGuardrailHit(const int uid, int32_t timeSec); 1057 1058 void addToIceBoxLocked(std::shared_ptr<ConfigStats>& stats); 1059 1060 int getPushedAtomErrorsLocked(int atomId) const; 1061 1062 int getPushedAtomDropsLocked(int atomId) const; 1063 1064 bool hasRestrictedConfigErrors(const std::shared_ptr<ConfigStats>& configStats) const; 1065 1066 /** 1067 * Get a reference to AtomMetricStats for a metric. If none exists, create it. The reference 1068 * will live as long as `this`. 
1069 */ 1070 StatsdStats::AtomMetricStats& getAtomMetricStats(int64_t metricId); 1071 1072 FRIEND_TEST(LogEventQueue_test, TestQueueMaxSize); 1073 FRIEND_TEST(SocketParseMessageTest, TestProcessMessage); 1074 FRIEND_TEST(StatsLogProcessorTest, InvalidConfigRemoved); 1075 FRIEND_TEST(StatsdStatsTest, TestActivationBroadcastGuardrailHit); 1076 FRIEND_TEST(StatsdStatsTest, TestAnomalyMonitor); 1077 FRIEND_TEST(StatsdStatsTest, TestAtomDroppedStats); 1078 FRIEND_TEST(StatsdStatsTest, TestAtomErrorStats); 1079 FRIEND_TEST(StatsdStatsTest, TestAtomLog); 1080 FRIEND_TEST(StatsdStatsTest, TestAtomLoggedAndDroppedAndSkippedStats); 1081 FRIEND_TEST(StatsdStatsTest, TestAtomLoggedAndDroppedStats); 1082 FRIEND_TEST(StatsdStatsTest, TestAtomMetricsStats); 1083 FRIEND_TEST(StatsdStatsTest, TestAtomSkippedStats); 1084 FRIEND_TEST(StatsdStatsTest, TestConfigMetadataProviderPromotionFailed); 1085 FRIEND_TEST(StatsdStatsTest, TestConfigRemove); 1086 FRIEND_TEST(StatsdStatsTest, TestHasHitDimensionGuardrail); 1087 FRIEND_TEST(StatsdStatsTest, TestInvalidConfigAdd); 1088 FRIEND_TEST(StatsdStatsTest, TestInvalidConfigMissingMetricId); 1089 FRIEND_TEST(StatsdStatsTest, TestInvalidConfigOnlyMetricId); 1090 FRIEND_TEST(StatsdStatsTest, TestNonPlatformAtomLog); 1091 FRIEND_TEST(StatsdStatsTest, TestPullAtomStats); 1092 FRIEND_TEST(StatsdStatsTest, TestQueueStats); 1093 FRIEND_TEST(StatsdStatsTest, TestRestrictedMetricsQueryStats); 1094 FRIEND_TEST(StatsdStatsTest, TestRestrictedMetricsStats); 1095 FRIEND_TEST(StatsdStatsTest, TestShardOffsetProvider); 1096 FRIEND_TEST(StatsdStatsTest, TestSocketLossStats); 1097 FRIEND_TEST(StatsdStatsTest, TestSocketLossStatsOverflowCounter); 1098 FRIEND_TEST(StatsdStatsTest, TestSubStats); 1099 FRIEND_TEST(StatsdStatsTest, TestSubscriptionAtomPulled); 1100 FRIEND_TEST(StatsdStatsTest, TestSubscriptionEnded); 1101 FRIEND_TEST(StatsdStatsTest, TestSubscriptionFlushed); 1102 FRIEND_TEST(StatsdStatsTest, TestSubscriptionPullThreadWakeup); 1103 
FRIEND_TEST(StatsdStatsTest, TestSubscriptionStarted); 1104 FRIEND_TEST(StatsdStatsTest, TestSubscriptionStartedMaxActiveSubscriptions); 1105 FRIEND_TEST(StatsdStatsTest, TestSubscriptionStartedRemoveFinishedSubscription); 1106 FRIEND_TEST(StatsdStatsTest, TestSystemServerCrash); 1107 FRIEND_TEST(StatsdStatsTest, TestTimestampThreshold); 1108 FRIEND_TEST(StatsdStatsTest, TestValidConfigAdd); 1109 FRIEND_TEST(StatsdStatsTest, TestSocketBatchReadStats); 1110 }; 1111 1112 InvalidConfigReason createInvalidConfigReasonWithMatcher(const InvalidConfigReasonEnum reason, 1113 const int64_t matcherId); 1114 1115 InvalidConfigReason createInvalidConfigReasonWithMatcher(const InvalidConfigReasonEnum reason, 1116 const int64_t metricId, 1117 const int64_t matcherId); 1118 1119 InvalidConfigReason createInvalidConfigReasonWithPredicate(const InvalidConfigReasonEnum reason, 1120 const int64_t conditionId); 1121 1122 InvalidConfigReason createInvalidConfigReasonWithPredicate(const InvalidConfigReasonEnum reason, 1123 const int64_t metricId, 1124 const int64_t conditionId); 1125 1126 InvalidConfigReason createInvalidConfigReasonWithState(const InvalidConfigReasonEnum reason, 1127 const int64_t metricId, 1128 const int64_t stateId); 1129 1130 InvalidConfigReason createInvalidConfigReasonWithAlert(const InvalidConfigReasonEnum reason, 1131 const int64_t alertId); 1132 1133 InvalidConfigReason createInvalidConfigReasonWithAlert(const InvalidConfigReasonEnum reason, 1134 const int64_t metricId, 1135 const int64_t alertId); 1136 1137 InvalidConfigReason createInvalidConfigReasonWithAlarm(const InvalidConfigReasonEnum reason, 1138 const int64_t alarmId); 1139 1140 InvalidConfigReason createInvalidConfigReasonWithSubscription(const InvalidConfigReasonEnum reason, 1141 const int64_t subscriptionId); 1142 1143 InvalidConfigReason createInvalidConfigReasonWithSubscriptionAndAlarm( 1144 const InvalidConfigReasonEnum reason, int64_t subscriptionId, int64_t alarmId); 1145 1146 
// Declared here, defined elsewhere; returns an InvalidConfigReason.
// NOTE(review): presumably sets `reason` together with the subscription and alert ids --
// confirm against the definition.
InvalidConfigReason createInvalidConfigReasonWithSubscriptionAndAlert(
        const InvalidConfigReasonEnum reason, int64_t subscriptionId, int64_t alertId);

}  // namespace statsd
}  // namespace os
}  // namespace android