1 /*
2  * Copyright 2017, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <gtest/gtest_prod.h>
19 #include <log/log_time.h>
20 #include <src/guardrail/stats_log_enums.pb.h>
21 
22 #include <list>
23 #include <mutex>
24 #include <string>
25 #include <unordered_map>
26 #include <vector>
27 
28 #include "config/ConfigKey.h"
29 #include "logd/logevent_util.h"
30 
31 namespace android {
32 namespace os {
33 namespace statsd {
34 
35 struct InvalidConfigReason {
36     InvalidConfigReasonEnum reason;
37     std::optional<int64_t> metricId;
38     std::optional<int64_t> stateId;
39     std::optional<int64_t> alertId;
40     std::optional<int64_t> alarmId;
41     std::optional<int64_t> subscriptionId;
42     std::vector<int64_t> matcherIds;
43     std::vector<int64_t> conditionIds;
InvalidConfigReasonInvalidConfigReason44     InvalidConfigReason(){};
InvalidConfigReasonInvalidConfigReason45     InvalidConfigReason(InvalidConfigReasonEnum reason) : reason(reason){};
InvalidConfigReasonInvalidConfigReason46     InvalidConfigReason(InvalidConfigReasonEnum reason, int64_t metricId)
47         : reason(reason), metricId(metricId){};
48     bool operator==(const InvalidConfigReason& other) const {
49         return (this->reason == other.reason) && (this->metricId == other.metricId) &&
50                (this->stateId == other.stateId) && (this->alertId == other.alertId) &&
51                (this->alarmId == other.alarmId) && (this->subscriptionId == other.subscriptionId) &&
52                (this->matcherIds == other.matcherIds) && (this->conditionIds == other.conditionIds);
53     }
54 };
55 
// Error counters and flush latencies recorded for one restricted metric.
//
// Declared as a named struct rather than `typedef struct { ... } Name;`: an unnamed class that
// gets its name for linkage from a typedef may not have default member initializers, and it
// cannot be forward-declared.
struct RestrictedMetricStats {
    int64_t insertError = 0;
    int64_t tableCreationError = 0;
    int64_t tableDeletionError = 0;
    // Flush latencies in nanoseconds.
    std::list<int64_t> flushLatencyNs;
    int64_t categoryChangedCount = 0;
};
63 
// Record of a single dump report: its time (seconds), its size (bytes) and its report number.
struct DumpReportStats {
    int32_t mDumpReportTimeSec = 0;
    int32_t mDumpReportSizeBytes = 0;
    int32_t mDumpReportNumber = 0;

    // Stores the three values verbatim; no validation is performed.
    DumpReportStats(int32_t dumpReportSec, int32_t dumpReportSize, int32_t reportNumber)
        : mDumpReportTimeSec{dumpReportSec},
          mDumpReportSizeBytes{dumpReportSize},
          mDumpReportNumber{reportNumber} {
    }
};
74 
75 struct ConfigStats {
76     int32_t uid;
77     int64_t id;
78     int32_t creation_time_sec;
79     int32_t deletion_time_sec = 0;
80     int32_t reset_time_sec = 0;
81     int32_t metric_count;
82     int32_t condition_count;
83     int32_t matcher_count;
84     int32_t alert_count;
85     bool is_valid;
86     bool device_info_table_creation_failed = false;
87     int32_t db_corrupted_count = 0;
88     int32_t db_deletion_stat_failed = 0;
89     int32_t db_deletion_size_exceeded_limit = 0;
90     int32_t db_deletion_config_invalid = 0;
91     int32_t db_deletion_too_old = 0;
92     int32_t db_deletion_config_removed = 0;
93     int32_t db_deletion_config_updated = 0;
94     // Stores the number of ConfigMetadataProvider promotion failures
95     int32_t config_metadata_provider_promote_failure = 0;
96 
97     // Stores reasons for why config is valid or not
98     std::optional<InvalidConfigReason> reason;
99 
100     std::list<int32_t> broadcast_sent_time_sec;
101 
102     // Times at which this config is activated.
103     std::list<int32_t> activation_time_sec;
104 
105     // Times at which this config is deactivated.
106     std::list<int32_t> deactivation_time_sec;
107 
108     std::list<int32_t> data_drop_time_sec;
109     // Number of bytes dropped at corresponding time.
110     std::list<int64_t> data_drop_bytes;
111 
112     std::list<DumpReportStats> dump_report_stats;
113 
114     // Stores how many times a matcher have been matched. The map size is capped by kMaxConfigCount.
115     std::map<const int64_t, int> matcher_stats;
116 
117     // Stores the number of output tuple of condition trackers when it's bigger than
118     // kDimensionKeySizeSoftLimit. When you see the number is kDimensionKeySizeHardLimit +1,
119     // it means some data has been dropped. The map size is capped by kMaxConfigCount.
120     std::map<const int64_t, int> condition_stats;
121 
122     // Stores the number of output tuple of metric producers when it's bigger than
123     // kDimensionKeySizeSoftLimit. When you see the number is kDimensionKeySizeHardLimit +1,
124     // it means some data has been dropped. The map size is capped by kMaxConfigCount.
125     std::map<const int64_t, int> metric_stats;
126 
127     // Stores the max number of output tuple of dimensions in condition across dimensions in what
128     // when it's bigger than kDimensionKeySizeSoftLimit. When you see the number is
129     // kDimensionKeySizeHardLimit +1, it means some data has been dropped. The map size is capped by
130     // kMaxConfigCount.
131     std::map<const int64_t, int> metric_dimension_in_condition_stats;
132 
133     // Stores the number of times an anomaly detection alert has been declared.
134     // The map size is capped by kMaxConfigCount.
135     std::map<const int64_t, int> alert_stats;
136 
137     // Stores the config ID for each sub-config used.
138     std::list<std::pair<const int64_t, const int32_t>> annotations;
139 
140     // Maps metric ID of restricted metric to its stats.
141     std::map<int64_t, RestrictedMetricStats> restricted_metric_stats;
142 
143     std::list<int64_t> total_flush_latency_ns;
144 
145     // Stores the last 20 timestamps for computing sqlite db size.
146     std::list<int64_t> total_db_size_timestamps;
147 
148     // Stores the last 20 sizes of the sqlite db.
149     std::list<int64_t> total_db_sizes;
150 };
151 
// Counters describing the uid map; every counter starts at zero.
// NOTE(review): the exact meaning of each counter is inferred from its name only -- confirm
// against the code that updates them.
struct UidMapStats {
    int32_t changes{0};
    int32_t bytes_used{0};
    int32_t dropped_changes{0};
    int32_t deleted_apps{0};
};
158 
// Per-subscription counters and start/end times (seconds); all members default to zero.
struct SubscriptionStats {
    int32_t pushed_atom_count{0};
    int32_t pulled_atom_count{0};
    int32_t start_time_sec{0};
    int32_t end_time_sec{0};
    int32_t flush_count{0};
};
166 
167 // Keeps track of stats of statsd.
168 // Single instance shared across the process. All public methods are thread safe.
169 class StatsdStats {
170 public:
171     static StatsdStats& getInstance();
~StatsdStats()172     ~StatsdStats(){};
173 
174     const static int kDimensionKeySizeSoftLimit = 500;
175     static constexpr int kDimensionKeySizeHardLimit = 800;
176     static constexpr int kDimensionKeySizeHardLimitMin = 800;
177     static constexpr int kDimensionKeySizeHardLimitMax = 3000;
178 
179     // Per atom dimension key size limit
180     static const std::map<int, std::pair<size_t, size_t>> kAtomDimensionKeySizeLimitMap;
181 
182     const static int kMaxConfigCountPerUid = 20;
183     const static int kMaxAlertCountPerConfig = 200;
184     const static int kMaxConditionCountPerConfig = 500;
185     const static int kMaxMetricCountPerConfig = 3000;
186     const static int kMaxMatcherCountPerConfig = 3500;
187 
188     // The max number of old config stats we keep.
189     const static int kMaxIceBoxSize = 20;
190 
191     const static int kMaxLoggerErrors = 20;
192 
193     const static int kMaxSystemServerRestarts = 20;
194 
195     const static int kMaxTimestampCount = 20;
196 
197     const static int kMaxLogSourceCount = 150;
198 
199     const static int kMaxPullAtomPackages = 100;
200 
201     const static int kMaxRestrictedMetricQueryCount = 20;
202 
203     const static int kMaxRestrictedMetricFlushLatencyCount = 20;
204 
205     const static int kMaxRestrictedConfigFlushLatencyCount = 20;
206 
207     const static int kMaxRestrictedConfigDbSizeCount = 20;
208 
209     // Max memory allowed for storing metrics per configuration. If this limit is exceeded, statsd
210     // drops the metrics data in memory.
211     static const size_t kDefaultMaxMetricsBytesPerConfig = 2 * 1024 * 1024;
212 
213     // Hard limit for custom memory allowed for storing metrics per configuration.
214     static const size_t kHardMaxMetricsBytesPerConfig = 20 * 1024 * 1024;
215 
    // Max memory allowed for storing metrics per configuration before triggering an intent to
    // fetch data.
218     static const size_t kHardMaxTriggerGetDataBytes = 10 * 1024 * 1024;
219 
220     // Soft memory limit per configuration. Once this limit is exceeded, we begin notifying the
221     // data subscriber that it's time to call getData.
222     static const size_t kDefaultBytesPerConfigTriggerGetData = 192 * 1024;
223 
    // Soft memory limit per restricted configuration. Once this limit is exceeded,
    // we begin flushing in-memory restricted metrics to the database.
226     static const size_t kBytesPerRestrictedConfigTriggerFlush = 25 * 1024;
227 
228     // Cap the UID map's memory usage to this. This should be fairly high since the UID information
229     // is critical for understanding the metrics.
230     const static size_t kMaxBytesUsedUidMap = 50 * 1024;
231 
232     // The number of deleted apps that are stored in the uid map.
233     const static int kMaxDeletedAppsInUidMap = 100;
234 
235     /* Minimum period between two broadcasts in nanoseconds. */
236     static const int64_t kMinBroadcastPeriodNs = 60 * NS_PER_SEC;
237 
238     /* Min period between two checks of byte size per config key in nanoseconds. */
239     static const int64_t kMinByteSizeCheckPeriodNs = 1 * 60 * NS_PER_SEC;
240 
241     // Min period between two checks of byte size per config key in nanoseconds for V2 memory
242     // calculations.
243     static const int64_t kMinByteSizeV2CheckPeriodNs = 5 * 60 * NS_PER_SEC;
244 
245     /* Min period between two checks of restricted metrics TTLs. */
246     static const int64_t kMinTtlCheckPeriodNs = 60 * 60 * NS_PER_SEC;
247 
248     /* Min period between two flush operations of restricted metrics. */
249     static const int64_t kMinFlushRestrictedPeriodNs = 60 * 60 * NS_PER_SEC;
250 
251     /* Min period between two db guardrail check operations of restricted metrics. */
252     static const int64_t kMinDbGuardrailEnforcementPeriodNs = 60 * 60 * NS_PER_SEC;
253 
254     /* Minimum period between two activation broadcasts in nanoseconds. */
255     static const int64_t kMinActivationBroadcastPeriodNs = 10 * NS_PER_SEC;
256 
257     // Maximum age (30 days) that files on disk can exist in seconds.
258     static const int kMaxAgeSecond = 60 * 60 * 24 * 30;
259 
260     // Maximum age (2 days) that local history files on disk can exist in seconds.
261     static const int kMaxLocalHistoryAgeSecond = 60 * 60 * 24 * 2;
262 
263     // Maximum number of files (1000) that can be in stats directory on disk.
264     static const int kMaxFileNumber = 1000;
265 
266     // Maximum size of all files that can be written to stats directory on disk.
267     static const int kMaxFileSize = 50 * 1024 * 1024;
268 
269     // How long to try to clear puller cache from last time
270     static const long kPullerCacheClearIntervalSec = 1;
271 
272     // Max time to do a pull.
273     static const int64_t kPullMaxDelayNs = 30 * NS_PER_SEC;
274 
275     // Maximum number of pushed atoms statsd stats will track above kMaxPushedAtomId.
276     static const int kMaxNonPlatformPushedAtoms = 600;
277 
278     // Maximum number of pushed atoms error statsd stats will track.
279     static const int kMaxPushedAtomErrorStatsSize = 100;
280 
281     // Maximum number of socket loss stats to track.
282     static const int kMaxSocketLossStatsSize = 50;
283 
284     // Maximum atom id value that we consider a platform pushed atom.
285     // This should be updated once highest pushed atom id in atoms.proto approaches this value.
286     static const int kMaxPushedAtomId = 1500;
287 
288     // Atom id that is the start of the pulled atoms.
289     static const int kPullAtomStartTag = 10000;
290 
291     // Atom id that is the start of vendor atoms.
292     static const int kVendorAtomStartTag = 100000;
293 
294     // Vendor pulled atom start id.
295     static const int32_t kVendorPulledAtomStartTag = 150000;
296 
297     // Beginning of range for timestamp truncation.
298     static const int32_t kTimestampTruncationStartTag = 300000;
299 
300     // End of range for timestamp truncation.
301     static const int32_t kTimestampTruncationEndTag = 304999;
302 
303     // Max accepted atom id.
304     static const int32_t kMaxAtomTag = 200000;
305 
306     static const int64_t kInt64Max = 0x7fffffffffffffffLL;
307 
308     static const int32_t kMaxLoggedBucketDropEvents = 10;
309 
310     static const int32_t kNumBinsInSocketBatchReadHistogram = 30;
311     static const int32_t kLargeBatchReadThreshold = 1000;
312     static const int32_t kMaxLargeBatchReadSize = 20;
313     static const int32_t kMaxLargeBatchReadAtomThreshold = 50;
314 
315     /**
316      * Report a new config has been received and report the static stats about the config.
317      *
318      * The static stats include: the count of metrics, conditions, matchers, and alerts.
319      * If the config is not valid, this config stats will be put into icebox immediately.
320      */
321     void noteConfigReceived(const ConfigKey& key, int metricsCount, int conditionsCount,
322                             int matchersCount, int alertCount,
323                             const std::list<std::pair<const int64_t, const int32_t>>& annotations,
324                             const std::optional<InvalidConfigReason>& reason);
325     /**
326      * Report a config has been removed.
327      */
328     void noteConfigRemoved(const ConfigKey& key);
329     /**
330      * Report a config has been reset when ttl expires.
331      */
332     void noteConfigReset(const ConfigKey& key);
333 
334     /**
335      * Report a broadcast has been sent to a config owner to collect the data.
336      */
337     void noteBroadcastSent(const ConfigKey& key);
338 
339     /**
340      * Report that a config has become activated or deactivated.
341      * This can be different from whether or not a broadcast is sent if the
342      * guardrail prevented the broadcast from being sent.
343      */
344     void noteActiveStatusChanged(const ConfigKey& key, bool activate);
345 
346     /**
347      * Report a config's metrics data has been dropped.
348      */
349     void noteDataDropped(const ConfigKey& key, const size_t totalBytes);
350 
351     /**
352      * Report metrics data report has been sent.
353      *
354      * The report may be requested via StatsManager API, or through adb cmd.
355      */
356     void noteMetricsReportSent(const ConfigKey& key, const size_t numBytes,
357                                const int32_t reportNumber);
358 
359     /**
360      * Report failure in creating the device info metadata table for restricted configs.
361      */
362     void noteDeviceInfoTableCreationFailed(const ConfigKey& key);
363 
364     /**
365      * Report db corruption for restricted configs.
366      */
367     void noteDbCorrupted(const ConfigKey& key);
368 
369     /**
370      * Report db exceeded the size limit for restricted configs.
371      */
372     void noteDbSizeExceeded(const ConfigKey& key);
373 
374     /**
375      * Report db size check with stat for restricted configs failed.
376      */
377     void noteDbStatFailed(const ConfigKey& key);
378 
379     /**
380      * Report restricted config is invalid.
381      */
382     void noteDbConfigInvalid(const ConfigKey& key);
383 
384     /**
385      * Report db is too old for restricted configs.
386      */
387     void noteDbTooOld(const ConfigKey& key);
388 
389     /**
390      * Report db was deleted due to config removal.
391      */
392     void noteDbDeletionConfigRemoved(const ConfigKey& key);
393 
394     /**
395      * Report db was deleted due to config update.
396      */
397     void noteDbDeletionConfigUpdated(const ConfigKey& key);
398 
399     /**
400      * Reports that the promotion for ConfigMetadataProvider failed.
401      */
402     void noteConfigMetadataProviderPromotionFailed(const ConfigKey& key);
403 
404     /**
405      * Report the size of output tuple of a condition.
406      *
407      * Note: only report when the condition has an output dimension, and the tuple
408      * count > kDimensionKeySizeSoftLimit.
409      *
410      * [key]: The config key that this condition belongs to.
411      * [id]: The id of the condition.
412      * [size]: The output tuple size.
413      */
414     void noteConditionDimensionSize(const ConfigKey& key, int64_t id, int size);
415 
416     /**
417      * Report the size of output tuple of a metric.
418      *
419      * Note: only report when the metric has an output dimension, and the tuple
420      * count > kDimensionKeySizeSoftLimit.
421      *
422      * [key]: The config key that this metric belongs to.
423      * [id]: The id of the metric.
424      * [size]: The output tuple size.
425      */
426     void noteMetricDimensionSize(const ConfigKey& key, int64_t id, int size);
427 
428     /**
429      * Report the max size of output tuple of dimension in condition across dimensions in what.
430      *
431      * Note: only report when the metric has an output dimension in condition, and the max tuple
432      * count > kDimensionKeySizeSoftLimit.
433      *
434      * [key]: The config key that this metric belongs to.
435      * [id]: The id of the metric.
436      * [size]: The output tuple size.
437      */
438     void noteMetricDimensionInConditionSize(const ConfigKey& key, int64_t id, int size);
439 
440     /**
441      * Report a matcher has been matched.
442      *
443      * [key]: The config key that this matcher belongs to.
444      * [id]: The id of the matcher.
445      */
446     void noteMatcherMatched(const ConfigKey& key, int64_t id);
447 
448     /**
449      * Report that an anomaly detection alert has been declared.
450      *
451      * [key]: The config key that this alert belongs to.
452      * [id]: The id of the alert.
453      */
454     void noteAnomalyDeclared(const ConfigKey& key, int64_t id);
455 
456     /**
457      * Report an atom event has been logged.
458      */
459     void noteAtomLogged(int atomId, int32_t timeSec, bool isSkipped);
460 
461     /**
462      * Report that statsd modified the anomaly alarm registered with StatsCompanionService.
463      */
464     void noteRegisteredAnomalyAlarmChanged();
465 
466     /**
467      * Report that statsd modified the periodic alarm registered with StatsCompanionService.
468      */
469     void noteRegisteredPeriodicAlarmChanged();
470 
471     /**
472      * Records the number of delta entries that are being dropped from the uid map.
473      */
474     void noteUidMapDropped(int deltas);
475 
476     /**
477      * Records that an app was deleted (from statsd's map).
478      */
479     void noteUidMapAppDeletionDropped();
480 
481     /**
482      * Updates the number of changes currently stored in the uid map.
483      */
484     void setUidMapChanges(int changes);
485     void setCurrentUidMapMemory(int bytes);
486 
487     /*
     * Updates minimum interval between pulls for a pulled atom.
489      */
490     void updateMinPullIntervalSec(int pullAtomId, long intervalSec);
491 
492     /*
493      * Notes an atom is pulled.
494      */
495     void notePull(int pullAtomId);
496 
497     /*
498      * Notes an atom is served from puller cache.
499      */
500     void notePullFromCache(int pullAtomId);
501 
502     /*
503      * Notify data error for pulled atom.
504      */
505     void notePullDataError(int pullAtomId);
506 
507     /*
508      * Records time for actual pulling, not including those served from cache and not including
509      * statsd processing delays.
510      */
511     void notePullTime(int pullAtomId, int64_t pullTimeNs);
512 
513     /*
514      * Records pull delay for a pulled atom, including those served from cache and including statsd
515      * processing delays.
516      */
517     void notePullDelay(int pullAtomId, int64_t pullDelayNs);
518 
519     /*
520      * Records pull exceeds timeout for the puller.
521      */
522     void notePullTimeout(int pullAtomId, int64_t pullUptimeMillis, int64_t pullElapsedMillis);
523 
524     /*
525      * Records pull exceeds max delay for a metric.
526      */
527     void notePullExceedMaxDelay(int pullAtomId);
528 
529     /*
530      * Records when system server restarts.
531      */
532     void noteSystemServerRestart(int32_t timeSec);
533 
534     /**
535      * Records statsd skipped an event.
536      */
537     void noteLogLost(int32_t wallClockTimeSec, int32_t count, int32_t lastError,
538                      int32_t lastAtomTag, int32_t uid, int32_t pid);
539 
540     /**
541      * Records that the pull of an atom has failed. Eg, if the client indicated the pull failed, if
542      * the pull timed out, or if the outgoing binder call failed.
543      * This count will only increment if the puller was actually invoked.
544      *
545      * It does not include a pull not occurring due to not finding the appropriate
546      * puller. These cases are covered in other counts.
547      */
548     void notePullFailed(int atomId);
549 
550     /**
551      * Records that the pull of an atom has failed due to not having a uid provider.
552      */
553     void notePullUidProviderNotFound(int atomId);
554 
555     /**
     * Records that the pull of an atom has failed due to not finding a puller registered by a
     * trusted uid.
558      */
559     void notePullerNotFound(int atomId);
560 
561     /**
562      * Records that the pull has failed due to the outgoing binder call failing.
563      */
564     void notePullBinderCallFailed(int atomId);
565 
566     /**
567      * A pull with no data occurred
568      */
569     void noteEmptyData(int atomId);
570 
571     /**
572      * Records that a puller callback for the given atomId was registered or unregistered.
573      *
574      * @param registered True if the callback was registered, false if was unregistered.
575      */
576     void notePullerCallbackRegistrationChanged(int atomId, bool registered);
577 
578     /**
579      * Hard limit was reached in the cardinality of an atom
580      */
581     void noteHardDimensionLimitReached(int64_t metricId);
582 
583     /**
584      * A log event was too late, arrived in the wrong bucket and was skipped
585      */
586     void noteLateLogEventSkipped(int64_t metricId);
587 
588     /**
589      * Buckets were skipped as time elapsed without any data for them
590      */
591     void noteSkippedForwardBuckets(int64_t metricId);
592 
593     /**
594      * An unsupported value type was received
595      */
596     void noteBadValueType(int64_t metricId);
597 
598     /**
599      * Buckets were dropped due to reclaim memory.
600      */
601     void noteBucketDropped(int64_t metricId);
602 
603     /**
604      * A condition change was too late, arrived in the wrong bucket and was skipped
605      */
606     void noteConditionChangeInNextBucket(int64_t metricId);
607 
608     /**
609      * A bucket has been tagged as invalid.
610      */
611     void noteInvalidatedBucket(int64_t metricId);
612 
613     /**
614      * Tracks the total number of buckets (include skipped/invalid buckets).
615      */
616     void noteBucketCount(int64_t metricId);
617 
618     /**
619      * For pulls at bucket boundaries, it represents the misalignment between the real timestamp and
620      * the end of the bucket.
621      */
622     void noteBucketBoundaryDelayNs(int64_t metricId, int64_t timeDelayNs);
623 
624     /**
625      * Number of buckets with unknown condition.
626      */
627     void noteBucketUnknownCondition(int64_t metricId);
628 
629     /* Reports one event id has been dropped due to queue overflow, and the oldest event timestamp
630      * in the queue */
631     void noteEventQueueOverflow(int64_t oldestEventTimestampNs, int32_t atomId, bool isSkipped);
632 
633     /* Notes queue max size seen so far and associated timestamp */
634     void noteEventQueueSize(int32_t size, int64_t eventTimestampNs);
635 
636     /**
637      * Reports that the activation broadcast guardrail was hit for this uid. Namely, the broadcast
638      * should have been sent, but instead was skipped due to hitting the guardrail.
639      */
640     void noteActivationBroadcastGuardrailHit(const int uid);
641 
642     /**
643      * Reports that an atom is erroneous or cannot be parsed successfully by
644      * statsd. An atom tag of 0 indicates that the client did not supply the
645      * atom id within the encoding.
646      *
647      * For pushed atoms only, this call should be preceded by a call to
648      * noteAtomLogged.
649      */
650     void noteAtomError(int atomTag, bool pull = false);
651 
652     /** Report query of restricted metric succeed **/
653     void noteQueryRestrictedMetricSucceed(const int64_t configId, const string& configPackage,
654                                           const std::optional<int32_t> configUid,
655                                           const int32_t callingUid, int64_t queryLatencyNs);
656 
657     /** Report query of restricted metric failed **/
658     void noteQueryRestrictedMetricFailed(const int64_t configId, const string& configPackage,
659                                          const std::optional<int32_t> configUid,
660                                          const int32_t callingUid, const InvalidQueryReason reason);
661 
662     /** Report query of restricted metric failed along with an error string **/
663     void noteQueryRestrictedMetricFailed(const int64_t configId, const string& configPackage,
664                                          const std::optional<int32_t> configUid,
665                                          const int32_t callingUid, const InvalidQueryReason reason,
666                                          const string& error);
667 
668     // Reports that a restricted metric fails to be inserted to database.
669     void noteRestrictedMetricInsertError(const ConfigKey& configKey, int64_t metricId);
670 
671     // Reports that a restricted metric fails to create table in database.
672     void noteRestrictedMetricTableCreationError(const ConfigKey& configKey, int64_t metricId);
673 
674     // Reports that a restricted metric fails to delete table in database.
675     void noteRestrictedMetricTableDeletionError(const ConfigKey& configKey, int64_t metricId);
676 
677     // Reports the time it takes for a restricted metric to flush the data to the database.
678     void noteRestrictedMetricFlushLatency(const ConfigKey& configKey, int64_t metricId,
679                                           const int64_t flushLatencyNs);
680 
681     // Reports that a restricted metric had a category change.
682     void noteRestrictedMetricCategoryChanged(const ConfigKey& configKey, int64_t metricId);
683 
    // Reports the time it takes to flush a restricted config to the database.
685     void noteRestrictedConfigFlushLatency(const ConfigKey& configKey,
686                                           const int64_t totalFlushLatencyNs);
687 
688     // Reports the size of the internal sqlite db.
689     void noteRestrictedConfigDbSize(const ConfigKey& configKey, int64_t elapsedTimeNs,
690                                     const int64_t dbSize);
691 
692     /**
693      * Records libstatssocket was not able to write into socket.
694      */
695     void noteAtomSocketLoss(const SocketLossInfo& lossInfo);
696 
697     /**
698      * Report a new subscription has started and report the static stats about the subscription
699      * config.
700      *
701      * The static stats include: the count of pushed atoms and pulled atoms.
702      */
703     void noteSubscriptionStarted(int subId, int32_t pushedAtomCount, int32_t pulledAtomCount);
704 
705     /**
706      * Report an existing subscription has ended.
707      */
708     void noteSubscriptionEnded(int subId);
709 
710     /**
711      * Report an existing subscription was flushed.
712      */
713     void noteSubscriptionFlushed(int subId);
714 
715     /**
716      * Report an atom was pulled for a subscription.
717      */
718     void noteSubscriptionAtomPulled(int atomId);
719 
720     /**
721      * Report subscriber pull thread wakeup.
722      */
723     void noteSubscriptionPullThreadWakeup();
724 
725     void noteBatchSocketRead(int32_t size, int64_t lastReadTimeNs, int64_t currReadTimeNs,
726                              int64_t minAtomReadTimeNs, int64_t maxAtomReadTimeNs,
727                              const std::unordered_map<int32_t, int32_t>& atomCounts);
728 
729     /**
730      * Reset the historical stats. Including all stats in icebox, and the tracked stats about
731      * metrics, matchers, and atoms. The active configs will be kept and StatsdStats will continue
732      * to collect stats after reset() has been called.
733      */
734     void reset();
735 
736     /**
737      * Output the stats in protobuf binary format to [buffer].
738      *
739      * [reset]: whether to clear the historical stats after the call.
740      */
741     void dumpStats(std::vector<uint8_t>* buffer, bool reset);
742 
743     /**
744      * Output statsd stats in human readable format to [out] file descriptor.
745      */
746     void dumpStats(int outFd) const;
747 
748     /**
749      * Returns true if dimension guardrail has been hit since boot for given metric.
750      */
751     bool hasHitDimensionGuardrail(int64_t metricId) const;
752 
753     /**
754      * Return soft and hard atom key dimension size limits as an std::pair.
755      */
756     static std::pair<size_t, size_t> getAtomDimensionKeySizeLimits(int atomId,
757                                                                    size_t defaultHardLimit);
758 
clampDimensionKeySizeLimit(int dimLimit)759     inline static int clampDimensionKeySizeLimit(int dimLimit) {
760         return std::clamp(dimLimit, kDimensionKeySizeHardLimitMin, kDimensionKeySizeHardLimitMax);
761     }
762 
763     /**
764      * Return the unique identifier for the statsd stats report. This id is
765      * reset on boot.
766      */
getStatsdStatsId()767     inline int32_t getStatsdStatsId() const {
768         return mStatsdStatsId;
769     }
770 
771     /**
772      * Returns true if there is recorded event queue overflow
773      */
774     bool hasEventQueueOverflow() const;
775 
776     typedef std::vector<std::pair<int32_t, int32_t>> QueueOverflowAtomsStats;
777     QueueOverflowAtomsStats getQueueOverflowAtomsStats() const;
778 
779     /**
780      * Returns true if there is recorded socket loss
781      */
782     bool hasSocketLoss() const;
783 
784     typedef struct PullTimeoutMetadata {
785         int64_t pullTimeoutUptimeMillis;
786         int64_t pullTimeoutElapsedMillis;
PullTimeoutMetadataPullTimeoutMetadata787         PullTimeoutMetadata(int64_t uptimeMillis, int64_t elapsedMillis)
788             : pullTimeoutUptimeMillis(uptimeMillis),
789               pullTimeoutElapsedMillis(elapsedMillis) { /* do nothing */
790         }
791     } PullTimeoutMetadata;
792 
793     typedef struct {
794         long totalPull = 0;
795         long totalPullFromCache = 0;
796         long minPullIntervalSec = LONG_MAX;
797         int64_t avgPullTimeNs = 0;
798         int64_t maxPullTimeNs = 0;
799         long numPullTime = 0;
800         int64_t avgPullDelayNs = 0;
801         int64_t maxPullDelayNs = 0;
802         long numPullDelay = 0;
803         long dataError = 0;
804         long pullTimeout = 0;
805         long pullExceedMaxDelay = 0;
806         long pullFailed = 0;
807         long pullUidProviderNotFound = 0;
808         long pullerNotFound = 0;
809         long emptyData = 0;
810         long registeredCount = 0;
811         long unregisteredCount = 0;
812         int32_t atomErrorCount = 0;
813         long binderCallFailCount = 0;
814         std::list<PullTimeoutMetadata> pullTimeoutMetadata;
815         int32_t subscriptionPullCount = 0;
816     } PulledAtomStats;
817 
818     typedef struct {
819         long hardDimensionLimitReached = 0;
820         long lateLogEventSkipped = 0;
821         long skippedForwardBuckets = 0;
822         long badValueType = 0;
823         long conditionChangeInNextBucket = 0;
824         long invalidatedBucket = 0;
825         long bucketDropped = 0;
826         int64_t minBucketBoundaryDelayNs = 0;
827         int64_t maxBucketBoundaryDelayNs = 0;
828         long bucketUnknownCondition = 0;
829         long bucketCount = 0;
830     } AtomMetricStats;
831 
private:
    // The constructor is private, so instances cannot be created by outside code.
    StatsdStats();

    // Declared mutable so that it can also be locked from const member functions.
    // NOTE(review): presumably guards the stats members declared below -- confirm.
    mutable std::mutex mLock;

    // Start time in seconds.
    // NOTE(review): presumably wall clock based, per the comment on mStatsdStatsId.
    int32_t mStartTimeSec;

    // Random id set using rand() during the initialization. Used to uniquely
    // identify a session. This is more reliable than mStartTimeSec due to the
    // unreliable nature of wall clock times.
    const int32_t mStatsdStatsId;

    // Track the number of dropped entries used by the uid map.
    UidMapStats mUidMapStats;
846 
    // The stats about the configs that are still in use.
    // The map size is capped by kMaxConfigCount.
    std::map<const ConfigKey, std::shared_ptr<ConfigStats>> mConfigStats;

    // Stores the stats for the configs that are no longer in use.
    // The size of the list is capped by kMaxIceBoxSize.
    std::list<std::shared_ptr<ConfigStats>> mIceBox;
854 
855     // Stores the number of times a pushed atom is logged and skipped (if skipped).
856     // The size of the vector is the largest pushed atom id in atoms.proto + 1. Atoms
857     // out of that range will be put in mNonPlatformPushedAtomStats.
858     // This is a vector, not a map because it will be accessed A LOT -- for each stats log.
859     struct PushedAtomStats {
860         int logCount = 0;
861         int skipCount = 0;
862     };
863 
864     std::vector<PushedAtomStats> mPushedAtomStats;
865 
    // Stores the number of times a pushed atom is logged and skipped for atom ids above
    // kMaxPushedAtomId. The max size of the map is kMaxNonPlatformPushedAtoms.
    std::unordered_map<int, PushedAtomStats> mNonPlatformPushedAtomStats;

    // Stores the number of times a pushed atom is dropped due to a queue overflow event.
    // We do not expect this to happen often, so a map is preferable to a pre-allocated vector.
    // The max size of the map is kMaxPushedAtomId + kMaxNonPlatformPushedAtoms.
    std::unordered_map<int, int> mPushedAtomDropsStats;

    // Maps PullAtomId to its stats. The size is capped by the puller atom counts.
    std::map<int, PulledAtomStats> mPulledAtomStats;

    // Stores the number of times a pushed atom was logged erroneously. The
    // corresponding counts for pulled atoms are stored in PulledAtomStats.
    // The max size of this map is kMaxPushedAtomErrorStatsSize.
    std::map<int, int> mPushedAtomErrorStats;
882 
883     // Stores the number of times a pushed atom was lost due to socket error.
884     // Represents counter per uid per tag per error with indication when the loss event was observed
885     // first & last time.
886     struct SocketLossStats {
SocketLossStatsSocketLossStats887         SocketLossStats(int32_t uid, int64_t firstLossTsNanos, int64_t lastLossTsNanos)
888             : mUid(uid), mFirstLossTsNanos(firstLossTsNanos), mLastLossTsNanos(lastLossTsNanos) {
889         }
890 
891         int32_t mUid;
892         int64_t mFirstLossTsNanos;
893         int64_t mLastLossTsNanos;
894         // atom loss count per error, atom id
895         struct AtomLossInfo {
AtomLossInfoSocketLossStats::AtomLossInfo896             AtomLossInfo(int32_t atomId, int32_t error, int32_t count)
897                 : mAtomId(atomId), mError(error), mCount(count) {
898             }
899             int mAtomId;
900             int mError;
901             int mCount;
902         };
903         std::vector<AtomLossInfo> mLossCountPerErrorAtomId;
904     };
905     // The max size of this list is kMaxSocketLossStatsSize.
906     std::list<SocketLossStats> mSocketLossStats;
907 
    // Stores the number of times pushed atom loss info was dropped from the stats
    // on the libstatssocket side due to a guardrail hit.
    // Holds one counter per uid.
    // The max size of this map is kMaxSocketLossStatsSize.
    std::map<int32_t, int32_t> mSocketLossStatsOverflowCounters;

    // Maps metric ID to its stats. The size is capped by the number of metrics.
    std::map<int64_t, AtomMetricStats> mAtomMetricStats;

    // Maps uids to the times when the activation changed broadcast was not sent due to hitting
    // the guardrail. The size is capped by the number of configs, and up to 20 times per uid.
    std::map<int, std::list<int32_t>> mActivationBroadcastGuardrailStats;
920 
921     struct LogLossStats {
LogLossStatsLogLossStats922         LogLossStats(int32_t sec, int32_t count, int32_t error, int32_t tag, int32_t uid,
923                      int32_t pid)
924             : mWallClockSec(sec),
925               mCount(count),
926               mLastError(error),
927               mLastTag(tag),
928               mUid(uid),
929               mPid(pid) {
930         }
931         int32_t mWallClockSec;
932         int32_t mCount;
933         // error code defined in linux/errno.h
934         int32_t mLastError;
935         int32_t mLastTag;
936         int32_t mUid;
937         int32_t mPid;
938     };
939 
    // Max of {(now - oldestEventTimestamp) when overflow happens}.
    // This number is helpful to understand how SLOW statsd can be.
    int64_t mMaxQueueHistoryNs = 0;

    // Min of {(now - oldestEventTimestamp) when overflow happens}.
    // This number is helpful to understand how FAST events flood into statsd.
    int64_t mMinQueueHistoryNs = kInt64Max;

    // Total number of events that are lost due to queue overflow.
    int32_t mOverflowCount = 0;

    // Max number of events stored in the queue seen so far.
    int32_t mEventQueueMaxSizeObserved = 0;

    // Event timestamp for the associated max size hit.
    int64_t mEventQueueMaxSizeObservedElapsedNanos = 0;

    // Timestamps when we detect log loss, and the number of logs lost.
    std::list<LogLossStats> mLogLossStats;

    // NOTE(review): presumably the times, in seconds, of observed system server restarts --
    // confirm against the code that fills this list.
    std::list<int32_t> mSystemServerRestartSec;

    // NOTE(review): presumably bucketed counts of socket batch read sizes -- confirm against
    // the code that fills this vector.
    std::vector<int64_t> mSocketBatchReadHistogram;
963 
964     // Stores stats about large socket batch reads
965     struct LargeBatchSocketReadStats {
LargeBatchSocketReadStatsLargeBatchSocketReadStats966         LargeBatchSocketReadStats(int32_t size, int64_t lastReadTimeNs, int64_t currReadTimeNs,
967                                   int64_t minAtomReadTimeNs, int64_t maxAtomReadTimeNs,
968                                   const std::unordered_map<int32_t, int32_t>& atomCounts)
969             : mSize(size),
970               mLastReadTimeNs(lastReadTimeNs),
971               mCurrReadTimeNs(currReadTimeNs),
972               mMinAtomReadTimeNs(minAtomReadTimeNs),
973               mMaxAtomReadTimeNs(maxAtomReadTimeNs),
974               mCommonAtomCounts(atomCounts) {
975         }
976 
977         int32_t mSize;
978         // The elapsed time of the previous and current read times.
979         int64_t mLastReadTimeNs;
980         int64_t mCurrReadTimeNs;
981         // The min and max times of the LogEvents processed in the batch
982         int64_t mMinAtomReadTimeNs;
983         int64_t mMaxAtomReadTimeNs;
984         // Map of atom id to count for atoms logged more than kMaxLargeBatchReadAtomThreshold times.
985         std::unordered_map<int32_t, int32_t> mCommonAtomCounts;
986     };
987     // The max size of this list is kMaxSocketLossStatsSize.
988     std::list<LargeBatchSocketReadStats> mLargeBatchSocketReadStats;
989 
990     struct RestrictedMetricQueryStats {
RestrictedMetricQueryStatsRestrictedMetricQueryStats991         RestrictedMetricQueryStats(int32_t callingUid, int64_t configId,
992                                    const string& configPackage, std::optional<int32_t> configUid,
993                                    int64_t queryTimeNs,
994                                    std::optional<InvalidQueryReason> invalidQueryReason,
995                                    const string& error, std::optional<int64_t> queryLatencyNs)
996             : mCallingUid(callingUid),
997               mConfigId(configId),
998               mConfigPackage(configPackage),
999               mConfigUid(configUid),
1000               mQueryWallTimeNs(queryTimeNs),
1001               mInvalidQueryReason(invalidQueryReason),
1002               mError(error),
1003               mQueryLatencyNs(queryLatencyNs) {
1004             mHasError = invalidQueryReason.has_value();
1005         }
1006         int32_t mCallingUid;
1007         int64_t mConfigId;
1008         string mConfigPackage;
1009         std::optional<int32_t> mConfigUid;
1010         int64_t mQueryWallTimeNs;
1011         std::optional<InvalidQueryReason> mInvalidQueryReason;
1012         bool mHasError;
1013         string mError;
1014         std::optional<int64_t> mQueryLatencyNs;
1015     };
1016     std::list<RestrictedMetricQueryStats> mRestrictedMetricQueryStats;
1017 
1018     void noteQueryRestrictedMetricFailedLocked(const int64_t configId, const string& configPackage,
1019                                                const std::optional<int32_t> configUid,
1020                                                const int32_t callingUid,
1021                                                const InvalidQueryReason reason,
1022                                                const string& error);
1023 
    // NOTE(review): presumably the number of times the subscription pull thread woke up --
    // confirm against the code that increments it.
    int32_t mSubscriptionPullThreadWakeupCount = 0;

    // Maps Subscription ID to the corresponding SubscriptionStats struct object.
    // Size of this map is capped by ShellSubscriber::kMaxSubscriptions.
    std::map<int32_t, SubscriptionStats> mSubscriptionStats;

    // Stores the number of times statsd modified the anomaly alarm registered with
    // StatsCompanionService.
    int mAnomalyAlarmRegisteredStats = 0;

    // Stores the number of times statsd registered periodic alarm changes.
    int mPeriodicAlarmRegisteredStats = 0;
1036 
1037     void noteConfigResetInternalLocked(const ConfigKey& key);
1038 
1039     void noteConfigRemovedInternalLocked(const ConfigKey& key);
1040 
1041     void resetInternalLocked();
1042 
1043     void noteAtomLoggedLocked(int atomId, bool isSkipped);
1044 
1045     void noteAtomDroppedLocked(int atomId);
1046 
1047     void noteDataDropped(const ConfigKey& key, const size_t totalBytes, int32_t timeSec);
1048 
1049     void noteMetricsReportSent(const ConfigKey& key, const size_t numBytes, int32_t timeSec,
1050                                const int32_t reportNumber);
1051 
1052     void noteBroadcastSent(const ConfigKey& key, int32_t timeSec);
1053 
1054     void noteActiveStatusChanged(const ConfigKey& key, bool activate, int32_t timeSec);
1055 
1056     void noteActivationBroadcastGuardrailHit(const int uid, int32_t timeSec);
1057 
1058     void addToIceBoxLocked(std::shared_ptr<ConfigStats>& stats);
1059 
1060     int getPushedAtomErrorsLocked(int atomId) const;
1061 
1062     int getPushedAtomDropsLocked(int atomId) const;
1063 
1064     bool hasRestrictedConfigErrors(const std::shared_ptr<ConfigStats>& configStats) const;
1065 
    /**
     * Get a reference to AtomMetricStats for a metric. If none exists, create it. The reference
     * will live as long as `this`.
     *
     * @param metricId id of the metric whose stats are requested.
     */
    StatsdStats::AtomMetricStats& getAtomMetricStats(int64_t metricId);
1071 
    // Tests that are granted access to the private members of this class through
    // gtest's FRIEND_TEST.
    FRIEND_TEST(LogEventQueue_test, TestQueueMaxSize);
    FRIEND_TEST(SocketParseMessageTest, TestProcessMessage);
    FRIEND_TEST(StatsLogProcessorTest, InvalidConfigRemoved);
    FRIEND_TEST(StatsdStatsTest, TestActivationBroadcastGuardrailHit);
    FRIEND_TEST(StatsdStatsTest, TestAnomalyMonitor);
    FRIEND_TEST(StatsdStatsTest, TestAtomDroppedStats);
    FRIEND_TEST(StatsdStatsTest, TestAtomErrorStats);
    FRIEND_TEST(StatsdStatsTest, TestAtomLog);
    FRIEND_TEST(StatsdStatsTest, TestAtomLoggedAndDroppedAndSkippedStats);
    FRIEND_TEST(StatsdStatsTest, TestAtomLoggedAndDroppedStats);
    FRIEND_TEST(StatsdStatsTest, TestAtomMetricsStats);
    FRIEND_TEST(StatsdStatsTest, TestAtomSkippedStats);
    FRIEND_TEST(StatsdStatsTest, TestConfigMetadataProviderPromotionFailed);
    FRIEND_TEST(StatsdStatsTest, TestConfigRemove);
    FRIEND_TEST(StatsdStatsTest, TestHasHitDimensionGuardrail);
    FRIEND_TEST(StatsdStatsTest, TestInvalidConfigAdd);
    FRIEND_TEST(StatsdStatsTest, TestInvalidConfigMissingMetricId);
    FRIEND_TEST(StatsdStatsTest, TestInvalidConfigOnlyMetricId);
    FRIEND_TEST(StatsdStatsTest, TestNonPlatformAtomLog);
    FRIEND_TEST(StatsdStatsTest, TestPullAtomStats);
    FRIEND_TEST(StatsdStatsTest, TestQueueStats);
    FRIEND_TEST(StatsdStatsTest, TestRestrictedMetricsQueryStats);
    FRIEND_TEST(StatsdStatsTest, TestRestrictedMetricsStats);
    FRIEND_TEST(StatsdStatsTest, TestShardOffsetProvider);
    FRIEND_TEST(StatsdStatsTest, TestSocketLossStats);
    FRIEND_TEST(StatsdStatsTest, TestSocketLossStatsOverflowCounter);
    FRIEND_TEST(StatsdStatsTest, TestSubStats);
    FRIEND_TEST(StatsdStatsTest, TestSubscriptionAtomPulled);
    FRIEND_TEST(StatsdStatsTest, TestSubscriptionEnded);
    FRIEND_TEST(StatsdStatsTest, TestSubscriptionFlushed);
    FRIEND_TEST(StatsdStatsTest, TestSubscriptionPullThreadWakeup);
    FRIEND_TEST(StatsdStatsTest, TestSubscriptionStarted);
    FRIEND_TEST(StatsdStatsTest, TestSubscriptionStartedMaxActiveSubscriptions);
    FRIEND_TEST(StatsdStatsTest, TestSubscriptionStartedRemoveFinishedSubscription);
    FRIEND_TEST(StatsdStatsTest, TestSystemServerCrash);
    FRIEND_TEST(StatsdStatsTest, TestTimestampThreshold);
    FRIEND_TEST(StatsdStatsTest, TestValidConfigAdd);
    FRIEND_TEST(StatsdStatsTest, TestSocketBatchReadStats);
1110 };
1111 
1112 InvalidConfigReason createInvalidConfigReasonWithMatcher(const InvalidConfigReasonEnum reason,
1113                                                          const int64_t matcherId);
1114 
1115 InvalidConfigReason createInvalidConfigReasonWithMatcher(const InvalidConfigReasonEnum reason,
1116                                                          const int64_t metricId,
1117                                                          const int64_t matcherId);
1118 
1119 InvalidConfigReason createInvalidConfigReasonWithPredicate(const InvalidConfigReasonEnum reason,
1120                                                            const int64_t conditionId);
1121 
1122 InvalidConfigReason createInvalidConfigReasonWithPredicate(const InvalidConfigReasonEnum reason,
1123                                                            const int64_t metricId,
1124                                                            const int64_t conditionId);
1125 
1126 InvalidConfigReason createInvalidConfigReasonWithState(const InvalidConfigReasonEnum reason,
1127                                                        const int64_t metricId,
1128                                                        const int64_t stateId);
1129 
1130 InvalidConfigReason createInvalidConfigReasonWithAlert(const InvalidConfigReasonEnum reason,
1131                                                        const int64_t alertId);
1132 
1133 InvalidConfigReason createInvalidConfigReasonWithAlert(const InvalidConfigReasonEnum reason,
1134                                                        const int64_t metricId,
1135                                                        const int64_t alertId);
1136 
1137 InvalidConfigReason createInvalidConfigReasonWithAlarm(const InvalidConfigReasonEnum reason,
1138                                                        const int64_t alarmId);
1139 
1140 InvalidConfigReason createInvalidConfigReasonWithSubscription(const InvalidConfigReasonEnum reason,
1141                                                               const int64_t subscriptionId);
1142 
1143 InvalidConfigReason createInvalidConfigReasonWithSubscriptionAndAlarm(
1144         const InvalidConfigReasonEnum reason, int64_t subscriptionId, int64_t alarmId);
1145 
1146 InvalidConfigReason createInvalidConfigReasonWithSubscriptionAndAlert(
1147         const InvalidConfigReasonEnum reason, int64_t subscriptionId, int64_t alertId);
1148 
1149 }  // namespace statsd
1150 }  // namespace os
1151 }  // namespace android
1152