1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_TELEMETRY_STATSD_H
18 #define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_TELEMETRY_STATSD_H
19 
20 #include <android-base/thread_annotations.h>
21 
22 #include <array>
23 #include <condition_variable>
24 #include <functional>
25 #include <limits>
26 #include <map>
27 #include <mutex>
28 #include <queue>
29 #include <string>
30 #include <thread>
31 #include <utility>
32 #include <vector>
33 
34 #include "Telemetry.h"
35 
36 namespace android::nn::telemetry {
37 
38 using ModelArchHash = std::array<uint8_t, BYTE_SIZE_OF_MODEL_ARCH_HASH>;
39 
// Initial values for the fields of AtomValue::AccumulatedTiming.
// * The sum starts at zero.
// * The minimum starts at the largest int64_t, so every sample compares <= to it.
// * The maximum starts at the smallest int64_t, so every sample compares >= to it.
// Declared `inline` so that all translation units including this header share one definition
// instead of each getting its own internal-linkage copy.
inline constexpr int64_t kSumTimeDefault = 0;
inline constexpr int64_t kMinTimeDefault = std::numeric_limits<int64_t>::max();
inline constexpr int64_t kMaxTimeDefault = std::numeric_limits<int64_t>::min();
43 
44 // For CompilationCompleted: isExecution = false, executionMode = SYNC, errorCode = 0
45 // For CompilationFailed: isExecution = false, executionMode = SYNC, errorCode != 0
46 // For ExecutionCompleted: isExecution = true, errorCode = 0, fallbackToCpuFromError = false
47 // For ExecutionFailed: isExecution = true, errorCode != 0, fallbackToCpuFromError = false
48 struct AtomKey {
49     bool isExecution;
50     ModelArchHash modelArchHash;
51     std::string deviceId;
52     ExecutionMode executionMode;
53     int32_t errorCode;
54     DataClass inputDataClass;
55     DataClass outputDataClass;
56     bool fallbackToCpuFromError;
57     bool introspectionEnabled;
58     bool cacheEnabled;
59     bool hasControlFlow;
60     bool hasDynamicTemporaries;
61 };
62 
63 bool operator==(const AtomKey& lhs, const AtomKey& rhs);
64 bool operator<(const AtomKey& lhs, const AtomKey& rhs);
65 
66 // For CompilationCompleted, all timings except compilationTimeMillis omitted
67 // For CompilationFailed, all timings omitted
68 // For ExecutionCompleted, compilationTimeMillis timing omitted
69 // For ExecutionFailed, all timings omitted
70 struct AtomValue {
71     int32_t count = 0;
72 
73     // AccumulatedTiming stores all the information needed to calculate the average, min, max, and
74     // standard deviation of all the accumulated timings. When count == 0, AccumulatedTiming is
75     // ignored. When count > 0:
76     // * average = sumTime / count
77     // * minimum = minTime
78     // * maximum = maxTime
79     // * variance = sumSquaredTime / count - average * average
80     // * standard deviation = sqrt(variance)
81     // * sample standard deviation = sqrt(variance * count / (count - 1))
82     struct AccumulatedTiming {
83         int64_t sumTime = kSumTimeDefault;
84         int64_t minTime = kMinTimeDefault;
85         int64_t maxTime = kMaxTimeDefault;
86         // Sum of each squared timing, e.g.: t1^2 + t2^2 + ... + tn^2
87         int64_t sumSquaredTime = kSumTimeDefault;
88         int32_t count = 0;
89     };
90     AccumulatedTiming compilationTimeMillis;
91     AccumulatedTiming durationRuntimeMicros;
92     AccumulatedTiming durationDriverMicros;
93     AccumulatedTiming durationHardwareMicros;
94 };
95 
96 void combineAtomValues(AtomValue* acculatedValue, const AtomValue& value);
97 
98 // Atom type to be sent to Statsd Telemetry
99 using Atom = std::pair<AtomKey, AtomValue>;
100 
101 // Helper class to locally aggregate and retrieve telemetry atoms.
102 class AtomAggregator {
103    public:
104     bool empty() const;
105 
106     void push(Atom&& atom);
107 
108     // Precondition: !empty()
109     Atom pop();
110 
111    private:
112     std::map<AtomKey, AtomValue> mAggregate;
113     // Pointer to keys of mAggregate to ensure atoms are logged in a fair order. Using pointers into
114     // a std::map is guaranteed to work because references to elements are guaranteed to be valid
115     // until that element is erased.
116     std::queue<const AtomKey*> mOrder;
117 };
118 
119 using LoggerFn = std::function<void(Atom&&)>;
120 
121 // AsyncLogger minimizes the call to `write`, so that the calling thread which handles the
122 // compilation or execution is not slowed down by writing to statsd. Instead, AsyncLogger
123 // contains a dedicated thread that will handle logging to statsd in the background.
124 // This class is thread-safe.
125 class AsyncLogger {
126    public:
127     AsyncLogger(LoggerFn logger, Duration loggingQuietPeriodDuration);
128     AsyncLogger(const AsyncLogger&) = delete;
129     AsyncLogger(AsyncLogger&&) = delete;
130     AsyncLogger& operator=(const AsyncLogger&) = delete;
131     AsyncLogger& operator=(AsyncLogger&&) = delete;
132     ~AsyncLogger();
133 
134     void write(Atom&& atom);
135 
136    private:
137     enum class Result {
138         SUCCESS,
139         TEARDOWN,
140     };
141 
142     // Precondition: output != nullptr
143     // Precondition: output->empty()
144     Result takeAll(std::vector<Atom>* output, bool blockUntilDataIsAvailable);
145 
146     Result sleepFor(Duration duration);
147 
148     mutable std::mutex mMutex;
149     mutable std::condition_variable mNotEmptyOrTeardown;
150     mutable std::vector<Atom> mChannel GUARDED_BY(mMutex);
151     mutable bool mTeardown GUARDED_BY(mMutex) = false;
152     std::thread mThread;
153 };
154 
155 // Create an Atom from a diagnostic info object.
156 Atom createAtomFrom(const DiagnosticCompilationInfo* info);
157 Atom createAtomFrom(const DiagnosticExecutionInfo* info);
158 
159 // Log an Atom to statsd from a diagnostic info object.
160 void logCompilationToStatsd(const DiagnosticCompilationInfo* info);
161 void logExecutionToStatsd(const DiagnosticExecutionInfo* info);
162 
163 }  // namespace android::nn::telemetry
164 
165 #endif  // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_TELEMETRY_STATSD_H
166