/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "TelemetryStatsd"

#include "TelemetryStatsd.h"

#include <android-base/logging.h>
#include <android-base/no_destructor.h>
#include <statslog_neuralnetworks.h>

#include <algorithm>
#include <chrono>
#include <cstring>
#include <limits>
#include <map>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <vector>

#include "FeatureLevel.h"
#include "Telemetry.h"
#include "Tracing.h"

namespace android::nn::telemetry {
namespace {

constexpr uint64_t kNoTimeReportedRuntime = std::numeric_limits<uint64_t>::max();
constexpr int64_t kNoTimeReportedStatsd = std::numeric_limits<int64_t>::max();
constexpr size_t kInitialChannelSize = 100;

// Statsd specifies that "Atom logging frequency should not exceed once per 10 milliseconds (i.e.
// consecutive atom calls should be at least 10 milliseconds apart)." A quiet period of 100ms is
// chosen here to reduce the chance that the NNAPI logs too frequently, even from separate
// applications.
constexpr auto kMinimumLoggingQuietPeriod = std::chrono::milliseconds(100);

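// Returns the UID of the current process, cached on first use.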
int32_t getUid() {
    static const int32_t uid = getuid();
    return uid;
}

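// Conversions from the runtime's nanosecond timings to the units logged to statsd. Both preserve
// the "no time reported" sentinel value.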
constexpr int64_t nanosToMillis(uint64_t time) {
    constexpr uint64_t kNanosPerMilli = 1'000'000;
    return time == kNoTimeReportedRuntime ? kNoTimeReportedStatsd : time / kNanosPerMilli;
}

constexpr int64_t nanosToMicros(uint64_t time) {
    constexpr uint64_t kNanosPerMicro = 1'000;
    return time == kNoTimeReportedRuntime ? kNoTimeReportedStatsd : time / kNanosPerMicro;
}

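// Creates an AccumulatedTiming from a single timing sample, or an empty value if no time was
// reported.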
AtomValue::AccumulatedTiming accumulatedTimingFrom(int64_t timing) {
    if (timing == kNoTimeReportedStatsd) {
        return {};
    }
    return {
            .sumTime = timing,
            .minTime = timing,
            .maxTime = timing,
            .sumSquaredTime = timing * timing,
            .count = 1,
    };
}

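// Merges the statistics of `timing` into `accumulatedTime`, skipping empty timings.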
void combineAccumulatedTiming(AtomValue::AccumulatedTiming* accumulatedTime,
                              const AtomValue::AccumulatedTiming& timing) {
    if (timing.count == 0) {
        return;
    }
    accumulatedTime->sumTime += timing.sumTime;
    accumulatedTime->minTime = std::min(accumulatedTime->minTime, timing.minTime);
    accumulatedTime->maxTime = std::max(accumulatedTime->maxTime, timing.maxTime);
    accumulatedTime->sumSquaredTime += timing.sumSquaredTime;
    accumulatedTime->count += timing.count;
}

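// Wraps the model architecture hash as a statsd BytesField.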
stats::BytesField makeBytesField(const ModelArchHash& modelArchHash) {
    return stats::BytesField(reinterpret_cast<const char*>(modelArchHash.data()),
                             modelArchHash.size());
}

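// Copies the raw hash bytes provided by the diagnostic callback into a ModelArchHash.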
ModelArchHash makeModelArchHash(const uint8_t* modelArchHash) {
    ModelArchHash output;
    std::memcpy(output.data(), modelArchHash, BYTE_SIZE_OF_MODEL_ARCH_HASH);
    return output;
}

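// Verify at compile time that the NNAPI enum values match the corresponding statsd enum values,
// so they can be passed through directly when logging.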
#define STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, value) \
    static_assert(static_cast<int32_t>(DataClass::value) ==   \
                  stats::NEURAL_NETWORKS_##type##__##inout##_DATA_CLASS__DATA_CLASS_##value)

#define STATIC_ASSERT_DATA_CLASS_EQ(type, inout)              \
    STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, UNKNOWN);  \
    STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, OTHER);    \
    STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, FLOAT32);  \
    STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, FLOAT16);  \
    STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, QUANT);    \
    STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, MIXED)

STATIC_ASSERT_DATA_CLASS_EQ(COMPILATION_COMPLETED, INPUT);
STATIC_ASSERT_DATA_CLASS_EQ(COMPILATION_COMPLETED, OUTPUT);
STATIC_ASSERT_DATA_CLASS_EQ(COMPILATION_FAILED, INPUT);
STATIC_ASSERT_DATA_CLASS_EQ(COMPILATION_FAILED, OUTPUT);
STATIC_ASSERT_DATA_CLASS_EQ(EXECUTION_COMPLETED, INPUT);
STATIC_ASSERT_DATA_CLASS_EQ(EXECUTION_COMPLETED, OUTPUT);
STATIC_ASSERT_DATA_CLASS_EQ(EXECUTION_FAILED, INPUT);
STATIC_ASSERT_DATA_CLASS_EQ(EXECUTION_FAILED, OUTPUT);

#define STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, value) \
    static_assert(ANEURALNETWORKS_##value ==            \
                  stats::NEURAL_NETWORKS_##type##__ERROR_CODE__RESULT_CODE_##value)

#define STATIC_ASSERT_RESULT_CODE_EQ(type)                                   \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, NO_ERROR);                      \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, OUT_OF_MEMORY);                 \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, INCOMPLETE);                    \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, UNEXPECTED_NULL);               \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, BAD_DATA);                      \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, OP_FAILED);                     \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, BAD_STATE);                     \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, UNMAPPABLE);                    \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, OUTPUT_INSUFFICIENT_SIZE);      \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, UNAVAILABLE_DEVICE);            \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, MISSED_DEADLINE_TRANSIENT);     \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, MISSED_DEADLINE_PERSISTENT);    \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, RESOURCE_EXHAUSTED_TRANSIENT);  \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, RESOURCE_EXHAUSTED_PERSISTENT); \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, DEAD_OBJECT)

STATIC_ASSERT_RESULT_CODE_EQ(COMPILATION_FAILED);
STATIC_ASSERT_RESULT_CODE_EQ(EXECUTION_FAILED);

#undef STATIC_ASSERT_DATA_CLASS_EQ_VALUE
#undef STATIC_ASSERT_DATA_CLASS_EQ
#undef STATIC_ASSERT_RESULT_CODE_EQ_VALUE
#undef STATIC_ASSERT_RESULT_CODE_EQ

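// Converts a DataClass to the statsd enum value, mapping unrecognized values to UNKNOWN.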
int32_t convertDataClass(DataClass dataClass) {
    switch (dataClass) {
        case DataClass::UNKNOWN:
        case DataClass::OTHER:
        case DataClass::FLOAT32:
        case DataClass::FLOAT16:
        case DataClass::QUANT:
        case DataClass::MIXED:
            return static_cast<int32_t>(dataClass);
    }
    return static_cast<int32_t>(DataClass::UNKNOWN);
}

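// Converts an ExecutionMode to the statsd enum value, mapping unrecognized values to MODE_UNKNOWN.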
int32_t convertExecutionMode(ExecutionMode executionMode) {
    switch (executionMode) {
        case ExecutionMode::ASYNC:
            return stats::NEURAL_NETWORKS_EXECUTION_FAILED__MODE__MODE_ASYNC;
        case ExecutionMode::SYNC:
            return stats::NEURAL_NETWORKS_EXECUTION_FAILED__MODE__MODE_SYNC;
        case ExecutionMode::BURST:
            return stats::NEURAL_NETWORKS_EXECUTION_FAILED__MODE__MODE_BURST;
        case ExecutionMode::ASYNC_WITH_DEPS:
            return stats::NEURAL_NETWORKS_EXECUTION_FAILED__MODE__MODE_ASYNC_WITH_DEPS;
    }
    return stats::NEURAL_NETWORKS_EXECUTION_FAILED__MODE__MODE_UNKNOWN;
}

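// Passes through known NNAPI result codes, mapping anything out of range to OP_FAILED.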
int32_t convertResultCode(int32_t resultCode) {
    return resultCode >= ANEURALNETWORKS_NO_ERROR && resultCode <= ANEURALNETWORKS_DEAD_OBJECT
                   ? resultCode
                   : ANEURALNETWORKS_OP_FAILED;
}

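// Folds the full model architecture hash into 64 bits by XOR-ing its 8-byte chunks.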
int64_t compressTo64(const ModelArchHash& modelArchHash) {
    int64_t hash = 0;
    const char* data = reinterpret_cast<const char*>(modelArchHash.data());
    for (size_t i = 0; i + sizeof(int64_t) <= sizeof(ModelArchHash); i += sizeof(int64_t)) {
        int64_t tmp = 0;
        std::memcpy(&tmp, data + i, sizeof(int64_t));
        hash ^= tmp;
    }
    return hash;
}

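// Writes one aggregated atom to statsd, selecting the atom type based on whether the atom
// describes a compilation or an execution and whether it succeeded or failed.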
void logAtomToStatsd(Atom&& atom) {
    NNTRACE_RT(NNTRACE_PHASE_UNSPECIFIED, "logAtomToStatsd");
    const auto& [key, value] = atom;

    const auto modelArchHash64 = compressTo64(key.modelArchHash);

    if (!key.isExecution) {
        if (key.errorCode == ANEURALNETWORKS_NO_ERROR) {
            stats::stats_write(
                    stats::NEURALNETWORKS_COMPILATION_COMPLETED, getUid(), getSessionId(),
                    kNnapiApexVersion, makeBytesField(key.modelArchHash), key.deviceId.c_str(),
                    convertDataClass(key.inputDataClass), convertDataClass(key.outputDataClass),
                    key.fallbackToCpuFromError, key.introspectionEnabled, key.cacheEnabled,
                    key.hasControlFlow, key.hasDynamicTemporaries,
                    value.compilationTimeMillis.sumTime, value.compilationTimeMillis.minTime,
                    value.compilationTimeMillis.maxTime, value.compilationTimeMillis.sumSquaredTime,
                    value.compilationTimeMillis.count, value.count, modelArchHash64);
        } else {
            stats::stats_write(
                    stats::NEURALNETWORKS_COMPILATION_FAILED, getUid(), getSessionId(),
                    kNnapiApexVersion, makeBytesField(key.modelArchHash), key.deviceId.c_str(),
                    convertDataClass(key.inputDataClass), convertDataClass(key.outputDataClass),
                    convertResultCode(key.errorCode), key.introspectionEnabled, key.cacheEnabled,
                    key.hasControlFlow, key.hasDynamicTemporaries, value.count, modelArchHash64);
        }
    } else {
        if (key.errorCode == ANEURALNETWORKS_NO_ERROR) {
            stats::stats_write(
                    stats::NEURALNETWORKS_EXECUTION_COMPLETED, getUid(), getSessionId(),
                    kNnapiApexVersion, makeBytesField(key.modelArchHash), key.deviceId.c_str(),
                    convertExecutionMode(key.executionMode), convertDataClass(key.inputDataClass),
                    convertDataClass(key.outputDataClass), key.introspectionEnabled,
                    key.cacheEnabled, key.hasControlFlow, key.hasDynamicTemporaries,
                    value.durationRuntimeMicros.sumTime, value.durationRuntimeMicros.minTime,
                    value.durationRuntimeMicros.maxTime, value.durationRuntimeMicros.sumSquaredTime,
                    value.durationRuntimeMicros.count, value.durationDriverMicros.sumTime,
                    value.durationDriverMicros.minTime, value.durationDriverMicros.maxTime,
                    value.durationDriverMicros.sumSquaredTime, value.durationDriverMicros.count,
                    value.durationHardwareMicros.sumTime, value.durationHardwareMicros.minTime,
                    value.durationHardwareMicros.maxTime,
                    value.durationHardwareMicros.sumSquaredTime, value.durationHardwareMicros.count,
                    value.count, modelArchHash64);
        } else {
            stats::stats_write(
                    stats::NEURALNETWORKS_EXECUTION_FAILED, getUid(), getSessionId(),
                    kNnapiApexVersion, makeBytesField(key.modelArchHash), key.deviceId.c_str(),
                    convertExecutionMode(key.executionMode), convertDataClass(key.inputDataClass),
                    convertDataClass(key.outputDataClass), convertResultCode(key.errorCode),
                    key.introspectionEnabled, key.cacheEnabled, key.hasControlFlow,
                    key.hasDynamicTemporaries, value.count, modelArchHash64);
        }
    }
}

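// Returns the process-wide logger that aggregates atoms and rate-limits writes to statsd.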
AsyncLogger& getStatsdLogger() {
    static base::NoDestructor<AsyncLogger> logger(logAtomToStatsd, kMinimumLoggingQuietPeriod);
    return *logger;
}

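// Tuple view of an AtomKey, used to implement the comparison operators below.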
constexpr auto asTuple(const AtomKey& v) {
    return std::tie(v.isExecution, v.modelArchHash, v.deviceId, v.executionMode, v.errorCode,
                    v.inputDataClass, v.outputDataClass, v.fallbackToCpuFromError,
                    v.introspectionEnabled, v.cacheEnabled, v.hasControlFlow,
                    v.hasDynamicTemporaries);
}

}  // namespace

bool operator==(const AtomKey& lhs, const AtomKey& rhs) {
    return asTuple(lhs) == asTuple(rhs);
}

bool operator<(const AtomKey& lhs, const AtomKey& rhs) {
    return asTuple(lhs) < asTuple(rhs);
}

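// Merges `value` into `accumulatedValue`, combining counts and all accumulated timings.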
void combineAtomValues(AtomValue* accumulatedValue, const AtomValue& value) {
    accumulatedValue->count += value.count;
    combineAccumulatedTiming(&accumulatedValue->compilationTimeMillis, value.compilationTimeMillis);
    combineAccumulatedTiming(&accumulatedValue->durationRuntimeMicros, value.durationRuntimeMicros);
    combineAccumulatedTiming(&accumulatedValue->durationDriverMicros, value.durationDriverMicros);
    combineAccumulatedTiming(&accumulatedValue->durationHardwareMicros,
                             value.durationHardwareMicros);
}

bool AtomAggregator::empty() const {
    return mOrder.empty();
}

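// Combines the atom with an existing entry of the same key, or inserts it and records the
// insertion order.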
void AtomAggregator::push(Atom&& atom) {
    const AtomValue& value = atom.second;
    if (const auto [it, inserted] = mAggregate.try_emplace(std::move(atom.first), value);
        !inserted) {
        combineAtomValues(&it->second, value);
    } else {
        mOrder.push(&it->first);
    }
}

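// Removes and returns the oldest aggregated atom.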
std::pair<AtomKey, AtomValue> AtomAggregator::pop() {
    CHECK(!empty());

    // Find the key of the aggregated atom to log and remove it.
    const AtomKey* key = mOrder.front();
    mOrder.pop();

    // Find the value that corresponds to the key and remove the (key,value) from the map.
    auto node = mAggregate.extract(*key);
    CHECK(!node.empty());

    return std::make_pair(std::move(node.key()), node.mapped());
}

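// Starts the worker thread that drains the channel, aggregates atoms, and logs one aggregated
// atom per quiet period.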
AsyncLogger::AsyncLogger(LoggerFn logger, Duration loggingQuietPeriodDuration) {
    mChannel.reserve(kInitialChannelSize);
    mThread = std::thread([this, log = std::move(logger), loggingQuietPeriodDuration]() {
        AtomAggregator data;
        std::vector<Atom> atoms;
        atoms.reserve(kInitialChannelSize);

        // Loop until the thread is being torn down.
        while (true) {
            // Get data if it's available.
            const Result result = takeAll(&atoms, /*blockUntilDataIsAvailable=*/data.empty());
            if (result == Result::TEARDOWN) {
                break;
            }

            // Aggregate the data locally.
            std::for_each(atoms.begin(), atoms.end(),
                          [&data](Atom& atom) { data.push(std::move(atom)); });
            atoms.clear();

            // Log data if available and sleep.
            if (!data.empty()) {
                log(data.pop());
                const Result result = sleepFor(loggingQuietPeriodDuration);
                if (result == Result::TEARDOWN) {
                    break;
                }
            }
        }
    });
}

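// Queues an atom for the worker thread, waking it if the channel was previously empty.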
void AsyncLogger::write(Atom&& atom) {
    bool wasEmpty = false;
    {
        std::lock_guard hold(mMutex);
        wasEmpty = mChannel.empty();
        mChannel.push_back(std::move(atom));
    }
    if (wasEmpty) {
        mNotEmptyOrTeardown.notify_one();
    }
}

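// Moves all queued atoms into `output`, optionally blocking until data is available or teardown
// is requested.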
AsyncLogger::Result AsyncLogger::takeAll(std::vector<Atom>* output,
                                         bool blockUntilDataIsAvailable) {
    CHECK(output != nullptr);
    CHECK(output->empty());
    const auto blockUntil = blockUntilDataIsAvailable ? TimePoint::max() : TimePoint{};
    std::unique_lock lock(mMutex);
    mNotEmptyOrTeardown.wait_until(
            lock, blockUntil, [this]() REQUIRES(mMutex) { return !mChannel.empty() || mTeardown; });
    std::swap(*output, mChannel);
    return mTeardown ? Result::TEARDOWN : Result::SUCCESS;
}

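// Sleeps for `duration`, returning early if teardown is requested.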
AsyncLogger::Result AsyncLogger::sleepFor(Duration duration) {
    std::unique_lock lock(mMutex);
    mNotEmptyOrTeardown.wait_for(lock, duration, [this]() REQUIRES(mMutex) { return mTeardown; });
    return mTeardown ? Result::TEARDOWN : Result::SUCCESS;
}

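// Signals teardown and joins the worker thread.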
AsyncLogger::~AsyncLogger() {
    {
        std::lock_guard hold(mMutex);
        mTeardown = true;
    }
    mNotEmptyOrTeardown.notify_one();
    mThread.join();
}

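// Builds a single-sample atom from a compilation's diagnostic info.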
Atom createAtomFrom(const DiagnosticCompilationInfo* info) {
    Atom atom = Atom{
            AtomKey{
                    .isExecution = false,
                    .modelArchHash = makeModelArchHash(info->modelArchHash),
                    .deviceId = info->deviceId,
                    .executionMode = ExecutionMode::SYNC,
                    .errorCode = info->errorCode,
                    .inputDataClass = info->inputDataClass,
                    .outputDataClass = info->outputDataClass,
                    .fallbackToCpuFromError = info->fallbackToCpuFromError,
                    .introspectionEnabled = info->introspectionEnabled,
                    .cacheEnabled = info->cacheEnabled,
                    .hasControlFlow = info->hasControlFlow,
                    .hasDynamicTemporaries = info->hasDynamicTemporaries,
            },
            AtomValue{
                    .count = 1,
            },
    };

    // Timing information is only relevant for the "Completed" path.
    if (info->errorCode == ANEURALNETWORKS_NO_ERROR) {
        auto& value = atom.second;
        const auto compilationTimeMillis = nanosToMillis(info->compilationTimeNanos);
        value.compilationTimeMillis = accumulatedTimingFrom(compilationTimeMillis);
    }

    return atom;
}

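// Builds a single-sample atom from an execution's diagnostic info.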
Atom createAtomFrom(const DiagnosticExecutionInfo* info) {
    Atom atom = Atom{
            AtomKey{
                    .isExecution = true,
                    .modelArchHash = makeModelArchHash(info->modelArchHash),
                    .deviceId = info->deviceId,
                    .executionMode = info->executionMode,
                    .errorCode = info->errorCode,
                    .inputDataClass = info->inputDataClass,
                    .outputDataClass = info->outputDataClass,
                    .fallbackToCpuFromError = false,
                    .introspectionEnabled = info->introspectionEnabled,
                    .cacheEnabled = info->cacheEnabled,
                    .hasControlFlow = info->hasControlFlow,
                    .hasDynamicTemporaries = info->hasDynamicTemporaries,
            },
            AtomValue{
                    .count = 1,
            },
    };

    // Timing information is only relevant for the "Completed" path.
    if (info->errorCode == ANEURALNETWORKS_NO_ERROR) {
        auto& value = atom.second;
        const auto durationRuntimeMicros = nanosToMicros(info->durationRuntimeNanos);
        const auto durationDriverMicros = nanosToMicros(info->durationDriverNanos);
        const auto durationHardwareMicros = nanosToMicros(info->durationHardwareNanos);
        value.durationRuntimeMicros = accumulatedTimingFrom(durationRuntimeMicros);
        value.durationDriverMicros = accumulatedTimingFrom(durationDriverMicros);
        value.durationHardwareMicros = accumulatedTimingFrom(durationHardwareMicros);
    }

    return atom;
}

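// Entry points used by the NNAPI telemetry layer: convert the diagnostic info into an atom and
// hand it to the asynchronous statsd logger.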
void logCompilationToStatsd(const DiagnosticCompilationInfo* info) {
    NNTRACE_RT(NNTRACE_PHASE_UNSPECIFIED, "logCompilationStatsd");
    getStatsdLogger().write(createAtomFrom(info));
}

void logExecutionToStatsd(const DiagnosticExecutionInfo* info) {
    NNTRACE_RT(NNTRACE_PHASE_UNSPECIFIED, "logExecutionStatsd");
    getStatsdLogger().write(createAtomFrom(info));
}

}  // namespace android::nn::telemetry