/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "TelemetryStatsd"

#include "TelemetryStatsd.h"

#include <android-base/logging.h>
#include <android-base/no_destructor.h>
#include <statslog_neuralnetworks.h>
#include <unistd.h>

#include <algorithm>
#include <chrono>
#include <cstring>
#include <limits>
#include <map>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include "FeatureLevel.h"
#include "Telemetry.h"
#include "Tracing.h"

namespace android::nn::telemetry {
namespace {

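// Sentinel values meaning "no time was reported". The runtime represents a missing duration as
// the maximum uint64_t, while the statsd atoms use the maximum int64_t.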
constexpr uint64_t kNoTimeReportedRuntime = std::numeric_limits<uint64_t>::max();
constexpr int64_t kNoTimeReportedStatsd = std::numeric_limits<int64_t>::max();
constexpr size_t kInitialChannelSize = 100;

// Statsd specifies that "Atom logging frequency should not exceed once per 10 milliseconds (i.e.
// consecutive atom calls should be at least 10 milliseconds apart)." A quiet period of 100ms is
// chosen here to reduce the chance that the NNAPI logs too frequently, even from separate
// applications.
constexpr auto kMinimumLoggingQuietPeriod = std::chrono::milliseconds(100);

int32_t getUid() {
    static const int32_t uid = getuid();
    return uid;
}

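// Convert a duration from the runtime's nanosecond representation to the resolution the statsd
// atoms expect (milliseconds for compilation, microseconds for execution), preserving the
// "no time reported" sentinel rather than scaling it.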
constexpr int64_t nanosToMillis(uint64_t time) {
    constexpr uint64_t kNanosPerMilli = 1'000'000;
    return time == kNoTimeReportedRuntime ? kNoTimeReportedStatsd : time / kNanosPerMilli;
}

constexpr int64_t nanosToMicros(uint64_t time) {
    constexpr uint64_t kNanosPerMicro = 1'000;
    return time == kNoTimeReportedRuntime ? kNoTimeReportedStatsd : time / kNanosPerMicro;
}

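// Build an AccumulatedTiming holding a single sample. The "no time reported" sentinel produces a
// zero-initialized value (count == 0), which combineAccumulatedTiming below treats as empty.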
AtomValue::AccumulatedTiming accumulatedTimingFrom(int64_t timing) {
    if (timing == kNoTimeReportedStatsd) {
        return {};
    }
    return {
            .sumTime = timing,
            .minTime = timing,
            .maxTime = timing,
            .sumSquaredTime = timing * timing,
            .count = 1,
    };
}

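// Merge one set of timing statistics into an accumulator. Empty contributions (count == 0) are
// skipped so that a zero-initialized minTime/maxTime never pollutes the accumulated extrema.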
void combineAccumulatedTiming(AtomValue::AccumulatedTiming* accumulatedTime,
                              const AtomValue::AccumulatedTiming& timing) {
    if (timing.count == 0) {
        return;
    }
    accumulatedTime->sumTime += timing.sumTime;
    accumulatedTime->minTime = std::min(accumulatedTime->minTime, timing.minTime);
    accumulatedTime->maxTime = std::max(accumulatedTime->maxTime, timing.maxTime);
    accumulatedTime->sumSquaredTime += timing.sumSquaredTime;
    accumulatedTime->count += timing.count;
}

stats::BytesField makeBytesField(const ModelArchHash& modelArchHash) {
    return stats::BytesField(reinterpret_cast<const char*>(modelArchHash.data()),
                             modelArchHash.size());
}

ModelArchHash makeModelArchHash(const uint8_t* modelArchHash) {
    ModelArchHash output;
    std::memcpy(output.data(), modelArchHash, BYTE_SIZE_OF_MODEL_ARCH_HASH);
    return output;
}

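// The static_asserts below pin the runtime's DataClass and result-code values to the constants in
// the statsd-generated header, so the raw casts performed by the convert* helpers below remain
// valid if either side changes.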
#define STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, value) \
    static_assert(static_cast<int32_t>(DataClass::value) ==   \
                  stats::NEURAL_NETWORKS_##type##__##inout##_DATA_CLASS__DATA_CLASS_##value)

#define STATIC_ASSERT_DATA_CLASS_EQ(type, inout)             \
    STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, UNKNOWN); \
    STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, OTHER);   \
    STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, FLOAT32); \
    STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, FLOAT16); \
    STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, QUANT);   \
    STATIC_ASSERT_DATA_CLASS_EQ_VALUE(type, inout, MIXED)

STATIC_ASSERT_DATA_CLASS_EQ(COMPILATION_COMPLETED, INPUT);
STATIC_ASSERT_DATA_CLASS_EQ(COMPILATION_COMPLETED, OUTPUT);
STATIC_ASSERT_DATA_CLASS_EQ(COMPILATION_FAILED, INPUT);
STATIC_ASSERT_DATA_CLASS_EQ(COMPILATION_FAILED, OUTPUT);
STATIC_ASSERT_DATA_CLASS_EQ(EXECUTION_COMPLETED, INPUT);
STATIC_ASSERT_DATA_CLASS_EQ(EXECUTION_COMPLETED, OUTPUT);
STATIC_ASSERT_DATA_CLASS_EQ(EXECUTION_FAILED, INPUT);
STATIC_ASSERT_DATA_CLASS_EQ(EXECUTION_FAILED, OUTPUT);

#define STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, value) \
    static_assert(ANEURALNETWORKS_##value ==            \
                  stats::NEURAL_NETWORKS_##type##__ERROR_CODE__RESULT_CODE_##value)

#define STATIC_ASSERT_RESULT_CODE_EQ(type)                                   \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, NO_ERROR);                      \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, OUT_OF_MEMORY);                 \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, INCOMPLETE);                    \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, UNEXPECTED_NULL);               \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, BAD_DATA);                      \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, OP_FAILED);                     \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, BAD_STATE);                     \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, UNMAPPABLE);                    \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, OUTPUT_INSUFFICIENT_SIZE);      \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, UNAVAILABLE_DEVICE);            \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, MISSED_DEADLINE_TRANSIENT);     \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, MISSED_DEADLINE_PERSISTENT);    \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, RESOURCE_EXHAUSTED_TRANSIENT);  \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, RESOURCE_EXHAUSTED_PERSISTENT); \
    STATIC_ASSERT_RESULT_CODE_EQ_VALUE(type, DEAD_OBJECT)

STATIC_ASSERT_RESULT_CODE_EQ(COMPILATION_FAILED);
STATIC_ASSERT_RESULT_CODE_EQ(EXECUTION_FAILED);

#undef STATIC_ASSERT_DATA_CLASS_EQ_VALUE
#undef STATIC_ASSERT_DATA_CLASS_EQ
#undef STATIC_ASSERT_RESULT_CODE_EQ_VALUE
#undef STATIC_ASSERT_RESULT_CODE_EQ

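// Exhaustive switch (no default case) so that adding a DataClass value triggers a compiler
// warning here; anything outside the known range falls through to UNKNOWN.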
int32_t convertDataClass(DataClass dataClass) {
    switch (dataClass) {
        case DataClass::UNKNOWN:
        case DataClass::OTHER:
        case DataClass::FLOAT32:
        case DataClass::FLOAT16:
        case DataClass::QUANT:
        case DataClass::MIXED:
            return static_cast<int32_t>(dataClass);
    }
    return static_cast<int32_t>(DataClass::UNKNOWN);
}

int32_t convertExecutionMode(ExecutionMode executionMode) {
    switch (executionMode) {
        case ExecutionMode::ASYNC:
            return stats::NEURAL_NETWORKS_EXECUTION_FAILED__MODE__MODE_ASYNC;
        case ExecutionMode::SYNC:
            return stats::NEURAL_NETWORKS_EXECUTION_FAILED__MODE__MODE_SYNC;
        case ExecutionMode::BURST:
            return stats::NEURAL_NETWORKS_EXECUTION_FAILED__MODE__MODE_BURST;
        case ExecutionMode::ASYNC_WITH_DEPS:
            return stats::NEURAL_NETWORKS_EXECUTION_FAILED__MODE__MODE_ASYNC_WITH_DEPS;
    }
    return stats::NEURAL_NETWORKS_EXECUTION_FAILED__MODE__MODE_UNKNOWN;
}

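// Clamp any result code outside the known range to OP_FAILED so the atom's enum field stays
// valid.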
int32_t convertResultCode(int32_t resultCode) {
    return resultCode >= ANEURALNETWORKS_NO_ERROR && resultCode <= ANEURALNETWORKS_DEAD_OBJECT
                   ? resultCode
                   : ANEURALNETWORKS_OP_FAILED;
}

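// Fold the model architecture hash down to a single int64_t by XOR-ing it together in 8-byte
// words (e.g., a 32-byte hash folds to the XOR of its four 8-byte words). This is lossy by
// design; the full hash is still reported alongside it as a bytes field.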
int64_t compressTo64(const ModelArchHash& modelArchHash) {
    int64_t hash = 0;
    const char* data = reinterpret_cast<const char*>(modelArchHash.data());
    for (size_t i = 0; i + sizeof(int64_t) <= sizeof(ModelArchHash); i += sizeof(int64_t)) {
        int64_t tmp = 0;
        std::memcpy(&tmp, data + i, sizeof(int64_t));
        hash ^= tmp;
    }
    return hash;
}

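// Write one aggregated atom to statsd. The (isExecution, errorCode) pair in the key selects which
// of the four atoms to emit; timing statistics are only carried by the "completed" variants.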
void logAtomToStatsd(Atom&& atom) {
    NNTRACE_RT(NNTRACE_PHASE_UNSPECIFIED, "logAtomToStatsd");
    const auto& [key, value] = atom;

    const auto modelArchHash64 = compressTo64(key.modelArchHash);

    if (!key.isExecution) {
        if (key.errorCode == ANEURALNETWORKS_NO_ERROR) {
            stats::stats_write(
                    stats::NEURALNETWORKS_COMPILATION_COMPLETED, getUid(), getSessionId(),
                    kNnapiApexVersion, makeBytesField(key.modelArchHash), key.deviceId.c_str(),
                    convertDataClass(key.inputDataClass), convertDataClass(key.outputDataClass),
                    key.fallbackToCpuFromError, key.introspectionEnabled, key.cacheEnabled,
                    key.hasControlFlow, key.hasDynamicTemporaries,
                    value.compilationTimeMillis.sumTime, value.compilationTimeMillis.minTime,
                    value.compilationTimeMillis.maxTime, value.compilationTimeMillis.sumSquaredTime,
                    value.compilationTimeMillis.count, value.count, modelArchHash64);
        } else {
            stats::stats_write(
                    stats::NEURALNETWORKS_COMPILATION_FAILED, getUid(), getSessionId(),
                    kNnapiApexVersion, makeBytesField(key.modelArchHash), key.deviceId.c_str(),
                    convertDataClass(key.inputDataClass), convertDataClass(key.outputDataClass),
                    convertResultCode(key.errorCode), key.introspectionEnabled, key.cacheEnabled,
                    key.hasControlFlow, key.hasDynamicTemporaries, value.count, modelArchHash64);
        }
    } else {
        if (key.errorCode == ANEURALNETWORKS_NO_ERROR) {
            stats::stats_write(
                    stats::NEURALNETWORKS_EXECUTION_COMPLETED, getUid(), getSessionId(),
                    kNnapiApexVersion, makeBytesField(key.modelArchHash), key.deviceId.c_str(),
                    convertExecutionMode(key.executionMode), convertDataClass(key.inputDataClass),
                    convertDataClass(key.outputDataClass), key.introspectionEnabled,
                    key.cacheEnabled, key.hasControlFlow, key.hasDynamicTemporaries,
                    value.durationRuntimeMicros.sumTime, value.durationRuntimeMicros.minTime,
                    value.durationRuntimeMicros.maxTime, value.durationRuntimeMicros.sumSquaredTime,
                    value.durationRuntimeMicros.count, value.durationDriverMicros.sumTime,
                    value.durationDriverMicros.minTime, value.durationDriverMicros.maxTime,
                    value.durationDriverMicros.sumSquaredTime, value.durationDriverMicros.count,
                    value.durationHardwareMicros.sumTime, value.durationHardwareMicros.minTime,
                    value.durationHardwareMicros.maxTime,
                    value.durationHardwareMicros.sumSquaredTime, value.durationHardwareMicros.count,
                    value.count, modelArchHash64);
        } else {
            stats::stats_write(
                    stats::NEURALNETWORKS_EXECUTION_FAILED, getUid(), getSessionId(),
                    kNnapiApexVersion, makeBytesField(key.modelArchHash), key.deviceId.c_str(),
                    convertExecutionMode(key.executionMode), convertDataClass(key.inputDataClass),
                    convertDataClass(key.outputDataClass), convertResultCode(key.errorCode),
                    key.introspectionEnabled, key.cacheEnabled, key.hasControlFlow,
                    key.hasDynamicTemporaries, value.count, modelArchHash64);
        }
    }
}

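// The process-wide async logger. base::NoDestructor keeps the logger (and its thread) alive for
// the life of the process, sidestepping destruction-order problems at exit.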
AsyncLogger& getStatsdLogger() {
    static base::NoDestructor<AsyncLogger> logger(logAtomToStatsd, kMinimumLoggingQuietPeriod);
    return *logger;
}

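// View an AtomKey as a tuple of references so that operator== and operator< below come from the
// tuple's memberwise and lexicographic comparisons. Any field added to AtomKey must also be added
// here.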
constexpr auto asTuple(const AtomKey& v) {
    return std::tie(v.isExecution, v.modelArchHash, v.deviceId, v.executionMode, v.errorCode,
                    v.inputDataClass, v.outputDataClass, v.fallbackToCpuFromError,
                    v.introspectionEnabled, v.cacheEnabled, v.hasControlFlow,
                    v.hasDynamicTemporaries);
}

}  // namespace

bool operator==(const AtomKey& lhs, const AtomKey& rhs) {
    return asTuple(lhs) == asTuple(rhs);
}

bool operator<(const AtomKey& lhs, const AtomKey& rhs) {
    return asTuple(lhs) < asTuple(rhs);
}

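// Merge two aggregated values that share the same AtomKey: counts add, and each timing
// accumulator is combined component-wise.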
void combineAtomValues(AtomValue* accumulatedValue, const AtomValue& value) {
    accumulatedValue->count += value.count;
    combineAccumulatedTiming(&accumulatedValue->compilationTimeMillis, value.compilationTimeMillis);
    combineAccumulatedTiming(&accumulatedValue->durationRuntimeMicros, value.durationRuntimeMicros);
    combineAccumulatedTiming(&accumulatedValue->durationDriverMicros, value.durationDriverMicros);
    combineAccumulatedTiming(&accumulatedValue->durationHardwareMicros,
                             value.durationHardwareMicros);
}

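// AtomAggregator deduplicates atoms by key while preserving first-insertion order, so pop()
// drains keys FIFO. A minimal usage sketch (info standing in for some DiagnosticCompilationInfo
// pointer):
//
//   AtomAggregator aggregator;
//   aggregator.push(createAtomFrom(info));  // first atom for this key: count == 1
//   aggregator.push(createAtomFrom(info));  // same key again: values merge, count == 2
//   while (!aggregator.empty()) {
//       logAtomToStatsd(aggregator.pop());  // one aggregated atom per distinct key
//   }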
bool AtomAggregator::empty() const {
    return mOrder.empty();
}

void AtomAggregator::push(Atom&& atom) {
    const AtomValue& value = atom.second;
    // Insert a new (key, value) entry, or merge the value into the existing entry for this key.
    if (const auto [it, inserted] = mAggregate.try_emplace(std::move(atom.first), value);
        !inserted) {
        combineAtomValues(&it->second, value);
    } else {
        // First time this key is seen: remember it so pop() can return keys in FIFO order.
        mOrder.push(&it->first);
    }
}

std::pair<AtomKey, AtomValue> AtomAggregator::pop() {
    CHECK(!empty());

    // Find the key of the aggregated atom to log and remove it.
    const AtomKey* key = mOrder.front();
    mOrder.pop();

    // Find the value that corresponds to the key and remove the (key,value) from the map.
    auto node = mAggregate.extract(*key);
    CHECK(!node.empty());

    return std::make_pair(std::move(node.key()), node.mapped());
}

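// The logger thread takes batches of atoms from mChannel, folds them into a local aggregator, and
// emits at most one aggregated atom per quiet period. It blocks only while it has nothing left to
// log.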
AsyncLogger::AsyncLogger(LoggerFn logger, Duration loggingQuietPeriodDuration) {
    mChannel.reserve(kInitialChannelSize);
    mThread = std::thread([this, log = std::move(logger), loggingQuietPeriodDuration]() {
        AtomAggregator data;
        std::vector<Atom> atoms;
        atoms.reserve(kInitialChannelSize);

        // Loop until the thread is being torn down.
        while (true) {
            // Get data if it's available.
            const Result result = takeAll(&atoms, /*blockUntilDataIsAvailable=*/data.empty());
            if (result == Result::TEARDOWN) {
                break;
            }

            // Aggregate the data locally.
            std::for_each(atoms.begin(), atoms.end(),
                          [&data](Atom& atom) { data.push(std::move(atom)); });
            atoms.clear();

            // Log data if available and sleep.
            if (!data.empty()) {
                log(data.pop());
                const Result result = sleepFor(loggingQuietPeriodDuration);
                if (result == Result::TEARDOWN) {
                    break;
                }
            }
        }
    });
}

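// Queue an atom for the logger thread. Only the empty -> non-empty transition needs a wakeup: the
// thread drains the whole channel in one swap, so later writes are picked up regardless.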
void AsyncLogger::write(Atom&& atom) {
    bool wasEmpty = false;
    {
        std::lock_guard hold(mMutex);
        wasEmpty = mChannel.empty();
        mChannel.push_back(std::move(atom));
    }
    if (wasEmpty) {
        mNotEmptyOrTeardown.notify_one();
    }
}

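// Swap the entire channel into *output under the lock. When blockUntilDataIsAvailable is false,
// waiting until TimePoint{} (the epoch, already in the past) makes wait_until evaluate the
// predicate once and return immediately.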
AsyncLogger::Result AsyncLogger::takeAll(std::vector<Atom>* output,
                                         bool blockUntilDataIsAvailable) {
    CHECK(output != nullptr);
    CHECK(output->empty());
    const auto blockUntil = blockUntilDataIsAvailable ? TimePoint::max() : TimePoint{};
    std::unique_lock lock(mMutex);
    mNotEmptyOrTeardown.wait_until(
            lock, blockUntil, [this]() REQUIRES(mMutex) { return !mChannel.empty() || mTeardown; });
    std::swap(*output, mChannel);
    return mTeardown ? Result::TEARDOWN : Result::SUCCESS;
}

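// Sleep for the logging quiet period, waking early only if the logger is being torn down.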
AsyncLogger::Result AsyncLogger::sleepFor(Duration duration) {
    std::unique_lock lock(mMutex);
    mNotEmptyOrTeardown.wait_for(lock, duration, [this]() REQUIRES(mMutex) { return mTeardown; });
    return mTeardown ? Result::TEARDOWN : Result::SUCCESS;
}

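// Signal teardown under the lock, wake the logger thread, and wait for it to exit. Any atoms
// still queued or aggregated at this point are dropped.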
AsyncLogger::~AsyncLogger() {
    {
        std::lock_guard hold(mMutex);
        mTeardown = true;
    }
    mNotEmptyOrTeardown.notify_one();
    mThread.join();
}

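// Convert a compilation diagnostic into a unit-count Atom. Compilations carry no execution mode,
// so the key's executionMode is fixed to SYNC as a placeholder.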
Atom createAtomFrom(const DiagnosticCompilationInfo* info) {
    Atom atom = Atom{
            AtomKey{
                    .isExecution = false,
                    .modelArchHash = makeModelArchHash(info->modelArchHash),
                    .deviceId = info->deviceId,
                    .executionMode = ExecutionMode::SYNC,
                    .errorCode = info->errorCode,
                    .inputDataClass = info->inputDataClass,
                    .outputDataClass = info->outputDataClass,
                    .fallbackToCpuFromError = info->fallbackToCpuFromError,
                    .introspectionEnabled = info->introspectionEnabled,
                    .cacheEnabled = info->cacheEnabled,
                    .hasControlFlow = info->hasControlFlow,
                    .hasDynamicTemporaries = info->hasDynamicTemporaries,
            },
            AtomValue{
                    .count = 1,
            },
    };

    // Timing information is only relevant for the "Completed" path.
    if (info->errorCode == ANEURALNETWORKS_NO_ERROR) {
        auto& value = atom.second;
        const auto compilationTimeMillis = nanosToMillis(info->compilationTimeNanos);
        value.compilationTimeMillis = accumulatedTimingFrom(compilationTimeMillis);
    }

    return atom;
}

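// Convert an execution diagnostic into a unit-count Atom. fallbackToCpuFromError only applies to
// compilations, so it is fixed to false here.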
Atom createAtomFrom(const DiagnosticExecutionInfo* info) {
    Atom atom = Atom{
            AtomKey{
                    .isExecution = true,
                    .modelArchHash = makeModelArchHash(info->modelArchHash),
                    .deviceId = info->deviceId,
                    .executionMode = info->executionMode,
                    .errorCode = info->errorCode,
                    .inputDataClass = info->inputDataClass,
                    .outputDataClass = info->outputDataClass,
                    .fallbackToCpuFromError = false,
                    .introspectionEnabled = info->introspectionEnabled,
                    .cacheEnabled = info->cacheEnabled,
                    .hasControlFlow = info->hasControlFlow,
                    .hasDynamicTemporaries = info->hasDynamicTemporaries,
            },
            AtomValue{
                    .count = 1,
            },
    };

    // Timing information is only relevant for the "Completed" path.
    if (info->errorCode == ANEURALNETWORKS_NO_ERROR) {
        auto& value = atom.second;
        const auto durationRuntimeMicros = nanosToMicros(info->durationRuntimeNanos);
        const auto durationDriverMicros = nanosToMicros(info->durationDriverNanos);
        const auto durationHardwareMicros = nanosToMicros(info->durationHardwareNanos);
        value.durationRuntimeMicros = accumulatedTimingFrom(durationRuntimeMicros);
        value.durationDriverMicros = accumulatedTimingFrom(durationDriverMicros);
        value.durationHardwareMicros = accumulatedTimingFrom(durationHardwareMicros);
    }

    return atom;
}

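// Entry points called by the telemetry layer: convert the diagnostic info into an atom and hand
// it to the shared async logger, which aggregates and rate-limits the actual statsd writes.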
void logCompilationToStatsd(const DiagnosticCompilationInfo* info) {
    NNTRACE_RT(NNTRACE_PHASE_UNSPECIFIED, "logCompilationStatsd");
    getStatsdLogger().write(createAtomFrom(info));
}

void logExecutionToStatsd(const DiagnosticExecutionInfo* info) {
    NNTRACE_RT(NNTRACE_PHASE_UNSPECIFIED, "logExecutionStatsd");
    getStatsdLogger().write(createAtomFrom(info));
}

}  // namespace android::nn::telemetry