1 /*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <gtest/gtest.h>
18
19 #include <algorithm>
20 #include <condition_variable>
21 #include <numeric>
22
23 #include "Telemetry.h"
24 #include "TelemetryStatsd.h"
25
26 namespace android::nn::telemetry {
27
28 constexpr auto kNoTiming = std::numeric_limits<uint64_t>::max();
29 constexpr auto kNoAggregateTiming = AtomValue::AccumulatedTiming{};
30 constexpr ModelArchHash kExampleModelArchHash = {1, 2, 3};
31 constexpr const char* kExampleDeviceId = "driver1=version1,driver2=version2";
32 constexpr auto kLongTime = std::chrono::seconds(60 * 60 * 24);
33
34 const AtomKey kExampleKey = {
35 .isExecution = true,
36 .modelArchHash = kExampleModelArchHash,
37 .deviceId = kExampleDeviceId,
38 .executionMode = ExecutionMode::SYNC,
39 .errorCode = ANEURALNETWORKS_NO_ERROR,
40 .inputDataClass = DataClass::FLOAT32,
41 .outputDataClass = DataClass::FLOAT32,
42 .fallbackToCpuFromError = false,
43 .introspectionEnabled = false,
44 .cacheEnabled = false,
45 .hasControlFlow = false,
46 .hasDynamicTemporaries = false,
47 };
48
49 // This class is thread-safe.
50 class Signal {
51 public:
signal()52 void signal() {
53 {
54 std::lock_guard hold(mMutex);
55 mSignalled = true;
56 }
57 mWaitForSignal.notify_all();
58 }
59
wait()60 void wait() {
61 std::unique_lock lock(mMutex);
62 mWaitForSignal.wait(lock, [this]() REQUIRES(mMutex) { return mSignalled; });
63 }
64
65 private:
66 mutable std::mutex mMutex;
67 mutable std::condition_variable mWaitForSignal;
68 mutable bool mSignalled GUARDED_BY(mMutex) = false;
69 };
70
operator ==(const AtomValue::AccumulatedTiming & lhs,const AtomValue::AccumulatedTiming & rhs)71 bool operator==(const AtomValue::AccumulatedTiming& lhs, const AtomValue::AccumulatedTiming& rhs) {
72 constexpr auto toTuple = [](const AtomValue::AccumulatedTiming& v) {
73 return std::tie(v.sumTime, v.minTime, v.maxTime, v.sumSquaredTime, v.count);
74 };
75 return toTuple(lhs) == toTuple(rhs);
76 }
77
operator ==(const AtomValue & lhs,const AtomValue & rhs)78 bool operator==(const AtomValue& lhs, const AtomValue& rhs) {
79 constexpr auto toTuple = [](const AtomValue& v) {
80 return std::tie(v.count, v.compilationTimeMillis, v.durationRuntimeMicros,
81 v.durationDriverMicros, v.durationHardwareMicros);
82 };
83 return toTuple(lhs) == toTuple(rhs);
84 }
85
accumulatedTimingsFrom(std::initializer_list<int64_t> values)86 AtomValue::AccumulatedTiming accumulatedTimingsFrom(std::initializer_list<int64_t> values) {
87 CHECK_GT(values.size(), 0u);
88 const int64_t sumTime = std::accumulate(values.begin(), values.end(), 0, std::plus<>{});
89 const int64_t sumSquaredTime = std::accumulate(
90 values.begin(), values.end(), 0, [](int64_t acc, int64_t v) { return acc + v * v; });
91 const auto [minIt, maxIt] = std::minmax_element(values.begin(), values.end());
92 return {
93 .sumTime = sumTime,
94 .minTime = *minIt,
95 .maxTime = *maxIt,
96 .sumSquaredTime = sumSquaredTime,
97 .count = static_cast<int32_t>(values.size()),
98 };
99 }
100
TEST(StatsdTelemetryTest,AtomKeyEquality)101 TEST(StatsdTelemetryTest, AtomKeyEquality) {
102 EXPECT_EQ(kExampleKey, kExampleKey);
103 }
104
TEST(StatsdTelemetryTest,AtomKeyLessThan)105 TEST(StatsdTelemetryTest, AtomKeyLessThan) {
106 const auto key1 = kExampleKey;
107 auto key2 = key1;
108 key2.errorCode = ANEURALNETWORKS_DEAD_OBJECT;
109 EXPECT_LT(key1, key2);
110 }
111
TEST(StatsdTelemetryTest,CombineAtomValues)112 TEST(StatsdTelemetryTest, CombineAtomValues) {
113 AtomValue value1 = {
114 .count = 3,
115 .compilationTimeMillis = accumulatedTimingsFrom({50, 100, 150}),
116 };
117 const AtomValue value2 = {
118 .count = 1,
119 .compilationTimeMillis = accumulatedTimingsFrom({75}),
120 };
121 const AtomValue valueResult = {
122 .count = 4,
123 .compilationTimeMillis = accumulatedTimingsFrom({50, 75, 100, 150}),
124 };
125
126 combineAtomValues(&value1, value2);
127 EXPECT_EQ(value1, valueResult);
128 }
129
TEST(StatsdTelemetryTest,CombineAtomValueWithLeftIdentity)130 TEST(StatsdTelemetryTest, CombineAtomValueWithLeftIdentity) {
131 AtomValue value1 = {};
132 const AtomValue value2 = {
133 .count = 1,
134 .compilationTimeMillis = accumulatedTimingsFrom({75}),
135 };
136 const AtomValue valueResult = value2;
137
138 combineAtomValues(&value1, value2);
139 EXPECT_EQ(value1, valueResult);
140 }
141
TEST(StatsdTelemetryTest,CombineAtomValueWithRightIdentity)142 TEST(StatsdTelemetryTest, CombineAtomValueWithRightIdentity) {
143 AtomValue value1 = {
144 .count = 3,
145 .compilationTimeMillis = accumulatedTimingsFrom({50, 100, 150}),
146 };
147 const AtomValue value2 = {};
148 const AtomValue valueResult = value1;
149
150 combineAtomValues(&value1, value2);
151 EXPECT_EQ(value1, valueResult);
152 }
153
TEST(StatsdTelemetryTest,AtomAggregatorStartEmpty)154 TEST(StatsdTelemetryTest, AtomAggregatorStartEmpty) {
155 AtomAggregator aggregator;
156 EXPECT_TRUE(aggregator.empty());
157 }
158
TEST(StatsdTelemetryTest,AtomAggregatorNotEmptyAfterPush)159 TEST(StatsdTelemetryTest, AtomAggregatorNotEmptyAfterPush) {
160 AtomAggregator aggregator;
161 aggregator.push({kExampleKey, {}});
162 EXPECT_FALSE(aggregator.empty());
163 }
164
TEST(StatsdTelemetryTest,AtomAggregatorEmptyAfterPop)165 TEST(StatsdTelemetryTest, AtomAggregatorEmptyAfterPop) {
166 AtomAggregator aggregator;
167 aggregator.push({kExampleKey, {}});
168 const auto [k, v] = aggregator.pop();
169 EXPECT_TRUE(aggregator.empty());
170 EXPECT_EQ(k, kExampleKey);
171 }
172
TEST(StatsdTelemetryTest,AtomAggregatorTwoDifferentKeys)173 TEST(StatsdTelemetryTest, AtomAggregatorTwoDifferentKeys) {
174 const auto key1 = kExampleKey;
175 auto key2 = key1;
176 key2.executionMode = ExecutionMode::ASYNC;
177 const auto value1 = AtomValue{.count = 2};
178 const auto value2 = AtomValue{.count = 3};
179
180 AtomAggregator aggregator;
181 aggregator.push({key1, value1});
182 aggregator.push({key2, value2});
183
184 const auto [resultKey, resultValue] = aggregator.pop();
185
186 EXPECT_EQ(resultKey, key1);
187 EXPECT_EQ(resultValue, value1);
188 EXPECT_FALSE(aggregator.empty());
189 }
190
TEST(StatsdTelemetryTest,AtomAggregatorTwoSameKeys)191 TEST(StatsdTelemetryTest, AtomAggregatorTwoSameKeys) {
192 const auto key1 = kExampleKey;
193 const auto value1 = AtomValue{.count = 2};
194 const auto value2 = AtomValue{.count = 3};
195
196 AtomAggregator aggregator;
197 aggregator.push({key1, value1});
198 aggregator.push({key1, value2});
199
200 const auto [resultKey, resultValue] = aggregator.pop();
201
202 EXPECT_EQ(resultKey, key1);
203 EXPECT_EQ(resultValue, AtomValue{.count = 5});
204 EXPECT_TRUE(aggregator.empty());
205 }
206
TEST(StatsdTelemetryTest,AtomAggregatorPush)207 TEST(StatsdTelemetryTest, AtomAggregatorPush) {
208 const AtomKey key1 = kExampleKey;
209 AtomKey key2 = key1;
210 key2.executionMode = ExecutionMode::ASYNC;
211 const auto value1 = AtomValue{.count = 2};
212 const auto value2 = AtomValue{.count = 3};
213 const auto value3 = AtomValue{.count = 6};
214
215 AtomAggregator aggregator;
216 aggregator.push({key1, value1});
217 aggregator.push({key2, value2});
218 aggregator.push({key1, value3});
219
220 const auto [resultKey1, resultValue1] = aggregator.pop();
221 const auto [resultKey2, resultValue2] = aggregator.pop();
222
223 EXPECT_EQ(resultKey1, key1);
224 EXPECT_EQ(resultKey2, key2);
225 EXPECT_EQ(resultValue1, AtomValue{.count = 8});
226 EXPECT_EQ(resultValue2, AtomValue{.count = 3});
227 EXPECT_TRUE(aggregator.empty());
228 }
229
TEST(StatsdTelemetryTest,AsyncLoggerTeardownWhileWaitingForData)230 TEST(StatsdTelemetryTest, AsyncLoggerTeardownWhileWaitingForData) {
231 constexpr auto fn = [](Atom&& /*atom*/) {};
232 const auto start = Clock::now();
233 { AsyncLogger logger(fn, kLongTime); }
234 const auto elapsed = Clock::now() - start;
235 EXPECT_LT(elapsed, kLongTime);
236 }
237
TEST(StatsdTelemetryTest,AsyncLoggerTeardownDuringSleep)238 TEST(StatsdTelemetryTest, AsyncLoggerTeardownDuringSleep) {
239 Signal loggingOccurred;
240 auto fn = [&loggingOccurred](Atom&& /*atom*/) mutable { loggingOccurred.signal(); };
241
242 const auto start = Clock::now();
243 {
244 AsyncLogger logger(fn, kLongTime);
245 logger.write({});
246 loggingOccurred.wait();
247 }
248 const auto elapsed = Clock::now() - start;
249
250 EXPECT_LT(elapsed, kLongTime);
251 }
252
TEST(StatsdTelemetryTest,AsyncLoggerVerifyQuietPeriod)253 TEST(StatsdTelemetryTest, AsyncLoggerVerifyQuietPeriod) {
254 std::atomic<uint32_t> count = 0;
255 Signal loggingOccurred;
256 const auto fn = [&count, &loggingOccurred](Atom&& /*atom*/) {
257 ++count;
258 loggingOccurred.signal();
259 };
260
261 {
262 AsyncLogger logger(fn, kLongTime);
263 logger.write({});
264 loggingOccurred.wait();
265
266 // At this point, logger is in the quiet period because it has already logged once. Send
267 // many more atoms and ensure the logging function is not called a second time.
268 for (int32_t error = ANEURALNETWORKS_NO_ERROR; error <= ANEURALNETWORKS_DEAD_OBJECT;
269 ++error) {
270 auto key = kExampleKey;
271 key.errorCode = error;
272 logger.write({key, AtomValue{.count = 1}});
273 }
274 }
275
276 EXPECT_EQ(count, 1u);
277 }
278
TEST(StatsdTelemetryTest,AsyncLoggerVerifyAllDataSent)279 TEST(StatsdTelemetryTest, AsyncLoggerVerifyAllDataSent) {
280 const uint32_t targetCount = ANEURALNETWORKS_DEAD_OBJECT - ANEURALNETWORKS_NO_ERROR + 1;
281 std::atomic<uint32_t> count = 0;
282 Signal allDataSent;
283 const auto fn = [&count, &allDataSent](Atom&& /*atom*/) {
284 const uint32_t currentCount = ++count;
285 if (currentCount == targetCount) {
286 allDataSent.signal();
287 }
288 };
289
290 {
291 AsyncLogger logger(fn, std::chrono::nanoseconds(0));
292 for (int32_t error = ANEURALNETWORKS_NO_ERROR; error <= ANEURALNETWORKS_DEAD_OBJECT;
293 ++error) {
294 auto key = kExampleKey;
295 key.errorCode = error;
296 logger.write({key, AtomValue{.count = 1}});
297 }
298 allDataSent.wait();
299 }
300
301 EXPECT_EQ(count, targetCount);
302 }
303
TEST(StatsdTelemetryTest,createAtomFromCompilationInfoWhenNoError)304 TEST(StatsdTelemetryTest, createAtomFromCompilationInfoWhenNoError) {
305 const DiagnosticCompilationInfo info{
306 .modelArchHash = kExampleModelArchHash.data(),
307 .deviceId = kExampleDeviceId,
308 .errorCode = ANEURALNETWORKS_NO_ERROR,
309 .inputDataClass = DataClass::FLOAT32,
310 .outputDataClass = DataClass::QUANT,
311 .compilationTimeNanos = 10'000'000u,
312 .fallbackToCpuFromError = false,
313 .introspectionEnabled = true,
314 .hasControlFlow = false,
315 .hasDynamicTemporaries = true,
316 };
317
318 const auto [key, value] = createAtomFrom(&info);
319
320 EXPECT_FALSE(key.isExecution);
321 EXPECT_EQ(key.modelArchHash, kExampleModelArchHash);
322 EXPECT_EQ(key.deviceId, kExampleDeviceId);
323 EXPECT_EQ(key.executionMode, ExecutionMode::SYNC);
324 EXPECT_EQ(key.errorCode, info.errorCode);
325 EXPECT_EQ(key.inputDataClass, info.inputDataClass);
326 EXPECT_EQ(key.outputDataClass, info.outputDataClass);
327 EXPECT_EQ(key.fallbackToCpuFromError, info.fallbackToCpuFromError);
328 EXPECT_EQ(key.introspectionEnabled, info.introspectionEnabled);
329 EXPECT_EQ(key.cacheEnabled, info.cacheEnabled);
330 EXPECT_EQ(key.hasControlFlow, info.hasControlFlow);
331 EXPECT_EQ(key.hasDynamicTemporaries, info.hasDynamicTemporaries);
332
333 EXPECT_EQ(value.count, 1);
334
335 const auto compilationTimeMillis =
336 accumulatedTimingsFrom({static_cast<int64_t>(info.compilationTimeNanos / 1'000'000u)});
337 EXPECT_EQ(value.compilationTimeMillis, compilationTimeMillis);
338
339 EXPECT_EQ(value.durationRuntimeMicros, kNoAggregateTiming);
340 EXPECT_EQ(value.durationDriverMicros, kNoAggregateTiming);
341 EXPECT_EQ(value.durationHardwareMicros, kNoAggregateTiming);
342 }
343
TEST(StatsdTelemetryTest,createAtomFromCompilationInfoWhenError)344 TEST(StatsdTelemetryTest, createAtomFromCompilationInfoWhenError) {
345 const DiagnosticCompilationInfo info{
346 .modelArchHash = kExampleModelArchHash.data(),
347 .deviceId = kExampleDeviceId,
348 .errorCode = ANEURALNETWORKS_OP_FAILED,
349 .inputDataClass = DataClass::FLOAT32,
350 .outputDataClass = DataClass::QUANT,
351 .compilationTimeNanos = kNoTiming,
352 .fallbackToCpuFromError = true,
353 .introspectionEnabled = false,
354 .hasControlFlow = true,
355 .hasDynamicTemporaries = false,
356 };
357
358 const auto [key, value] = createAtomFrom(&info);
359
360 EXPECT_FALSE(key.isExecution);
361 EXPECT_EQ(key.modelArchHash, kExampleModelArchHash);
362 EXPECT_EQ(key.deviceId, kExampleDeviceId);
363 EXPECT_EQ(key.executionMode, ExecutionMode::SYNC);
364 EXPECT_EQ(key.errorCode, info.errorCode);
365 EXPECT_EQ(key.inputDataClass, info.inputDataClass);
366 EXPECT_EQ(key.outputDataClass, info.outputDataClass);
367 EXPECT_EQ(key.fallbackToCpuFromError, info.fallbackToCpuFromError);
368 EXPECT_EQ(key.introspectionEnabled, info.introspectionEnabled);
369 EXPECT_EQ(key.cacheEnabled, info.cacheEnabled);
370 EXPECT_EQ(key.hasControlFlow, info.hasControlFlow);
371 EXPECT_EQ(key.hasDynamicTemporaries, info.hasDynamicTemporaries);
372
373 EXPECT_EQ(value.count, 1);
374
375 EXPECT_EQ(value.compilationTimeMillis, kNoAggregateTiming);
376 EXPECT_EQ(value.durationRuntimeMicros, kNoAggregateTiming);
377 EXPECT_EQ(value.durationDriverMicros, kNoAggregateTiming);
378 EXPECT_EQ(value.durationHardwareMicros, kNoAggregateTiming);
379 }
380
TEST(StatsdTelemetryTest,createAtomFromExecutionInfoWhenNoError)381 TEST(StatsdTelemetryTest, createAtomFromExecutionInfoWhenNoError) {
382 const DiagnosticExecutionInfo info{
383 .modelArchHash = kExampleModelArchHash.data(),
384 .deviceId = kExampleDeviceId,
385 .executionMode = ExecutionMode::SYNC,
386 .inputDataClass = DataClass::FLOAT32,
387 .outputDataClass = DataClass::QUANT,
388 .errorCode = ANEURALNETWORKS_NO_ERROR,
389 .durationRuntimeNanos = 350'000u,
390 .durationDriverNanos = 350'000u,
391 .durationHardwareNanos = 350'000u,
392 .introspectionEnabled = false,
393 .cacheEnabled = true,
394 .hasControlFlow = false,
395 .hasDynamicTemporaries = true,
396 };
397
398 const auto [key, value] = createAtomFrom(&info);
399
400 EXPECT_TRUE(key.isExecution);
401 EXPECT_EQ(key.modelArchHash, kExampleModelArchHash);
402 EXPECT_EQ(key.deviceId, kExampleDeviceId);
403 EXPECT_EQ(key.executionMode, info.executionMode);
404 EXPECT_EQ(key.errorCode, info.errorCode);
405 EXPECT_EQ(key.inputDataClass, info.inputDataClass);
406 EXPECT_EQ(key.outputDataClass, info.outputDataClass);
407 EXPECT_FALSE(key.fallbackToCpuFromError);
408 EXPECT_EQ(key.introspectionEnabled, info.introspectionEnabled);
409 EXPECT_EQ(key.cacheEnabled, info.cacheEnabled);
410 EXPECT_EQ(key.hasControlFlow, info.hasControlFlow);
411 EXPECT_EQ(key.hasDynamicTemporaries, info.hasDynamicTemporaries);
412
413 EXPECT_EQ(value.count, 1);
414
415 EXPECT_EQ(value.compilationTimeMillis, kNoAggregateTiming);
416
417 const auto durationRuntimeMicros =
418 accumulatedTimingsFrom({static_cast<int64_t>(info.durationRuntimeNanos / 1'000u)});
419 const auto durationDriverMicros =
420 accumulatedTimingsFrom({static_cast<int64_t>(info.durationDriverNanos / 1'000u)});
421 const auto durationHardwareMicros =
422 accumulatedTimingsFrom({static_cast<int64_t>(info.durationHardwareNanos / 1'000u)});
423
424 EXPECT_EQ(value.durationRuntimeMicros, durationRuntimeMicros);
425 EXPECT_EQ(value.durationDriverMicros, durationDriverMicros);
426 EXPECT_EQ(value.durationHardwareMicros, durationHardwareMicros);
427 }
428
TEST(StatsdTelemetryTest,createAtomFromExecutionInfoWhenError)429 TEST(StatsdTelemetryTest, createAtomFromExecutionInfoWhenError) {
430 const DiagnosticExecutionInfo info{
431 .modelArchHash = kExampleModelArchHash.data(),
432 .deviceId = kExampleDeviceId,
433 .executionMode = ExecutionMode::SYNC,
434 .inputDataClass = DataClass::FLOAT32,
435 .outputDataClass = DataClass::QUANT,
436 .errorCode = ANEURALNETWORKS_OP_FAILED,
437 .durationRuntimeNanos = kNoTiming,
438 .durationDriverNanos = kNoTiming,
439 .durationHardwareNanos = kNoTiming,
440 .introspectionEnabled = true,
441 .cacheEnabled = false,
442 .hasControlFlow = true,
443 .hasDynamicTemporaries = false,
444 };
445
446 const auto [key, value] = createAtomFrom(&info);
447
448 EXPECT_TRUE(key.isExecution);
449 EXPECT_EQ(key.modelArchHash, kExampleModelArchHash);
450 EXPECT_EQ(key.deviceId, kExampleDeviceId);
451 EXPECT_EQ(key.executionMode, info.executionMode);
452 EXPECT_EQ(key.errorCode, info.errorCode);
453 EXPECT_EQ(key.inputDataClass, info.inputDataClass);
454 EXPECT_EQ(key.outputDataClass, info.outputDataClass);
455 EXPECT_FALSE(key.fallbackToCpuFromError);
456 EXPECT_EQ(key.introspectionEnabled, info.introspectionEnabled);
457 EXPECT_EQ(key.cacheEnabled, info.cacheEnabled);
458 EXPECT_EQ(key.hasControlFlow, info.hasControlFlow);
459 EXPECT_EQ(key.hasDynamicTemporaries, info.hasDynamicTemporaries);
460
461 EXPECT_EQ(value.count, 1);
462
463 EXPECT_EQ(value.compilationTimeMillis, kNoAggregateTiming);
464 EXPECT_EQ(value.durationRuntimeMicros, kNoAggregateTiming);
465 EXPECT_EQ(value.durationDriverMicros, kNoAggregateTiming);
466 EXPECT_EQ(value.durationHardwareMicros, kNoAggregateTiming);
467 }
468
469 } // namespace android::nn::telemetry
470