1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <gtest/gtest.h>
18 
19 #include <algorithm>
20 #include <condition_variable>
21 #include <numeric>
22 
23 #include "Telemetry.h"
24 #include "TelemetryStatsd.h"
25 
26 namespace android::nn::telemetry {
27 
28 constexpr auto kNoTiming = std::numeric_limits<uint64_t>::max();
29 constexpr auto kNoAggregateTiming = AtomValue::AccumulatedTiming{};
30 constexpr ModelArchHash kExampleModelArchHash = {1, 2, 3};
31 constexpr const char* kExampleDeviceId = "driver1=version1,driver2=version2";
32 constexpr auto kLongTime = std::chrono::seconds(60 * 60 * 24);
33 
34 const AtomKey kExampleKey = {
35         .isExecution = true,
36         .modelArchHash = kExampleModelArchHash,
37         .deviceId = kExampleDeviceId,
38         .executionMode = ExecutionMode::SYNC,
39         .errorCode = ANEURALNETWORKS_NO_ERROR,
40         .inputDataClass = DataClass::FLOAT32,
41         .outputDataClass = DataClass::FLOAT32,
42         .fallbackToCpuFromError = false,
43         .introspectionEnabled = false,
44         .cacheEnabled = false,
45         .hasControlFlow = false,
46         .hasDynamicTemporaries = false,
47 };
48 
49 // This class is thread-safe.
50 class Signal {
51    public:
signal()52     void signal() {
53         {
54             std::lock_guard hold(mMutex);
55             mSignalled = true;
56         }
57         mWaitForSignal.notify_all();
58     }
59 
wait()60     void wait() {
61         std::unique_lock lock(mMutex);
62         mWaitForSignal.wait(lock, [this]() REQUIRES(mMutex) { return mSignalled; });
63     }
64 
65    private:
66     mutable std::mutex mMutex;
67     mutable std::condition_variable mWaitForSignal;
68     mutable bool mSignalled GUARDED_BY(mMutex) = false;
69 };
70 
operator ==(const AtomValue::AccumulatedTiming & lhs,const AtomValue::AccumulatedTiming & rhs)71 bool operator==(const AtomValue::AccumulatedTiming& lhs, const AtomValue::AccumulatedTiming& rhs) {
72     constexpr auto toTuple = [](const AtomValue::AccumulatedTiming& v) {
73         return std::tie(v.sumTime, v.minTime, v.maxTime, v.sumSquaredTime, v.count);
74     };
75     return toTuple(lhs) == toTuple(rhs);
76 }
77 
operator ==(const AtomValue & lhs,const AtomValue & rhs)78 bool operator==(const AtomValue& lhs, const AtomValue& rhs) {
79     constexpr auto toTuple = [](const AtomValue& v) {
80         return std::tie(v.count, v.compilationTimeMillis, v.durationRuntimeMicros,
81                         v.durationDriverMicros, v.durationHardwareMicros);
82     };
83     return toTuple(lhs) == toTuple(rhs);
84 }
85 
accumulatedTimingsFrom(std::initializer_list<int64_t> values)86 AtomValue::AccumulatedTiming accumulatedTimingsFrom(std::initializer_list<int64_t> values) {
87     CHECK_GT(values.size(), 0u);
88     const int64_t sumTime = std::accumulate(values.begin(), values.end(), 0, std::plus<>{});
89     const int64_t sumSquaredTime = std::accumulate(
90             values.begin(), values.end(), 0, [](int64_t acc, int64_t v) { return acc + v * v; });
91     const auto [minIt, maxIt] = std::minmax_element(values.begin(), values.end());
92     return {
93             .sumTime = sumTime,
94             .minTime = *minIt,
95             .maxTime = *maxIt,
96             .sumSquaredTime = sumSquaredTime,
97             .count = static_cast<int32_t>(values.size()),
98     };
99 }
100 
TEST(StatsdTelemetryTest,AtomKeyEquality)101 TEST(StatsdTelemetryTest, AtomKeyEquality) {
102     EXPECT_EQ(kExampleKey, kExampleKey);
103 }
104 
TEST(StatsdTelemetryTest,AtomKeyLessThan)105 TEST(StatsdTelemetryTest, AtomKeyLessThan) {
106     const auto key1 = kExampleKey;
107     auto key2 = key1;
108     key2.errorCode = ANEURALNETWORKS_DEAD_OBJECT;
109     EXPECT_LT(key1, key2);
110 }
111 
TEST(StatsdTelemetryTest,CombineAtomValues)112 TEST(StatsdTelemetryTest, CombineAtomValues) {
113     AtomValue value1 = {
114             .count = 3,
115             .compilationTimeMillis = accumulatedTimingsFrom({50, 100, 150}),
116     };
117     const AtomValue value2 = {
118             .count = 1,
119             .compilationTimeMillis = accumulatedTimingsFrom({75}),
120     };
121     const AtomValue valueResult = {
122             .count = 4,
123             .compilationTimeMillis = accumulatedTimingsFrom({50, 75, 100, 150}),
124     };
125 
126     combineAtomValues(&value1, value2);
127     EXPECT_EQ(value1, valueResult);
128 }
129 
TEST(StatsdTelemetryTest,CombineAtomValueWithLeftIdentity)130 TEST(StatsdTelemetryTest, CombineAtomValueWithLeftIdentity) {
131     AtomValue value1 = {};
132     const AtomValue value2 = {
133             .count = 1,
134             .compilationTimeMillis = accumulatedTimingsFrom({75}),
135     };
136     const AtomValue valueResult = value2;
137 
138     combineAtomValues(&value1, value2);
139     EXPECT_EQ(value1, valueResult);
140 }
141 
TEST(StatsdTelemetryTest,CombineAtomValueWithRightIdentity)142 TEST(StatsdTelemetryTest, CombineAtomValueWithRightIdentity) {
143     AtomValue value1 = {
144             .count = 3,
145             .compilationTimeMillis = accumulatedTimingsFrom({50, 100, 150}),
146     };
147     const AtomValue value2 = {};
148     const AtomValue valueResult = value1;
149 
150     combineAtomValues(&value1, value2);
151     EXPECT_EQ(value1, valueResult);
152 }
153 
TEST(StatsdTelemetryTest,AtomAggregatorStartEmpty)154 TEST(StatsdTelemetryTest, AtomAggregatorStartEmpty) {
155     AtomAggregator aggregator;
156     EXPECT_TRUE(aggregator.empty());
157 }
158 
TEST(StatsdTelemetryTest,AtomAggregatorNotEmptyAfterPush)159 TEST(StatsdTelemetryTest, AtomAggregatorNotEmptyAfterPush) {
160     AtomAggregator aggregator;
161     aggregator.push({kExampleKey, {}});
162     EXPECT_FALSE(aggregator.empty());
163 }
164 
TEST(StatsdTelemetryTest,AtomAggregatorEmptyAfterPop)165 TEST(StatsdTelemetryTest, AtomAggregatorEmptyAfterPop) {
166     AtomAggregator aggregator;
167     aggregator.push({kExampleKey, {}});
168     const auto [k, v] = aggregator.pop();
169     EXPECT_TRUE(aggregator.empty());
170     EXPECT_EQ(k, kExampleKey);
171 }
172 
TEST(StatsdTelemetryTest,AtomAggregatorTwoDifferentKeys)173 TEST(StatsdTelemetryTest, AtomAggregatorTwoDifferentKeys) {
174     const auto key1 = kExampleKey;
175     auto key2 = key1;
176     key2.executionMode = ExecutionMode::ASYNC;
177     const auto value1 = AtomValue{.count = 2};
178     const auto value2 = AtomValue{.count = 3};
179 
180     AtomAggregator aggregator;
181     aggregator.push({key1, value1});
182     aggregator.push({key2, value2});
183 
184     const auto [resultKey, resultValue] = aggregator.pop();
185 
186     EXPECT_EQ(resultKey, key1);
187     EXPECT_EQ(resultValue, value1);
188     EXPECT_FALSE(aggregator.empty());
189 }
190 
TEST(StatsdTelemetryTest,AtomAggregatorTwoSameKeys)191 TEST(StatsdTelemetryTest, AtomAggregatorTwoSameKeys) {
192     const auto key1 = kExampleKey;
193     const auto value1 = AtomValue{.count = 2};
194     const auto value2 = AtomValue{.count = 3};
195 
196     AtomAggregator aggregator;
197     aggregator.push({key1, value1});
198     aggregator.push({key1, value2});
199 
200     const auto [resultKey, resultValue] = aggregator.pop();
201 
202     EXPECT_EQ(resultKey, key1);
203     EXPECT_EQ(resultValue, AtomValue{.count = 5});
204     EXPECT_TRUE(aggregator.empty());
205 }
206 
TEST(StatsdTelemetryTest,AtomAggregatorPush)207 TEST(StatsdTelemetryTest, AtomAggregatorPush) {
208     const AtomKey key1 = kExampleKey;
209     AtomKey key2 = key1;
210     key2.executionMode = ExecutionMode::ASYNC;
211     const auto value1 = AtomValue{.count = 2};
212     const auto value2 = AtomValue{.count = 3};
213     const auto value3 = AtomValue{.count = 6};
214 
215     AtomAggregator aggregator;
216     aggregator.push({key1, value1});
217     aggregator.push({key2, value2});
218     aggregator.push({key1, value3});
219 
220     const auto [resultKey1, resultValue1] = aggregator.pop();
221     const auto [resultKey2, resultValue2] = aggregator.pop();
222 
223     EXPECT_EQ(resultKey1, key1);
224     EXPECT_EQ(resultKey2, key2);
225     EXPECT_EQ(resultValue1, AtomValue{.count = 8});
226     EXPECT_EQ(resultValue2, AtomValue{.count = 3});
227     EXPECT_TRUE(aggregator.empty());
228 }
229 
TEST(StatsdTelemetryTest,AsyncLoggerTeardownWhileWaitingForData)230 TEST(StatsdTelemetryTest, AsyncLoggerTeardownWhileWaitingForData) {
231     constexpr auto fn = [](Atom&& /*atom*/) {};
232     const auto start = Clock::now();
233     { AsyncLogger logger(fn, kLongTime); }
234     const auto elapsed = Clock::now() - start;
235     EXPECT_LT(elapsed, kLongTime);
236 }
237 
TEST(StatsdTelemetryTest,AsyncLoggerTeardownDuringSleep)238 TEST(StatsdTelemetryTest, AsyncLoggerTeardownDuringSleep) {
239     Signal loggingOccurred;
240     auto fn = [&loggingOccurred](Atom&& /*atom*/) mutable { loggingOccurred.signal(); };
241 
242     const auto start = Clock::now();
243     {
244         AsyncLogger logger(fn, kLongTime);
245         logger.write({});
246         loggingOccurred.wait();
247     }
248     const auto elapsed = Clock::now() - start;
249 
250     EXPECT_LT(elapsed, kLongTime);
251 }
252 
TEST(StatsdTelemetryTest,AsyncLoggerVerifyQuietPeriod)253 TEST(StatsdTelemetryTest, AsyncLoggerVerifyQuietPeriod) {
254     std::atomic<uint32_t> count = 0;
255     Signal loggingOccurred;
256     const auto fn = [&count, &loggingOccurred](Atom&& /*atom*/) {
257         ++count;
258         loggingOccurred.signal();
259     };
260 
261     {
262         AsyncLogger logger(fn, kLongTime);
263         logger.write({});
264         loggingOccurred.wait();
265 
266         // At this point, logger is in the quiet period because it has already logged once. Send
267         // many more atoms and ensure the logging function is not called a second time.
268         for (int32_t error = ANEURALNETWORKS_NO_ERROR; error <= ANEURALNETWORKS_DEAD_OBJECT;
269              ++error) {
270             auto key = kExampleKey;
271             key.errorCode = error;
272             logger.write({key, AtomValue{.count = 1}});
273         }
274     }
275 
276     EXPECT_EQ(count, 1u);
277 }
278 
TEST(StatsdTelemetryTest,AsyncLoggerVerifyAllDataSent)279 TEST(StatsdTelemetryTest, AsyncLoggerVerifyAllDataSent) {
280     const uint32_t targetCount = ANEURALNETWORKS_DEAD_OBJECT - ANEURALNETWORKS_NO_ERROR + 1;
281     std::atomic<uint32_t> count = 0;
282     Signal allDataSent;
283     const auto fn = [&count, &allDataSent](Atom&& /*atom*/) {
284         const uint32_t currentCount = ++count;
285         if (currentCount == targetCount) {
286             allDataSent.signal();
287         }
288     };
289 
290     {
291         AsyncLogger logger(fn, std::chrono::nanoseconds(0));
292         for (int32_t error = ANEURALNETWORKS_NO_ERROR; error <= ANEURALNETWORKS_DEAD_OBJECT;
293              ++error) {
294             auto key = kExampleKey;
295             key.errorCode = error;
296             logger.write({key, AtomValue{.count = 1}});
297         }
298         allDataSent.wait();
299     }
300 
301     EXPECT_EQ(count, targetCount);
302 }
303 
TEST(StatsdTelemetryTest,createAtomFromCompilationInfoWhenNoError)304 TEST(StatsdTelemetryTest, createAtomFromCompilationInfoWhenNoError) {
305     const DiagnosticCompilationInfo info{
306             .modelArchHash = kExampleModelArchHash.data(),
307             .deviceId = kExampleDeviceId,
308             .errorCode = ANEURALNETWORKS_NO_ERROR,
309             .inputDataClass = DataClass::FLOAT32,
310             .outputDataClass = DataClass::QUANT,
311             .compilationTimeNanos = 10'000'000u,
312             .fallbackToCpuFromError = false,
313             .introspectionEnabled = true,
314             .hasControlFlow = false,
315             .hasDynamicTemporaries = true,
316     };
317 
318     const auto [key, value] = createAtomFrom(&info);
319 
320     EXPECT_FALSE(key.isExecution);
321     EXPECT_EQ(key.modelArchHash, kExampleModelArchHash);
322     EXPECT_EQ(key.deviceId, kExampleDeviceId);
323     EXPECT_EQ(key.executionMode, ExecutionMode::SYNC);
324     EXPECT_EQ(key.errorCode, info.errorCode);
325     EXPECT_EQ(key.inputDataClass, info.inputDataClass);
326     EXPECT_EQ(key.outputDataClass, info.outputDataClass);
327     EXPECT_EQ(key.fallbackToCpuFromError, info.fallbackToCpuFromError);
328     EXPECT_EQ(key.introspectionEnabled, info.introspectionEnabled);
329     EXPECT_EQ(key.cacheEnabled, info.cacheEnabled);
330     EXPECT_EQ(key.hasControlFlow, info.hasControlFlow);
331     EXPECT_EQ(key.hasDynamicTemporaries, info.hasDynamicTemporaries);
332 
333     EXPECT_EQ(value.count, 1);
334 
335     const auto compilationTimeMillis =
336             accumulatedTimingsFrom({static_cast<int64_t>(info.compilationTimeNanos / 1'000'000u)});
337     EXPECT_EQ(value.compilationTimeMillis, compilationTimeMillis);
338 
339     EXPECT_EQ(value.durationRuntimeMicros, kNoAggregateTiming);
340     EXPECT_EQ(value.durationDriverMicros, kNoAggregateTiming);
341     EXPECT_EQ(value.durationHardwareMicros, kNoAggregateTiming);
342 }
343 
TEST(StatsdTelemetryTest,createAtomFromCompilationInfoWhenError)344 TEST(StatsdTelemetryTest, createAtomFromCompilationInfoWhenError) {
345     const DiagnosticCompilationInfo info{
346             .modelArchHash = kExampleModelArchHash.data(),
347             .deviceId = kExampleDeviceId,
348             .errorCode = ANEURALNETWORKS_OP_FAILED,
349             .inputDataClass = DataClass::FLOAT32,
350             .outputDataClass = DataClass::QUANT,
351             .compilationTimeNanos = kNoTiming,
352             .fallbackToCpuFromError = true,
353             .introspectionEnabled = false,
354             .hasControlFlow = true,
355             .hasDynamicTemporaries = false,
356     };
357 
358     const auto [key, value] = createAtomFrom(&info);
359 
360     EXPECT_FALSE(key.isExecution);
361     EXPECT_EQ(key.modelArchHash, kExampleModelArchHash);
362     EXPECT_EQ(key.deviceId, kExampleDeviceId);
363     EXPECT_EQ(key.executionMode, ExecutionMode::SYNC);
364     EXPECT_EQ(key.errorCode, info.errorCode);
365     EXPECT_EQ(key.inputDataClass, info.inputDataClass);
366     EXPECT_EQ(key.outputDataClass, info.outputDataClass);
367     EXPECT_EQ(key.fallbackToCpuFromError, info.fallbackToCpuFromError);
368     EXPECT_EQ(key.introspectionEnabled, info.introspectionEnabled);
369     EXPECT_EQ(key.cacheEnabled, info.cacheEnabled);
370     EXPECT_EQ(key.hasControlFlow, info.hasControlFlow);
371     EXPECT_EQ(key.hasDynamicTemporaries, info.hasDynamicTemporaries);
372 
373     EXPECT_EQ(value.count, 1);
374 
375     EXPECT_EQ(value.compilationTimeMillis, kNoAggregateTiming);
376     EXPECT_EQ(value.durationRuntimeMicros, kNoAggregateTiming);
377     EXPECT_EQ(value.durationDriverMicros, kNoAggregateTiming);
378     EXPECT_EQ(value.durationHardwareMicros, kNoAggregateTiming);
379 }
380 
TEST(StatsdTelemetryTest,createAtomFromExecutionInfoWhenNoError)381 TEST(StatsdTelemetryTest, createAtomFromExecutionInfoWhenNoError) {
382     const DiagnosticExecutionInfo info{
383             .modelArchHash = kExampleModelArchHash.data(),
384             .deviceId = kExampleDeviceId,
385             .executionMode = ExecutionMode::SYNC,
386             .inputDataClass = DataClass::FLOAT32,
387             .outputDataClass = DataClass::QUANT,
388             .errorCode = ANEURALNETWORKS_NO_ERROR,
389             .durationRuntimeNanos = 350'000u,
390             .durationDriverNanos = 350'000u,
391             .durationHardwareNanos = 350'000u,
392             .introspectionEnabled = false,
393             .cacheEnabled = true,
394             .hasControlFlow = false,
395             .hasDynamicTemporaries = true,
396     };
397 
398     const auto [key, value] = createAtomFrom(&info);
399 
400     EXPECT_TRUE(key.isExecution);
401     EXPECT_EQ(key.modelArchHash, kExampleModelArchHash);
402     EXPECT_EQ(key.deviceId, kExampleDeviceId);
403     EXPECT_EQ(key.executionMode, info.executionMode);
404     EXPECT_EQ(key.errorCode, info.errorCode);
405     EXPECT_EQ(key.inputDataClass, info.inputDataClass);
406     EXPECT_EQ(key.outputDataClass, info.outputDataClass);
407     EXPECT_FALSE(key.fallbackToCpuFromError);
408     EXPECT_EQ(key.introspectionEnabled, info.introspectionEnabled);
409     EXPECT_EQ(key.cacheEnabled, info.cacheEnabled);
410     EXPECT_EQ(key.hasControlFlow, info.hasControlFlow);
411     EXPECT_EQ(key.hasDynamicTemporaries, info.hasDynamicTemporaries);
412 
413     EXPECT_EQ(value.count, 1);
414 
415     EXPECT_EQ(value.compilationTimeMillis, kNoAggregateTiming);
416 
417     const auto durationRuntimeMicros =
418             accumulatedTimingsFrom({static_cast<int64_t>(info.durationRuntimeNanos / 1'000u)});
419     const auto durationDriverMicros =
420             accumulatedTimingsFrom({static_cast<int64_t>(info.durationDriverNanos / 1'000u)});
421     const auto durationHardwareMicros =
422             accumulatedTimingsFrom({static_cast<int64_t>(info.durationHardwareNanos / 1'000u)});
423 
424     EXPECT_EQ(value.durationRuntimeMicros, durationRuntimeMicros);
425     EXPECT_EQ(value.durationDriverMicros, durationDriverMicros);
426     EXPECT_EQ(value.durationHardwareMicros, durationHardwareMicros);
427 }
428 
TEST(StatsdTelemetryTest,createAtomFromExecutionInfoWhenError)429 TEST(StatsdTelemetryTest, createAtomFromExecutionInfoWhenError) {
430     const DiagnosticExecutionInfo info{
431             .modelArchHash = kExampleModelArchHash.data(),
432             .deviceId = kExampleDeviceId,
433             .executionMode = ExecutionMode::SYNC,
434             .inputDataClass = DataClass::FLOAT32,
435             .outputDataClass = DataClass::QUANT,
436             .errorCode = ANEURALNETWORKS_OP_FAILED,
437             .durationRuntimeNanos = kNoTiming,
438             .durationDriverNanos = kNoTiming,
439             .durationHardwareNanos = kNoTiming,
440             .introspectionEnabled = true,
441             .cacheEnabled = false,
442             .hasControlFlow = true,
443             .hasDynamicTemporaries = false,
444     };
445 
446     const auto [key, value] = createAtomFrom(&info);
447 
448     EXPECT_TRUE(key.isExecution);
449     EXPECT_EQ(key.modelArchHash, kExampleModelArchHash);
450     EXPECT_EQ(key.deviceId, kExampleDeviceId);
451     EXPECT_EQ(key.executionMode, info.executionMode);
452     EXPECT_EQ(key.errorCode, info.errorCode);
453     EXPECT_EQ(key.inputDataClass, info.inputDataClass);
454     EXPECT_EQ(key.outputDataClass, info.outputDataClass);
455     EXPECT_FALSE(key.fallbackToCpuFromError);
456     EXPECT_EQ(key.introspectionEnabled, info.introspectionEnabled);
457     EXPECT_EQ(key.cacheEnabled, info.cacheEnabled);
458     EXPECT_EQ(key.hasControlFlow, info.hasControlFlow);
459     EXPECT_EQ(key.hasDynamicTemporaries, info.hasDynamicTemporaries);
460 
461     EXPECT_EQ(value.count, 1);
462 
463     EXPECT_EQ(value.compilationTimeMillis, kNoAggregateTiming);
464     EXPECT_EQ(value.durationRuntimeMicros, kNoAggregateTiming);
465     EXPECT_EQ(value.durationDriverMicros, kNoAggregateTiming);
466     EXPECT_EQ(value.durationHardwareMicros, kNoAggregateTiming);
467 }
468 
469 }  // namespace android::nn::telemetry
470