1 /* 2 * Copyright (C) 2021 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_TELEMETRY_STATSD_H 18 #define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_TELEMETRY_STATSD_H 19 20 #include <android-base/thread_annotations.h> 21 22 #include <array> 23 #include <condition_variable> 24 #include <functional> 25 #include <limits> 26 #include <map> 27 #include <mutex> 28 #include <queue> 29 #include <string> 30 #include <thread> 31 #include <utility> 32 #include <vector> 33 34 #include "Telemetry.h" 35 36 namespace android::nn::telemetry { 37 38 using ModelArchHash = std::array<uint8_t, BYTE_SIZE_OF_MODEL_ARCH_HASH>; 39 40 constexpr int64_t kSumTimeDefault = 0; 41 constexpr int64_t kMinTimeDefault = std::numeric_limits<int64_t>::max(); 42 constexpr int64_t kMaxTimeDefault = std::numeric_limits<int64_t>::min(); 43 44 // For CompilationCompleted: isExecution = false, executionMode = SYNC, errorCode = 0 45 // For CompilationFailed: isExecution = false, executionMode = SYNC, errorCode != 0 46 // For ExecutionCompleted: isExecution = true, errorCode = 0, fallbackToCpuFromError = false 47 // For ExecutionFailed: isExecution = true, errorCode != 0, fallbackToCpuFromError = false 48 struct AtomKey { 49 bool isExecution; 50 ModelArchHash modelArchHash; 51 std::string deviceId; 52 ExecutionMode executionMode; 53 int32_t errorCode; 54 DataClass inputDataClass; 55 DataClass outputDataClass; 56 bool fallbackToCpuFromError; 57 bool introspectionEnabled; 58 bool cacheEnabled; 59 bool hasControlFlow; 60 bool hasDynamicTemporaries; 61 }; 62 63 bool operator==(const AtomKey& lhs, const AtomKey& rhs); 64 bool operator<(const AtomKey& lhs, const AtomKey& rhs); 65 66 // For CompilationCompleted, all timings except compilationTimeMillis omitted 67 // For CompilationFailed, all timings omitted 68 // For ExecutionCompleted, compilationTimeMillis timing omitted 69 // For ExecutionFailed, all timings omitted 70 struct AtomValue { 71 int32_t count = 0; 72 73 // AccumulatedTiming stores all the information needed to calculate the average, min, max, and 74 // standard deviation of all the accumulated timings. When count == 0, AccumulatedTiming is 75 // ignored. When count > 0: 76 // * average = sumTime / count 77 // * minimum = minTime 78 // * maximum = maxTime 79 // * variance = sumSquaredTime / count - average * average 80 // * standard deviation = sqrt(variance) 81 // * sample standard deviation = sqrt(variance * count / (count - 1)) 82 struct AccumulatedTiming { 83 int64_t sumTime = kSumTimeDefault; 84 int64_t minTime = kMinTimeDefault; 85 int64_t maxTime = kMaxTimeDefault; 86 // Sum of each squared timing, e.g.: t1^2 + t2^2 + ... + tn^2 87 int64_t sumSquaredTime = kSumTimeDefault; 88 int32_t count = 0; 89 }; 90 AccumulatedTiming compilationTimeMillis; 91 AccumulatedTiming durationRuntimeMicros; 92 AccumulatedTiming durationDriverMicros; 93 AccumulatedTiming durationHardwareMicros; 94 }; 95 96 void combineAtomValues(AtomValue* acculatedValue, const AtomValue& value); 97 98 // Atom type to be sent to Statsd Telemetry 99 using Atom = std::pair<AtomKey, AtomValue>; 100 101 // Helper class to locally aggregate and retrieve telemetry atoms. 102 class AtomAggregator { 103 public: 104 bool empty() const; 105 106 void push(Atom&& atom); 107 108 // Precondition: !empty() 109 Atom pop(); 110 111 private: 112 std::map<AtomKey, AtomValue> mAggregate; 113 // Pointer to keys of mAggregate to ensure atoms are logged in a fair order. Using pointers into 114 // a std::map is guaranteed to work because references to elements are guaranteed to be valid 115 // until that element is erased. 116 std::queue<const AtomKey*> mOrder; 117 }; 118 119 using LoggerFn = std::function<void(Atom&&)>; 120 121 // AsyncLogger minimizes the call to `write`, so that the calling thread which handles the 122 // compilation or execution is not slowed down by writing to statsd. Instead, AsyncLogger 123 // contains a dedicated thread that will handle logging to statsd in the background. 124 // This class is thread-safe. 125 class AsyncLogger { 126 public: 127 AsyncLogger(LoggerFn logger, Duration loggingQuietPeriodDuration); 128 AsyncLogger(const AsyncLogger&) = delete; 129 AsyncLogger(AsyncLogger&&) = delete; 130 AsyncLogger& operator=(const AsyncLogger&) = delete; 131 AsyncLogger& operator=(AsyncLogger&&) = delete; 132 ~AsyncLogger(); 133 134 void write(Atom&& atom); 135 136 private: 137 enum class Result { 138 SUCCESS, 139 TEARDOWN, 140 }; 141 142 // Precondition: output != nullptr 143 // Precondition: output->empty() 144 Result takeAll(std::vector<Atom>* output, bool blockUntilDataIsAvailable); 145 146 Result sleepFor(Duration duration); 147 148 mutable std::mutex mMutex; 149 mutable std::condition_variable mNotEmptyOrTeardown; 150 mutable std::vector<Atom> mChannel GUARDED_BY(mMutex); 151 mutable bool mTeardown GUARDED_BY(mMutex) = false; 152 std::thread mThread; 153 }; 154 155 // Create an Atom from a diagnostic info object. 156 Atom createAtomFrom(const DiagnosticCompilationInfo* info); 157 Atom createAtomFrom(const DiagnosticExecutionInfo* info); 158 159 // Log an Atom to statsd from a diagnostic info object. 160 void logCompilationToStatsd(const DiagnosticCompilationInfo* info); 161 void logExecutionToStatsd(const DiagnosticExecutionInfo* info); 162 163 } // namespace android::nn::telemetry 164 165 #endif // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_TELEMETRY_STATSD_H 166