1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Provides C++ classes to more easily use the Neural Networks API. 18 19 #ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H 20 #define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H 21 22 #include "NeuralNetworks.h" 23 24 #include <math.h> 25 #include <algorithm> 26 #include <optional> 27 #include <string> 28 #include <utility> 29 #include <vector> 30 31 namespace android { 32 namespace nn { 33 namespace wrapper { 34 35 enum class Type { 36 FLOAT32 = ANEURALNETWORKS_FLOAT32, 37 INT32 = ANEURALNETWORKS_INT32, 38 UINT32 = ANEURALNETWORKS_UINT32, 39 TENSOR_FLOAT32 = ANEURALNETWORKS_TENSOR_FLOAT32, 40 TENSOR_INT32 = ANEURALNETWORKS_TENSOR_INT32, 41 TENSOR_QUANT8_ASYMM = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, 42 BOOL = ANEURALNETWORKS_BOOL, 43 TENSOR_QUANT16_SYMM = ANEURALNETWORKS_TENSOR_QUANT16_SYMM, 44 TENSOR_FLOAT16 = ANEURALNETWORKS_TENSOR_FLOAT16, 45 TENSOR_BOOL8 = ANEURALNETWORKS_TENSOR_BOOL8, 46 FLOAT16 = ANEURALNETWORKS_FLOAT16, 47 TENSOR_QUANT8_SYMM_PER_CHANNEL = ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL, 48 TENSOR_QUANT16_ASYMM = ANEURALNETWORKS_TENSOR_QUANT16_ASYMM, 49 TENSOR_QUANT8_SYMM = ANEURALNETWORKS_TENSOR_QUANT8_SYMM, 50 MODEL = ANEURALNETWORKS_MODEL, 51 }; 52 53 enum class ExecutePreference { 54 PREFER_LOW_POWER = ANEURALNETWORKS_PREFER_LOW_POWER, 55 PREFER_FAST_SINGLE_ANSWER = ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER, 56 PREFER_SUSTAINED_SPEED = ANEURALNETWORKS_PREFER_SUSTAINED_SPEED 57 }; 58 59 enum class ExecutePriority { 60 LOW = ANEURALNETWORKS_PRIORITY_LOW, 61 MEDIUM = ANEURALNETWORKS_PRIORITY_MEDIUM, 62 HIGH = ANEURALNETWORKS_PRIORITY_HIGH, 63 DEFAULT = ANEURALNETWORKS_PRIORITY_DEFAULT, 64 }; 65 66 enum class Result { 67 NO_ERROR = ANEURALNETWORKS_NO_ERROR, 68 OUT_OF_MEMORY = ANEURALNETWORKS_OUT_OF_MEMORY, 69 INCOMPLETE = ANEURALNETWORKS_INCOMPLETE, 70 UNEXPECTED_NULL = ANEURALNETWORKS_UNEXPECTED_NULL, 71 BAD_DATA = ANEURALNETWORKS_BAD_DATA, 72 OP_FAILED = ANEURALNETWORKS_OP_FAILED, 73 UNMAPPABLE = ANEURALNETWORKS_UNMAPPABLE, 74 BAD_STATE = ANEURALNETWORKS_BAD_STATE, 75 OUTPUT_INSUFFICIENT_SIZE = ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE, 76 UNAVAILABLE_DEVICE = ANEURALNETWORKS_UNAVAILABLE_DEVICE, 77 MISSED_DEADLINE_TRANSIENT = ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT, 78 MISSED_DEADLINE_PERSISTENT = ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, 79 }; 80 81 struct SymmPerChannelQuantParams { 82 ANeuralNetworksSymmPerChannelQuantParams params; 83 std::vector<float> scales; 84 SymmPerChannelQuantParamsSymmPerChannelQuantParams85 SymmPerChannelQuantParams(std::vector<float> scalesVec, uint32_t channelDim) 86 : scales(std::move(scalesVec)) { 87 params = { 88 .channelDim = channelDim, 89 .scaleCount = static_cast<uint32_t>(scales.size()), 90 .scales = scales.size() > 0 ? scales.data() : nullptr, 91 }; 92 } 93 SymmPerChannelQuantParamsSymmPerChannelQuantParams94 SymmPerChannelQuantParams(const SymmPerChannelQuantParams& other) 95 : params(other.params), scales(other.scales) { 96 params.scales = scales.size() > 0 ? scales.data() : nullptr; 97 } 98 99 SymmPerChannelQuantParams& operator=(const SymmPerChannelQuantParams& other) { 100 if (this != &other) { 101 params = other.params; 102 scales = other.scales; 103 params.scales = scales.size() > 0 ? scales.data() : nullptr; 104 } 105 return *this; 106 } 107 }; 108 109 struct OperandType { 110 ANeuralNetworksOperandType operandType; 111 std::vector<uint32_t> dimensions; 112 std::optional<SymmPerChannelQuantParams> channelQuant; 113 OperandTypeOperandType114 OperandType(const OperandType& other) 115 : operandType(other.operandType), 116 dimensions(other.dimensions), 117 channelQuant(other.channelQuant) { 118 operandType.dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr; 119 } 120 121 OperandType& operator=(const OperandType& other) { 122 if (this != &other) { 123 operandType = other.operandType; 124 dimensions = other.dimensions; 125 channelQuant = other.channelQuant; 126 operandType.dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr; 127 } 128 return *this; 129 } 130 131 OperandType(Type type, std::vector<uint32_t> d, float scale = 0.0f, int32_t zeroPoint = 0) dimensionsOperandType132 : dimensions(std::move(d)), channelQuant(std::nullopt) { 133 operandType = { 134 .type = static_cast<int32_t>(type), 135 .dimensionCount = static_cast<uint32_t>(dimensions.size()), 136 .dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr, 137 .scale = scale, 138 .zeroPoint = zeroPoint, 139 }; 140 } 141 OperandTypeOperandType142 OperandType(Type type, std::vector<uint32_t> data, SymmPerChannelQuantParams&& channelQuant) 143 : dimensions(std::move(data)), channelQuant(std::move(channelQuant)) { 144 assert(type == Type::TENSOR_QUANT8_SYMM_PER_CHANNEL); 145 146 operandType = { 147 .type = static_cast<int32_t>(type), 148 .dimensionCount = static_cast<uint32_t>(dimensions.size()), 149 .dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr, 150 .scale = 0.0f, 151 .zeroPoint = 0, 152 }; 153 } 154 }; 155 156 class Memory { 157 public: Memory(size_t size,int protect,int fd,size_t offset)158 Memory(size_t size, int protect, int fd, size_t offset) { 159 mValid = ANeuralNetworksMemory_createFromFd(size, protect, fd, offset, &mMemory) == 160 ANEURALNETWORKS_NO_ERROR; 161 } 162 Memory(AHardwareBuffer * buffer)163 Memory(AHardwareBuffer* buffer) { 164 mValid = ANeuralNetworksMemory_createFromAHardwareBuffer(buffer, &mMemory) == 165 ANEURALNETWORKS_NO_ERROR; 166 } 167 ~Memory()168 ~Memory() { ANeuralNetworksMemory_free(mMemory); } 169 170 // Disallow copy semantics to ensure the runtime object can only be freed 171 // once. Copy semantics could be enabled if some sort of reference counting 172 // or deep-copy system for runtime objects is added later. 173 Memory(const Memory&) = delete; 174 Memory& operator=(const Memory&) = delete; 175 176 // Move semantics to remove access to the runtime object from the wrapper 177 // object that is being moved. This ensures the runtime object will be 178 // freed only once. Memory(Memory && other)179 Memory(Memory&& other) { *this = std::move(other); } 180 Memory& operator=(Memory&& other) { 181 if (this != &other) { 182 ANeuralNetworksMemory_free(mMemory); 183 mMemory = other.mMemory; 184 mValid = other.mValid; 185 other.mMemory = nullptr; 186 other.mValid = false; 187 } 188 return *this; 189 } 190 get()191 ANeuralNetworksMemory* get() const { return mMemory; } isValid()192 bool isValid() const { return mValid; } 193 194 private: 195 ANeuralNetworksMemory* mMemory = nullptr; 196 bool mValid = true; 197 }; 198 199 class Model { 200 public: Model()201 Model() { 202 // TODO handle the value returned by this call 203 ANeuralNetworksModel_create(&mModel); 204 } ~Model()205 ~Model() { ANeuralNetworksModel_free(mModel); } 206 207 // Disallow copy semantics to ensure the runtime object can only be freed 208 // once. Copy semantics could be enabled if some sort of reference counting 209 // or deep-copy system for runtime objects is added later. 210 Model(const Model&) = delete; 211 Model& operator=(const Model&) = delete; 212 213 // Move semantics to remove access to the runtime object from the wrapper 214 // object that is being moved. This ensures the runtime object will be 215 // freed only once. Model(Model && other)216 Model(Model&& other) { *this = std::move(other); } 217 Model& operator=(Model&& other) { 218 if (this != &other) { 219 ANeuralNetworksModel_free(mModel); 220 mModel = other.mModel; 221 mNextOperandId = other.mNextOperandId; 222 mValid = other.mValid; 223 other.mModel = nullptr; 224 other.mNextOperandId = 0; 225 other.mValid = false; 226 } 227 return *this; 228 } 229 finish()230 Result finish() { 231 if (mValid) { 232 auto result = static_cast<Result>(ANeuralNetworksModel_finish(mModel)); 233 if (result != Result::NO_ERROR) { 234 mValid = false; 235 } 236 return result; 237 } else { 238 return Result::BAD_STATE; 239 } 240 } 241 addOperand(const OperandType * type)242 uint32_t addOperand(const OperandType* type) { 243 if (ANeuralNetworksModel_addOperand(mModel, &(type->operandType)) != 244 ANEURALNETWORKS_NO_ERROR) { 245 mValid = false; 246 } 247 if (type->channelQuant) { 248 if (ANeuralNetworksModel_setOperandSymmPerChannelQuantParams( 249 mModel, mNextOperandId, &type->channelQuant.value().params) != 250 ANEURALNETWORKS_NO_ERROR) { 251 mValid = false; 252 } 253 } 254 return mNextOperandId++; 255 } 256 setOperandValue(uint32_t index,const void * buffer,size_t length)257 void setOperandValue(uint32_t index, const void* buffer, size_t length) { 258 if (ANeuralNetworksModel_setOperandValue(mModel, index, buffer, length) != 259 ANEURALNETWORKS_NO_ERROR) { 260 mValid = false; 261 } 262 } 263 setOperandValueFromMemory(uint32_t index,const Memory * memory,uint32_t offset,size_t length)264 void setOperandValueFromMemory(uint32_t index, const Memory* memory, uint32_t offset, 265 size_t length) { 266 if (ANeuralNetworksModel_setOperandValueFromMemory(mModel, index, memory->get(), offset, 267 length) != ANEURALNETWORKS_NO_ERROR) { 268 mValid = false; 269 } 270 } 271 addOperation(ANeuralNetworksOperationType type,const std::vector<uint32_t> & inputs,const std::vector<uint32_t> & outputs)272 void addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs, 273 const std::vector<uint32_t>& outputs) { 274 if (ANeuralNetworksModel_addOperation(mModel, type, static_cast<uint32_t>(inputs.size()), 275 inputs.data(), static_cast<uint32_t>(outputs.size()), 276 outputs.data()) != ANEURALNETWORKS_NO_ERROR) { 277 mValid = false; 278 } 279 } identifyInputsAndOutputs(const std::vector<uint32_t> & inputs,const std::vector<uint32_t> & outputs)280 void identifyInputsAndOutputs(const std::vector<uint32_t>& inputs, 281 const std::vector<uint32_t>& outputs) { 282 if (ANeuralNetworksModel_identifyInputsAndOutputs( 283 mModel, static_cast<uint32_t>(inputs.size()), inputs.data(), 284 static_cast<uint32_t>(outputs.size()), 285 outputs.data()) != ANEURALNETWORKS_NO_ERROR) { 286 mValid = false; 287 } 288 } 289 relaxComputationFloat32toFloat16(bool isRelax)290 void relaxComputationFloat32toFloat16(bool isRelax) { 291 if (ANeuralNetworksModel_relaxComputationFloat32toFloat16(mModel, isRelax) == 292 ANEURALNETWORKS_NO_ERROR) { 293 mRelaxed = isRelax; 294 } 295 } 296 getHandle()297 ANeuralNetworksModel* getHandle() const { return mModel; } isValid()298 bool isValid() const { return mValid; } isRelaxed()299 bool isRelaxed() const { return mRelaxed; } 300 301 protected: 302 ANeuralNetworksModel* mModel = nullptr; 303 // We keep track of the operand ID as a convenience to the caller. 304 uint32_t mNextOperandId = 0; 305 bool mValid = true; 306 bool mRelaxed = false; 307 }; 308 309 class Event { 310 public: Event()311 Event() {} ~Event()312 ~Event() { ANeuralNetworksEvent_free(mEvent); } 313 314 // Disallow copy semantics to ensure the runtime object can only be freed 315 // once. Copy semantics could be enabled if some sort of reference counting 316 // or deep-copy system for runtime objects is added later. 317 Event(const Event&) = delete; 318 Event& operator=(const Event&) = delete; 319 320 // Move semantics to remove access to the runtime object from the wrapper 321 // object that is being moved. This ensures the runtime object will be 322 // freed only once. Event(Event && other)323 Event(Event&& other) { *this = std::move(other); } 324 Event& operator=(Event&& other) { 325 if (this != &other) { 326 ANeuralNetworksEvent_free(mEvent); 327 mEvent = other.mEvent; 328 other.mEvent = nullptr; 329 } 330 return *this; 331 } 332 wait()333 Result wait() { return static_cast<Result>(ANeuralNetworksEvent_wait(mEvent)); } 334 335 // Only for use by Execution set(ANeuralNetworksEvent * newEvent)336 void set(ANeuralNetworksEvent* newEvent) { 337 ANeuralNetworksEvent_free(mEvent); 338 mEvent = newEvent; 339 } 340 341 // Only for use by Execution getHandle()342 ANeuralNetworksEvent* getHandle() const { return mEvent; } 343 344 private: 345 ANeuralNetworksEvent* mEvent = nullptr; 346 }; 347 348 class Compilation { 349 public: Compilation(const Model * model)350 Compilation(const Model* model) { 351 int result = ANeuralNetworksCompilation_create(model->getHandle(), &mCompilation); 352 if (result != 0) { 353 // TODO Handle the error 354 } 355 } 356 ~Compilation()357 ~Compilation() { ANeuralNetworksCompilation_free(mCompilation); } 358 359 // Disallow copy semantics to ensure the runtime object can only be freed 360 // once. Copy semantics could be enabled if some sort of reference counting 361 // or deep-copy system for runtime objects is added later. 362 Compilation(const Compilation&) = delete; 363 Compilation& operator=(const Compilation&) = delete; 364 365 // Move semantics to remove access to the runtime object from the wrapper 366 // object that is being moved. This ensures the runtime object will be 367 // freed only once. Compilation(Compilation && other)368 Compilation(Compilation&& other) { *this = std::move(other); } 369 Compilation& operator=(Compilation&& other) { 370 if (this != &other) { 371 ANeuralNetworksCompilation_free(mCompilation); 372 mCompilation = other.mCompilation; 373 other.mCompilation = nullptr; 374 } 375 return *this; 376 } 377 setPreference(ExecutePreference preference)378 Result setPreference(ExecutePreference preference) { 379 return static_cast<Result>(ANeuralNetworksCompilation_setPreference( 380 mCompilation, static_cast<int32_t>(preference))); 381 } 382 setPriority(ExecutePriority priority)383 Result setPriority(ExecutePriority priority) { 384 return static_cast<Result>(ANeuralNetworksCompilation_setPriority( 385 mCompilation, static_cast<int32_t>(priority))); 386 } 387 setCaching(const std::string & cacheDir,const std::vector<uint8_t> & token)388 Result setCaching(const std::string& cacheDir, const std::vector<uint8_t>& token) { 389 if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN) { 390 return Result::BAD_DATA; 391 } 392 return static_cast<Result>(ANeuralNetworksCompilation_setCaching( 393 mCompilation, cacheDir.c_str(), token.data())); 394 } 395 finish()396 Result finish() { return static_cast<Result>(ANeuralNetworksCompilation_finish(mCompilation)); } 397 getHandle()398 ANeuralNetworksCompilation* getHandle() const { return mCompilation; } 399 400 private: 401 ANeuralNetworksCompilation* mCompilation = nullptr; 402 }; 403 404 class Execution { 405 public: Execution(const Compilation * compilation)406 Execution(const Compilation* compilation) { 407 int result = ANeuralNetworksExecution_create(compilation->getHandle(), &mExecution); 408 if (result != 0) { 409 // TODO Handle the error 410 } 411 } 412 ~Execution()413 ~Execution() { ANeuralNetworksExecution_free(mExecution); } 414 415 // Disallow copy semantics to ensure the runtime object can only be freed 416 // once. Copy semantics could be enabled if some sort of reference counting 417 // or deep-copy system for runtime objects is added later. 418 Execution(const Execution&) = delete; 419 Execution& operator=(const Execution&) = delete; 420 421 // Move semantics to remove access to the runtime object from the wrapper 422 // object that is being moved. This ensures the runtime object will be 423 // freed only once. Execution(Execution && other)424 Execution(Execution&& other) { *this = std::move(other); } 425 Execution& operator=(Execution&& other) { 426 if (this != &other) { 427 ANeuralNetworksExecution_free(mExecution); 428 mExecution = other.mExecution; 429 other.mExecution = nullptr; 430 } 431 return *this; 432 } 433 434 Result setInput(uint32_t index, const void* buffer, size_t length, 435 const ANeuralNetworksOperandType* type = nullptr) { 436 return static_cast<Result>( 437 ANeuralNetworksExecution_setInput(mExecution, index, type, buffer, length)); 438 } 439 440 Result setInputFromMemory(uint32_t index, const Memory* memory, uint32_t offset, 441 uint32_t length, const ANeuralNetworksOperandType* type = nullptr) { 442 return static_cast<Result>(ANeuralNetworksExecution_setInputFromMemory( 443 mExecution, index, type, memory->get(), offset, length)); 444 } 445 446 Result setOutput(uint32_t index, void* buffer, size_t length, 447 const ANeuralNetworksOperandType* type = nullptr) { 448 return static_cast<Result>( 449 ANeuralNetworksExecution_setOutput(mExecution, index, type, buffer, length)); 450 } 451 452 Result setOutputFromMemory(uint32_t index, const Memory* memory, uint32_t offset, 453 uint32_t length, const ANeuralNetworksOperandType* type = nullptr) { 454 return static_cast<Result>(ANeuralNetworksExecution_setOutputFromMemory( 455 mExecution, index, type, memory->get(), offset, length)); 456 } 457 startCompute(Event * event)458 Result startCompute(Event* event) { 459 ANeuralNetworksEvent* ev = nullptr; 460 Result result = static_cast<Result>(ANeuralNetworksExecution_startCompute(mExecution, &ev)); 461 event->set(ev); 462 return result; 463 } 464 startComputeWithDependencies(const std::vector<const Event * > & dependencies,uint64_t duration,Event * event)465 Result startComputeWithDependencies(const std::vector<const Event*>& dependencies, 466 uint64_t duration, Event* event) { 467 std::vector<const ANeuralNetworksEvent*> deps(dependencies.size()); 468 std::transform(dependencies.begin(), dependencies.end(), deps.begin(), 469 [](const Event* e) { return e->getHandle(); }); 470 ANeuralNetworksEvent* ev = nullptr; 471 Result result = static_cast<Result>(ANeuralNetworksExecution_startComputeWithDependencies( 472 mExecution, deps.data(), deps.size(), duration, &ev)); 473 event->set(ev); 474 return result; 475 } 476 compute()477 Result compute() { return static_cast<Result>(ANeuralNetworksExecution_compute(mExecution)); } 478 getOutputOperandDimensions(uint32_t index,std::vector<uint32_t> * dimensions)479 Result getOutputOperandDimensions(uint32_t index, std::vector<uint32_t>* dimensions) { 480 uint32_t rank = 0; 481 Result result = static_cast<Result>( 482 ANeuralNetworksExecution_getOutputOperandRank(mExecution, index, &rank)); 483 dimensions->resize(rank); 484 if ((result != Result::NO_ERROR && result != Result::OUTPUT_INSUFFICIENT_SIZE) || 485 rank == 0) { 486 return result; 487 } 488 result = static_cast<Result>(ANeuralNetworksExecution_getOutputOperandDimensions( 489 mExecution, index, dimensions->data())); 490 return result; 491 } 492 493 private: 494 ANeuralNetworksExecution* mExecution = nullptr; 495 }; 496 497 } // namespace wrapper 498 } // namespace nn 499 } // namespace android 500 501 #endif // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H 502