/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_TYPES_NNAPI_TYPES_H
#define ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_TYPES_NNAPI_TYPES_H

#include <android-base/chrono_utils.h>
#include <android-base/expected.h>
#include <android-base/unique_fd.h>

#include <array>
#include <chrono>
#include <functional>
#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <type_traits>
#include <utility>
#include <variant>
#include <vector>

#include "nnapi/OperandTypes.h"
#include "nnapi/OperationTypes.h"
#include "nnapi/Result.h"

// Forward declare AHardwareBuffer
extern "C" typedef struct AHardwareBuffer AHardwareBuffer;

namespace android::nn {

// Forward declarations

class IBuffer;
class IBurst;
class IDevice;
class IExecution;
class IPreparedModel;
struct Memory;

// Constants

// Sentinel performance values used when a driver does not report performance
// for an operand type (see Capabilities::PerformanceInfo). A lower number is
// better, so FLT_MAX means "worst possible / unknown".
constexpr float kDefaultExecTime = std::numeric_limits<float>::max();
constexpr float kDefaultPowerUsage = std::numeric_limits<float>::max();
// Fixed size of the compilation caching token (see CacheToken below).
constexpr uint32_t kByteSizeOfCacheToken = 32;
constexpr uint32_t kMaxNumberOfCacheFiles = 32;

/**
 * Numeric values of extension operand and operation types have the
 * following structure:
 * - 16 high bits represent the "prefix", which corresponds uniquely to the
 *   extension name.
 * - 16 low bits represent the type ID within the extension.
 */
constexpr uint8_t kExtensionTypeBits = 16;
constexpr uint8_t kExtensionPrefixBits = 16;
constexpr uint32_t kTypeWithinExtensionMask = 0xFFFF;

// Default and minimum alignment/padding requirements for memory passed in a
// Request (see MemoryPreference below for how alignment/padding are defined).
constexpr uint32_t kDefaultRequestMemoryAlignment = 64;
constexpr uint32_t kDefaultRequestMemoryPadding = 64;
constexpr uint32_t kMinMemoryAlignment = alignof(std::max_align_t);
constexpr uint32_t kMinMemoryPadding = 1;
// Default and maximum timeout durations for WHILE loops during execution.
constexpr auto kLoopTimeoutDefault = std::chrono::seconds{2};
constexpr auto kLoopTimeoutMaximum = std::chrono::seconds{15};

// Aliases

using SharedBuffer = std::shared_ptr<const IBuffer>;
using SharedBurst = std::shared_ptr<const IBurst>;
using SharedDevice = std::shared_ptr<const IDevice>;
using SharedExecution = std::shared_ptr<const IExecution>;
using SharedMemory = std::shared_ptr<const Memory>;
using SharedPreparedModel = std::shared_ptr<const IPreparedModel>;

// Canonical types

/**
 * Status of a device.
 */
enum class DeviceStatus {
    AVAILABLE = 0,
    BUSY = 1,
    OFFLINE = 2,
    UNKNOWN = 3,
};

/**
 * Execution preferences.
 */
enum class ExecutionPreference {
    /**
     * Prefer executing in a way that minimizes battery drain.
     * This is desirable for compilations that will be executed often.
     */
    LOW_POWER = 0,
    /**
     * Prefer returning a single answer as fast as possible, even if this causes
     * more power consumption.
     */
    FAST_SINGLE_ANSWER = 1,
    /**
     * Prefer maximizing the throughput of successive frames, for example when
     * processing successive frames coming from the camera.
     */
    SUSTAINED_SPEED = 2,
    DEFAULT = FAST_SINGLE_ANSWER,
};

/**
 * Device types.
 *
 * The type of NNAPI device.
 */
enum class DeviceType {
    /** The device type cannot be provided. */
    UNKNOWN = 0,
    /** The device does not fall into any category below. */
    OTHER = 1,
    /** The device runs NNAPI models on single or multi-core CPU. */
    CPU = 2,
    /** The device can run NNAPI models and also accelerate graphics APIs such
     * as OpenGL ES and Vulkan. */
    GPU = 3,
    /** Dedicated accelerator for Machine Learning workloads. */
    ACCELERATOR = 4,
};

/**
 * Specifies whether or not to measure timing information during execution.
 */
enum class MeasureTiming {
    NO = 0,
    YES = 1,
};

/**
 * Priority given to a prepared model for execution.
 */
enum class Priority {
    LOW = 0,
    MEDIUM = 1,
    HIGH = 2,
    DEFAULT = MEDIUM,
};

// TODO: Should more errors from NeuralNetworks.h be incorporated? The left name shows errors that
// appear in NeuralNetworks.h but not in the HAL, and the right column shows what these values could
// map to:
// * OUT_OF_MEMORY ==> GENERAL_FAILURE / RESOURCE_EXHAUSTED_*
// * INCOMPLETE ==> GENERAL_FAILURE
// * UNEXPECTED_NULL ==> INVALID_ARGUMENT
// * UNMAPPABLE ==> GENERAL_FAILURE
// * BAD_STATE ==> INVALID_ARGUMENT
enum class ErrorStatus {
    NONE = 0,
    DEVICE_UNAVAILABLE = 1,
    GENERAL_FAILURE = 2,
    OUTPUT_INSUFFICIENT_SIZE = 3,
    INVALID_ARGUMENT = 4,
    MISSED_DEADLINE_TRANSIENT = 5,
    MISSED_DEADLINE_PERSISTENT = 6,
    RESOURCE_EXHAUSTED_TRANSIENT = 7,
    RESOURCE_EXHAUSTED_PERSISTENT = 8,
    // NOTE(review): value 10000 is far outside the HAL range above — presumably
    // chosen so it can never collide with a HAL-reported status; confirm against
    // the transport-layer (binder death) handling before relying on this.
    DEAD_OBJECT = 10000,
};

/**
 * Error type used by GeneralResult: a human-readable message paired with a
 * canonical ErrorStatus code (defaults to GENERAL_FAILURE).
 */
struct GeneralError {
    // NOLINTNEXTLINE(google-explicit-constructor)
    /*implicit*/ GeneralError(std::string message = {},
                              ErrorStatus code = ErrorStatus::GENERAL_FAILURE);

    std::string message;
    ErrorStatus code;
};

// Either a value of `Type` or a GeneralError describing why it is absent.
template <typename Type>
using GeneralResult = base::expected<Type, GeneralError>;

/**
 * Fused activation function types.
 */
enum class FusedActivationFunc : int32_t {
    /** NO fused activation function. */
    NONE = 0,
    /** Fused ReLU activation function. */
    RELU = 1,
    /** Fused ReLU1 activation function. */
    RELU1 = 2,
    /** Fused ReLU6 activation function. */
    RELU6 = 3,
};

using Dimension = uint32_t;
using Dimensions = std::vector<Dimension>;

// Opaque token identifying a set of compilation cache files.
using CacheToken = std::array<uint8_t, kByteSizeOfCacheToken>;

/**
 * Describes the shape information of an output operand after execution.
 */
struct OutputShape {
    /**
     * Dimensions of the operand.
     */
    std::vector<uint32_t> dimensions;

    /**
     * Whether the provided buffer size is sufficient for the output.
     */
    bool isSufficient = false;
};

/**
 * Error type used by ExecutionResult. Like GeneralError, but additionally
 * carries the actual output shapes when the failure is
 * OUTPUT_INSUFFICIENT_SIZE, so the caller can resize its buffers and retry.
 */
struct ExecutionError {
    // NOLINTNEXTLINE(google-explicit-constructor)
    /*implicit*/ ExecutionError(std::string message = {},
                                ErrorStatus code = ErrorStatus::GENERAL_FAILURE,
                                std::vector<OutputShape> outputShapes = {});

    // NOLINTNEXTLINE(google-explicit-constructor)
    /*implicit*/ ExecutionError(GeneralError error);

    std::string message;
    ErrorStatus code;
    // OutputShapes for code == OUTPUT_INSUFFICIENT_SIZE
    std::vector<OutputShape> outputShapes;
};

// Either a value of `Type` or an ExecutionError describing why it is absent.
template <typename Type>
using ExecutionResult = base::expected<Type, ExecutionError>;

/**
 * The capabilities of a driver.
 *
 * This represents performance of non-extension operations.
 *
 * Performance of an operation other than {@link OperationType::IF} and
 * {@link OperationType::WHILE} comes from the type of its first operand.
 */
struct Capabilities {
    /**
     * Performance information for the reference workload.
     *
     * Used by a driver to report its performance characteristics.
     */
    struct PerformanceInfo {
        /**
         * Ratio of the time taken by the driver to execute the
         * workload compared to the time the CPU would take for the
         * same workload. A lower number is better.
         */
        float execTime = kDefaultExecTime;

        /**
         * Ratio of the energy used by the driver compared to what
         * the CPU would use for doing the same workload. A lower number
         * is better.
         */
        float powerUsage = kDefaultPowerUsage;
    };

    /**
     * Driver performance when operating on a particular data type.
     * In the case of float32 data, this is used when the calculations
     * are not relaxed.
     */
    struct OperandPerformance {
        OperandType type{};
        PerformanceInfo info;
    };

    /**
     * Sorted table of per-operand-type performance entries, constructed via
     * the validating factory `create` (which is why the constructor is
     * private).
     */
    class OperandPerformanceTable {
       public:
        static Result<OperandPerformanceTable> create(
                std::vector<OperandPerformance> operandPerformances);

        // Returns the performance entry for `type`; behavior for a type not
        // present in the table is defined in the implementation file.
        PerformanceInfo lookup(OperandType type) const;
        const std::vector<OperandPerformance>& asVector() const;

       private:
        explicit OperandPerformanceTable(std::vector<OperandPerformance> operandPerformances);
        std::vector<OperandPerformance> mSorted;
    };

    /**
     * Driver performance when operating on float32 data but performing
     * calculations with range and/or precision as low as that of the IEEE
     * 754 16-bit floating-point format.
     */
    PerformanceInfo relaxedFloat32toFloat16PerformanceScalar;
    PerformanceInfo relaxedFloat32toFloat16PerformanceTensor;

    /**
     * Performance by operand type. Must be sorted by OperandType.
     *
     * If a particular {@link OperandType} is not present in operandPerformance,
     * its performance is treated as
     * { .execTime = FLT_MAX, .powerUsage = FLT_MAX }.
     *
     * Performance does not apply to {@link OperandType::SUBGRAPH}, and a driver
     * must not report operand performance for {@link OperandType::SUBGRAPH}.
     */
    OperandPerformanceTable operandPerformance;

    /**
     * Performance of an {@link OperationType::IF} operation is the sum of
     * {@link Capabilities::ifPerformance} and the mean of performance for the
     * two branch subgraphs, where performance for a subgraph is the sum of the
     * performance of all operations within the subgraph.
     */
    PerformanceInfo ifPerformance;

    /**
     * Performance of a {@link OperationType::WHILE} operation is the sum of
     * {@link Capabilities::whilePerformance}, performance for the condition
     * subgraph and performance for the body subgraph, where performance for a
     * subgraph is the sum of the performance of all operations within the
     * subgraph.
     */
    PerformanceInfo whilePerformance;
};

/**
 * Information about an extension.
 */
struct Extension {
    /**
     * Information about an extension operand type.
     */
    struct OperandTypeInformation {
        /**
         * The extension operand type.
         */
        uint16_t type = 0;

        /**
         * Indicates whether the extension operand type represents a tensor or
         * a scalar.
         */
        bool isTensor = false;

        /**
         * The byte size of the operand (if scalar) or of a single element (if
         * tensor).
         */
        uint32_t byteSize = 0;
    };

    /**
     * The extension name.
     *
     * The name must consist of lowercase latin letters, numbers, periods, and
     * underscore signs. The name must contain at least one period.
     *
     * The name must start with the reverse domain name of the vendor.
     *
     * Example: com.google.test_extension
     */
    std::string name;

    /**
     * Information about operand types defined by the extension.
     */
    std::vector<OperandTypeInformation> operandTypes;
};

/**
 * Describes one operation of the model's graph.
 */
struct Operation {
    /**
     * The operation type.
     */
    OperationType type{};

    /**
     * Describes the table that contains the indexes of the inputs of the
     * operation. The offset is the index in the operandIndexes table.
     */
    std::vector<uint32_t> inputs;

    /**
     * Describes the table that contains the indexes of the outputs of the
     * operation. The offset is the index in the operandIndexes table.
     */
    std::vector<uint32_t> outputs;
};

/**
 * Describes the location of a data object.
 */
struct DataLocation {
    /**
     * The address of the memory where the data is found.
     *
     * This field is only active when lifetime is POINTER.
     */
    std::variant<const void*, void*> pointer;

    /**
     * The index of the memory pool where this location is found.
     */
    uint32_t poolIndex = 0;

    /**
     * Offset in bytes from the start of the pool.
     */
    uint32_t offset = 0;

    /**
     * The length of the data in bytes.
     */
    uint32_t length = 0;

    /**
     * The end padding of the specified memory region in bytes.
     */
    uint32_t padding = 0;
};

/**
 * Describes one operand of the model's graph.
 */
struct Operand {
    /**
     * How an operand is used.
     */
    enum class LifeTime {
        /**
         * The operand is internal to the model. It's created by an operation and
         * consumed by other operations. It must be an output operand of
         * exactly one operation.
         */
        TEMPORARY_VARIABLE = 0,

        /**
         * The operand is an input of a subgraph. It must not be an output
         * operand of any operation.
         *
         * An operand can't be both input and output of a subgraph.
         */
        SUBGRAPH_INPUT = 1,

        /**
         * The operand is an output of a subgraph. It must be an output
         * operand of exactly one operation.
         *
         * An operand can't be both input and output of a subgraph.
         */
        SUBGRAPH_OUTPUT = 2,

        /**
         * The operand is a constant found in Model::operandValues. It must
         * not be an output operand of any operation.
         */
        CONSTANT_COPY = 3,

        /**
         * The operand is a constant that was specified via a Memory
         * object. It must not be an output operand of any operation.
         */
        CONSTANT_REFERENCE = 4,

        /**
         * The operand does not have a value. This is valid only for optional
         * arguments of operations.
         */
        NO_VALUE = 5,

        /**
         * The operand is a reference to a subgraph. It must be an input to one
         * or more {@link OperationType::IF} or {@link OperationType::WHILE}
         * operations.
         */
        SUBGRAPH = 6,

        /**
         * This operand is a constant found in a user buffer. It must not be an
         * output operand of any operation.
         */
        POINTER = 7,
    };

    /**
     * No additional parameters.
     */
    using NoParams = std::monostate;

    /**
     * Parameters for TENSOR_QUANT8_SYMM_PER_CHANNEL operand.
     */
    struct SymmPerChannelQuantParams {
        /** Array of scaling values for each channel. Each value must be greater than zero. */
        std::vector<float> scales;
        /** Index of the channel dimension */
        uint32_t channelDim = 0;
    };

    /**
     * Extension operand parameters.
     *
     * The framework treats this as an opaque data blob.
     * The format is up to individual extensions.
     */
    using ExtensionParams = std::vector<uint8_t>;

    /**
     * Additional parameters specific to a particular operand type.
     */
    using ExtraParams = std::variant<NoParams, SymmPerChannelQuantParams, ExtensionParams>;

    /**
     * The data type.
     *
     * Besides the values listed in {@link OperandType}, any value equal or over
     * (1 << kExtensionTypeBits) is possible and should be interpreted
     * as an extension type according to {@link Model::extensionNameToPrefix}.
     */
    OperandType type{};

    /**
     * Dimensions of the operand.
     *
     * For a scalar operand, dimensions.size() must be 0.
     *
     * A tensor operand with all dimensions specified has "fully
     * specified" dimensions. Whenever possible (i.e., whenever the
     * dimensions are known at model construction time), a tensor
     * operand should have (but is not required to have) fully
     * specified dimensions, in order to enable the best possible
     * performance.
     *
     * If a tensor operand's dimensions are not fully specified, the
     * dimensions of the operand are deduced from the operand
     * dimensions and values of the operation for which that operand
     * is an output or from the corresponding {@link OperationType::IF} or
     * {@link OperationType::WHILE} operation input operand dimensions in the
     * case of referenced subgraph input operands.
     *
     * In the following situations, a tensor operand's dimensions must
     * be fully specified:
     *
     *     . The operand has lifetime CONSTANT_COPY, CONSTANT_REFERENCE, or
     *       POINTER.
     *
     *     . The operand has lifetime SUBGRAPH_INPUT and belongs to the main
     *       subgraph. Fully specified dimensions must either be present in the
     *       Operand or they must be provided in the corresponding
     *       RequestArgument.
     *       EXCEPTION: If the input is optional and omitted
     *       (by setting the hasNoValue field of the corresponding
     *       RequestArgument to true) then it need not have fully
     *       specified dimensions.
     *
     * A tensor operand with some number of unspecified dimensions is
     * represented by setting each unspecified dimension to 0.
     *
     * A tensor operand with unspecified rank is represented by providing
     * an empty dimensions vector.
     */
    Dimensions dimensions;

    /**
     * Quantized scale of the operand.
     *
     * Must be 0 when not applicable to an operand type.
     *
     * See {@link OperandType}.
     */
    float scale = 0.0f;

    /**
     * Quantized zero-point offset of the operand.
     *
     * Must be 0 when not applicable to an operand type.
     *
     * See {@link OperandType}.
     */
    int32_t zeroPoint = 0;

    /**
     * How the operand is used.
     */
    LifeTime lifetime{};

    /**
     * Where to find the data for this operand.
     * If the lifetime is TEMPORARY_VARIABLE, SUBGRAPH_INPUT, SUBGRAPH_OUTPUT,
     * or NO_VALUE:
     * - All the fields must be 0.
     * If the lifetime is CONSTANT_COPY:
     * - location.pointer is null.
     * - location.poolIndex is 0.
     * - location.offset is the offset in bytes into Model::operandValues.
     * - location.length is set.
     * - location.padding is 0.
     * If the lifetime is CONSTANT_REFERENCE:
     * - location.pointer is null.
     * - location.poolIndex is set.
     * - location.offset is the offset in bytes into the specified pool.
     * - location.length is set.
     * - location.padding is set.
     * If the lifetime is SUBGRAPH:
     * - location.pointer is null.
     * - location.poolIndex is 0.
     * - location.offset is the index of the referenced subgraph in
     *   {@link Model::referenced}.
     * - location.length is 0.
     * - location.padding is 0.
     * If the lifetime is POINTER:
     * - location.pointer is non-null.
     * - location.poolIndex is 0.
     * - location.offset is 0.
     * - location.length is set.
     * - location.padding is 0.
     */
    DataLocation location;

    /**
     * Additional parameters specific to a particular operand type.
     */
    ExtraParams extraParams;
};

// An owned file descriptor and a shared, immutable view of one.
using Handle = base::unique_fd;
using SharedHandle = std::shared_ptr<const Handle>;

/**
 * Canonical representation of a shared memory region, as one of four
 * backing kinds held in the `handle` variant.
 */
struct Memory {
    // Ashmem (Android shared memory) region.
    struct Ashmem {
        base::unique_fd fd;
        size_t size;
    };

    // A plain file-descriptor-backed mapping. NOTE(review): `prot` and
    // `offset` presumably carry mmap()-style protection flags and byte
    // offset — confirm against the mapping code before relying on this.
    struct Fd {
        size_t size;
        int prot;
        base::unique_fd fd;
        size_t offset;
    };

    // RAII wrapper for AHardwareBuffer
    struct HardwareBuffer {
        using Deleter = std::add_pointer_t<void(AHardwareBuffer*)>;
        using Handle = std::unique_ptr<AHardwareBuffer, Deleter>;
        Handle handle;
    };

    // Memory of an unrecognized kind, described by a generic native handle
    // (a set of file descriptors plus integer metadata) and a name.
    struct Unknown {
        struct Handle {
            std::vector<base::unique_fd> fds;
            std::vector<int> ints;
        };
        Handle handle;
        size_t size;
        std::string name;
    };

    std::variant<Ashmem, Fd, HardwareBuffer, Unknown> handle;
};

/**
 * The mapping between extension names and prefixes of values like operand and operation type, and
 * token in {@link TokenValuePair}.
 *
 * An operand or operation whose numeric type value is above {@link IDevice::OPERAND_TYPE_BASE_MAX}
 * or {@link IDevice::OPERATION_TYPE_BASE_MAX} respectively should be interpreted as an extension
 * operand/operation. The low kExtensionTypeBits bits of the value correspond to the type ID within
 * the extension and the high kExtensionPrefixBits bits encode the "prefix", which maps uniquely to
 * the extension name. The sign bit is always 0.
 *
 * For example, if a model contains an operation whose value is 0x7AAABBBB and
 * Model::extensionNameToPrefix contains an entry with prefix=0x7AAA and
 * name="vendor.test.test_extension", then the operation should be interpreted as the operation
 * 0xBBBB of the extension named vendor.test.test_extension.
 *
 * This is a one-to-one correspondence. That is, there must be at most one prefix corresponding to
 * each extension name and at most one extension name corresponding to each prefix.
 */
struct ExtensionNameAndPrefix {
    /**
     * The extension name.
     *
     * See {@link Extension::name} for the format specification.
     */
    std::string name;

    /**
     * The extension prefix. Only the lowest 15 bits are used, so the value must be less than 32768.
     */
    uint16_t prefix = 0;
};

/**
 * A Neural Network Model.
 *
 * This includes not only the execution graph, but also constant data such as
 * weights or scalars added at construction time. The only information that
 * may not be known is the shape of the input tensors.
 */
struct Model {
    /**
     * An excerpt of the execution graph.
     */
    struct Subgraph {
        /**
         * All operands included in the subgraph.
         */
        std::vector<Operand> operands;

        /**
         * All operations included in the subgraph.
         *
         * The operations are sorted into execution order. Every operand
         * with lifetime SUBGRAPH_OUTPUT or TEMPORARY_VARIABLE must be
         * written before it is read.
         */
        std::vector<Operation> operations;

        /**
         * Input indexes of the subgraph. There must be at least one.
         *
         * Each value corresponds to the index of the operand in "operands".
         */
        std::vector<uint32_t> inputIndexes;

        /**
         * Output indexes of the subgraph. There must be at least one.
         *
         * Each value corresponds to the index of the operand in "operands".
         */
        std::vector<uint32_t> outputIndexes;
    };

    /**
     * Byte buffer that backs CONSTANT_COPY operands, with alignment-aware
     * append (see `append` below).
     */
    class OperandValues {
       public:
        OperandValues();
        OperandValues(const uint8_t* data, size_t length);

        // Append a segment of memory (starting at `data` with `length` number of bytes) to the back
        // of `OperandValues`, adding padding as necessary so that the appended data is aligned.
        // Refer to `getAlignmentForLength` for more information on alignment (such as what the
        // current alignments are for different data lengths).
        DataLocation append(const uint8_t* data, size_t length);

        const uint8_t* data() const;
        size_t size() const;

       private:
        std::vector<uint8_t> mData;
    };

    /**
     * The top-level subgraph.
     */
    Subgraph main;

    /**
     * Referenced subgraphs.
     *
     * Each subgraph is referenced by the main subgraph or at least one other
     * referenced subgraph.
     *
     * There must be no reference cycles.
     */
    std::vector<Subgraph> referenced;

    /**
     * A byte buffer containing operand data that were copied into the model.
     *
     * An operand's value must be located here if and only if Operand::lifetime
     * equals Operand::LifeTime::CONSTANT_COPY.
     */
    OperandValues operandValues;

    /**
     * A collection of shared memory pools containing operand values.
     *
     * An operand's value must be located here if and only if Operand::lifetime
     * equals Operand::LifeTime::CONSTANT_REFERENCE.
     */
    std::vector<SharedMemory> pools;

    /**
     * 'true' indicates TENSOR_FLOAT32 may be calculated with range and/or
     * precision as low as that of the IEEE 754 16-bit floating-point format.
     * 'false' indicates TENSOR_FLOAT32 must be calculated using at least the
     * range and precision of the IEEE 754 32-bit floating-point format.
     */
    bool relaxComputationFloat32toFloat16 = false;

    /**
     * The mapping between extension names and prefixes of operand and
     * operation type values.
     *
     * An operand or operation whose numeric type value is equal to or greater
     * than (1 << kExtensionTypeBits) should be interpreted
     * as an extension operand. The low
     * {@link kExtensionTypeBits} bits of the value correspond to the type ID
     * within the extension and the high {@link kExtensionPrefixBits} bits encode
     * the "prefix", which maps uniquely to the extension name.
     *
     * For example, if a model contains an operation whose value is
     * 0xAAAABBBB and extensionNameToPrefix contains an entry with
     * prefix=0xAAAA and name="vendor.test.test_extension", then
     * the operation should be interpreted as the operation 0xBBBB
     * of the extension named vendor.test.test_extension.
     *
     * This is a one-to-one correspondence. That is, there must be at most one
     * prefix corresponding to each extension name and at most one extension
     * name corresponding to each prefix.
     */
    std::vector<ExtensionNameAndPrefix> extensionNameToPrefix;
};

/**
 * A buffer descriptor. Describes the properties of a buffer.
 */
struct BufferDesc {
    /**
     * Dimensions of the buffer. May have unknown dimensions or rank. A buffer with some number
     * of unspecified dimensions is represented by setting each unspecified dimension to 0. A
     * buffer with unspecified rank is represented by providing an empty dimensions vector.
     */
    Dimensions dimensions;
};

/**
 * Describes a role of an input or output to a prepared model.
 */
struct BufferRole {
    /**
     * The index of the IPreparedModel within the "preparedModel" argument passed in
     * IDevice::allocate.
     */
    uint32_t modelIndex = 0;

    /**
     * The index of the input or output operand.
     */
    uint32_t ioIndex = 0;

    /**
     * A floating-point value within the range (0.0, 1.0]. Describes how likely the
     * buffer is to be used in the specified role. This is provided as a hint to
     * optimize the case when multiple roles prefer different buffer locations or data
     * layouts.
     */
    float probability = 0.0f;
};

/**
 * Inputs to be sent to and outputs to be retrieved from a prepared model.
 *
 * A Request serves two primary tasks:
 * 1) Provides the input and output data to be used when executing the model.
 * 2) Specifies any updates to the input operand metadata that were left
 *    unspecified at model preparation time.
 *
 * An output must not overlap with any other output, with an input, or
 * with an operand of lifetime CONSTANT_REFERENCE.
 */
struct Request {
    /**
     * Metadata information specifying the location of the input or output data and
     * any updates to the input or output operand.
     */
    struct Argument {
        enum class LifeTime {
            POOL = 0,
            NO_VALUE = 1,
            POINTER = 2,
        };

        LifeTime lifetime{};

        /**
         * The location within one of the memory pools passed in the Request.
         */
        DataLocation location;

        /**
         * Updated dimension information.
         *
         * If dimensions.size() > 0, dimension information was provided
         * along with the argument. This can be the case for models that
         * accept inputs of varying size. This can't change the rank, just
         * the value of the dimensions that were unspecified in the
         * model. If dimensions.size() > 0, then all dimensions must be
         * specified here; and any dimension that was specified in the
         * model must have the same value here.
         *
         * If the dimensions in the model are not fully specified, then
         * they must be fully specified here, unless hasNoValue is set to
         * true. If the dimensions in the model are fully specified, then
         * either dimensions.size() may be 0, or the dimensions in the
         * model must be identical to the dimensions here.
         */
        Dimensions dimensions;
    };

    /**
     * Specifies a driver-managed buffer. It is the token corresponding to an
     * IBuffer returned from IDevice::allocate, and is specific to the IDevice
     * object.
     */
    enum class MemoryDomainToken : uint32_t {};

    /**
     * A memory pool.
     */
    using MemoryPool = std::variant<SharedMemory, MemoryDomainToken, SharedBuffer>;

    /**
     * Input data and information to be used in the execution of a prepared
     * model.
     *
     * The index of the input corresponds to the index in Model::main::inputIndexes.
     * E.g., inputs[i] corresponds to Model::main::inputIndexes[i].
     */
    std::vector<Argument> inputs;

    /**
     * Output data and information to be used in the execution of a prepared
     * model.
     *
     * The index of the output corresponds to the index in Model::main::outputIndexes.
     * E.g., outputs[i] corresponds to Model::main::outputIndexes[i].
     */
    std::vector<Argument> outputs;

    /**
     * A collection of memory pools containing operand data for both the
     * inputs and the outputs to a model.
     */
    std::vector<MemoryPool> pools;
};

// Representation of sync_fence.
class SyncFence {
   public:
    // Creates a fence that is already in the signaled state.
    static SyncFence createAsSignaled();
    // Wraps an existing sync fence file descriptor.
    static SyncFence create(base::unique_fd fd);
    static Result<SyncFence> create(SharedHandle syncFence);

    // The function syncWait() has the same semantics as the system function
    // ::sync_wait(), except that the syncWait() return value is semantically
    // richer.
    enum class FenceState {
        ACTIVE,    // fence has not been signaled
        SIGNALED,  // fence has been signaled
        ERROR,     // fence has been placed in the error state
        UNKNOWN,   // either bad argument passed to syncWait(), or internal error
    };
    using Timeout = std::chrono::duration<int, std::milli>;
    // std::nullopt is interpreted by the implementation of syncWait (see the
    // ::sync_wait semantics referenced above).
    using OptionalTimeout = std::optional<Timeout>;

    FenceState syncWait(OptionalTimeout optionalTimeout) const;

    SharedHandle getSharedHandle() const;
    bool hasFd() const;
    int getFd() const;

   private:
    explicit SyncFence(SharedHandle syncFence);

    SharedHandle mSyncFence;
};

// Monotonic clock that keeps counting across suspend (Android boot clock).
using Clock = base::boot_clock;

using Duration = std::chrono::nanoseconds;
using OptionalDuration = std::optional<Duration>;

using TimePoint = std::chrono::time_point<Clock, Duration>;
using OptionalTimePoint = std::optional<TimePoint>;

/**
 * Timing information measured during execution. Each time is a duration from
 * the beginning of some task to the end of that task, including time when that
 * task is not active (for example, preempted by some other task, or
 * waiting for some resource to become available).
 *
 * Times are measured in nanoseconds.
 */
struct Timing {
    /** Execution time on device (not driver, which runs on host processor). */
    OptionalDuration timeOnDevice;
    /** Execution time in driver (including time on device). */
    OptionalDuration timeInDriver;
};

// Returns status, timingLaunched, timingFenced
using ExecuteFencedInfoCallback = std::function<GeneralResult<std::pair<Timing, Timing>>()>;

// Version is a tuple that contains what NNAPI feature level is supported/required and whether
// runtime-only features are supported/required.
struct Version {
    enum class Level : uint8_t {
        FEATURE_LEVEL_1,
        FEATURE_LEVEL_2,
        FEATURE_LEVEL_3,
        FEATURE_LEVEL_4,
        FEATURE_LEVEL_5,
        FEATURE_LEVEL_6,
        FEATURE_LEVEL_7,
        FEATURE_LEVEL_8,
#ifdef NN_EXPERIMENTAL_FEATURE
        FEATURE_LEVEL_EXPERIMENTAL,
#endif  // NN_EXPERIMENTAL_FEATURE
    };

    Level level;
    bool runtimeOnlyFeatures = false;
};

// Convenience constants for each feature level (runtimeOnlyFeatures = false).
constexpr auto kVersionFeatureLevel1 = Version{.level = Version::Level::FEATURE_LEVEL_1};
constexpr auto kVersionFeatureLevel2 = Version{.level = Version::Level::FEATURE_LEVEL_2};
constexpr auto kVersionFeatureLevel3 = Version{.level = Version::Level::FEATURE_LEVEL_3};
constexpr auto kVersionFeatureLevel4 = Version{.level = Version::Level::FEATURE_LEVEL_4};
constexpr auto kVersionFeatureLevel5 = Version{.level = Version::Level::FEATURE_LEVEL_5};
constexpr auto kVersionFeatureLevel6 = Version{.level = Version::Level::FEATURE_LEVEL_6};
constexpr auto kVersionFeatureLevel7 = Version{.level = Version::Level::FEATURE_LEVEL_7};
constexpr auto kVersionFeatureLevel8 = Version{.level = Version::Level::FEATURE_LEVEL_8};
#ifdef NN_EXPERIMENTAL_FEATURE
constexpr auto kVersionFeatureLevelExperimental =
        Version{.level = Version::Level::FEATURE_LEVEL_EXPERIMENTAL};
#endif  // NN_EXPERIMENTAL_FEATURE

// Describes the memory preference of an operand.
struct MemoryPreference {
    // Must be a power of 2.
    // For pointer buffers, the alignment is satisfied if the address of the pointer is a multiple
    // of the "alignment" value. For memory pools, the alignment is satisfied if the offset of the
    // sub-region specified by DataLocation is a multiple of the "alignment" value.
    uint32_t alignment;
    // Must be a power of 2.
    // For both pointer buffers and memory pools, the padding is satisfied if the padded length is
    // greater than or equal to the raw size of the operand (i.e. the size of an element multiplied
    // by the number of elements) rounding up to a multiple of the "padding" value. In DataLocation,
    // the padded length equals to the sum of the length and padding fields.
    uint32_t padding;
};

/**
 * A type that is used to represent a token / byte array data pair.
 */
struct TokenValuePair {
    /**
     * A 32bit integer token. The token is created by combining the
     * extension prefix and enum defined within the extension. Of the 32 bits in the token, the high
     * kExtensionPrefixBits bits is the extension prefix and the low kExtensionTypeBits bits
     * represents the enum within the extension.
     *
     * For example, if a token value is 0x7AAA000B and corresponding {@link ExtensionNameAndPrefix}
     * contains an entry with prefix=0x7AAA and name="vendor.test.test_extension", then the token
     * should be interpreted as the enum value 0x000B of the extension named
     * vendor.test.test_extension.
     */
    int32_t token;
    /**
     * A byte array containing the raw data.
     */
    std::vector<uint8_t> value;
};

}  // namespace android::nn

#endif  // ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_TYPES_NNAPI_TYPES_H