1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_TYPES_NNAPI_TYPES_H
18 #define ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_TYPES_NNAPI_TYPES_H
19 
20 #include <android-base/chrono_utils.h>
21 #include <android-base/expected.h>
22 #include <android-base/unique_fd.h>
23 
24 #include <array>
25 #include <chrono>
26 #include <functional>
27 #include <limits>
28 #include <memory>
29 #include <optional>
30 #include <string>
31 #include <type_traits>
32 #include <utility>
33 #include <variant>
34 #include <vector>
35 
36 #include "nnapi/OperandTypes.h"
37 #include "nnapi/OperationTypes.h"
38 #include "nnapi/Result.h"
39 
40 // Forward declare AHardwareBuffer
41 extern "C" typedef struct AHardwareBuffer AHardwareBuffer;
42 
43 namespace android::nn {
44 
45 // Forward declarations
46 
47 class IBuffer;
48 class IBurst;
49 class IDevice;
50 class IExecution;
51 class IPreparedModel;
52 struct Memory;
53 
54 // Constants
55 
// Worst-case performance values used when a driver does not report its own
// numbers; see Capabilities::PerformanceInfo (lower is better).
constexpr float kDefaultExecTime = std::numeric_limits<float>::max();
constexpr float kDefaultPowerUsage = std::numeric_limits<float>::max();
// Byte length of a compilation caching token (see CacheToken below).
constexpr uint32_t kByteSizeOfCacheToken = 32;
// Upper bound on the number of cache files a driver may request.
constexpr uint32_t kMaxNumberOfCacheFiles = 32;

/**
 * Numeric values of extension operand and operation types have the
 * following structure:
 * - 16 high bits represent the "prefix", which corresponds uniquely to the
 *   extension name.
 * - 16 low bits represent the type ID within the extension.
 */
constexpr uint8_t kExtensionTypeBits = 16;
constexpr uint8_t kExtensionPrefixBits = 16;
// Mask selecting the low "type within extension" bits of a numeric type value.
constexpr uint32_t kTypeWithinExtensionMask = 0xFFFF;

// Default and minimum alignment/padding values, in bytes, for memory regions
// used with a Request (see DataLocation::offset and DataLocation::padding).
constexpr uint32_t kDefaultRequestMemoryAlignment = 64;
constexpr uint32_t kDefaultRequestMemoryPadding = 64;
constexpr uint32_t kMinMemoryAlignment = alignof(std::max_align_t);
constexpr uint32_t kMinMemoryPadding = 1;
// Default and maximum loop timeout durations (presumably bounding
// OperationType::WHILE execution — confirm against NeuralNetworks.h).
constexpr auto kLoopTimeoutDefault = std::chrono::seconds{2};
constexpr auto kLoopTimeoutMaximum = std::chrono::seconds{15};
78 
79 // Aliases
80 
// Shared-ownership handles to the canonical interface objects and Memory
// forward-declared above. The pointees are const: the objects are accessed
// read-only through these aliases.
using SharedBuffer = std::shared_ptr<const IBuffer>;
using SharedBurst = std::shared_ptr<const IBurst>;
using SharedDevice = std::shared_ptr<const IDevice>;
using SharedExecution = std::shared_ptr<const IExecution>;
using SharedMemory = std::shared_ptr<const Memory>;
using SharedPreparedModel = std::shared_ptr<const IPreparedModel>;
87 
88 // Canonical types
89 
90 /**
91  * Status of a device.
92  */
93 enum class DeviceStatus {
94     AVAILABLE = 0,
95     BUSY = 1,
96     OFFLINE = 2,
97     UNKNOWN = 3,
98 };
99 
100 /**
101  * Execution preferences.
102  */
103 enum class ExecutionPreference {
104     /**
105      * Prefer executing in a way that minimizes battery drain.
106      * This is desirable for compilations that will be executed often.
107      */
108     LOW_POWER = 0,
109     /**
110      * Prefer returning a single answer as fast as possible, even if this causes
111      * more power consumption.
112      */
113     FAST_SINGLE_ANSWER = 1,
114     /**
115      * Prefer maximizing the throughput of successive frames, for example when
116      * processing successive frames coming from the camera.
117      */
118     SUSTAINED_SPEED = 2,
119     DEFAULT = FAST_SINGLE_ANSWER,
120 };
121 
122 /**
123  * Device types.
124  *
125  * The type of NNAPI device.
126  */
127 enum class DeviceType {
128     /** The device type cannot be provided. */
129     UNKNOWN = 0,
130     /** The device does not fall into any category below. */
131     OTHER = 1,
132     /** The device runs NNAPI models on single or multi-core CPU. */
133     CPU = 2,
134     /** The device can run NNAPI models and also accelerate graphics APIs such
135      * as OpenGL ES and Vulkan. */
136     GPU = 3,
137     /** Dedicated accelerator for Machine Learning workloads. */
138     ACCELERATOR = 4,
139 };
140 
141 /**
142  * Specifies whether or not to measure timing information during execution.
143  */
144 enum class MeasureTiming {
145     NO = 0,
146     YES = 1,
147 };
148 
149 /**
150  * Priority given to a prepared model for execution.
151  */
152 enum class Priority {
153     LOW = 0,
154     MEDIUM = 1,
155     HIGH = 2,
156     DEFAULT = MEDIUM,
157 };
158 
// TODO: Should more errors from NeuralNetworks.h be incorporated? The left name shows errors that
// appear in NeuralNetworks.h but not in the HAL, and the right column shows what these values could
// map to:
// * OUT_OF_MEMORY ==> GENERAL_FAILURE / RESOURCE_EXHAUSTED_*
// * INCOMPLETE ==> GENERAL_FAILURE
// * UNEXPECTED_NULL ==> INVALID_ARGUMENT
// * UNMAPPABLE ==> GENERAL_FAILURE
// * BAD_STATE ==> INVALID_ARGUMENT
enum class ErrorStatus {
    /** No error. */
    NONE = 0,
    /** The device is not available for use. */
    DEVICE_UNAVAILABLE = 1,
    /** A failure not covered by a more specific code. */
    GENERAL_FAILURE = 2,
    /** An output buffer was too small for the result; see
     *  OutputShape::isSufficient. */
    OUTPUT_INSUFFICIENT_SIZE = 3,
    /** An argument provided by the caller was invalid. */
    INVALID_ARGUMENT = 4,
    /** A deadline was missed, but retrying may succeed. */
    MISSED_DEADLINE_TRANSIENT = 5,
    /** A deadline was missed, and retrying is unlikely to succeed. */
    MISSED_DEADLINE_PERSISTENT = 6,
    /** A resource was exhausted, but retrying may succeed. */
    RESOURCE_EXHAUSTED_TRANSIENT = 7,
    /** A resource was exhausted, and retrying is unlikely to succeed. */
    RESOURCE_EXHAUSTED_PERSISTENT = 8,
    /** The remote object is no longer alive. Deliberately out-of-band with
     *  respect to the contiguous values above. */
    DEAD_OBJECT = 10000,
};
179 
/**
 * Error type returned from general (non-execution) operations: an error code
 * paired with a human-readable message.
 */
struct GeneralError {
    // Implicit so that a plain message string (or nothing at all) can be used
    // where a GeneralError is expected.
    // NOLINTNEXTLINE(google-explicit-constructor)
    /*implicit*/ GeneralError(std::string message = {},
                              ErrorStatus code = ErrorStatus::GENERAL_FAILURE);

    // Human-readable description of the failure.
    std::string message;
    // Canonical error code; defaults to ErrorStatus::GENERAL_FAILURE.
    ErrorStatus code;
};

// Holds either a Type (on success) or a GeneralError (on failure).
template <typename Type>
using GeneralResult = base::expected<Type, GeneralError>;
191 
192 /**
193  * Fused activation function types.
194  */
195 enum class FusedActivationFunc : int32_t {
196     /** NO fused activation function. */
197     NONE = 0,
198     /** Fused ReLU activation function. */
199     RELU = 1,
200     /** Fused ReLU1 activation function. */
201     RELU1 = 2,
202     /** Fused ReLU6 activation function. */
203     RELU6 = 3,
204 };
205 
// A single operand dimension, and the list of dimensions describing a tensor.
using Dimension = uint32_t;
using Dimensions = std::vector<Dimension>;

// Opaque token identifying cached compilation artifacts.
using CacheToken = std::array<uint8_t, kByteSizeOfCacheToken>;
210 
211 /**
212  * Describes the shape information of an output operand after execution.
213  */
214 struct OutputShape {
215     /**
216      * Dimensions of the operand.
217      */
218     std::vector<uint32_t> dimensions;
219 
220     /**
221      * Whether the provided buffer size is sufficient for the output.
222      */
223     bool isSufficient = false;
224 };
225 
/**
 * Error type returned from execution operations. Like GeneralError, but also
 * carries the output shapes so a caller can resize its buffers after an
 * OUTPUT_INSUFFICIENT_SIZE failure.
 */
struct ExecutionError {
    // Implicit so that a plain message string (or nothing at all) can be used
    // where an ExecutionError is expected.
    // NOLINTNEXTLINE(google-explicit-constructor)
    /*implicit*/ ExecutionError(std::string message = {},
                                ErrorStatus code = ErrorStatus::GENERAL_FAILURE,
                                std::vector<OutputShape> outputShapes = {});

    // Implicit so a GeneralError can be propagated as an ExecutionError.
    // NOLINTNEXTLINE(google-explicit-constructor)
    /*implicit*/ ExecutionError(GeneralError error);

    // Human-readable description of the failure.
    std::string message;
    // Canonical error code; defaults to ErrorStatus::GENERAL_FAILURE.
    ErrorStatus code;
    // OutputShapes for code == OUTPUT_INSUFFICIENT_SIZE
    std::vector<OutputShape> outputShapes;
};

// Holds either a Type (on success) or an ExecutionError (on failure).
template <typename Type>
using ExecutionResult = base::expected<Type, ExecutionError>;
243 
244 /**
245  * The capabilities of a driver.
246  *
247  * This represents performance of non-extension operations.
248  *
249  * Performance of an operation other than {@link OperationType::IF} and
250  * {@link OperationType::WHILE} comes from the type of its first operand.
251  */
252 struct Capabilities {
253     /**
254      * Performance information for the reference workload.
255      *
256      * Used by a driver to report its performance characteristics.
257      */
258     struct PerformanceInfo {
259         /**
260          * Ratio of the time taken by the driver to execute the
261          * workload compared to the time the CPU would take for the
262          * same workload. A lower number is better.
263          */
264         float execTime = kDefaultExecTime;
265 
266         /**
267          * Ratio of the energy used by the driver compared to what
268          * the CPU would use for doing the same workload. A lower number
269          * is better.
270          */
271         float powerUsage = kDefaultPowerUsage;
272     };
273 
274     /**
275      * Driver performance when operating on a particular data type.
276      * In the case of float32 data, this is used when the calculations
277      * are not relaxed.
278      */
279     struct OperandPerformance {
280         OperandType type{};
281         PerformanceInfo info;
282     };
283 
284     class OperandPerformanceTable {
285        public:
286         static Result<OperandPerformanceTable> create(
287                 std::vector<OperandPerformance> operandPerformances);
288 
289         PerformanceInfo lookup(OperandType type) const;
290         const std::vector<OperandPerformance>& asVector() const;
291 
292        private:
293         explicit OperandPerformanceTable(std::vector<OperandPerformance> operandPerformances);
294         std::vector<OperandPerformance> mSorted;
295     };
296 
297     /**
298      * Driver performance when operating on float32 data but performing
299      * calculations with range and/or precision as low as that of the IEEE
300      * 754 16-bit floating-point format.
301      */
302     PerformanceInfo relaxedFloat32toFloat16PerformanceScalar;
303     PerformanceInfo relaxedFloat32toFloat16PerformanceTensor;
304 
305     /**
306      * Performance by operand type. Must be sorted by OperandType.
307      *
308      * If a particular {@link OperandType} is not present in operandPerformance,
309      * its performance is treated as
310      * { .execTime = FLT_MAX, .powerUsage = FLT_MAX }.
311      *
312      * Performance does not apply to {@link OperandType::SUBGRAPH}, and a driver
313      * must not report operand performance for {@link OperandType::SUBGRAPH}.
314      */
315     OperandPerformanceTable operandPerformance;
316 
317     /**
318      * Performance of an {@link OperationType::IF} operation is the sum of
319      * {@link Capabilities::ifPerformance} and the mean of performance for the
320      * two branch subgraphs, where performance for a subgraph is the sum of the
321      * performance of all operations within the subgraph.
322      */
323     PerformanceInfo ifPerformance;
324 
325     /**
326      * Performance of a {@link OperationType::WHILE} operation is the sum of
327      * {@link Capabilities::whilePerformance}, performance for the condition
328      * subgraph and performance for the body subgraph, where performance for a
329      * subgraph is the sum of the performance of all operations within the
330      * subgraph.
331      */
332     PerformanceInfo whilePerformance;
333 };
334 
335 /**
336  * Information about an extension.
337  */
338 struct Extension {
339     /**
340      * Information about an extension operand type.
341      */
342     struct OperandTypeInformation {
343         /**
344          * The extension operand type.
345          */
346         uint16_t type = 0;
347 
348         /**
349          * Indicates whether the extension operand type represents a tensor or
350          * a scalar.
351          */
352         bool isTensor = false;
353 
354         /**
355          * The byte size of the operand (if scalar) or of a single element (if
356          * tensor).
357          */
358         uint32_t byteSize = 0;
359     };
360 
361     /**
362      * The extension name.
363      *
364      * The name must consist of lowercase latin letters, numbers, periods, and
365      * underscore signs. The name must contain at least one period.
366      *
367      * The name must start with the reverse domain name of the vendor.
368      *
369      * Example: com.google.test_extension
370      */
371     std::string name;
372 
373     /**
374      * Information about operand types defined by the extension.
375      */
376     std::vector<OperandTypeInformation> operandTypes;
377 };
378 
379 /**
380  * Describes one operation of the model's graph.
381  */
382 struct Operation {
383     /**
384      * The operation type.
385      */
386     OperationType type{};
387 
388     /**
389      * Describes the table that contains the indexes of the inputs of the
390      * operation. The offset is the index in the operandIndexes table.
391      */
392     std::vector<uint32_t> inputs;
393 
394     /**
395      * Describes the table that contains the indexes of the outputs of the
396      * operation. The offset is the index in the operandIndexes table.
397      */
398     std::vector<uint32_t> outputs;
399 };
400 
401 /**
402  * Describes the location of a data object.
403  */
404 struct DataLocation {
405     /**
406      * The address of the memory where the data is found.
407      *
408      * This field is only active when lifetime is POINTER.
409      */
410     std::variant<const void*, void*> pointer;
411 
412     /**
413      * The index of the memory pool where this location is found.
414      */
415     uint32_t poolIndex = 0;
416 
417     /**
418      * Offset in bytes from the start of the pool.
419      */
420     uint32_t offset = 0;
421 
422     /**
423      * The length of the data in bytes.
424      */
425     uint32_t length = 0;
426 
427     /**
428      * The end padding of the specified memory region in bytes.
429      */
430     uint32_t padding = 0;
431 };
432 
433 /**
434  * Describes one operand of the model's graph.
435  */
436 struct Operand {
437     /**
438      * How an operand is used.
439      */
440     enum class LifeTime {
441         /**
442          * The operand is internal to the model. It's created by an operation and
443          * consumed by other operations. It must be an output operand of
444          * exactly one operation.
445          */
446         TEMPORARY_VARIABLE = 0,
447 
448         /**
449          * The operand is an input of a subgraph. It must not be an output
450          * operand of any operation.
451          *
452          * An operand can't be both input and output of a subgraph.
453          */
454         SUBGRAPH_INPUT = 1,
455 
456         /**
457          * The operand is an output of a subgraph. It must be an output
458          * operand of exactly one operation.
459          *
460          * An operand can't be both input and output of a subgraph.
461          */
462         SUBGRAPH_OUTPUT = 2,
463 
464         /**
465          * The operand is a constant found in Model::operandValues. It must
466          * not be an output operand of any operation.
467          */
468         CONSTANT_COPY = 3,
469 
470         /**
471          * The operand is a constant that was specified via a Memory
472          * object. It must not be an output operand of any operation.
473          */
474         CONSTANT_REFERENCE = 4,
475 
476         /**
477          * The operand does not have a value. This is valid only for optional
478          * arguments of operations.
479          */
480         NO_VALUE = 5,
481 
482         /**
483          * The operand is a reference to a subgraph. It must be an input to one
484          * or more {@link OperationType::IF} or {@link OperationType::WHILE}
485          * operations.
486          */
487         SUBGRAPH = 6,
488 
489         /**
490          * This operand is a constant found in a user buffer. It must not be an
491          * output operand of any operation.
492          */
493         POINTER = 7,
494     };
495 
496     /**
497      * No additional parameters.
498      */
499     using NoParams = std::monostate;
500 
501     /**
502      * Parameters for TENSOR_QUANT8_SYMM_PER_CHANNEL operand.
503      */
504     struct SymmPerChannelQuantParams {
505         /** Array of scaling values for each channel. Each value must be greater than zero. */
506         std::vector<float> scales;
507         /** Index of the channel dimension */
508         uint32_t channelDim = 0;
509     };
510 
511     /**
512      * Extension operand parameters.
513      *
514      * The framework treats this as an opaque data blob.
515      * The format is up to individual extensions.
516      */
517     using ExtensionParams = std::vector<uint8_t>;
518 
519     /**
520      * Additional parameters specific to a particular operand type.
521      */
522     using ExtraParams = std::variant<NoParams, SymmPerChannelQuantParams, ExtensionParams>;
523 
524     /**
525      * The data type.
526      *
527      * Besides the values listed in {@link OperationType}, any value equal or over
528      * (1 << kExtensionTypeBits) is possible and should be interpreted
529      * as an extension type according to {@link Model::extensionNameToPrefix}.
530      */
531     OperandType type{};
532 
533     /**
534      * Dimensions of the operand.
535      *
536      * For a scalar operand, dimensions.size() must be 0.
537      *
538      * A tensor operand with all dimensions specified has "fully
539      * specified" dimensions. Whenever possible (i.e., whenever the
540      * dimensions are known at model construction time), a tensor
541      * operand should have (but is not required to have) fully
542      * specified dimensions, in order to enable the best possible
543      * performance.
544      *
545      * If a tensor operand's dimensions are not fully specified, the
546      * dimensions of the operand are deduced from the operand
547      * dimensions and values of the operation for which that operand
548      * is an output or from the corresponding {@link OperationType::IF} or
549      * {@link OperationType::WHILE} operation input operand dimensions in the
550      * case of referenced subgraph input operands.
551      *
552      * In the following situations, a tensor operand's dimensions must
553      * be fully specified:
554      *
555      *     - The operand has lifetime CONSTANT_COPY, CONSTANT_REFERENCE, or
556      *       POINTER.
557      *
558      *     - The operand has lifetime SUBGRAPH_INPUT and belongs to the main
559      *       subgraph. Fully specified dimensions must either be present in the
560      *       Operand or they must be provided in the corresponding
561      *       RequestArgument.
562      *       EXCEPTION: If the input is optional and omitted
563      *       (by setting the hasNoValue field of the corresponding
564      *       RequestArgument to true) then it need not have fully
565      *       specified dimensions.
566      *
567      * A tensor operand with some number of unspecified dimensions is
568      * represented by setting each unspecified dimension to 0.
569      *
570      * A tensor operand with unspecified rank is represented by providing
571      * an empty dimensions vector.
572      */
573     Dimensions dimensions;
574 
575     /**
576      * Quantized scale of the operand.
577      *
578      * Must be 0 when not applicable to an operand type.
579      *
580      * See {@link OperandType}.
581      */
582     float scale = 0.0f;
583 
584     /**
585      * Quantized zero-point offset of the operand.
586      *
587      * Must be 0 when not applicable to an operand type.
588      *
589      * See {@link OperandType}.
590      */
591     int32_t zeroPoint = 0;
592 
593     /**
594      * How the operand is used.
595      */
596     LifeTime lifetime{};
597 
598     /**
599      * Where to find the data for this operand.
600      * If the lifetime is TEMPORARY_VARIABLE, SUBGRAPH_INPUT, SUBGRAPH_OUTPUT,
601      * or NO_VALUE:
602      * - All the fields must be 0.
603      * If the lifetime is CONSTANT_COPY:
604      * - location.pointer is null.
605      * - location.poolIndex is 0.
606      * - location.offset is the offset in bytes into Model::operandValues.
607      * - location.length is set.
608      * - location.padding is 0.
609      * If the lifetime is CONSTANT_REFERENCE:
610      * - location.pointer is null.
611      * - location.poolIndex is set.
612      * - location.offset is the offset in bytes into the specified pool.
613      * - location.length is set.
614      * - location.padding is set.
615      * If the lifetime is SUBGRAPH:
616      * - location.pointer is null.
617      * - location.poolIndex is 0.
618      * - location.offset is the index of the referenced subgraph in
619      *   {@link Model::referenced}.
620      * - location.length is 0.
621      * - location.padding is 0.
622      * If the lifetime is POINTER:
623      * - location.pointer is non-null.
624      * - location.poolIndex is 0.
625      * - location.offset is 0.
626      * - location.length is set.
627      * - location.padding is 0.
628      */
629     DataLocation location;
630 
631     /**
632      * Additional parameters specific to a particular operand type.
633      */
634     ExtraParams extraParams;
635 };
636 
// An owned file descriptor and a shared read-only handle to one.
using Handle = base::unique_fd;
using SharedHandle = std::shared_ptr<const Handle>;

/**
 * A memory object backed by one of several underlying representations.
 *
 * NOTE(review): the size/prot/offset fields below carry no default member
 * initializers, so creators must set every field explicitly.
 */
struct Memory {
    // Memory backed by an ashmem region.
    struct Ashmem {
        base::unique_fd fd;
        size_t size;
    };

    // Memory backed by a plain file descriptor (mmap-style parameters).
    struct Fd {
        size_t size;
        // Protection flags for mapping (presumably PROT_* values — confirm
        // with the code that performs the mapping).
        int prot;
        base::unique_fd fd;
        size_t offset;
    };

    // RAII wrapper for AHardwareBuffer
    struct HardwareBuffer {
        // Custom deleter releasing the AHardwareBuffer.
        using Deleter = std::add_pointer_t<void(AHardwareBuffer*)>;
        using Handle = std::unique_ptr<AHardwareBuffer, Deleter>;
        Handle handle;
    };

    // Memory of an unrecognized kind, described by raw fds and ints plus a
    // name (mirrors a native_handle-style payload).
    struct Unknown {
        struct Handle {
            std::vector<base::unique_fd> fds;
            std::vector<int> ints;
        };
        Handle handle;
        size_t size;
        std::string name;
    };

    // The active underlying representation.
    std::variant<Ashmem, Fd, HardwareBuffer, Unknown> handle;
};
672 
673 /**
674  * The mapping between extension names and prefixes of values like operand and operation type, and
675  * token in {@link TokenValuePair}.
676  *
677  * An operand or operation whose numeric type value is above {@link IDevice::OPERAND_TYPE_BASE_MAX}
678  * or {@link IDevice::OPERATION_TYPE_BASE_MAX} respectively should be interpreted as an extension
679  * operand/operation. The low kExtensionTypeBits bits of the value correspond to the type ID within
680  * the extension and the high kExtensionPrefixBits bits encode the "prefix", which maps uniquely to
681  * the extension name. The sign bit is always 0.
682  *
683  * For example, if a model contains an operation whose value is 0x7AAABBBB and
684  * Model::extensionNameToPrefix contains an entry with prefix=0x7AAA and
685  * name="vendor.test.test_extension", then the operation should be interpreted as the operation
686  * 0xBBBB of the extension named vendor.test.test_extension.
687  *
688  * This is a one-to-one correspondence. That is, there must be at most one prefix corresponding to
689  * each extension name and at most one extension name corresponding to each prefix.
690  */
691 struct ExtensionNameAndPrefix {
692     /**
693      * The extension name.
694      *
695      * See {@link Extension::name} for the format specification.
696      */
697     std::string name;
698 
699     /**
700      * The extension prefix. Only the lowest 15 bits are used, so the value must be less than 32768.
701      */
702     uint16_t prefix = 0;
703 };
704 
705 /**
706  * A Neural Network Model.
707  *
708  * This includes not only the execution graph, but also constant data such as
709  * weights or scalars added at construction time. The only information that
710  * may not be known is the shape of the input tensors.
711  */
712 struct Model {
713     /**
714      * An excerpt of the execution graph.
715      */
716     struct Subgraph {
717         /**
718          * All operands included in the subgraph.
719          */
720         std::vector<Operand> operands;
721 
722         /**
723          * All operations included in the subgraph.
724          *
725          * The operations are sorted into execution order. Every operand
726          * with lifetime SUBGRAPH_OUTPUT or TEMPORARY_VARIABLE must be
727          * written before it is read.
728          */
729         std::vector<Operation> operations;
730 
731         /**
732          * Input indexes of the subgraph. There must be at least one.
733          *
734          * Each value corresponds to the index of the operand in "operands".
735          */
736         std::vector<uint32_t> inputIndexes;
737 
738         /**
739          * Output indexes of the subgraph. There must be at least one.
740          *
741          * Each value corresponds to the index of the operand in "operands".
742          */
743         std::vector<uint32_t> outputIndexes;
744     };
745 
746     class OperandValues {
747        public:
748         OperandValues();
749         OperandValues(const uint8_t* data, size_t length);
750 
751         // Append a segment of memory (starting at `data` with `length` number of bytes) to the back
752         // of `OperandValues`, adding padding as necessary so that the appended data is aligned.
753         // Refer to `getAlignmentForLength` for more information on alignment (such as what the
754         // current alignments are for different data lengths).
755         DataLocation append(const uint8_t* data, size_t length);
756 
757         const uint8_t* data() const;
758         size_t size() const;
759 
760        private:
761         std::vector<uint8_t> mData;
762     };
763 
764     /**
765      * The top-level subgraph.
766      */
767     Subgraph main;
768 
769     /**
770      * Referenced subgraphs.
771      *
772      * Each subgraph is referenced by the main subgraph or at least one other
773      * referenced subgraph.
774      *
775      * There must be no reference cycles.
776      */
777     std::vector<Subgraph> referenced;
778 
779     /**
780      * A byte buffer containing operand data that were copied into the model.
781      *
782      * An operand's value must be located here if and only if Operand::lifetime
783      * equals Operand::LifeTime::CONSTANT_COPY.
784      */
785     OperandValues operandValues;
786 
787     /**
788      * A collection of shared memory pools containing operand values.
789      *
790      * An operand's value must be located here if and only if Operand::lifetime
791      * equals Operand::LifeTime::CONSTANT_REFERENCE.
792      */
793     std::vector<SharedMemory> pools;
794 
795     /**
796      * 'true' indicates TENSOR_FLOAT32 may be calculated with range and/or
797      * precision as low as that of the IEEE 754 16-bit floating-point format.
798      * 'false' indicates TENSOR_FLOAT32 must be calculated using at least the
799      * range and precision of the IEEE 754 32-bit floating-point format.
800      */
801     bool relaxComputationFloat32toFloat16 = false;
802 
803     /**
804      * The mapping between extension names and prefixes of operand and
805      * operation type values.
806      *
807      * An operand or operation whose numeric type value is equal to or greater
808      * than (1 << kExtensionTypeBits) should be interpreted
809      * as an extension operand. The low
810      * {@link kExtensionTypeBits} bits of the value correspond to the type ID
811      * within the extension and the high {@link kExtensionPrefixBits} bits encode
812      * the "prefix", which maps uniquely to the extension name.
813      *
814      * For example, if a model contains an operation whose value is
815      * 0xAAAABBBB and extensionNameToPrefix contains an entry with
816      * prefix=0xAAAA and name="vendor.test.test_extension", then
817      * the operation should be interpreted as the operation 0xBBBB
818      * of the extension named vendor.test.test_extension.
819      *
820      * This is a one-to-one correspondence. That is, there must be at most one
821      * prefix corresponding to each extension name and at most one extension
822      * name corresponding to each prefix.
823      */
824     std::vector<ExtensionNameAndPrefix> extensionNameToPrefix;
825 };
826 
827 /**
828  * A buffer descriptor. Describes the properties of a buffer.
829  */
830 struct BufferDesc {
831     /**
832      * Dimensions of the buffer. May have unknown dimensions or rank. A buffer with some number
833      * of unspecified dimensions is represented by setting each unspecified dimension to 0. A
834      * buffer with unspecified rank is represented by providing an empty dimensions vector.
835      */
836     Dimensions dimensions;
837 };
838 
839 /**
840  * Describes a role of an input or output to a prepared model.
841  */
842 struct BufferRole {
843     /**
844      * The index of the IPreparedModel within the "preparedModel" argument passed in
845      * IDevice::allocate.
846      */
847     uint32_t modelIndex = 0;
848 
849     /**
850      * The index of the input or output operand.
851      */
852     uint32_t ioIndex = 0;
853 
854     /**
855      * A floating-point value within the range (0.0, 1.0]. Describes how likely the
856      * buffer is to be used in the specified role. This is provided as a hint to
857      * optimize the case when multiple roles prefer different buffer locations or data
858      * layouts.
859      */
860     float probability = 0.0f;
861 };
862 
863 /**
864  * Inputs to be sent to and outputs to be retrieved from a prepared model.
865  *
866  * A Request serves two primary tasks:
867  * 1) Provides the input and output data to be used when executing the model.
868  * 2) Specifies any updates to the input operand metadata that were left
869  *    unspecified at model preparation time.
870  *
871  * An output must not overlap with any other output, with an input, or
872  * with an operand of lifetime CONSTANT_REFERENCE.
873  */
874 struct Request {
875     /**
876      * Metadata information specifying the location of the input or output data and
877      * any updates to the input or output operand.
878      */
879     struct Argument {
880         enum class LifeTime {
881             POOL = 0,
882             NO_VALUE = 1,
883             POINTER = 2,
884         };
885 
886         LifeTime lifetime{};
887 
888         /**
889          * The location within one of the memory pools passed in the Request.
890          */
891         DataLocation location;
892 
893         /**
894          * Updated dimension information.
895          *
896          * If dimensions.size() > 0, dimension information was provided
897          * along with the argument. This can be the case for models that
898          * accept inputs of varying size. This can't change the rank, just
899          * the value of the dimensions that were unspecified in the
900          * model. If dimensions.size() > 0, then all dimensions must be
901          * specified here; and any dimension that was specified in the
902          * model must have the same value here.
903          *
904          * If the dimensions in the model are not fully specified, then
905          * they must be fully specified here, unless hasNoValue is set to
906          * true. If the dimensions in the model are fully specified, then
907          * either dimensions.size() may be 0, or the dimensions in the
908          * model must be identical to the dimensions here.
909          */
910         Dimensions dimensions;
911     };
912 
913     /**
914      * Specifies a driver-managed buffer. It is the token corresponding to an
915      * IBuffer returned from IDevice::allocate, and is specific to the IDevice
916      * object.
917      */
918     enum class MemoryDomainToken : uint32_t {};
919 
920     /**
921      * A memory pool.
922      */
923     using MemoryPool = std::variant<SharedMemory, MemoryDomainToken, SharedBuffer>;
924 
925     /**
926      * Input data and information to be used in the execution of a prepared
927      * model.
928      *
929      * The index of the input corresponds to the index in Model::main::inputIndexes.
930      *   E.g., inputs[i] corresponds to Model::main::inputIndexes[i].
931      */
932     std::vector<Argument> inputs;
933 
934     /**
935      * Output data and information to be used in the execution of a prepared
936      * model.
937      *
938      * The index of the output corresponds to the index in Model::main::outputIndexes.
939      *   E.g., outputs[i] corresponds to Model::main::outputIndexes[i].
940      */
941     std::vector<Argument> outputs;
942 
943     /**
944      * A collection of memory pools containing operand data for both the
945      * inputs and the outputs to a model.
946      */
947     std::vector<MemoryPool> pools;
948 };
949 
// Representation of sync_fence.
//
// Wraps an optional platform sync fence handle. A SyncFence created via
// createAsSignaled() is treated as already signaled; otherwise it wraps the
// fence supplied at creation.
class SyncFence {
   public:
    // Creates a SyncFence that is considered already signaled.
    static SyncFence createAsSignaled();
    // Wraps an owned sync fence file descriptor.
    static SyncFence create(base::unique_fd fd);
    // Wraps an existing handle; returns an error Result when the handle
    // cannot be used as a sync fence (exact validation lives in the
    // implementation -- see SyncFence definition in the .cpp).
    static Result<SyncFence> create(SharedHandle syncFence);

    // The function syncWait() has the same semantics as the system function
    // ::sync_wait(), except that the syncWait() return value is semantically
    // richer.
    enum class FenceState {
        ACTIVE,    // fence has not been signaled
        SIGNALED,  // fence has been signaled
        ERROR,     // fence has been placed in the error state
        UNKNOWN,   // either bad argument passed to syncWait(), or internal error
    };
    // Timeout resolution is whole milliseconds, matching ::sync_wait's int
    // millisecond parameter.
    using Timeout = std::chrono::duration<int, std::milli>;
    using OptionalTimeout = std::optional<Timeout>;

    // Waits on the fence; std::nullopt presumably means wait without a
    // timeout (::sync_wait(fd, -1) semantics) -- confirm in implementation.
    FenceState syncWait(OptionalTimeout optionalTimeout) const;

    // Accessors for the underlying handle / file descriptor. hasFd()
    // indicates whether getFd() returns a usable descriptor.
    SharedHandle getSharedHandle() const;
    bool hasFd() const;
    int getFd() const;

   private:
    explicit SyncFence(SharedHandle syncFence);

    SharedHandle mSyncFence;
};
980 
// Clock used for all NNAPI time points. base::boot_clock corresponds to
// CLOCK_BOOTTIME, which keeps counting while the device is suspended.
using Clock = base::boot_clock;

// All durations in this API are expressed in nanoseconds.
using Duration = std::chrono::nanoseconds;
using OptionalDuration = std::optional<Duration>;

// A point in time on Clock, with nanosecond resolution.
using TimePoint = std::chrono::time_point<Clock, Duration>;
using OptionalTimePoint = std::optional<TimePoint>;
988 
989 /**
990  * Timing information measured during execution. Each time is a duration from
991  * the beginning of some task to the end of that task, including time when that
992  * task is not active (for example, preempted by some other task, or
993  * waiting for some resource to become available).
994  *
995  * Times are measured in nanoseconds.
996  */
997 struct Timing {
998     /** Execution time on device (not driver, which runs on host processor). */
999     OptionalDuration timeOnDevice;
1000     /** Execution time in driver (including time on device). */
1001     OptionalDuration timeInDriver;
1002 };
1003 
// Callback invoked after a fenced execution to retrieve its result.
// Returns status, timingLaunched, timingFenced: the GeneralResult conveys the
// status, and the pair holds the two Timing measurements (launched, fenced).
using ExecuteFencedInfoCallback = std::function<GeneralResult<std::pair<Timing, Timing>>()>;
1006 
// Version is a tuple that contains what NNAPI feature level is supported/required and whether
// runtime-only features are supported/required.
struct Version {
    // NNAPI feature levels, listed in increasing order of capability.
    // uint8_t keeps the enum (and Version) compact.
    enum class Level : uint8_t {
        FEATURE_LEVEL_1,
        FEATURE_LEVEL_2,
        FEATURE_LEVEL_3,
        FEATURE_LEVEL_4,
        FEATURE_LEVEL_5,
        FEATURE_LEVEL_6,
        FEATURE_LEVEL_7,
        FEATURE_LEVEL_8,
#ifdef NN_EXPERIMENTAL_FEATURE
        // Only present in builds with experimental features enabled.
        FEATURE_LEVEL_EXPERIMENTAL,
#endif  // NN_EXPERIMENTAL_FEATURE
    };

    Level level;
    // Whether features available only in the runtime (not in drivers) are
    // supported/required; defaults to false.
    bool runtimeOnlyFeatures = false;
};
1027 
// Canonical Version constants, one per feature level. Each leaves
// runtimeOnlyFeatures at its default (false).
constexpr auto kVersionFeatureLevel1 = Version{.level = Version::Level::FEATURE_LEVEL_1};
constexpr auto kVersionFeatureLevel2 = Version{.level = Version::Level::FEATURE_LEVEL_2};
constexpr auto kVersionFeatureLevel3 = Version{.level = Version::Level::FEATURE_LEVEL_3};
constexpr auto kVersionFeatureLevel4 = Version{.level = Version::Level::FEATURE_LEVEL_4};
constexpr auto kVersionFeatureLevel5 = Version{.level = Version::Level::FEATURE_LEVEL_5};
constexpr auto kVersionFeatureLevel6 = Version{.level = Version::Level::FEATURE_LEVEL_6};
constexpr auto kVersionFeatureLevel7 = Version{.level = Version::Level::FEATURE_LEVEL_7};
constexpr auto kVersionFeatureLevel8 = Version{.level = Version::Level::FEATURE_LEVEL_8};
#ifdef NN_EXPERIMENTAL_FEATURE
constexpr auto kVersionFeatureLevelExperimental =
        Version{.level = Version::Level::FEATURE_LEVEL_EXPERIMENTAL};
#endif  // NN_EXPERIMENTAL_FEATURE
1040 
// Describes the memory preference of an operand.
//
// Drivers report these constraints so callers can place operand data at
// addresses/offsets and with padded lengths the device can consume.
struct MemoryPreference {
    // Must be a power of 2.
    // For pointer buffers, the alignment is satisfied if the address of the pointer is a multiple
    // of the "alignment" value. For memory pools, the alignment is satisfied if the offset of the
    // sub-region specified by DataLocation is a multiple of the "alignment" value.
    uint32_t alignment;
    // Must be a power of 2.
    // For both pointer buffers and memory pools, the padding is satisfied if the padded length is
    // greater than or equal to the raw size of the operand (i.e. the size of an element multiplied
    // by the number of elements) rounding up to a multiple of the "padding" value. In DataLocation,
    // the padded length equals to the sum of the length and padding fields.
    uint32_t padding;
};
1055 
1056 /**
1057  * A type that is used to represent a token / byte array data pair.
1058  */
1059 struct TokenValuePair {
1060     /**
1061      * A 32bit integer token. The token is created by combining the
1062      * extension prefix and enum defined within the extension. Of the 32 bits in the token, the high
1063      * kExtensionPrefixBits bits is the extension prefix and the low kExtensionTypeBits bits
1064      * represents the enum within the extension.
1065      *
1066      * For example, if a token value is 0x7AAA000B and corresponding {@link ExtensionNameAndPrefix}
1067      * contains an entry with prefix=0x7AAA and name="vendor.test.test_extension", then the token
1068      * should be interpreted as the enum value 0x000B of the extension named
1069      * vendor.test.test_extension.
1070      */
1071     int32_t token;
1072     /**
1073      * A byte array containing the raw data.
1074      */
1075     std::vector<uint8_t> value;
1076 };
1077 
1078 }  // namespace android::nn
1079 
1080 #endif  // ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_TYPES_NNAPI_TYPES_H
1081