1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
// Class used to build a model through successive calls to the NN API.
19 
20 #ifndef ANDROID_PACKAGES_MODULES_NEURALNETWORKS_RUNTIME_MODEL_BUILDER_H
21 #define ANDROID_PACKAGES_MODULES_NEURALNETWORKS_RUNTIME_MODEL_BUILDER_H
22 
23 #include <LegacyUtils.h>
24 
25 #include <memory>
26 #include <vector>
27 
28 #include "Memory.h"
29 #include "ModelArchHasher.h"
30 #include "NeuralNetworks.h"
31 
32 namespace android {
33 namespace nn {
34 
35 class CompilationBuilder;
36 class Device;
37 class ExecutionPlan;
38 class RuntimeMemory;
39 
40 class ModelBuilder {
41    public:
ModelBuilder()42     ModelBuilder() {}
43     // Returns an operand/operation type corresponding to a given extension operand/operation type.
44     int getExtensionType(const char* extensionName, uint16_t typeWithinExtension, int32_t* type);
45     // Adds an operand to the model.
46     int addOperand(const ANeuralNetworksOperandType& type);
47     int setOperandValue(uint32_t index, const void* buffer, size_t length);
48     int setOperandValueFromMemory(uint32_t index, const RuntimeMemory* memory, uint32_t offset,
49                                   size_t length);
50     int setOperandValueFromModel(uint32_t index, const ModelBuilder* value);
51     int setOperandSymmPerChannelQuantParams(
52             uint32_t index, const ANeuralNetworksSymmPerChannelQuantParams& extraParams);
53     int setOperandExtensionData(uint32_t index, const void* data, size_t length);
54 
55     int addOperation(ANeuralNetworksOperationType type, uint32_t inputCount, const uint32_t* inputs,
56                      uint32_t outputCount, const uint32_t* outputs);
57     int identifyInputsAndOutputs(uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount,
58                                  const uint32_t* outputs);
59     int relaxComputationFloat32toFloat16(bool allow);
isComputationFloat32RelaxedToFloat16()60     bool isComputationFloat32RelaxedToFloat16() const { return mRelaxComputationFloat32toFloat16; }
61 
62     int finish();
isFinished()63     bool isFinished() const { return mCompletedModel; }
isValid()64     bool isValid() const { return !mInvalidModel; }
65 
hasOEMOperation()66     bool hasOEMOperation() const { return mHasOEMOperation; }
hasExtensionOperation()67     bool hasExtensionOperation() const { return mHasExtensionOperation; }
hasControlFlow()68     bool hasControlFlow() const { return mHasControlFlow; }
69 
70     // explicitDeviceList is true if the list of devices was provided explicitly
71     // via the ANeuralNetworksModel_createForDevices API (which has certain
72     // special semantics) and false otherwise.
73     int createCompilation(CompilationBuilder** compilation,
74                           const std::vector<std::shared_ptr<Device>>& devices,
75                           bool explicitDeviceList = false);
76 
77     Model makeModel() const;
78 
operandCount()79     uint32_t operandCount() const {
80         // We don't allow more than uint32_t worth of operands
81         return static_cast<uint32_t>(mOperands.size());
82     }
operationCount()83     uint32_t operationCount() const {
84         // We don't allow more than uint32_t worth of operations
85         return static_cast<uint32_t>(mOperations.size());
86     }
inputCount()87     uint32_t inputCount() const { return static_cast<uint32_t>(mInputIndexes.size()); }
outputCount()88     uint32_t outputCount() const { return static_cast<uint32_t>(mOutputIndexes.size()); }
getInputOperandIndex(uint32_t i)89     uint32_t getInputOperandIndex(uint32_t i) const {
90         CHECK_LT(i, mInputIndexes.size());
91         return mInputIndexes[i];
92     }
getInputOperandIndexes()93     const std::vector<uint32_t>& getInputOperandIndexes() const { return mInputIndexes; }
getInputOperand(uint32_t i)94     const Operand& getInputOperand(uint32_t i) const {
95         uint32_t index = getInputOperandIndex(i);
96         CHECK_LT(index, mOperands.size());
97         return mOperands[index];
98     }
getOutputOperandIndex(uint32_t i)99     uint32_t getOutputOperandIndex(uint32_t i) const {
100         CHECK_LT(i, mOutputIndexes.size());
101         return mOutputIndexes[i];
102     }
getOutputOperandIndexes()103     const std::vector<uint32_t>& getOutputOperandIndexes() const { return mOutputIndexes; }
getOutputOperand(uint32_t i)104     const Operand& getOutputOperand(uint32_t i) const {
105         uint32_t index = getOutputOperandIndex(i);
106         CHECK_LT(index, mOperands.size());
107         return mOperands[index];
108     }
getOperand(uint32_t index)109     const Operand& getOperand(uint32_t index) const { return mOperands[index]; }
getOperation(uint32_t index)110     const Operation& getOperation(uint32_t index) const { return mOperations[index]; }
getMemories()111     const MemoryTracker& getMemories() const { return mMemories; }
getOperations()112     const std::vector<Operation>& getOperations() const { return mOperations; }
getSortedOperationMapping()113     const std::vector<uint32_t>& getSortedOperationMapping() const {
114         return mSortedOperationIndexMap;
115     }
getPointerToOperandValue(uint32_t offset)116     const uint8_t* getPointerToOperandValue(uint32_t offset) const {
117         return mSmallOperandValues.data() + offset;
118     }
referencedModelCount()119     uint32_t referencedModelCount() const {
120         return static_cast<uint32_t>(mReferencedModels.size());
121     }
getReferencedModel(uint32_t i)122     const ModelBuilder* getReferencedModel(uint32_t i) const {
123         CHECK_LT(i, mReferencedModels.size());
124         return mReferencedModels[i];
125     }
getReferencedModel(const Operand & operand)126     const ModelBuilder* getReferencedModel(const Operand& operand) const {
127         CHECK(operand.lifetime == Operand::LifeTime::SUBGRAPH);
128         return getReferencedModel(operand.location.offset);
129     }
130 
131     // simulateFailureResultCode == ANEURALNETWORKS_NO_ERROR means behave normally.
132     int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices, uint32_t preference,
133                          uint32_t priority, const OptionalTimePoint& deadline, ExecutionPlan* plan,
134                          const std::vector<TokenValuePair>& metaData,
135                          int simulateFailureResultCode = ANEURALNETWORKS_NO_ERROR) const;
136 
137     const uint8_t* getModelArchHash() const;
138 
139    private:
140     // TODO(b/132322449): move partitionTheWork, findBestDeviceForEachOperation,
141     // getPerformance, supportedByControlFlowInterpreter,
142     // isControlFlowOperationWithOperandOfUnknownSize, partitionTheWorkInternal,
143     // sortIntoRunOrder to CompilationBuilder?
144 
145     // Populates bestDeviceForOperation
146     //
147     // For 0 <= i < operationCount(), produces
148     //
149     //     0 <= (*bestDeviceForOperation)[i] <= devices.size()
150     //
151     // (*bestDeviceForOperation)[i] == devices.size() is a special value meaning
152     // that this is a control flow operation scheduled for interpreted execution
153     // (see LogicalStep).
154     int findBestDeviceForEachOperation(uint32_t preference,
155                                        const std::vector<std::shared_ptr<Device>>& devices,
156                                        std::vector<int>* bestDeviceForOperation) const;
157     float getPerformance(uint32_t preference, const std::shared_ptr<Device> device) const;
158     float getPerformance(uint32_t preference, const std::shared_ptr<Device> device,
159                          uint32_t operationIndex) const;
160     bool supportedByControlFlowInterpreter(uint32_t operationIndex) const;
161 
162     // Returns true if the operation is IF or WHILE and has an inner or outer
163     // input or output of unknown size.
164     bool isControlFlowOperationWithOperandOfUnknownSize(uint32_t operationIndex) const;
165 
166     int partitionTheWorkInternal(uint32_t sourceModelIndex,
167                                  const std::vector<std::shared_ptr<Device>>& devices,
168                                  uint32_t preference, uint32_t priority,
169                                  const OptionalTimePoint& deadline, ExecutionPlan* plan) const;
170 
171     // Return true if either mCompleteModel or mInvalidModel is true.
172     bool badState(const char* name);
173 
174     // Removes some trailing operation inputs that are set to default values.
175     //
176     // Some drivers reject operations based on the argument count even when the
177     // optional arguments are set to default values. This transformation enables
178     // more drivers to execute the model. See http://b/147105700.
179     void removeTrailingArgumentsWithDefaultValues();
180     uint32_t getNumTrailingArgumentsToRemove(const Operation& operation) const;
181 
182     // Sorts the operations to be in the correct order for single threaded
183     // node-at-a-time execution.
184     bool sortIntoRunOrder();
185 
186     // Copies the large values to a shared memory, if we have any.
187     int copyLargeValuesToSharedMemory();
188 
189     // Mark that the model should be simplified during ModelBuilder::makeModel, removing arguments
190     // from operations that already match the default values, dead operands, dead pools, dead
191     // subgraphs, and dead extensions.
192     void simplifyModel();
193 
194     // The operations of the graph.
195     std::vector<Operation> mOperations;
196     // The mapping from sorted index to the original index of operations in mOperations.
197     // mSortedOperationIndexMap is empty before sortIntoRunOrder() is called.
198     std::vector<uint32_t> mSortedOperationIndexMap;
199     // Is at least one of those operations an OEM_OPERATION?
200     bool mHasOEMOperation = false;
201     // Is at least one of those operations an extension operation?
202     bool mHasExtensionOperation = false;
203     // The description of the operands of the graph.
204     std::vector<Operand> mOperands;
205     // Is at least one of those operands an OEM operand?
206     bool mHasOEMOperand = false;
207     // The indexes of input operands of the model.
208     std::vector<uint32_t> mInputIndexes;
209     // The indexes of output operands of the model.
210     std::vector<uint32_t> mOutputIndexes;
211     // Whether the model should be simplified during ModelBuilder::makeModel, removing arguments
212     // from operations that already match the default values, dead operands, dead pools, dead
213     // subgraphs, and dead extensions.
214     bool mSimplifyModel = false;
215 
216     MemoryTracker mMemories;
217 
218     // The value of the small operands that are defined at model
219     // creation time.
220     std::vector<uint8_t> mSmallOperandValues;
221 
222     struct LargeValue {
223         uint32_t operandIndex;
224         const void* buffer;
225     };
226     // Operand index and buffer pointer for all the large operand values of this model.
227     std::vector<LargeValue> mLargeOperandValues;
228     // The shared memory region that will contain the large values.
229     std::unique_ptr<MemoryAshmem> mLargeValueMemory;
230 
231     // Once the model has been finished, we should not allow further
232     // modifications to the model.
233     bool mCompletedModel = false;
234 
235     // Any invalid manipulation of the model will mark the model invalid.
236     // No further modifications are allowed to the model.
237     bool mInvalidModel = false;
238 
239     // 'true' indicates TENSOR_FLOAT32 may be calculated with range and/or
240     // precision as low as that of the IEEE 754 16-bit floating-point format.
241     // 'false' indicates TENSOR_FLOAT32 must be calculated using at least the
242     // range and precision of the IEEE 754 32-bit floating-point format.
243     bool mRelaxComputationFloat32toFloat16 = false;
244 
245     // Models referenced by operands in this model.
246     std::vector<const ModelBuilder*> mReferencedModels;
247 
248     // Main subgraphs of models referenced by operands in this model. Required
249     // for validateOperation().
250     std::vector<Model::Subgraph> mReferencedSubgraphsForValidation;
251 
252     // Does the model contain control flow operands or operations?
253     bool mHasControlFlow = false;
254 
255     // Model architecture hash, used for telemetry.
256     uint8_t mModelArchHash[BYTE_SIZE_OF_MODEL_ARCH_HASH];
257 
258     class ModelMaker;
259 };
260 
261 }  // namespace nn
262 }  // namespace android
263 
264 #endif  // ANDROID_PACKAGES_MODULES_NEURALNETWORKS_RUNTIME_MODEL_BUILDER_H
265