1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Provides C++ classes to more easily use the Neural Networks API.
18 
19 #ifndef ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H
20 #define ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H
21 
22 #include "NeuralNetworks.h"
23 
24 #include <math.h>
25 #include <optional>
26 #include <string>
27 #include <vector>
28 
29 namespace android {
30 namespace nn {
31 namespace wrapper {
32 
33 enum class Type {
34     FLOAT32 = ANEURALNETWORKS_FLOAT32,
35     INT32 = ANEURALNETWORKS_INT32,
36     UINT32 = ANEURALNETWORKS_UINT32,
37     TENSOR_FLOAT32 = ANEURALNETWORKS_TENSOR_FLOAT32,
38     TENSOR_INT32 = ANEURALNETWORKS_TENSOR_INT32,
39     TENSOR_QUANT8_ASYMM = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
40     BOOL = ANEURALNETWORKS_BOOL,
41     TENSOR_QUANT16_SYMM = ANEURALNETWORKS_TENSOR_QUANT16_SYMM,
42     TENSOR_FLOAT16 = ANEURALNETWORKS_TENSOR_FLOAT16,
43     TENSOR_BOOL8 = ANEURALNETWORKS_TENSOR_BOOL8,
44     FLOAT16 = ANEURALNETWORKS_FLOAT16,
45     TENSOR_QUANT8_SYMM_PER_CHANNEL = ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL,
46     TENSOR_QUANT16_ASYMM = ANEURALNETWORKS_TENSOR_QUANT16_ASYMM,
47     TENSOR_QUANT8_SYMM = ANEURALNETWORKS_TENSOR_QUANT8_SYMM,
48 };
49 
50 enum class ExecutePreference {
51     PREFER_LOW_POWER = ANEURALNETWORKS_PREFER_LOW_POWER,
52     PREFER_FAST_SINGLE_ANSWER = ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER,
53     PREFER_SUSTAINED_SPEED = ANEURALNETWORKS_PREFER_SUSTAINED_SPEED
54 };
55 
56 enum class Result {
57     NO_ERROR = ANEURALNETWORKS_NO_ERROR,
58     OUT_OF_MEMORY = ANEURALNETWORKS_OUT_OF_MEMORY,
59     INCOMPLETE = ANEURALNETWORKS_INCOMPLETE,
60     UNEXPECTED_NULL = ANEURALNETWORKS_UNEXPECTED_NULL,
61     BAD_DATA = ANEURALNETWORKS_BAD_DATA,
62     OP_FAILED = ANEURALNETWORKS_OP_FAILED,
63     UNMAPPABLE = ANEURALNETWORKS_UNMAPPABLE,
64     BAD_STATE = ANEURALNETWORKS_BAD_STATE,
65     OUTPUT_INSUFFICIENT_SIZE = ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE,
66     UNAVAILABLE_DEVICE = ANEURALNETWORKS_UNAVAILABLE_DEVICE,
67 };
68 
69 struct SymmPerChannelQuantParams {
70     ANeuralNetworksSymmPerChannelQuantParams params;
71     std::vector<float> scales;
72 
SymmPerChannelQuantParamsSymmPerChannelQuantParams73     SymmPerChannelQuantParams(std::vector<float> scalesVec, uint32_t channelDim)
74         : scales(std::move(scalesVec)) {
75         params = {
76                 .channelDim = channelDim,
77                 .scaleCount = static_cast<uint32_t>(scales.size()),
78                 .scales = scales.size() > 0 ? scales.data() : nullptr,
79         };
80     }
81 
SymmPerChannelQuantParamsSymmPerChannelQuantParams82     SymmPerChannelQuantParams(const SymmPerChannelQuantParams& other)
83         : params(other.params), scales(other.scales) {
84         params.scales = scales.size() > 0 ? scales.data() : nullptr;
85     }
86 
87     SymmPerChannelQuantParams& operator=(const SymmPerChannelQuantParams& other) {
88         if (this != &other) {
89             params = other.params;
90             scales = other.scales;
91             params.scales = scales.size() > 0 ? scales.data() : nullptr;
92         }
93         return *this;
94     }
95 };
96 
97 struct OperandType {
98     ANeuralNetworksOperandType operandType;
99     std::vector<uint32_t> dimensions;
100     std::optional<SymmPerChannelQuantParams> channelQuant;
101 
OperandTypeOperandType102     OperandType(const OperandType& other)
103         : operandType(other.operandType),
104           dimensions(other.dimensions),
105           channelQuant(other.channelQuant) {
106         operandType.dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr;
107     }
108 
109     OperandType& operator=(const OperandType& other) {
110         if (this != &other) {
111             operandType = other.operandType;
112             dimensions = other.dimensions;
113             channelQuant = other.channelQuant;
114             operandType.dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr;
115         }
116         return *this;
117     }
118 
119     OperandType(Type type, std::vector<uint32_t> d, float scale = 0.0f, int32_t zeroPoint = 0)
dimensionsOperandType120         : dimensions(std::move(d)), channelQuant(std::nullopt) {
121         operandType = {
122                 .type = static_cast<int32_t>(type),
123                 .dimensionCount = static_cast<uint32_t>(dimensions.size()),
124                 .dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr,
125                 .scale = scale,
126                 .zeroPoint = zeroPoint,
127         };
128     }
129 
OperandTypeOperandType130     OperandType(Type type, std::vector<uint32_t> data, float scale, int32_t zeroPoint,
131                 SymmPerChannelQuantParams&& channelQuant)
132         : dimensions(std::move(data)), channelQuant(std::move(channelQuant)) {
133         operandType = {
134                 .type = static_cast<int32_t>(type),
135                 .dimensionCount = static_cast<uint32_t>(dimensions.size()),
136                 .dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr,
137                 .scale = scale,
138                 .zeroPoint = zeroPoint,
139         };
140     }
141 };
142 
143 class Memory {
144    public:
Memory(size_t size,int protect,int fd,size_t offset)145     Memory(size_t size, int protect, int fd, size_t offset) {
146         mValid = ANeuralNetworksMemory_createFromFd(size, protect, fd, offset, &mMemory) ==
147                  ANEURALNETWORKS_NO_ERROR;
148     }
149 
Memory(AHardwareBuffer * buffer)150     Memory(AHardwareBuffer* buffer) {
151         mValid = ANeuralNetworksMemory_createFromAHardwareBuffer(buffer, &mMemory) ==
152                  ANEURALNETWORKS_NO_ERROR;
153     }
154 
~Memory()155     ~Memory() { ANeuralNetworksMemory_free(mMemory); }
156 
157     // Disallow copy semantics to ensure the runtime object can only be freed
158     // once. Copy semantics could be enabled if some sort of reference counting
159     // or deep-copy system for runtime objects is added later.
160     Memory(const Memory&) = delete;
161     Memory& operator=(const Memory&) = delete;
162 
163     // Move semantics to remove access to the runtime object from the wrapper
164     // object that is being moved. This ensures the runtime object will be
165     // freed only once.
Memory(Memory && other)166     Memory(Memory&& other) { *this = std::move(other); }
167     Memory& operator=(Memory&& other) {
168         if (this != &other) {
169             ANeuralNetworksMemory_free(mMemory);
170             mMemory = other.mMemory;
171             mValid = other.mValid;
172             other.mMemory = nullptr;
173             other.mValid = false;
174         }
175         return *this;
176     }
177 
get()178     ANeuralNetworksMemory* get() const { return mMemory; }
isValid()179     bool isValid() const { return mValid; }
180 
181    private:
182     ANeuralNetworksMemory* mMemory = nullptr;
183     bool mValid = true;
184 };
185 
186 class Model {
187    public:
Model()188     Model() {
189         // TODO handle the value returned by this call
190         ANeuralNetworksModel_create(&mModel);
191     }
~Model()192     ~Model() { ANeuralNetworksModel_free(mModel); }
193 
194     // Disallow copy semantics to ensure the runtime object can only be freed
195     // once. Copy semantics could be enabled if some sort of reference counting
196     // or deep-copy system for runtime objects is added later.
197     Model(const Model&) = delete;
198     Model& operator=(const Model&) = delete;
199 
200     // Move semantics to remove access to the runtime object from the wrapper
201     // object that is being moved. This ensures the runtime object will be
202     // freed only once.
Model(Model && other)203     Model(Model&& other) { *this = std::move(other); }
204     Model& operator=(Model&& other) {
205         if (this != &other) {
206             ANeuralNetworksModel_free(mModel);
207             mModel = other.mModel;
208             mNextOperandId = other.mNextOperandId;
209             mValid = other.mValid;
210             other.mModel = nullptr;
211             other.mNextOperandId = 0;
212             other.mValid = false;
213         }
214         return *this;
215     }
216 
finish()217     Result finish() {
218         if (mValid) {
219             auto result = static_cast<Result>(ANeuralNetworksModel_finish(mModel));
220             if (result != Result::NO_ERROR) {
221                 mValid = false;
222             }
223             return result;
224         } else {
225             return Result::BAD_STATE;
226         }
227     }
228 
addOperand(const OperandType * type)229     uint32_t addOperand(const OperandType* type) {
230         if (ANeuralNetworksModel_addOperand(mModel, &(type->operandType)) !=
231             ANEURALNETWORKS_NO_ERROR) {
232             mValid = false;
233         }
234         if (type->channelQuant) {
235             if (ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
236                         mModel, mNextOperandId, &type->channelQuant.value().params) !=
237                 ANEURALNETWORKS_NO_ERROR) {
238                 mValid = false;
239             }
240         }
241         return mNextOperandId++;
242     }
243 
setOperandValue(uint32_t index,const void * buffer,size_t length)244     void setOperandValue(uint32_t index, const void* buffer, size_t length) {
245         if (ANeuralNetworksModel_setOperandValue(mModel, index, buffer, length) !=
246             ANEURALNETWORKS_NO_ERROR) {
247             mValid = false;
248         }
249     }
250 
setOperandValueFromMemory(uint32_t index,const Memory * memory,uint32_t offset,size_t length)251     void setOperandValueFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
252                                    size_t length) {
253         if (ANeuralNetworksModel_setOperandValueFromMemory(mModel, index, memory->get(), offset,
254                                                            length) != ANEURALNETWORKS_NO_ERROR) {
255             mValid = false;
256         }
257     }
258 
addOperation(ANeuralNetworksOperationType type,const std::vector<uint32_t> & inputs,const std::vector<uint32_t> & outputs)259     void addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs,
260                       const std::vector<uint32_t>& outputs) {
261         if (ANeuralNetworksModel_addOperation(mModel, type, static_cast<uint32_t>(inputs.size()),
262                                               inputs.data(), static_cast<uint32_t>(outputs.size()),
263                                               outputs.data()) != ANEURALNETWORKS_NO_ERROR) {
264             mValid = false;
265         }
266     }
identifyInputsAndOutputs(const std::vector<uint32_t> & inputs,const std::vector<uint32_t> & outputs)267     void identifyInputsAndOutputs(const std::vector<uint32_t>& inputs,
268                                   const std::vector<uint32_t>& outputs) {
269         if (ANeuralNetworksModel_identifyInputsAndOutputs(
270                     mModel, static_cast<uint32_t>(inputs.size()), inputs.data(),
271                     static_cast<uint32_t>(outputs.size()),
272                     outputs.data()) != ANEURALNETWORKS_NO_ERROR) {
273             mValid = false;
274         }
275     }
276 
relaxComputationFloat32toFloat16(bool isRelax)277     void relaxComputationFloat32toFloat16(bool isRelax) {
278         if (ANeuralNetworksModel_relaxComputationFloat32toFloat16(mModel, isRelax) ==
279             ANEURALNETWORKS_NO_ERROR) {
280             mRelaxed = isRelax;
281         }
282     }
283 
getHandle()284     ANeuralNetworksModel* getHandle() const { return mModel; }
isValid()285     bool isValid() const { return mValid; }
isRelaxed()286     bool isRelaxed() const { return mRelaxed; }
287 
288    protected:
289     ANeuralNetworksModel* mModel = nullptr;
290     // We keep track of the operand ID as a convenience to the caller.
291     uint32_t mNextOperandId = 0;
292     bool mValid = true;
293     bool mRelaxed = false;
294 };
295 
296 class Event {
297    public:
Event()298     Event() {}
~Event()299     ~Event() { ANeuralNetworksEvent_free(mEvent); }
300 
301     // Disallow copy semantics to ensure the runtime object can only be freed
302     // once. Copy semantics could be enabled if some sort of reference counting
303     // or deep-copy system for runtime objects is added later.
304     Event(const Event&) = delete;
305     Event& operator=(const Event&) = delete;
306 
307     // Move semantics to remove access to the runtime object from the wrapper
308     // object that is being moved. This ensures the runtime object will be
309     // freed only once.
Event(Event && other)310     Event(Event&& other) { *this = std::move(other); }
311     Event& operator=(Event&& other) {
312         if (this != &other) {
313             ANeuralNetworksEvent_free(mEvent);
314             mEvent = other.mEvent;
315             other.mEvent = nullptr;
316         }
317         return *this;
318     }
319 
wait()320     Result wait() { return static_cast<Result>(ANeuralNetworksEvent_wait(mEvent)); }
321 
322     // Only for use by Execution
set(ANeuralNetworksEvent * newEvent)323     void set(ANeuralNetworksEvent* newEvent) {
324         ANeuralNetworksEvent_free(mEvent);
325         mEvent = newEvent;
326     }
327 
328    private:
329     ANeuralNetworksEvent* mEvent = nullptr;
330 };
331 
332 class Compilation {
333    public:
Compilation(const Model * model)334     Compilation(const Model* model) {
335         int result = ANeuralNetworksCompilation_create(model->getHandle(), &mCompilation);
336         if (result != 0) {
337             // TODO Handle the error
338         }
339     }
340 
~Compilation()341     ~Compilation() { ANeuralNetworksCompilation_free(mCompilation); }
342 
343     // Disallow copy semantics to ensure the runtime object can only be freed
344     // once. Copy semantics could be enabled if some sort of reference counting
345     // or deep-copy system for runtime objects is added later.
346     Compilation(const Compilation&) = delete;
347     Compilation& operator=(const Compilation&) = delete;
348 
349     // Move semantics to remove access to the runtime object from the wrapper
350     // object that is being moved. This ensures the runtime object will be
351     // freed only once.
Compilation(Compilation && other)352     Compilation(Compilation&& other) { *this = std::move(other); }
353     Compilation& operator=(Compilation&& other) {
354         if (this != &other) {
355             ANeuralNetworksCompilation_free(mCompilation);
356             mCompilation = other.mCompilation;
357             other.mCompilation = nullptr;
358         }
359         return *this;
360     }
361 
setPreference(ExecutePreference preference)362     Result setPreference(ExecutePreference preference) {
363         return static_cast<Result>(ANeuralNetworksCompilation_setPreference(
364                 mCompilation, static_cast<int32_t>(preference)));
365     }
366 
setCaching(const std::string & cacheDir,const std::vector<uint8_t> & token)367     Result setCaching(const std::string& cacheDir, const std::vector<uint8_t>& token) {
368         if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN) {
369             return Result::BAD_DATA;
370         }
371         return static_cast<Result>(ANeuralNetworksCompilation_setCaching(
372                 mCompilation, cacheDir.c_str(), token.data()));
373     }
374 
finish()375     Result finish() { return static_cast<Result>(ANeuralNetworksCompilation_finish(mCompilation)); }
376 
getHandle()377     ANeuralNetworksCompilation* getHandle() const { return mCompilation; }
378 
379    private:
380     ANeuralNetworksCompilation* mCompilation = nullptr;
381 };
382 
383 class Execution {
384    public:
Execution(const Compilation * compilation)385     Execution(const Compilation* compilation) {
386         int result = ANeuralNetworksExecution_create(compilation->getHandle(), &mExecution);
387         if (result != 0) {
388             // TODO Handle the error
389         }
390     }
391 
~Execution()392     ~Execution() { ANeuralNetworksExecution_free(mExecution); }
393 
394     // Disallow copy semantics to ensure the runtime object can only be freed
395     // once. Copy semantics could be enabled if some sort of reference counting
396     // or deep-copy system for runtime objects is added later.
397     Execution(const Execution&) = delete;
398     Execution& operator=(const Execution&) = delete;
399 
400     // Move semantics to remove access to the runtime object from the wrapper
401     // object that is being moved. This ensures the runtime object will be
402     // freed only once.
Execution(Execution && other)403     Execution(Execution&& other) { *this = std::move(other); }
404     Execution& operator=(Execution&& other) {
405         if (this != &other) {
406             ANeuralNetworksExecution_free(mExecution);
407             mExecution = other.mExecution;
408             other.mExecution = nullptr;
409         }
410         return *this;
411     }
412 
413     Result setInput(uint32_t index, const void* buffer, size_t length,
414                     const ANeuralNetworksOperandType* type = nullptr) {
415         return static_cast<Result>(
416                 ANeuralNetworksExecution_setInput(mExecution, index, type, buffer, length));
417     }
418 
419     Result setInputFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
420                               uint32_t length, const ANeuralNetworksOperandType* type = nullptr) {
421         return static_cast<Result>(ANeuralNetworksExecution_setInputFromMemory(
422                 mExecution, index, type, memory->get(), offset, length));
423     }
424 
425     Result setOutput(uint32_t index, void* buffer, size_t length,
426                      const ANeuralNetworksOperandType* type = nullptr) {
427         return static_cast<Result>(
428                 ANeuralNetworksExecution_setOutput(mExecution, index, type, buffer, length));
429     }
430 
431     Result setOutputFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
432                                uint32_t length, const ANeuralNetworksOperandType* type = nullptr) {
433         return static_cast<Result>(ANeuralNetworksExecution_setOutputFromMemory(
434                 mExecution, index, type, memory->get(), offset, length));
435     }
436 
startCompute(Event * event)437     Result startCompute(Event* event) {
438         ANeuralNetworksEvent* ev = nullptr;
439         Result result = static_cast<Result>(ANeuralNetworksExecution_startCompute(mExecution, &ev));
440         event->set(ev);
441         return result;
442     }
443 
compute()444     Result compute() { return static_cast<Result>(ANeuralNetworksExecution_compute(mExecution)); }
445 
getOutputOperandDimensions(uint32_t index,std::vector<uint32_t> * dimensions)446     Result getOutputOperandDimensions(uint32_t index, std::vector<uint32_t>* dimensions) {
447         uint32_t rank = 0;
448         Result result = static_cast<Result>(
449                 ANeuralNetworksExecution_getOutputOperandRank(mExecution, index, &rank));
450         dimensions->resize(rank);
451         if ((result != Result::NO_ERROR && result != Result::OUTPUT_INSUFFICIENT_SIZE) ||
452             rank == 0) {
453             return result;
454         }
455         result = static_cast<Result>(ANeuralNetworksExecution_getOutputOperandDimensions(
456                 mExecution, index, dimensions->data()));
457         return result;
458     }
459 
460    private:
461     ANeuralNetworksExecution* mExecution = nullptr;
462 };
463 
464 }  // namespace wrapper
465 }  // namespace nn
466 }  // namespace android
467 
468 #endif  //  ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H
469