1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Provides C++ classes to more easily use the Neural Networks API.
18 
19 #ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H
20 #define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H
21 
#include "NeuralNetworks.h"

#include <assert.h>
#include <math.h>

#include <algorithm>
#include <optional>
#include <string>
#include <utility>
#include <vector>
30 
31 namespace android {
32 namespace nn {
33 namespace wrapper {
34 
// Operand data types. Each enumerator aliases the corresponding
// ANEURALNETWORKS_* operand code, so a value may be cast directly to the
// int32_t type field of ANeuralNetworksOperandType.
enum class Type {
    FLOAT32 = ANEURALNETWORKS_FLOAT32,
    INT32 = ANEURALNETWORKS_INT32,
    UINT32 = ANEURALNETWORKS_UINT32,
    TENSOR_FLOAT32 = ANEURALNETWORKS_TENSOR_FLOAT32,
    TENSOR_INT32 = ANEURALNETWORKS_TENSOR_INT32,
    TENSOR_QUANT8_ASYMM = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
    BOOL = ANEURALNETWORKS_BOOL,
    TENSOR_QUANT16_SYMM = ANEURALNETWORKS_TENSOR_QUANT16_SYMM,
    TENSOR_FLOAT16 = ANEURALNETWORKS_TENSOR_FLOAT16,
    TENSOR_BOOL8 = ANEURALNETWORKS_TENSOR_BOOL8,
    FLOAT16 = ANEURALNETWORKS_FLOAT16,
    TENSOR_QUANT8_SYMM_PER_CHANNEL = ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL,
    TENSOR_QUANT16_ASYMM = ANEURALNETWORKS_TENSOR_QUANT16_ASYMM,
    TENSOR_QUANT8_SYMM = ANEURALNETWORKS_TENSOR_QUANT8_SYMM,
    MODEL = ANEURALNETWORKS_MODEL,
};
52 
// Compilation preference hints (low power, single fast answer, or sustained
// throughput), aliasing the ANEURALNETWORKS_PREFER_* constants.
enum class ExecutePreference {
    PREFER_LOW_POWER = ANEURALNETWORKS_PREFER_LOW_POWER,
    PREFER_FAST_SINGLE_ANSWER = ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER,
    PREFER_SUSTAINED_SPEED = ANEURALNETWORKS_PREFER_SUSTAINED_SPEED
};
58 
// Relative execution priorities, aliasing the ANEURALNETWORKS_PRIORITY_*
// constants.
enum class ExecutePriority {
    LOW = ANEURALNETWORKS_PRIORITY_LOW,
    MEDIUM = ANEURALNETWORKS_PRIORITY_MEDIUM,
    HIGH = ANEURALNETWORKS_PRIORITY_HIGH,
    DEFAULT = ANEURALNETWORKS_PRIORITY_DEFAULT,
};
65 
// Status codes returned by the wrapper classes; direct aliases of the
// ANEURALNETWORKS_* result codes returned by the C API.
enum class Result {
    NO_ERROR = ANEURALNETWORKS_NO_ERROR,
    OUT_OF_MEMORY = ANEURALNETWORKS_OUT_OF_MEMORY,
    INCOMPLETE = ANEURALNETWORKS_INCOMPLETE,
    UNEXPECTED_NULL = ANEURALNETWORKS_UNEXPECTED_NULL,
    BAD_DATA = ANEURALNETWORKS_BAD_DATA,
    OP_FAILED = ANEURALNETWORKS_OP_FAILED,
    UNMAPPABLE = ANEURALNETWORKS_UNMAPPABLE,
    BAD_STATE = ANEURALNETWORKS_BAD_STATE,
    OUTPUT_INSUFFICIENT_SIZE = ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE,
    UNAVAILABLE_DEVICE = ANEURALNETWORKS_UNAVAILABLE_DEVICE,
    MISSED_DEADLINE_TRANSIENT = ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT,
    MISSED_DEADLINE_PERSISTENT = ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT,
};
80 
81 struct SymmPerChannelQuantParams {
82     ANeuralNetworksSymmPerChannelQuantParams params;
83     std::vector<float> scales;
84 
SymmPerChannelQuantParamsSymmPerChannelQuantParams85     SymmPerChannelQuantParams(std::vector<float> scalesVec, uint32_t channelDim)
86         : scales(std::move(scalesVec)) {
87         params = {
88                 .channelDim = channelDim,
89                 .scaleCount = static_cast<uint32_t>(scales.size()),
90                 .scales = scales.size() > 0 ? scales.data() : nullptr,
91         };
92     }
93 
SymmPerChannelQuantParamsSymmPerChannelQuantParams94     SymmPerChannelQuantParams(const SymmPerChannelQuantParams& other)
95         : params(other.params), scales(other.scales) {
96         params.scales = scales.size() > 0 ? scales.data() : nullptr;
97     }
98 
99     SymmPerChannelQuantParams& operator=(const SymmPerChannelQuantParams& other) {
100         if (this != &other) {
101             params = other.params;
102             scales = other.scales;
103             params.scales = scales.size() > 0 ? scales.data() : nullptr;
104         }
105         return *this;
106     }
107 };
108 
109 struct OperandType {
110     ANeuralNetworksOperandType operandType;
111     std::vector<uint32_t> dimensions;
112     std::optional<SymmPerChannelQuantParams> channelQuant;
113 
OperandTypeOperandType114     OperandType(const OperandType& other)
115         : operandType(other.operandType),
116           dimensions(other.dimensions),
117           channelQuant(other.channelQuant) {
118         operandType.dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr;
119     }
120 
121     OperandType& operator=(const OperandType& other) {
122         if (this != &other) {
123             operandType = other.operandType;
124             dimensions = other.dimensions;
125             channelQuant = other.channelQuant;
126             operandType.dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr;
127         }
128         return *this;
129     }
130 
131     OperandType(Type type, std::vector<uint32_t> d, float scale = 0.0f, int32_t zeroPoint = 0)
dimensionsOperandType132         : dimensions(std::move(d)), channelQuant(std::nullopt) {
133         operandType = {
134                 .type = static_cast<int32_t>(type),
135                 .dimensionCount = static_cast<uint32_t>(dimensions.size()),
136                 .dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr,
137                 .scale = scale,
138                 .zeroPoint = zeroPoint,
139         };
140     }
141 
OperandTypeOperandType142     OperandType(Type type, std::vector<uint32_t> data, SymmPerChannelQuantParams&& channelQuant)
143         : dimensions(std::move(data)), channelQuant(std::move(channelQuant)) {
144         assert(type == Type::TENSOR_QUANT8_SYMM_PER_CHANNEL);
145 
146         operandType = {
147                 .type = static_cast<int32_t>(type),
148                 .dimensionCount = static_cast<uint32_t>(dimensions.size()),
149                 .dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr,
150                 .scale = 0.0f,
151                 .zeroPoint = 0,
152         };
153     }
154 };
155 
156 class Memory {
157    public:
Memory(size_t size,int protect,int fd,size_t offset)158     Memory(size_t size, int protect, int fd, size_t offset) {
159         mValid = ANeuralNetworksMemory_createFromFd(size, protect, fd, offset, &mMemory) ==
160                  ANEURALNETWORKS_NO_ERROR;
161     }
162 
Memory(AHardwareBuffer * buffer)163     Memory(AHardwareBuffer* buffer) {
164         mValid = ANeuralNetworksMemory_createFromAHardwareBuffer(buffer, &mMemory) ==
165                  ANEURALNETWORKS_NO_ERROR;
166     }
167 
~Memory()168     ~Memory() { ANeuralNetworksMemory_free(mMemory); }
169 
170     // Disallow copy semantics to ensure the runtime object can only be freed
171     // once. Copy semantics could be enabled if some sort of reference counting
172     // or deep-copy system for runtime objects is added later.
173     Memory(const Memory&) = delete;
174     Memory& operator=(const Memory&) = delete;
175 
176     // Move semantics to remove access to the runtime object from the wrapper
177     // object that is being moved. This ensures the runtime object will be
178     // freed only once.
Memory(Memory && other)179     Memory(Memory&& other) { *this = std::move(other); }
180     Memory& operator=(Memory&& other) {
181         if (this != &other) {
182             ANeuralNetworksMemory_free(mMemory);
183             mMemory = other.mMemory;
184             mValid = other.mValid;
185             other.mMemory = nullptr;
186             other.mValid = false;
187         }
188         return *this;
189     }
190 
get()191     ANeuralNetworksMemory* get() const { return mMemory; }
isValid()192     bool isValid() const { return mValid; }
193 
194    private:
195     ANeuralNetworksMemory* mMemory = nullptr;
196     bool mValid = true;
197 };
198 
199 class Model {
200    public:
Model()201     Model() {
202         // TODO handle the value returned by this call
203         ANeuralNetworksModel_create(&mModel);
204     }
~Model()205     ~Model() { ANeuralNetworksModel_free(mModel); }
206 
207     // Disallow copy semantics to ensure the runtime object can only be freed
208     // once. Copy semantics could be enabled if some sort of reference counting
209     // or deep-copy system for runtime objects is added later.
210     Model(const Model&) = delete;
211     Model& operator=(const Model&) = delete;
212 
213     // Move semantics to remove access to the runtime object from the wrapper
214     // object that is being moved. This ensures the runtime object will be
215     // freed only once.
Model(Model && other)216     Model(Model&& other) { *this = std::move(other); }
217     Model& operator=(Model&& other) {
218         if (this != &other) {
219             ANeuralNetworksModel_free(mModel);
220             mModel = other.mModel;
221             mNextOperandId = other.mNextOperandId;
222             mValid = other.mValid;
223             other.mModel = nullptr;
224             other.mNextOperandId = 0;
225             other.mValid = false;
226         }
227         return *this;
228     }
229 
finish()230     Result finish() {
231         if (mValid) {
232             auto result = static_cast<Result>(ANeuralNetworksModel_finish(mModel));
233             if (result != Result::NO_ERROR) {
234                 mValid = false;
235             }
236             return result;
237         } else {
238             return Result::BAD_STATE;
239         }
240     }
241 
addOperand(const OperandType * type)242     uint32_t addOperand(const OperandType* type) {
243         if (ANeuralNetworksModel_addOperand(mModel, &(type->operandType)) !=
244             ANEURALNETWORKS_NO_ERROR) {
245             mValid = false;
246         }
247         if (type->channelQuant) {
248             if (ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
249                         mModel, mNextOperandId, &type->channelQuant.value().params) !=
250                 ANEURALNETWORKS_NO_ERROR) {
251                 mValid = false;
252             }
253         }
254         return mNextOperandId++;
255     }
256 
setOperandValue(uint32_t index,const void * buffer,size_t length)257     void setOperandValue(uint32_t index, const void* buffer, size_t length) {
258         if (ANeuralNetworksModel_setOperandValue(mModel, index, buffer, length) !=
259             ANEURALNETWORKS_NO_ERROR) {
260             mValid = false;
261         }
262     }
263 
setOperandValueFromMemory(uint32_t index,const Memory * memory,uint32_t offset,size_t length)264     void setOperandValueFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
265                                    size_t length) {
266         if (ANeuralNetworksModel_setOperandValueFromMemory(mModel, index, memory->get(), offset,
267                                                            length) != ANEURALNETWORKS_NO_ERROR) {
268             mValid = false;
269         }
270     }
271 
addOperation(ANeuralNetworksOperationType type,const std::vector<uint32_t> & inputs,const std::vector<uint32_t> & outputs)272     void addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs,
273                       const std::vector<uint32_t>& outputs) {
274         if (ANeuralNetworksModel_addOperation(mModel, type, static_cast<uint32_t>(inputs.size()),
275                                               inputs.data(), static_cast<uint32_t>(outputs.size()),
276                                               outputs.data()) != ANEURALNETWORKS_NO_ERROR) {
277             mValid = false;
278         }
279     }
identifyInputsAndOutputs(const std::vector<uint32_t> & inputs,const std::vector<uint32_t> & outputs)280     void identifyInputsAndOutputs(const std::vector<uint32_t>& inputs,
281                                   const std::vector<uint32_t>& outputs) {
282         if (ANeuralNetworksModel_identifyInputsAndOutputs(
283                     mModel, static_cast<uint32_t>(inputs.size()), inputs.data(),
284                     static_cast<uint32_t>(outputs.size()),
285                     outputs.data()) != ANEURALNETWORKS_NO_ERROR) {
286             mValid = false;
287         }
288     }
289 
relaxComputationFloat32toFloat16(bool isRelax)290     void relaxComputationFloat32toFloat16(bool isRelax) {
291         if (ANeuralNetworksModel_relaxComputationFloat32toFloat16(mModel, isRelax) ==
292             ANEURALNETWORKS_NO_ERROR) {
293             mRelaxed = isRelax;
294         }
295     }
296 
getHandle()297     ANeuralNetworksModel* getHandle() const { return mModel; }
isValid()298     bool isValid() const { return mValid; }
isRelaxed()299     bool isRelaxed() const { return mRelaxed; }
300 
301    protected:
302     ANeuralNetworksModel* mModel = nullptr;
303     // We keep track of the operand ID as a convenience to the caller.
304     uint32_t mNextOperandId = 0;
305     bool mValid = true;
306     bool mRelaxed = false;
307 };
308 
309 class Event {
310    public:
Event()311     Event() {}
~Event()312     ~Event() { ANeuralNetworksEvent_free(mEvent); }
313 
314     // Disallow copy semantics to ensure the runtime object can only be freed
315     // once. Copy semantics could be enabled if some sort of reference counting
316     // or deep-copy system for runtime objects is added later.
317     Event(const Event&) = delete;
318     Event& operator=(const Event&) = delete;
319 
320     // Move semantics to remove access to the runtime object from the wrapper
321     // object that is being moved. This ensures the runtime object will be
322     // freed only once.
Event(Event && other)323     Event(Event&& other) { *this = std::move(other); }
324     Event& operator=(Event&& other) {
325         if (this != &other) {
326             ANeuralNetworksEvent_free(mEvent);
327             mEvent = other.mEvent;
328             other.mEvent = nullptr;
329         }
330         return *this;
331     }
332 
wait()333     Result wait() { return static_cast<Result>(ANeuralNetworksEvent_wait(mEvent)); }
334 
335     // Only for use by Execution
set(ANeuralNetworksEvent * newEvent)336     void set(ANeuralNetworksEvent* newEvent) {
337         ANeuralNetworksEvent_free(mEvent);
338         mEvent = newEvent;
339     }
340 
341     // Only for use by Execution
getHandle()342     ANeuralNetworksEvent* getHandle() const { return mEvent; }
343 
344    private:
345     ANeuralNetworksEvent* mEvent = nullptr;
346 };
347 
348 class Compilation {
349    public:
Compilation(const Model * model)350     Compilation(const Model* model) {
351         int result = ANeuralNetworksCompilation_create(model->getHandle(), &mCompilation);
352         if (result != 0) {
353             // TODO Handle the error
354         }
355     }
356 
~Compilation()357     ~Compilation() { ANeuralNetworksCompilation_free(mCompilation); }
358 
359     // Disallow copy semantics to ensure the runtime object can only be freed
360     // once. Copy semantics could be enabled if some sort of reference counting
361     // or deep-copy system for runtime objects is added later.
362     Compilation(const Compilation&) = delete;
363     Compilation& operator=(const Compilation&) = delete;
364 
365     // Move semantics to remove access to the runtime object from the wrapper
366     // object that is being moved. This ensures the runtime object will be
367     // freed only once.
Compilation(Compilation && other)368     Compilation(Compilation&& other) { *this = std::move(other); }
369     Compilation& operator=(Compilation&& other) {
370         if (this != &other) {
371             ANeuralNetworksCompilation_free(mCompilation);
372             mCompilation = other.mCompilation;
373             other.mCompilation = nullptr;
374         }
375         return *this;
376     }
377 
setPreference(ExecutePreference preference)378     Result setPreference(ExecutePreference preference) {
379         return static_cast<Result>(ANeuralNetworksCompilation_setPreference(
380                 mCompilation, static_cast<int32_t>(preference)));
381     }
382 
setPriority(ExecutePriority priority)383     Result setPriority(ExecutePriority priority) {
384         return static_cast<Result>(ANeuralNetworksCompilation_setPriority(
385                 mCompilation, static_cast<int32_t>(priority)));
386     }
387 
setCaching(const std::string & cacheDir,const std::vector<uint8_t> & token)388     Result setCaching(const std::string& cacheDir, const std::vector<uint8_t>& token) {
389         if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN) {
390             return Result::BAD_DATA;
391         }
392         return static_cast<Result>(ANeuralNetworksCompilation_setCaching(
393                 mCompilation, cacheDir.c_str(), token.data()));
394     }
395 
finish()396     Result finish() { return static_cast<Result>(ANeuralNetworksCompilation_finish(mCompilation)); }
397 
getHandle()398     ANeuralNetworksCompilation* getHandle() const { return mCompilation; }
399 
400    private:
401     ANeuralNetworksCompilation* mCompilation = nullptr;
402 };
403 
404 class Execution {
405    public:
Execution(const Compilation * compilation)406     Execution(const Compilation* compilation) {
407         int result = ANeuralNetworksExecution_create(compilation->getHandle(), &mExecution);
408         if (result != 0) {
409             // TODO Handle the error
410         }
411     }
412 
~Execution()413     ~Execution() { ANeuralNetworksExecution_free(mExecution); }
414 
415     // Disallow copy semantics to ensure the runtime object can only be freed
416     // once. Copy semantics could be enabled if some sort of reference counting
417     // or deep-copy system for runtime objects is added later.
418     Execution(const Execution&) = delete;
419     Execution& operator=(const Execution&) = delete;
420 
421     // Move semantics to remove access to the runtime object from the wrapper
422     // object that is being moved. This ensures the runtime object will be
423     // freed only once.
Execution(Execution && other)424     Execution(Execution&& other) { *this = std::move(other); }
425     Execution& operator=(Execution&& other) {
426         if (this != &other) {
427             ANeuralNetworksExecution_free(mExecution);
428             mExecution = other.mExecution;
429             other.mExecution = nullptr;
430         }
431         return *this;
432     }
433 
434     Result setInput(uint32_t index, const void* buffer, size_t length,
435                     const ANeuralNetworksOperandType* type = nullptr) {
436         return static_cast<Result>(
437                 ANeuralNetworksExecution_setInput(mExecution, index, type, buffer, length));
438     }
439 
440     Result setInputFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
441                               uint32_t length, const ANeuralNetworksOperandType* type = nullptr) {
442         return static_cast<Result>(ANeuralNetworksExecution_setInputFromMemory(
443                 mExecution, index, type, memory->get(), offset, length));
444     }
445 
446     Result setOutput(uint32_t index, void* buffer, size_t length,
447                      const ANeuralNetworksOperandType* type = nullptr) {
448         return static_cast<Result>(
449                 ANeuralNetworksExecution_setOutput(mExecution, index, type, buffer, length));
450     }
451 
452     Result setOutputFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
453                                uint32_t length, const ANeuralNetworksOperandType* type = nullptr) {
454         return static_cast<Result>(ANeuralNetworksExecution_setOutputFromMemory(
455                 mExecution, index, type, memory->get(), offset, length));
456     }
457 
startCompute(Event * event)458     Result startCompute(Event* event) {
459         ANeuralNetworksEvent* ev = nullptr;
460         Result result = static_cast<Result>(ANeuralNetworksExecution_startCompute(mExecution, &ev));
461         event->set(ev);
462         return result;
463     }
464 
startComputeWithDependencies(const std::vector<const Event * > & dependencies,uint64_t duration,Event * event)465     Result startComputeWithDependencies(const std::vector<const Event*>& dependencies,
466                                         uint64_t duration, Event* event) {
467         std::vector<const ANeuralNetworksEvent*> deps(dependencies.size());
468         std::transform(dependencies.begin(), dependencies.end(), deps.begin(),
469                        [](const Event* e) { return e->getHandle(); });
470         ANeuralNetworksEvent* ev = nullptr;
471         Result result = static_cast<Result>(ANeuralNetworksExecution_startComputeWithDependencies(
472                 mExecution, deps.data(), deps.size(), duration, &ev));
473         event->set(ev);
474         return result;
475     }
476 
compute()477     Result compute() { return static_cast<Result>(ANeuralNetworksExecution_compute(mExecution)); }
478 
getOutputOperandDimensions(uint32_t index,std::vector<uint32_t> * dimensions)479     Result getOutputOperandDimensions(uint32_t index, std::vector<uint32_t>* dimensions) {
480         uint32_t rank = 0;
481         Result result = static_cast<Result>(
482                 ANeuralNetworksExecution_getOutputOperandRank(mExecution, index, &rank));
483         dimensions->resize(rank);
484         if ((result != Result::NO_ERROR && result != Result::OUTPUT_INSUFFICIENT_SIZE) ||
485             rank == 0) {
486             return result;
487         }
488         result = static_cast<Result>(ANeuralNetworksExecution_getOutputOperandDimensions(
489                 mExecution, index, dimensions->data()));
490         return result;
491     }
492 
493    private:
494     ANeuralNetworksExecution* mExecution = nullptr;
495 };
496 
497 }  // namespace wrapper
498 }  // namespace nn
499 }  // namespace android
500 
501 #endif  //  ANDROID_FRAMEWORKS_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H
502