1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Contains the implementation of the operations.
18 
19 #define LOG_TAG "Operations"
20 
21 #include "CpuOperationUtils.h"
22 #include "Operations.h"
23 
24 #include "tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h"
25 #include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
26 
27 #include "Tracing.h"
28 
29 namespace android {
30 namespace nn {
31 
copyData(const void * inputData,const Shape & inputShape,void * outputData,const Shape & outputShape)32 bool copyData(const void* inputData, const Shape& inputShape, void* outputData,
33               const Shape& outputShape) {
34     NNTRACE_COMP("copyData");
35     size_t count = nonExtensionOperandSizeOfData(inputShape.type, inputShape.dimensions);
36     memcpy(outputData, inputData, count);
37     return true;
38 }
39 
40 template <typename T>
depthToSpaceGeneric(const T * inputData,const Shape & inputShape,int32_t blockSize,T * outputData,const Shape & outputShape)41 bool depthToSpaceGeneric(const T* inputData, const Shape& inputShape, int32_t blockSize,
42                          T* outputData, const Shape& outputShape) {
43     NNTRACE_COMP("optimized_ops::DepthToSpace");
44     tflite::optimized_ops::DepthToSpace(inputData, convertShapeToDims(inputShape), blockSize,
45                                         outputData, convertShapeToDims(outputShape));
46     return true;
47 }
48 template bool depthToSpaceGeneric<float>(const float* inputData, const Shape& inputShape,
49                                          int32_t blockSize, float* outputData,
50                                          const Shape& outputShape);
51 template bool depthToSpaceGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
52                                             int32_t blockSize, _Float16* outputData,
53                                             const Shape& outputShape);
54 template bool depthToSpaceGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
55                                            int32_t blockSize, uint8_t* outputData,
56                                            const Shape& outputShape);
57 
58 template <typename T>
spaceToDepthGeneric(const T * inputData,const Shape & inputShape,int32_t blockSize,T * outputData,const Shape & outputShape)59 bool spaceToDepthGeneric(const T* inputData, const Shape& inputShape, int32_t blockSize,
60                          T* outputData, const Shape& outputShape) {
61     NNTRACE_COMP("optimized_ops::SpaceToDepth");
62     tflite::optimized_ops::SpaceToDepth(inputData, convertShapeToDims(inputShape), blockSize,
63                                         outputData, convertShapeToDims(outputShape));
64     return true;
65 }
66 template bool spaceToDepthGeneric<float>(const float* inputData, const Shape& inputShape,
67                                          int32_t blockSize, float* outputData,
68                                          const Shape& outputShape);
69 template bool spaceToDepthGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
70                                             int32_t blockSize, _Float16* outputData,
71                                             const Shape& outputShape);
72 template bool spaceToDepthGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
73                                            int32_t blockSize, uint8_t* outputData,
74                                            const Shape& outputShape);
75 
76 template <typename T>
padGeneric(const T * inputData,const Shape & inputShape,const int32_t * paddings,T padValue,T * outputData,const Shape & outputShape)77 bool padGeneric(const T* inputData, const Shape& inputShape, const int32_t* paddings, T padValue,
78                 T* outputData, const Shape& outputShape) {
79     NNTRACE_TRANS("padGeneric");
80 
81     // Based on
82     // http://google3/third_party/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h?l=6194&rcl=213557260
83 
84     // TFLite runtime calls are currently fixed at 4 dimensions. Copy inputs so
85     // we can pad them to 4 dims (yes, we are "padding the padding").
86     int32_t numInputDims = static_cast<int32_t>(getNumberOfDimensions(inputShape));
87     NN_OPS_CHECK(numInputDims <= 4);
88     std::vector<int> leftPaddings(4 - numInputDims, 0);
89     std::vector<int> rightPaddings(4 - numInputDims, 0);
90     for (int32_t i = 0; i < numInputDims; ++i) {
91         leftPaddings.push_back(paddings[i * 2]);
92         rightPaddings.push_back(paddings[i * 2 + 1]);
93     }
94     const int leftBPadding = leftPaddings[0];
95     const int leftHPadding = leftPaddings[1];
96     const int leftWPadding = leftPaddings[2];
97     const int leftDPadding = leftPaddings[3];
98     const int rightBPadding = rightPaddings[0];
99     const int rightHPadding = rightPaddings[1];
100     const int rightWPadding = rightPaddings[2];
101     const int rightDPadding = rightPaddings[3];
102 
103     const auto extInputShape =
104             tflite::RuntimeShape::ExtendedShape(4, convertShapeToTflshape(inputShape));
105     const auto extOutputShape =
106             tflite::RuntimeShape::ExtendedShape(4, convertShapeToTflshape(outputShape));
107 
108     const int outputBatch = extOutputShape.Dims(0);
109     const int outputHeight = extOutputShape.Dims(1);
110     const int outputWidth = extOutputShape.Dims(2);
111     const int outputDepth = extOutputShape.Dims(3);
112 
113     const int inputDepth = extInputShape.Dims(3);
114 
115     NNTRACE_COMP_SWITCH("padGeneric");
116 
117     if (leftBPadding != 0) {
118         tflite::optimized_ops::TypedMemset<T>(
119                 outputData, padValue, leftBPadding * outputHeight * outputWidth * outputDepth);
120     }
121     for (int outB = leftBPadding; outB < outputBatch - rightBPadding; ++outB) {
122         if (leftHPadding != 0) {
123             tflite::optimized_ops::TypedMemset<T>(
124                     outputData + tflite::Offset(extOutputShape, outB, 0, 0, 0), padValue,
125                     leftHPadding * outputWidth * outputDepth);
126         }
127         for (int outH = leftHPadding; outH < outputHeight - rightHPadding; ++outH) {
128             if (leftWPadding != 0) {
129                 tflite::optimized_ops::TypedMemset<T>(
130                         outputData + tflite::Offset(extOutputShape, outB, outH, 0, 0), padValue,
131                         leftWPadding * outputDepth);
132             }
133             for (int outW = leftWPadding; outW < outputWidth - rightWPadding; ++outW) {
134                 if (leftDPadding != 0) {
135                     tflite::optimized_ops::TypedMemset<T>(
136                             outputData + tflite::Offset(extOutputShape, outB, outH, outW, 0),
137                             padValue, leftDPadding);
138                 }
139 
140                 T* out =
141                         outputData + tflite::Offset(extOutputShape, outB, outH, outW, leftDPadding);
142                 const T* in =
143                         inputData + tflite::Offset(extInputShape, outB - leftBPadding,
144                                                    outH - leftHPadding, outW - leftWPadding, 0);
145                 memcpy(out, in, inputDepth * sizeof(T));
146 
147                 if (rightDPadding != 0) {
148                     tflite::optimized_ops::TypedMemset<T>(
149                             outputData + tflite::Offset(extOutputShape, outB, outH, outW,
150                                                         outputDepth - rightDPadding),
151                             padValue, rightDPadding);
152                 }
153             }
154             if (rightWPadding != 0) {
155                 tflite::optimized_ops::TypedMemset<T>(
156                         outputData + tflite::Offset(extOutputShape, outB, outH,
157                                                     outputWidth - rightWPadding, 0),
158                         padValue, rightWPadding * outputDepth);
159             }
160         }
161         if (rightHPadding != 0) {
162             tflite::optimized_ops::TypedMemset<T>(
163                     outputData + tflite::Offset(extOutputShape, outB, outputHeight - rightHPadding,
164                                                 0, 0),
165                     padValue, rightHPadding * outputWidth * outputDepth);
166         }
167     }
168     if (rightBPadding != 0) {
169         tflite::optimized_ops::TypedMemset<T>(
170                 outputData + tflite::Offset(extOutputShape, outputBatch - rightBPadding, 0, 0, 0),
171                 padValue, rightBPadding * outputHeight * outputWidth * outputDepth);
172     }
173 
174     return true;
175 }
176 template bool padGeneric<float>(const float* inputData, const Shape& inputShape,
177                                 const int32_t* paddings, float padValue, float* outputData,
178                                 const Shape& outputShape);
179 template bool padGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
180                                    const int32_t* paddings, _Float16 padValue, _Float16* outputData,
181                                    const Shape& outputShape);
182 template bool padGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
183                                   const int32_t* paddings, uint8_t padValue, uint8_t* outputData,
184                                   const Shape& outputShape);
185 
186 template <typename T>
batchToSpaceGeneric(const T * inputData,const Shape & inputShape,const int32_t * blockSize,T * outputData,const Shape & outputShape)187 bool batchToSpaceGeneric(const T* inputData, const Shape& inputShape, const int32_t* blockSize,
188                          T* outputData, const Shape& outputShape) {
189     // Needed by low level implementation, but not really used.
190     tflite::Dims<4> blockSizeDim, cropsDim;
191     const int32 crops[4] = {0, 0, 0, 0};
192     NNTRACE_COMP("optimized_ops::BatchToSpaceND");
193     tflite::optimized_ops::BatchToSpaceND(inputData, convertShapeToDims(inputShape), blockSize,
194                                           blockSizeDim, crops, cropsDim, outputData,
195                                           convertShapeToDims(outputShape));
196     return true;
197 }
198 template bool batchToSpaceGeneric<float>(const float* inputData, const Shape& inputShape,
199                                          const int32_t* blockSize, float* outputData,
200                                          const Shape& outputShape);
201 template bool batchToSpaceGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
202                                             const int32_t* blockSize, _Float16* outputData,
203                                             const Shape& outputShape);
204 template bool batchToSpaceGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
205                                            const int32_t* blockSize, uint8_t* outputData,
206                                            const Shape& outputShape);
207 
208 template <typename T>
spaceToBatchGeneric(const T * inputData,const Shape & inputShape,const int32_t * blockSize,const int32_t * padding,const Shape & paddingShape,T * outputData,const Shape & outputShape)209 bool spaceToBatchGeneric(const T* inputData, const Shape& inputShape, const int32_t* blockSize,
210                          const int32_t* padding, const Shape& paddingShape, T* outputData,
211                          const Shape& outputShape) {
212     // Needed by low level implementation, but not really used.
213     tflite::RuntimeShape blockSizeDim;
214     NNTRACE_COMP("optimized_ops::SpaceToBatchND");
215     tflite::optimized_ops::SpaceToBatchND(
216             {.output_offset = outputShape.offset}, convertShapeToTflshape(inputShape), inputData,
217             blockSizeDim, blockSize, convertShapeToTflshape(paddingShape), padding,
218             convertShapeToTflshape(outputShape), outputData);
219     return true;
220 }
221 template bool spaceToBatchGeneric<float>(const float* inputData, const Shape& inputShape,
222                                          const int32_t* blockSize, const int32_t* padding,
223                                          const Shape& paddingShape, float* outputData,
224                                          const Shape& outputShape);
225 template bool spaceToBatchGeneric<_Float16>(const _Float16* inputData, const Shape& inputShape,
226                                             const int32_t* blockSize, const int32_t* padding,
227                                             const Shape& paddingShape, _Float16* outputData,
228                                             const Shape& outputShape);
229 template bool spaceToBatchGeneric<uint8_t>(const uint8_t* inputData, const Shape& inputShape,
230                                            const int32_t* blockSize, const int32_t* padding,
231                                            const Shape& paddingShape, uint8_t* outputData,
232                                            const Shape& outputShape);
233 
234 }  // namespace nn
235 }  // namespace android
236