1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "Operations.h"
18 #include "CpuOperationUtils.h"
19
20 #include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
21
22 namespace android {
23 namespace nn {
24
reluFloat32(const float * inputData,const Shape & inputShape,float * outputData,const Shape & outputShape)25 bool reluFloat32(const float* inputData, const Shape& inputShape,
26 float* outputData, const Shape& outputShape) {
27 int numElements = getNumberOfElements(inputShape);
28 for (int i=0; i<numElements; i++, inputData++, outputData++) {
29 *outputData = std::max(0.f, *inputData);
30 }
31 return true;
32 }
33
relu1Float32(const float * inputData,const Shape & inputShape,float * outputData,const Shape & outputShape)34 bool relu1Float32(const float* inputData, const Shape& inputShape,
35 float* outputData, const Shape& outputShape) {
36 int numElements = getNumberOfElements(inputShape);
37 for (int i=0; i<numElements; i++, inputData++, outputData++) {
38 *outputData = std::min(std::max(-1.f, *inputData), 1.f);
39 }
40 return true;
41 }
42
relu6Float32(const float * inputData,const Shape & inputShape,float * outputData,const Shape & outputShape)43 bool relu6Float32(const float* inputData, const Shape& inputShape,
44 float* outputData, const Shape& outputShape) {
45 int numElements = getNumberOfElements(inputShape);
46 for (int i=0; i<numElements; i++, inputData++, outputData++) {
47 *outputData = std::min(std::max(0.f, *inputData), 6.f);
48 }
49 return true;
50 }
51
tanhFloat32(const float * inputData,const Shape & inputShape,float * outputData,const Shape & outputShape)52 bool tanhFloat32(const float* inputData, const Shape& inputShape,
53 float* outputData, const Shape& outputShape) {
54 int numElements = getNumberOfElements(inputShape);
55 for (int i=0; i<numElements; i++, inputData++, outputData++) {
56 *outputData = std::tanh(*inputData);
57 }
58 return true;
59 }
60
logisticFloat32(const float * inputData,const Shape & inputShape,float * outputData,const Shape & outputShape)61 bool logisticFloat32(const float* inputData, const Shape& inputShape,
62 float* outputData, const Shape& outputShape) {
63 int numElements = getNumberOfElements(inputShape);
64 for (int i=0; i<numElements; i++, inputData++, outputData++) {
65 *outputData = 1.f / (1.f + std::exp(-*inputData));
66 }
67 return true;
68 }
69
softmaxFloat32(const float * inputData,const Shape & inputShape,const float beta,float * outputData,const Shape & outputShape)70 bool softmaxFloat32(const float* inputData, const Shape& inputShape,
71 const float beta,
72 float* outputData, const Shape& outputShape) {
73 tflite::Dims<4> dim;
74 if (getNumberOfDimensions(inputShape) == 2) {
75 uint32_t batch_size = getSizeOfDimension(inputShape, 0);
76 uint32_t input_size = getNumberOfElements(inputShape) / batch_size;
77
78 Shape shapeIn4D;
79 shapeIn4D.dimensions = {batch_size, 1, 1, input_size};
80 dim = convertShapeToDims(shapeIn4D);
81 } else if (getNumberOfDimensions(inputShape) == 4) {
82 dim = convertShapeToDims(inputShape);
83 } else {
84 LOG(ERROR) << "only 2D and 4D tensors supported";
85 return false;
86 }
87
88 tflite::optimized_ops::Softmax(inputData, dim, beta,
89 outputData, dim);
90 return true;
91 }
92
93 #define ANDROID_NN_RELUX_QUANT8(activation) \
94 int numElements = getNumberOfElements(inputShape); \
95 int32_t output_activation_min = 0; \
96 int32_t output_activation_max = 0; \
97 \
98 CalculateActivationRangeUint8(activation, inputShape, \
99 &output_activation_min, \
100 &output_activation_max); \
101 \
102 for (int i=0; i<numElements; i++, inputData++, outputData++) { \
103 *outputData = std::min((uint8_t)output_activation_max, \
104 std::max((uint8_t)output_activation_min, *inputData)); \
105 }
106
107
reluQuant8(const uint8_t * inputData,const Shape & inputShape,uint8_t * outputData,const Shape & outputShape)108 bool reluQuant8(const uint8_t* inputData, const Shape& inputShape,
109 uint8_t* outputData, const Shape& outputShape) {
110 ANDROID_NN_RELUX_QUANT8(kActivationRelu)
111 return true;
112 }
113
relu1Quant8(const uint8_t * inputData,const Shape & inputShape,uint8_t * outputData,const Shape & outputShape)114 bool relu1Quant8(const uint8_t* inputData, const Shape& inputShape,
115 uint8_t* outputData, const Shape& outputShape) {
116 ANDROID_NN_RELUX_QUANT8(kActivationRelu1)
117 return true;
118 }
119
relu6Quant8(const uint8_t * inputData,const Shape & inputShape,uint8_t * outputData,const Shape & outputShape)120 bool relu6Quant8(const uint8_t* inputData, const Shape& inputShape,
121 uint8_t* outputData, const Shape& outputShape) {
122 ANDROID_NN_RELUX_QUANT8(kActivationRelu6)
123 return true;
124 }
125
126 #undef ANDROID_NN_RELUX_QUANT8
127
logisticQuant8(const uint8_t * inputData,const Shape & inputShape,uint8_t * outputData,const Shape & outputShape)128 bool logisticQuant8(const uint8_t* inputData, const Shape& inputShape,
129 uint8_t* outputData, const Shape& outputShape) {
130 if (outputShape.offset != 0 || outputShape.scale != 1.f / 256) {
131 LOG(ERROR) << "incorrect scale / offset for output";
132 return false;
133 }
134
135 int numElements = getNumberOfElements(inputShape);
136 static constexpr int kInputIntegerBits = 4;
137
138 const double input_real_multiplier =
139 inputShape.scale *
140 static_cast<double>(1 << (31 - kInputIntegerBits));
141
142 int32_t input_multiplier = 0;
143 int32_t input_left_shift = 0;
144 if (!QuantizeMultiplierGreaterThanOne(input_real_multiplier,
145 &input_multiplier,
146 &input_left_shift)) {
147 return false;
148 }
149 int32_t input_range_radius =
150 CalculateInputRadius(kInputIntegerBits, input_left_shift);
151
152 tflite::optimized_ops::Logistic(
153 inputData, convertShapeToDims(inputShape),
154 inputShape.offset, input_range_radius,
155 input_multiplier, input_left_shift,
156 outputData, convertShapeToDims(outputShape));
157
158 return true;
159 }
160
softmaxQuant8(const uint8_t * inputData,const Shape & inputShape,const float beta,uint8_t * outputData,const Shape & outputShape)161 bool softmaxQuant8(const uint8_t* inputData, const Shape& inputShape,
162 const float beta,
163 uint8_t* outputData, const Shape& outputShape) {
164 tflite::Dims<4> dim;
165 if (getNumberOfDimensions(inputShape) == 2) {
166 uint32_t batch_size = getSizeOfDimension(inputShape, 0);
167 uint32_t input_size = getNumberOfElements(inputShape) / batch_size;
168
169 Shape shapeIn4D;
170 shapeIn4D.dimensions = {batch_size, 1, 1, input_size};
171 dim = convertShapeToDims(shapeIn4D);
172 } else if (getNumberOfDimensions(inputShape) == 4) {
173 dim = convertShapeToDims(inputShape);
174 } else {
175 LOG(ERROR) << "only 2D and 4D tensors supported";
176 return false;
177 }
178
179 if (outputShape.offset != 0 || outputShape.scale != 1.f / 256) {
180 LOG(ERROR) << "incorrect scale / offset for output";
181 return false;
182 }
183
184 static const int32_t kScaledDiffIntegerBits = 5;
185 const double input_beta_real_multiplier = std::min(
186 1.0 * beta * inputShape.scale * (1 << (31 - kScaledDiffIntegerBits)),
187 (1ll << 31) - 1.0);
188
189 int32_t input_multiplier = 0;
190 int32_t input_left_shift = 0;
191 if (!QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier,
192 &input_multiplier,
193 &input_left_shift)) {
194 return false;
195 }
196 float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits,
197 input_left_shift);
198
199 tflite::optimized_ops::Softmax(inputData, dim, input_multiplier,
200 input_left_shift, diff_min,
201 outputData, dim);
202 return true;
203 }
204
205
206 } // namespace nn
207 } // namespace android
208