/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow_models/seq_flow_lite/tflite_ops/layer_norm.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <unordered_set>
#include <vector>

#include "tensorflow_models/seq_flow_lite/tflite_ops/quantization_util.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace seq_flow_lite {
namespace ops {
namespace custom {

namespace {

const int kInputIndex = 0;
const int kScaleIndex = 1;
const int kOffsetIndex = 2;
const int kAxisIndex = 3;
const int kOutputIndex = 0;

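// Checks the op signature and sizes the output. The op expects a uint8 input
// tensor, uint8 scale and offset tensors of shape [1], and an optional int32
// axis tensor; the uint8 output is resized to match the input shape.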
TfLiteStatus Resize(TfLiteContext* context, TfLiteNode* node) {
  if (node->outputs->size != 1) {
    return kTfLiteError;
  }

  TfLiteTensor* input = &context->tensors[node->inputs->data[kInputIndex]];
  TfLiteTensor* scale = &context->tensors[node->inputs->data[kScaleIndex]];
  TfLiteTensor* offset = &context->tensors[node->inputs->data[kOffsetIndex]];
  TF_LITE_ENSURE_EQ(context, input->type, kTfLiteUInt8);
  TF_LITE_ENSURE_EQ(context, offset->dims->data[0], 1);
  TF_LITE_ENSURE_EQ(context, offset->dims->size, 1);
  TF_LITE_ENSURE_EQ(context, offset->type, kTfLiteUInt8);
  TF_LITE_ENSURE_EQ(context, scale->dims->data[0], 1);
  TF_LITE_ENSURE_EQ(context, scale->dims->size, 1);
  TF_LITE_ENSURE_EQ(context, scale->type, kTfLiteUInt8);
  if (node->inputs->size == 4) {
    TfLiteTensor* axis = &context->tensors[node->inputs->data[kAxisIndex]];
    TF_LITE_ENSURE_EQ(context, axis->type, kTfLiteInt32);
  }

  TfLiteTensor* output = &context->tensors[node->outputs->data[kOutputIndex]];
  TF_LITE_ENSURE_EQ(context, output->type, kTfLiteUInt8);
  return context->ResizeTensor(context, output,
                               TfLiteIntArrayCopy(input->dims));
}

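// Returns the total number of elements in the input tensor.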
int GetNumberOfSteps(const TfLiteTensor* input) {
  int number_of_steps = 1;
  for (int i = 0; i < input->dims->size; ++i) {
    number_of_steps *= input->dims->data[i];
  }
  return number_of_steps;
}

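// Returns the number of features reduced over, i.e. the product of the input
// dimensions along the given reduction axes.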
inline int GetNumberOfFeatures(const TfLiteTensor* input, const int* axis,
                               const int num_axis) {
  int num_features = 1;
  for (int i = 0; i < num_axis; ++i) {
    num_features *= input->dims->data[axis[i]];
  }
  return num_features;
}

// Performs sanity checks on the input axes and resolves them into valid
// dimensions.
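// For example, with num_dims = 3, axis = {-1, 1, 2} resolves to
// out_axis = {2, 1}, since -1 maps to dimension 2 and duplicates are dropped.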
inline bool ResolveAxis(const int num_dims, const int* axis, const int num_axis,
                        int* out_axis, int* out_num_axis) {
  *out_num_axis = 0;
  // Short-circuit axis resolution for scalars; the axis will go unused.
  if (num_dims == 0) {
    return true;
  }

  // Using an unordered set to reduce complexity in looking up duplicates.
  std::unordered_set<int> unique_indices;
  for (int64_t idx = 0; idx < num_axis; ++idx) {
    // Handle negative index.
    int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx];
    assert(current >= 0 && current < num_dims);
    // Only adding the axis if it wasn't added before.
    if (unique_indices.find(current) == unique_indices.end()) {
      unique_indices.insert(current);
      out_axis[*out_num_axis] = current;
      *out_num_axis += 1;
    }
  }
  return true;
}

// Given the current position in the input array, computes the next valid
// index. Returns false once iteration over the entire array is complete.
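// The traversal behaves like an odometer: e.g. for input_dims = {2, 3},
// curr_pos advances {0,0} -> {0,1} -> {0,2} -> {1,0} -> {1,1} -> {1,2}, and
// the call on {1,2} wraps back to {0,0} and returns false.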
bool ValidIndex(const int* input_dims, const int input_dims_size,
                int* curr_pos) {
  if (input_dims_size == 0) {
    return false;
  }
  assert(input_dims != nullptr);
  assert(curr_pos != nullptr);
  for (int idx = input_dims_size - 1; idx >= 0; --idx) {
    int current_val = curr_pos[idx] + 1;
    assert(input_dims[idx] >= current_val);
    if (input_dims[idx] == current_val) {
      curr_pos[idx] = 0;
    } else {
      curr_pos[idx] = current_val;
      return true;
    }
  }
  return false;
}

// Computes the flat offset for the current position, skipping dimensions that
// are part of the reduction axes. Implementation borrowed from the TFLite
// reduce-mean implementation.
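// For example, with input_dims = {2, 3, 4} and curr_pos = {1, 2, 3}:
// with no reduction axes the offset is (1 * 3 + 2) * 4 + 3 = 23, while
// with axis = {2} dimension 2 is skipped and the offset is 1 * 3 + 2 = 5.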
int GetOffset(const int* input_dims, const int input_dims_size,
              const int* curr_pos, const int* axis, const int axis_size) {
  if (input_dims_size == 0) return 0;
  assert(input_dims != nullptr);
  assert(curr_pos != nullptr);
  int offset = 0;
  for (int idx = 0; idx < input_dims_size; ++idx) {
    // If idx is part of the reduction axes, skip the offset calculation.
    bool is_axis = false;
    if (axis != nullptr) {
      for (int redux = 0; redux < axis_size; ++redux) {
        if (idx == axis[redux]) {
          is_axis = true;
          break;
        }
      }
    }
    if (!is_axis) offset = offset * input_dims[idx] + curr_pos[idx];
  }

  return offset;
}

// TODO(b/132896827): Current implementation needs further evaluation to reduce
// its space and time complexity.
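// Normalizes the input over the given reduction axes:
//   output = scale * (x - mean) / sqrt(variance + 1e-6) + offset
// folded into a per-group multiplier and bias so that each element needs only
// one multiply-add before requantization.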
TfLiteStatus FlexibleLayerNorm(const TfLiteTensor* input, const float scale,
                               const float offset, const int* axis,
                               const int num_axis, TfLiteTensor* output) {
  int num_features = GetNumberOfFeatures(input, &axis[0], num_axis);
  int time_steps = static_cast<int>(GetNumberOfSteps(input) / num_features);

  std::vector<float> sum_x(time_steps, 0.0f);
  std::vector<float> sum_xx(time_steps, 0.0f);
  std::vector<int> index_iter(input->dims->size, 0);

  // Computing sum and squared sum for features across the reduction axes.
  do {
    // Not passing reduction axes to get the input offset as we are simply
    // iterating through the multidimensional array.
    int input_offset = GetOffset(input->dims->data, input->dims->size,
                                 &index_iter[0], nullptr, 0);
    // Passing in the valid reduction axes as we would like to get the output
    // offset after reduction.
    int stats_offset = GetOffset(input->dims->data, input->dims->size,
                                 &index_iter[0], &axis[0], num_axis);
    float input_val = PodDequantize(*input, input_offset);
    sum_x[stats_offset] += input_val;
    sum_xx[stats_offset] += input_val * input_val;
  } while (ValidIndex(input->dims->data, input->dims->size, &index_iter[0]));

  std::vector<float> multiplier(time_steps, 1.0f);
  std::vector<float> bias(time_steps, 0.0f);

  // Computing stats for the reduction axes.
  for (int i = 0; i < time_steps; ++i) {
    sum_x[i] = sum_x[i] / num_features;
    sum_xx[i] = sum_xx[i] / num_features;
    const float variance = sum_xx[i] - sum_x[i] * sum_x[i];
    const float inverse_stddev = 1 / std::sqrt(variance + 1e-6);
    multiplier[i] = inverse_stddev * scale;
    bias[i] = offset - sum_x[i] * inverse_stddev * scale;
  }

  const float out_inverse_scale = 1.0f / output->params.scale;
  const int32_t out_zero_point = output->params.zero_point;
  uint8_t* out_ptr = output->data.uint8;
  std::fill(index_iter.begin(), index_iter.end(), 0);

  // Using the stats to fill the output pointer.
  do {
    // Not passing reduction axes to get the input offset as we are simply
    // iterating through the multidimensional array.
    int input_offset = GetOffset(input->dims->data, input->dims->size,
                                 &index_iter[0], nullptr, 0);
    // Passing in the valid reduction axes as we would like to get the output
    // offset after reduction.
    int stats_offset = GetOffset(input->dims->data, input->dims->size,
                                 &index_iter[0], &axis[0], num_axis);
    float input_val = PodDequantize(*input, input_offset);

    const float value =
        input_val * multiplier[stats_offset] + bias[stats_offset];
    out_ptr[input_offset] =
        PodQuantize(value, out_zero_point, out_inverse_scale);
  } while (ValidIndex(input->dims->data, input->dims->size, &index_iter[0]));

  return kTfLiteOk;
}

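// Layer norm over the last dimension for float32 inputs and outputs, using
// the same folded multiplier/bias formulation as above.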
TfLiteStatus DefaultLayerNormFloat(const TfLiteTensor* input, const float scale,
                                   const float offset, TfLiteTensor* output) {
  const int input_rank = input->dims->size;
  const int num_features = input->dims->data[input_rank - 1];
  const int time_steps =
      static_cast<int>(GetNumberOfSteps(input) / num_features);
  float* out_ptr = output->data.f;
  for (int i = 0; i < time_steps; ++i) {
    float sum_x = 0;
    float sum_xx = 0;
    for (int j = 0, index = i * num_features; j < num_features; ++j, ++index) {
      sum_x += input->data.f[index];
      sum_xx += input->data.f[index] * input->data.f[index];
    }
    const float exp_xx = sum_xx / num_features;
    const float exp_x = sum_x / num_features;
    const float variance = exp_xx - exp_x * exp_x;
    const float inverse_stddev = 1 / std::sqrt(variance + 1e-6);
    const float multiplier = inverse_stddev * scale;
    const float bias = offset - exp_x * inverse_stddev * scale;
    for (int j = 0, index = i * num_features; j < num_features; ++j, ++index) {
      out_ptr[index] = input->data.f[index] * multiplier + bias;
    }
  }
  return kTfLiteOk;
}

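// Layer norm over the last dimension for uint8 inputs: dequantizes each time
// step, normalizes it in float, and requantizes the result into the output.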
TfLiteStatus DefaultLayerNorm(const TfLiteTensor* input, const float scale,
                              const float offset, TfLiteTensor* output) {
  const int input_rank = input->dims->size;
  const int num_features = input->dims->data[input_rank - 1];
  const int time_steps =
      static_cast<int>(GetNumberOfSteps(input) / num_features);

  std::vector<float> temp_buffer(num_features, 0.0f);
  const float out_inverse_scale = 1.0f / output->params.scale;
  const int32_t out_zero_point = output->params.zero_point;
  uint8_t* out_ptr = output->data.uint8;
  for (int i = 0; i < time_steps; ++i) {
    float sum_x = 0;
    float sum_xx = 0;
    for (int j = 0, index = i * num_features; j < num_features; ++j, ++index) {
      temp_buffer[j] = PodDequantize(*input, index);
      sum_x += temp_buffer[j];
      sum_xx += temp_buffer[j] * temp_buffer[j];
    }
    const float exp_xx = sum_xx / num_features;
    const float exp_x = sum_x / num_features;
    const float variance = exp_xx - exp_x * exp_x;
    const float inverse_stddev = 1 / std::sqrt(variance + 1e-6);
    const float multiplier = inverse_stddev * scale;
    const float bias = offset - exp_x * inverse_stddev * scale;
    for (int j = 0, index = i * num_features; j < num_features; ++j, ++index) {
      const float value = temp_buffer[j] * multiplier + bias;
      out_ptr[index] = PodQuantize(value, out_zero_point, out_inverse_scale);
    }
  }
  return kTfLiteOk;
}

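// Dispatches to the specialized last-dimension kernels when the axis input
// reduces over only the last dimension, and falls back to FlexibleLayerNorm
// for arbitrary reduction axes.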
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input =
      &context->tensors[node->inputs->data[kInputIndex]];
  TfLiteTensor* output = &context->tensors[node->outputs->data[kOutputIndex]];
  TfLiteTensor scale_tensor = context->tensors[node->inputs->data[kScaleIndex]];
  TfLiteTensor offset_tensor =
      context->tensors[node->inputs->data[kOffsetIndex]];
  float scale = 1.0;
  float offset = 0.0;
  if (input->type == kTfLiteUInt8) {
    scale = PodDequantize(scale_tensor, 0);
    offset = PodDequantize(offset_tensor, 0);
  } else {
    scale = scale_tensor.data.f[0];
    offset = offset_tensor.data.f[0];
  }

  TfLiteTensor* axis = &context->tensors[node->inputs->data[kAxisIndex]];
  int num_axis = static_cast<int>(tflite::NumElements(axis));
  // For backward compatibility, layer norm over the last channel is handled
  // by the default kernels below.
  if (num_axis == 1 && (axis->data.i32[0] == -1 ||
                        axis->data.i32[0] == (input->dims->size - 1))) {
    if (input->type == kTfLiteUInt8) {
      return DefaultLayerNorm(input, scale, offset, output);
    } else if (input->type == kTfLiteFloat32) {
      return DefaultLayerNormFloat(input, scale, offset, output);
    } else {
      TF_LITE_ENSURE_MSG(context, false,
                         "Input should be either UInt8 or Float32.");
    }
  }

  std::vector<int> resolved_axis(num_axis);
  // Resolve the axes.
  int num_resolved_axis = 0;
  if (!ResolveAxis(input->dims->size, axis->data.i32, num_axis,
                   &resolved_axis[0], &num_resolved_axis)) {
    return kTfLiteError;
  }

  return FlexibleLayerNorm(input, scale, offset, &resolved_axis[0],
                           num_resolved_axis, output);
}

}  // namespace

TfLiteRegistration* Register_LAYER_NORM() {
  static TfLiteRegistration r = {nullptr, nullptr, Resize, Eval};
  return &r;
}
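
// A minimal registration sketch for interpreter clients; the custom op name
// "LayerNorm" is an assumption and must match the name the converter used:
//
//   tflite::MutableOpResolver resolver;  // tensorflow/lite/mutable_op_resolver.h
//   resolver.AddCustom("LayerNorm",
//                      seq_flow_lite::ops::custom::Register_LAYER_NORM());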

}  // namespace custom
}  // namespace ops
}  // namespace seq_flow_lite