1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include <math.h>
17
18 #include "tensorflow/lite/c/builtin_op_data.h"
19 #include "tensorflow/lite/c/common.h"
20 #include "tensorflow/lite/kernels/internal/common.h"
21 #include "tensorflow/lite/kernels/internal/quantization_util.h"
22 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
23 #include "tensorflow/lite/kernels/kernel_util.h"
24 #include "tensorflow/lite/kernels/op_macros.h"
25 #include "tensorflow/lite/micro/kernels/activation_utils.h"
26 #include "tensorflow/lite/micro/kernels/kernel_util.h"
27 #include "tensorflow/lite/micro/kernels/svdf.h"
28 #include "tensorflow/lite/micro/micro_utils.h"
29
30 namespace tflite {
31
// Reference (portable, unoptimized) evaluation of one SVDF step with
// fully-integer quantization: int8 input/output, int16 activation state,
// int32 accumulation. The activation state acts as a ring of the last
// `n_memory` feature-matmul results per filter, laid out as
// [batch, filter, memory] with memory innermost (established by the strides
// used below: batch stride = n_filter * n_memory, per-filter write stride =
// n_memory).
//
// Args:
//   context: used only to fetch the two int32 scratch buffers allocated at
//     Prepare time (indices stored in `data`).
//   node: unused in this body; kept for kernel-signature uniformity.
//   input_tensor: int8 input of shape [n_batch, n_input].
//   weights_feature_tensor: int8 weights of shape [n_filter, n_input].
//   weights_time_tensor: int16 weights; dims->data[1] gives n_memory.
//   bias_tensor: optional int32 bias of length n_unit; may be null.
//   params: supplies the SVDF rank (n_unit = n_filter / rank).
//   activation_state_tensor: int16 state, updated in place.
//   output_tensor: int8 output of shape [n_batch, n_unit].
//   data: precomputed zero points and the two effective (multiplier, shift)
//     pairs used to requantize the two matmul stages.
void EvalIntegerSvdfReference(TfLiteContext* context, TfLiteNode* node,
                              const TfLiteEvalTensor* input_tensor,
                              const TfLiteEvalTensor* weights_feature_tensor,
                              const TfLiteEvalTensor* weights_time_tensor,
                              const TfLiteEvalTensor* bias_tensor,
                              const TfLiteSVDFParams* params,
                              TfLiteEvalTensor* activation_state_tensor,
                              TfLiteEvalTensor* output_tensor,
                              const OpData& data) {
  const int n_rank = params->rank;
  const int n_batch = input_tensor->dims->data[0];
  const int n_input = input_tensor->dims->data[1];
  const int n_filter = weights_feature_tensor->dims->data[0];
  const int n_unit = n_filter / n_rank;
  const int n_memory = weights_time_tensor->dims->data[1];

  TFLITE_DCHECK(context != nullptr);
  TFLITE_DCHECK(context->GetScratchBuffer != nullptr);

  // Per-filter int32 accumulators ([n_batch, n_filter]) and per-unit int32
  // pre-activation outputs ([n_batch, n_unit]), both allocated at Prepare.
  int32_t* scratch_tensor = static_cast<int32_t*>(
      context->GetScratchBuffer(context, data.scratch_tensor_index));
  int32_t* scratch_output_tensor = static_cast<int32_t*>(
      context->GetScratchBuffer(context, data.scratch_output_tensor_index));

  // Shift states.
  int16_t* const state_ptr =
      tflite::micro::GetTensorData<int16_t>(activation_state_tensor);

  // Left shift the activation_state: drop the oldest time step by moving
  // every element one slot toward index 0. Because the shift runs across the
  // whole flat buffer, the last memory slot of each [batch, filter] row
  // receives a stale value — it is unconditionally overwritten by the
  // feature matmul below.
  {
    int16_t* new_state_start = state_ptr;
    const int16_t* old_state_start = state_ptr + 1;
    const int16_t* old_state_end = state_ptr + n_batch * n_filter * n_memory;
    while (old_state_start != old_state_end) {
      *new_state_start++ = *old_state_start++;
    }
  }

  // Note: no need to clear the latest activation, matmul is not accumulative.

  // Feature matmul: state[b, r, n_memory - 1] =
  //   requantize(sum_c weights_feature[r, c] * (input[b, c] - input_zp)),
  // saturated to the int16 state range.
  {
    int16_t* state =
        tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
    const int8_t* input = tflite::micro::GetTensorData<int8_t>(input_tensor);
    const int8_t* weight_feature =
        tflite::micro::GetTensorData<int8_t>(weights_feature_tensor);
    const int32_t output_max = std::numeric_limits<int16_t>::max();
    const int32_t output_min = std::numeric_limits<int16_t>::min();
    // Points at the newest memory slot of the current [batch, filter] row;
    // advancing by n_memory per filter walks every row exactly once across
    // both loops (per-batch advance = n_filter * n_memory = batch stride).
    int16_t* result_in_batch = state + (n_memory - 1);
    for (int b = 0; b < n_batch; b++) {
      const int8_t* matrix_ptr = weight_feature;
      for (int r = 0; r < n_filter; r++) {
        int32_t dot_prod = 0;
        const int8_t* vector_in_batch = input + b * n_input;
        for (int c = 0; c < n_input; c++) {
          dot_prod +=
              *matrix_ptr++ * (*vector_in_batch++ - data.input_zero_point);
        }
        // Rescale from (input_scale * feature_scale) to the state scale,
        // then saturate to int16.
        dot_prod = MultiplyByQuantizedMultiplier(
            dot_prod, data.effective_scale_1_a, data.effective_scale_1_b);
        dot_prod = std::min(std::max(output_min, dot_prod), output_max);
        // This assumes state is symmetrically quantized. Otherwise last bit of
        // state should be initialized to its zero point and accumulate the
        // dot_prod.
        // Equivalent as the following:
        // result_in_batch = zero point, which happens to be zero.
        // result_in_batch += dot_prod_56.
        *result_in_batch = dot_prod;
        result_in_batch += n_memory;
      }
    }
  }

  // Time: per filter, dot the n_memory-long state history with the matching
  // row of weights_time. Both pointers advance contiguously, so vector1_ptr
  // walks weights_time row by row while vector2_ptr walks this batch's
  // [filter, memory] state slab.
  {
    for (int b = 0; b < n_batch; ++b) {
      int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;

      // Perform batched vector dot product:
      const int16_t* vector1_ptr =
          tflite::micro::GetTensorData<int16_t>(weights_time_tensor);
      const int16_t* vector2_ptr =
          tflite::micro::GetTensorData<int16_t>(activation_state_tensor) +
          b * n_memory * n_filter;

      for (int i = 0; i < n_filter; i++) {
        *scratch_ptr_batch = 0;
        for (int j = 0; j < n_memory; j++) {
          *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
        }
        scratch_ptr_batch++;
      }
    }
  }

  // Reduce, add bias, rescale, activation.
  {
    // Add bias: seed scratch_output with the bias (broadcast across batches),
    // or with zeros when no bias tensor is given.
    if (bias_tensor) {
      // Vector batch assign:
      const int32_t* bias_data =
          tflite::micro::GetTensorData<int32_t>(bias_tensor);
      for (int i = 0; i < n_batch; ++i) {
        int32_t* output_ptr = scratch_output_tensor + i * n_unit;
        const int32_t* bias_ptr = bias_data;
        for (int j = 0; j < n_unit; ++j) {
          *output_ptr++ = *bias_ptr++;
        }
      }
    } else {
      int32_t* output_ptr = scratch_output_tensor;
      for (int i = 0; i < n_batch * n_unit; ++i) {
        *output_ptr++ = 0;
      }
    }

    // Reduce: each unit's value is the sum of its n_rank consecutive filter
    // accumulators (filters are grouped by unit, rank-major).
    for (int b = 0; b < n_batch; ++b) {
      int32_t* output_temp_ptr = scratch_output_tensor + b * n_unit;
      int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;

      // Reduction sum vector
      for (int i = 0; i < n_unit; ++i) {
        for (int j = 0; j < n_rank; ++j) {
          output_temp_ptr[i] += *scratch_ptr_batch++;
        }
      }
    }

    // Rescale: requantize stage-2 accumulators to the int8 output scale, add
    // the output zero point, then saturate to the int8 range.
    const int32_t output_max = std::numeric_limits<int8_t>::max();
    const int32_t output_min = std::numeric_limits<int8_t>::min();
    for (int i = 0; i < n_batch * n_unit; ++i) {
      int32_t x1 = scratch_output_tensor[i];
      int32_t x2 = MultiplyByQuantizedMultiplier(x1, data.effective_scale_2_a,
                                                 data.effective_scale_2_b);
      int32_t x3 = x2 + data.output_zero_point;
      int32_t x4 = std::min(std::max(output_min, x3), output_max);
      tflite::micro::GetTensorData<int8_t>(output_tensor)[i] =
          static_cast<int8_t>(x4);
    }
  }
}
176
177 } // namespace tflite
178