/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <math.h>

#include <algorithm>
#include <limits>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/micro/kernels/activation_utils.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/svdf.h"
#include "tensorflow/lite/micro/micro_utils.h"

30 namespace tflite {
31 
EvalIntegerSvdfReference(TfLiteContext * context,TfLiteNode * node,const TfLiteEvalTensor * input_tensor,const TfLiteEvalTensor * weights_feature_tensor,const TfLiteEvalTensor * weights_time_tensor,const TfLiteEvalTensor * bias_tensor,const TfLiteSVDFParams * params,TfLiteEvalTensor * activation_state_tensor,TfLiteEvalTensor * output_tensor,const OpData & data)32 void EvalIntegerSvdfReference(TfLiteContext* context, TfLiteNode* node,
33                               const TfLiteEvalTensor* input_tensor,
34                               const TfLiteEvalTensor* weights_feature_tensor,
35                               const TfLiteEvalTensor* weights_time_tensor,
36                               const TfLiteEvalTensor* bias_tensor,
37                               const TfLiteSVDFParams* params,
38                               TfLiteEvalTensor* activation_state_tensor,
39                               TfLiteEvalTensor* output_tensor,
40                               const OpData& data) {
41   const int n_rank = params->rank;
42   const int n_batch = input_tensor->dims->data[0];
43   const int n_input = input_tensor->dims->data[1];
44   const int n_filter = weights_feature_tensor->dims->data[0];
45   const int n_unit = n_filter / n_rank;
46   const int n_memory = weights_time_tensor->dims->data[1];
47 
48   TFLITE_DCHECK(context != nullptr);
49   TFLITE_DCHECK(context->GetScratchBuffer != nullptr);
50 
51   int32_t* scratch_tensor = static_cast<int32_t*>(
52       context->GetScratchBuffer(context, data.scratch_tensor_index));
53   int32_t* scratch_output_tensor = static_cast<int32_t*>(
54       context->GetScratchBuffer(context, data.scratch_output_tensor_index));
55 
56   // Shift states.
57   int16_t* const state_ptr =
58       tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
59 
60   // Left shift the activation_state.
61   {
62     int16_t* new_state_start = state_ptr;
63     const int16_t* old_state_start = state_ptr + 1;
64     const int16_t* old_state_end = state_ptr + n_batch * n_filter * n_memory;
65     while (old_state_start != old_state_end) {
66       *new_state_start++ = *old_state_start++;
67     }
68   }
69 
70   // Note: no need to clear the latest activation, matmul is not accumulative.
71 
72   // Feature matmul.
73   {
74     int16_t* state =
75         tflite::micro::GetTensorData<int16_t>(activation_state_tensor);
76     const int8_t* input = tflite::micro::GetTensorData<int8_t>(input_tensor);
77     const int8_t* weight_feature =
78         tflite::micro::GetTensorData<int8_t>(weights_feature_tensor);
79     const int32_t output_max = std::numeric_limits<int16_t>::max();
80     const int32_t output_min = std::numeric_limits<int16_t>::min();
81     int16_t* result_in_batch = state + (n_memory - 1);
82     for (int b = 0; b < n_batch; b++) {
83       const int8_t* matrix_ptr = weight_feature;
84       for (int r = 0; r < n_filter; r++) {
85         int32_t dot_prod = 0;
86         const int8_t* vector_in_batch = input + b * n_input;
87         for (int c = 0; c < n_input; c++) {
88           dot_prod +=
89               *matrix_ptr++ * (*vector_in_batch++ - data.input_zero_point);
90         }
91         dot_prod = MultiplyByQuantizedMultiplier(
92             dot_prod, data.effective_scale_1_a, data.effective_scale_1_b);
93         dot_prod = std::min(std::max(output_min, dot_prod), output_max);
94         // This assumes state is symmetrically quantized. Otherwise last bit of
95         // state should be initialized to its zero point and accumulate the
96         // dot_prod.
97         // Equivalent as the following:
98         //     result_in_batch = zero point, which happens to be zero.
99         //     result_in_batch += dot_prod_56.
100         *result_in_batch = dot_prod;
101         result_in_batch += n_memory;
102       }
103     }
104   }
105 
106   // Time.
107   {
108     for (int b = 0; b < n_batch; ++b) {
109       int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
110 
111       // Perform batched vector dot product:
112       const int16_t* vector1_ptr =
113           tflite::micro::GetTensorData<int16_t>(weights_time_tensor);
114       const int16_t* vector2_ptr =
115           tflite::micro::GetTensorData<int16_t>(activation_state_tensor) +
116           b * n_memory * n_filter;
117 
118       for (int i = 0; i < n_filter; i++) {
119         *scratch_ptr_batch = 0;
120         for (int j = 0; j < n_memory; j++) {
121           *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
122         }
123         scratch_ptr_batch++;
124       }
125     }
126   }
127 
128   // Reduce, add bias, rescale, activation.
129   {
130     // Add bias.
131     if (bias_tensor) {
132       // Vector batch assign:
133       const int32_t* bias_data =
134           tflite::micro::GetTensorData<int32_t>(bias_tensor);
135       for (int i = 0; i < n_batch; ++i) {
136         int32_t* output_ptr = scratch_output_tensor + i * n_unit;
137         const int32_t* bias_ptr = bias_data;
138         for (int j = 0; j < n_unit; ++j) {
139           *output_ptr++ = *bias_ptr++;
140         }
141       }
142     } else {
143       int32_t* output_ptr = scratch_output_tensor;
144       for (int i = 0; i < n_batch * n_unit; ++i) {
145         *output_ptr++ = 0;
146       }
147     }
148 
149     // Reduce.
150     for (int b = 0; b < n_batch; ++b) {
151       int32_t* output_temp_ptr = scratch_output_tensor + b * n_unit;
152       int32_t* scratch_ptr_batch = scratch_tensor + b * n_filter;
153 
154       // Reduction sum vector
155       for (int i = 0; i < n_unit; ++i) {
156         for (int j = 0; j < n_rank; ++j) {
157           output_temp_ptr[i] += *scratch_ptr_batch++;
158         }
159       }
160     }
161 
162     // Rescale.
163     const int32_t output_max = std::numeric_limits<int8_t>::max();
164     const int32_t output_min = std::numeric_limits<int8_t>::min();
165     for (int i = 0; i < n_batch * n_unit; ++i) {
166       int32_t x1 = scratch_output_tensor[i];
167       int32_t x2 = MultiplyByQuantizedMultiplier(x1, data.effective_scale_2_a,
168                                                  data.effective_scale_2_b);
169       int32_t x3 = x2 + data.output_zero_point;
170       int32_t x4 = std::min(std::max(output_min, x3), output_max);
171       tflite::micro::GetTensorData<int8_t>(output_tensor)[i] =
172           static_cast<int8_t>(x4);
173     }
174   }
175 }
176 
177 }  // namespace tflite
178