/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/hexagon/builders/matmul_builder.h"

#include <stdint.h>

#include <limits>

#include "hexagon/hexagon_nn_ops.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/hexagon/hexagon_nn/hexagon_nn.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace tflite {
namespace delegates {
namespace hexagon {
namespace {
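// Reads up to four dimensions from 'dims' into (batch, height, width, depth),
// right-aligned, with missing leading dimensions set to 1.
// For example, a 2-D tensor with dims {2, 3} yields {1, 1, 2, 3}.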
void GetDims(int* batch_size, int* height_size, int* width_size,
             int* depth_size, const TfLiteIntArray* dims) {
  int* dim[] = {batch_size, height_size, width_size, depth_size};
  for (int i = 0; i < 4; ++i) *(dim[i]) = 1;
  for (int i = 4 - dims->size; i < 4; ++i) {
    *dim[i] = dims->data[i - (4 - dims->size)];
  }
}

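// XOR with 0x80 flips the sign bit, remapping two's-complement int8 onto
// uint8 with a +128 offset, e.g. int8 -128 (0x80) ^ 0x80 = 0x00 (uint8 0)
// and int8 127 (0x7F) ^ 0x80 = 0xFF (uint8 255).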
constexpr uint8_t k8BitSignFlipConstant = 0x80;

TfLiteStatus AddFullyConnectedHelper(const TfLiteIntArray* inputs,
                                     const TfLiteIntArray* outputs,
                                     const OpBuilder::TensorID weights_id,
                                     const OpBuilder::TensorID weights_min_id,
                                     const OpBuilder::TensorID weights_max_id,
                                     GraphBuilder* graph_builder,
                                     TfLiteContext* context,
                                     OpBuilder* matmul_op,
                                     OpBuilder::TensorID* node_output) {
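  // hexagon_nn represents scalar values, such as quantization min/max, as
  // 1x1x1x1 tensors.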
  static int scalar_shape[] = {1, 1, 1, 1};
  // Data tensor.
  int data_tensor_id = inputs->data[0];
  const auto& data_tensor = context->tensors[data_tensor_id];
  float data_min, data_max;
  TF_LITE_ENSURE_STATUS(OpBuilder::ComputeMinAndMaxQuantValues(
      data_tensor, &data_min, &data_max));
  auto* data_min_const = graph_builder->AddConstNodeWithData(
      scalar_shape, reinterpret_cast<char*>(&data_min), sizeof(data_min));
  auto* data_max_const = graph_builder->AddConstNodeWithData(
      scalar_shape, reinterpret_cast<char*>(&data_max), sizeof(data_max));

  // Add the data and weight tensors in the order the Hexagon op expects:
  // data, weights, then the (min, max) range of each.
  matmul_op->AddInput(graph_builder->GetHexagonTensorId(data_tensor_id));
  matmul_op->AddInput(weights_id);
  matmul_op->AddInput(OpBuilder::TensorID(data_min_const->GetID(), 0));
  matmul_op->AddInput(OpBuilder::TensorID(data_max_const->GetID(), 0));
  matmul_op->AddInput(weights_min_id);
  matmul_op->AddInput(weights_max_id);

  // Outputs of the MatMul node are in int32 format; the shape should still
  // match the TFLite output tensor's shape.
  int output_batch_size, output_height_size, output_width_size,
      output_depth_size;
  GetDims(&output_batch_size, &output_height_size, &output_width_size,
          &output_depth_size, context->tensors[outputs->data[0]].dims);
  const auto& matmul_out =
      matmul_op->AddOutput(sizeof(int), 4,
                           {output_batch_size, output_height_size,
                            output_width_size, output_depth_size});
  const auto& matmul_out_min =
      matmul_op->AddOutput(sizeof(float), 4, scalar_shape);
  const auto& matmul_out_max =
      matmul_op->AddOutput(sizeof(float), 4, scalar_shape);

  // Bias tensor.
  int bias_tensor_id = inputs->data[2];
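  // A bias id of -1 (kTfLiteOptionalTensor) means the op has no bias.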
  OpBuilder::TensorID matmul_and_bias_out = matmul_out,
                      matmul_and_bias_out_min = matmul_out_min,
                      matmul_and_bias_out_max = matmul_out_max;
  if (bias_tensor_id != -1) {
    const auto& bias_tensor = context->tensors[bias_tensor_id];
    float bias_min, bias_max;
    TF_LITE_ENSURE_STATUS(OpBuilder::ComputeMinAndMaxQuantValues(
        bias_tensor, &bias_min, &bias_max));
    auto* bias_min_const = graph_builder->AddConstNodeWithData(
        scalar_shape, reinterpret_cast<char*>(&bias_min), sizeof(bias_min));
    auto* bias_max_const = graph_builder->AddConstNodeWithData(
        scalar_shape, reinterpret_cast<char*>(&bias_max), sizeof(bias_max));

    // MatMul + Bias.
    auto* bias_add_op = graph_builder->AddNode(matmul_op->GetTFLiteNodeID());
    bias_add_op->SetOpType(OP_QuantizedBiasAdd_32p32to32);
    bias_add_op->AddInput(matmul_out);
    bias_add_op->AddInput(graph_builder->GetHexagonTensorId(bias_tensor_id));
    bias_add_op->AddInput(matmul_out_min);
    bias_add_op->AddInput(matmul_out_max);
    bias_add_op->AddInput(OpBuilder::TensorID(bias_min_const->GetID(), 0));
    bias_add_op->AddInput(OpBuilder::TensorID(bias_max_const->GetID(), 0));
    matmul_and_bias_out =
        bias_add_op->AddOutput(sizeof(int), 4,
                               {output_batch_size, output_height_size,
                                output_width_size, output_depth_size});
    matmul_and_bias_out_min =
        bias_add_op->AddOutput(sizeof(float), 4, scalar_shape);
    matmul_and_bias_out_max =
        bias_add_op->AddOutput(sizeof(float), 4, scalar_shape);
  }
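  // Without a bias, matmul_and_bias_out still refers to the raw MatMul
  // outputs, which feed the requantize step directly.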
  float output_min, output_max;
  // Requantize the 32-bit result into 8-bit format using the output tensor's
  // min/max.
  TF_LITE_ENSURE_STATUS(OpBuilder::ComputeMinAndMaxQuantValues(
      context->tensors[outputs->data[0]], &output_min, &output_max));
  auto* output_min_const = graph_builder->AddConstNodeWithData(
      scalar_shape, reinterpret_cast<char*>(&output_min), sizeof(output_min));
  auto* output_max_const = graph_builder->AddConstNodeWithData(
      scalar_shape, reinterpret_cast<char*>(&output_max), sizeof(output_max));
  auto* quantize_biasadd_op =
      graph_builder->AddNode(matmul_op->GetTFLiteNodeID());
  quantize_biasadd_op->SetOpType(OP_Requantize_32to8);
  quantize_biasadd_op->AddInput(matmul_and_bias_out);
  quantize_biasadd_op->AddInput(matmul_and_bias_out_min);
  quantize_biasadd_op->AddInput(matmul_and_bias_out_max);
  quantize_biasadd_op->AddInput(
      OpBuilder::TensorID(output_min_const->GetID(), 0));
  quantize_biasadd_op->AddInput(
      OpBuilder::TensorID(output_max_const->GetID(), 0));
  *node_output =
      quantize_biasadd_op->AddOutput(sizeof(uint8_t), 4,
                                     {output_batch_size, output_height_size,
                                      output_width_size, output_depth_size});
  quantize_biasadd_op->AddOutput(sizeof(float), 4, scalar_shape);
  quantize_biasadd_op->AddOutput(sizeof(float), 4, scalar_shape);
  return kTfLiteOk;
}

}  // namespace

// The TFLite 'Fully-connected' quantized op corresponds to the following
// subgraph in Hexagon:
// Data (8-bit), Weights (const, 8-bit) => MatMul => MatMul out (int32)
// MatMul out (int32), Bias (int32) => QuantizedBiasAdd => BiasAdd out (int32)
// BiasAdd out (int32) => Requantize_32to8 => Output (8-bit)
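// Requantize_32to8 linearly rescales values from the int32 range computed
// upstream into the output tensor's (min, max) range before narrowing to
// 8 bits.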
TfLiteStatus MatMulWithConstWeightsOpBuilder::PopulateSubGraph(
    const TfLiteIntArray* inputs, const TfLiteIntArray* outputs,
    TfLiteContext* context) {
  // Weights vector.
  int weights_tensor_id = inputs->data[1];
  const auto& weights_tensor = context->tensors[weights_tensor_id];
  if (weights_tensor.allocation_type != kTfLiteMmapRo) {
    context->ReportError(
        context, "Weights tensor doesn't have correct allocation type: %s",
        weights_tensor.name);
    return kTfLiteError;
  }
  int batch_size, height_size, width_size, depth_size;
  // The Hexagon library expects the weight tensor in NHCW order, while TFLite
  // uses NHWC, so transpose NHWC -> NHCW.
  GetDims(&batch_size, &height_size, &width_size, &depth_size,
          weights_tensor.dims);
  weights_shape_ = {batch_size, height_size, depth_size, width_size};
  RuntimeShape nhwc_shape({batch_size, height_size, width_size, depth_size});
  RuntimeShape nhcw_shape({batch_size, height_size, depth_size, width_size});
  std::vector<uint8_t> nhcw(NumElements(&weights_tensor));
  TransposeParams transpose_params;
  transpose_params.perm_count = 4;
  transpose_params.perm[0] = 0;
  transpose_params.perm[1] = 1;
  transpose_params.perm[2] = 3;
  transpose_params.perm[3] = 2;
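  // Permutation {0, 1, 3, 2} swaps the last two axes, so element (b, h, w, c)
  // of the NHWC source lands at (b, h, c, w) in the NHCW destination.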
  if (weights_tensor.type == kTfLiteInt8) {
    optimized_ops::Transpose<int8_t>(transpose_params, nhwc_shape,
                                     weights_tensor.data.int8, nhcw_shape,
                                     reinterpret_cast<int8_t*>(nhcw.data()));
    // Flip the sign bit on the weight values so that the int8 values are
    // treated as uint8.
    for (int i = 0; i < nhcw.size(); ++i) {
      nhcw[i] = nhcw[i] ^ k8BitSignFlipConstant;
    }
  } else {
    optimized_ops::Transpose<uint8_t>(transpose_params, nhwc_shape,
                                      weights_tensor.data.uint8, nhcw_shape,
                                      nhcw.data());
  }
  auto* const_weights_node = graph_builder_->AddConstNodeWithData(
      weights_shape_.data(), reinterpret_cast<char*>(nhcw.data()),
      weights_tensor.bytes);
  graph_builder_->AddTensorWithID(weights_tensor_id,
                                  const_weights_node->GetID(), 0, true);
  ComputeMinAndMaxQuantValues(weights_tensor, &weights_min_, &weights_max_);
  auto* weights_min_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&weights_min_),
      sizeof(weights_min_));
  auto* weights_max_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&weights_max_),
      sizeof(weights_max_));

  return AddFullyConnectedHelper(
      inputs, outputs, graph_builder_->GetHexagonTensorId(weights_tensor_id),
      TensorID(weights_min_const->GetID(), 0),
      TensorID(weights_max_const->GetID(), 0), graph_builder_, context, this,
      &node_output_);
}

TfLiteStatus MatMulWithConstWeightsOpBuilder::RegisterOutputs(
    const TfLiteIntArray* outputs, TfLiteContext* context) {
  // Should be only 1 output.
  graph_builder_->AddTensorWithID(outputs->data[0], node_output_.first,
                                  node_output_.second);
  return kTfLiteOk;
}

TfLiteStatus MatMulOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs,
                                               const TfLiteIntArray* outputs,
                                               TfLiteContext* context) {
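  // The weights here are not constant, so they are transposed on the Hexagon
  // graph at runtime: this builder node performs the transpose, and a
  // separate MatMul node consumes its output.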
  const int weights_tensor_id = inputs->data[1];
  const auto& weights_tensor = context->tensors[weights_tensor_id];
  int batch_size, height_size, width_size, depth_size;
  GetDims(&batch_size, &height_size, &width_size, &depth_size,
          weights_tensor.dims);
  weights_shape_ = {batch_size, height_size, depth_size, width_size};
  // Permutation for transposing: swap the last two dimensions.
  int permutation[] = {0, 1, 3, 2};
  const int permutation_shape[] = {1, 1, 1, 4};
  auto permutation_node = graph_builder_->AddConstNodeWithData(
      permutation_shape, reinterpret_cast<char*>(permutation),
      4 * sizeof(permutation[0]));
  AddInput(graph_builder_->GetHexagonTensorId(weights_tensor_id));
  AddInput(TensorID(permutation_node->GetID(), 0));

  ComputeMinAndMaxQuantValues(weights_tensor, &weights_min_, &weights_max_);
  auto* weights_min_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&weights_min_),
      sizeof(weights_min_));
  auto* weights_max_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&weights_max_),
      sizeof(weights_max_));
  AddInput(TensorID(weights_min_const->GetID(), 0));
  AddInput(TensorID(weights_max_const->GetID(), 0));

  auto transposed_weights = AddOutput(sizeof(uint8_t), 4, weights_shape_);
  auto transposed_weights_min = AddOutput(sizeof(float), 4, kScalarShape);
  auto transposed_weights_max = AddOutput(sizeof(float), 4, kScalarShape);

  auto* matmul_op = graph_builder_->AddNode(GetTFLiteNodeID());
  matmul_op->SetOpType(OP_QuantizedMatMul_8x8to32);

  return AddFullyConnected(inputs, outputs, transposed_weights,
                           transposed_weights_min, transposed_weights_max,
                           context, matmul_op);
}

TfLiteStatus MatMulOpBuilder::AddFullyConnected(const TfLiteIntArray* inputs,
                                                const TfLiteIntArray* outputs,
                                                const TensorID weights_id,
                                                const TensorID weights_min_id,
                                                const TensorID weights_max_id,
                                                TfLiteContext* context,
                                                OpBuilder* matmul_op) {
  return AddFullyConnectedHelper(inputs, outputs, weights_id, weights_min_id,
                                 weights_max_id, graph_builder_, context,
                                 matmul_op, &node_output_);
}

TfLiteStatus MatMulOpBuilder::RegisterOutputs(const TfLiteIntArray* outputs,
                                              TfLiteContext* context) {
  // Should be only 1 output.
  graph_builder_->AddTensorWithID(outputs->data[0], node_output_.first,
                                  node_output_.second);
  return kTfLiteOk;
}

OpBuilder* CreateMatMulWithConstWeightsOpBuilder(GraphBuilder* graph_builder,
                                                 int op_type) {
  return new MatMulWithConstWeightsOpBuilder(graph_builder, op_type);
}

OpBuilder* CreateMatMulOpBuilder(GraphBuilder* graph_builder, int op_type) {
  return new MatMulOpBuilder(graph_builder, op_type);
}

}  // namespace hexagon
}  // namespace delegates
}  // namespace tflite