/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/hexagon/builders/matmul_builder.h"

#include <stdint.h>

#include <limits>

#include "hexagon/hexagon_nn_ops.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/hexagon/hexagon_nn/hexagon_nn.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace tflite {
namespace delegates {
namespace hexagon {
namespace {
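// Expands `dims` into 4-D {batch, height, width, depth} values, right-
// aligning the given dimensions and padding the leading ones with 1. For
// example, a 2-D tensor of shape {3, 5} yields batch=1, height=1, width=3,
// depth=5.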
void GetDims(int* batch_size, int* height_size, int* width_size,
             int* depth_size, const TfLiteIntArray* dims) {
  int* dim[] = {batch_size, height_size, width_size, depth_size};
  for (int i = 0; i < 4; ++i) *(dim[i]) = 1;
  for (int i = 4 - dims->size; i < 4; ++i) {
    *dim[i] = dims->data[i - (4 - dims->size)];
  }
}

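// XOR-ing an 8-bit value with 0x80 flips its sign bit, mapping a
// two's-complement int8 value v to the uint8 value v + 128.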
constexpr uint8_t k8BitSignFlipConstant = 0x80;

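// Builds the Hexagon subgraph for a quantized fully-connected layer:
// a quantized MatMul producing int32 output, an optional QuantizedBiasAdd
// when a bias tensor is present, and a final Requantize_32to8 back to uint8.
// `weights_id` (with its min/max) may come from a const node or from an
// on-graph transpose of the weights.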
TfLiteStatus AddFullyConnectedHelper(const TfLiteIntArray* inputs,
                                     const TfLiteIntArray* outputs,
                                     const OpBuilder::TensorID weights_id,
                                     const OpBuilder::TensorID weights_min_id,
                                     const OpBuilder::TensorID weights_max_id,
                                     GraphBuilder* graph_builder,
                                     TfLiteContext* context,
                                     OpBuilder* matmul_op,
                                     OpBuilder::TensorID* node_output) {
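  // Min/max quantization ranges are attached as scalar (1x1x1x1) float
  // const nodes throughout this subgraph.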
  static int scalar_shape[] = {1, 1, 1, 1};
  // Data tensor.
  int data_tensor_id = inputs->data[0];
  const auto& data_tensor = context->tensors[data_tensor_id];
  float data_min, data_max;
  TF_LITE_ENSURE_STATUS(OpBuilder::ComputeMinAndMaxQuantValues(
      data_tensor, &data_min, &data_max));
  auto* data_min_const = graph_builder->AddConstNodeWithData(
      scalar_shape, reinterpret_cast<char*>(&data_min), sizeof(data_min));
  auto* data_max_const = graph_builder->AddConstNodeWithData(
      scalar_shape, reinterpret_cast<char*>(&data_max), sizeof(data_max));

  // Data and weight tensors in the order the quantized MatMul op expects:
  // data, weights, data_min, data_max, weights_min, weights_max.
  matmul_op->AddInput(graph_builder->GetHexagonTensorId(data_tensor_id));
  matmul_op->AddInput(weights_id);
  matmul_op->AddInput(OpBuilder::TensorID(data_min_const->GetID(), 0));
  matmul_op->AddInput(OpBuilder::TensorID(data_max_const->GetID(), 0));
  matmul_op->AddInput(weights_min_id);
  matmul_op->AddInput(weights_max_id);

  // Outputs for the MatMul node, which are in int32 format.
  // Output shape should still be the same.
  int output_batch_size, output_height_size, output_width_size,
      output_depth_size;
  GetDims(&output_batch_size, &output_height_size, &output_width_size,
          &output_depth_size, context->tensors[outputs->data[0]].dims);
  const auto& matmul_out =
      matmul_op->AddOutput(sizeof(int), 4,
                           {output_batch_size, output_height_size,
                            output_width_size, output_depth_size});
  const auto& matmul_out_min =
      matmul_op->AddOutput(sizeof(float), 4, scalar_shape);
  const auto& matmul_out_max =
      matmul_op->AddOutput(sizeof(float), 4, scalar_shape);

  // Bias tensor.
  int bias_tensor_id = inputs->data[2];
  OpBuilder::TensorID matmul_and_bias_out = matmul_out,
                      matmul_and_bias_out_min = matmul_out_min,
                      matmul_and_bias_out_max = matmul_out_max;
  if (bias_tensor_id != -1) {
    const auto& bias_tensor = context->tensors[bias_tensor_id];
    float bias_min, bias_max;
    OpBuilder::ComputeMinAndMaxQuantValues(bias_tensor, &bias_min, &bias_max);
    auto* bias_min_const = graph_builder->AddConstNodeWithData(
        scalar_shape, reinterpret_cast<char*>(&bias_min), sizeof(bias_min));
    auto* bias_max_const = graph_builder->AddConstNodeWithData(
        scalar_shape, reinterpret_cast<char*>(&bias_max), sizeof(bias_max));

    // MatMul + Bias.
    auto* bias_add_op = graph_builder->AddNode(matmul_op->GetTFLiteNodeID());
    bias_add_op->SetOpType(OP_QuantizedBiasAdd_32p32to32);
    bias_add_op->AddInput(matmul_out);
    bias_add_op->AddInput(graph_builder->GetHexagonTensorId(bias_tensor_id));
    bias_add_op->AddInput(matmul_out_min);
    bias_add_op->AddInput(matmul_out_max);
    bias_add_op->AddInput(OpBuilder::TensorID(bias_min_const->GetID(), 0));
    bias_add_op->AddInput(OpBuilder::TensorID(bias_max_const->GetID(), 0));
    matmul_and_bias_out =
        bias_add_op->AddOutput(sizeof(int), 4,
                               {output_batch_size, output_height_size,
                                output_width_size, output_depth_size});
    matmul_and_bias_out_min =
        bias_add_op->AddOutput(sizeof(float), 4, scalar_shape);
    matmul_and_bias_out_max =
        bias_add_op->AddOutput(sizeof(float), 4, scalar_shape);
  }

  float output_min, output_max;
  // Quantize 32-bit result into 8-bit format using output tensor min/max.
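  // Requantize_32to8 linearly rescales values from the int32 input range
  // [matmul_and_bias_out_min, matmul_and_bias_out_max] onto the uint8 output
  // range [output_min, output_max].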
  OpBuilder::ComputeMinAndMaxQuantValues(context->tensors[outputs->data[0]],
                                         &output_min, &output_max);
  auto* output_min_const = graph_builder->AddConstNodeWithData(
      scalar_shape, reinterpret_cast<char*>(&output_min), sizeof(output_min));
  auto* output_max_const = graph_builder->AddConstNodeWithData(
      scalar_shape, reinterpret_cast<char*>(&output_max), sizeof(output_max));
  auto* quantize_biasadd_op =
      graph_builder->AddNode(matmul_op->GetTFLiteNodeID());
  quantize_biasadd_op->SetOpType(OP_Requantize_32to8);
  quantize_biasadd_op->AddInput(matmul_and_bias_out);
  quantize_biasadd_op->AddInput(matmul_and_bias_out_min);
  quantize_biasadd_op->AddInput(matmul_and_bias_out_max);
  quantize_biasadd_op->AddInput(
      OpBuilder::TensorID(output_min_const->GetID(), 0));
  quantize_biasadd_op->AddInput(
      OpBuilder::TensorID(output_max_const->GetID(), 0));
  *node_output =
      quantize_biasadd_op->AddOutput(sizeof(uint8_t), 4,
                                     {output_batch_size, output_height_size,
                                      output_width_size, output_depth_size});
  quantize_biasadd_op->AddOutput(sizeof(float), 4, scalar_shape);
  quantize_biasadd_op->AddOutput(sizeof(float), 4, scalar_shape);
  return kTfLiteOk;
}

}  // namespace

// The TFLite 'Fully-connected' quantized op corresponds to the following
// subgraph in Hexagon:
// Data (8-bit), Weights (const, 8-bit) => MatMul => MatMul out (int32)
// MatMul out (int32), Bias (int32) => QuantizedBiasAdd => BiasAdd out (int32)
// BiasAdd out (int32) => Requantize_32to8 => Output (8-bit)
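// For example (shapes illustrative): an input of shape [1, 128] with weights
// of shape [16, 128] yields int32 MatMul/BiasAdd results of shape [1, 16],
// which are requantized into the final uint8 output of shape [1, 16].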
TfLiteStatus MatMulWithConstWeightsOpBuilder::PopulateSubGraph(
    const TfLiteIntArray* inputs, const TfLiteIntArray* outputs,
    TfLiteContext* context) {
  // Weights vector.
  int weights_tensor_id = inputs->data[1];
  const auto& weights_tensor = context->tensors[weights_tensor_id];
  if (weights_tensor.allocation_type != kTfLiteMmapRo) {
    context->ReportError(
        context, "Weights tensor doesn't have correct allocation type: %s",
        weights_tensor.name);
    return kTfLiteError;
  }
  int batch_size, height_size, width_size, depth_size;
  // The Hexagon library expects the weight tensor in NHCW, while TFLite uses
  // NHWC, so transpose NHWC -> NHCW by swapping the last two dimensions.
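  // E.g., weight dims {1, 1, 4, 8} become {1, 1, 8, 4}, with element (w, d)
  // of the original landing at position (d, w).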
  GetDims(&batch_size, &height_size, &width_size, &depth_size,
          weights_tensor.dims);
  weights_shape_ = {batch_size, height_size, depth_size, width_size};
  RuntimeShape nhwc_shape({batch_size, height_size, width_size, depth_size});
  RuntimeShape nhcw_shape({batch_size, height_size, depth_size, width_size});
  std::vector<uint8_t> nhcw(NumElements(&weights_tensor));
  TransposeParams transpose_params;
  transpose_params.perm_count = 4;
  transpose_params.perm[0] = 0;
  transpose_params.perm[1] = 1;
  transpose_params.perm[2] = 3;
  transpose_params.perm[3] = 2;
  if (weights_tensor.type == kTfLiteInt8) {
    optimized_ops::Transpose<int8_t>(transpose_params, nhwc_shape,
                                     weights_tensor.data.int8, nhcw_shape,
                                     reinterpret_cast<int8_t*>(nhcw.data()));
    // Flip bits on the weight values so that the int8 values are treated
    // as uint8.
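    // E.g., int8 -128 (0x80) becomes uint8 0, and int8 127 (0x7F) becomes
    // uint8 255.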
    for (int i = 0; i < nhcw.size(); ++i) {
      nhcw[i] = nhcw[i] ^ k8BitSignFlipConstant;
    }
  } else {
    optimized_ops::Transpose<uint8_t>(transpose_params, nhwc_shape,
                                      weights_tensor.data.uint8, nhcw_shape,
                                      nhcw.data());
  }
  auto* const_weights_node = graph_builder_->AddConstNodeWithData(
      weights_shape_.data(), reinterpret_cast<char*>(nhcw.data()),
      weights_tensor.bytes);
  graph_builder_->AddTensorWithID(weights_tensor_id,
                                  const_weights_node->GetID(), 0, true);
  ComputeMinAndMaxQuantValues(weights_tensor, &weights_min_, &weights_max_);
  auto* weights_min_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&weights_min_),
      sizeof(weights_min_));
  auto* weights_max_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&weights_max_),
      sizeof(weights_max_));

  return AddFullyConnectedHelper(
      inputs, outputs, graph_builder_->GetHexagonTensorId(weights_tensor_id),
      TensorID(weights_min_const->GetID(), 0),
      TensorID(weights_max_const->GetID(), 0), graph_builder_, context, this,
      &node_output_);
}

TfLiteStatus MatMulWithConstWeightsOpBuilder::RegisterOutputs(
    const TfLiteIntArray* outputs, TfLiteContext* context) {
  // Should be only 1 output.
  graph_builder_->AddTensorWithID(outputs->data[0], node_output_.first,
                                  node_output_.second);
  return kTfLiteOk;
}

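// Handles matrix multiplication with non-constant weights: this builder node
// transposes the weights on-graph (NHWC -> NHCW, mirroring the const-weights
// case above), then feeds the transposed weights into the same
// MatMul/BiasAdd/Requantize subgraph.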
TfLiteStatus MatMulOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs,
                                               const TfLiteIntArray* outputs,
                                               TfLiteContext* context) {
  const int weights_tensor_id = inputs->data[1];
  const auto& weights_tensor = context->tensors[weights_tensor_id];
  int batch_size, height_size, width_size, depth_size;
  GetDims(&batch_size, &height_size, &width_size, &depth_size,
          weights_tensor.dims);
  weights_shape_ = {batch_size, height_size, depth_size, width_size};
  // Permutation for transposing NHWC -> NHCW: keep batch and height, swap
  // the last two dimensions.
  int permutation[] = {0, 1, 3, 2};
  const int permutation_shape[] = {1, 1, 1, 4};
  auto permutation_node = graph_builder_->AddConstNodeWithData(
      permutation_shape, reinterpret_cast<char*>(permutation),
      4 * sizeof(permutation[0]));
  AddInput(graph_builder_->GetHexagonTensorId(weights_tensor_id));
  AddInput(TensorID(permutation_node->GetID(), 0));

  ComputeMinAndMaxQuantValues(weights_tensor, &weights_min_, &weights_max_);
  auto* weights_min_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&weights_min_),
      sizeof(weights_min_));
  auto* weights_max_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&weights_max_),
      sizeof(weights_max_));
  AddInput(TensorID(weights_min_const->GetID(), 0));
  AddInput(TensorID(weights_max_const->GetID(), 0));

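  // Outputs of the transpose node: the transposed weights plus their
  // quantization range, which passes through unchanged.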
  auto transposed_weights = AddOutput(sizeof(uint8_t), 4, weights_shape_);
  auto transposed_weights_min = AddOutput(sizeof(float), 4, kScalarShape);
  auto transposed_weights_max = AddOutput(sizeof(float), 4, kScalarShape);

  auto* matmul_op = graph_builder_->AddNode(GetTFLiteNodeID());
  matmul_op->SetOpType(OP_QuantizedMatMul_8x8to32);

  // Propagate any error from building the fully-connected subgraph.
  return AddFullyConnected(inputs, outputs, transposed_weights,
                           transposed_weights_min, transposed_weights_max,
                           context, matmul_op);
}

TfLiteStatus MatMulOpBuilder::AddFullyConnected(const TfLiteIntArray* inputs,
                                                const TfLiteIntArray* outputs,
                                                const TensorID weights_id,
                                                const TensorID weights_min_id,
                                                const TensorID weights_max_id,
                                                TfLiteContext* context,
                                                OpBuilder* matmul_op) {
  return AddFullyConnectedHelper(inputs, outputs, weights_id, weights_min_id,
                                 weights_max_id, graph_builder_, context,
                                 matmul_op, &node_output_);
}

TfLiteStatus MatMulOpBuilder::RegisterOutputs(const TfLiteIntArray* outputs,
                                              TfLiteContext* context) {
  // Should be only 1 output.
  graph_builder_->AddTensorWithID(outputs->data[0], node_output_.first,
                                  node_output_.second);
  return kTfLiteOk;
}

OpBuilder* CreateMatMulWithConstWeightsOpBuilder(GraphBuilder* graph_builder,
                                                 int op_type) {
  return new MatMulWithConstWeightsOpBuilder(graph_builder, op_type);
}

OpBuilder* CreateMatMulOpBuilder(GraphBuilder* graph_builder, int op_type) {
  return new MatMulOpBuilder(graph_builder, op_type);
}

}  // namespace hexagon
}  // namespace delegates
}  // namespace tflite