1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/lite/delegates/hexagon/builders/op_builder.h"
16 
17 #include "hexagon/hexagon_nn_ops.h"
18 #include "tensorflow/lite/builtin_ops.h"
19 #include "tensorflow/lite/c/common.h"
20 #include "tensorflow/lite/delegates/hexagon/builders/op_factory.h"
21 #include <farmhash.h>
22 
23 namespace tflite {
24 namespace delegates {
25 namespace hexagon {
26 namespace {
27 // Farmhash Fingerprint
// Mixes two 64-bit fingerprints into one 64-bit value with a Murmur-inspired
// sequence of multiplications and xor-shifts. Argument order matters: swapping
// `l` and `h` generally produces a different result.
inline uint64_t CombineFingerprints(uint64_t l, uint64_t h) {
  constexpr uint64_t kMultiplier = 0x9ddfea08eb382d69ULL;

  // First round folds both inputs together.
  uint64_t folded = (l ^ h) * kMultiplier;
  folded ^= folded >> 47;

  // Second round re-injects `h`, followed by two finalization rounds.
  uint64_t result = (h ^ folded) * kMultiplier;
  result = (result ^ (result >> 44)) * kMultiplier;
  result = (result ^ (result >> 41)) * kMultiplier;
  return result;
}
40 
ComputeHash(const int shape[],const char * data,const int data_len)41 inline uint64_t ComputeHash(const int shape[], const char* data,
42                             const int data_len) {
43   return CombineFingerprints(
44       ::util::Fingerprint64(data, data_len),
45       ::util::Fingerprint64(reinterpret_cast<const char*>(shape),
46                               sizeof(shape[0]) * 4));
47 }
48 
ComputeHash(const TfLiteTensor & tensor,const int shape[],int int8_to_uint8)49 inline uint64_t ComputeHash(const TfLiteTensor& tensor, const int shape[],
50                             int int8_to_uint8) {
51   auto data_hash = ComputeHash(shape, tensor.data.raw_const, tensor.bytes);
52   auto int8_to_uint8_hash = ::util::Fingerprint64(
53       reinterpret_cast<char*>(&int8_to_uint8), sizeof(int8_to_uint8));
54   return CombineFingerprints(data_hash, int8_to_uint8_hash);
55 }
56 
GetElementSize(TfLiteType type)57 int GetElementSize(TfLiteType type) {
58   switch (type) {
59     case kTfLiteFloat32:
60       return sizeof(float);
61     case kTfLiteBool:
62       return sizeof(bool);
63     case kTfLiteInt32:
64       return sizeof(int32_t);
65     case kTfLiteInt8:
66       return sizeof(int8_t);
67     case kTfLiteUInt8:
68       return sizeof(uint8_t);
69     default:
70       return sizeof(int8_t);
71   }
72 }
73 }  // namespace
74 
CreateOpBuilderFromTfLiteOp(int op_type,TfLiteNode * node)75 OpBuilder* GraphBuilder::CreateOpBuilderFromTfLiteOp(int op_type,
76                                                      TfLiteNode* node) {
77   switch (op_type) {
78     case kTfLiteBuiltinAdd:
79       return CreateArithmeticBuilder(this, OP_QuantizedAdd_8p8to8);
80     case kTfLiteBuiltinArgMax:
81       return CreateArgMinMaxOpBuilder(this, OP_ArgMax_8toInt32);
82     case kTfLiteBuiltinArgMin:
83       return CreateArgMinMaxOpBuilder(this, OP_ArgMin_8);
84     case kTfLiteBuiltinMul:
85       // The 32-bit version of Mul is more accurate, and robust to disparities
86       // in input/output ranges.
87       return CreateArithmeticBuilder(this, OP_QuantizedMul_8x8to32);
88     case kTfLiteBuiltinSub:
89       return CreateArithmeticBuilder(this, OP_QuantizedSub_8p8to8);
90     case kTfLiteBuiltinMean:
91       return CreateReduceBuilder(this, OP_QuantizedMean_8);
92     case kTfLiteBuiltinSum:
93       return CreateReduceBuilder(this, OP_QuantizedSum_8to32);
94     case kTfLiteBuiltinPad:
95       return CreatePadBuilder(this, OP_QuantizedPad_8);
96     case kTfLiteBuiltinMirrorPad:
97       return CreateMirrorPadBuilder(this, OP_MirrorPad_8);
98     case kTfLiteBuiltinFullyConnected: {
99       const auto& weights_tensor = context_->tensors[node->inputs->data[1]];
100       if (weights_tensor.allocation_type == kTfLiteMmapRo)
101         return CreateMatMulWithConstWeightsOpBuilder(
102             this, OP_QuantizedMatMul_8x8to32);
103       else
104         return CreateMatMulOpBuilder(this, OP_Transpose_8);
105     }
106     case kTfLiteBuiltinAveragePool2d:
107       return CreatePool2DBuilder(this, OP_QuantizedAvgPool_8);
108     case kTfLiteBuiltinMaxPool2d:
109       return CreatePool2DBuilder(this, OP_QuantizedMaxPool_8);
110     case kTfLiteBuiltinConcatenation:
111       return CreateConcatBuilder(this, OP_QuantizedConcat_8);
112     case kTfLiteBuiltinConv2d:
113       return CreateConv2DBuilder(this, OP_Supernode_8x8p32to8);
114     case kTfLiteBuiltinTransposeConv:
115       return CreateTransposeConv2DBuilder(
116           this, OP_QuantizedTransposeConv2d_8x8p32to8);
117     case kTfLiteBuiltinDepthwiseConv2d:
118       return CreateConv2DBuilder(this, OP_DepthwiseSupernode_8x8p32to8);
119     case kTfLiteBuiltinReshape:
120       return CreateReshapeBuilder(this, OP_Reshape);
121     case kTfLiteBuiltinSoftmax:
122       return CreateSoftmaxBuilder(this, OP_QuantizedSoftmax_8);
123     case kTfLiteBuiltinResizeNearestNeighbor:
124       return CreateResizeNearestNeighborBuilder(this,
125                                                 OP_ResizeNearestNeighbor_8);
126     case kTfLiteBuiltinL2Normalization:
127       return CreateL2NormalizationBuilder(this, OP_L2Normalize_8);
128     case kTfLiteBuiltinRelu:
129       return CreateActivationBuilder(this, OP_QuantizedRelu_8);
130     case kTfLiteBuiltinRelu6:
131       return CreateActivationBuilder(this, OP_QuantizedReluX_8);
132     case kTfLiteBuiltinTanh:
133       return CreateActivationBuilder(this, OP_QuantizedTanh_8);
134     case kTfLiteBuiltinLogistic:
135       return CreateActivationBuilder(this, OP_QuantizedSigmoid_8);
136     case kTfLiteBuiltinSplit:
137       return CreateSplitBuilder(this, OP_QuantizedSplit_8);
138     case kTfLiteBuiltinResizeBilinear:
139       return CreateResizeBilinearOpBuilder(this, OP_QuantizedResizeBilinear_8);
140     case kTfLiteBuiltinNeg:
141       return CreateNegOpBuilder(this, OP_QuantizedNeg_8);
142     case kTfLiteBuiltinTranspose:
143       return CreateTransposeBuilder(this, OP_Transpose_8);
144     case kTfLiteBuiltinSpaceToDepth:
145       return CreateSpaceToDepthBuilder(this, OP_SpaceToDepth_8);
146     case kTfLiteBuiltinDepthToSpace:
147       return CreateSpaceToDepthBuilder(this, OP_DepthToSpace_8);
148     case kTfLiteBuiltinQuantize:
149       return CreateQuantizeBuilder(this, OP_Requantize_8to8);
150     case kTfLiteBuiltinHardSwish:
151       return CreateHardSwishBuilder(this, OP_QuantizedHardSwish_8);
152     case kTfLiteBuiltinMinimum:
153       return CreateMinMaxBuilder(this, OP_QuantizedMinimum_8);
154     case kTfLiteBuiltinMaximum:
155       return CreateMinMaxBuilder(this, OP_QuantizedMaximum_8);
156     case kTfLiteBuiltinSlice:
157       return CreateSliceOpBuilder(this, OP_QuantizedSlice_8);
158     case kTfLiteBuiltinPack:
159       return CreatePackBuilder(this, OP_QuantizedPack_8);
160     case kTfLiteBuiltinStridedSlice:
161       return CreateStridedSliceBuilder(this, OP_QuantizedStridedSlice_8);
162     case kTfLiteBuiltinSquaredDifference:
163       return CreateSquaredDifferenceOpBuilder(this, OP_QuantizedSub_8p8to8);
164     case kTfLiteBuiltinRsqrt:
165       return CreateRSqrtOpBuilder(this, OP_QuantizedSqrt_8);
166     default:
167       context_->ReportError(context_, "Op not supported: %d", op_type);
168       return nullptr;
169   }
170 }
171 
LookupConstData(uint64_t cache_key)172 OpBuilder* GraphBuilder::LookupConstData(uint64_t cache_key) {
173   auto lookup_result = cache_.find(cache_key);
174   if (lookup_result != cache_.end()) return lookup_result->second;
175   return nullptr;
176 }
177 
AddToCache(uint64_t cache_key,OpBuilder * value)178 void GraphBuilder::AddToCache(uint64_t cache_key, OpBuilder* value) {
179   cache_[cache_key] = value;
180 }
181 
AddConstNodeWithData(const int shape[],char * data,int data_size)182 OpBuilder* GraphBuilder::AddConstNodeWithData(const int shape[], char* data,
183                                               int data_size) {
184   auto cache_key = ComputeHash(shape, data, data_size);
185   if (auto lookup_result = LookupConstData(cache_key)) return lookup_result;
186   builders_.emplace_back(new OpBuilder(this, OP_Const));
187   builders_.back()->SetConstNode();
188   builders_.back()->SetNodeId(builders_.size());
189   int error = hexagon_nn_->hexagon_nn_append_const_node(
190       graph_id_, builders_.size(), shape[0], shape[1], shape[2], shape[3],
191       reinterpret_cast<const uint8_t*>(data), data_size);
192   if (error != 0) {
193     TF_LITE_KERNEL_LOG(context_, "Error adding const node with shape id: %d",
194                        static_cast<int>(builders_.size()));
195     return nullptr;
196   }
197   AddToCache(cache_key, builders_.back().get());
198   return builders_.back().get();
199 }
200 
AddConstNodeWithData(int tensor_id,const TfLiteTensor & tensor,bool int8_to_uint8)201 OpBuilder* GraphBuilder::AddConstNodeWithData(int tensor_id,
202                                               const TfLiteTensor& tensor,
203                                               bool int8_to_uint8) {
204   // Fetch shape of tensor and pad 1's so it is always 4D.
205   int batch_size, height_size, width_size, depth_size;
206   GetDims(&batch_size, &height_size, &width_size, &depth_size, tensor.dims);
207   const int shape[] = {batch_size, height_size, width_size, depth_size};
208 
209   auto cache_key = ComputeHash(tensor, shape, int8_to_uint8 ? 1 : 0);
210   if (auto lookup_result = LookupConstData(cache_key)) {
211     // If tensor is cached but with no id, that can happen when the same
212     // data is added from a constant value (not tensor). We can cache the data
213     // and reuse it.
214     // We assign the tensor to this cached const node before returning.
215     if (!HasTensor(tensor_id))
216       AddTensorWithID(tensor_id, lookup_result->GetID(), 0);
217     return lookup_result;
218   }
219   builders_.emplace_back(new OpBuilder(this, OP_Const));
220   const int node_id = builders_.size();
221   builders_.back()->SetConstNode();
222   builders_.back()->SetNodeId(node_id);
223   int error = hexagon_nn_->hexagon_nn_append_const_node(
224       graph_id_, node_id, batch_size, height_size, width_size, depth_size,
225       reinterpret_cast<const uint8_t*>(tensor.data.raw), tensor.bytes);
226   if (error > 0) {
227     context_->ReportError(
228         context_, "Failed to add const node for tensor with id: %d", tensor_id);
229     return nullptr;
230   }
231   AddTensorWithID(tensor_id, node_id, 0);
232   // We need to return the builder with result, so we can't rely
233   // on builders_.back() as it can change while casting, so we hold pointer
234   // and update with value from casting if needed.
235   OpBuilder* result_builder = builders_.back().get();
236   // Cast int8 to uint8 if requested.
237   // This will add cast op to uint8 and update tensor map to point
238   // to the casted tensor.
239   if (int8_to_uint8 && tensor.type == kTfLiteInt8) {
240     AddCastOp(context_, OP_Quantized_CastInt8ToUInt8, tensor_id,
241               &result_builder);
242   }
243   AddToCache(cache_key, result_builder);
244   return result_builder;
245 }
246 
247 // TODO(b/154604279): Support these casting ops in Hexagon op profiling (which
248 // seems to key tensors on a single op, which may not be the case now).
AddCastOp(TfLiteContext * context,int op_type,int tensor_id,OpBuilder ** cast_op_builder)249 TfLiteStatus GraphBuilder::AddCastOp(TfLiteContext* context, int op_type,
250                                      int tensor_id,
251                                      OpBuilder** cast_op_builder) {
252   // Create a new OpBuilder for casting the tensor.
253   OpBuilder* cast_builder = CreateCastBuilder(this, op_type);
254   builders_.emplace_back(cast_builder);
255   cast_builder->SetNodeId(builders_.size());
256   // We cast the tensor in-place, so there is only 1 input & output which is the
257   // same.
258   auto* tensor_data = TfLiteIntArrayCreate(1);
259   tensor_data->data[0] = tensor_id;
260 
261   TF_LITE_ENSURE_STATUS(
262       cast_builder->PopulateSubGraph(tensor_data, tensor_data, context));
263   TF_LITE_ENSURE_STATUS(cast_builder->RegisterOutputs(tensor_data, context));
264 
265   TfLiteIntArrayFree(tensor_data);
266   if (cast_op_builder != nullptr) *cast_op_builder = cast_builder;
267   return kTfLiteOk;
268 }
269 
AddInputTensors(const TfLiteIntArray * input_tensors,TfLiteContext * context)270 TfLiteStatus GraphBuilder::AddInputTensors(const TfLiteIntArray* input_tensors,
271                                            TfLiteContext* context) {
272   auto* input_op = AddNode();
273   input_op->SetOpType(OP_INPUT);
274 
275   // We need to track num_inputs since not all input_tensors are actual input
276   // data. Some are constants.
277   int num_inputs = 0;
278   for (int i = 0; i < input_tensors->size; ++i) {
279     const int tensor_id = input_tensors->data[i];
280     const auto& tensor = context->tensors[tensor_id];
281     if (tensor.allocation_type == kTfLiteMmapRo) continue;
282     input_op->AddOutput(tensor.dims, GetElementSize(tensor.type));
283     AddTensorWithID(tensor_id, input_op->GetID(), num_inputs);
284     // If tensor is of type int8, add an op to cast it to uint8.
285     if (tensor.type == kTfLiteInt8) {
286       TF_LITE_ENSURE_STATUS(AddCastOp(context, OP_Quantized_CastInt8ToUInt8,
287                                       tensor_id, /*cast_op_builder=*/nullptr));
288     }
289     ++num_inputs;
290   }
291 
292   return kTfLiteOk;
293 }
294 
AddOutputTensors(const TfLiteIntArray * output_tensors,TfLiteContext * context)295 TfLiteStatus GraphBuilder::AddOutputTensors(
296     const TfLiteIntArray* output_tensors, TfLiteContext* context) {
297   std::vector<OpBuilder::TensorID> hexagon_output_ids;
298   hexagon_output_ids.reserve(output_tensors->size);
299 
300   for (int i = 0; i < output_tensors->size; ++i) {
301     const int tensor_id = output_tensors->data[i];
302     const auto& tensor = context->tensors[tensor_id];
303     // If tensor is of type int8, add an op to cast it to uint8.
304     if (tensor.type == kTfLiteInt8) {
305       TF_LITE_ENSURE_STATUS(AddCastOp(context, OP_Quantized_CastUInt8ToInt8,
306                                       tensor_id, /*cast_op_builder=*/nullptr));
307     }
308     hexagon_output_ids.push_back(GetHexagonTensorId(tensor_id));
309   }
310 
311   // Add Hexagon OUTPUT op.
312   auto* output_op = AddNode();
313   output_op->SetOpType(OP_OUTPUT);
314   for (auto hexagon_output : hexagon_output_ids) {
315     output_op->AddInput(hexagon_output);
316   }
317 
318   return kTfLiteOk;
319 }
320 
AddOutput(const TfLiteIntArray * dims,int element_size)321 OpBuilder::TensorID OpBuilder::AddOutput(const TfLiteIntArray* dims,
322                                          int element_size) {
323   op_node_.outputs.push_back(hexagon_nn_output());
324   op_node_.outputs.back().elementsize = element_size;
325   op_node_.outputs.back().rank = 4;
326   // TODO(karimnosseir): What is a good to estimate the max size ?
327   int batch_size, height_size, width_size, depth_size;
328   GetDims(&batch_size, &height_size, &width_size, &depth_size, dims);
329   auto& max_sizes = op_node_.outputs.back().max_sizes;
330   if (graph_builder_->GraphHasDynamicBatch()) {
331     max_sizes[0] = graph_builder_->GetMaxBatchSize();
332   } else {
333     max_sizes[0] = batch_size;
334   }
335   max_sizes[1] = height_size;
336   max_sizes[2] = width_size;
337   max_sizes[3] = depth_size;
338   return TensorID(GetID(), op_node_.outputs.size() - 1);
339 }
340 
AddOutput(int elementsize,int rank,const int * max_sizes_vect)341 OpBuilder::TensorID OpBuilder::AddOutput(int elementsize, int rank,
342                                          const int* max_sizes_vect) {
343   op_node_.outputs.push_back(hexagon_nn_output());
344   op_node_.outputs.back().elementsize = elementsize;
345   op_node_.outputs.back().rank = rank;
346   auto& max_sizes = op_node_.outputs.back().max_sizes;
347   for (int i = 0; i < rank; ++i) {
348     max_sizes[i] = max_sizes_vect[i];
349   }
350   if (graph_builder_->GraphHasDynamicBatch()) {
351     max_sizes[0] = graph_builder_->GetMaxBatchSize();
352   }
353   return TensorID(GetID(), op_node_.outputs.size() - 1);
354 }
355 
AddOutput(int elementsize,int rank,const std::vector<int> & max_sizes_vect)356 OpBuilder::TensorID OpBuilder::AddOutput(
357     int elementsize, int rank, const std::vector<int>& max_sizes_vect) {
358   return AddOutput(elementsize, rank, max_sizes_vect.data());
359 }
360 
Build()361 const OpNode* OpBuilder::Build() {
362   for (const auto& id : input_ids_) {
363     op_node_.inputs.push_back(hexagon_nn_input());
364     op_node_.inputs.back().src_id = id.first;
365     op_node_.inputs.back().output_idx = id.second;
366   }
367   return &op_node_;
368 }
369 
ComputeAndAddMinAndMax(TfLiteContext * context,const TfLiteTensor & tensor)370 TfLiteStatus OpBuilder::ComputeAndAddMinAndMax(TfLiteContext* context,
371                                                const TfLiteTensor& tensor) {
372   float tensor_min, tensor_max;
373   TF_LITE_ENSURE_STATUS(
374       ComputeMinAndMaxQuantValues(tensor, &tensor_min, &tensor_max));
375   auto* min_const_node = graph_builder_->AddConstNodeWithData(
376       kScalarShape, reinterpret_cast<char*>(&tensor_min), sizeof(tensor_min));
377   auto* max_const_node = graph_builder_->AddConstNodeWithData(
378       kScalarShape, reinterpret_cast<char*>(&tensor_max), sizeof(tensor_max));
379   AddInput(TensorID(min_const_node->GetID(), 0));
380   AddInput(TensorID(max_const_node->GetID(), 0));
381 
382   return kTfLiteOk;
383 }
384 
// Out-of-class definition of the static constexpr member
// OpBuilder::kScalarShape (its initializer lives with the in-class
// declaration). Before C++17 this definition is required when the member is
// odr-used; in this file it is passed as an array argument to
// AddConstNodeWithData.
constexpr int OpBuilder::kScalarShape[];
387 
AddNode(int tflite_node_index)388 OpBuilder* GraphBuilder::AddNode(int tflite_node_index) {
389   OpBuilder* op = new OpBuilder(this, OP_Nop);
390   builders_.emplace_back(op);
391   op->SetNodeId(builders_.size());
392   op->SetTFLiteNodeId(tflite_node_index);
393   return op;
394 }
395 
AddNodeFromTfLiteOp(int op_type,TfLiteNode * node,int tflite_node_index)396 OpBuilder* GraphBuilder::AddNodeFromTfLiteOp(int op_type, TfLiteNode* node,
397                                              int tflite_node_index) {
398   OpBuilder* op = CreateOpBuilderFromTfLiteOp(op_type, node);
399   builders_.emplace_back(op);
400   op->SetNodeId(builders_.size());
401   op->SetTFLiteNodeId(tflite_node_index);
402   op->SetBuiltinData(node->builtin_data);
403   op->SetTfLiteNode(node);
404   return op;
405 }
406 
AddBatchSeqConfig(int max_size_for_batch,TfLiteIntArray * input_batch_dimensions,TfLiteIntArray * output_batch_dimensions)407 void GraphBuilder::AddBatchSeqConfig(int max_size_for_batch,
408                                      TfLiteIntArray* input_batch_dimensions,
409                                      TfLiteIntArray* output_batch_dimensions) {
410   OpBuilder* batch_seq_node =
411       CreateBatchSeqBuilder(this, OP_BatchSeqConfig, max_size_for_batch,
412                             input_batch_dimensions, output_batch_dimensions);
413   builders_.emplace_back(batch_seq_node);
414   batch_seq_node->SetNodeId(builders_.size());
415   batch_seq_node->PopulateSubGraph(nullptr, nullptr, nullptr);
416   max_size_for_batch_ = max_size_for_batch;
417 }
418 
419 }  // namespace hexagon
420 }  // namespace delegates
421 }  // namespace tflite
422