1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/lite/delegates/hexagon/hexagon_delegate_kernel.h"
16 
17 #include <vector>
18 
19 #include "tensorflow/lite/builtin_ops.h"
20 #include "tensorflow/lite/c/builtin_op_data.h"
21 #include "tensorflow/lite/c/common.h"
22 #include "tensorflow/lite/context_util.h"
23 #include "tensorflow/lite/delegates/hexagon/hexagon_implementation.h"
24 #include "tensorflow/lite/delegates/hexagon/utils.h"
25 #include "tensorflow/lite/kernels/kernel_util.h"
26 
27 namespace tflite {
28 
29 namespace {
30 // Returns uint64 representing total cycles in 'perf_info' by
31 // combining lo and hi counters.
GetCycles(const hexagon_nn_perfinfo & perf_info)32 inline uint64_t GetCycles(const hexagon_nn_perfinfo& perf_info) {
33   uint64_t res = perf_info.counter_hi;
34   res <<= 32;
35   res |= perf_info.counter_lo;
36   return res;
37 }
38 }  // namespace
39 
ReportError(TfLiteContext * context,const std::string & msg)40 void HexagonDelegateKernel::ReportError(TfLiteContext* context,
41                                         const std::string& msg) {
42   PrintLog();
43   TF_LITE_KERNEL_LOG(context, "Failed: %s.", msg.c_str());
44 }
45 
Init(TfLiteContext * context,const TfLiteDelegateParams * params)46 TfLiteStatus HexagonDelegateKernel::Init(TfLiteContext* context,
47                                          const TfLiteDelegateParams* params) {
48   hexagon_nn_ = HexagonNNImplementation();
49   if (hexagon_nn_ == nullptr) {
50     TF_LITE_KERNEL_LOG(context, "Hexagon interface not available.");
51     return kTfLiteError;
52   }
53 
54   // Ensure Hexagon NNLib is ready to start working.
55   int error = hexagon_nn_->hexagon_nn_config();
56   if (error != 0) {
57     TF_LITE_KERNEL_LOG(context, "hexagon_nn_config failed. Error: %d", error);
58     return kTfLiteError;
59   }
60 
61   // Initialize an empty graph.
62   error = hexagon_nn_->hexagon_nn_init(&graph_id_);
63   if (error != 0) {
64     ReportError(context, "failed to init");
65     return kTfLiteError;
66   }
67   error =
68       hexagon_nn_->hexagon_nn_set_debug_level(graph_id_, params_.debug_level);
69   if (error != 0) {
70     TF_LITE_KERNEL_LOG(context, "Failed to set debug level, error: %d", error);
71     return kTfLiteError;
72   }
73   error = hexagon_nn_->hexagon_nn_set_powersave_level(params_.powersave_level);
74   if (error != 0) {
75     TF_LITE_KERNEL_LOG(context, "Failed to set powersave level, error %d",
76                        error);
77     return kTfLiteError;
78   }
79 
80   for (auto node_index : TfLiteIntArrayView(params->nodes_to_replace)) {
81     nodes_.push_back(node_index);
82   }
83 
84   TF_LITE_ENSURE_STATUS(
85       BuildGraph(context, params->input_tensors, params->output_tensors));
86   return kTfLiteOk;
87 }
88 
Eval(TfLiteContext * context,TfLiteNode * node)89 TfLiteStatus HexagonDelegateKernel::Eval(TfLiteContext* context,
90                                          TfLiteNode* node) {
91   if (hexagon_nn_ == nullptr) {
92     TF_LITE_KERNEL_LOG(context, "Hexagon interface not available.");
93     return kTfLiteError;
94   }
95   // Allocate inputs.
96   std::vector<hexagon_nn_tensordef> input_tensors;
97   for (int input_idx = 0; input_idx < node->inputs->size; ++input_idx) {
98     const auto tensor_index = node->inputs->data[input_idx];
99     if (tensor_index == kTfLiteOptionalTensor) {
100       continue;
101     }
102     TfLiteTensor* tensor = &context->tensors[tensor_index];
103     // Const tensors should have been handled at delegation time..
104     if (tensor->allocation_type != kTfLiteMmapRo) {
105       char* data_ptr = tensor->data.raw;
106 
107       if (tensor->dims->size > 4) {
108         ReportError(context, "Only up to 4d tensor are supported.");
109         return kTfLiteError;
110       }
111       input_tensors.emplace_back();
112       auto& input_tensor = input_tensors.back();
113       input_tensor.data = reinterpret_cast<unsigned char*>(data_ptr);
114       input_tensor.dataLen = tensor->bytes;
115       input_tensor.data_valid_len = tensor->bytes;
116       TF_LITE_ENSURE_STATUS(
117           Get4DShape(&input_tensor.batches, &input_tensor.height,
118                      &input_tensor.width, &input_tensor.depth, tensor->dims));
119     }
120   }
121 
122   // Allocate outputs.
123   std::vector<hexagon_nn_tensordef> output_tensors;
124   for (auto tensor_index : TfLiteIntArrayView(node->outputs)) {
125     if (tensor_index == kTfLiteOptionalTensor) {
126       continue;
127     }
128     TfLiteTensor* tensor = &context->tensors[tensor_index];
129     if (tensor->allocation_type != kTfLiteMmapRo) {
130       if (tensor->dims->size > 4) {
131         ReportError(context, "Only up to 4d tensor are supported.");
132         return kTfLiteError;
133       }
134       output_tensors.emplace_back();
135       auto& output_tensor = output_tensors.back();
136       output_tensor.data = reinterpret_cast<unsigned char*>(tensor->data.raw);
137       output_tensor.dataLen = tensor->bytes;
138     }
139   }
140 
141   if (params_.print_graph_profile) {
142     hexagon_nn_->hexagon_nn_reset_perfinfo(graph_id_, 0);
143   }
144 
145   // Execute.
146   int error = hexagon_nn_->hexagon_nn_execute_new(
147       graph_id_, input_tensors.data(), input_tensors.size(),
148       output_tensors.data(), output_tensors.size());
149   if (error != 0) {
150     ReportError(context, "Failed to execute graph.");
151     return kTfLiteError;
152   }
153 
154   if (params_.print_graph_profile) {
155     PrintPerformanceData(reinterpret_cast<Profiler*>(context->profiler));
156   }
157   return kTfLiteOk;
158 }
159 
ResizeOutputTensors(TfLiteContext * context,TfLiteNode * node)160 TfLiteStatus HexagonDelegateKernel::ResizeOutputTensors(TfLiteContext* context,
161                                                         TfLiteNode* node) {
162   if (!params_.enable_dynamic_batch_size) return kTfLiteError;
163   int new_batch = -1;
164   for (int i = 0; i < params_.input_batch_dimensions->size; ++i) {
165     // If this input has no dynamic shape skip it.
166     if (params_.input_batch_dimensions->data[i] == -1) continue;
167     int input_tensor_index = node->inputs->data[i];
168     TfLiteTensor* input_tensor = &context->tensors[input_tensor_index];
169     new_batch =
170         input_tensor->dims->data[params_.input_batch_dimensions->data[i]];
171     break;
172   }
173   if (new_batch == -1) {
174     TF_LITE_KERNEL_LOG(context, "Invalid Batch size.");
175     return kTfLiteError;
176   }
177   for (int i = 0; i < node->outputs->size; ++i) {
178     // If this output has no dynamic shape skip it.
179     if (params_.output_batch_dimensions->data[i] == -1) continue;
180     int output_tensor_index = node->outputs->data[i];
181     TfLiteTensor* output_tensor = &context->tensors[output_tensor_index];
182     TfLiteIntArray* new_shape = TfLiteIntArrayCopy(output_tensor->dims);
183     new_shape->data[params_.output_batch_dimensions->data[i]] = new_batch;
184     TF_LITE_ENSURE_OK(context,
185                       context->ResizeTensor(context, output_tensor, new_shape));
186   }
187   return kTfLiteOk;
188 }
189 
Prepare(TfLiteContext * context,TfLiteNode * node)190 TfLiteStatus HexagonDelegateKernel::Prepare(TfLiteContext* context,
191                                             TfLiteNode* node) {
192   if (graph_prepared_) {
193     if (!params_.enable_dynamic_batch_size)
194       TF_LITE_KERNEL_LOG(context, "Calling prepare multiple times");
195     // Graph already prepared, but we must resize TFLite output tensors
196     // based on the new input shape.
197     return ResizeOutputTensors(context, node);
198   }
199   if (hexagon_nn_ == nullptr) {
200     ReportError(context, "Hexagon interface not available. prepare");
201     return kTfLiteError;
202   }
203   int status = hexagon_nn_->hexagon_nn_prepare(graph_id_);
204   if (status != 0) {
205     ReportError(context, "Failed to prepare graph.\n");
206     return kTfLiteError;
207   }
208 
209   // Check input/output tensors.
210   std::vector<int> tensors;
211   for (auto tensor_index : TfLiteIntArrayView(node->inputs)) {
212     tensors.push_back(tensor_index);
213   }
214   for (auto tensor_index : TfLiteIntArrayView(node->outputs)) {
215     tensors.push_back(tensor_index);
216   }
217   for (auto tensor_index : tensors) {
218     if (tensor_index == kTfLiteOptionalTensor) {
219       continue;
220     }
221     TfLiteTensor* tensor = &context->tensors[tensor_index];
222     // Const tensors should be added as const nodes during graph construction.
223     if (tensor->allocation_type != kTfLiteMmapRo && tensor->dims->size > 4) {
224       ReportError(context, "Only up to 4d tensor are supported.");
225       return kTfLiteError;
226     }
227   }
228 
229   if (params_.print_graph_debug) {
230     PrintDebuggingGraph();
231   }
232 
233   // Mark graph as prepared, since we can't prepare it multiple times.
234   graph_prepared_ = true;
235 
236   return kTfLiteOk;
237 }
238 
BuildGraph(TfLiteContext * context,const TfLiteIntArray * input_tensors,const TfLiteIntArray * output_tensors)239 TfLiteStatus HexagonDelegateKernel::BuildGraph(
240     TfLiteContext* context, const TfLiteIntArray* input_tensors,
241     const TfLiteIntArray* output_tensors) {
242   builder_.reset(
243       new delegates::hexagon::GraphBuilder(hexagon_nn_, context, graph_id_));
244   if (params_.enable_dynamic_batch_size) {
245     builder_->AddBatchSeqConfig(params_.max_batch_size,
246                                 params_.input_batch_dimensions,
247                                 params_.output_batch_dimensions);
248   }
249   // Add inputs to the graph.
250   TF_LITE_ENSURE_STATUS(builder_->AddInputTensors(input_tensors, context));
251 
252   // Add all ops.
253   TfLiteNode* node;
254   TfLiteRegistration* reg;
255   for (int node_index : nodes_) {
256     TF_LITE_ENSURE_STATUS(
257         context->GetNodeAndRegistration(context, node_index, &node, &reg));
258     // Const inputs needs to be added to the hexagon graph as const nodes.
259     // Adding them earlier here to the graph
260     // - Simplifies separate builders
261     // - Simplifies int8 vs uint8 cases, builders don't need to handle them.
262     for (int i = 0; i < node->inputs->size; ++i) {
263       const int tensor_id = node->inputs->data[i];
264       if (tensor_id == -1) continue;
265       const auto& input_tensor = context->tensors[tensor_id];
266       if (input_tensor.allocation_type == kTfLiteMmapRo) {
267         builder_->AddConstNodeWithData(
268             tensor_id, input_tensor,
269             /*int8_to_uint8*/ (input_tensor.type == kTfLiteInt8));
270       }
271     }
272     auto* op_builder =
273         builder_->AddNodeFromTfLiteOp(reg->builtin_code, node, node_index);
274     TF_LITE_ENSURE_STATUS(
275         op_builder->PopulateSubGraph(node->inputs, node->outputs, context));
276     TF_LITE_ENSURE_STATUS(op_builder->RegisterOutputs(node->outputs, context));
277   }
278 
279   // Add Outputs.
280   TF_LITE_ENSURE_STATUS(builder_->AddOutputTensors(output_tensors, context));
281 
282   builder_->Build();
283 
284   return kTfLiteOk;
285 }
286 
~HexagonDelegateKernel()287 HexagonDelegateKernel::~HexagonDelegateKernel() {
288   if (graph_id_ != -1) {
289     hexagon_nn_->hexagon_nn_teardown(graph_id_);
290   }
291 }
292 
PrintLog()293 void HexagonDelegateKernel::PrintLog() {
294   std::vector<unsigned char> buf(3000000);
295   time_t my_time = time(nullptr);
296   hexagon_nn_->hexagon_nn_getlog(graph_id_, buf.data(), buf.size());
297   printf("----------------\n");
298   printf("Timestamp: %s\n\n", ctime(&my_time));
299   printf("Log\n%s\n", buf.data());
300   printf("----------------\n");
301   fflush(stdout);
302 }
303 
PrintPerformanceData(Profiler * profiler)304 void HexagonDelegateKernel::PrintPerformanceData(Profiler* profiler) {
305   if (profiler == nullptr) {
306     return;
307   }
308   const int kMaxNodes = 2048;
309   const int kMaxNameLen = 100;
310   std::vector<hexagon_nn_perfinfo> perf_data(kMaxNodes);
311   std::vector<char> op_name(kMaxNameLen);
312   uint64_t counter = 0;
313   unsigned int num_nodes;
314   if (hexagon_nn_->hexagon_nn_get_perfinfo(graph_id_, perf_data.data(),
315                                            kMaxNodes, &num_nodes) != 0) {
316     printf("Failed fetching perf data.\n");
317     return;
318   }
319   for (int i = 0; i < num_nodes; i++) {
320     counter = GetCycles(perf_data[i]);
321     int op_type_id = builder_->GetOpTypeId(perf_data[i].node_id);
322     if (op_type_id >= 0 && hexagon_nn_->hexagon_nn_op_id_to_name(
323                                op_type_id, op_name.data(), kMaxNameLen) != 0) {
324       printf("Failed to fetch name for %u with type %d\n", perf_data[i].node_id,
325              op_type_id);
326       continue;
327     }
328     int node_id = builder_->GetTFLiteNodeID(perf_data[i].node_id);
329     if (node_id != -1 && op_type_id >= 0) {
330       profiler->AddEvent((op_type_id < 0 ? "" : op_name.data()),
331                          Profiler::EventType::OPERATOR_INVOKE_EVENT, 0, counter,
332                          node_id);
333     }
334   }
335 }
336 
PrintDebuggingGraph()337 void HexagonDelegateKernel::PrintDebuggingGraph() {
338   const int kMaxBufLen = 100000;
339   std::vector<unsigned char> buf(kMaxBufLen);
340   if (hexagon_nn_->hexagon_nn_snpprint(graph_id_, buf.data(), kMaxBufLen) !=
341       0) {
342     printf("Error fetching graph debug details.\n");
343     return;
344   }
345   printf("------- Graph Debugging Start -------\n");
346   printf("%s\n", buf.data());
347   printf("------- Graph Debugging End -------\n");
348 }
349 
Teardown()350 void HexagonDelegateKernel::Teardown() {
351   auto* hexagon_nn = HexagonNNImplementation();
352   if (hexagon_nn != nullptr) {
353     hexagon_nn->hexagon_nn_global_teardown();
354   }
355 }
356 
InitState()357 void HexagonDelegateKernel::InitState() {
358   auto* hexagon_nn = HexagonNNImplementation();
359   if (hexagon_nn != nullptr) {
360     hexagon_nn->hexagon_nn_global_init();
361   }
362 }
363 }  // namespace tflite
364