1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
/* Before calling this test program, download the model and the label file and
push them to the device as follows.
$ curl \
https://storage.googleapis.com/download.tensorflow.org/models/tensorflow_inception_v3_stripped_optimized_quantized.pb \
-o /tmp/tensorflow_inception_v3_stripped_optimized_quantized.pb
$ adb push /tmp/tensorflow_inception_v3_stripped_optimized_quantized.pb \
/data/local/tmp
$ curl \
https://storage.googleapis.com/download.tensorflow.org/models/imagenet_comp_graph_label_strings.txt \
-o /tmp/imagenet_comp_graph_label_strings.txt
$ adb push /tmp/imagenet_comp_graph_label_strings.txt /data/local/tmp
*/
26 
27 // define EIGEN_USE_THREADS to include quantization_utils.h
28 #define EIGEN_USE_THREADS
29 
30 #include <memory>
31 
32 #include "absl/base/casts.h"
33 #include "tensorflow/core/framework/graph_transfer_info.pb.h"
34 #include "tensorflow/core/framework/remote_fused_graph_execute_info.pb.h"
35 #include "tensorflow/core/framework/tensor_shape.pb.h"
36 #include "tensorflow/core/framework/tensor_testutil.h"
37 #include "tensorflow/core/kernels/hexagon/graph_transfer_utils.h"
38 #include "tensorflow/core/kernels/hexagon/graph_transferer.h"
39 #include "tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h"
40 #include "tensorflow/core/kernels/hexagon/hexagon_ops_definitions.h"
41 #include "tensorflow/core/kernels/i_remote_fused_graph_executor.h"
42 #include "tensorflow/core/kernels/i_remote_fused_graph_ops_definitions.h"
43 #include "tensorflow/core/kernels/quantization_utils.h"
44 #include "tensorflow/core/lib/core/status.h"
45 #include "tensorflow/core/lib/core/status_test_util.h"
46 #include "tensorflow/core/lib/io/path.h"
47 #include "tensorflow/core/lib/strings/str_util.h"
48 #include "tensorflow/core/platform/env.h"
49 #include "tensorflow/core/platform/profile_utils/clock_cycle_profiler.h"
50 #include "tensorflow/core/platform/test.h"
51 #include "tensorflow/core/public/session.h"
52 #include "tensorflow/core/public/session_options.h"
53 
54 namespace tensorflow {
55 
56 using ByteArray = HexagonControlWrapper::ByteArray;
57 
// Paths of the input image and of the model files read by the tests below.
constexpr const char* IMAGE_FILENAME = "/data/local/tmp/img_299x299.bmp";
constexpr const char* MODEL_FILENAME =
    "/data/local/tmp/tensorflow_inception_v3_stripped_optimized_quantized.pb";
constexpr const char* MODEL_WITH_QUANTIZED_INPUT_FILENAME =
    "/data/local/tmp/"
    "tensorflow_inception_v3_stripped_optimized_quantized_with_quantized_input."
    "pb";
constexpr const char* FUSED_MODEL_FILENAME =
    "/data/local/tmp/"
    "tensorflow_inception_v3_stripped_optimized_quantized_fused_hexagon.pb";

// Name given to the fused node, which is also the node fetched when the fused
// graph is run.
constexpr const char* REMOTE_FUSED_GRAPH_EXECUTE_NODE_NAME =
    "remote_fused_graph_execute_node";

constexpr bool USE_SHAPE_INFERENCE = false;

// When true, LoadImage logs every converted pixel.
constexpr bool DBG_DUMP_FLOAT_DATA = false;

// Dimensions of the input image tensor.
constexpr int WIDTH = 299;
constexpr int HEIGHT = 299;
constexpr int DEPTH = 3;

// Index expected to have the highest score in the inference output.
constexpr int EXPECTED_FIRST_RESULT_ID = 59;

// Number of graph executions used to compute the average execution time.
constexpr int EXECUTION_REPEAT_COUNT = 10;
78 
CheckHexagonControllerVersion()79 static void CheckHexagonControllerVersion() {
80   HexagonControlWrapper hexagon_control_wrapper;
81   const int version = hexagon_control_wrapper.GetVersion();
82   ASSERT_GE(version, 1);
83   LOG(INFO) << "Hexagon controller version is " << version;
84 }
85 
DumpTop10Results(const int byte_size,const float * const float_array)86 static void DumpTop10Results(const int byte_size,
87                              const float* const float_array) {
88   const int element_count = byte_size / sizeof(float);
89   const string label_filename =
90       "/data/local/tmp/imagenet_comp_graph_label_strings.txt";
91   string label_str;
92   TF_CHECK_OK(ReadFileToString(Env::Default(), label_filename, &label_str));
93   std::vector<string> labels = str_util::Split(label_str, '\n');
94   GraphTransferUtils::DumpTopNFloatResults(
95       float_array, labels.data(),
96       std::min(element_count, static_cast<int>(labels.size())),
97       10 /* show top_n results */);
98 }
99 
DumpTop10Results(const std::vector<ByteArray> & outputs)100 static void DumpTop10Results(const std::vector<ByteArray>& outputs) {
101   CHECK(outputs.size() == 1);
102   const int byte_size = std::get<1>(outputs.at(0));
103   const float* float_array =
104       reinterpret_cast<float*>(std::get<0>(outputs.at(0)));
105   DumpTop10Results(byte_size, float_array);
106 }
107 
CheckFirstResult(const std::vector<ByteArray> & outputs,const int expected_first_id)108 static void CheckFirstResult(const std::vector<ByteArray>& outputs,
109                              const int expected_first_id) {
110   EXPECT_GE(outputs.size(), 1);
111   const int byte_size = std::get<1>(outputs.at(0));
112   const int element_count = byte_size / sizeof(float);
113   const float* float_array =
114       reinterpret_cast<float*>(std::get<0>(outputs.at(0)));
115   EXPECT_GE(element_count, 1);
116   std::vector<string> labels(element_count);
117   std::priority_queue<std::tuple<float, int, string>> queue =
118       GraphTransferUtils::GetTopNFloatResults(float_array, labels.data(),
119                                               element_count);
120   const std::tuple<float, int, string>& entry = queue.top();
121   EXPECT_EQ(expected_first_id, std::get<1>(entry));
122 }
123 
LoadImage(std::vector<float> * img_floats_ptr)124 static void LoadImage(std::vector<float>* img_floats_ptr) {
125   CHECK(img_floats_ptr != nullptr);
126   std::vector<float>& img_floats = *img_floats_ptr;
127   // Read the data from the bitmap file into memory
128   string bmp;
129   TF_CHECK_OK(ReadFileToString(Env::Default(), IMAGE_FILENAME, &bmp));
130   const int fsize = bmp.size();
131   LOG(INFO) << "Read " << IMAGE_FILENAME << ", size = " << fsize << "bytes";
132   const int64 pixel_count = WIDTH * HEIGHT * DEPTH;
133   CHECK(fsize >= 22 /* pos of height */ + sizeof(int));
134   CHECK(bmp.data() != nullptr);
135   uint8* const img_bytes = absl::bit_cast<uint8*>(bmp.data());
136   const int header_size = *(reinterpret_cast<int*>(img_bytes + 10));
137   LOG(INFO) << "header size = " << header_size;
138   const int size = *(reinterpret_cast<int*>(img_bytes + 14));
139   LOG(INFO) << "image size = " << size;
140   const int width = *(reinterpret_cast<int*>(img_bytes + 18));
141   LOG(INFO) << "width = " << width;
142   const int height = *(reinterpret_cast<int*>(img_bytes + 22));
143   LOG(INFO) << "height = " << height;
144   CHECK(fsize >= (WIDTH + 1) * WIDTH * 3 + header_size);
145 
146   uint8* const bmp_pixels = &img_bytes[header_size];
147 
148   img_floats.resize(pixel_count);
149   int src_pixel_index = 0;
150   CHECK(pixel_count % 3 == 0);
151   for (int i = 0; i < pixel_count / 3; ++i) {
152     const int src_pos = 3 * src_pixel_index;
153     const int dst_pos = 3 * i;
154     ++src_pixel_index;
155     CHECK(src_pos + 2 + header_size < fsize);
156     CHECK(dst_pos + 2 < pixel_count);
157     // Convert (B, G, R) in bitmap to (R, G, B)
158     img_floats[dst_pos] =
159         (static_cast<float>(bmp_pixels[src_pos + 2]) - 128.0f) / 128.0f;
160     img_floats[dst_pos + 1] =
161         (static_cast<float>(bmp_pixels[src_pos + 1]) - 128.0f) / 128.0f;
162     img_floats[dst_pos + 2] =
163         (static_cast<float>(bmp_pixels[src_pos]) - 128.0f) / 128.0f;
164     if (DBG_DUMP_FLOAT_DATA) {
165       LOG(INFO) << i << " (" << img_floats[dst_pos] << ", "
166                 << img_floats[dst_pos + 1] << ", " << img_floats[dst_pos + 2]
167                 << ") (" << static_cast<int>(bmp_pixels[src_pos + 2]) << ", "
168                 << static_cast<int>(bmp_pixels[src_pos + 1]) << ", "
169                 << static_cast<int>(bmp_pixels[src_pos]) << ")";
170     }
171     if (src_pixel_index % (WIDTH + 1) == (WIDTH - 1)) {
172       // skip bmp padding
173       ++src_pixel_index;
174     }
175   }
176 }
177 
QuantizeImage(const std::vector<float> & float_vec,std::vector<quint8> * quint8_vec)178 static void QuantizeImage(const std::vector<float>& float_vec,
179                           std::vector<quint8>* quint8_vec) {
180   quint8_vec->resize(float_vec.size());
181   for (int i = 0; i < float_vec.size(); ++i) {
182     quint8_vec->at(i) = FloatToQuantized<quint8>(float_vec[i], -1.0f, 1.0f);
183   }
184 }
185 
BuildImageTensor(const std::vector<float> & img_floats)186 static Tensor BuildImageTensor(const std::vector<float>& img_floats) {
187   LOG(INFO) << "Loading image finished.";
188   Tensor img_tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH});
189   CHECK_EQ(WIDTH * HEIGHT * DEPTH, img_floats.size());
190   CHECK_EQ(img_tensor.TotalBytes(), img_floats.size() * sizeof(float));
191   LOG(INFO) << "Copy data to tensor.";
192   std::memcpy(img_tensor.flat<float>().data(), img_floats.data(),
193               img_tensor.TotalBytes());
194   return img_tensor;
195 }
196 
BuildQuantizedImageTensor(const std::vector<quint8> & quantized_img)197 static Tensor BuildQuantizedImageTensor(
198     const std::vector<quint8>& quantized_img) {
199   LOG(INFO) << "Loading image finished.";
200   Tensor img_tensor(DT_QUINT8, {1, WIDTH, HEIGHT, DEPTH});
201   CHECK_EQ(WIDTH * HEIGHT * DEPTH, quantized_img.size());
202   CHECK_EQ(img_tensor.TotalBytes(), quantized_img.size() * sizeof(quint8));
203   LOG(INFO) << "Copy data to tensor.";
204   std::memcpy(img_tensor.flat<quint8>().data(), quantized_img.data(),
205               img_tensor.TotalBytes());
206   return img_tensor;
207 }
208 
209 /* static */ RemoteFusedGraphExecuteInfo
BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(const GraphTransferInfo & graph_transfer_info)210 BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(
211     const GraphTransferInfo& graph_transfer_info) {
212   RemoteFusedGraphExecuteInfo execute_info;
213   execute_info.set_executor_name("build_hexagon_remote_fused_graph_executor");
214   for (const GraphTransferGraphInputNodeInfo& input :
215        graph_transfer_info.graph_input_node_info()) {
216     execute_info.add_graph_input_node_name(input.name());
217     RemoteFusedGraphExecuteInfo::TensorShapeTypeProto& tensor_shape_type =
218         *execute_info.add_default_graph_input_tensor_shape();
219     tensor_shape_type.set_dtype(input.dtype());
220     TensorShapeProto& tensor_shape_proto = *tensor_shape_type.mutable_shape();
221     for (const int64 dim : input.shape()) {
222       tensor_shape_proto.add_dim()->set_size(dim);
223     }
224   }
225 
226   for (const GraphTransferGraphOutputNodeInfo& output :
227        graph_transfer_info.graph_output_node_info()) {
228     execute_info.add_graph_output_node_name(output.name());
229     RemoteFusedGraphExecuteInfo::TensorShapeTypeProto& tensor_shape_type =
230         *execute_info.add_default_graph_output_tensor_shape();
231     tensor_shape_type.set_dtype(output.dtype());
232     TensorShapeProto& tensor_shape_proto = *tensor_shape_type.mutable_shape();
233     for (const int64 dim : output.shape()) {
234       tensor_shape_proto.add_dim()->set_size(dim);
235     }
236   }
237 
238   execute_info.set_serialized_executor_parameters(
239       graph_transfer_info.SerializeAsString());
240   return execute_info;
241 }
242 
RunInferenceByHexagonControlWrapper(const GraphTransferer & gt,const Tensor & img_tensor)243 static void RunInferenceByHexagonControlWrapper(const GraphTransferer& gt,
244                                                 const Tensor& img_tensor) {
245   const RemoteFusedGraphExecuteInfo execute_info =
246       BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(
247           gt.GetGraphTransferInfo());
248 
249   HexagonControlWrapper hexagon_control_wrapper;
250   // 1. Initialize hexagon
251   hexagon_control_wrapper.Init(execute_info);
252 
253   // 2. Setup graph in hexagon
254   hexagon_control_wrapper.SetupGraph();
255 
256   // 3. Fill input node's output
257   hexagon_control_wrapper.FillInputNode("Mul", img_tensor);
258 
259   // 4. Execute graph
260   const int64 start_time_us = Env::Default()->NowMicros();
261   for (int i = 0; i < EXECUTION_REPEAT_COUNT; ++i) {
262     hexagon_control_wrapper.ExecuteGraph();
263   }
264   const int64 end_time_us = Env::Default()->NowMicros();
265 
266   // 5-1. Read output node's outputs
267   std::vector<ByteArray> outputs;
268   hexagon_control_wrapper.ReadOutputNode("softmax", &outputs);
269 
270   // 5-2. Dump results
271   DumpTop10Results(outputs);
272   CheckFirstResult(outputs, EXPECTED_FIRST_RESULT_ID);
273   LOG(INFO) << "Average execution time = "
274             << (end_time_us - start_time_us) / EXECUTION_REPEAT_COUNT << "us";
275 
276   // 6. Teardown graph in hexagon
277   hexagon_control_wrapper.TeardownGraph();
278 
279   // 7. Finalize hexagon
280   hexagon_control_wrapper.Finalize();
281 }
282 
RunFusedGraph(const GraphDef & fused_graph_def)283 static void RunFusedGraph(const GraphDef& fused_graph_def) {
284   // Setup input tensor
285   std::vector<float> img_floats;
286   LoadImage(&img_floats);
287 
288   LOG(INFO) << "Ioading image finished.";
289   const Tensor img_tensor = BuildImageTensor(img_floats);
290 
291   // Setup session
292   std::vector<Tensor> output_tensors;
293   SessionOptions session_options;
294   session_options.env = Env::Default();
295   std::unique_ptr<Session> session =
296       std::unique_ptr<Session>(NewSession(session_options));
297   TF_ASSERT_OK(session->Create(fused_graph_def));
298 
299   // Setup session arguments
300   RunOptions run_options;
301   run_options.set_trace_level(RunOptions::FULL_TRACE);
302   RunMetadata run_metadata;
303 
304   std::vector<std::pair<string, tensorflow::Tensor>> input_tensors;
305   input_tensors.emplace_back("Mul", img_tensor);
306   std::vector<string> output_node_names;
307   output_node_names.emplace_back(REMOTE_FUSED_GRAPH_EXECUTE_NODE_NAME);
308 
309   LOG(INFO) << "Run graph";
310   // Run inference with all node as output
311   TF_ASSERT_OK(session->Run(run_options, input_tensors, output_node_names, {},
312                             &output_tensors, &run_metadata));
313   ASSERT_EQ(1, output_tensors.size());
314   const Tensor& output_tensor = output_tensors.at(0);
315   LOG(INFO) << "Output byte size = " << output_tensor.TotalBytes();
316   LOG(INFO) << "Output shape = " << output_tensor.shape().DebugString();
317   DumpTop10Results(
318       output_tensor.TotalBytes(),
319       reinterpret_cast<const float*>(output_tensor.flat<float>().data()));
320 }
321 
CompareGraphTransferInfo(const GraphTransferInfo & gfi0,const GraphTransferInfo & gfi1)322 static void CompareGraphTransferInfo(const GraphTransferInfo& gfi0,
323                                      const GraphTransferInfo& gfi1) {
324   LOG(INFO) << "(1) node count: " << gfi1.node_info_size() << ", "
325             << gfi1.const_node_info_size();
326 
327   // 1. check node_info
328   ASSERT_EQ(gfi0.node_info_size(), gfi1.node_info_size());
329   for (int i = 0; i < gfi0.node_info_size(); ++i) {
330     const GraphTransferNodeInfo& ni0 = gfi0.node_info(i);
331     const GraphTransferNodeInfo& ni1 = gfi1.node_info(i);
332     EXPECT_EQ(ni0.DebugString(), ni1.DebugString());
333     EXPECT_EQ(ni0.ByteSizeLong(), ni1.ByteSizeLong());
334   }
335 
336   // 2. check const_node_info
337   ASSERT_EQ(gfi0.const_node_info_size(), gfi1.const_node_info_size());
338   for (int i = 0; i < gfi0.const_node_info_size(); ++i) {
339     const GraphTransferConstNodeInfo& cni0 = gfi0.const_node_info(i);
340     const GraphTransferConstNodeInfo& cni1 = gfi1.const_node_info(i);
341     ASSERT_EQ(cni0.shape_size(), cni1.shape_size());
342     for (int j = 0; j < cni0.shape_size(); ++j) {
343       EXPECT_EQ(cni0.shape(j), cni1.shape(j));
344     }
345     EXPECT_EQ(cni0.ByteSizeLong(), cni1.ByteSizeLong());
346     EXPECT_EQ(cni0.DebugString(), cni1.DebugString());
347   }
348 
349   // 3. check node_input_info
350   ASSERT_EQ(gfi0.node_input_info_size(), gfi1.node_input_info_size());
351   for (int i = 0; i < gfi0.node_input_info_size(); ++i) {
352     const GraphTransferNodeInputInfo& nii0 = gfi0.node_input_info(i);
353     const GraphTransferNodeInputInfo& nii1 = gfi1.node_input_info(i);
354     EXPECT_EQ(nii0.ByteSizeLong(), nii1.ByteSizeLong());
355     EXPECT_EQ(nii0.DebugString(), nii1.DebugString());
356   }
357 
358   // 4. check node_output_info
359   ASSERT_EQ(gfi0.node_output_info_size(), gfi1.node_output_info_size());
360   for (int i = 0; i < gfi0.node_output_info_size(); ++i) {
361     const GraphTransferNodeOutputInfo& noi0 = gfi0.node_output_info(i);
362     const GraphTransferNodeOutputInfo& noi1 = gfi1.node_output_info(i);
363     ASSERT_EQ(noi0.max_byte_size_size(), noi1.max_byte_size_size());
364     for (int j = 0; j < noi0.max_byte_size_size(); ++j) {
365       EXPECT_EQ(noi0.max_byte_size(j), noi1.max_byte_size(j));
366     }
367     EXPECT_EQ(noi0.ByteSizeLong(), noi1.ByteSizeLong());
368     EXPECT_EQ(noi0.DebugString(), noi1.DebugString());
369   }
370 
371   // 5. check graph_input_node_info
372   ASSERT_EQ(gfi0.graph_input_node_info_size(),
373             gfi1.graph_input_node_info_size());
374   for (int i = 0; i < gfi0.graph_input_node_info_size(); ++i) {
375     const GraphTransferGraphInputNodeInfo& gini0 =
376         gfi0.graph_input_node_info(i);
377     const GraphTransferGraphInputNodeInfo& gini1 =
378         gfi0.graph_input_node_info(i);
379     EXPECT_EQ(gini0.ByteSizeLong(), gini1.ByteSizeLong());
380     EXPECT_EQ(gini0.DebugString(), gini1.DebugString());
381   }
382 
383   // 6. check graph_output_node_info
384   ASSERT_EQ(gfi0.graph_output_node_info_size(),
385             gfi1.graph_output_node_info_size());
386   for (int i = 0; i < gfi0.graph_output_node_info_size(); ++i) {
387     const GraphTransferGraphOutputNodeInfo& goni0 =
388         gfi0.graph_output_node_info(i);
389     const GraphTransferGraphOutputNodeInfo& goni1 =
390         gfi0.graph_output_node_info(i);
391     EXPECT_EQ(goni0.ByteSizeLong(), goni1.ByteSizeLong());
392     EXPECT_EQ(goni0.DebugString(), goni1.DebugString());
393   }
394 }
395 
// CAVEAT: This test only runs when you specify the hexagon library using the
// makefile.
// CAVEAT: This test is disabled by default because hexagon can keep only
// two inception graphs in memory, and those are allocated by two other tests.
// Currently, the memory of these graphs is not released until the process is
// killed.
// TODO(satok): Figure out how to release memory on hexagon without process
// termination.
403 #ifdef USE_HEXAGON_LIBS
// Loads the model with a float "Mul" input (dry run enabled, shape inference
// disabled), profiles the load, and runs inference on the test image through
// the Hexagon control wrapper.
TEST(GraphTransferer,
     DISABLED_RunInceptionV3OnHexagonExampleWithHexagonWrapper) {
  LOG(INFO) << "Run inception v3 on hexagon with hexagon controller";
  CheckHexagonControllerVersion();

  const IRemoteFusedGraphOpsDefinitions& hexagon_ops =
      HexagonOpsDefinitions::getInstance();
  std::vector<string> output_node_names = {"softmax"};
  std::vector<std::pair<string, Tensor>> inputs;
  inputs.emplace_back("Mul", Tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH}));

  GraphTransferer gt;
  gt.EnableStrictCheckMode(false);

  // Profile the graph load.
  profile_utils::CpuUtils::EnableClockCycleProfiling(true);
  ClockCycleProfiler load_profiler;
  load_profiler.Start();
  Status status = gt.LoadGraphFromProtoFile(
      hexagon_ops, MODEL_FILENAME, inputs, output_node_names,
      /*is_text_proto=*/false,
      /*shape_inference_for_unknown_shape=*/false,
      /*dry_run_for_unknown_shape=*/true);
  ASSERT_TRUE(status.ok()) << status;
  load_profiler.Stop();
  load_profiler.DumpStatistics("LoadGraphFromProtoFile");

  std::vector<float> image_data;
  LoadImage(&image_data);
  const Tensor image_tensor = BuildImageTensor(image_data);
  RunInferenceByHexagonControlWrapper(gt, image_tensor);
}
435 
// Loads the quantized-input model with a quint8 "Mul" input (dry run enabled,
// shape inference disabled), profiles the load, and runs inference on the
// quantized test image through the Hexagon control wrapper.
TEST(GraphTransferer,
     DISABLED_RunInceptionV3OnHexagonExampleWithHexagonWrapperQuantizedInput) {
  LOG(INFO) << "Run inception v3 on hexagon with hexagon controller "
            << "with quantized input";
  CheckHexagonControllerVersion();

  const IRemoteFusedGraphOpsDefinitions& hexagon_ops =
      HexagonOpsDefinitions::getInstance();
  std::vector<string> output_node_names = {"softmax"};
  std::vector<std::pair<string, Tensor>> inputs;
  inputs.emplace_back("Mul", Tensor(DT_QUINT8, {1, WIDTH, HEIGHT, DEPTH}));

  GraphTransferer gt;
  gt.EnableStrictCheckMode(false);

  // Profile the graph load.
  profile_utils::CpuUtils::EnableClockCycleProfiling(true);
  ClockCycleProfiler load_profiler;
  load_profiler.Start();
  Status status = gt.LoadGraphFromProtoFile(
      hexagon_ops, MODEL_WITH_QUANTIZED_INPUT_FILENAME, inputs,
      output_node_names,
      /*is_text_proto=*/false,
      /*shape_inference_for_unknown_shape=*/false,
      /*dry_run_for_unknown_shape=*/true);
  ASSERT_TRUE(status.ok()) << status;
  load_profiler.Stop();
  load_profiler.DumpStatistics("LoadGraphFromProtoFile");

  // Load the image as floats, then quantize it for the quint8 input.
  std::vector<float> image_data;
  LoadImage(&image_data);
  std::vector<quint8> quantized_image_data;
  QuantizeImage(image_data, &quantized_image_data);
  const Tensor image_tensor = BuildQuantizedImageTensor(quantized_image_data);
  RunInferenceByHexagonControlWrapper(gt, image_tensor);
}
470 
// Loads the model with a float "Mul" input using shape inference (dry run
// disabled), profiles the load, and runs inference on the test image through
// the Hexagon control wrapper.
TEST(GraphTransferer,
     DISABLED_RunInceptionV3OnHexagonExampleWithHexagonWrapperShapeInference) {
  LOG(INFO) << "Run inception v3 on hexagon with hexagon controller";
  CheckHexagonControllerVersion();

  const IRemoteFusedGraphOpsDefinitions& hexagon_ops =
      HexagonOpsDefinitions::getInstance();
  std::vector<string> output_node_names = {"softmax"};
  std::vector<std::pair<string, Tensor>> inputs;
  inputs.emplace_back("Mul", Tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH}));

  GraphTransferer gt;
  gt.EnableStrictCheckMode(false);

  // Profile the graph load.
  profile_utils::CpuUtils::EnableClockCycleProfiling(true);
  ClockCycleProfiler load_profiler;
  load_profiler.Start();
  Status status = gt.LoadGraphFromProtoFile(
      hexagon_ops, MODEL_FILENAME, inputs, output_node_names,
      /*is_text_proto=*/false,
      /*shape_inference_for_unknown_shape=*/true,
      /*dry_run_for_unknown_shape=*/false);
  ASSERT_TRUE(status.ok()) << status;
  load_profiler.Stop();
  load_profiler.DumpStatistics("LoadGraphFromProtoFile");

  std::vector<float> image_data;
  LoadImage(&image_data);
  const Tensor image_tensor = BuildImageTensor(image_data);
  RunInferenceByHexagonControlWrapper(gt, image_tensor);
}
502 
// Reads the model from MODEL_FILENAME, fuses it into a graph containing the
// node REMOTE_FUSED_GRAPH_EXECUTE_NODE_NAME (input "Mul", output "softmax")
// and runs the fused graph with RunFusedGraph.
//
// The test image is loaded inside RunFusedGraph, so it is not loaded here.
TEST(GraphTransferer, RunInceptionV3OnHexagonExampleWithTfRuntime) {
  LOG(INFO) << "Fuse and run inception v3 on hexagon with tf runtime";
  CheckHexagonControllerVersion();

  std::vector<std::pair<string, Tensor>> inputs;
  inputs.emplace_back("Mul", Tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH}));
  std::vector<string> outputs = {"softmax"};

  GraphDef graph_def;
  Status status = ReadBinaryProto(Env::Default(), MODEL_FILENAME, &graph_def);
  // Print the status on failure, as the other tests in this file do.
  ASSERT_TRUE(status.ok()) << status;

  LOG(INFO) << "Build fused graph";
  GraphDef fused_graph_def = GraphTransferUtils::BuildFusedGraphDef(
      HexagonOpsDefinitions::getInstance(),
      REMOTE_FUSED_GRAPH_EXECUTE_NODE_NAME, inputs, outputs, &graph_def);

  RunFusedGraph(fused_graph_def);
}
530 
// Reads an already fused graph from FUSED_MODEL_FILENAME and runs it with
// RunFusedGraph.
TEST(GraphTransferer, DISABLED_RunInceptionV3OnHexagonExampleWithFusedGraph) {
  LOG(INFO) << "Run inception v3 with fused graph";
  CheckHexagonControllerVersion();

  GraphDef fused_graph_def;
  Status status =
      ReadBinaryProto(Env::Default(), FUSED_MODEL_FILENAME, &fused_graph_def);
  // Do not run an empty or partially read graph when the file cannot be read.
  ASSERT_TRUE(status.ok()) << status;
  RunFusedGraph(fused_graph_def);
}
540 
TEST(GraphTransferer,DISABLED_CheckShapeInferencePerformance)541 TEST(GraphTransferer, DISABLED_CheckShapeInferencePerformance) {
542   CheckHexagonControllerVersion();
543   profile_utils::CpuUtils::EnableClockCycleProfiling(true);
544 
545   const IRemoteFusedGraphOpsDefinitions* ops_definitions =
546       &HexagonOpsDefinitions::getInstance();
547   std::vector<std::pair<string, Tensor>> inputs;
548   inputs.emplace_back("Mul", Tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH}));
549   std::vector<string> output_node_names = {"softmax"};
550 
551   RemoteFusedGraphExecuteUtils::TensorShapeMap output_tensor_info0;
552   GraphTransferer gt0;
553   gt0.EnableStrictCheckMode(false);
554   ClockCycleProfiler prof0;
555   prof0.Start();
556   Status status = gt0.LoadGraphFromProtoFile(
557       *ops_definitions, MODEL_FILENAME, inputs, output_node_names,
558       false,  // is_text_proto
559       false,  // shape_inference_for_unknown_shape
560       true    // dry_run_for_unknown_shape
561   );
562   const GraphTransferInfo& gfi0 = gt0.GetGraphTransferInfo();
563 
564   ASSERT_TRUE(status.ok());
565   prof0.Stop();
566   prof0.DumpStatistics("Estimate shape by dryrun");
567 
568   LOG(INFO) << "(0) node count: " << gfi0.node_info_size() << ", "
569             << gfi0.const_node_info_size();
570 
571   RemoteFusedGraphExecuteUtils::TensorShapeMap output_tensor_info1;
572   GraphTransferer gt1;
573   gt1.EnableStrictCheckMode(true);
574   ClockCycleProfiler prof1;
575   prof1.Start();
576   status = gt1.LoadGraphFromProtoFile(
577       *ops_definitions, MODEL_FILENAME, inputs, output_node_names,
578       false,  // is_text_proto
579       true,   // shape_inference_for_unknown_shape
580       false   // dry_run_for_unknown_shape
581   );
582   const GraphTransferInfo& gfi1 = gt1.GetGraphTransferInfo();
583 
584   ASSERT_TRUE(status.ok());
585   prof1.Stop();
586   prof1.DumpStatistics("Estiame shape by shape inference");
587 
588   CompareGraphTransferInfo(gfi0, gfi1);
589 
590   const RemoteFusedGraphExecuteInfo ei0 =
591       BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(gfi0);
592   const RemoteFusedGraphExecuteInfo ei1 =
593       BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(gfi1);
594 
595   GraphTransferInfo rgfi0;
596   rgfi0.ParseFromString(ei0.serialized_executor_parameters());
597   GraphTransferInfo rgfi1;
598   rgfi1.ParseFromString(ei1.serialized_executor_parameters());
599 
600   CompareGraphTransferInfo(rgfi0, rgfi1);
601   CompareGraphTransferInfo(gfi0, rgfi0);
602   CompareGraphTransferInfo(gfi1, rgfi1);
603 }
604 #endif
605 
606 }  // namespace tensorflow
607