1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
/* Before calling this test program, download the model and labels and push
them to the device as follows.
$ curl https://storage.googleapis.com/download.tensorflow.org/models/tensorflow_inception_v3_stripped_optimized_quantized.pb \
    -o /tmp/tensorflow_inception_v3_stripped_optimized_quantized.pb
$ adb push /tmp/tensorflow_inception_v3_stripped_optimized_quantized.pb \
    /data/local/tmp
$ curl https://storage.googleapis.com/download.tensorflow.org/models/imagenet_comp_graph_label_strings.txt \
    -o /tmp/imagenet_comp_graph_label_strings.txt
$ adb push /tmp/imagenet_comp_graph_label_strings.txt /data/local/tmp
The tests also read a 299x299 bitmap from /data/local/tmp/img_299x299.bmp.
*/
26
27 // define EIGEN_USE_THREADS to include quantization_utils.h
28 #define EIGEN_USE_THREADS
29
#include <cstring>
#include <memory>

#include "absl/base/casts.h"
#include "tensorflow/core/framework/graph_transfer_info.pb.h"
#include "tensorflow/core/framework/remote_fused_graph_execute_info.pb.h"
#include "tensorflow/core/framework/tensor_shape.pb.h"
#include "tensorflow/core/framework/tensor_testutil.h"
#include "tensorflow/core/kernels/hexagon/graph_transfer_utils.h"
#include "tensorflow/core/kernels/hexagon/graph_transferer.h"
#include "tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h"
#include "tensorflow/core/kernels/hexagon/hexagon_ops_definitions.h"
#include "tensorflow/core/kernels/i_remote_fused_graph_executor.h"
#include "tensorflow/core/kernels/i_remote_fused_graph_ops_definitions.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/profile_utils/clock_cycle_profiler.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/public/session_options.h"
53
54 namespace tensorflow {
55
56 using ByteArray = HexagonControlWrapper::ByteArray;
57
// Paths of the files the tests read from the device.
// Bitmap decoded by LoadImage().
constexpr const char* IMAGE_FILENAME = "/data/local/tmp/img_299x299.bmp";
// Inception v3 model that takes a float input.
constexpr const char* MODEL_FILENAME =
    "/data/local/tmp/tensorflow_inception_v3_stripped_optimized_quantized.pb";
// Inception v3 model that takes a quantized (quint8) input.
constexpr const char* MODEL_WITH_QUANTIZED_INPUT_FILENAME =
    "/data/local/tmp/"
    "tensorflow_inception_v3_stripped_optimized_quantized_with_quantized_input."
    "pb";
// Graph that has already been fused for hexagon.
constexpr const char* FUSED_MODEL_FILENAME =
    "/data/local/tmp/"
    "tensorflow_inception_v3_stripped_optimized_quantized_fused_hexagon.pb";

// Name given to the fused node; also the node fetched by RunFusedGraph().
constexpr const char* REMOTE_FUSED_GRAPH_EXECUTE_NODE_NAME =
    "remote_fused_graph_execute_node";
constexpr bool USE_SHAPE_INFERENCE = false;

// When true, LoadImage() logs every converted pixel.
constexpr bool DBG_DUMP_FLOAT_DATA = false;
// Dimensions of the input image / input tensor.
constexpr int WIDTH = 299;
constexpr int HEIGHT = 299;
constexpr int DEPTH = 3;
// Index expected at the top of the output scores (see CheckFirstResult()).
constexpr int EXPECTED_FIRST_RESULT_ID = 59;
// Number of ExecuteGraph() calls averaged when reporting execution time.
constexpr int EXECUTION_REPEAT_COUNT = 10;
78
CheckHexagonControllerVersion()79 static void CheckHexagonControllerVersion() {
80 HexagonControlWrapper hexagon_control_wrapper;
81 const int version = hexagon_control_wrapper.GetVersion();
82 ASSERT_GE(version, 1);
83 LOG(INFO) << "Hexagon controller version is " << version;
84 }
85
DumpTop10Results(const int byte_size,const float * const float_array)86 static void DumpTop10Results(const int byte_size,
87 const float* const float_array) {
88 const int element_count = byte_size / sizeof(float);
89 const string label_filename =
90 "/data/local/tmp/imagenet_comp_graph_label_strings.txt";
91 string label_str;
92 TF_CHECK_OK(ReadFileToString(Env::Default(), label_filename, &label_str));
93 std::vector<string> labels = str_util::Split(label_str, '\n');
94 GraphTransferUtils::DumpTopNFloatResults(
95 float_array, labels.data(),
96 std::min(element_count, static_cast<int>(labels.size())),
97 10 /* show top_n results */);
98 }
99
DumpTop10Results(const std::vector<ByteArray> & outputs)100 static void DumpTop10Results(const std::vector<ByteArray>& outputs) {
101 CHECK(outputs.size() == 1);
102 const int byte_size = std::get<1>(outputs.at(0));
103 const float* float_array =
104 reinterpret_cast<float*>(std::get<0>(outputs.at(0)));
105 DumpTop10Results(byte_size, float_array);
106 }
107
CheckFirstResult(const std::vector<ByteArray> & outputs,const int expected_first_id)108 static void CheckFirstResult(const std::vector<ByteArray>& outputs,
109 const int expected_first_id) {
110 EXPECT_GE(outputs.size(), 1);
111 const int byte_size = std::get<1>(outputs.at(0));
112 const int element_count = byte_size / sizeof(float);
113 const float* float_array =
114 reinterpret_cast<float*>(std::get<0>(outputs.at(0)));
115 EXPECT_GE(element_count, 1);
116 std::vector<string> labels(element_count);
117 std::priority_queue<std::tuple<float, int, string>> queue =
118 GraphTransferUtils::GetTopNFloatResults(float_array, labels.data(),
119 element_count);
120 const std::tuple<float, int, string>& entry = queue.top();
121 EXPECT_EQ(expected_first_id, std::get<1>(entry));
122 }
123
LoadImage(std::vector<float> * img_floats_ptr)124 static void LoadImage(std::vector<float>* img_floats_ptr) {
125 CHECK(img_floats_ptr != nullptr);
126 std::vector<float>& img_floats = *img_floats_ptr;
127 // Read the data from the bitmap file into memory
128 string bmp;
129 TF_CHECK_OK(ReadFileToString(Env::Default(), IMAGE_FILENAME, &bmp));
130 const int fsize = bmp.size();
131 LOG(INFO) << "Read " << IMAGE_FILENAME << ", size = " << fsize << "bytes";
132 const int64 pixel_count = WIDTH * HEIGHT * DEPTH;
133 CHECK(fsize >= 22 /* pos of height */ + sizeof(int));
134 CHECK(bmp.data() != nullptr);
135 uint8* const img_bytes = absl::bit_cast<uint8*>(bmp.data());
136 const int header_size = *(reinterpret_cast<int*>(img_bytes + 10));
137 LOG(INFO) << "header size = " << header_size;
138 const int size = *(reinterpret_cast<int*>(img_bytes + 14));
139 LOG(INFO) << "image size = " << size;
140 const int width = *(reinterpret_cast<int*>(img_bytes + 18));
141 LOG(INFO) << "width = " << width;
142 const int height = *(reinterpret_cast<int*>(img_bytes + 22));
143 LOG(INFO) << "height = " << height;
144 CHECK(fsize >= (WIDTH + 1) * WIDTH * 3 + header_size);
145
146 uint8* const bmp_pixels = &img_bytes[header_size];
147
148 img_floats.resize(pixel_count);
149 int src_pixel_index = 0;
150 CHECK(pixel_count % 3 == 0);
151 for (int i = 0; i < pixel_count / 3; ++i) {
152 const int src_pos = 3 * src_pixel_index;
153 const int dst_pos = 3 * i;
154 ++src_pixel_index;
155 CHECK(src_pos + 2 + header_size < fsize);
156 CHECK(dst_pos + 2 < pixel_count);
157 // Convert (B, G, R) in bitmap to (R, G, B)
158 img_floats[dst_pos] =
159 (static_cast<float>(bmp_pixels[src_pos + 2]) - 128.0f) / 128.0f;
160 img_floats[dst_pos + 1] =
161 (static_cast<float>(bmp_pixels[src_pos + 1]) - 128.0f) / 128.0f;
162 img_floats[dst_pos + 2] =
163 (static_cast<float>(bmp_pixels[src_pos]) - 128.0f) / 128.0f;
164 if (DBG_DUMP_FLOAT_DATA) {
165 LOG(INFO) << i << " (" << img_floats[dst_pos] << ", "
166 << img_floats[dst_pos + 1] << ", " << img_floats[dst_pos + 2]
167 << ") (" << static_cast<int>(bmp_pixels[src_pos + 2]) << ", "
168 << static_cast<int>(bmp_pixels[src_pos + 1]) << ", "
169 << static_cast<int>(bmp_pixels[src_pos]) << ")";
170 }
171 if (src_pixel_index % (WIDTH + 1) == (WIDTH - 1)) {
172 // skip bmp padding
173 ++src_pixel_index;
174 }
175 }
176 }
177
QuantizeImage(const std::vector<float> & float_vec,std::vector<quint8> * quint8_vec)178 static void QuantizeImage(const std::vector<float>& float_vec,
179 std::vector<quint8>* quint8_vec) {
180 quint8_vec->resize(float_vec.size());
181 for (int i = 0; i < float_vec.size(); ++i) {
182 quint8_vec->at(i) = FloatToQuantized<quint8>(float_vec[i], -1.0f, 1.0f);
183 }
184 }
185
BuildImageTensor(const std::vector<float> & img_floats)186 static Tensor BuildImageTensor(const std::vector<float>& img_floats) {
187 LOG(INFO) << "Loading image finished.";
188 Tensor img_tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH});
189 CHECK_EQ(WIDTH * HEIGHT * DEPTH, img_floats.size());
190 CHECK_EQ(img_tensor.TotalBytes(), img_floats.size() * sizeof(float));
191 LOG(INFO) << "Copy data to tensor.";
192 std::memcpy(img_tensor.flat<float>().data(), img_floats.data(),
193 img_tensor.TotalBytes());
194 return img_tensor;
195 }
196
BuildQuantizedImageTensor(const std::vector<quint8> & quantized_img)197 static Tensor BuildQuantizedImageTensor(
198 const std::vector<quint8>& quantized_img) {
199 LOG(INFO) << "Loading image finished.";
200 Tensor img_tensor(DT_QUINT8, {1, WIDTH, HEIGHT, DEPTH});
201 CHECK_EQ(WIDTH * HEIGHT * DEPTH, quantized_img.size());
202 CHECK_EQ(img_tensor.TotalBytes(), quantized_img.size() * sizeof(quint8));
203 LOG(INFO) << "Copy data to tensor.";
204 std::memcpy(img_tensor.flat<quint8>().data(), quantized_img.data(),
205 img_tensor.TotalBytes());
206 return img_tensor;
207 }
208
209 /* static */ RemoteFusedGraphExecuteInfo
BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(const GraphTransferInfo & graph_transfer_info)210 BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(
211 const GraphTransferInfo& graph_transfer_info) {
212 RemoteFusedGraphExecuteInfo execute_info;
213 execute_info.set_executor_name("build_hexagon_remote_fused_graph_executor");
214 for (const GraphTransferGraphInputNodeInfo& input :
215 graph_transfer_info.graph_input_node_info()) {
216 execute_info.add_graph_input_node_name(input.name());
217 RemoteFusedGraphExecuteInfo::TensorShapeTypeProto& tensor_shape_type =
218 *execute_info.add_default_graph_input_tensor_shape();
219 tensor_shape_type.set_dtype(input.dtype());
220 TensorShapeProto& tensor_shape_proto = *tensor_shape_type.mutable_shape();
221 for (const int64 dim : input.shape()) {
222 tensor_shape_proto.add_dim()->set_size(dim);
223 }
224 }
225
226 for (const GraphTransferGraphOutputNodeInfo& output :
227 graph_transfer_info.graph_output_node_info()) {
228 execute_info.add_graph_output_node_name(output.name());
229 RemoteFusedGraphExecuteInfo::TensorShapeTypeProto& tensor_shape_type =
230 *execute_info.add_default_graph_output_tensor_shape();
231 tensor_shape_type.set_dtype(output.dtype());
232 TensorShapeProto& tensor_shape_proto = *tensor_shape_type.mutable_shape();
233 for (const int64 dim : output.shape()) {
234 tensor_shape_proto.add_dim()->set_size(dim);
235 }
236 }
237
238 execute_info.set_serialized_executor_parameters(
239 graph_transfer_info.SerializeAsString());
240 return execute_info;
241 }
242
RunInferenceByHexagonControlWrapper(const GraphTransferer & gt,const Tensor & img_tensor)243 static void RunInferenceByHexagonControlWrapper(const GraphTransferer& gt,
244 const Tensor& img_tensor) {
245 const RemoteFusedGraphExecuteInfo execute_info =
246 BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(
247 gt.GetGraphTransferInfo());
248
249 HexagonControlWrapper hexagon_control_wrapper;
250 // 1. Initialize hexagon
251 hexagon_control_wrapper.Init(execute_info);
252
253 // 2. Setup graph in hexagon
254 hexagon_control_wrapper.SetupGraph();
255
256 // 3. Fill input node's output
257 hexagon_control_wrapper.FillInputNode("Mul", img_tensor);
258
259 // 4. Execute graph
260 const int64 start_time_us = Env::Default()->NowMicros();
261 for (int i = 0; i < EXECUTION_REPEAT_COUNT; ++i) {
262 hexagon_control_wrapper.ExecuteGraph();
263 }
264 const int64 end_time_us = Env::Default()->NowMicros();
265
266 // 5-1. Read output node's outputs
267 std::vector<ByteArray> outputs;
268 hexagon_control_wrapper.ReadOutputNode("softmax", &outputs);
269
270 // 5-2. Dump results
271 DumpTop10Results(outputs);
272 CheckFirstResult(outputs, EXPECTED_FIRST_RESULT_ID);
273 LOG(INFO) << "Average execution time = "
274 << (end_time_us - start_time_us) / EXECUTION_REPEAT_COUNT << "us";
275
276 // 6. Teardown graph in hexagon
277 hexagon_control_wrapper.TeardownGraph();
278
279 // 7. Finalize hexagon
280 hexagon_control_wrapper.Finalize();
281 }
282
RunFusedGraph(const GraphDef & fused_graph_def)283 static void RunFusedGraph(const GraphDef& fused_graph_def) {
284 // Setup input tensor
285 std::vector<float> img_floats;
286 LoadImage(&img_floats);
287
288 LOG(INFO) << "Ioading image finished.";
289 const Tensor img_tensor = BuildImageTensor(img_floats);
290
291 // Setup session
292 std::vector<Tensor> output_tensors;
293 SessionOptions session_options;
294 session_options.env = Env::Default();
295 std::unique_ptr<Session> session =
296 std::unique_ptr<Session>(NewSession(session_options));
297 TF_ASSERT_OK(session->Create(fused_graph_def));
298
299 // Setup session arguments
300 RunOptions run_options;
301 run_options.set_trace_level(RunOptions::FULL_TRACE);
302 RunMetadata run_metadata;
303
304 std::vector<std::pair<string, tensorflow::Tensor>> input_tensors;
305 input_tensors.emplace_back("Mul", img_tensor);
306 std::vector<string> output_node_names;
307 output_node_names.emplace_back(REMOTE_FUSED_GRAPH_EXECUTE_NODE_NAME);
308
309 LOG(INFO) << "Run graph";
310 // Run inference with all node as output
311 TF_ASSERT_OK(session->Run(run_options, input_tensors, output_node_names, {},
312 &output_tensors, &run_metadata));
313 ASSERT_EQ(1, output_tensors.size());
314 const Tensor& output_tensor = output_tensors.at(0);
315 LOG(INFO) << "Output byte size = " << output_tensor.TotalBytes();
316 LOG(INFO) << "Output shape = " << output_tensor.shape().DebugString();
317 DumpTop10Results(
318 output_tensor.TotalBytes(),
319 reinterpret_cast<const float*>(output_tensor.flat<float>().data()));
320 }
321
CompareGraphTransferInfo(const GraphTransferInfo & gfi0,const GraphTransferInfo & gfi1)322 static void CompareGraphTransferInfo(const GraphTransferInfo& gfi0,
323 const GraphTransferInfo& gfi1) {
324 LOG(INFO) << "(1) node count: " << gfi1.node_info_size() << ", "
325 << gfi1.const_node_info_size();
326
327 // 1. check node_info
328 ASSERT_EQ(gfi0.node_info_size(), gfi1.node_info_size());
329 for (int i = 0; i < gfi0.node_info_size(); ++i) {
330 const GraphTransferNodeInfo& ni0 = gfi0.node_info(i);
331 const GraphTransferNodeInfo& ni1 = gfi1.node_info(i);
332 EXPECT_EQ(ni0.DebugString(), ni1.DebugString());
333 EXPECT_EQ(ni0.ByteSizeLong(), ni1.ByteSizeLong());
334 }
335
336 // 2. check const_node_info
337 ASSERT_EQ(gfi0.const_node_info_size(), gfi1.const_node_info_size());
338 for (int i = 0; i < gfi0.const_node_info_size(); ++i) {
339 const GraphTransferConstNodeInfo& cni0 = gfi0.const_node_info(i);
340 const GraphTransferConstNodeInfo& cni1 = gfi1.const_node_info(i);
341 ASSERT_EQ(cni0.shape_size(), cni1.shape_size());
342 for (int j = 0; j < cni0.shape_size(); ++j) {
343 EXPECT_EQ(cni0.shape(j), cni1.shape(j));
344 }
345 EXPECT_EQ(cni0.ByteSizeLong(), cni1.ByteSizeLong());
346 EXPECT_EQ(cni0.DebugString(), cni1.DebugString());
347 }
348
349 // 3. check node_input_info
350 ASSERT_EQ(gfi0.node_input_info_size(), gfi1.node_input_info_size());
351 for (int i = 0; i < gfi0.node_input_info_size(); ++i) {
352 const GraphTransferNodeInputInfo& nii0 = gfi0.node_input_info(i);
353 const GraphTransferNodeInputInfo& nii1 = gfi1.node_input_info(i);
354 EXPECT_EQ(nii0.ByteSizeLong(), nii1.ByteSizeLong());
355 EXPECT_EQ(nii0.DebugString(), nii1.DebugString());
356 }
357
358 // 4. check node_output_info
359 ASSERT_EQ(gfi0.node_output_info_size(), gfi1.node_output_info_size());
360 for (int i = 0; i < gfi0.node_output_info_size(); ++i) {
361 const GraphTransferNodeOutputInfo& noi0 = gfi0.node_output_info(i);
362 const GraphTransferNodeOutputInfo& noi1 = gfi1.node_output_info(i);
363 ASSERT_EQ(noi0.max_byte_size_size(), noi1.max_byte_size_size());
364 for (int j = 0; j < noi0.max_byte_size_size(); ++j) {
365 EXPECT_EQ(noi0.max_byte_size(j), noi1.max_byte_size(j));
366 }
367 EXPECT_EQ(noi0.ByteSizeLong(), noi1.ByteSizeLong());
368 EXPECT_EQ(noi0.DebugString(), noi1.DebugString());
369 }
370
371 // 5. check graph_input_node_info
372 ASSERT_EQ(gfi0.graph_input_node_info_size(),
373 gfi1.graph_input_node_info_size());
374 for (int i = 0; i < gfi0.graph_input_node_info_size(); ++i) {
375 const GraphTransferGraphInputNodeInfo& gini0 =
376 gfi0.graph_input_node_info(i);
377 const GraphTransferGraphInputNodeInfo& gini1 =
378 gfi0.graph_input_node_info(i);
379 EXPECT_EQ(gini0.ByteSizeLong(), gini1.ByteSizeLong());
380 EXPECT_EQ(gini0.DebugString(), gini1.DebugString());
381 }
382
383 // 6. check graph_output_node_info
384 ASSERT_EQ(gfi0.graph_output_node_info_size(),
385 gfi1.graph_output_node_info_size());
386 for (int i = 0; i < gfi0.graph_output_node_info_size(); ++i) {
387 const GraphTransferGraphOutputNodeInfo& goni0 =
388 gfi0.graph_output_node_info(i);
389 const GraphTransferGraphOutputNodeInfo& goni1 =
390 gfi0.graph_output_node_info(i);
391 EXPECT_EQ(goni0.ByteSizeLong(), goni1.ByteSizeLong());
392 EXPECT_EQ(goni0.DebugString(), goni1.DebugString());
393 }
394 }
395
// CAVEAT: This test only runs when you specify the hexagon library using the
// makefile.
// CAVEAT: This test is disabled by default because hexagon can keep only
// two inception graphs in memory, and those are allocated by two other tests.
// The memory of these graphs is currently not released until the process is
// killed.
// TODO(satok): Figure out how to release memory on hexagon without process
// termination.
403 #ifdef USE_HEXAGON_LIBS
// Loads the float-input inception v3 model into a GraphTransferer (dry run
// enabled, shape inference disabled for unknown shapes), profiles the
// loading, and runs the test image through HexagonControlWrapper.
TEST(GraphTransferer,
     DISABLED_RunInceptionV3OnHexagonExampleWithHexagonWrapper) {
  LOG(INFO) << "Run inception v3 on hexagon with hexagon controller";
  CheckHexagonControllerVersion();

  const IRemoteFusedGraphOpsDefinitions& hexagon_ops =
      HexagonOpsDefinitions::getInstance();
  std::vector<std::pair<string, Tensor>> graph_inputs;
  graph_inputs.emplace_back("Mul", Tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH}));
  std::vector<string> graph_outputs = {"softmax"};

  GraphTransferer transferer;
  transferer.EnableStrictCheckMode(false);
  profile_utils::CpuUtils::EnableClockCycleProfiling(true);
  ClockCycleProfiler load_profiler;
  load_profiler.Start();
  Status load_status = transferer.LoadGraphFromProtoFile(
      hexagon_ops, MODEL_FILENAME, graph_inputs, graph_outputs,
      /*is_text_proto=*/false,
      /*shape_inference_for_unknown_shape=*/false,
      /*dry_run_for_unknown_shape=*/true);
  ASSERT_TRUE(load_status.ok()) << load_status;
  load_profiler.Stop();
  load_profiler.DumpStatistics("LoadGraphFromProtoFile");

  std::vector<float> image_values;
  LoadImage(&image_values);
  const Tensor image_tensor = BuildImageTensor(image_values);
  RunInferenceByHexagonControlWrapper(transferer, image_tensor);
}
435
// Same flow as the float-input wrapper test, but loads the model that takes a
// quantized input and feeds it the test image quantized to quint8.
TEST(GraphTransferer,
     DISABLED_RunInceptionV3OnHexagonExampleWithHexagonWrapperQuantizedInput) {
  LOG(INFO) << "Run inception v3 on hexagon with hexagon controller "
            << "with quantized input";
  CheckHexagonControllerVersion();

  const IRemoteFusedGraphOpsDefinitions& hexagon_ops =
      HexagonOpsDefinitions::getInstance();
  std::vector<std::pair<string, Tensor>> graph_inputs;
  graph_inputs.emplace_back("Mul",
                            Tensor(DT_QUINT8, {1, WIDTH, HEIGHT, DEPTH}));
  std::vector<string> graph_outputs = {"softmax"};

  GraphTransferer transferer;
  transferer.EnableStrictCheckMode(false);
  profile_utils::CpuUtils::EnableClockCycleProfiling(true);
  ClockCycleProfiler load_profiler;
  load_profiler.Start();
  Status load_status = transferer.LoadGraphFromProtoFile(
      hexagon_ops, MODEL_WITH_QUANTIZED_INPUT_FILENAME, graph_inputs,
      graph_outputs,
      /*is_text_proto=*/false,
      /*shape_inference_for_unknown_shape=*/false,
      /*dry_run_for_unknown_shape=*/true);
  ASSERT_TRUE(load_status.ok()) << load_status;
  load_profiler.Stop();
  load_profiler.DumpStatistics("LoadGraphFromProtoFile");

  // Load the image as floats first, then quantize it for the quint8 input.
  std::vector<float> image_values;
  LoadImage(&image_values);
  std::vector<quint8> quantized_values;
  QuantizeImage(image_values, &quantized_values);
  const Tensor image_tensor = BuildQuantizedImageTensor(quantized_values);
  RunInferenceByHexagonControlWrapper(transferer, image_tensor);
}
470
// Same flow as the float-input wrapper test, but unknown shapes are resolved
// by shape inference instead of a dry run.
TEST(GraphTransferer,
     DISABLED_RunInceptionV3OnHexagonExampleWithHexagonWrapperShapeInference) {
  LOG(INFO) << "Run inception v3 on hexagon with hexagon controller";
  CheckHexagonControllerVersion();

  const IRemoteFusedGraphOpsDefinitions& hexagon_ops =
      HexagonOpsDefinitions::getInstance();
  std::vector<std::pair<string, Tensor>> graph_inputs;
  graph_inputs.emplace_back("Mul", Tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH}));
  std::vector<string> graph_outputs = {"softmax"};

  GraphTransferer transferer;
  transferer.EnableStrictCheckMode(false);
  profile_utils::CpuUtils::EnableClockCycleProfiling(true);
  ClockCycleProfiler load_profiler;
  load_profiler.Start();
  Status load_status = transferer.LoadGraphFromProtoFile(
      hexagon_ops, MODEL_FILENAME, graph_inputs, graph_outputs,
      /*is_text_proto=*/false,
      /*shape_inference_for_unknown_shape=*/true,
      /*dry_run_for_unknown_shape=*/false);
  ASSERT_TRUE(load_status.ok()) << load_status;
  load_profiler.Stop();
  load_profiler.DumpStatistics("LoadGraphFromProtoFile");

  std::vector<float> image_values;
  LoadImage(&image_values);
  const Tensor image_tensor = BuildImageTensor(image_values);
  RunInferenceByHexagonControlWrapper(transferer, image_tensor);
}
502
// Reads the inception v3 GraphDef, fuses it into a single remote fused graph
// node and runs the fused graph through the regular TensorFlow runtime.
// RunFusedGraph() loads the test image itself, so no image is loaded here.
TEST(GraphTransferer, RunInceptionV3OnHexagonExampleWithTfRuntime) {
  LOG(INFO) << "Fuse and run inception v3 on hexagon with tf runtime";
  CheckHexagonControllerVersion();

  std::vector<std::pair<string, Tensor>> inputs;
  inputs.emplace_back("Mul", Tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH}));
  std::vector<string> outputs = {"softmax"};

  GraphDef graph_def;
  Status status = ReadBinaryProto(Env::Default(), MODEL_FILENAME, &graph_def);
  // Stream the status so that a failure explains why the model was not read.
  ASSERT_TRUE(status.ok()) << status;

  LOG(INFO) << "Build fused graph";
  GraphDef fused_graph_def = GraphTransferUtils::BuildFusedGraphDef(
      HexagonOpsDefinitions::getInstance(),
      REMOTE_FUSED_GRAPH_EXECUTE_NODE_NAME, inputs, outputs, &graph_def);

  RunFusedGraph(fused_graph_def);
}
530
// Reads the pre-fused GraphDef from FUSED_MODEL_FILENAME and runs it through
// the regular TensorFlow runtime.
TEST(GraphTransferer, DISABLED_RunInceptionV3OnHexagonExampleWithFusedGraph) {
  LOG(INFO) << "Run inception v3 with fused graph";
  CheckHexagonControllerVersion();

  GraphDef fused_graph_def;
  Status status =
      ReadBinaryProto(Env::Default(), FUSED_MODEL_FILENAME, &fused_graph_def);
  // Do not run an empty graph when the model file could not be read.
  ASSERT_TRUE(status.ok()) << status;
  RunFusedGraph(fused_graph_def);
}
540
// Loads the same model twice, once resolving unknown shapes by dry run and
// once by shape inference, profiles both loads, and checks that the two
// resulting GraphTransferInfo protos agree. Also checks that each
// GraphTransferInfo survives serialization into a RemoteFusedGraphExecuteInfo
// and parsing back.
TEST(GraphTransferer, DISABLED_CheckShapeInferencePerformance) {
  CheckHexagonControllerVersion();
  profile_utils::CpuUtils::EnableClockCycleProfiling(true);

  const IRemoteFusedGraphOpsDefinitions* ops_definitions =
      &HexagonOpsDefinitions::getInstance();
  std::vector<std::pair<string, Tensor>> inputs;
  inputs.emplace_back("Mul", Tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH}));
  std::vector<string> output_node_names = {"softmax"};

  // (0) Estimate shapes by dry run.
  GraphTransferer gt0;
  gt0.EnableStrictCheckMode(false);
  ClockCycleProfiler prof0;
  prof0.Start();
  Status status = gt0.LoadGraphFromProtoFile(
      *ops_definitions, MODEL_FILENAME, inputs, output_node_names,
      /*is_text_proto=*/false,
      /*shape_inference_for_unknown_shape=*/false,
      /*dry_run_for_unknown_shape=*/true);
  const GraphTransferInfo& gfi0 = gt0.GetGraphTransferInfo();

  ASSERT_TRUE(status.ok()) << status;
  prof0.Stop();
  prof0.DumpStatistics("Estimate shape by dryrun");

  LOG(INFO) << "(0) node count: " << gfi0.node_info_size() << ", "
            << gfi0.const_node_info_size();

  // (1) Estimate shapes by shape inference.
  GraphTransferer gt1;
  gt1.EnableStrictCheckMode(true);
  ClockCycleProfiler prof1;
  prof1.Start();
  status = gt1.LoadGraphFromProtoFile(
      *ops_definitions, MODEL_FILENAME, inputs, output_node_names,
      /*is_text_proto=*/false,
      /*shape_inference_for_unknown_shape=*/true,
      /*dry_run_for_unknown_shape=*/false);
  const GraphTransferInfo& gfi1 = gt1.GetGraphTransferInfo();

  ASSERT_TRUE(status.ok()) << status;
  prof1.Stop();
  prof1.DumpStatistics("Estimate shape by shape inference");

  CompareGraphTransferInfo(gfi0, gfi1);

  const RemoteFusedGraphExecuteInfo ei0 =
      BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(gfi0);
  const RemoteFusedGraphExecuteInfo ei1 =
      BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(gfi1);

  // Round trip through the serialized executor parameters. A parse failure
  // must fail the test rather than leave a partially filled proto to compare.
  GraphTransferInfo rgfi0;
  ASSERT_TRUE(rgfi0.ParseFromString(ei0.serialized_executor_parameters()));
  GraphTransferInfo rgfi1;
  ASSERT_TRUE(rgfi1.ParseFromString(ei1.serialized_executor_parameters()));

  CompareGraphTransferInfo(rgfi0, rgfi1);
  CompareGraphTransferInfo(gfi0, rgfi0);
  CompareGraphTransferInfo(gfi1, rgfi1);
}
604 #endif
605
606 } // namespace tensorflow
607