1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/kernels/hexagon/graph_transferer.h"
17
18 #include <algorithm>
19 #include <cinttypes>
20
21 #include "tensorflow/core/common_runtime/graph_constructor.h"
22 #include "tensorflow/core/framework/graph.pb.h"
23 #include "tensorflow/core/framework/graph_transfer_info.pb.h"
24 #include "tensorflow/core/framework/op.h"
25 #include "tensorflow/core/graph/algorithm.h"
26 #include "tensorflow/core/graph/node_builder.h"
27 #include "tensorflow/core/platform/env.h"
28 #include "tensorflow/core/platform/types.h"
29 #include "tensorflow/core/public/session.h"
30 #include "tensorflow/core/public/session_options.h"
31 #include "tensorflow/core/util/tensor_slice_writer.h"
32
33 namespace tensorflow {
34
35 // function alias
36 constexpr auto AddOutputTensorShapeTypeByTensorShapeMap =
37 &RemoteFusedGraphExecuteUtils::AddOutputTensorShapeTypeByTensorShapeMap;
38
// Debug switches: when true, LoadGraphFromProto dumps the registered
// parameters after loading.
constexpr bool DBG_DUMP_VERIFICATION_STRING = false;
constexpr bool DBG_DUMP_PARAMS = false;

// Op type and well-known node names.
const char RESHAPE_NODE_TYPE_STRING[] = "Reshape";
const char SOURCE_NODE_NAME[] = "_SOURCE";
const char SINK_NODE_NAME[] = "_SINK";
const char AGGREGATED_INPUT_NODE_NAME[] = "graph_transfer_aggregated_input";
const char NULL_OUTPUT_NAME[] = "NULL";

// Name prefixes.
const char INPUTS_NODE_PREFIX[] = "inputs_for_";
const char OUTPUTS_NODE_PREFIX[] = "outputs_for_";
const char DATA_NODE_PREFIX[] = "data_for_op_";
const char CONST_SHAPE_PREFIX[] = "const_shape_";
const char CONST_VAL_PREFIX[] = "const_val_";
const char CONST_TENSOR_PREFIX[] = "const_tensor_";

// Attribute names looked up on nodes.
const char PADDING_ATTR_NAME[] = "padding";
const char STRIDES_ATTR_NAME[] = "strides";
const char KEEP_DIMS_ATTR_NAME[] = "keep_dims";
const char KSIZE_ATTR_NAME[] = "ksize";

// Padding id used when padding does not apply.
const int PADDING_NA_ID = 0;  // VALID = 1, SAME = 2
58
// Converts `val` to its textual form by streaming it with operator<<.
// Temporary workaround for the Android build, where (presumably)
// std::to_string is unavailable even with the c++11 option -- TODO confirm.
template <typename T>
static string ToString(T val) {
  std::ostringstream buffer;
  buffer << val;
  return buffer.str();
}
67
FindMutableNodeByName(const string & name,Graph * graph)68 static Node* FindMutableNodeByName(const string& name, Graph* graph) {
69 const TensorId tid = ParseTensorName(name);
70 for (Node* node : graph->nodes()) {
71 if (node != nullptr && node->name() == tid.first) {
72 return node;
73 }
74 }
75 return nullptr;
76 }
77
GraphTransferer()78 GraphTransferer::GraphTransferer() {
79 graph_transfer_info_ = new GraphTransferInfo();
80 }
81
~GraphTransferer()82 GraphTransferer::~GraphTransferer() { delete graph_transfer_info_; }
83
/**
 * Graph loading functions
 * - LoadGraphFromProto
 * - LoadGraphFromProtoFile
 * These functions read a graph definition and store the parameters
 * of each node in order to transfer the graph to the SOC.
 */
LoadGraphFromProto(const IRemoteFusedGraphOpsDefinitions & ops_definitions,const GraphDef & graph_def,const std::vector<std::pair<string,Tensor>> & input_node_info_list,const std::vector<string> & output_node_names,const bool shape_inference_for_unknown_shape)91 Status GraphTransferer::LoadGraphFromProto(
92 const IRemoteFusedGraphOpsDefinitions& ops_definitions,
93 const GraphDef& graph_def,
94 const std::vector<std::pair<string, Tensor>>& input_node_info_list,
95 const std::vector<string>& output_node_names,
96 const bool shape_inference_for_unknown_shape) {
97 Graph graph(OpRegistry::Global());
98 ShapeRefiner shape_refiner(graph.versions(), graph.op_registry());
99 Status status = ImportGraphDef({}, graph_def, &graph, &shape_refiner);
100 if (!status.ok()) {
101 return status;
102 }
103
104 if (shape_inference_for_unknown_shape) {
105 status = RemoteFusedGraphExecuteUtils::PropagateShapeInference(
106 graph_def, input_node_info_list, &graph, &shape_refiner);
107 if (!status.ok()) {
108 return status;
109 }
110 }
111
112 TF_RETURN_IF_ERROR(TransformGraphToAddAggregatedInputNode(
113 input_node_info_list, &graph, &shape_refiner));
114
115 std::unordered_multimap<string, const Node*> op_name_to_node_multimap(
116 graph.num_nodes());
117 for (const Node* const node : graph.nodes()) {
118 if (node == nullptr) {
119 continue;
120 }
121 CacheNode(*node);
122 }
123
124 for (const Node* const node : graph.nodes()) {
125 if (node == nullptr) {
126 continue;
127 }
128 VLOG(1) << "<Node> " << node->name();
129 for (const Node* const input_node : node->in_nodes()) {
130 const string& name = input_node->name();
131 op_name_to_node_multimap.emplace(name, node);
132 VLOG(1) << "Add dependency: " << name << " -> " << node->name();
133 }
134 }
135
136 for (const Node* const node : graph.nodes()) {
137 if (node == nullptr) {
138 continue;
139 }
140 status = RegisterNodeIfAllInputsAreCached(
141 ops_definitions, shape_refiner, *node, false, input_node_info_list,
142 output_node_names);
143 if (!status.ok()) {
144 LOG(ERROR) << "Failed to transfer graph " << status;
145 return status;
146 }
147 }
148
149 SortParams(output_node_names);
150
151 for (const std::pair<string, Tensor>& input_node_info :
152 input_node_info_list) {
153 GraphTransferGraphInputNodeInfo& graph_input_node_info =
154 *graph_transfer_info_->add_graph_input_node_info();
155 graph_input_node_info.set_name(input_node_info.first);
156 graph_input_node_info.set_dtype(input_node_info.second.dtype());
157 for (const int64 dim : ToTensorShapeArray(input_node_info.second.shape())) {
158 graph_input_node_info.add_shape(dim);
159 }
160 }
161
162 for (const string& output_node_name : output_node_names) {
163 const TensorId tid = ParseTensorName(output_node_name);
164 const string node_name(tid.first);
165 const int port = tid.second;
166 const int node_id = node_name_to_id_cache_map_.at(node_name);
167 const Node* node = node_name_cache_list_.at(node_id);
168 CHECK_NOTNULL(node);
169
170 GraphTransferGraphOutputNodeInfo& graph_output_node_info =
171 *graph_transfer_info_->add_graph_output_node_info();
172 graph_output_node_info.set_name(strings::StrCat(node_name, ":", port));
173
174 // Get output tensor shape type
175 std::vector<DataType> data_types;
176 std::vector<TensorShape> shapes;
177 status = RemoteFusedGraphExecuteUtils::GetOutputTensorShapeType(
178 node->attrs(), &data_types, &shapes);
179 if (status.ok()) {
180 CHECK(data_types.size() > port);
181 graph_output_node_info.set_dtype(data_types.at(port));
182 for (const int64 dim : ToTensorShapeArray(shapes.at(port))) {
183 graph_output_node_info.add_shape(dim);
184 }
185 }
186 }
187
188 ClearCache();
189 if (DBG_DUMP_PARAMS) {
190 DumpNodeTransferParams();
191 }
192 if (DBG_DUMP_VERIFICATION_STRING) {
193 DumpVerificationStringOfNodeTransferParams();
194 }
195 return Status();
196 }
197
LoadGraphFromProtoFile(const IRemoteFusedGraphOpsDefinitions & ops_definitions,const string & graph_def_path,const std::vector<std::pair<string,Tensor>> & input_node_info_list,const std::vector<string> & output_node_names,const bool is_text_proto,const bool shape_inference_for_unknown_shape,const bool dry_run_for_unknown_shape)198 Status GraphTransferer::LoadGraphFromProtoFile(
199 const IRemoteFusedGraphOpsDefinitions& ops_definitions,
200 const string& graph_def_path,
201 const std::vector<std::pair<string, Tensor>>& input_node_info_list,
202 const std::vector<string>& output_node_names, const bool is_text_proto,
203 const bool shape_inference_for_unknown_shape,
204 const bool dry_run_for_unknown_shape) {
205 GraphDef graph_def;
206 string output;
207 Status status;
208 VLOG(1) << "Parse file " << graph_def_path;
209 if (is_text_proto) {
210 status = ReadFileToString(Env::Default(), graph_def_path, &output);
211 if (!protobuf::TextFormat::ParseFromString(output, &graph_def)) {
212 return errors::InvalidArgument("Cannot parse proto string.");
213 }
214 } else {
215 status = ReadBinaryProto(Env::Default(), graph_def_path, &graph_def);
216 }
217 if (!status.ok()) {
218 VLOG(1) << "Failed to load graph " << status;
219 return status;
220 }
221 if (dry_run_for_unknown_shape) {
222 VLOG(1) << "Dry run graph to obtain shape of nodes";
223 RemoteFusedGraphExecuteUtils::TensorShapeMap tensor_shape_map;
224 status = RemoteFusedGraphExecuteUtils::DryRunInferenceForAllNode(
225 graph_def, input_node_info_list, true, &tensor_shape_map);
226 if (!status.ok()) {
227 return status;
228 }
229 for (NodeDef& node_def : *graph_def.mutable_node()) {
230 TF_CHECK_OK(AddOutputTensorShapeTypeByTensorShapeMap(tensor_shape_map,
231 &node_def));
232 }
233 }
234 VLOG(1) << "Load graph with output tensors";
235 return LoadGraphFromProto(ops_definitions, graph_def, input_node_info_list,
236 output_node_names,
237 shape_inference_for_unknown_shape);
238 }
239
SortParams(const std::vector<string> & output_node_names)240 void GraphTransferer::SortParams(const std::vector<string>& output_node_names) {
241 // TODO(satok): optimize complexity
242 std::unordered_map<int, GraphTransferNodeInputInfo*> input_map;
243 for (GraphTransferNodeInputInfo& input :
244 *graph_transfer_info_->mutable_node_input_info()) {
245 input_map.emplace(input.node_id(), &input);
246 }
247
248 // Setup dependency map placeholder
249 std::vector<int> output_node_ids;
250 std::unordered_map<int, std::unordered_set<int>> dependency_map;
251 for (const GraphTransferNodeInfo& params :
252 graph_transfer_info_->node_info()) {
253 const int node_id = params.node_id();
254 for (const string& output_node_name : output_node_names) {
255 if (params.name() == output_node_name) {
256 output_node_ids.emplace_back(node_id);
257 }
258 }
259
260 dependency_map.emplace(std::piecewise_construct, std::make_tuple(node_id),
261 std::make_tuple());
262 if (params.input_count() == 0) {
263 continue;
264 }
265 CHECK_EQ(input_map.count(node_id), 1);
266 for (const GraphTransferNodeInput& node_input :
267 input_map.at(node_id)->node_input()) {
268 dependency_map.at(node_id).emplace(node_input.node_id());
269 }
270 }
271
272 // Create dependency map traversed from output nodes
273 std::unordered_set<int> completed;
274 for (int output_node_id : output_node_ids) {
275 FillDependencyRec(output_node_id, dependency_map, completed);
276 }
277
278 std::sort(graph_transfer_info_->mutable_node_info()->begin(),
279 graph_transfer_info_->mutable_node_info()->end(),
280 TransferParamsComparator(dependency_map));
281 }
282
EnableStrictCheckMode(const bool enable)283 void GraphTransferer::EnableStrictCheckMode(const bool enable) {
284 strict_check_mode_ = enable;
285 }
286
SetSerializedGraphTransferInfo(const string & serialized_proto)287 void GraphTransferer::SetSerializedGraphTransferInfo(
288 const string& serialized_proto) {
289 graph_transfer_info_->ParseFromString(serialized_proto);
290 }
291
GetGraphTransferInfo() const292 const GraphTransferInfo& GraphTransferer::GetGraphTransferInfo() const {
293 return *graph_transfer_info_;
294 }
295
GetMutableGraphTransferInfo()296 GraphTransferInfo& GraphTransferer::GetMutableGraphTransferInfo() {
297 return *graph_transfer_info_;
298 }
299
CacheNode(const Node & node)300 void GraphTransferer::CacheNode(const Node& node) {
301 if (node_name_to_id_cache_map_.count(node.name()) > 0) {
302 return;
303 }
304 node_name_cache_list_.emplace_back(&node);
305 const int node_id = node_name_cache_list_.size() - 1;
306 bool emplace_succeeded = false;
307 std::tie(std::ignore, emplace_succeeded) =
308 node_name_to_id_cache_map_.emplace(node.name(), node_id);
309 CHECK(emplace_succeeded);
310 }
311
AreAllInputsCached(const Node & node) const312 bool GraphTransferer::AreAllInputsCached(const Node& node) const {
313 for (const Node* const input_node : node.in_nodes()) {
314 if (node_name_to_id_cache_map_.count(input_node->name()) <= 0) {
315 VLOG(1) << "input_node " << input_node->name() << " of " << node.name()
316 << " is not cached yet.";
317 return false;
318 }
319 }
320 return true;
321 }
322
TransformGraphToAddAggregatedInputNode(const std::vector<std::pair<string,Tensor>> & input_node_info_list,Graph * graph,ShapeRefiner * shape_refiner)323 Status GraphTransferer::TransformGraphToAddAggregatedInputNode(
324 const std::vector<std::pair<string, Tensor>>& input_node_info_list,
325 Graph* graph, ShapeRefiner* shape_refiner) {
326 // Transform a remote fused graph to add an aggregated input node which takes
327 // all inputs of the remote graph.
328 DataTypeVector input_data_types;
329 std::vector<DataType> data_types;
330 std::vector<TensorShape> shapes;
331 std::vector<string> input_nodes;
332 for (int i = 0; i < input_node_info_list.size(); ++i) {
333 Node* node = FindMutableNodeByName(input_node_info_list.at(i).first, graph);
334 CHECK_NOTNULL(node);
335 input_nodes.emplace_back(node->name());
336 input_data_types.emplace_back(input_node_info_list.at(i).second.dtype());
337 data_types.emplace_back(input_node_info_list.at(i).second.dtype());
338 shapes.emplace_back(input_node_info_list.at(i).second.shape());
339 }
340
341 auto builder =
342 NodeBuilder(AGGREGATED_INPUT_NODE_NAME, "RemoteFusedGraphExecute")
343 .Input(std::vector<NodeBuilder::NodeOut>{})
344 .Attr("Tinputs", DataTypeVector{})
345 .Attr("Toutputs", input_data_types)
346 .Attr("serialized_remote_fused_graph_execute_info", "")
347 .Attr(RemoteFusedGraphExecuteUtils::ATTR_OUTPUT_DATA_TYPES,
348 data_types)
349 .Attr(RemoteFusedGraphExecuteUtils::ATTR_OUTPUT_SHAPES, shapes);
350
351 Node* input_node;
352 TF_RETURN_IF_ERROR(builder.Finalize(graph, &input_node));
353 CHECK_NOTNULL(input_node);
354
355 bool refined;
356 TF_RETURN_IF_ERROR(
357 shape_refiner->UpdateNode(input_node, false /* relax */, &refined));
358
359 shape_inference::InferenceContext* context =
360 shape_refiner->GetContext(input_node);
361 for (int i = 0; i < input_node_info_list.size(); ++i) {
362 shape_inference::ShapeHandle handle;
363 TF_RETURN_IF_ERROR(context->MakeShapeFromTensorShape(
364 input_node_info_list.at(i).second.shape(), &handle));
365 TF_RETURN_IF_ERROR(shape_refiner->SetShape(input_node, i, handle));
366 }
367
368 // Cache the aggregate input node first as it's consumed first.
369 CacheNode(*input_node);
370
371 std::vector<Node*> original_input_nodes(input_nodes.size());
372
373 for (int i = 0; i < input_nodes.size(); ++i) {
374 const string& node_name = input_nodes.at(i);
375 Node* original_input_node = FindMutableNodeByName(node_name, graph);
376 CHECK_NOTNULL(original_input_node);
377 CHECK_EQ(1, original_input_node->num_outputs()); // replaced by identity.
378 Node* created_node;
379 TF_RETURN_IF_ERROR(RemoteFusedGraphExecuteUtils::BuildIdentityOpNode(
380 node_name, AGGREGATED_INPUT_NODE_NAME, i, data_types.at(i), graph,
381 &created_node));
382 CHECK_NOTNULL(created_node);
383 std::vector<DataType> data_types;
384 std::vector<TensorShape> shapes;
385 Status status = RemoteFusedGraphExecuteUtils::GetOutputTensorShapeType(
386 original_input_node->attrs(), &data_types, &shapes);
387 if (status.ok()) {
388 created_node->AddAttr(
389 RemoteFusedGraphExecuteUtils::ATTR_OUTPUT_DATA_TYPES, data_types);
390 created_node->AddAttr(RemoteFusedGraphExecuteUtils::ATTR_OUTPUT_SHAPES,
391 shapes);
392 }
393 for (const Edge* out_edge : original_input_node->out_edges()) {
394 Node* dst = out_edge->dst();
395 int dst_port = out_edge->dst_input();
396 // Unused edge will be removed when removing node.
397 graph->AddEdge(created_node, 0, dst, dst_port);
398 }
399 original_input_nodes[i] = original_input_node;
400
401 TF_RETURN_IF_ERROR(
402 shape_refiner->UpdateNode(created_node, false /* relax */, &refined));
403
404 shape_inference::InferenceContext* context =
405 shape_refiner->GetContext(created_node);
406 CHECK_NOTNULL(context);
407
408 // Cache replaced input node next to the aggregated input node.
409 CacheNode(*created_node);
410 }
411
412 // Remove original input nodes after adding new input nodes to avoid
413 // reusing same pointer in Graph.
414 for (Node* original_input_node : original_input_nodes) {
415 graph->RemoveNode(original_input_node);
416 }
417
418 return Status::OK();
419 }
420
RegisterNode(const IRemoteFusedGraphOpsDefinitions & ops_definitions,const ShapeRefiner & shape_refiner,const Node & node,const std::vector<std::pair<string,Tensor>> & input_node_info_list,const std::vector<string> & output_node_names)421 Status GraphTransferer::RegisterNode(
422 const IRemoteFusedGraphOpsDefinitions& ops_definitions,
423 const ShapeRefiner& shape_refiner, const Node& node,
424 const std::vector<std::pair<string, Tensor>>& input_node_info_list,
425 const std::vector<string>& output_node_names) {
426 VLOG(1) << "Register node: " << node.name() << ", " << std::hex
427 << node_name_to_id_cache_map_.at(node.name());
428 if (node.name() == SOURCE_NODE_NAME || node.name() == SINK_NODE_NAME) {
429 // Just ignore sink and source
430 return Status::OK();
431 } else if (node.name() == AGGREGATED_INPUT_NODE_NAME) {
432 RegisterInputNode(ops_definitions, shape_refiner, node);
433 return Status::OK();
434 } else if (node.IsConstant()) {
435 RegisterConstantNode(shape_refiner, node);
436 } else if (IsPadNode(node)) {
437 RegisterPadNode(ops_definitions, shape_refiner, node);
438 } else if (HasPaddingAndStrides(node)) {
439 RegisterNodeWithPaddingAndStrides(ops_definitions, shape_refiner, node);
440 } else if (NeedsToAddRank(node)) {
441 RegisterNodeWithRank(ops_definitions, shape_refiner, node);
442 } else if (IsNodeFlattenReshape(node, shape_refiner)) {
443 RegisterFlattenNode(ops_definitions, shape_refiner, node);
444 } else if (ops_definitions.GetOpIdFor(node.type_string(), {}) !=
445 IRemoteFusedGraphOpsDefinitions::INVALID_OP_ID) {
446 // TODO(satok): Set correct data type if it's given.
447 RegisterGenericNode(ops_definitions, shape_refiner, node);
448 } else {
449 return errors::InvalidArgument(node.type_string() +
450 " has not been implemented yet.");
451 }
452
453 return Status::OK();
454 }
455
RegisterConstantNode(const ShapeRefiner & shape_refiner,const Node & node)456 void GraphTransferer::RegisterConstantNode(const ShapeRefiner& shape_refiner,
457 const Node& node) {
458 VLOG(1) << "Register constant node: " << node.name();
459 CHECK_EQ(node_name_to_id_cache_map_.count(node.name()), 1);
460 const int id = node_name_to_id_cache_map_[node.name()];
461 const int output_node_size = node.num_outputs();
462 CHECK_EQ(output_node_size, 1);
463 // TODO(satok): support multiple outputs?
464 const int output_index = 0;
465 const DataType dt = node.output_type(output_index);
466 const size_t max_bytes_per_data = DataTypeSize(dt);
467 CHECK_GT(max_bytes_per_data, 0)
468 << "dt = " << dt << ", " + DataTypeString(dt) << ", "
469 << max_bytes_per_data << ", " << static_cast<int>(DataTypeSize(dt))
470 << ",,,,,,,";
471 shape_inference::InferenceContext* context = shape_refiner.GetContext(&node);
472 shape_inference::ShapeHandle shape_handle = context->output(output_index);
473 const shape_inference::DimensionHandle num_elements_dim =
474 context->NumElements(shape_handle);
475 std::array<int64, SHAPE_ARRAY_SIZE> shape_array;
476 int data_size;
477 // Shape of constant node must be known
478 CHECK(context->ValueKnown(num_elements_dim));
479 const int64 num_output_elements = context->Value(num_elements_dim);
480 data_size = max_bytes_per_data * num_output_elements;
481 shape_array = BuildShapeArray(shape_handle, context);
482
483 GraphTransferConstNodeInfo& const_node_info =
484 *graph_transfer_info_->add_const_node_info();
485 const_node_info.set_name(node.name());
486 const_node_info.set_node_id(id);
487 // TODO(satok): Make this generic. Never assume rank is 4.
488 CHECK_EQ(4, SHAPE_ARRAY_SIZE);
489 const_node_info.add_shape(shape_array[0]);
490 const_node_info.add_shape(shape_array[1]);
491 const_node_info.add_shape(shape_array[2]);
492 const_node_info.add_shape(shape_array[3]);
493 const TensorProto* proto = nullptr;
494 TF_CHECK_OK(GetNodeAttr(node.attrs(), "value", &proto));
495 Tensor const_tensor;
496 TF_CHECK_OK(MakeTensorFromProto(*proto, &const_tensor));
497
498 const_node_info.set_dtype(const_tensor.dtype());
499 if (data_size > 0) {
500 const_node_info.set_data(const_tensor.tensor_data().data(), data_size);
501 }
502 }
503
RegisterConstantShape(const std::vector<int> & shape)504 int GraphTransferer::RegisterConstantShape(const std::vector<int>& shape) {
505 VLOG(1) << "Cache constant shape.";
506 // TODO(satok): Handle non-4dim strides
507 CHECK_EQ(shape.size(), 4);
508 const string shape_name = CONST_SHAPE_PREFIX + ToString(shape.at(0)) + 'x' +
509 ToString(shape.at(1)) + 'x' +
510 ToString(shape.at(2)) + 'x' + ToString(shape.at(3));
511 if (node_name_to_id_cache_map_.count(shape_name) <= 0) {
512 node_name_cache_list_.emplace_back(nullptr);
513 const int id = node_name_cache_list_.size() - 1;
514 node_name_to_id_cache_map_.emplace(shape_name, id);
515 GraphTransferConstNodeInfo& const_node_info =
516 *graph_transfer_info_->add_const_node_info();
517 const_node_info.set_name(shape_name);
518 const_node_info.set_node_id(id);
519 // TODO(satok): Make this generic. Never assume rank is 5.
520 const_node_info.add_shape(static_cast<int64>(shape[0]));
521 const_node_info.add_shape(static_cast<int64>(shape[1]));
522 const_node_info.add_shape(static_cast<int64>(shape[2]));
523 const_node_info.add_shape(static_cast<int64>(shape[3]));
524 }
525 return node_name_to_id_cache_map_[shape_name];
526 }
527
RegisterConstTensor(const Tensor & tensor,const string & suffix)528 int GraphTransferer::RegisterConstTensor(const Tensor& tensor,
529 const string& suffix) {
530 VLOG(1) << "Cache const tensor.";
531 const int dims = tensor.shape().dims();
532 CHECK(dims <= 4);
533 const string node_name = strings::StrCat(CONST_TENSOR_PREFIX, "_", suffix);
534 if (node_name_to_id_cache_map_.count(node_name) <= 0) {
535 node_name_cache_list_.emplace_back(nullptr);
536 const int id = node_name_cache_list_.size() - 1;
537 node_name_to_id_cache_map_.emplace(node_name, id);
538 GraphTransferConstNodeInfo& const_node_info =
539 *graph_transfer_info_->add_const_node_info();
540 const_node_info.set_name(node_name);
541 const_node_info.set_node_id(id);
542 CHECK_EQ(4, SHAPE_ARRAY_SIZE);
543 for (int i = 0; i < SHAPE_ARRAY_SIZE; ++i) {
544 if (i < SHAPE_ARRAY_SIZE - dims) {
545 const_node_info.add_shape(1);
546 } else {
547 const_node_info.add_shape(
548 tensor.shape().dim_size(i - (SHAPE_ARRAY_SIZE - dims)));
549 }
550 }
551 const_node_info.set_dtype(tensor.dtype());
552 const_node_info.set_data(tensor.tensor_data().data(),
553 tensor.tensor_data().size());
554 }
555 return node_name_to_id_cache_map_[node_name];
556 }
557
RegisterConstScalar(const DataType dt,const int val,const int dst_id,const int dst_input_count)558 int GraphTransferer::RegisterConstScalar(const DataType dt, const int val,
559 const int dst_id,
560 const int dst_input_count) {
561 VLOG(1) << "Cache const.";
562 const string val_name =
563 CONST_VAL_PREFIX + ToString(dst_id) + '_' + ToString(dst_input_count);
564 if (node_name_to_id_cache_map_.count(val_name) <= 0) {
565 node_name_cache_list_.emplace_back(nullptr);
566 const int id = node_name_cache_list_.size() - 1;
567 node_name_to_id_cache_map_.emplace(val_name, id);
568 GraphTransferConstNodeInfo& const_node_info =
569 *graph_transfer_info_->add_const_node_info();
570 const_node_info.set_name(val_name);
571 const_node_info.set_node_id(id);
572 // TODO(satok): Do not assume rank is 4 here.
573 const_node_info.add_shape(static_cast<int64>(1));
574 const_node_info.add_shape(static_cast<int64>(1));
575 const_node_info.add_shape(static_cast<int64>(1));
576 const_node_info.add_shape(static_cast<int64>(1));
577 const_node_info.set_data(&val, DataTypeSize(dt));
578 }
579 return node_name_to_id_cache_map_[val_name];
580 }
581
HasPaddingAndStrides(const Node & node)582 bool GraphTransferer::HasPaddingAndStrides(const Node& node) {
583 auto attrs = node.attrs();
584 return attrs.Find(PADDING_ATTR_NAME) != nullptr &&
585 attrs.Find(STRIDES_ATTR_NAME) != nullptr;
586 }
587
NeedsToAddRank(const Node & node)588 bool GraphTransferer::NeedsToAddRank(const Node& node) {
589 const StringPiece op_type(node.type_string());
590 if (op_type == "Transpose" || op_type == "ExpandDims") {
591 return true;
592 }
593 return false;
594 }
595
IsPadNode(const Node & node)596 bool GraphTransferer::IsPadNode(const Node& node) {
597 const StringPiece op_type(node.type_string());
598 if (op_type == "Pad") {
599 return true;
600 }
601 return false;
602 }
603
IsNodeFlattenReshape(const Node & node,const ShapeRefiner & shape_refiner)604 bool GraphTransferer::IsNodeFlattenReshape(const Node& node,
605 const ShapeRefiner& shape_refiner) {
606 // Check if node is reshape op
607 if (node.type_string() != RESHAPE_NODE_TYPE_STRING) {
608 return false;
609 }
610
611 shape_inference::InferenceContext* context = shape_refiner.GetContext(&node);
612 // Check if output count is valid
613 if (context->num_outputs() != 1) {
614 return false;
615 }
616
617 shape_inference::ShapeHandle shape_handle = context->output(0);
618 std::array<int64, SHAPE_ARRAY_SIZE> shape_array;
619 const shape_inference::DimensionHandle dim_handle =
620 context->NumElements(shape_handle);
621
622 // Obtain shape of output of node
623 if (context->ValueKnown(dim_handle)) {
624 shape_array = BuildShapeArray(shape_handle, context);
625 } else {
626 std::vector<TensorShape> shapes;
627 TF_CHECK_OK(RemoteFusedGraphExecuteUtils::GetOutputTensorShapeType(
628 node.attrs(), nullptr, &shapes));
629
630 // Number of outputs should be 1 for reshape node.
631 CHECK_EQ(1, shapes.size());
632 shape_array = ToTensorShapeArray(shapes.at(0));
633 }
634
635 // check if reshape op just does flatten
636 if (shape_array[0] == 1 && shape_array[1] == 1 && shape_array[2] == 1) {
637 return true;
638 } else {
639 return false;
640 }
641 }
642
RegisterNodeWithPaddingAndStrides(const IRemoteFusedGraphOpsDefinitions & ops_definitions,const ShapeRefiner & shape_refiner,const Node & node)643 void GraphTransferer::RegisterNodeWithPaddingAndStrides(
644 const IRemoteFusedGraphOpsDefinitions& ops_definitions,
645 const ShapeRefiner& shape_refiner, const Node& node) {
646 CHECK_EQ(node_name_to_id_cache_map_.count(node.name()), 1);
647 const int id = node_name_to_id_cache_map_[node.name()];
648 shape_inference::InferenceContext* context = shape_refiner.GetContext(&node);
649 CHECK(node.attrs().Find(PADDING_ATTR_NAME));
650 // TODO(satok): Use context->GetAttr(...) instead?
651 Padding padding;
652 TF_CHECK_OK(context->GetAttr(PADDING_ATTR_NAME, &padding));
653 CHECK(node.attrs().Find(STRIDES_ATTR_NAME));
654 std::vector<int32> strides;
655 TF_CHECK_OK(context->GetAttr(STRIDES_ATTR_NAME, &strides));
656 const int stride_id = RegisterConstantShape(strides);
657 std::vector<int> extra_inputs{stride_id};
658 if (node.attrs().Find(KSIZE_ATTR_NAME)) {
659 std::vector<int32> kernel_sizes;
660 TF_CHECK_OK(context->GetAttr(KSIZE_ATTR_NAME, &kernel_sizes));
661 const int ksize_id = RegisterConstantShape(kernel_sizes);
662 extra_inputs.insert(extra_inputs.begin(), ksize_id);
663 }
664 // TODO(satok): Set correct data type if it's given.
665 const int op_type_id = ops_definitions.GetOpIdFor(node.type_string(), {});
666 CHECK(op_type_id >= 0 && op_type_id < ops_definitions.GetTotalOpsCount())
667 << "Op " << node.type_string() << " not found in map(id = " << op_type_id
668 << ")";
669 // Safety check of padding id
670 CHECK(padding == Padding::SAME);
671 AppendNodeParamsWithIoParams(
672 shape_refiner, node, node.name(), id, node.type_string(), op_type_id,
673 static_cast<int>(padding), node.num_inputs(), extra_inputs,
674 node.num_outputs(), true /* append_input */, true /* append_output */);
675 }
676
RegisterNodeWithRank(const IRemoteFusedGraphOpsDefinitions & ops_definitions,const ShapeRefiner & shape_refiner,const Node & node)677 void GraphTransferer::RegisterNodeWithRank(
678 const IRemoteFusedGraphOpsDefinitions& ops_definitions,
679 const ShapeRefiner& shape_refiner, const Node& node) {
680 CHECK_EQ(node_name_to_id_cache_map_.count(node.name()), 1);
681 const int id = node_name_to_id_cache_map_[node.name()];
682 shape_inference::InferenceContext* context = shape_refiner.GetContext(&node);
683 const Node* input0_node;
684 TF_CHECK_OK(node.input_node(0, &input0_node));
685 CHECK_NOTNULL(input0_node);
686 std::vector<TensorShape> shapes;
687 Status status = RemoteFusedGraphExecuteUtils::GetOutputTensorShapeType(
688 input0_node->attrs(), nullptr, &shapes);
689 CHECK_EQ(1, shapes.size()) << "Output size should be 1.";
690 const int const_val_id =
691 RegisterConstScalar(DT_INT32, shapes.at(0).dims(), id, node.num_inputs());
692 std::vector<int> extra_inputs{const_val_id};
693 // TODO(satok): Set correct data type if it's given.
694 const int op_type_id = ops_definitions.GetOpIdFor(node.type_string(), {});
695 CHECK(op_type_id >= 0 && op_type_id < ops_definitions.GetTotalOpsCount())
696 << "Op " << node.type_string() << " not found in map(id = " << op_type_id
697 << ")";
698 bool keep_dims = false;
699 int padding_id = PADDING_NA_ID;
700 if (context->GetAttr(KEEP_DIMS_ATTR_NAME, &keep_dims).ok()) {
701 padding_id = keep_dims ? Padding::SAME : Padding::VALID;
702 }
703
704 AppendNodeParamsWithIoParams(
705 shape_refiner, node, node.name(), id, node.type_string(), op_type_id,
706 padding_id, node.num_inputs(), extra_inputs, node.num_outputs(),
707 true /* append_input */, true /* append_output */);
708 }
709
RegisterPadNode(const IRemoteFusedGraphOpsDefinitions & ops_definitions,const ShapeRefiner & shape_refiner,const Node & node)710 void GraphTransferer::RegisterPadNode(
711 const IRemoteFusedGraphOpsDefinitions& ops_definitions,
712 const ShapeRefiner& shape_refiner, const Node& node) {
713 static constexpr int PAD_WIDTH = 4;
714 static constexpr int PAD_HEIGHT = 2;
715 VLOG(1) << "Register generic node: " << node.name();
716 CHECK_EQ(node_name_to_id_cache_map_.count(node.name()), 1);
717 const int id = node_name_to_id_cache_map_[node.name()];
718
719 // TODO(satok): Set correct data type if it's given.
720 const int op_type_id = ops_definitions.GetOpIdFor(node.type_string(), {});
721 CHECK(op_type_id >= 0 && op_type_id < ops_definitions.GetTotalOpsCount());
722
723 CHECK_EQ(2, node.num_inputs());
724
725 GraphTransferNodeInputInfo& node_input_info =
726 *graph_transfer_info_->add_node_input_info();
727 node_input_info.set_node_id(id);
728
729 AddNodeInputByInputIndex(node, 0, &node_input_info);
730
731 const Edge* edge = nullptr;
732 TF_CHECK_OK(node.input_edge(1, &edge));
733 const Node* input_node = edge->src();
734 CHECK_NOTNULL(input_node);
735 CHECK(input_node->IsConstant());
736
737 const TensorProto* tensor_proto = nullptr;
738 TF_CHECK_OK(GetNodeAttr(input_node->attrs(), "value", &tensor_proto));
739 CHECK_NOTNULL(tensor_proto);
740 Tensor const_tensor;
741 TF_CHECK_OK(MakeTensorFromProto(*tensor_proto, &const_tensor));
742 CHECK_EQ(2, const_tensor.shape().dims());
743 CHECK_EQ(PAD_HEIGHT, const_tensor.shape().dim_size(1));
744 if (const_tensor.shape().dim_size(0) == PAD_WIDTH) {
745 AddNodeInputByInputIndex(node, 1, &node_input_info);
746 } else if (const_tensor.shape().dim_size(0) < PAD_WIDTH) {
747 const int width = const_tensor.shape().dim_size(0);
748 const TensorProto* proto = nullptr;
749 TF_CHECK_OK(GetNodeAttr(input_node->attrs(), "value", &proto));
750 Tensor const_tensor;
751 TF_CHECK_OK(MakeTensorFromProto(*proto, &const_tensor));
752 CHECK_EQ(DT_INT32, const_tensor.dtype());
753 // reshape tensor input to be rank 4.
754 // TODO(satok): Never assume rank is 4.
755 Tensor new_const_tensor(const_tensor.dtype(), TensorShape{4, 2});
756 for (int i = 0; i < PAD_HEIGHT; ++i) {
757 for (int j = 0; j < PAD_WIDTH; ++j) {
758 if (j < PAD_WIDTH - width) {
759 new_const_tensor.matrix<int32>()(j, i) = 0;
760 } else {
761 new_const_tensor.matrix<int32>()(j, i) =
762 const_tensor.matrix<int32>()(j - (PAD_WIDTH - width), i);
763 }
764 }
765 }
766
767 const int id = RegisterConstTensor(
768 new_const_tensor,
769 strings::StrCat(input_node->name(), "_", node.name(), "_1"));
770
771 GraphTransferNodeInput& node_input = *node_input_info.add_node_input();
772 node_input.set_node_id(id);
773 node_input.set_output_port(0);
774 } else {
775 LOG(FATAL);
776 }
777
778 AppendNodeParamsWithIoParams(
779 shape_refiner, node, node.name(), id, node.type_string(), op_type_id,
780 PADDING_NA_ID, node.num_inputs(), {}, node.num_outputs(),
781 false /* append_input */, true /* append_output */);
782 }
783
RegisterInputNode(const IRemoteFusedGraphOpsDefinitions & ops_definitions,const ShapeRefiner & shape_refiner,const Node & node)784 void GraphTransferer::RegisterInputNode(
785 const IRemoteFusedGraphOpsDefinitions& ops_definitions,
786 const ShapeRefiner& shape_refiner, const Node& node) {
787 const string op_type = node.type_string();
788 VLOG(1) << "Register input node: " << node.name() << ", " << op_type;
789 CHECK_EQ(node_name_to_id_cache_map_.count(node.name()), 1);
790 const int id = node_name_to_id_cache_map_[node.name()];
791 // TODO(satok): Set correct data type if it's given.
792 const int op_type_id = ops_definitions.GetOpIdFor("INPUT", {});
793 CHECK(op_type_id >= 0 && op_type_id < ops_definitions.GetTotalOpsCount())
794 << "Op" << node.name() << ", " << op_type << " is not supported,"
795 << op_type_id;
796 AppendNodeParamsWithIoParams(
797 shape_refiner, node, node.name(), id, node.type_string(), op_type_id,
798 PADDING_NA_ID, node.num_inputs(), {}, node.num_outputs(),
799 true /* append_input */, true /* append_output */);
800 }
801
RegisterFlattenNode(const IRemoteFusedGraphOpsDefinitions & ops_definitions,const ShapeRefiner & shape_refiner,const Node & node)802 void GraphTransferer::RegisterFlattenNode(
803 const IRemoteFusedGraphOpsDefinitions& ops_definitions,
804 const ShapeRefiner& shape_refiner, const Node& node) {
805 VLOG(1) << "Register flatten node: " << node.name();
806 CHECK_EQ(node_name_to_id_cache_map_.count(node.name()), 1);
807 const int id = node_name_to_id_cache_map_[node.name()];
808 // TODO(satok): Remove dependency to specific type
809 const string op_type = "FLATTEN";
810 // TODO(satok): Set correct data type if it's given.
811 const int op_type_id = ops_definitions.GetOpIdFor(op_type, {});
812 CHECK(op_type_id >= 0 && op_type_id < ops_definitions.GetTotalOpsCount());
813
814 AppendNodeParamsWithIoParams(
815 shape_refiner, node, node.name(), id, node.type_string(), op_type_id,
816 PADDING_NA_ID, node.num_inputs(), {}, node.num_outputs(),
817 true /* append_input */, true /* append_output */);
818 }
819
RegisterGenericNode(const IRemoteFusedGraphOpsDefinitions & ops_definitions,const ShapeRefiner & shape_refiner,const Node & node)820 void GraphTransferer::RegisterGenericNode(
821 const IRemoteFusedGraphOpsDefinitions& ops_definitions,
822 const ShapeRefiner& shape_refiner, const Node& node) {
823 VLOG(1) << "Register generic node: " << node.name();
824 CHECK_EQ(node_name_to_id_cache_map_.count(node.name()), 1);
825 const int id = node_name_to_id_cache_map_[node.name()];
826 // TODO(satok): Set correct data type if it's given.
827 const int op_type_id = ops_definitions.GetOpIdFor(node.type_string(), {});
828 CHECK(op_type_id >= 0 && op_type_id < ops_definitions.GetTotalOpsCount());
829
830 AppendNodeParamsWithIoParams(
831 shape_refiner, node, node.name(), id, node.type_string(), op_type_id,
832 PADDING_NA_ID, node.num_inputs(), {}, node.num_outputs(),
833 true /* append_input */, true /* append_output */);
834 }
835
836 // TODO(satok): Remove this function.
837 // TODO(satok): Remove only_register_const_node.
RegisterNodeIfAllInputsAreCached(const IRemoteFusedGraphOpsDefinitions & ops_definitions,const ShapeRefiner & shape_refiner,const Node & node,const bool only_register_const_node,const std::vector<std::pair<string,Tensor>> & input_node_info_list,const std::vector<string> & output_node_names)838 Status GraphTransferer::RegisterNodeIfAllInputsAreCached(
839 const IRemoteFusedGraphOpsDefinitions& ops_definitions,
840 const ShapeRefiner& shape_refiner, const Node& node,
841 const bool only_register_const_node,
842 const std::vector<std::pair<string, Tensor>>& input_node_info_list,
843 const std::vector<string>& output_node_names) {
844 if (only_register_const_node && !node.IsConstant()) {
845 return Status();
846 }
847 CHECK(AreAllInputsCached(node));
848 return RegisterNode(ops_definitions, shape_refiner, node,
849 input_node_info_list, output_node_names);
850 }
851
852 // CAVEAT: Append inputs and outputs params accordingly
AppendNodeParams(const string & name,const int id,const string & type,const int type_id,const int padding,const int inputs_size,const std::vector<int> & extra_inputs,const int outputs_size)853 void GraphTransferer::AppendNodeParams(const string& name, const int id,
854 const string& type, const int type_id,
855 const int padding, const int inputs_size,
856 const std::vector<int>& extra_inputs,
857 const int outputs_size) {
858 GraphTransferNodeInfo& node_info = *graph_transfer_info_->add_node_info();
859 node_info.set_name(name);
860 node_info.set_node_id(id);
861 node_info.set_type_name(type);
862 node_info.set_soc_op_id(type_id);
863 node_info.set_padding_id(padding);
864 node_info.set_input_count(inputs_size +
865 static_cast<int>(extra_inputs.size()));
866 node_info.set_output_count(static_cast<int>(outputs_size));
867 }
868
AddNodeInputByInputIndex(const Node & node,const int idx,GraphTransferNodeInputInfo * node_input_info)869 void GraphTransferer::AddNodeInputByInputIndex(
870 const Node& node, const int idx,
871 GraphTransferNodeInputInfo* node_input_info) {
872 const Edge* edge = nullptr;
873 TF_CHECK_OK(node.input_edge(idx, &edge));
874 const Node* input_node = edge->src();
875 CHECK_NOTNULL(input_node);
876 const int port = edge->src_output();
877
878 const std::string& op_name = input_node->name();
879 CHECK_GT(node_name_to_id_cache_map_.count(op_name), 0) << op_name;
880 const int src_id = node_name_to_id_cache_map_[op_name];
881 GraphTransferNodeInput& node_input = *node_input_info->add_node_input();
882 node_input.set_node_id(src_id);
883 node_input.set_output_port(port);
884 }
885
AppendNodeInputParams(const int id,const Node & node,const std::vector<int> & extra_inputs)886 void GraphTransferer::AppendNodeInputParams(
887 const int id, const Node& node, const std::vector<int>& extra_inputs) {
888 VLOG(1) << "Append input params: " << node.name() << ", " << node.num_inputs()
889 << ", " << extra_inputs.size();
890 GraphTransferNodeInputInfo& node_input_info =
891 *graph_transfer_info_->add_node_input_info();
892 node_input_info.set_node_id(id);
893 for (int i = 0; i < node.num_inputs(); ++i) {
894 AddNodeInputByInputIndex(node, i, &node_input_info);
895 }
896 for (const int extra_input : extra_inputs) {
897 GraphTransferNodeInput& node_input = *node_input_info.add_node_input();
898 node_input.set_node_id(extra_input);
899 node_input.set_output_port(0);
900 }
901 }
902
AppendNodeOutputParams(const ShapeRefiner & shape_refiner,const int id,const Node & node)903 void GraphTransferer::AppendNodeOutputParams(const ShapeRefiner& shape_refiner,
904 const int id, const Node& node) {
905 VLOG(1) << "Append output params: " << node.name() << ", "
906 << node.num_outputs();
907 GraphTransferNodeOutputInfo& node_output_info =
908 *graph_transfer_info_->add_node_output_info();
909 node_output_info.set_node_id(id);
910
911 std::vector<DataType> data_types;
912 std::vector<TensorShape> shapes;
913 Status status = RemoteFusedGraphExecuteUtils::GetOutputTensorShapeType(
914 node.attrs(), &data_types, &shapes);
915
916 for (int i = 0; i < node.num_outputs(); ++i) {
917 int data_size = -1;
918 const int output_index = i;
919 const DataType dt = node.output_type(output_index);
920 const size_t max_bytes_per_data = DataTypeSize(dt);
921
922 shape_inference::InferenceContext* context =
923 shape_refiner.GetContext(&node);
924
925 if (context != nullptr && context->ValueKnown(context->NumElements(
926 context->output(output_index)))) {
927 const shape_inference::DimensionHandle num_elements_dim =
928 context->NumElements(context->output(output_index));
929 const int64 num_output_elements = context->Value(num_elements_dim);
930 data_size = max_bytes_per_data * num_output_elements;
931 if (status.ok()) {
932 TF_CHECK_OK(status);
933 CHECK_EQ(shapes.at(i).num_elements(), num_output_elements);
934 }
935 } else {
936 TF_CHECK_OK(status);
937 // Use attribute attached to node
938 data_size = max_bytes_per_data * shapes.at(i).num_elements();
939 }
940 CHECK_GE(data_size, 0);
941 node_output_info.add_max_byte_size(data_size);
942 }
943 }
944
AppendNodeParamsWithIoParams(const ShapeRefiner & shape_refiner,const Node & node,const string & name,const int id,const string & type,const int type_id,const int padding,const int inputs_size,const std::vector<int> & extra_inputs,const int outputs_size,const bool append_input_params,const bool append_output_params)945 void GraphTransferer::AppendNodeParamsWithIoParams(
946 const ShapeRefiner& shape_refiner, const Node& node, const string& name,
947 const int id, const string& type, const int type_id, const int padding,
948 const int inputs_size, const std::vector<int>& extra_inputs,
949 const int outputs_size, const bool append_input_params,
950 const bool append_output_params) {
951 VLOG(1) << "Append node with io params: " << node.name();
952 if (append_input_params) {
953 AppendNodeInputParams(id, node, extra_inputs);
954 }
955 if (append_output_params) {
956 AppendNodeOutputParams(shape_refiner, id, node);
957 }
958 AppendNodeParams(name, id, type, type_id, padding, inputs_size, extra_inputs,
959 outputs_size);
960 }
961
962 /* static */ std::array<int64, GraphTransferer::SHAPE_ARRAY_SIZE>
BuildShapeArray(const shape_inference::ShapeHandle & shape_handle,shape_inference::InferenceContext * context)963 GraphTransferer::BuildShapeArray(
964 const shape_inference::ShapeHandle& shape_handle,
965 shape_inference::InferenceContext* context) {
966 switch (context->Rank(shape_handle)) {
967 case 0:
968 return std::array<int64, SHAPE_ARRAY_SIZE>{{1, 1, 1, 1}};
969 case 1:
970 return std::array<int64, SHAPE_ARRAY_SIZE>{
971 {1, 1, 1, context->Value(context->Dim(shape_handle, 0))}};
972 case 2:
973 return std::array<int64, SHAPE_ARRAY_SIZE>{
974 {1, 1, context->Value(context->Dim(shape_handle, 0)),
975 context->Value(context->Dim(shape_handle, 1))}};
976 case 3:
977 return std::array<int64, SHAPE_ARRAY_SIZE>{
978 {1, context->Value(context->Dim(shape_handle, 0)),
979 context->Value(context->Dim(shape_handle, 1)),
980 context->Value(context->Dim(shape_handle, 2))}};
981 case 4:
982 return std::array<int64, SHAPE_ARRAY_SIZE>{
983 {context->Value(context->Dim(shape_handle, 0)),
984 context->Value(context->Dim(shape_handle, 1)),
985 context->Value(context->Dim(shape_handle, 2)),
986 context->Value(context->Dim(shape_handle, 3))}};
987 default:
988 // TODO(satok): Support more ranks?
989 LOG(FATAL);
990 return std::array<int64, SHAPE_ARRAY_SIZE>();
991 }
992 }
993
994 /* static */ std::array<int64, GraphTransferer::SHAPE_ARRAY_SIZE>
ToTensorShapeArray(const TensorShape & shape)995 GraphTransferer::ToTensorShapeArray(const TensorShape& shape) {
996 switch (shape.dims()) {
997 case 0:
998 return std::array<int64, SHAPE_ARRAY_SIZE>{{1, 1, 1, 1}};
999 case 1:
1000 return std::array<int64, SHAPE_ARRAY_SIZE>{{1, 1, 1, shape.dim_size(0)}};
1001 case 2:
1002 return std::array<int64, SHAPE_ARRAY_SIZE>{
1003 {1, 1, shape.dim_size(0), shape.dim_size(1)}};
1004 case 3:
1005 return std::array<int64, SHAPE_ARRAY_SIZE>{
1006 {1, shape.dim_size(0), shape.dim_size(1), shape.dim_size(2)}};
1007 case 4:
1008 return std::array<int64, SHAPE_ARRAY_SIZE>{
1009 {shape.dim_size(0), shape.dim_size(1), shape.dim_size(2),
1010 shape.dim_size(3)}};
1011 default:
1012 // TODO(satok): Support more ranks?
1013 LOG(FATAL);
1014 return std::array<int64, SHAPE_ARRAY_SIZE>();
1015 }
1016 }
1017
ToPaddingDebugString(const int padding)1018 /* static */ string GraphTransferer::ToPaddingDebugString(const int padding) {
1019 switch (padding) {
1020 case 0:
1021 return "NN_PAD_NA";
1022 case Padding::VALID:
1023 return "NN_PAD_VALID";
1024 case Padding::SAME:
1025 return "NN_PAD_SAME";
1026 default:
1027 LOG(FATAL);
1028 return "";
1029 }
1030 }
1031
TransferParamsComparator(const std::unordered_map<int,std::unordered_set<int>> & dep_map)1032 GraphTransferer::TransferParamsComparator::TransferParamsComparator(
1033 const std::unordered_map<int, std::unordered_set<int>>& dep_map)
1034 : dependency_map_(dep_map) {}
1035
operator ()(const GraphTransferNodeInfo & obj0,const GraphTransferNodeInfo & obj1)1036 bool GraphTransferer::TransferParamsComparator::operator()(
1037 const GraphTransferNodeInfo& obj0, const GraphTransferNodeInfo& obj1) {
1038 const int node_id0 = obj0.node_id();
1039 const int node_id1 = obj1.node_id();
1040 bool obj0_uses_obj1 = false;
1041 if (dependency_map_.count(node_id0) > 0) {
1042 obj0_uses_obj1 = dependency_map_.at(node_id0).count(node_id1) > 0;
1043 }
1044 bool obj1_uses_obj0 = false;
1045 if (dependency_map_.count(node_id1) > 0) {
1046 obj1_uses_obj0 = dependency_map_.at(node_id1).count(node_id0) > 0;
1047 }
1048 CHECK(!obj0_uses_obj1 || !obj1_uses_obj0);
1049 if (obj0_uses_obj1) {
1050 return false;
1051 } else if (obj1_uses_obj0) {
1052 return true;
1053 }
1054 // If there is no dependency between two nodes, it expects that
1055 // the execution order follows node id order.
1056 return node_id0 < node_id1;
1057 }
1058
FillDependencyRec(const int node_id,std::unordered_map<int,std::unordered_set<int>> & dep_map,std::unordered_set<int> & completed)1059 /* static */ void GraphTransferer::FillDependencyRec(
1060 const int node_id,
1061 std::unordered_map<int, std::unordered_set<int>>& dep_map,
1062 std::unordered_set<int>& completed) {
1063 if (dep_map.count(node_id) == 0 || dep_map.at(node_id).empty() ||
1064 completed.count(node_id) == 1) {
1065 return;
1066 }
1067 CHECK_EQ(dep_map.count(node_id), 1);
1068
1069 // Complete children's dependency map
1070 for (int child_node_id : dep_map.at(node_id)) {
1071 CHECK(child_node_id != node_id);
1072 if (completed.count(child_node_id) != 0) {
1073 continue;
1074 }
1075 FillDependencyRec(child_node_id, dep_map, completed);
1076 }
1077
1078 // Find additional depending ids
1079 std::vector<int> depending_ids;
1080 for (int child_node_id : dep_map.at(node_id)) {
1081 if (dep_map.count(child_node_id) == 0) {
1082 continue;
1083 }
1084 for (int depending_id : dep_map.at(child_node_id)) {
1085 depending_ids.emplace_back(depending_id);
1086 }
1087 }
1088
1089 // Insert additional depending ids
1090 for (int depending_id : depending_ids) {
1091 if (dep_map.at(node_id).count(depending_id) == 0) {
1092 dep_map.at(node_id).emplace(depending_id);
1093 }
1094 }
1095
1096 // DP: Record completed node id
1097 completed.emplace(node_id);
1098 }
1099
MakeTensorFromProto(const TensorProto & tensor_proto,Tensor * tensor)1100 /* static */ Status GraphTransferer::MakeTensorFromProto(
1101 const TensorProto& tensor_proto, Tensor* tensor) {
1102 if (tensor_proto.dtype() > 0 && tensor_proto.dtype() <= DataType_MAX) {
1103 Tensor parsed(tensor_proto.dtype());
1104 if (parsed.FromProto(cpu_allocator(), tensor_proto)) {
1105 *tensor = parsed;
1106 return Status::OK();
1107 }
1108 }
1109 return errors::InvalidArgument("Cannot parse tensor from proto: ",
1110 tensor_proto.DebugString());
1111 }
1112
ClearCache()1113 void GraphTransferer::ClearCache() {
1114 node_name_cache_list_.clear();
1115 node_name_to_id_cache_map_.clear();
1116 }
1117
DumpNodeTransferParams() const1118 void GraphTransferer::DumpNodeTransferParams() const {
1119 LOG(INFO) << "*** Const Nodes ***";
1120 for (const GraphTransferConstNodeInfo& params :
1121 graph_transfer_info_->const_node_info()) {
1122 // TODO(satok): Stop assuming shape size is 4.
1123 CHECK_EQ(params.shape_size(), 4);
1124 LOG(INFO) << "[ " << params.node_id() << " \"" << params.name()
1125 << "\" (Const)";
1126 LOG(INFO) << " shape: " << params.shape(0) << params.shape(1)
1127 << params.shape(2) << params.shape(3);
1128 LOG(INFO) << " data_name: "
1129 << (params.data().length() <= 0
1130 ? ""
1131 : DATA_NODE_PREFIX + ToString(params.node_id()));
1132 LOG(INFO) << " data_size: " << params.data().length() << " bytes"
1133 << " ]";
1134 }
1135 LOG(INFO) << "******\n";
1136 LOG(INFO) << "*** Op Nodes ***";
1137 for (const GraphTransferNodeInfo& params :
1138 graph_transfer_info_->node_info()) {
1139 LOG(INFO) << "[ " << params.node_id() << " \"" << params.name();
1140 LOG(INFO) << " type: " << params.type_name();
1141 LOG(INFO) << " padding: " << ToPaddingDebugString(params.padding_id());
1142 LOG(INFO) << " inputs: " << INPUTS_NODE_PREFIX + ToString(params.node_id())
1143 << ", size = " << params.input_count();
1144 LOG(INFO) << " outputs: "
1145 << (params.output_count() <= 0
1146 ? NULL_OUTPUT_NAME
1147 : (OUTPUTS_NODE_PREFIX + ToString(params.node_id())))
1148 << ", size = " << params.output_count() << " ]";
1149 }
1150 LOG(INFO) << "******\n";
1151 LOG(INFO) << "*** Node input params ***";
1152 for (const GraphTransferNodeInputInfo& params :
1153 graph_transfer_info_->node_input_info()) {
1154 LOG(INFO) << "[ " << params.node_id() << " ]";
1155 for (const GraphTransferNodeInput& node_input : params.node_input()) {
1156 LOG(INFO) << " src node id = " << node_input.node_id()
1157 << ", output port = " << node_input.output_port();
1158 }
1159 }
1160 LOG(INFO) << "******\n";
1161 LOG(INFO) << "*** Node output params ***";
1162 for (const GraphTransferNodeOutputInfo& params :
1163 graph_transfer_info_->node_output_info()) {
1164 LOG(INFO) << "[ " << params.node_id() << " ]";
1165 for (const int max_size : params.max_byte_size()) {
1166 LOG(INFO) << " max_size = " << max_size;
1167 }
1168 }
1169 LOG(INFO) << "******\n";
1170 }
1171
DumpVerificationStringOfNodeTransferParams() const1172 void GraphTransferer::DumpVerificationStringOfNodeTransferParams() const {
1173 for (const GraphTransferConstNodeInfo& params :
1174 graph_transfer_info_->const_node_info()) {
1175 std::stringstream sstream;
1176 // TODO(satok): Stop assuming shape size is 4.
1177 CHECK_EQ(params.shape_size(), 4);
1178 sstream << "---(CONST) [" << std::hex << params.node_id() << std::dec << ","
1179 << params.shape(0) << "," << params.shape(1) << ","
1180 << params.shape(2) << "," << params.shape(3) << ","
1181 << (params.data().length() <= 0
1182 ? ""
1183 : DATA_NODE_PREFIX + ToString(params.node_id()))
1184 << "," << params.data().length() << "," << params.name() << "]";
1185 LOG(INFO) << sstream.str();
1186 }
1187 LOG(INFO) << "Const node count = "
1188 << graph_transfer_info_->const_node_info_size();
1189 for (const GraphTransferNodeInfo& params :
1190 graph_transfer_info_->node_info()) {
1191 std::stringstream sstream;
1192 sstream << "---(OP) [" << params.name().c_str() << "," << std::hex
1193 << params.node_id() << std::dec << "," << params.soc_op_id() << ","
1194 << ToPaddingDebugString(params.padding_id()) << ","
1195 << INPUTS_NODE_PREFIX + ToString(params.node_id()) << ","
1196 << params.input_count() << ","
1197 << (params.output_count() <= 0
1198 ? NULL_OUTPUT_NAME
1199 : (OUTPUTS_NODE_PREFIX + ToString(params.node_id())))
1200 << "," << params.output_count() << "," << params.type_name() << "]";
1201 LOG(INFO) << sstream.str();
1202 }
1203 LOG(INFO) << "Op node count = " << graph_transfer_info_->node_info_size();
1204 for (const GraphTransferNodeInputInfo& params :
1205 graph_transfer_info_->node_input_info()) {
1206 std::stringstream sstream;
1207 sstream << "---(INPUT) [" << std::hex << params.node_id() << std::dec;
1208 for (const GraphTransferNodeInput& node_input : params.node_input()) {
1209 sstream << "," << std::hex << node_input.node_id() << std::dec << ","
1210 << node_input.output_port();
1211 }
1212 sstream << "]";
1213 LOG(INFO) << sstream.str();
1214 }
1215 LOG(INFO) << "Input params count = "
1216 << graph_transfer_info_->node_input_info_size();
1217 for (const GraphTransferNodeOutputInfo& params :
1218 graph_transfer_info_->node_output_info()) {
1219 std::stringstream sstream;
1220 sstream << "---(OUTPUT) [" << std::hex << params.node_id() << std::dec;
1221 for (const int max_size : params.max_byte_size()) {
1222 sstream << "," << max_size;
1223 }
1224 sstream << "]";
1225 LOG(INFO) << sstream.str();
1226 }
1227 LOG(INFO) << "Output params count = "
1228 << graph_transfer_info_->node_output_info_size();
1229 }
1230
1231 } // namespace tensorflow
1232