/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/common_runtime/quantize_training.h"

#include <algorithm>
#include <atomic>
#include <set>
#include <unordered_map>
#include <vector>

#include "tensorflow/core/common_runtime/graph_constructor.h"
#include "tensorflow/core/common_runtime/memory_types.h"
#include "tensorflow/core/framework/log_memory.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/graph/algorithm.h"
#include "tensorflow/core/graph/node_builder.h"
#include "tensorflow/core/graph/subgraph.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/public/session_options.h"

namespace tensorflow {
namespace {

// TODO(suharshs): If desired, make these values configurable.
const uint32 kAllowedInputs = 2;
const float kEMADecay = 0.999;
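// The EMA decay above controls how quickly the tracked min/max ranges adapt;
// a value close to 1 adapts slowly and smooths out per-batch noise in the
// range estimates.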

// Node types to rewrite. Insert quantize_and_dequantize op for their inputs.
const auto* nodes_to_rewrite =
    new std::unordered_set<string, StringPieceHasher>{"MatMul", "Conv2D"};
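// For example, each data input edge W -> MatMul is rewritten as
// W -> QuantizeAndDequantizeV2 -> MatMul (or FakeQuantWithMinMaxVars,
// depending on the requested quant_op_type).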

// Contains necessary parameters to convert an edge.
struct EdgeToConvert {
  // edge is not owned here.
  const Edge* edge;
  int32 num_bits;
  bool signed_input;
  bool range_given;
  float input_min;
  float input_max;

  EdgeToConvert(const Edge* e, int32 bits, bool sign, bool range, float min,
                float max)
      : edge(e),
        num_bits(bits),
        signed_input(sign),
        range_given(range),
        input_min(min),
        input_max(max) {}
};

// Decide whether a node is in the backward pass by checking whether its name
// starts with "gradients".
// TODO(jmchen): Make this check more robust, as it is not guaranteed that a
// forward node will never be named with a leading "gradients".
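// For example, ops created by tf.gradients() live under the "gradients/"
// name scope, so a name like "gradients/MatMul_grad/MatMul" matches.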
inline bool IsGradientNode(const Graph* graph, const Node* node) {
  static const string tag = "gradients";
  return (node->name().compare(0, tag.size(), tag) == 0);
}

// Find the type of the input to set the parameters for the
// quantize_and_dequantize op.
// Returns true if the root tensor op type is known, false otherwise.
bool FindType(const Graph* graph, const Node* node, bool* signed_input,
              bool* range_given, float* input_min, float* input_max) {
  const string& src_op = node->type_string();
  if (src_op == "Const" || src_op == "Variable" || src_op == "VariableV2") {
    *signed_input = true;
    *range_given = false;
  } else if (src_op == "Relu") {
    // Range is not given for Relu.
    *signed_input = false;
    *range_given = false;
  } else if (src_op == "Relu6") {
    // TODO(suharshs): The theoretical min and max are 0 and 6; if the actual
    // activations fall within a narrower range, we could quantize even
    // further. The same holds for other bounded activations (e.g. Sigmoid).
91     *signed_input = false;
92     *range_given = true;
93     *input_min = 0;
94     *input_max = 6;
95   } else if (src_op == "Sigmoid") {
96     *signed_input = false;
97     *range_given = true;
98     *input_min = 0;
99     *input_max = 1;
100   } else if (src_op == "Tanh") {
101     *signed_input = true;
102     *range_given = true;
103     *input_min = -1;
104     *input_max = 1;
  } else if (src_op == "Reshape" || src_op == "ConcatV2") {
    // Reshape has 2 inputs and the first one is the tensor.
    // ConcatV2 has many inputs, but they should all have the same activation
    // function (e.g. in Inception), so we just recurse on the first input.
    for (const Edge* edge : node->in_edges()) {
      if (edge->src_output() != Graph::kControlSlot && edge->dst_input() == 0) {
        FindType(graph, edge->src(), signed_input, range_given, input_min,
                 input_max);
      }
    }
  } else if (src_op == "Identity" || src_op == "MaxPool" ||
             src_op == "AvgPool" || src_op == "MaxPool3D" ||
             src_op == "AvgPool3D") {
    // All these Ops only have 1 data input.
    for (const Edge* edge : node->in_edges()) {
      if (edge->src_output() != Graph::kControlSlot) {
        FindType(graph, edge->src(), signed_input, range_given, input_min,
                 input_max);
      }
    }
  } else {
    // Unknown type, could be the model input examples.
    // TODO(jmchen): Set the params for input with user's hint.
    *signed_input = true;
    *range_given = false;
    return false;
  }

  return true;
}

// Find the Save op and inputs.
Status FindSaveOp(const Graph* graph, Node** save_op,
                  std::vector<const Edge*>* in_edges, bool* found) {
  *found = false;
  for (Node* node : graph->op_nodes()) {
    if (node->type_string() == "SaveV2") {
      // We found multiple save ops.
      if (*found) {
        return errors::InvalidArgument("Input graph has multiple SaveV2 ops.");
      }
      *save_op = node;
      *found = true;
      TF_RETURN_IF_ERROR(node->input_edges(in_edges));
    }
  }
  return Status::OK();
}

Node* FindRestoreAllOp(const Graph* graph, StringPiece save_prefix) {
  for (Node* node : graph->op_nodes()) {
    // The restore_all op should have the same prefix as the save_op.
    if (node->name() == strings::StrCat(save_prefix, "/restore_all")) {
      return node;
    }
  }
  return nullptr;
}

// Strips the last "/suffix" from a name.
// We use this to construct the name of restore ops in the same way they are
// constructed by the Saver.
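// For example, a Saver-created "save/SaveV2" node yields the prefix "save",
// which matches the Saver's "save/restore_all" op.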
StringPiece GetNodeNamePrefix(const Node* node) {
  StringPiece name = node->name();
  return name.substr(0, name.rfind('/'));
}

void FillStringTensor(Tensor* dst, const Tensor& src) {
  auto dst_flat = dst->flat<tstring>();
  auto src_flat = src.flat<tstring>();
  for (int i = 0; i < src.NumElements(); i++) {
    dst_flat(i) = src_flat(i);
  }
}

// Adds the added_variables as inputs to the Save op.
// We change the inputs of the SaveV2 op to include the names of the added
// variables, and we also add the variables themselves as inputs to the op.
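// SaveV2 takes (prefix, tensor_names, shape_and_slices, tensors...), so the
// two string const tensors and the list of tensor inputs must be extended in
// sync.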
Status ConnectVariablesToSaveOp(Graph* graph, Node* save_op,
                                const std::vector<const Edge*>& in_edges,
                                const std::vector<Node*>& added_variables) {
  Node* tensor_names_op = in_edges[1]->src();
  Node* shape_and_slices_op = in_edges[2]->src();

  // Get the tensor_names and shape_and_slices tensors from the const op.
  Tensor tensor_names;
  Tensor shape_and_slices;
  TF_RETURN_IF_ERROR(
      GetNodeAttr(tensor_names_op->attrs(), "value", &tensor_names));
  TF_RETURN_IF_ERROR(
      GetNodeAttr(shape_and_slices_op->attrs(), "value", &shape_and_slices));

  int tn_size = tensor_names.NumElements();
  int var_size = added_variables.size();

  // Create a new save_op that has inputs to all the new variables.
  NodeBuilder save_op_builder =
      NodeBuilder(save_op->name(), save_op->type_string());
  // The first three inputs are prefix, tensor_names, and shapes_and_slices.
  for (int i = 0; i < 3; i++) {
    save_op_builder = save_op_builder.Input(in_edges[i]->src());
  }
  std::vector<NodeBuilder::NodeOut> var_nodeouts;
  var_nodeouts.reserve(tn_size + var_size);
  // The rest of the inputs are used to construct the tensor list arg.
  for (int i = 3; i < in_edges.size(); i++) {
    var_nodeouts.emplace_back(in_edges[i]->src());
  }

  // Add the new values to the tensors and the op input.
  Tensor new_tensor_names(DT_STRING, TensorShape({tn_size + var_size}));
  Tensor new_shape_and_slices(DT_STRING, TensorShape({tn_size + var_size}));
  FillStringTensor(&new_tensor_names, tensor_names);
  FillStringTensor(&new_shape_and_slices, shape_and_slices);
  for (int i = 0; i < var_size; i++) {
    Node* var = added_variables[i];
    new_tensor_names.flat<tstring>()(tn_size + i) = var->name();
    new_shape_and_slices.flat<tstring>()(tn_size + i) = "";
    var_nodeouts.emplace_back(var);
  }
  save_op_builder = save_op_builder.Input(var_nodeouts);

  // Update the attrs.
  tensor_names_op->AddAttr("value", new_tensor_names);
  shape_and_slices_op->AddAttr("value", new_shape_and_slices);

  // Remove the old save_op and add the new one.
  Node* new_save_op;
  TF_RETURN_IF_ERROR(save_op_builder.Finalize(graph, &new_save_op));
  // Add outputs to the new_save_op; all outputs are control edges.
  for (const Edge* edge : save_op->out_edges()) {
    graph->AddControlEdge(new_save_op, edge->dst());
  }
  graph->RemoveNode(save_op);

  return Status::OK();
}

// Add a restore subgraph for each variable and connect to the restore_all op.
// For each variable we add the following subgraph:
//           Assign----restore_all
//          |      |
//   RestoreV2    Variable
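// RestoreV2 reads the variable's saved value from the checkpoint, Assign
// writes it into the Variable, and a control edge makes restore_all depend on
// the Assign.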
Status AddRestoreVariableSubgraphs(Graph* graph, Node* save_op,
                                   const std::vector<const Edge*>& in_edges,
                                   const std::vector<Node*>& variables) {
  Node* prefix_op = in_edges[0]->src();
  StringPiece name_prefix = GetNodeNamePrefix(save_op);
  Node* restore_all = FindRestoreAllOp(graph, name_prefix);
  if (restore_all == nullptr) {
    return errors::InvalidArgument("graph has SaveOp, but no restore_all NoOp");
  }
  const string restore_op_name = strings::StrCat(name_prefix, "/RestoreV2");
  const string assign_op_name = strings::StrCat(name_prefix, "/Assign");
  for (Node* var : variables) {
    // Add an extra prefix after calling graph->NewName because the "unique"
    // name may conflict with names generated for Send nodes.
    // TODO(b/77547936): fix this more generally and get rid of the extra prefix
    // here.
    string new_restore_op_name =
        strings::StrCat(graph->NewName(restore_op_name), "_qt");
    string new_assign_op_name =
        strings::StrCat(graph->NewName(assign_op_name), "_qt");
    string tensor_names_op_name =
        strings::StrCat(new_restore_op_name, "/tensor_names");
    string shape_and_slices_op_name =
        strings::StrCat(new_restore_op_name, "/shape_and_slices");

    // Construct the tensor_names input with the variable name.
    Node* tensor_names;
    Tensor tensor_names_val(DT_STRING, TensorShape({1}));
    tensor_names_val.flat<tstring>()(0) = var->name();
    TF_RETURN_IF_ERROR(NodeBuilder(tensor_names_op_name, "Const")
                           .Attr("dtype", DT_STRING)
                           .Attr("value", tensor_names_val)
                           .Finalize(graph, &tensor_names));

    // Construct the shape_and_slices input with an empty string.
    Node* shape_and_slices;
    Tensor shape_and_slices_val(DT_STRING, TensorShape({1}));
    shape_and_slices_val.flat<tstring>()(0) = "";
    TF_RETURN_IF_ERROR(NodeBuilder(shape_and_slices_op_name, "Const")
                           .Attr("dtype", DT_STRING)
                           .Attr("value", shape_and_slices_val)
                           .Finalize(graph, &shape_and_slices));

    // Build the new Restore op for this variable.
    Node* restore_op;
    TF_RETURN_IF_ERROR(NodeBuilder(new_restore_op_name, "RestoreV2")
                           .Input(prefix_op)
                           .Input(tensor_names)
                           .Input(shape_and_slices)
                           .Attr("dtypes", {DT_FLOAT})
                           .Finalize(graph, &restore_op));

    // Create the Assign op, attaching the variable and Restore op to it.
    Node* assign_op;
    TF_RETURN_IF_ERROR(NodeBuilder(new_assign_op_name, "Assign")
                           .Input(var)
                           .Input(restore_op)
                           .Finalize(graph, &assign_op));

    // Add a control edge from the assign op to the restore_all op.
    graph->AddControlEdge(assign_op, restore_all);
  }
  return Status::OK();
}

// Adds new variables to save and restore ops matching the Save and Restore
// graphs created in tensorflow/python/training/saver.py.
Status AddSaveAndRestore(Graph* graph, const std::vector<Node*>& variables) {
  Node* save_op = nullptr;
  std::vector<const Edge*> in_edges;
  bool found = false;
  TF_RETURN_IF_ERROR(FindSaveOp(graph, &save_op, &in_edges, &found));
  if (found) {
    TF_RETURN_IF_ERROR(
        AddRestoreVariableSubgraphs(graph, save_op, in_edges, variables));
    TF_RETURN_IF_ERROR(
        ConnectVariablesToSaveOp(graph, save_op, in_edges, variables));
  }
  return Status::OK();
}

// Sets *output to a Node that computes the reduction axes covering all
// dimensions of input.
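// The constructed subgraph is equivalent to tf.range(0, tf.rank(input), 1).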
Status MakeReductionAxes(Graph* graph, string name_prefix, Node* input,
                         Node** output) {
  name_prefix = strings::StrCat(name_prefix, "/ReductionAxes");
  Node* start;
  Tensor zero_tensor(DT_INT32, TensorShape());
  zero_tensor.flat<int32>()(0) = 0;
  TF_RETURN_IF_ERROR(
      NodeBuilder(strings::StrCat(name_prefix, "/RangeStart"), "Const")
          .Attr("dtype", DT_INT32)
          .Attr("value", zero_tensor)
          .Finalize(graph, &start));
  Node* delta;
  Tensor one_tensor(DT_INT32, TensorShape());
  one_tensor.flat<int32>()(0) = 1;
  TF_RETURN_IF_ERROR(
      NodeBuilder(strings::StrCat(name_prefix, "/RangeDelta"), "Const")
          .Attr("dtype", DT_INT32)
          .Attr("value", one_tensor)
          .Finalize(graph, &delta));
  Node* rank;
  TF_RETURN_IF_ERROR(
      NodeBuilder(strings::StrCat(name_prefix, "/InputRank"), "Rank")
          .Input(input)
          .Finalize(graph, &rank));
  TF_RETURN_IF_ERROR(
      NodeBuilder(strings::StrCat(name_prefix, "/ReductionAxes"), "Range")
          .Input(start)
          .Input(rank)
          .Input(delta)
          .Finalize(graph, output));
  return Status::OK();
}

// Computes the exponential moving average of input, updated in update_variable.
Status MakeExponentialMovingAverage(Graph* graph, string name_prefix,
                                    const NodeBuilder::NodeOut& input,
                                    Node* decay, Node* update_variable,
                                    Node** assign_value) {
  // variable_t+1 = variable_t - [(variable_t - value) * (1 - decay)]
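  // which is algebraically equivalent to
  //   variable_t+1 = decay * variable_t + (1 - decay) * value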
  name_prefix = strings::StrCat(name_prefix, "/EMA");
  Node* one;
  Tensor one_tensor(DT_FLOAT, TensorShape());
  one_tensor.flat<float>()(0) = 1.0;
  TF_RETURN_IF_ERROR(
      NodeBuilder(strings::StrCat(name_prefix, "/OneConst"), "Const")
          .Attr("dtype", DT_FLOAT)
          .Attr("value", one_tensor)
          .Finalize(graph, &one));
  Node* decay_complement;
  TF_RETURN_IF_ERROR(
      NodeBuilder(strings::StrCat(name_prefix, "/DecayComplement"), "Sub")
          .Input(one)
          .Input(decay)
          .Finalize(graph, &decay_complement));

  Node* value_diff;
  TF_RETURN_IF_ERROR(
      NodeBuilder(strings::StrCat(name_prefix, "/ValueDiff"), "Sub")
          .Input(update_variable)
          .Input(input)
          .Finalize(graph, &value_diff));
  Node* update_value;
  TF_RETURN_IF_ERROR(
      NodeBuilder(strings::StrCat(name_prefix, "/UpdateValue"), "Mul")
          .Input(value_diff)
          .Input(decay_complement)
          .Finalize(graph, &update_value));

  TF_RETURN_IF_ERROR(
      NodeBuilder(strings::StrCat(name_prefix, "/EMAValue"), "Sub")
          .Input(update_variable)
          .Input(update_value)
          .Finalize(graph, assign_value));
  return Status::OK();
}

// Creates an automatically initialized exponential moving average variable.
// This uses a switch op to assign a value to the variable on the first run,
// and update with the moving average for all other runs:
//                   init_val
//                      |
//      var--is_init--switch
//       |      true /      \ false
//       |          |        |
//       |         EMA    init_val
//       |           \      /
//       +----------- assign
Status MakeInitializedEMAVariable(Graph* graph, const string& name, Node* decay,
                                  Node* init_val,
                                  std::vector<Node*>* added_variables,
                                  Node** var) {
  // TODO(suharshs): Update this to use ResourceVariables when they are ready.
  TF_RETURN_IF_ERROR(
      NodeBuilder(strings::StrCat(name, "/Variable"), "VariableV2")
          .Attr("shape", TensorShape())
          .Attr("dtype", DT_FLOAT)
          .Finalize(graph, var));
  added_variables->push_back(*var);

  Node* is_initialized;
  TF_RETURN_IF_ERROR(NodeBuilder(strings::StrCat(name, "/IsInitialized"),
                                 "IsVariableInitialized")
                         .Input(*var)
                         .Finalize(graph, &is_initialized));
  Node* switch_node;
  TF_RETURN_IF_ERROR(NodeBuilder(strings::StrCat(name, "/Switch"), "Switch")
                         .Input(init_val)
                         .Input(is_initialized)
                         .Finalize(graph, &switch_node));
  NodeBuilder::NodeOut output_false = NodeBuilder::NodeOut(switch_node, 0);
  NodeBuilder::NodeOut output_true = NodeBuilder::NodeOut(switch_node, 1);
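  // Switch forwards init_val to output 0 when is_initialized is false and to
  // output 1 when it is true, so the EMA update only runs once the variable
  // has been assigned a value.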

  Node* ema_value;
  TF_RETURN_IF_ERROR(MakeExponentialMovingAverage(graph, name, output_true,
                                                  decay, *var, &ema_value));

  Node* assign_value;
  TF_RETURN_IF_ERROR(NodeBuilder(strings::StrCat(name, "/Merge"), "Merge")
                         .Input({output_false, ema_value})
                         .Finalize(graph, &assign_value));

  TF_RETURN_IF_ERROR(
      NodeBuilder(strings::StrCat(name, "/AssignValue"), "Assign")
          .Input(*var)
          .Input(assign_value)
          .Finalize(graph, var));
  return Status::OK();
}

// Computes the min and max EMA of input and stores them in min_var and max_var.
Status MakeEMAMinMaxVars(Graph* graph, const string& name_prefix, Node* input,
                         std::vector<Node*>* added_variables, Node** min_var,
                         Node** max_var) {
  // TODO(suharshs): The decay will be constant, so we could make only one for
  // all quantize_and_dequantize ops to share; this would have to live outside
  // this function.
  Tensor decay_tensor(DT_FLOAT, TensorShape());
  decay_tensor.flat<float>()(0) = kEMADecay;
  Node* decay;
  TF_RETURN_IF_ERROR(
      NodeBuilder(strings::StrCat(name_prefix, "/Decay"), "Const")
          .Attr("dtype", DT_FLOAT)
          .Attr("value", decay_tensor)
          .Finalize(graph, &decay));

  Node* reduction_axes;
  TF_RETURN_IF_ERROR(
      MakeReductionAxes(graph, name_prefix, input, &reduction_axes));
  Node* min;
  string min_name = strings::StrCat(name_prefix, "/Min");
  TF_RETURN_IF_ERROR(NodeBuilder(min_name, "Min")
                         .Input(input)
                         .Input(reduction_axes)
                         .Finalize(graph, &min));
  Node* max;
  string max_name = strings::StrCat(name_prefix, "/Max");
  TF_RETURN_IF_ERROR(NodeBuilder(max_name, "Max")
                         .Input(input)
                         .Input(reduction_axes)
                         .Finalize(graph, &max));
  TF_RETURN_IF_ERROR(MakeInitializedEMAVariable(graph, min_name, decay, min,
                                                added_variables, min_var));
  TF_RETURN_IF_ERROR(MakeInitializedEMAVariable(graph, max_name, decay, max,
                                                added_variables, max_var));
  return Status::OK();
}

// Makes an input min and max constant if the range is given. Otherwise, makes
// min and max variables that are updated by an EMA.
Status MakeInputMinMax(Graph* graph, const string& name_prefix,
                       const EdgeToConvert& edge,
                       std::vector<Node*>* added_variables, Node** input_min,
                       Node** input_max) {
  if (edge.range_given) {
    // Make constant nodes for the input_min and input_max if the range is
    // provided.
    Tensor input_min_tensor(DT_FLOAT, TensorShape());
    input_min_tensor.flat<float>()(0) = edge.input_min;
    TF_RETURN_IF_ERROR(
        NodeBuilder(strings::StrCat(name_prefix, "/InputMin"), "Const")
            .Attr("dtype", DT_FLOAT)
            .Attr("value", input_min_tensor)
            .Finalize(graph, input_min));
    Tensor input_max_tensor(DT_FLOAT, TensorShape());
    input_max_tensor.flat<float>()(0) = edge.input_max;
    TF_RETURN_IF_ERROR(
        NodeBuilder(strings::StrCat(name_prefix, "/InputMax"), "Const")
            .Attr("dtype", DT_FLOAT)
            .Attr("value", input_max_tensor)
            .Finalize(graph, input_max));
  } else {
    // If the range is not given, estimate the range with EMA variables.
    TF_RETURN_IF_ERROR(MakeEMAMinMaxVars(graph, name_prefix, edge.edge->src(),
                                         added_variables, input_min,
                                         input_max));
  }

  return Status::OK();
}

// Adds a QuantizeAndDequantizeV2 or FakeQuantWithMinMaxVars op
// (and required input nodes) based on edge.
// The result is stored in convert_node.
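// Both ops simulate quantization in float: the output is the input rounded to
// the nearest representable quantized value, so downstream float ops (and
// their gradients) run unchanged during training.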
Status MakeQuantizeOp(Graph* graph, const string& name_prefix,
                      const string& quant_op_type, const EdgeToConvert& edge,
                      std::vector<Node*>* added_variables,
                      Node** convert_node) {
  Node* input_min;
  Node* input_max;
  TF_RETURN_IF_ERROR(MakeInputMinMax(graph, name_prefix, edge, added_variables,
                                     &input_min, &input_max));
  string quant_name = strings::StrCat(name_prefix, "/", quant_op_type);
  if (quant_op_type == "QuantizeAndDequantizeV2") {
    TF_RETURN_IF_ERROR(NodeBuilder(quant_name, quant_op_type)
                           .Input(edge.edge->src())
                           .Input(input_min)
                           .Input(input_max)
                           .Attr("signed_input", edge.signed_input)
                           .Attr("num_bits", edge.num_bits)
                           .Attr("range_given", true)
                           .Finalize(graph, convert_node));
  } else if (quant_op_type == "FakeQuantWithMinMaxVars") {
    TF_RETURN_IF_ERROR(NodeBuilder(quant_name, quant_op_type)
                           .Input(edge.edge->src())
                           .Input(input_min)
                           .Input(input_max)
                           .Attr("num_bits", edge.num_bits)
                           .Finalize(graph, convert_node));
  } else {
    return errors::InvalidArgument("Unknown quant op type: ", quant_op_type);
  }
  return Status::OK();
}

// Insert conversion op, connect it to the graph and remove the old edge.
Status ProcessTargetEdges(Graph* graph, const string& quant_op_type,
                          const std::vector<EdgeToConvert>& target_edges) {
  // Remember previously converted ops to avoid duplicate conversion of the
  // same input.
  std::unordered_map<string, Node*, StringPieceHasher> name_index;
  std::vector<Node*> added_variables;
  for (const EdgeToConvert& edge : target_edges) {
    Node* convert_node;
    string name_prefix = edge.edge->src()->name();

    auto iter = name_index.find(name_prefix);
    if (iter == name_index.end()) {
      TF_RETURN_IF_ERROR(MakeQuantizeOp(graph, name_prefix, quant_op_type, edge,
                                        &added_variables, &convert_node));
      name_index[name_prefix] = convert_node;
    } else {
      convert_node = iter->second;
    }

    graph->AddEdge(convert_node, 0, edge.edge->dst(), edge.edge->dst_input());
    graph->RemoveEdge(edge.edge);
  }

  TF_RETURN_IF_ERROR(AddSaveAndRestore(graph, added_variables));

  return Status::OK();
}

}  // namespace

Status DoQuantizeTraining(int32 num_bits, const string& quant_op_type,
                          Graph* graph) {
  if (graph == nullptr) {
    return errors::InvalidArgument("Cannot accept a null graph pointer.");
  }

  if (num_bits < 1 || num_bits > 63) {
    return errors::OutOfRange("num_bits should be in range [1, 63] but is: ",
                              num_bits);
  }
  int potential_input = 0;
  std::vector<EdgeToConvert> target_edges;
  for (Node* node : graph->nodes()) {
    if (nodes_to_rewrite->find(node->type_string()) !=
            nodes_to_rewrite->end() &&
        !IsGradientNode(graph, node)) {
      // Find out which types are the inputs and convert them accordingly.
      // 1. Const/Variable OP: This is quantized as signed tensors with no given
      // range.
      // 2. Activation OP: Set the range accordingly for different types of
      // activations. Currently we handle {Relu, Relu6, Sigmoid, Tanh}.
      // 3. Identity OP: The quantization parameters depend on its input.
      // 4. Pooling OPs: various pooling ops. Also depends on its input.
      // 5. Reshape OP: Also depends on the first input to this op.
      // 6. Not-Listed-Above OP: Such ops are considered model inputs. However,
      // if there are more than kAllowedInputs unknown ops, return an error for
      // now to avoid unexpected behavior.
      // Note: The list above may be incomplete. Please let us know if you see
      // the error so we can handle your case.
      for (const Edge* edge : node->in_edges()) {
        if (edge->src_output() == Graph::kControlSlot) {
          // Skip the control dependency input.
          continue;
        } else {
          bool signed_input = false;
          bool range_given = false;
          float input_min = 0;
          float input_max = 0;
          bool known_op = FindType(graph, edge->src(), &signed_input,
                                   &range_given, &input_min, &input_max);
          if (!known_op) {
            // An unknown op is considered as an input.
            potential_input++;
            if (potential_input > kAllowedInputs) {
              return errors::Unimplemented(
                  "Found an unknown op: ", edge->src()->name(),
                  " with type: ", edge->src()->type_string(),
                  "; Unknown ops are considered as model input for now and "
                  "only ",
                  kAllowedInputs, " inputs are supported currently.");
            }
          }

          target_edges.emplace_back(edge, num_bits, signed_input, range_given,
                                    input_min, input_max);
        }
      }
    }
  }

  TF_RETURN_IF_ERROR(ProcessTargetEdges(graph, quant_op_type, target_edges));

  return Status::OK();
}

Status DoQuantizeTrainingOnGraphDef(const GraphDef& input_graphdef,
                                    int32 num_bits, const string& quant_op_type,
                                    GraphDef* result_graphdef) {
  Graph graph(OpRegistry::Global());
  GraphConstructorOptions opts;
  TF_RETURN_IF_ERROR(ConvertGraphDefToGraph(opts, input_graphdef, &graph));

  // Call the rewriter on the graph.
  TF_RETURN_IF_ERROR(DoQuantizeTraining(num_bits, quant_op_type, &graph));

  // Convert the result graph back to a GraphDef.
  graph.ToGraphDef(result_graphdef);
  return Status::OK();
}

Status DoQuantizeTrainingOnSerializedGraphDef(const string& input_graph_string,
                                              int32 num_bits,
                                              const string& quant_op_type,
                                              string* result_graph_string) {
  // First create the graph from the GraphDef.
  GraphDef input_graphdef;
  if (!ParseProtoUnlimited(&input_graphdef, input_graph_string)) {
    return errors::InvalidArgument(
        "input_graph_string is not a serialized GraphDef protocol buffer");
  }
  GraphDef output_graphdef;
  TF_RETURN_IF_ERROR(DoQuantizeTrainingOnGraphDef(
      input_graphdef, num_bits, quant_op_type, &output_graphdef));

  if (!output_graphdef.SerializeToString(result_graph_string)) {
    return errors::Internal(
        "quantize training transformation resulted in invalid GraphDef");
  }
  return Status::OK();
}

}  // namespace tensorflow