1// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14syntax = "proto2";
15import "tensorflow/lite/toco/types.proto";
16
17package toco;
18
19// Supported I/O file formats. Some formats may be input-only or output-only.
enum FileFormat {
  // Zero value. Under proto2 this is also what an unset optional FileFormat
  // field reads as, since it is the first declared value.
  FILE_FORMAT_UNKNOWN = 0;

  // TensorFlow GraphDef; see
  // third_party/tensorflow/core/framework/graph.proto
  TENSORFLOW_GRAPHDEF = 1;

  // TensorFlow's mobile inference model (TFLite); see
  // third_party/tensorflow/contrib/tflite/schema.fbs
  // NOTE(review): this path looks stale -- this file imports from
  // tensorflow/lite/..., not contrib/. Confirm where schema.fbs lives now.
  TFLITE = 2;

  // GraphViz .dot output. Export-only: not accepted as an input format.
  GRAPHVIZ_DOT = 3;
}
34
35// TocoFlags encodes extra parameters that drive tooling operations, that
36// are not normally encoded in model files and in general may not be thought
37// of as properties of models, instead describing how models are to be
38// processed in the context of the present tooling job.
39//
40// Next ID to use: 29.
message TocoFlags {
  // Numbers 3, 9 and 21 to 23 are not assigned to any field of this message,
  // although they lie below the next free ID (29).
  // NOTE(review): presumably they belonged to fields that were removed, or
  // that exist only in another variant of this file -- confirm. Reserving
  // them makes protoc reject any new field that tries to reuse one of them.
  reserved 3, 9, 21 to 23;

  // Input file format.
  optional FileFormat input_format = 1;

  // Output file format.
  optional FileFormat output_format = 2;

  // Similar to inference_type, but controls the quantization of input arrays
  // specifically, separately from other arrays.
  //
  // If not set, then the value of inference_type is implicitly used, i.e.
  // by default input arrays are quantized like other arrays.
  //
  // Like inference_type, this only affects real-number arrays. By "real-number"
  // we mean float arrays and quantized arrays. This excludes plain
  // integer arrays, string arrays, and every other data type.
  //
  // The typical use for this flag is for vision models taking a bitmap
  // as input, typically with uint8 channels, yet still requiring floating-point
  // inference. For such image models, the uint8 input is quantized, i.e.
  // the uint8 values are interpreted as real numbers, and the quantization
  // parameters used for such input arrays are their mean_value, std_value
  // parameters.
  optional IODataType inference_input_type = 11;

  // Sets the type of real-number arrays in the output file, that is, controls
  // the representation (quantization) of real numbers in the output file,
  // except for input arrays, which are controlled by inference_input_type.
  //
  // NOTE: this flag only impacts real-number arrays. By "real-number"
  // we mean float arrays and quantized arrays. This excludes plain
  // integer arrays, string arrays, and every other data type.
  //
  // For real-number arrays, the impact of this flag is to allow the output
  // file to choose a different real-number representation (quantization)
  // from what the input file used. For any other types of arrays, changing
  // the data type would not make sense.
  //
  // Specifically:
  //    - If FLOAT, then real-number arrays will be of type float in
  //      the output file. If they were quantized in the input file, then
  //      they get dequantized.
  //    - If QUANTIZED_UINT8, then real-number arrays will be quantized
  //      as uint8 in the output file. If they were float in the input file,
  //      then they get quantized.
  //    - If not set, then all real-number arrays retain the same type in the
  //      output file as they have in the input file.
  optional IODataType inference_type = 4;

  // default_ranges_min and default_ranges_max are helpers to experiment
  // with quantization of models. Normally, quantization requires the input
  // model to have (min, max) range information for every activations array.
  // This is needed in order to know how to quantize arrays and still achieve
  // satisfactory accuracy. However, in some circumstances one would just like
  // to estimate the performance of quantized inference, without caring about
  // accuracy. That is what default_ranges_min and default_ranges_max are for:
  // when specified, they will be used as default (min, max) range boundaries
  // for all activation arrays that lack (min, max) range information, thus
  // allowing for quantization to proceed.
  //
  // It should be clear from the above explanation that these parameters are
  // for experimentation purposes only and should not be used in production:
  // they make it easy to quantize models, but the resulting quantized model
  // will be inaccurate.
  //
  // These values only apply to arrays quantized with the kUint8 data type.
  optional float default_ranges_min = 5;
  optional float default_ranges_max = 6;

  // Equivalent versions of default_ranges_min/_max for arrays quantized with
  // the kInt16 data type.
  optional float default_int16_ranges_min = 15;
  optional float default_int16_ranges_max = 16;

  // Ignore and discard FakeQuant nodes. For instance, that can be used to
  // generate plain float code without fake-quantization from a quantized
  // graph.
  optional bool drop_fake_quant = 7;

  // Normally, FakeQuant nodes must be strict boundaries for graph
  // transformations, in order to ensure that quantized inference has the
  // exact same arithmetic behavior as quantized training --- which is the
  // whole point of quantized training and of FakeQuant nodes in the first
  // place. However, that entails subtle requirements on where exactly
  // FakeQuant nodes must be placed in the graph. Some quantized graphs
  // have FakeQuant nodes at unexpected locations, which prevent graph
  // transformations that are necessary in order to generate inference
  // code for these graphs. Such graphs should be fixed, but as a
  // temporary work-around, setting this reorder_across_fake_quant flag
  // allows toco to perform necessary graph transformations on them,
  // at the cost of no longer faithfully matching inference and training
  // arithmetic.
  optional bool reorder_across_fake_quant = 8;

  // If true, allow TOCO to create TF Lite Custom operators for all the
  // unsupported TensorFlow ops.
  optional bool allow_custom_ops = 10;

  // Applies only to the case when the input format is TENSORFLOW_GRAPHDEF.
  // If true, then control dependencies will be immediately dropped during
  // import.
  // If not set, the default behavior is as follows:
  //    - Default to false if the output format is TENSORFLOW_GRAPHDEF.
  //    - Default to true in all other cases.
  optional bool drop_control_dependency = 12;

  // Disables transformations that fuse subgraphs such as known LSTMs (not all
  // LSTMs are identified).
  optional bool debug_disable_recurrent_cell_fusion = 13;

  // Uses the FakeQuantWithMinMaxArgs.num_bits attribute to adjust quantized
  // array data types throughout the graph. The graph must be properly annotated
  // with FakeQuant* ops on at least the edges and may contain additional ops on
  // the interior of the graph to widen/narrow as desired.
  //
  // Input and output array data types may change because of this propagation
  // and users must be sure to query the final data_type values.
  optional bool propagate_fake_quant_num_bits = 14;

  // Some fast uint8 GEMM kernels require uint8 weights to avoid the value 0.
  // This flag allows nudging them to 1 to allow proceeding, with moderate
  // inaccuracy.
  optional bool allow_nudging_weights_to_use_fast_gemm_kernel = 17;

  // Minimum size, in bytes, of constant arrays to deduplicate; smaller arrays
  // will not be deduplicated. Defaults to 64.
  optional int64 dedupe_array_min_size_bytes = 18 [default = 64];

  // Split the LSTM inputs from 5 tensors to 18 tensors for TFLite.
  // Ignored if the output format is not TFLite. Defaults to true.
  optional bool split_tflite_lstm_inputs = 19 [default = true];

  // Store weights as quantized weights followed by dequantize operations.
  // Computation is still done in float, but reduces model size (at the cost of
  // accuracy and latency).
  // DEPRECATED: Please use post_training_quantize instead. The field option
  // below exposes the deprecation to generated code and tooling; the wire
  // format and the default value are unchanged.
  optional bool quantize_weights = 20 [default = false, deprecated = true];

  // Full filepath of the folder in which to dump the graphs, at various stages
  // of processing, as GraphViz .dot files. Preferred over
  // --output_format=GRAPHVIZ_DOT in order to keep the requirements of the
  // output file.
  optional string dump_graphviz_dir = 24;

  // Boolean indicating whether to dump the graph after every graph
  // transformation.
  optional bool dump_graphviz_include_video = 25;

  // Boolean indicating whether to quantize the weights of the converted float
  // model. Model size will be reduced and there will be latency improvements
  // (at the cost of accuracy).
  optional bool post_training_quantize = 26 [default = false];

  // This flag only works when converting to TensorFlow Lite format.
  // When enabled, unsupported ops will be converted to select TensorFlow ops.
  // TODO(ycling): Consider renaming the following 2 flags and not calling it
  // "Flex".
  // `enable_select_tf_ops` should always be used with `allow_custom_ops`.
  // WARNING: Experimental interface, subject to change.
  optional bool enable_select_tf_ops = 27 [default = false];

  // This flag only works when converting to TensorFlow Lite format.
  // When enabled, all TensorFlow ops will be converted to select TensorFlow
  // ops.
  // This will force `enable_select_tf_ops` to true.
  // `force_select_tf_ops` should always be used with `enable_select_tf_ops`.
  // WARNING: Experimental interface, subject to change.
  optional bool force_select_tf_ops = 28 [default = false];
}
209