syntax = "proto3";

package tensorflow;

import "tensorflow/core/framework/attr_value.proto";
import "tensorflow/core/protobuf/verifier_config.proto";

option cc_enable_arenas = true;
option java_outer_classname = "RewriterConfigProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto";

message AutoParallelOptions {
  bool enable = 1;
  int32 num_replicas = 2;
}
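
// For illustration, a hedged sketch of an AutoParallelOptions message in
// proto text format (the values below are hypothetical, not recommendations):
//   enable: true
//   num_replicas: 2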

message ScopedAllocatorOptions {
  // If present, only perform optimization for these ops.
  repeated string enable_op = 1;
}
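
// A hedged sketch of ScopedAllocatorOptions in proto text format; the op name
// below is purely illustrative:
//   enable_op: "CollectiveReduce"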

message RewriterConfig {
  // Graph rewriting is experimental and subject to change, and is not covered
  // by any API stability guarantees.

  // Configuration options for the meta-optimizer. Unless otherwise noted,
  // these configuration options do not apply to explicitly triggered
  // optimization passes in the optimizers field.

  enum Toggle {
    DEFAULT = 0;
    ON = 1;
    OFF = 2;
    // Enable some aggressive optimizations that rely on assumptions that some
    // TF graphs may violate. For example, assume that the shape of a
    // placeholder matches its actual feed.
    AGGRESSIVE = 3;
  }

  // Enum for layout conversion between NCHW and NHWC on CPU. The default is
  // NO_CONVERSION_ON_CPU.
  enum CpuLayout {
    NO_CONVERSION_ON_CPU = 0;
    NCHW_TO_NHWC = 1;
    NHWC_TO_NCHW = 2;
  }

  // Enum controlling the number of times to run optimizers. The default is to
  // run them twice.
  enum NumIterationsType {
    DEFAULT_NUM_ITERS = 0;
    ONE = 1;
    TWO = 2;
  }

  // CPU layout conversion settings between NHWC and NCHW.
  CpuLayout cpu_layout_conversion = 50;

  // Optimize tensor layouts (default is ON).
  // e.g. This will try to use the NCHW layout on GPU, which is faster.
  Toggle layout_optimizer = 1;
  // Fold constants (default is ON).
  // Statically infer the value of tensors when possible, and materialize the
  // result using constants.
  Toggle constant_folding = 3;
  // Shape optimizations (default is ON).
  // Simplify computations made on shapes.
  Toggle shape_optimization = 13;
  // Remapping (default is ON).
  // Remap subgraphs onto more efficient implementations.
  Toggle remapping = 14;
  // Common subgraph elimination (default is ON).
  // e.g. Merge ops that compute the same value (like duplicated constants).
  Toggle common_subgraph_elimination = 24;
  // Arithmetic optimizations (default is ON).
  // e.g. Simplify arithmetic expressions.
  Toggle arithmetic_optimization = 7;
  // Control dependency optimizations (default is ON).
  // Remove redundant control dependencies, which may enable other
  // optimizations.
  Toggle dependency_optimization = 8;
  // Loop optimizations (default is ON).
  Toggle loop_optimization = 9;
  // Function optimizations (default is ON).
  Toggle function_optimization = 10;
  // Strips debug-related nodes from the graph (off by default).
  Toggle debug_stripper = 11;
  // If true, don't remove unnecessary ops from the graph.
  bool disable_model_pruning = 2;
  // Try to allocate some independent Op outputs contiguously in order to
  // merge or eliminate downstream Ops (off by default).
  Toggle scoped_allocator_optimization = 15;
  // Force small ops onto the CPU (default is OFF).
  Toggle pin_to_host_optimization = 18;
  // Enable the swap of kernel implementations based on the device placement
  // (default is ON).
  Toggle implementation_selector = 22;
  // Optimize data types for CUDA (default is OFF).
  // This will try to use float16 on GPU, which is faster.
  // Note that this can change the numerical stability of the graph and may
  // require the use of loss scaling to maintain model convergence.
  Toggle auto_mixed_precision = 23;
  // Optimize data types for MKL (default is OFF).
  // This will try to use bfloat16 on CPUs, which is faster.
  // Note that this can change the numerical stability of the graph.
  Toggle auto_mixed_precision_mkl = 25;
  // Disable the entire meta optimizer (off by default).
  bool disable_meta_optimizer = 19;
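
  // For illustration, a hedged text-format sketch combining several of the
  // toggles above (the chosen values are illustrative, not recommendations):
  //   layout_optimizer: ON
  //   constant_folding: OFF
  //   auto_mixed_precision: ON
  //   disable_model_pruning: true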

  // Controls how many times the optimizers in the meta optimizer are run
  // (default is twice, per DEFAULT_NUM_ITERS above).
  NumIterationsType meta_optimizer_iterations = 12;

  // The minimum number of nodes in a graph for the optimizer to run. For
  // smaller graphs, optimization is skipped.
  // 0 means the system picks an appropriate number.
  // < 0 means do not skip optimization.
  int32 min_graph_nodes = 17;

  // Disable optimizations that assume compressed tensors. Note that this flag
  // is experimental and may be removed in the future.
  bool experimental_disable_compressed_tensor_optimization = 26;

  // Disable folding of quantization emulation ops such as FakeQuantWithMinMax*
  // and QuantizeAndDequantize*. Some compilers (e.g. the TF-to-tflite
  // converter) have to extract quantization configs (e.g. min/max range,
  // number of bits, and per-channel settings) from the quantization emulation
  // ops. Note that this flag is experimental and may be removed in the future.
  // See b/174138564 for more details.
  bool experimental_disable_folding_quantization_emulation = 27;

  enum MemOptType {
    // The default setting (currently SCHEDULING_HEURISTICS and
    // SWAPPING_HEURISTICS only).
    DEFAULT_MEM_OPT = 0;
    // Disabled in the meta-optimizer.
    NO_MEM_OPT = 1;
    // Driven by manual op-level annotations.
    MANUAL = 2;

    // Driven by heuristics. The behavior of these heuristics is subject to
    // change. Currently includes experimental recomputation and swapping
    // heuristics. Manual annotations are respected, but additional nodes are
    // selected automatically.

    // The swapping heuristic will move a tensor from the GPU to the CPU and
    // move it back when needed to reduce peak memory usage.
    SWAPPING_HEURISTICS = 4;
    // The recomputation heuristic will recompute ops (such as Relu
    // activations) during backprop instead of storing them, reducing peak
    // memory usage.
    RECOMPUTATION_HEURISTICS = 5;
    // Scheduling will split big ops such as AddN and try to enforce a schedule
    // of the new computations that decreases peak memory usage.
    SCHEDULING_HEURISTICS = 6;
    // Use any combination of swapping and recomputation heuristics.
    HEURISTICS = 3;
  }
  // Configures memory optimization passes through the meta-optimizer. Has no
  // effect on manually requested memory optimization passes in the optimizers
  // field.
  MemOptType memory_optimization = 4;
  // A node name scope for node names which are valid outputs of
  // recomputations. Inputs to nodes that match this scope may be recomputed
  // (subject either to manual annotation of those input nodes, or to manual
  // annotation plus heuristics, depending on memory_optimization), but the
  // nodes themselves will not be recomputed. Sub-scopes are matched as well,
  // so the scope need not appear at the top level: with the default value
  // "gradients/", this matches the node names "gradients/foo" and
  // "foo/gradients/bar", but not "foo_gradients/".
  string memory_optimizer_target_node_name_scope = 6;
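
  // A hedged text-format sketch of the memory settings above ("gradients/" is
  // simply the documented default scope; the values are illustrative):
  //   memory_optimization: SWAPPING_HEURISTICS
  //   memory_optimizer_target_node_name_scope: "gradients/"
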
  // Maximum number of milliseconds to spend optimizing a single graph before
  // timing out. If equal to 0 the system picks a default (currently 5
  // minutes). If less than 0 the optimizer will never time out.
  int64 meta_optimizer_timeout_ms = 20;

  // Configures AutoParallel optimization passes either through the
  // meta-optimizer or when manually specified through the optimizers field.
  AutoParallelOptions auto_parallel = 5;

  // If true, any optimization pass failing will cause the MetaOptimizer to
  // stop with an error. By default (or when set to false), failing passes are
  // skipped silently.
  bool fail_on_optimizer_errors = 21;

  ScopedAllocatorOptions scoped_allocator_opts = 16;

  // If non-empty, will use this as an alternative way to specify a list of
  // optimizations to turn on and the order of the optimizations (replacing the
  // meta-optimizer).
  //
  // Of the RewriterConfig options, only the AutoParallel configuration options
  // (the auto_parallel field) apply to manually requested optimization passes
  // ("autoparallel"). Memory optimization passes ("memory") invoked here are
  // not configurable (in contrast to memory optimization passes through the
  // meta-optimizer) and act only on manual op annotations.
  //
  // Custom optimizers (see custom_optimizers) that are not part of this
  // schedule will be run afterwards, in the order in which they were
  // specified.
  repeated string optimizers = 100;
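
  // A hedged sketch of a manually specified pass list in proto text format,
  // using only the pass names mentioned above; any other name would have to
  // match a registered optimizer:
  //   optimizers: "memory"
  //   optimizers: "autoparallel"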

  // Message to describe a custom graph optimizer and its parameters.
  message CustomGraphOptimizer {
    string name = 1;
    map<string, AttrValue> parameter_map = 2;
  }

  // List of CustomGraphOptimizers to apply.
  repeated CustomGraphOptimizer custom_optimizers = 200;
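
  // A hedged text-format sketch of a custom optimizer entry. The optimizer
  // name and parameter key below are hypothetical; the value uses the
  // AttrValue text format from attr_value.proto:
  //   custom_optimizers {
  //     name: "MyRegisteredOptimizer"
  //     parameter_map {
  //       key: "max_depth"
  //       value { i: 3 }
  //     }
  //   }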

  // VerifierConfig specifying the verifiers to be run after every optimizer.
  VerifierConfig inter_optimizer_verifier_config = 300;

  // VerifierConfig specifying the verifiers to be run at the end, after all
  // optimizers have run.
  VerifierConfig post_optimization_verifier_config = 301;
}
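
// A hedged end-to-end sketch of a RewriterConfig in proto text format. All
// values are illustrative, and structure_verifier is assumed (not confirmed
// here) to be a field of VerifierConfig from verifier_config.proto:
//   meta_optimizer_iterations: ONE
//   memory_optimization: SWAPPING_HEURISTICS
//   auto_parallel {
//     enable: true
//     num_replicas: 2
//   }
//   inter_optimizer_verifier_config {
//     structure_verifier: ON
//   }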