1syntax = "proto3";
2
3package tensorflow.data;
4
// Represents the type of auto-sharding we enable.
// NOTE(review): modern proto3 style prefers type-prefixed value names (e.g.
// AUTO_SHARD_POLICY_AUTO) and a meaning-free zero value, but renaming or
// renumbering here would break existing generated code and serialized data,
// so the values are documented in place instead.
enum AutoShardPolicy {
  // Default. First attempts to shard by FILE, and falls back to sharding by
  // DATA if a suitable set of files cannot be found.
  AUTO = 0;
  // Shards by input files: each worker gets a subset of the files to process.
  FILE = 1;
  // Shards by elements produced by the dataset: each worker processes the
  // whole dataset and discards the portion that is not for itself.
  DATA = 2;
  // Disables auto-sharding: each worker receives a copy of the full dataset.
  OFF = -1;
}
12
// Options that control how a tf.data input pipeline is distributed
// (auto-sharding policy and device count).
message DistributeOptions {
  // The type of sharding that auto-shard should attempt.
  //
  // FILE: shard by input files; each worker gets a subset of the files to
  //   process. We must be able to assign at least one file per worker —
  //   a runtime error is thrown if there are insufficient files, so make
  //   sure there are enough files for every worker to get at least one.
  // DATA: shard by elements produced by the dataset; each worker processes
  //   the whole dataset and discards the portion that is not for itself.
  // OFF: no auto-sharding; each worker receives a copy of the full dataset.
  // AUTO (default): first attempt to shard by FILE, and fall back to
  //   sharding by DATA if no suitable set of files can be found.
  AutoShardPolicy auto_shard_policy = 1;
  // The number of devices attached to this input pipeline.
  // Wrapped in a oneof so that an explicitly-set 0 can be distinguished
  // from an unset field (pre-`optional` proto3 presence idiom).
  oneof optional_num_devices {
    int32 num_devices = 2;
  }
}
33
// Options for the map-vectorization optimization, which rewrites map
// transformations to operate on batches of elements at once.
// Each field is wrapped in a single-member oneof to provide explicit
// presence (set vs. unset) for proto3 scalars.
message MapVectorization {
  // Whether to vectorize map transformations.
  oneof optional_enabled {
    bool enabled = 1;
  }
  // Whether to use ChooseFastestBranchDataset with this transformation. If
  // True, the pipeline picks between the vectorized and original segment at
  // runtime based on their iteration speed.
  oneof optional_use_choose_fastest {
    bool use_choose_fastest = 2;
  }
}
46
// Options that control which tf.data graph optimizations are applied and
// how autotuning behaves. Every scalar field is wrapped in a single-member
// oneof to provide explicit presence (set vs. unset) for proto3 scalars;
// field numbers and names must not change (wire/source compatibility).
message OptimizationOptions {
  // Whether to apply default graph optimizations. If False, only graph
  // optimizations that have been explicitly enabled will be applied.
  oneof optional_apply_default_optimizations {
    bool apply_default_optimizations = 1;
  }
  // Whether to automatically tune performance knobs.
  oneof optional_autotune {
    bool autotune = 2;
  }
  // When autotuning is enabled (through autotune), determines whether to also
  // autotune buffer sizes for datasets with parallelism.
  oneof optional_autotune_buffers {
    bool autotune_buffers = 3;
  }
  // When autotuning is enabled (through autotune), determines the CPU budget
  // to use, in number of cores. Values greater than the number of schedulable
  // CPU cores are allowed but may result in CPU contention.
  oneof optional_autotune_cpu_budget {
    int32 autotune_cpu_budget = 4;
  }
  // When autotuning is enabled (through autotune), determines the RAM budget
  // to use, in bytes. Values greater than the available RAM in bytes may
  // result in OOM. If 0, defaults to half of the available RAM in bytes.
  // int64 because a byte count routinely exceeds the int32 range (~2 GiB);
  // the int32 -> int64 widening is wire-compatible (same varint encoding).
  oneof optional_autotune_ram_budget {
    int64 autotune_ram_budget = 5;
  }
  // Whether to fuse filter transformations.
  oneof optional_filter_fusion {
    bool filter_fusion = 6;
  }
  // Whether to fuse a filter dataset that predicts random_uniform < rate into
  // a sampling dataset.
  oneof optional_filter_with_random_uniform_fusion {
    bool filter_with_random_uniform_fusion = 7;
  }
  // Whether to hoist tf.random_uniform() ops out of map transformations.
  oneof optional_hoist_random_uniform {
    bool hoist_random_uniform = 8;
  }
  // Whether to fuse map and batch transformations.
  oneof optional_map_and_batch_fusion {
    bool map_and_batch_fusion = 9;
  }
  // Whether to fuse map and filter transformations.
  oneof optional_map_and_filter_fusion {
    bool map_and_filter_fusion = 10;
  }
  // Whether to fuse map transformations.
  oneof optional_map_fusion {
    bool map_fusion = 11;
  }
  // Whether to parallelize stateless map transformations.
  oneof optional_map_parallelization {
    bool map_parallelization = 12;
  }
  // The map vectorization options associated with the dataset.
  MapVectorization map_vectorization = 13;
  // Whether to eliminate no-op transformations.
  oneof optional_noop_elimination {
    bool noop_elimination = 14;
  }
  // Whether to parallelize copying of batch elements. This optimization is
  // highly experimental and can cause performance degradation (e.g. when the
  // parallelization overhead exceeds the benefits of performing the data
  // copies in parallel). You should only enable this optimization if a) your
  // input pipeline is bottlenecked on batching and b) you have validated that
  // this optimization improves performance.
  oneof optional_parallel_batch {
    bool parallel_batch = 15;
  }
  // Whether to reorder ops that will discard data to the front of unary
  // cardinality-preserving transformations, e.g. dataset.map(...).take(3)
  // will be optimized to dataset.take(3).map(...). For now this optimization
  // will move `skip`, `shard` and `take` to the front of `map` and
  // `prefetch`. This optimization is only for performance; it will not affect
  // the output of the dataset.
  oneof optional_reorder_data_discarding_ops {
    bool reorder_data_discarding_ops = 16;
  }
  // Whether to fuse shuffle and repeat transformations.
  oneof optional_shuffle_and_repeat_fusion {
    bool shuffle_and_repeat_fusion = 17;
  }
}
132
// Options that control the threading behavior of a tf.data input pipeline.
// Each field is wrapped in a single-member oneof to provide explicit
// presence (set vs. unset) for proto3 scalars.
message ThreadingOptions {
  // If set, it overrides the maximum degree of intra-op parallelism.
  oneof optional_max_intra_op_parallelism {
    int32 max_intra_op_parallelism = 1;
  }
  // If set, the dataset will use a private threadpool of the given size
  // (number of threads).
  oneof optional_private_threadpool_size {
    int32 private_threadpool_size = 2;
  }
}
143
// Represents how to handle external state during serialization (e.g. when
// serializing a dataset or checkpointing its iterator).
// NOTE(review): modern proto3 style prefers type-prefixed value names and a
// meaning-free zero value, but renaming or renumbering here would break
// existing generated code and serialized data.
enum ExternalStatePolicy {
  // Default. External state is ignored and a warning is logged.
  WARN = 0;
  // External state is ignored without a warning.
  IGNORE = 1;
  // External state results in an error.
  FAIL = 2;
}
150
// Message stored with Dataset objects to control how datasets are processed
// and optimized. Scalar fields use single-member oneofs to provide explicit
// presence (set vs. unset) for proto3 scalars.
message Options {
  // Whether the outputs need to be produced in deterministic order.
  oneof optional_deterministic {
    bool deterministic = 1;
  }
  // The distribution strategy options associated with the dataset.
  DistributeOptions distribute_options = 2;
  // The optimization options associated with the dataset.
  OptimizationOptions optimization_options = 3;
  // Whether to introduce 'slack' in the last `prefetch` of the input
  // pipeline, if it exists. This may reduce CPU contention with accelerator
  // host-side activity at the start of a step. The slack frequency is
  // determined by the number of devices attached to this input pipeline.
  oneof optional_slack {
    bool slack = 4;
  }
  // The threading options associated with the dataset.
  ThreadingOptions threading_options = 5;
  // This option can be used to override the default policy for how to handle
  // external state when serializing a dataset or checkpointing its iterator.
  // There are three settings available - IGNORE: External state is ignored
  // without a warning; WARN: External state is ignored and a warning is
  // logged; FAIL: External state results in an error.
  oneof optional_external_state_policy {
    ExternalStatePolicy external_state_policy = 6;
  }
}
180