# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Experimental shuffle ops."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.data.util import random_seed
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import gen_dataset_ops
from tensorflow.python.util import deprecation
from tensorflow.python.util.tf_export import tf_export


class _ShuffleAndRepeatDataset(dataset_ops.UnaryUnchangedStructureDataset):
  """A `Dataset` that applies `shuffle` and `repeat` as one fused op."""

  def __init__(self, input_dataset, buffer_size, count=None, seed=None):
    """Builds the fused shuffle-and-repeat op on top of `input_dataset`.

    Args:
      input_dataset: The dataset whose elements are shuffled and repeated.
      buffer_size: Value converted to a `tf.int64` tensor named
        "buffer_size".
      count: (Optional.) Value converted to a `tf.int64` tensor named "count".
        When `None`, the constant `-1` is used instead.
      seed: (Optional.) Seed forwarded to `random_seed.get_seed`, which yields
        the `(seed, seed2)` pair handed to the op.
    """
    # `_input_dataset` is assigned before the op is built because the op call
    # below reads the input's variant tensor through this attribute.
    self._input_dataset = input_dataset
    self._buffer_size = ops.convert_to_tensor(
        buffer_size, dtype=dtypes.int64, name="buffer_size")

    # An omitted `count` is encoded as the constant -1.
    if count is not None:
      self._count = ops.convert_to_tensor(
          count, dtype=dtypes.int64, name="count")
    else:
      self._count = constant_op.constant(-1, dtype=dtypes.int64, name="count")

    self._seed, self._seed2 = random_seed.get_seed(seed)

    source_variant = self._input_dataset._variant_tensor  # pylint: disable=protected-access
    fused_variant = gen_dataset_ops.shuffle_and_repeat_dataset(
        source_variant,
        buffer_size=self._buffer_size,
        count=self._count,
        seed=self._seed,
        seed2=self._seed2,
        **self._flat_structure)
    super(_ShuffleAndRepeatDataset, self).__init__(input_dataset,
                                                   fused_variant)


@deprecation.deprecated(
    None,
    "Use `tf.data.Dataset.shuffle(buffer_size, seed)` followed by "
    "`tf.data.Dataset.repeat(count)`. Static tf.data optimizations will take "
    "care of using the fused implementation.")
@tf_export("data.experimental.shuffle_and_repeat")
def shuffle_and_repeat(buffer_size, count=None, seed=None):
  """Shuffles and repeats a Dataset, reshuffling with each repetition.

  >>> d = tf.data.Dataset.from_tensor_slices([1, 2, 3])
  >>> d = d.apply(tf.data.experimental.shuffle_and_repeat(2, count=2))
  >>> [elem.numpy() for elem in d] # doctest: +SKIP
  [2, 3, 1, 1, 3, 2]

  Applying this transformation,

  ```python
  dataset.apply(
    tf.data.experimental.shuffle_and_repeat(buffer_size, count, seed))
  ```

  yields the same output as chaining the two standard transformations:

  ```python
  dataset.shuffle(
    buffer_size, seed=seed, reshuffle_each_iteration=True).repeat(count)
  ```

  During every repetition the dataset first fills a buffer with `buffer_size`
  elements and then draws elements from that buffer at random, replacing each
  drawn element with a new one from the input. To get a perfect shuffle, make
  the buffer as large as the whole dataset.

  As an example, with a dataset of 10,000 elements and a `buffer_size` of
  1,000, `shuffle` initially picks a random element from only the first 1,000
  elements in the buffer. After an element has been picked, its slot in the
  buffer is taken by the next (i.e. 1,001-st) element, so the buffer keeps
  holding 1,000 elements.

  Args:
    buffer_size: A `tf.int64` scalar `tf.Tensor`, representing the maximum
      number of elements that will be buffered when prefetching.
    count: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the number
      of times the dataset should be repeated. The default behavior (if `count`
      is `None` or `-1`) is for the dataset to be repeated indefinitely.
    seed: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the random
      seed that will be used to create the distribution. See
      `tf.random.set_seed` for behavior.

  Returns:
    A `Dataset` transformation function, which can be passed to
    `tf.data.Dataset.apply`.
  """

  def _apply_fn(dataset):  # pylint: disable=missing-docstring
    return _ShuffleAndRepeatDataset(
        dataset, buffer_size, count=count, seed=seed)

  return _apply_fn