1# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Classes for wrapping a model to operate on different data shapes."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import abc
22
23from tensorflow.contrib.timeseries.python.timeseries import feature_keys
24from tensorflow.contrib.timeseries.python.timeseries import math_utils
25from tensorflow.contrib.timeseries.python.timeseries.model import ModelOutputs
26
27from tensorflow.python.estimator import estimator_lib
28from tensorflow.python.framework import dtypes
29from tensorflow.python.framework import ops
30from tensorflow.python.ops import array_ops
31from tensorflow.python.ops import math_ops
32from tensorflow.python.util import nest
33
34
35class PassthroughStateManager(object):
36  """A minimal wrapper for models which do not need state management."""
37
38  def __init__(self):
39    self._input_statistics = None
40    self._graph_initialized = False
41
42  def initialize_graph(self, model, input_statistics=None):
43    """Adds required operations to the graph."""
44    del model  # unused
45    self._graph_initialized = True
46    self._input_statistics = input_statistics
47
48  def define_loss(self, model, features, mode):
49    """Wrap "model" with StateManager-specific operations.
50
51    Args:
52      model: The model (inheriting from TimeSeriesModel) to manage state for.
53      features: A dictionary with the following key/value pairs:
54        feature_keys.TrainEvalFeatures.TIMES: A [batch size x window size]
55            Tensor with times for each observation.
56        feature_keys.TrainEvalFeatures.VALUES: A [batch size x window size x num
57            features] Tensor with values for each observation.
58      mode: The tf.estimator.ModeKeys mode to use (TRAIN or EVAL).
59    Returns:
60      A ModelOutputs object.
61    Raises:
62      ValueError: If start state was specified.
63    """
64    if feature_keys.State.STATE_TUPLE in features:
65      raise ValueError(
66          "Overriding start state is not supported for this model.")
67    return model.define_loss(features, mode)
68
69
70class _OverridableStateManager(PassthroughStateManager):
71  """Base class for state managers which support overriding model state."""
72
73  @abc.abstractmethod
74  def _define_loss_with_saved_state(self, model, features, mode):
75    pass
76
77  def define_loss(self, model, features, mode):
78    """Switches between explicit start state and managed state."""
79    if feature_keys.FilteringFeatures.STATE_TUPLE in features:
80      # Explicit start state has been provided, so we should use that.
81      if mode == estimator_lib.ModeKeys.TRAIN:
82        raise ValueError(
83            "Overriding saved state for training is not supported (but a value "
84            "for feature {} was specified).".format(
85                feature_keys.FilteringFeatures.STATE_TUPLE))
86      start_state = features[feature_keys.FilteringFeatures.STATE_TUPLE]
87      del features[feature_keys.FilteringFeatures.STATE_TUPLE]
88      return model.get_batch_loss(
89          features=features, mode=mode, state=start_state)
90    else:
91      # No explicit start state; use managed state.
92      return self._define_loss_with_saved_state(
93          model=model, features=features, mode=mode)
94
95
96class FilteringOnlyStateManager(_OverridableStateManager):
97  """State manager for models which use state only for filtering.
98
99  Window-based models (ARModel) do not require state to be fed during training
100  (instead requiring a specific window size). Rather than requiring a minimum
101  window size for filtering, these models maintain this window in their state,
102  and so need state to be fed.
103  """
104
105  def _define_loss_with_saved_state(self, model, features, mode):
106    return model.define_loss(features, mode)
107
108
109class ChainingStateManager(_OverridableStateManager):
110  """Maintains state across a batch for SequentialTimeSeriesModel subclasses.
111
112  The batch dimension is treated as indexing sequential chunks of the same
113  timeseries. End state from each chunk is fed as start state to the next chunk
114  during the next timestep. This is an approximation to full-batch training for
115  sequential models, but is typically much faster while still accurately
116  recovering parameters. The speedup comes from reduced scheduling overhead of
117  TensorFlow ops, since each operation can do much more work.
118  """
119
120  def __init__(self, state_saving_interval=20, checkpoint_state=False):
121    """Initialize the state manager.
122
123    Args:
124      state_saving_interval: This state manager saves intermediate model state
125          every `state_saving_interval` times. Larger values save memory, and
126          checkpoint size if `checkpoint_state` is enabled, but models
127          will need to impute across artificial gaps of up to this size
128          (i.e. gaps not appearing in the original data). This imputation may
129          affect training. Set state_saving_interval to 1 to avoid any
130          artificial imputation.
131      checkpoint_state: If True, saved intermediate model state will be
132          written to checkpoints. Checkpoints will then scale with dataset
133          size. If False, state will be freshly imputed from the beginning of a
134          series each time the model is restored, which means it may take a few
135          iterations for state to warm up.
136    """
137    super(ChainingStateManager, self).__init__()
138    self._checkpoint_state = checkpoint_state
139    self._state_saving_interval = state_saving_interval
140    self._start_state = None
141    self._cached_states = None
142
143  def initialize_graph(self, model, input_statistics=None):
144    """Adds required operations to the graph."""
145    super(ChainingStateManager, self).initialize_graph(
146        model=model, input_statistics=input_statistics)
147    self._start_state = model.get_start_state()
148    self._cached_states = math_utils.TupleOfTensorsLookup(
149        key_dtype=dtypes.int64,
150        default_values=self._start_state,
151        empty_key=-1,
152        deleted_key=-2,
153        name="cached_states",
154        checkpoint=self._checkpoint_state)
155
156  def _define_loss_with_saved_state(self, model, features, mode):
157    """Feeds end state from one training iteration into the next.
158
159    Args:
160      model: The model to wrap. Compatible with children of TimeSeriesModel.
161      features: Dictionary with Tensor values defining the data to be
162        processed. The expected key/value pairs are at minimum:
163          feature_keys.TrainEvalFeatures.TIMES: A [number of chunks x window
164            size] Tensor with times for each observation, the result of chunking
165            a single longer time series.
166          feature_keys.TrainEvalFeatures.VALUES: A [number of chunks x window
167            size x num features] Tensor with values for each observation,
168            corresponding to times.
169      mode: The tf.estimator.ModeKeys mode to use. For EVAL and INFER, no
170          batching is performed, which may be slow. This is to avoid giving
171          cached and almost certainly stale values.
172    Returns:
173      A ModelOutputs object.
174    Raises:
175      ValueError: If initialize_graph has not been called.
176    """
177    if not self._graph_initialized:
178      raise ValueError("ChainingStateManager requires initialize_graph() to be "
179                       "called before use.")
180    (loss_op, end_state, batch_predictions) = self._update_cached_states(
181        model=model,
182        features=features,
183        mode=mode)
184    # Add a batch dimension so state can be used directly (e.g. for predictions)
185    # without the user manually reshaping it.
186    last_end_state_flat = [end_state_value[-1][None]
187                           for end_state_value in nest.flatten(end_state)]
188    batch_predictions["observed"] = features[
189        feature_keys.TrainEvalFeatures.VALUES]
190    return ModelOutputs(
191        loss=loss_op,
192        end_state=nest.pack_sequence_as(end_state, last_end_state_flat),
193        predictions=batch_predictions,
194        prediction_times=features[feature_keys.TrainEvalFeatures.TIMES])
195
196  def _get_chunk_number(self, time):
197    return time // self._state_saving_interval
198
199  def _get_cached_states(self, times):
200    """Retrieve cached states for a batch of times."""
201    read_chunk_numbers = self._get_chunk_number(times)
202    looked_up_state = list(self._cached_states.lookup(
203        math_ops.cast(read_chunk_numbers, dtypes.int64)))
204    looked_up_state = tuple(looked_up_state)
205    # We need to special-case the first chunk in a series to explicitly rely on
206    # the model's starting state so that gradients flow back to it. Otherwise it
207    # would affect only initialization, and would not be read from or updated
208    # during training. Not doing this also isolates that part of the graph,
209    # leading to errors on model reload if there are trainable variables
210    # affecting a model's start state.
211    if self._input_statistics is not None:
212      start_time = self._input_statistics.start_time
213    else:
214      start_time = 0
215    set_to_start_state = math_ops.equal(read_chunk_numbers,
216                                        self._get_chunk_number(start_time))
217    new_states = []
218    for start_state_value, cache_variable in zip(
219        nest.flatten(
220            math_utils.replicate_state(self._start_state,
221                                       array_ops.shape(times)[0])),
222        nest.flatten(looked_up_state)):
223
224      new_states.append(
225          array_ops.where(set_to_start_state, start_state_value,
226                          cache_variable))
227    looked_up_state = nest.pack_sequence_as(looked_up_state, new_states)
228    return looked_up_state
229
230  def _update_cached_states(self, model, features, mode):
231    """Read, process, and write chunks to the cache."""
232    times = features[feature_keys.TrainEvalFeatures.TIMES]
233    looked_up_state = self._get_cached_states(times[:, 0])
234    (model_loss, intermediate_states,
235     batch_predictions) = model.per_step_batch_loss(
236         features=features,
237         mode=mode,
238         state=looked_up_state)
239    # We need to at least write to the bucket after the one we read from.
240    min_chunk_numbers = self._get_chunk_number(times) + 1
241    # We write to the bucket that would have been read had the window started at
242    # the next sample (except for the last sample in the window, which gets
243    # written to the next bucket). This assumes fixed missing times (i.e. if we
244    # were presented with times [10, 50] we will never see times [30, 50]).
245    #
246    # TODO(allenl): Retrieve the highest time less than the current time rather
247    # than relying on fixed bucketing.
248    write_chunk_numbers = math_ops.maximum(
249        self._get_chunk_number(array_ops.concat(
250            [times[:, 1:], times[:, -1:] + 1], axis=1)),
251        min_chunk_numbers)
252    # Write once for every computed state; this may mean that we write multiple
253    # times to the same cell, but later writes will take precedence.
254    save_ops = [
255        self._cached_states.insert(
256            keys=write_chunk_numbers,
257            values=intermediate_states)]
258    end_state = nest.pack_sequence_as(
259        intermediate_states,
260        [state_element[:, -1]
261         for state_element in nest.flatten(intermediate_states)])
262    with ops.control_dependencies(save_ops):
263      # Make sure end states get saved at each iteration
264      loss_op = array_ops.identity(model_loss)
265    return loss_op, end_state, batch_predictions
266