1# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Estimators for time series models."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import functools
22
23from tensorflow.contrib.timeseries.python.timeseries import ar_model
24from tensorflow.contrib.timeseries.python.timeseries import feature_keys
25from tensorflow.contrib.timeseries.python.timeseries import head as ts_head_lib
26from tensorflow.contrib.timeseries.python.timeseries import math_utils
27from tensorflow.contrib.timeseries.python.timeseries import state_management
28from tensorflow.contrib.timeseries.python.timeseries.state_space_models import state_space_model
29from tensorflow.contrib.timeseries.python.timeseries.state_space_models import structural_ensemble
30from tensorflow.contrib.timeseries.python.timeseries.state_space_models.filtering_postprocessor import StateInterpolatingAnomalyDetector
31
32from tensorflow.python.estimator import estimator_lib
33from tensorflow.python.estimator.canned import optimizers
34from tensorflow.python.estimator.export import export_lib
35from tensorflow.python.feature_column import feature_column_lib as feature_column
36from tensorflow.python.framework import dtypes
37from tensorflow.python.framework import ops
38from tensorflow.python.framework import tensor_shape
39from tensorflow.python.framework import tensor_util
40from tensorflow.python.ops import array_ops
41from tensorflow.python.ops import math_ops
42from tensorflow.python.ops import parsing_ops
43from tensorflow.python.training import training as train
44from tensorflow.python.util import nest
45
46
class TimeSeriesRegressor(estimator_lib.Estimator):
  """Estimator which fits and evaluates a wrapped time series model."""

  def __init__(self, model, state_manager=None, optimizer=None, model_dir=None,
               config=None, head_type=ts_head_lib.TimeSeriesRegressionHead):
    """Wrap `model` in a head and pass the head's model_fn to `Estimator`.

    Args:
      model: The time series model being wrapped (inheriting from
          TimeSeriesModel).
      state_manager: The state manager to use. If None, a
          FilteringOnlyStateManager is created when `model` is an
          `ar_model.ARModel`, and a PassthroughStateManager otherwise.
      optimizer: The optimization algorithm used for training, inheriting
          from tf.train.Optimizer. If None, Adam with step size 0.02.
      model_dir: See `Estimator`.
      config: See `Estimator`.
      head_type: The kind of head to build for the model (inheriting from
          `TimeSeriesRegressionHead`).
    """
    statistics_generator = math_utils.InputStatisticsFromMiniBatch(
        dtype=model.dtype, num_features=model.num_features)
    if state_manager is None:
      # Choose the default manager based on the kind of model being wrapped.
      default_manager_type = (
          state_management.FilteringOnlyStateManager
          if isinstance(model, ar_model.ARModel)
          else state_management.PassthroughStateManager)
      state_manager = default_manager_type()
    if optimizer is None:
      optimizer = train.AdamOptimizer(0.02)
    # Kept so the serving input receiver builders can query the model.
    self._model = model
    head = head_type(
        model=model, state_manager=state_manager, optimizer=optimizer,
        input_statistics_generator=statistics_generator)
    super(TimeSeriesRegressor, self).__init__(
        model_fn=head.create_estimator_spec,
        model_dir=model_dir,
        config=config)

  def _model_start_state_placeholders(
      self, batch_size_tensor, static_batch_size=None):
    """Builds zero-defaulted placeholders for the model's start state.

    Args:
      batch_size_tensor: A scalar integer Tensor. The zeroed default state is
        tiled this many times along a new leading (batch) dimension.
      static_batch_size: The batch size to record in the static shapes of the
        placeholders, or None to leave the batch dimension unspecified.

    Returns:
      A dictionary mapping the prefixed state names produced by
      `ts_head_lib.state_to_dictionary` to `placeholder_with_default` Tensors.
    """
    def _zero_valued_constant(tensor):
      # Only the static value (shape and dtype) of the zeros is of interest.
      return tensor_util.constant_value(array_ops.zeros_like(tensor))

    # The model may need to create variables/ops before it can describe its
    # state. That work happens in a throwaway graph so that the default graph
    # stays clean; only static metadata leaves the `with` block. The zeros
    # themselves are never used by the model, but placeholder_with_default
    # needs something to fall back on in the "cold start" case where no state
    # is fed.
    with ops.Graph().as_default():
      self._model.initialize_graph()
      zero_start_state = nest.map_structure(
          _zero_valued_constant, self._model.get_start_state())

    batch_dimension = tensor_shape.TensorShape((static_batch_size,))
    flat_start_state = ts_head_lib.state_to_dictionary(zero_start_state)
    state_placeholders = {}
    for state_name, zero_state in flat_start_state.items():
      # Repeat along the new leading dimension only: a multiple of one is used
      # for every dimension the state already has.
      batch_multiple = batch_size_tensor[None]
      unit_multiples = array_ops.ones(
          len(zero_state.shape), dtype=dtypes.int32)
      tile_multiples = array_ops.concat(
          [batch_multiple, unit_multiples], axis=0)
      batched_default = array_ops.tile(
          zero_state[None, ...], multiples=tile_multiples)
      state_placeholders[state_name] = array_ops.placeholder_with_default(
          input=batched_default,
          name=state_name,
          shape=batch_dimension.concatenate(zero_state.shape))
    return state_placeholders

  def build_one_shot_parsing_serving_input_receiver_fn(
      self, filtering_length, prediction_length, default_batch_size=None,
      values_input_dtype=None, truncate_values=False):
    """Build an input_receiver_fn for export_savedmodel accepting tf.Examples.

    Only compatible with `OneShotPredictionHead` (see `head`).

    Args:
      filtering_length: The number of time steps for which values are given
        as input to the model. When more values than this are supplied (see
        `truncate_values`), only the first `filtering_length` are used.
      prediction_length: The number of time steps for which predictions are
        requested. Times and all exogenous features must be provided for
        these steps.
      default_batch_size: If specified, must be a scalar integer. It is
        recorded as the batch size in the static shape information of all
        feature Tensors, so the exported model only accepts this batch size.
        If None (default), static batch size information is omitted.
      values_input_dtype: An optional dtype for the values stored in the
        tf.Example protos (either float32 or int64, since these are the
        numeric types supported by tf.Example). float32 is used if None.
        Parsed values are cast to the model's dtype (float32 or float64).
      truncate_values: If True, `filtering_length + prediction_length` values
        are expected in each proto, of which only the first
        `filtering_length` are used. If False (default), exactly
        `filtering_length` values must be provided.

    Returns:
      An input_receiver_fn which may be passed to the Estimator's
      export_savedmodel.

      The receiver takes a vector of serialized tf.Examples with shape
      [batch size] (dtype `tf.string`). Each tf.Example contains features
      with the following shapes:
        times: [filtering_length + prediction_length] integer
        values: [filtering_length, num features] floating point. If
          `truncate_values` is True, `filtering_length + prediction_length`
          values are expected but only the first `filtering_length` are used.
        all exogenous features: [filtering_length + prediction_length, ...]
          (various dtypes)

    Raises:
      ValueError: When the returned function is called and one of the model's
        feature columns yields a parsing spec which is neither a
        FixedLenFeature nor string-typed.
    """
    if values_input_dtype is None:
      values_input_dtype = dtypes.float32
    values_proto_length = (
        filtering_length + prediction_length
        if truncate_values else filtering_length)

    def _serving_input_receiver_fn():
      """A receiver function to be passed to export_savedmodel."""
      times_key = feature_keys.TrainEvalFeatures.TIMES
      values_key = feature_keys.TrainEvalFeatures.VALUES
      times_column = feature_column.numeric_column(
          key=times_key, dtype=dtypes.int64)
      values_column = feature_column.numeric_column(
          key=values_key, dtype=values_input_dtype,
          shape=(self._model.num_features,))
      columns = (list(self._model.exogenous_feature_columns)
                 + [times_column, values_column])
      per_step_specs = feature_column.make_parse_example_spec(columns)
      total_length = filtering_length + prediction_length
      # The specs above describe a single time step. Add a leading series
      # dimension to each of them.
      sequence_specs = {}
      for key, step_spec in per_step_specs.items():
        if isinstance(step_spec, parsing_ops.FixedLenFeature):
          # Values only cover the filtering window unless truncation was
          # requested; every other feature covers the whole series.
          series_length = (
              values_proto_length if key == values_key else total_length)
          sequence_specs[key] = step_spec._replace(
              shape=(series_length,) + step_spec.shape)
        elif step_spec.dtype == dtypes.string:
          sequence_specs[key] = parsing_ops.FixedLenFeature(
              shape=(total_length,), dtype=dtypes.string)
        else:  # VarLenFeature
          raise ValueError("VarLenFeatures not supported, got %s for key %s"
                           % (step_spec, key))
      serialized_examples = array_ops.placeholder(
          shape=[default_batch_size], dtype=dtypes.string, name="input")
      features = parsing_ops.parse_example(
          serialized=serialized_examples, features=sequence_specs)
      # Drop the trailing dimension of the parsed times.
      features[times_key] = array_ops.squeeze(features[times_key], axis=-1)
      model_dtype_values = math_ops.cast(
          features[values_key], dtype=self._model.dtype)
      # Discards the values past the filtering window when truncating.
      features[values_key] = model_dtype_values[:, :filtering_length]
      batch_size_tensor = array_ops.shape(features[times_key])[0]
      start_state = self._model_start_state_placeholders(
          batch_size_tensor=batch_size_tensor,
          static_batch_size=default_batch_size)
      features.update(start_state)
      return export_lib.ServingInputReceiver(
          features, {"examples": serialized_examples})

    return _serving_input_receiver_fn

  def build_raw_serving_input_receiver_fn(
      self, default_batch_size=None, default_series_length=None):
    """Build an input_receiver_fn for export_savedmodel which accepts arrays.

    Placeholders for the exogenous `FeatureColumn`s passed to the model are
    created automatically.

    Args:
      default_batch_size: If specified, must be a scalar integer. It is
        recorded as the batch size in the static shape information of all
        feature Tensors, so the exported model only accepts this batch size.
        If None (default), static batch size information is omitted.
      default_series_length: If specified, must be a scalar integer. It is
        recorded as the series length in the static shape information of all
        feature Tensors, so the exported model only accepts this series
        length. If None (default), static series length information is
        omitted.
    Returns:
      An input_receiver_fn which may be passed to the Estimator's
      export_savedmodel.
    """
    def _serving_input_receiver_fn():
      """A receiver function to be passed to export_savedmodel."""
      times_key = feature_keys.TrainEvalFeatures.TIMES
      values_key = feature_keys.TrainEvalFeatures.VALUES
      time_placeholder = array_ops.placeholder(
          name=times_key,
          dtype=dtypes.int64,
          shape=[default_batch_size, default_series_length])
      num_features = self._model.num_features
      # Values are only necessary when filtering. For prediction the default
      # value will be ignored. Dimensions without a static size are given a
      # size of zero in the default.
      default_values = array_ops.zeros(
          shape=[default_batch_size or 0,
                 default_series_length or 0,
                 num_features],
          dtype=self._model.dtype)
      values_placeholder = array_ops.placeholder_with_default(
          name=values_key,
          input=default_values,
          shape=(default_batch_size, default_series_length, num_features))
      placeholders = {
          times_key: time_placeholder,
          values_key: values_placeholder,
      }
      exogenous_columns = self._model.exogenous_feature_columns
      if exogenous_columns:
        exogenous_signatures = []
        with ops.Graph().as_default():
          # Parsing produces Tensors whose only extra dimension is an unknown
          # batch dimension. Build them in a scratch graph purely to read off
          # shapes and dtypes; the real placeholders, with the series length
          # spliced in, are created in the outer graph below.
          exogenous_spec = feature_column.make_parse_example_spec(
              exogenous_columns)
          batch_only_features = parsing_ops.parse_example(
              serialized=array_ops.placeholder(
                  shape=[None], dtype=dtypes.string),
              features=exogenous_spec)
          for feature_key, parsed in batch_only_features.items():
            exogenous_signatures.append(
                (feature_key, parsed.get_shape(), parsed.dtype))
        for feature_key, batch_only_shape, feature_dtype in (
            exogenous_signatures):
          per_step_dims = batch_only_shape.with_rank_at_least(1).as_list()[1:]
          placeholders[feature_key] = array_ops.placeholder(
              dtype=feature_dtype,
              name=feature_key,
              shape=([default_batch_size, default_series_length]
                     + per_step_dims))
      batch_size_tensor = array_ops.shape(time_placeholder)[0]
      start_state = self._model_start_state_placeholders(
          batch_size_tensor, static_batch_size=default_batch_size)
      placeholders.update(start_state)
      # The same dictionary serves as both the features and the receivers.
      return export_lib.ServingInputReceiver(placeholders, placeholders)

    return _serving_input_receiver_fn
286
287
class ARRegressor(TimeSeriesRegressor):
  """An Estimator for an (optionally non-linear) autoregressive model.

  ARRegressor is a window-based model, inputting fixed windows of length
  `input_window_size` and outputting fixed windows of length
  `output_window_size`. These two parameters must add up to the window_size
  passed to the `Chunker` used to create an `input_fn` for training or
  evaluation. `RandomWindowInputFn` is suggested for both training and
  evaluation, although it may be seeded for deterministic evaluation.
  """

  def __init__(
      self, periodicities, input_window_size, output_window_size,
      num_features, exogenous_feature_columns=None, num_time_buckets=10,
      loss=ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS, hidden_layer_sizes=None,
      anomaly_prior_probability=None, anomaly_distribution=None,
      optimizer=None, model_dir=None, config=None):
    """Initialize the Estimator.

    Builds an `ar_model.ARModel`, or an `ar_model.AnomalyMixtureARModel` when
    `anomaly_prior_probability` is given, and wraps it together with a
    `FilteringOnlyStateManager`.

    Args:
      periodicities: periodicities of the input data, in the same units as the
        time feature. Note this can be a single value or a list of values for
        multiple periodicities.
      input_window_size: Number of past time steps of data to look at when doing
        the regression.
      output_window_size: Number of future time steps to predict. Note that
        setting it to > 1 empirically seems to give a better fit.
      num_features: The dimensionality of the time series (one for univariate,
        more than one for multivariate).
      exogenous_feature_columns: A list of `tf.feature_column`s (for example
        `tf.feature_column.embedding_column`) corresponding to exogenous
        features which provide extra information to the model but are not part
        of the series to be predicted. Passed to
        `tf.feature_column.input_layer`.
      num_time_buckets: Number of buckets into which to divide (time %
        periodicity) for generating time based features.
      loss: Loss function to use for training. Currently supported values are
        SQUARED_LOSS and NORMAL_LIKELIHOOD_LOSS. Note that for
        NORMAL_LIKELIHOOD_LOSS, we train the covariance term as well. For
        SQUARED_LOSS, the evaluation loss is reported based on un-scaled
        observations and predictions, while the training loss is computed on
        normalized data. Must be NORMAL_LIKELIHOOD_LOSS when
        `anomaly_prior_probability` is specified.
      hidden_layer_sizes: list of sizes of hidden layers.
      anomaly_prior_probability: If specified, constructs a mixture model under
        which anomalies (modeled with `anomaly_distribution`) have this prior
        probability. See `AnomalyMixtureARModel`.
      anomaly_distribution: May not be specified unless
        anomaly_prior_probability is specified and is not None. Controls the
        distribution of anomalies under the mixture model. Currently either
        `ar_model.AnomalyMixtureARModel.GAUSSIAN_ANOMALY` or
        `ar_model.AnomalyMixtureARModel.CAUCHY_ANOMALY`. See
        `AnomalyMixtureARModel`. Defaults to `GAUSSIAN_ANOMALY`.
      optimizer: The optimization algorithm to use when training, inheriting
          from tf.train.Optimizer. Defaults to Adagrad with step size 0.1.
      model_dir: See `Estimator`.
      config: See `Estimator`.
    Raises:
      ValueError: For invalid combinations of arguments: if
        `anomaly_distribution` is given without `anomaly_prior_probability`,
        or if `anomaly_prior_probability` is given together with a `loss`
        other than NORMAL_LIKELIHOOD_LOSS.
    """
    if optimizer is None:
      optimizer = train.AdagradOptimizer(0.1)
    if anomaly_prior_probability is None and anomaly_distribution is not None:
      raise ValueError("anomaly_prior_probability is required if "
                       "anomaly_distribution is specified.")
    # Both model variants use the same flat prediction network.
    prediction_model_factory = functools.partial(
        ar_model.FlatPredictionModel,
        hidden_layer_sizes=hidden_layer_sizes)
    if anomaly_prior_probability is None:
      model = ar_model.ARModel(
          periodicities=periodicities, num_features=num_features,
          prediction_model_factory=prediction_model_factory,
          exogenous_feature_columns=exogenous_feature_columns,
          num_time_buckets=num_time_buckets,
          input_window_size=input_window_size,
          output_window_size=output_window_size, loss=loss)
    else:
      if loss != ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS:
        raise ValueError(
            "AnomalyMixtureARModel only supports "
            "ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS for its loss argument.")
      if anomaly_distribution is None:
        # The documented default has to be applied here, in the only branch
        # which uses it, so that the mixture model never receives None.
        anomaly_distribution = ar_model.AnomalyMixtureARModel.GAUSSIAN_ANOMALY
      model = ar_model.AnomalyMixtureARModel(
          periodicities=periodicities,
          input_window_size=input_window_size,
          output_window_size=output_window_size,
          num_features=num_features,
          prediction_model_factory=prediction_model_factory,
          exogenous_feature_columns=exogenous_feature_columns,
          num_time_buckets=num_time_buckets,
          anomaly_prior_probability=anomaly_prior_probability,
          anomaly_distribution=anomaly_distribution)
    state_manager = state_management.FilteringOnlyStateManager()
    super(ARRegressor, self).__init__(
        model=model,
        state_manager=state_manager,
        optimizer=optimizer,
        model_dir=model_dir,
        config=config)
388
389
390# TODO(b/113684821): Add detailed documentation on what the input_fn should do.
391# Add an example of making and returning a Dataset object. Determine if
392# endogenous features can be passed in as FeatureColumns. Move ARModel's loss
393# functions into a more general location.
class LSTMAutoRegressor(TimeSeriesRegressor):
  """An Estimator for an LSTM autoregressive model.

  LSTMAutoRegressor is window-based: it takes fixed windows of length
  `input_window_size` as input and produces fixed windows of length
  `output_window_size`. The sum of these two parameters must equal the
  window_size of the data returned by the `input_fn`.

  Every periodicity in the `periodicities` arg is split into `num_timesteps`
  timesteps, each of which is represented by a time feature added to the
  model.

  The length of the cycles in a data set is a good heuristic for choosing its
  periodicity. Energy usage in a home, for example, typically cycles daily, so
  24 would be an appropriate periodicity for a home energy usage dataset whose
  time feature is in hours. Likewise, how often the data is expected to change
  within a cycle is a good heuristic for `num_timesteps`: with a periodicity
  of 24 in the same dataset, 48 would be a reasonable value if usage is
  expected to change every half hour.

  For an example with time t, the value of a feature is the difference between
  t and the start of the timestep that t falls under. Features whose timestep
  does not contain t have a value of zero.

  For example: with `periodicities` = (9, 12) and `num_timesteps` = 3, 6
  features are added to the model, 3 for periodicity 9 and 3 for periodicity
  12.

  A data point with t = 17 is:
  - In the 3rd timestep for periodicity 9 (the 2nd period is 9-18 and its 3rd
    timestep is 15-18)
  - In the 2nd timestep for periodicity 12 (the 2nd period is 12-24 and its
    2nd timestep is 16-20).

  The 6 added features for this row with t = 17 would therefore be:

  # Feature name (periodicity#_timestep#), feature value
  P9_T1, 0 # not in first timestep
  P9_T2, 0 # not in second timestep
  P9_T3, 2 # 17 - 15 since 15 is the start of the 3rd timestep
  P12_T1, 0 # not in first timestep
  P12_T2, 1 # 17 - 16 since 16 is the start of the 2nd timestep
  P12_T3, 0 # not in third timestep

  Example Code:

  ```python
  extra_feature_columns = (
      feature_column.numeric_column("exogenous_variable"),
  )

  estimator = LSTMAutoRegressor(
      periodicities=10,
      input_window_size=10,
      output_window_size=5,
      model_dir="/path/to/model/dir",
      num_features=1,
      extra_feature_columns=extra_feature_columns,
      num_timesteps=50,
      num_units=10,
      optimizer=tf.train.ProximalAdagradOptimizer(...))

  # Input builders
  def input_fn_train():
    return {
      "times": tf.range(15)[None, :],
      "values": tf.random_normal(shape=[1, 15, 1])
    }
  estimator.train(input_fn=input_fn_train, steps=100)

  def input_fn_eval():
    pass
  metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10)

  def input_fn_predict():
    pass
  predictions = estimator.predict(input_fn=input_fn_predict)
  ```
  """

  def __init__(self,
               periodicities,
               input_window_size,
               output_window_size,
               model_dir=None,
               num_features=1,
               extra_feature_columns=None,
               num_timesteps=10,
               loss=ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS,
               num_units=128,
               optimizer="Adam",
               config=None):
    """Initialize the Estimator.

    Wraps an `ar_model.ARModel` which uses `ar_model.LSTMPredictionModel` for
    its predictions, with a `FilteringOnlyStateManager` and a
    `OneShotPredictionHead`.

    Args:
      periodicities: periodicities of the input data, in the same units as the
        time feature (for example 24 when feeding hourly data with a daily
        periodicity, or 60 * 24 when feeding minute-level data with a daily
        periodicity). Either a single value or a list of values for multiple
        periodicities.
      input_window_size: Number of past time steps of data to look at when doing
        the regression.
      output_window_size: Number of future time steps to predict. Note that
        setting this value to > 1 empirically seems to give a better fit.
      model_dir: Directory in which to save model parameters, the graph, etc.
        It can also be used to load checkpoints from the directory into an
        estimator to continue training a previously saved model.
      num_features: The dimensionality of the time series (the default of one
        is for univariate series; use more than one for multivariate).
      extra_feature_columns: A list of `tf.feature_column`s (for example
        `tf.feature_column.embedding_column`) corresponding to features which
        provide extra information to the model but are not part of the series to
        be predicted.
      num_timesteps: Number of buckets into which to divide (time %
        periodicity). The number of time features added to the model is this
        value multiplied by the number of periodicities.
      loss: Loss function to use for training. Currently supported values are
        SQUARED_LOSS and NORMAL_LIKELIHOOD_LOSS. Note that for
        NORMAL_LIKELIHOOD_LOSS, we train the covariance term as well. For
        SQUARED_LOSS, the evaluation loss is reported based on un-scaled
        observations and predictions, while the training loss is computed on
        normalized data.
      num_units: The size of the hidden state in the encoder and decoder LSTM
        cells.
      optimizer: string, `tf.train.Optimizer` object, or callable that defines
        the optimizer algorithm to use for training. Defaults to the Adam
        optimizer with a learning rate of 0.01.
      config: Optional `estimator.RunConfig` object to configure the runtime
        settings.
    """
    # Resolve the optimizer first so that an invalid specification is
    # reported before any model objects are constructed.
    optimizer_instance = optimizers.get_optimizer_instance(
        optimizer, learning_rate=0.01)
    lstm_model_factory = functools.partial(
        ar_model.LSTMPredictionModel, num_units=num_units)
    super(LSTMAutoRegressor, self).__init__(
        model=ar_model.ARModel(
            periodicities=periodicities,
            input_window_size=input_window_size,
            output_window_size=output_window_size,
            num_features=num_features,
            exogenous_feature_columns=extra_feature_columns,
            num_time_buckets=num_timesteps,
            loss=loss,
            prediction_model_factory=lstm_model_factory),
        state_manager=state_management.FilteringOnlyStateManager(),
        optimizer=optimizer_instance,
        model_dir=model_dir,
        config=config,
        head_type=ts_head_lib.OneShotPredictionHead)
544
545
class StateSpaceRegressor(TimeSeriesRegressor):
  """An Estimator for general state space models."""

  def __init__(self, model, state_manager=None, optimizer=None, model_dir=None,
               config=None, head_type=ts_head_lib.TimeSeriesRegressionHead):
    """See TimeSeriesRegressor. Uses the ChainingStateManager by default.

    Raises:
      ValueError: If `model` is not an instance of
        `state_space_model.StateSpaceModel`.
    """
    model_is_state_space = isinstance(
        model, state_space_model.StateSpaceModel)
    if not model_is_state_space:
      raise ValueError(
          "StateSpaceRegressor only supports state space models (children of "
          "StateSpaceModel) in its `model` argument, got {}.".format(model))
    # A state manager supplied by the caller always takes precedence.
    chosen_state_manager = (
        state_management.ChainingStateManager()
        if state_manager is None else state_manager)
    super(StateSpaceRegressor, self).__init__(
        model=model,
        state_manager=chosen_state_manager,
        optimizer=optimizer,
        model_dir=model_dir,
        config=config,
        head_type=head_type)
565
566
class StructuralEnsembleRegressor(StateSpaceRegressor):
  """An Estimator for structural time series models.

  The model is called "structural" because it explicitly accounts for
  structure in the data, such as periodicity and trends.

  `StructuralEnsembleRegressor` is a state space model with components for
  modeling level, local linear trends, periodicity, and mean-reverting
  transients via a moving average component. Multivariate series are fit with
  full covariance matrices for observation and latent state transition noise,
  and each feature of a multivariate series has its own latent components.

  Unlike `ARRegressor`, `StructuralEnsembleRegressor` is sequential, so the
  same model accepts variable window sizes.

  `RandomWindowInputFn` is the recommended `input_fn` for training. Model
  state is managed through `ChainingStateManager`: because state space models
  are inherently sequential, state from previous iterations is saved to get
  approximate/eventual consistency while still achieving good performance
  through batched computation.

  For evaluation, either pass a significant chunk of the series in a single
  window (e.g. set `window_size` to the whole series with
  `WholeDatasetInputFn`), or run enough random evaluation iterations to cover
  several passes through the whole dataset. Both approaches ensure that stale
  saved state has been flushed.
  """

  def __init__(self,
               periodicities,
               num_features,
               cycle_num_latent_values=11,
               moving_average_order=4,
               autoregressive_order=0,
               exogenous_feature_columns=None,
               exogenous_update_condition=None,
               dtype=dtypes.float64,
               anomaly_prior_probability=None,
               optimizer=None,
               model_dir=None,
               config=None,
               head_type=ts_head_lib.TimeSeriesRegressionHead):
    """Initialize the Estimator.

    Args:
      periodicities: The expected periodicity of the data (for example 24 when
          feeding hourly data with a daily periodicity, or 60 * 24 when
          feeding minute-level data with a daily periodicity). Either a scalar
          or a list. Any real value is allowed, and the size of the model does
          not depend on it. However, increasing it without also increasing
          `cycle_num_latent_values` leads to smoother periodic behavior, since
          the same number of distinct values is cycled through over a longer
          period of time.
      num_features: The dimensionality of the time series (one for univariate,
          more than one for multivariate).
      cycle_num_latent_values: Along with `moving_average_order` and
          `num_features`, controls the latent state size of the model. Square
          matrices of size `num_features * (moving_average_order +
          cycle_num_latent_values + 3)` are created and multiplied, so larger
          values may be slow. The trade-off is with resolution: cycling
          between a smaller number of latent values means that only smoother
          functions can be modeled.
      moving_average_order: Controls model size (along with
          `cycle_num_latent_values` and `autoregressive_order`) and the number
          of steps before transient deviations revert to the mean defined by
          the period and level/trend components.
      autoregressive_order: Each contribution from this component is a linear
          combination of this many previous contributions. Also helps to
          determine the model size. Learning autoregressive coefficients
          typically requires more steps and a smaller step size than other
          components.
      exogenous_feature_columns: A list of `tf.feature_column`s (for example
          `tf.feature_column.embedding_column`) corresponding to exogenous
          features which provide extra information to the model but are not
          part of the series to be predicted. Passed to
          `tf.feature_column.input_layer`.
      exogenous_update_condition: A function taking two Tensor arguments,
          `times` (shape [batch size]) and `features` (a dictionary mapping
          exogenous feature keys to Tensors with shapes [batch size, ...]),
          and returning a boolean Tensor with shape [batch size] which
          indicates, for each part of the batch, whether state should be
          updated using exogenous features. No exogenous update is performed
          where it is False. If None (default), exogenous updates are always
          performed. Useful for avoiding "leaky" frequent exogenous updates
          when sparse updates are desired. Called only during graph
          construction. See the "known anomaly" example for example usage.
      dtype: The floating point data type to compute with. float32 may be
          faster, but can be problematic for larger models and longer time
          series.
      anomaly_prior_probability: If not None, the model attempts to
          automatically detect and ignore anomalies during training, and this
          parameter controls the prior probability of an anomaly. Values
          closer to 0 mean that points will be discarded less frequently. The
          default value (None) means that anomalies are not discarded, which
          may be slightly faster.
      optimizer: The optimization algorithm to use when training, inheriting
          from tf.train.Optimizer. Defaults to Adam with step size 0.02.
      model_dir: See `Estimator`.
      config: See `Estimator`.
      head_type: The kind of head to use for the model (inheriting from
          `TimeSeriesRegressionHead`).
    """
    # An anomaly detector is attached to filtering only when a prior
    # probability was supplied.
    anomaly_detector = (
        StateInterpolatingAnomalyDetector(
            anomaly_prior_probability=anomaly_prior_probability)
        if anomaly_prior_probability is not None else None)
    configuration = state_space_model.StateSpaceModelConfiguration(
        num_features=num_features,
        dtype=dtype,
        filtering_postprocessor=anomaly_detector,
        exogenous_feature_columns=exogenous_feature_columns,
        exogenous_update_condition=exogenous_update_condition)
    ensemble = structural_ensemble.MultiResolutionStructuralEnsemble(
        cycle_num_latent_values=cycle_num_latent_values,
        moving_average_order=moving_average_order,
        autoregressive_order=autoregressive_order,
        periodicities=periodicities,
        configuration=configuration)
    # No state manager is passed, so the parent class picks its default.
    super(StructuralEnsembleRegressor, self).__init__(
        model=ensemble,
        optimizer=optimizer,
        model_dir=model_dir,
        config=config,
        head_type=head_type)
692