# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""State space components for modeling seasonality."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy

from tensorflow.contrib.timeseries.python.timeseries.state_space_models import state_space_model

from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops import math_ops


class CycleStateSpaceModel(state_space_model.StateSpaceModel):
  """A state space model component which cycles between values.

  Stores N values using N - 1 latent values, the Nth being the negative sum of
  those explicitly stored. At any given timestep one of these values is
  observed. Noise is assumed to affect only one of the transitions.
  """

  def __init__(
      self,
      periodicity,
      configuration=state_space_model.StateSpaceModelConfiguration()):
    self._periodicity = periodicity
    super(CycleStateSpaceModel, self).__init__(configuration=configuration)

  def get_state_transition(self):
    return self.transition_to_powers(array_ops.ones([], dtype=dtypes.int32))

  def get_noise_transform(self):
    # transition_power_noise_accumulator makes assumptions about this
    # transformation. If the noise transform is modified or overridden,
    # transition_power_noise_accumulator must be modified as well (or discarded,
    # as it is simply an optimization).
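    # For example, with periodicity 4 the result is the [3, 1] column vector
    # [[1.], [0.], [0.]]: transition noise enters only the first latent value.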
    return array_ops.pad(
        array_ops.ones([1], dtype=self.dtype),
        paddings=[(0, self._periodicity - 2)])[..., None]

  def transition_to_powers(self, powers):
    """Computes powers of the cycle transition matrix efficiently.

    Args:
      powers: An integer Tensor, shape [...], with powers to raise the
        transition matrix to.
    Returns:
      A floating point Tensor with shape [..., self._periodicity - 1,
      self._periodicity - 1] containing:
        (transition^power)_{i, j} = {
           1  if (i - j) % self._periodicity == power % self._periodicity
          -1  if (i + 1) % self._periodicity == power % self._periodicity
           0  otherwise}
    """
    powers %= self._periodicity
    range_shape_padded = array_ops.reshape(
        math_ops.range(self._periodicity - 1, dtype=powers.dtype),
        array_ops.concat(
            [
                array_ops.ones([array_ops.rank(powers)], dtype=dtypes.int32),
                [self._periodicity - 1]
            ],
            axis=0))
    is_row_negative = math_ops.equal(range_shape_padded + 1, powers[..., None])
    row_indicator_shape = array_ops.shape(is_row_negative)
    negative_row_indicator = array_ops.where(is_row_negative, -array_ops.ones(
        shape=row_indicator_shape, dtype=self.dtype),
                                             array_ops.zeros(
                                                 row_indicator_shape,
                                                 dtype=self.dtype))
    coord_diff = (range_shape_padded[..., None]
                  - range_shape_padded[..., None, :])
    is_one = math_ops.equal(coord_diff % self._periodicity,
                            powers[..., None, None])
    positive_ones = array_ops.where(is_one,
                                    array_ops.ones(
                                        array_ops.shape(is_one),
                                        dtype=self.dtype),
                                    array_ops.zeros(
                                        array_ops.shape(is_one),
                                        dtype=self.dtype))
    return math_ops.cast(positive_ones + negative_row_indicator[..., None],
                         self.dtype)

  def transition_power_noise_accumulator(
      self, num_steps, noise_addition_coefficient=1):
    r"""Sum the transitioned covariance matrix over a number of steps.

    Assumes that state_transition_noise_covariance is a matrix with a single
    non-zero value in the upper left.

    Args:
      num_steps: A [...] shape integer Tensor with numbers of steps to compute
        power sums for.
      noise_addition_coefficient: A multiplier for the state transition noise
        covariance (used in ResolutionCycleModel to compute multiples of full
        period sums).
    Returns:
      The computed power sum, with shape [..., state dimension, state
      dimension] containing:

        [\sum_{p=0}^{num_steps - 1} (
           state_transition^p
           * state_transition_noise_covariance
           * (state_transition^p)^T)]_{i, j} = {
          -contribution_{j + 1}                    if j == i - 1
           contribution_{j + 1} + contribution_{j} if j == i
          -contribution_{j}                        if j == i + 1
           0                                       otherwise
        }

        contribution_k = noise_scalar
          * ((num_steps + self._periodicity - 1 - (k % self._periodicity))
             // self._periodicity)

      Where contribution_k is the sum of noise_scalar additions to component k
      of the periodicity.
    """
    noise_addition_scalar = array_ops.squeeze(
        self.state_transition_noise_covariance, axis=[-1, -2])
    period_range_reshaped = array_ops.reshape(
        math_ops.range(self._periodicity, dtype=num_steps.dtype),
        array_ops.concat(
            [
                array_ops.ones([array_ops.rank(num_steps)], dtype=dtypes.int32),
                [self._periodicity]
            ],
            axis=0))
    reversed_remaining_steps = ((period_range_reshaped
                                 - (num_steps[..., None] - 1))
                                % self._periodicity)
    period_additions_reversed = (ops.convert_to_tensor(
        noise_addition_coefficient,
        self.dtype)[..., None] * noise_addition_scalar * math_ops.cast(
            (num_steps[..., None] + reversed_remaining_steps) //
            self._periodicity,
            dtype=self.dtype))
    period_additions_diag = array_ops.matrix_diag(period_additions_reversed)
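    # Shift slices of the diagonal matrix above to place the -contribution
    # terms on the super- and sub-diagonals.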
    upper_band = array_ops.concat(
        [
            array_ops.zeros_like(period_additions_diag[..., :-1, 0:1]),
            -period_additions_diag[..., :-1, 0:-2]
        ],
        axis=-1)
    lower_band = array_ops.concat(
        [
            array_ops.zeros_like(period_additions_diag[..., 0:1, :-1]),
            -period_additions_diag[..., 0:-2, :-1]
        ],
        axis=-2)
    period_additions_rotated = array_ops.concat(
        [
            period_additions_reversed[..., -1:],
            period_additions_reversed[..., :-2]
        ],
        axis=-1)
    diagonal = array_ops.matrix_diag(period_additions_reversed[..., :-1] +
                                     period_additions_rotated)
    return diagonal + lower_band + upper_band

  def get_observation_model(self, times):
    """Observe only the first of the rotating latent values.

    See StateSpaceModel.get_observation_model.
    Args:
      times: Unused. See the parent class for details.
    Returns:
      A static, univariate observation model for later broadcasting.
    """
    del times  # Does not rely on times. Uses broadcasting from the parent.
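    # The observation model is the length [self._periodicity - 1] vector
    # [1, 0, ..., 0]: only the first latent value is observed at each step.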
    return array_ops.concat(
        values=[
            array_ops.ones([1], dtype=self.dtype), array_ops.zeros(
                [self._periodicity - 2], dtype=self.dtype)
        ],
        axis=0)


class ResolutionCycleModel(CycleStateSpaceModel):
  """A version of CycleStateSpaceModel with variable resolution.

  Cycles between "num_latent_values" latent values over a period of
  "periodicity", smoothly interpolating. Simply raises the transition matrix
  from CycleStateSpaceModel to the power (num_latent_values / periodicity).

  Specifically, ResolutionCycleModel uses the following eigendecomposition of
  the CycleStateSpaceModel matrix (one of several possible parameterizations;
  others lead to roots of the matrix with complex values):

    eigenvectors_{i, j}
        = root_of_unity(floor(j / 2) + 1, i * (-1)^(j + 1))
          - root_of_unity(floor(j / 2) + 1, (i + 1) * (-1)^(j + 1))
    eigenvalues_j = root_of_unity(floor(j / 2) + 1, (-1)^j)
    root_of_unity(root_number, to_power)
        = exp(to_power * 2 * pi * sqrt(-1) * root_number
              / num_latent_values)

  The transition matrix for ResolutionCycleModel is then:

    eigenvectors
    * diag(eigenvalues^(num_latent_values / periodicity))
    * eigenvectors^-1

  Since the eigenvalues are paired with their conjugates (conj(e^(sqrt(-1)*x)) =
  e^(-sqrt(-1)*x)), the resulting matrix has real components (this is why only
  odd numbers of latent values are supported, since the size of the matrix is
  one less than the number of latent values and there must be an even number of
  eigenvalues to pair them off).

  See ./g3doc/periodic_multires_derivation.md for details.
  """

  def __init__(
      self,
      num_latent_values,
      periodicity,
      near_integer_threshold=1e-8,
      configuration=state_space_model.StateSpaceModelConfiguration()):
    """Initialize the ResolutionCycleModel.

    Args:
      num_latent_values: Controls the representational power and memory usage of
        the model. The transition matrix has shape [num_latent_values - 1,
        num_latent_values - 1]. Must be an odd integer (see class docstring for
        why).
      periodicity: The number of steps for cyclic behavior. May be a Tensor, and
        need not be an integer (although integer values greater than
        num_latent_values have more efficient special cases).
      near_integer_threshold: When avoiding singularities, controls how close a
        number should be to that singularity before the special case takes over.
      configuration: A StateSpaceModelConfiguration object.

    Raises:
      ValueError: If num_latent_values is not odd.
    """
    if num_latent_values % 2 != 1:
      raise ValueError("Only odd numbers of latent values are supported.")
    self._num_latent_values = num_latent_values
    self._true_periodicity = periodicity
    self._near_integer_threshold = near_integer_threshold
    super(ResolutionCycleModel, self).__init__(
        periodicity=num_latent_values,
        configuration=configuration)

  def _close_to_integer(self, value):
    value = math_ops.cast(value, self.dtype)
    return math_ops.less(
        math_ops.abs(value - gen_math_ops.round(value)),
        self._near_integer_threshold)

  def transition_to_powers(self, powers):
    """Computes TransitionMatrix^power efficiently.

    For an n x n transition matrix we have:

      (TransitionMatrix**power)_{i, j} = (-1) ** i * sin(pi * power) / (n + 1)
          * ((-1) ** j / sin(pi / (n + 1) * (power - i + j))
             + 1 / sin(pi / (n + 1) * (power - i - 1)))

    The sin(pi * power) term is zero whenever "power" is an integer. However,
    the 1 / sin(x) terms (cosecants) occasionally (when their arguments are
    multiples of pi) cancel out this value. The limit as the argument approaches
    an integer value gives the "correct" result, but computing these separately
    gives 0 * inf = NaN. Instead, there is a special case for near-integer
    values.

    Args:
      powers: A floating point Tensor of powers to raise the transition matrix
        to.
    Returns:
      A [..., self._num_latent_values - 1, self._num_latent_values - 1] floating
        point Tensor with the transition matrix raised to each power in
        `powers`.

    """
    num_latent_values_float = math_ops.cast(self._num_latent_values, self.dtype)
    latent_values_per_period = (num_latent_values_float / math_ops.cast(
        self._true_periodicity, dtype=self.dtype))
    original_matrix_powers = (math_ops.cast(powers, self.dtype) *
                              latent_values_per_period)
    global_coeff = (math_ops.sin(original_matrix_powers * numpy.pi) /
                    num_latent_values_float)[..., None, None]
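    # global_coeff is the sin(pi * power) / (n + 1) factor from the docstring
    # formula; num_latent_values_float plays the role of n + 1 here.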
    matrix_dimension_range = array_ops.reshape(
        math_ops.range(self._num_latent_values - 1),
        array_ops.concat(
            [
                array_ops.ones(
                    [array_ops.rank(original_matrix_powers)],
                    dtype=dtypes.int32), [self._num_latent_values - 1]
            ],
            axis=0))
    matrix_dimension_range_float = math_ops.cast(matrix_dimension_range,
                                                 self.dtype)
    alternating = math_ops.cast(1 - 2 * (matrix_dimension_range % 2),
                                self.dtype)
    row_addend = 1. / math_ops.sin(numpy.pi / num_latent_values_float * (
        original_matrix_powers[..., None] - matrix_dimension_range_float - 1))
    column_minus_row = (matrix_dimension_range_float[..., None, :]
                        - matrix_dimension_range_float[..., None])
    full_matrix_addend = (alternating[..., None, :] / math_ops.sin(
        numpy.pi / num_latent_values_float *
        (original_matrix_powers[..., None, None] + column_minus_row)))
    continuous_construction = global_coeff * alternating[..., None] * (
        row_addend[..., None] + full_matrix_addend)
    # For integer powers, the above formula is only correct in the limit,
    # yielding NaNs as written. We defer to the super-class in such cases, which
    # computes integer powers exactly.
    return array_ops.where(
        self._close_to_integer(original_matrix_powers),
        super(ResolutionCycleModel, self).transition_to_powers(
            math_ops.cast(
                gen_math_ops.round(original_matrix_powers), dtypes.int64)),
        continuous_construction)

  def transition_power_noise_accumulator(self, num_steps):
    """Sum the transitioned covariance matrix over a number of steps.

    Args:
      num_steps: An integer Tensor of any shape [...] indicating the number of
        steps to compute for each part of the batch.

    Returns:
      A [..., self._num_latent_values - 1, self._num_latent_values - 1] floating
      point Tensor corresponding to each requested number of steps, containing:

          sum_{i=1}^{steps} transition^i * noise_covariance
              * (transition^i)^T
    """

    def _whole_periods_folded():
      """A more efficient special casing for integer periods.

      We knock off full periods, leaving at most self._true_periodicity steps to
      compute.

      Returns:
        A tuple of (remaining_whole_steps, current_accumulation):
          remaining_whole_steps: An integer Tensor with the same shape as the
            `num_steps` argument to `transition_power_noise_accumulator`,
            indicating the reduced number of steps which must be computed
            sequentially and added to `current_accumulation`.
          current_accumulation: A [..., self._num_latent_values - 1,
            self._num_latent_values - 1] floating point Tensor corresponding to
            the accumulations for steps which were computed in this function.
      """
      original_transition_noise_addition_coefficient = (math_ops.cast(
          self._true_periodicity, self.dtype) / math_ops.cast(
              self._num_latent_values, self.dtype))
      full_period_accumulation = super(
          ResolutionCycleModel, self).transition_power_noise_accumulator(
              noise_addition_coefficient=
              original_transition_noise_addition_coefficient,
              num_steps=ops.convert_to_tensor(
                  self._num_latent_values, dtype=num_steps.dtype))
      periodicity_integer = math_ops.cast(self._true_periodicity,
                                          num_steps.dtype)
      full_periods = math_ops.cast(num_steps // periodicity_integer, self.dtype)
      current_accumulation = full_periods[..., None, None] * array_ops.reshape(
          full_period_accumulation,
          array_ops.concat(
              [
                  array_ops.ones(
                      [array_ops.rank(full_periods)], dtype=dtypes.int32),
                  array_ops.shape(full_period_accumulation)
              ],
              axis=0))
      remaining_whole_steps = num_steps % periodicity_integer
      return remaining_whole_steps, current_accumulation
    def _no_whole_period_computation():
      """A less efficient special casing for real valued periods.

      This special casing is still preferable to computing using sequential
      matrix multiplies (parallelizable, more numerically stable), but is linear
      in the number of steps.

      Returns:
        Same shapes and types as `_whole_periods_folded`, but no folding is done
        in this function.
      """
      current_accumulation = array_ops.zeros(
          array_ops.concat(
              [
                  array_ops.shape(num_steps),
                  [self._num_latent_values - 1, self._num_latent_values - 1]
              ],
              axis=0),
          dtype=self.dtype)
      remaining_whole_steps = num_steps
      return remaining_whole_steps, current_accumulation
    # Decide whether it's feasible to compute whole periods in closed form,
    # taking advantage of the fact that a sum over self._true_periodicity steps
    # in our transition matrix is proportional to a sum over
    # self._num_latent_values steps in the unmodified matrix (because each
    # latent value gets the same treatment). This is possible for integer
    # self._true_periodicity, since we stay aligned to integer steps. For real
    # valued self._true_periodicity, or when the cyclic behavior is a higher
    # resolution than 1 per step, taking whole periods leads to misalignment
    # with integer steps, which would be difficult to recover from.
    remaining_whole_steps, current_accumulation = control_flow_ops.cond(
        self._whole_period_folding(), _whole_periods_folded,
        _no_whole_period_computation)
    steps_to_compute = math_ops.reduce_max(remaining_whole_steps)
    remaining_step_noise_additions = self._power_sum_array(steps_to_compute)
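    # Each batch element gathers the cumulative power sum matching its own
    # number of leftover steps, scales it by the (scalar) transition noise
    # variance, and adds it to the accumulation from any folded whole periods.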
    noise_addition_scalar = array_ops.squeeze(
        self.state_transition_noise_covariance, axis=[-1, -2])
    return current_accumulation + noise_addition_scalar * array_ops.gather(
        remaining_step_noise_additions, indices=remaining_whole_steps)

  def _whole_period_folding(self):
    """Decides whether computing a whole period maintains alignment."""
    return math_ops.logical_and(
        self._close_to_integer(self._true_periodicity),
        math_ops.greater_equal(self._true_periodicity, self._num_latent_values))

  def _power_sum_array(self, max_remaining_steps):
    r"""Computes \sum_{i=0}^{N-1} A^i B (A^i)^T for N=0..max_remaining_steps.

    A is the transition matrix and B is the noise covariance.

    This is more efficient in practice than math_utils.power_sums_tensor, since
    each A^i B (A^i)^T term has a closed-form expression that does not depend on
    the preceding (i - 1) term, so vectorization can replace explicit looping.

    Uses a cumulative sum on the following expression:

      (transition^p * transition_covariance * (transition^p)^T)_{i, j}
        = (-1)^(i + j) * sin^2(pi * p) / num_latent_values^2
          * (1/sin(pi / num_latent_values * (p - i))
             + 1/sin(pi / num_latent_values * (p - i - 1)))
          * (1/sin(pi / num_latent_values * (p - j))
             + 1/sin(pi / num_latent_values * (p - j - 1)))

    The expression is derived from the eigenvectors and eigenvalues given in
    the class docstring (and, as with CycleStateSpaceModel, takes advantage of
    the sparsity of the transition covariance).

    Args:
      max_remaining_steps: A scalar integer Tensor indicating the number of
        non-trivial values to compute.
    Returns:
      A [max_remaining_steps + 1, self._num_latent_values - 1,
      self._num_latent_values - 1] floating point Tensor S with cumulative power
      sums.

      S[N] = \sum_{i=0}^{N-1} A^i B (A^i)^T
        S[0] is the zero matrix
        S[1] is B
        S[2] is A B A^T + B

    """
    num_latent_values_float = math_ops.cast(self._num_latent_values, self.dtype)
    latent_values_per_period = (num_latent_values_float / math_ops.cast(
        self._true_periodicity, dtype=self.dtype))
    original_matrix_powers = (math_ops.cast(
        math_ops.range(max_remaining_steps),
        self.dtype) * latent_values_per_period)
    matrix_dimension_range = math_ops.range(
        self._num_latent_values - 1)[None, ...]
    matrix_dimension_range_float = math_ops.cast(matrix_dimension_range,
                                                 self.dtype)
    def _cosecant_with_freq(coefficient):
      return 1. / math_ops.sin(numpy.pi / num_latent_values_float * coefficient)
    power_minus_index = (original_matrix_powers[..., None]
                         - matrix_dimension_range_float)
    mesh_values = (_cosecant_with_freq(power_minus_index)
                   + _cosecant_with_freq(power_minus_index - 1.))
    meshed = mesh_values[..., None, :] * mesh_values[..., None]
    full_matrix_alternating = math_ops.cast(1 - 2 * (
        (matrix_dimension_range[..., None, :] +
         matrix_dimension_range[..., None]) % 2), self.dtype)
    def _sine_discontinuity(value):
      """A special case for dealing with discontinuities.

      Decides whether `value` is close to an integer, and if so computes:

        lim x->n |sin(x * pi)| / sin(x * pi) = sign(sin(n * pi))
                                             = cos(n * pi)

      Args:
        value: The floating point Tensor value which may lead to a
            discontinuity.
      Returns:
        A tuple of (is_discontinuous, sign):
          is_discontinuous: A boolean Tensor of the same shape as `value`,
              indicating whether it is near an integer.
          sign: A floating point Tensor indicating the sign of the discontinuity
            (being near 1 or -1 when `is_discontinuous` is True), of the same
            shape and type as `value`.
      """
      normalized = value / num_latent_values_float
      is_discontinuous = self._close_to_integer(normalized)
      sign = math_ops.cos(normalized * numpy.pi)
      return is_discontinuous, sign
    index_discontinuous, index_sign = _sine_discontinuity(
        original_matrix_powers[..., None]
        - matrix_dimension_range_float)
    index_minus_discontinuous, index_minus_sign = _sine_discontinuity(
        original_matrix_powers[..., None]
        - matrix_dimension_range_float
        - 1)
    ones_mask_vector = math_ops.logical_or(index_discontinuous,
                                           index_minus_discontinuous)
    ones_sign_vector = array_ops.where(index_discontinuous, index_sign,
                                       index_minus_sign)
    ones_mask = math_ops.logical_and(ones_mask_vector[..., None],
                                     ones_mask_vector[..., None, :])
    zeros_mask = self._close_to_integer(original_matrix_powers)
    zeroed = array_ops.where(zeros_mask, array_ops.zeros_like(meshed), meshed)
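    # Entries whose sin(pi * power) coefficient is exactly zero are zeroed here;
    # entries where a cosecant also diverges are filled in below using the sign
    # information from _sine_discontinuity.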
    global_coefficient = (math_ops.sin(numpy.pi * original_matrix_powers) /
                          num_latent_values_float)
    masked_meshed = array_ops.where(
        ones_mask, ones_sign_vector[..., None] * ones_sign_vector[..., None, :],
        zeroed * global_coefficient[..., None, None]**2)
    powers_above_zero = full_matrix_alternating * masked_meshed
    return array_ops.pad(
        math_ops.cumsum(powers_above_zero), [(1, 0), (0, 0), (0, 0)])