# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""State space components for modeling seasonality."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy

from tensorflow.contrib.timeseries.python.timeseries.state_space_models import state_space_model

from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops import math_ops


class CycleStateSpaceModel(state_space_model.StateSpaceModel):
  """A state space model component which cycles between values.

  Stores N values using N - 1 latent values, the Nth being the negative sum of
  those explicitly stored. At any given timestep one of these values is
  observed. Noise is assumed to affect only one of the transitions.
  """

  def __init__(
      self,
      periodicity,
      configuration=state_space_model.StateSpaceModelConfiguration()):
    self._periodicity = periodicity
    super(CycleStateSpaceModel, self).__init__(configuration=configuration)

  def get_state_transition(self):
    return self.transition_to_powers(array_ops.ones([], dtype=dtypes.int32))

  def get_noise_transform(self):
    # transition_power_noise_accumulator makes assumptions about this
    # transformation. If the noise transform is modified or overridden,
    # transition_power_noise_accumulator must be modified as well (or
    # discarded, as it is simply an optimization).
    return array_ops.pad(
        array_ops.ones([1], dtype=self.dtype),
        paddings=[(0, self._periodicity - 2)])[..., None]
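
  # Illustrative sketch (comments only, not used by the implementation): for
  # periodicity = 4 the latent state holds 3 values, and the transition matrix
  # returned by get_state_transition is the "dummy seasonal" matrix
  #
  #   [[-1., -1., -1.],
  #    [ 1.,  0.,  0.],
  #    [ 0.,  1.,  0.]]
  #
  # so the newest latent value is always the negative sum of the previous
  # three. The noise transform above is the column [[1.], [0.], [0.]], i.e.
  # transition noise enters only through that newest value. A quick NumPy
  # check of the cycling behavior described in the class docstring:
  #
  #   import numpy as np
  #   transition = np.array([[-1., -1., -1.], [1., 0., 0.], [0., 1., 0.]])
  #   assert np.allclose(np.linalg.matrix_power(transition, 4), np.eye(3))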

  def transition_to_powers(self, powers):
    """Computes powers of the cycle transition matrix efficiently.

    Args:
      powers: An integer Tensor, shape [...], with powers to raise the
        transition matrix to.
    Returns:
      A floating point Tensor with shape [..., self._periodicity - 1,
      self._periodicity - 1] containing:
        (transition^power)_{i, j} = {
          1  if (i - j) % self._periodicity == power % self._periodicity
          -1 if (i + 1) % self._periodicity == power % self._periodicity
          0  otherwise}
    """
    powers %= self._periodicity
    range_shape_padded = array_ops.reshape(
        math_ops.range(self._periodicity - 1, dtype=powers.dtype),
        array_ops.concat(
            [
                array_ops.ones([array_ops.rank(powers)], dtype=dtypes.int32),
                [self._periodicity - 1]
            ],
            axis=0))
    is_row_negative = math_ops.equal(range_shape_padded + 1, powers[..., None])
    row_indicator_shape = array_ops.shape(is_row_negative)
    negative_row_indicator = array_ops.where(
        is_row_negative,
        -array_ops.ones(shape=row_indicator_shape, dtype=self.dtype),
        array_ops.zeros(row_indicator_shape, dtype=self.dtype))
    coord_diff = (range_shape_padded[..., None]
                  - range_shape_padded[..., None, :])
    is_one = math_ops.equal(coord_diff % self._periodicity,
                            powers[..., None, None])
    positive_ones = array_ops.where(
        is_one,
        array_ops.ones(array_ops.shape(is_one), dtype=self.dtype),
        array_ops.zeros(array_ops.shape(is_one), dtype=self.dtype))
    return math_ops.cast(positive_ones + negative_row_indicator[..., None],
                         self.dtype)

  def transition_power_noise_accumulator(
      self, num_steps, noise_addition_coefficient=1):
    r"""Sum the transitioned covariance matrix over a number of steps.

    Assumes that state_transition_noise_covariance is a matrix with a single
    non-zero value in the upper left.

    Args:
      num_steps: A [...] shape integer Tensor with numbers of steps to compute
        power sums for.
      noise_addition_coefficient: A multiplier for the state transition noise
        covariance (used in ResolutionCycleModel to compute multiples of full
        period sums).
    Returns:
      The computed power sum, with shape [..., state dimension, state
      dimension] containing:

        [\sum_{p=0}^{num_steps - 1} (
           state_transition^p
           * state_transition_noise_covariance
           * (state_transition^p)^T)]_{i, j} = {
          -contribution_{j + 1}                    if j == i - 1
          contribution_{j + 1} + contribution_{j}  if j == i
          -contribution_{j}                        if j == i + 1
          0                                        otherwise
        }

        contribution_k = noise_scalar
          * ((num_steps + self._periodicity - 1 - (k % self._periodicity))
             // self._periodicity)

      Where contribution_k is the sum of noise_scalar additions to component k
      of the periodicity.
    """
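    # Worked example (illustrative only, not used below): with periodicity = 4,
    # noise_scalar = 1, and num_steps = 5, contribution_k
    # = (5 + 4 - 1 - k % 4) // 4 gives contributions [2, 1, 1, 1] for k = 0..3,
    # and the formula above produces the tridiagonal matrix
    #
    #   [[ 3., -1.,  0.],
    #    [-1.,  2., -1.],
    #    [ 0., -1.,  2.]]
    #
    # which matches summing transition^p * e_1 e_1^T * (transition^p)^T
    # directly for p = 0..4.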
    noise_addition_scalar = array_ops.squeeze(
        self.state_transition_noise_covariance, axis=[-1, -2])
    period_range_reshaped = array_ops.reshape(
        math_ops.range(self._periodicity, dtype=num_steps.dtype),
        array_ops.concat(
            [
                array_ops.ones([array_ops.rank(num_steps)], dtype=dtypes.int32),
                [self._periodicity]
            ],
            axis=0))
    reversed_remaining_steps = ((period_range_reshaped
                                 - (num_steps[..., None] - 1))
                                % self._periodicity)
    period_additions_reversed = (ops.convert_to_tensor(
        noise_addition_coefficient,
        self.dtype)[..., None] * noise_addition_scalar * math_ops.cast(
            (num_steps[..., None] + reversed_remaining_steps) //
            self._periodicity,
            dtype=self.dtype))
    period_additions_diag = array_ops.matrix_diag(period_additions_reversed)
    upper_band = array_ops.concat(
        [
            array_ops.zeros_like(period_additions_diag[..., :-1, 0:1]),
            -period_additions_diag[..., :-1, 0:-2]
        ],
        axis=-1)
    lower_band = array_ops.concat(
        [
            array_ops.zeros_like(period_additions_diag[..., 0:1, :-1]),
            -period_additions_diag[..., 0:-2, :-1]
        ],
        axis=-2)
    period_additions_rotated = array_ops.concat(
        [
            period_additions_reversed[..., -1:],
            period_additions_reversed[..., :-2]
        ],
        axis=-1)
    diagonal = array_ops.matrix_diag(period_additions_reversed[..., :-1] +
                                     period_additions_rotated)
    return diagonal + lower_band + upper_band

  def get_observation_model(self, times):
    """Observe only the first of the rotating latent values.

    See StateSpaceModel.get_observation_model.

    Args:
      times: Unused. See the parent class for details.
    Returns:
      A static, univariate observation model for later broadcasting.
    """
    del times  # Does not rely on times. Uses broadcasting from the parent.
    return array_ops.concat(
        values=[
            array_ops.ones([1], dtype=self.dtype),
            array_ops.zeros([self._periodicity - 2], dtype=self.dtype)
        ],
        axis=0)


class ResolutionCycleModel(CycleStateSpaceModel):
  """A version of CycleStateSpaceModel with variable resolution.

  Cycles between "num_latent_values" latent values over a period of
  "periodicity", smoothly interpolating. Simply raises the transition matrix
  from CycleStateSpaceModel to the power (num_latent_values / periodicity).

  Specifically, ResolutionCycleModel uses the following eigendecomposition of
  the CycleStateSpaceModel matrix (there are several parameterizations, others
  leading to roots of the matrix with complex values):

    eigenvectors_{i, j}
      = root_of_unity(floor(j / 2) + 1, i * (-1)^(j + 1))
        - root_of_unity(floor(j / 2) + 1, (i + 1) * (-1)^(j + 1))
    eigenvalues_j = root_of_unity(floor(j / 2) + 1, (-1)^j)
    root_of_unity(root_number, to_power)
      = exp(to_power * 2 * pi * sqrt(-1) * root_number
            / num_latent_values)

  The transition matrix for ResolutionCycleModel is then:

    eigenvectors
    * diag(eigenvalues^(num_latent_values / periodicity))
    * eigenvectors^-1

  Since the eigenvalues are paired with their conjugates (conj(e^(sqrt(-1)*x))
  = e^(-sqrt(-1)*x)), the resulting matrix has real components (this is why
  only odd numbers of latent values are supported, since the size of the
  matrix is one less than the number of latent values and there must be an
  even number of eigenvalues to pair them off).

  See ./g3doc/periodic_multires_derivation.md for details.
  """
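
  # Intuition with illustrative (hypothetical) numbers: num_latent_values = 11
  # and periodicity = 365.25 models a yearly cycle with 11 latent values. Each
  # timestep advances the underlying 11-value cycle by
  # num_latent_values / periodicity = 11 / 365.25 (roughly 0.03) of a latent
  # transition, i.e. the CycleStateSpaceModel transition matrix raised to that
  # fractional power, so observations interpolate smoothly between the latent
  # values over the course of a period.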

  def __init__(
      self,
      num_latent_values,
      periodicity,
      near_integer_threshold=1e-8,
      configuration=state_space_model.StateSpaceModelConfiguration()):
    """Initialize the ResolutionCycleModel.

    Args:
      num_latent_values: Controls the representational power and memory usage
        of the model. The transition matrix has shape [num_latent_values - 1,
        num_latent_values - 1]. Must be an odd integer (see class docstring
        for why).
      periodicity: The number of steps for cyclic behavior. May be a Tensor,
        and need not be an integer (although integer values greater than
        num_latent_values have more efficient special cases).
      near_integer_threshold: When avoiding singularities, controls how close
        a number should be to that singularity before the special case takes
        over.
      configuration: A StateSpaceModelConfiguration object.

    Raises:
      ValueError: If num_latent_values is not odd.
    """
    if num_latent_values % 2 != 1:
      raise ValueError("Only odd numbers of latent values are supported.")
    self._num_latent_values = num_latent_values
    self._true_periodicity = periodicity
    self._near_integer_threshold = near_integer_threshold
    super(ResolutionCycleModel, self).__init__(
        periodicity=num_latent_values,
        configuration=configuration)

  def _close_to_integer(self, value):
    value = math_ops.cast(value, self.dtype)
    return math_ops.less(
        math_ops.abs(value - gen_math_ops.round(value)),
        self._near_integer_threshold)

  def transition_to_powers(self, powers):
    """Computes TransitionMatrix^power efficiently.

    For an n x n transition matrix we have:

      (TransitionMatrix**power)_{i, j} = (-1) ** i * sin(pi * power) / (n + 1)
          * ((-1) ** j / sin(pi / (n + 1) * (power - i + j))
             + 1 / sin(pi / (n + 1) * (power - i - 1)))

    The sin(pi * power) term is zero whenever "power" is an integer. However,
    the 1 / sin(x) terms (cosecants) occasionally (when their arguments are
    multiples of pi) cancel out this value. The limit as the argument
    approaches an integer gives the "correct" result, but computing these
    terms separately gives 0 * inf = NaN. Instead, there is a special case for
    near-integer values.

    Args:
      powers: A floating point Tensor of powers to raise the transition matrix
        to.
    Returns:
      A [..., self._num_latent_values - 1, self._num_latent_values - 1]
      floating point Tensor with the transition matrix raised to each power in
      `powers`.

    """
    num_latent_values_float = math_ops.cast(self._num_latent_values, self.dtype)
    latent_values_per_period = (num_latent_values_float / math_ops.cast(
        self._true_periodicity, dtype=self.dtype))
    original_matrix_powers = (math_ops.cast(powers, self.dtype) *
                              latent_values_per_period)
    global_coeff = (math_ops.sin(original_matrix_powers * numpy.pi) /
                    num_latent_values_float)[..., None, None]
    matrix_dimension_range = array_ops.reshape(
        math_ops.range(self._num_latent_values - 1),
        array_ops.concat(
            [
                array_ops.ones(
                    [array_ops.rank(original_matrix_powers)],
                    dtype=dtypes.int32), [self._num_latent_values - 1]
            ],
            axis=0))
    matrix_dimension_range_float = math_ops.cast(matrix_dimension_range,
                                                 self.dtype)
    alternating = math_ops.cast(1 - 2 * (matrix_dimension_range % 2),
                                self.dtype)
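    # The remaining terms assemble the closed form from the docstring, with
    # `power` there corresponding to `original_matrix_powers` (the equivalent
    # power of the original cycle matrix): `row_addend` is the
    # 1 / sin(pi / num_latent_values * (power - i - 1)) cosecant (a function of
    # the row index only), `full_matrix_addend` supplies
    # (-1)^j / sin(pi / num_latent_values * (power - i + j)),
    # `alternating[..., None]` is the (-1)^i row sign, and `global_coeff` is
    # sin(pi * power) / num_latent_values.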
    row_addend = 1. / math_ops.sin(numpy.pi / num_latent_values_float * (
        original_matrix_powers[..., None] - matrix_dimension_range_float - 1))
    column_minus_row = (matrix_dimension_range_float[..., None, :]
                        - matrix_dimension_range_float[..., None])
    full_matrix_addend = (alternating[..., None, :] / math_ops.sin(
        numpy.pi / num_latent_values_float *
        (original_matrix_powers[..., None, None] + column_minus_row)))
    continuous_construction = global_coeff * alternating[..., None] * (
        row_addend[..., None] + full_matrix_addend)
    # For integer powers, the above formula is only correct in the limit,
    # yielding NaNs as written. We defer to the super-class in such cases,
    # which computes integer powers exactly.
    return array_ops.where(
        self._close_to_integer(original_matrix_powers),
        super(ResolutionCycleModel, self).transition_to_powers(
            math_ops.cast(
                gen_math_ops.round(original_matrix_powers), dtypes.int64)),
        continuous_construction)

  def transition_power_noise_accumulator(self, num_steps):
    """Sum the transitioned covariance matrix over a number of steps.

    Args:
      num_steps: An integer Tensor of any shape [...] indicating the number of
        steps to compute for each part of the batch.

    Returns:
      A [..., self._num_latent_values - 1, self._num_latent_values - 1]
      floating point Tensor corresponding to each requested number of steps,
      containing:

        sum_{i=1}^{steps} transition^i * noise_covariance
                          * (transition^i)^T
    """

    def _whole_periods_folded():
      """A more efficient special casing for integer periods.

      We knock off full periods, leaving at most self._true_periodicity steps
      to compute.

      Returns:
        A tuple of (remaining_whole_steps, current_accumulation):
          remaining_whole_steps: An integer Tensor with the same shape as the
            `num_steps` argument to `transition_power_noise_accumulator`,
            indicating the reduced number of steps which must be computed
            sequentially and added to `current_accumulation`.
          current_accumulation: A [..., self._num_latent_values - 1,
            self._num_latent_values - 1] floating point Tensor corresponding
            to the accumulations for steps which were computed in this
            function.
      """
      original_transition_noise_addition_coefficient = (math_ops.cast(
          self._true_periodicity, self.dtype) / math_ops.cast(
              self._num_latent_values, self.dtype))
      full_period_accumulation = super(
          ResolutionCycleModel, self).transition_power_noise_accumulator(
              noise_addition_coefficient=
              original_transition_noise_addition_coefficient,
              num_steps=ops.convert_to_tensor(
                  self._num_latent_values, dtype=num_steps.dtype))
      periodicity_integer = math_ops.cast(self._true_periodicity,
                                          num_steps.dtype)
      full_periods = math_ops.cast(num_steps // periodicity_integer, self.dtype)
      current_accumulation = full_periods[..., None, None] * array_ops.reshape(
          full_period_accumulation,
          array_ops.concat(
              [
                  array_ops.ones(
                      [array_ops.rank(full_periods)], dtype=dtypes.int32),
                  array_ops.shape(full_period_accumulation)
              ],
              axis=0))
      remaining_whole_steps = num_steps % periodicity_integer
      return remaining_whole_steps, current_accumulation
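
    # Example with illustrative numbers: if self._true_periodicity is the
    # integer 12 and num_steps is 40, _whole_periods_folded handles the three
    # full periods (36 steps) in closed form as 3 * full_period_accumulation
    # and returns remaining_whole_steps = 4, so only those last few steps need
    # the per-step accumulation in _power_sum_array.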

    def _no_whole_period_computation():
      """A less efficient special casing for real valued periods.

      This special casing is still preferable to computing using sequential
      matrix multiplies (parallelizable, more numerically stable), but is
      linear in the number of steps.

      Returns:
        Same shapes and types as `_whole_periods_folded`, but no folding is
        done in this function.
      """
      current_accumulation = array_ops.zeros(
          array_ops.concat(
              [
                  array_ops.shape(num_steps),
                  [self._num_latent_values - 1, self._num_latent_values - 1]
              ],
              axis=0),
          dtype=self.dtype)
      remaining_whole_steps = num_steps
      return remaining_whole_steps, current_accumulation

    # Decide whether it's feasible to compute whole periods in closed form,
    # taking advantage of the fact that a sum over self._true_periodicity steps
    # in our transition matrix is proportional to a sum over
    # self._num_latent_values steps in the unmodified matrix (because each
    # latent value gets the same treatment). This is possible for integer
    # self._true_periodicity, since we stay aligned to integer steps. For real
    # valued self._true_periodicity, or when the cyclic behavior is a higher
    # resolution than 1 per step, taking whole periods leads to misalignment
    # with integer steps, which would be difficult to recover from.
    remaining_whole_steps, current_accumulation = control_flow_ops.cond(
        self._whole_period_folding(), _whole_periods_folded,
        _no_whole_period_computation)
    steps_to_compute = math_ops.reduce_max(remaining_whole_steps)
    remaining_step_noise_additions = self._power_sum_array(steps_to_compute)
    noise_addition_scalar = array_ops.squeeze(
        self.state_transition_noise_covariance, axis=[-1, -2])
    return current_accumulation + noise_addition_scalar * array_ops.gather(
        remaining_step_noise_additions, indices=remaining_whole_steps)

  def _whole_period_folding(self):
    """Decides whether computing a whole period maintains alignment."""
    return math_ops.logical_and(
        self._close_to_integer(self._true_periodicity),
        math_ops.greater_equal(self._true_periodicity,
                               self._num_latent_values))

  def _power_sum_array(self, max_remaining_steps):
    r"""Computes \sum_{i=0}^{N-1} A^i B (A^i)^T for N=0..max_remaining_steps.

    A is the transition matrix and B is the noise covariance.

    This is more efficient in practice than math_utils.power_sums_tensor,
    since each A^i B (A^i)^T term has a closed-form expression not depending
    on i - 1. Thus vectorization can replace explicit looping.

    Uses a cumulative sum on the following expression:

      (transition^p * transition_covariance * (transition^p)^T)_{i, j}
        = (-1)^(i + j) * sin^2(pi * p) / num_latent_values^2
          * (1/sin(pi / num_latent_values * (p - i))
             + 1/sin(pi / num_latent_values * (p - i - 1)))
          * (1/sin(pi / num_latent_values * (p - j))
             + 1/sin(pi / num_latent_values * (p - j - 1)))

    The expression is derived from the eigenvectors and eigenvalues given in
    the class docstring (and, as with CycleStateSpaceModel, takes advantage of
    the sparsity of the transition covariance).

    Args:
      max_remaining_steps: A scalar integer Tensor indicating the number of
        non-trivial values to compute.
    Returns:
      A [max_remaining_steps + 1, self._num_latent_values - 1,
      self._num_latent_values - 1] floating point Tensor S with cumulative
      power sums.

        S[N] = \sum_{i=0}^{N-1} A^i B (A^i)^T
        S[0] is the zero matrix
        S[1] is B
        S[2] is A B A^T + B

    """
    num_latent_values_float = math_ops.cast(self._num_latent_values, self.dtype)
    latent_values_per_period = (num_latent_values_float / math_ops.cast(
        self._true_periodicity, dtype=self.dtype))
    original_matrix_powers = (math_ops.cast(
        math_ops.range(max_remaining_steps),
        self.dtype) * latent_values_per_period)
    matrix_dimension_range = math_ops.range(
        self._num_latent_values - 1)[None, ...]
    matrix_dimension_range_float = math_ops.cast(matrix_dimension_range,
                                                 self.dtype)

    def _cosecant_with_freq(coefficient):
      return 1. / math_ops.sin(numpy.pi / num_latent_values_float * coefficient)

    power_minus_index = (original_matrix_powers[..., None]
                         - matrix_dimension_range_float)
    mesh_values = (_cosecant_with_freq(power_minus_index)
                   + _cosecant_with_freq(power_minus_index - 1.))
    meshed = mesh_values[..., None, :] * mesh_values[..., None]
    full_matrix_alternating = math_ops.cast(1 - 2 * (
        (matrix_dimension_range[..., None, :] +
         matrix_dimension_range[..., None]) % 2), self.dtype)

    def _sine_discontinuity(value):
      """A special case for dealing with discontinuities.

      Decides whether `value` is close to an integer, and if so computes:

        lim x->n |sin(x * pi)| / sin(x * pi) = sign(sin(n * pi))
                                             = cos(n * pi)

      Args:
        value: The floating point Tensor value which may lead to a
          discontinuity.
      Returns:
        A tuple of (is_discontinuous, sign):
          is_discontinuous: A boolean Tensor of the same shape as `value`,
            indicating whether it is near an integer.
          sign: A floating point Tensor indicating the sign of the
            discontinuity (being near 1 or -1 when `is_discontinuous` is
            True), of the same shape and type as `value`.
      """
      normalized = value / num_latent_values_float
      is_discontinuous = self._close_to_integer(normalized)
      sign = math_ops.cos(normalized * numpy.pi)
      return is_discontinuous, sign

    index_discontinuous, index_sign = _sine_discontinuity(
        original_matrix_powers[..., None]
        - matrix_dimension_range_float)
    index_minus_discontinuous, index_minus_sign = _sine_discontinuity(
        original_matrix_powers[..., None]
        - matrix_dimension_range_float
        - 1)
    ones_mask_vector = math_ops.logical_or(index_discontinuous,
                                           index_minus_discontinuous)
    ones_sign_vector = array_ops.where(index_discontinuous, index_sign,
                                       index_minus_sign)
    ones_mask = math_ops.logical_and(ones_mask_vector[..., None],
                                     ones_mask_vector[..., None, :])
    zeros_mask = self._close_to_integer(original_matrix_powers)
    zeroed = array_ops.where(zeros_mask, array_ops.zeros_like(meshed), meshed)
    global_coefficient = (math_ops.sin(numpy.pi * original_matrix_powers) /
                          num_latent_values_float)
    masked_meshed = array_ops.where(
        ones_mask,
        ones_sign_vector[..., None] * ones_sign_vector[..., None, :],
        zeroed * global_coefficient[..., None, None]**2)
    powers_above_zero = full_matrix_alternating * masked_meshed
    return array_ops.pad(
        math_ops.cumsum(powers_above_zero), [(1, 0), (0, 0), (0, 0)])
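
# Illustrative usage sketch (comments only; these components are typically
# assembled by the higher-level state space ensembles in this package rather
# than instantiated directly):
#
#   weekly = CycleStateSpaceModel(periodicity=7)
#   yearly = ResolutionCycleModel(num_latent_values=11, periodicity=365.25)
#
# `num_latent_values` must be odd, and `periodicity` for ResolutionCycleModel
# may be a Tensor and need not be an integer.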