1# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Approximate kernel mapper for RBF kernel based on Random Fourier Features.""" 16 17from __future__ import absolute_import 18from __future__ import division 19from __future__ import print_function 20 21import math 22 23import numpy as np 24 25from tensorflow.contrib.kernel_methods.python.mappers import dense_kernel_mapper as dkm 26from tensorflow.python.framework import constant_op 27from tensorflow.python.framework import dtypes 28from tensorflow.python.ops import math_ops 29 30 31# TODO(sibyl-vie3Poto,felixyu): add an option to control whether the parameters in the 32# kernel map are trainable. 33class RandomFourierFeatureMapper(dkm.DenseKernelMapper): 34 r"""Class that implements Random Fourier Feature Mapping (RFFM) in TensorFlow. 35 36 The RFFM mapping is used to approximate the Gaussian (RBF) kernel: 37 $$(exp(-||x-y||_2^2 / (2 * \sigma^2))$$ 38 39 The implementation of RFFM is based on the following paper: 40 "Random Features for Large-Scale Kernel Machines" by Ali Rahimi and Ben Recht. 41 (link: https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf) 42 43 The mapping uses a matrix \\(\Omega \in R^{d x D}\\) and a bias vector 44 \\(b \in R^D\\) where \\(d\\) is the input dimension (number of dense input 45 features) and \\(D\\) is the output dimension (i.e., dimension of the feature 46 space the input is mapped to). Each entry of \\(\Omega\\) is sampled i.i.d. 47 from a (scaled) Gaussian distribution and each entry of \\(b\\) is sampled 48 independently and uniformly from [0, \\(2 * \pi\\)]. 49 50 For a single input feature vector \\(x \in R^d\\), its RFFM is defined as: 51 $$\sqrt(2/D) * cos(x * \Omega + b)$$ 52 53 where \\(cos\\) is the element-wise cosine function and \\(x, b\\) are 54 represented as row vectors. The aforementioned paper shows that the linear 55 kernel of RFFM-mapped vectors approximates the Gaussian kernel of the initial 56 vectors. 57 58 """ 59 60 def __init__(self, input_dim, output_dim, stddev=1.0, seed=1, name=None): 61 r"""Constructs a RandomFourierFeatureMapper instance. 62 63 Args: 64 input_dim: The dimension (number of features) of the tensors to be mapped. 65 output_dim: The output dimension of the mapping. 66 stddev: The standard deviation of the Gaussian kernel to be approximated. 67 The error of the classifier trained using this approximation is very 68 sensitive to this parameter. 69 seed: An integer used to initialize the parameters (\\(\Omega\\) and 70 \\(b\\)) of the mapper. For repeatable sequences across different 71 invocations of the mapper object (for instance, to ensure consistent 72 mapping both at training and eval/inference if these happen in 73 different invocations), set this to the same integer. 74 name: name for the mapper object. 75 """ 76 # TODO(sibyl-vie3Poto): Maybe infer input_dim and/or output_dim (if not explicitly 77 # provided). input_dim can be inferred lazily, the first time map is called. 78 # output_dim can be inferred from input_dim using heuristics on the error of 79 # the approximation (and, by extension, the error of the classification 80 # based on the approximation). 81 self._input_dim = input_dim 82 self._output_dim = output_dim 83 self._stddev = stddev 84 self._seed = seed 85 self._name = name 86 87 @property 88 def name(self): 89 """Returns a name for the `RandomFourierFeatureMapper` instance. 90 91 If the name provided in the constructor is `None`, then the object's unique 92 id is returned. 93 94 Returns: 95 A name for the `RandomFourierFeatureMapper` instance. 96 """ 97 return self._name or str(id(self)) 98 99 @property 100 def input_dim(self): 101 return self._input_dim 102 103 @property 104 def output_dim(self): 105 return self._output_dim 106 107 def map(self, input_tensor): 108 """Maps each row of input_tensor using random Fourier features. 109 110 Args: 111 input_tensor: a `Tensor` containing input features. It's shape is 112 [batch_size, self._input_dim]. 113 114 Returns: 115 A `Tensor` of shape [batch_size, self._output_dim] containing RFFM-mapped 116 features. 117 118 Raises: 119 InvalidShapeError: if the shape of the `input_tensor` is inconsistent with 120 expected input dimension. 121 """ 122 input_tensor_shape = input_tensor.get_shape() 123 if len(input_tensor_shape) != 2: 124 raise dkm.InvalidShapeError( 125 'The shape of the tensor should be 2. Got %d instead.' % 126 len(input_tensor_shape)) 127 128 features_dim = input_tensor_shape[1] 129 if features_dim != self._input_dim: 130 raise dkm.InvalidShapeError( 131 'Invalid dimension: expected %d input features, got %d instead.' % 132 (self._input_dim, features_dim)) 133 134 # Add ops that compute (deterministically) omega_matrix and bias based on 135 # the provided seed. 136 # TODO(sibyl-vie3Poto): Storing the mapper's parameters (omega_matrix and bias) as 137 # constants incurs no RPC calls to the parameter server during distributed 138 # training. However, if the parameters grow too large (for instance if they 139 # don't fit into memory or if they blow up the size of the GraphDef proto), 140 # stroring them as constants is no longer an option. In this case, we should 141 # have a heuristic to choose out of one of the following alternatives: 142 # a) store them as variables (in the parameter server) 143 # b) store them as worker local variables 144 # c) generating on the fly the omega matrix at each step 145 np.random.seed(self._seed) 146 omega_matrix_shape = [self._input_dim, self._output_dim] 147 bias_shape = [self._output_dim] 148 149 omega_matrix = constant_op.constant( 150 np.random.normal( 151 scale=1.0 / self._stddev, size=omega_matrix_shape), 152 dtype=dtypes.float32) 153 bias = constant_op.constant( 154 np.random.uniform( 155 low=0.0, high=2 * np.pi, size=bias_shape), 156 dtype=dtypes.float32) 157 158 x_omega_plus_bias = math_ops.add( 159 math_ops.matmul(input_tensor, omega_matrix), bias) 160 return math.sqrt(2.0 / self._output_dim) * math_ops.cos(x_omega_plus_bias) 161