# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# pylint: disable=g-classes-have-attributes
"""Keras layers that implement explicit (approximate) kernel feature maps."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import six

from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.keras import initializers
from tensorflow.python.keras.engine import base_layer
from tensorflow.python.keras.engine import input_spec
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.util.tf_export import keras_export

_SUPPORTED_RBF_KERNEL_TYPES = ['gaussian', 'laplacian']


@keras_export('keras.layers.experimental.RandomFourierFeatures')
class RandomFourierFeatures(base_layer.Layer):
  r"""Layer that projects its inputs into a random feature space.

  This layer implements a mapping from input space to a space with `output_dim`
  dimensions, which approximates shift-invariant kernels. A kernel function
  `K(x, y)` is shift-invariant if `K(x, y) == k(x - y)` for some function `k`.
  Many popular Radial Basis Functions (RBF), including Gaussian and
  Laplacian kernels, are shift-invariant.

  The implementation of this layer is based on the following paper:
  ["Random Features for Large-Scale Kernel Machines"](
    https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf)
  by Ali Rahimi and Ben Recht.

  The distribution from which the parameters of the random features map (layer)
  are sampled determines which shift-invariant kernel the layer approximates
  (see paper for more details). You can use the distribution of your
  choice. The layer supports out-of-the-box
  approximations of the following two RBF kernels:

  - Gaussian: `K(x, y) == exp(- square(x - y) / (2 * square(scale)))`
  - Laplacian: `K(x, y) == exp(-abs(x - y) / scale)`

  **Note:** Unlike what is described in the paper and unlike what is used in
  the Scikit-Learn implementation, the output of this layer does not apply
  the `sqrt(2 / D)` normalization factor.

  **Usage:** Typically, this layer is used to "kernelize" linear models by
  applying a non-linear transformation (this layer) to the input features and
  then training a linear model on top of the transformed features. Depending on
  the loss function of the linear model, the composition of this layer and the
  linear model results in models that are equivalent (up to approximation) to
  kernel SVMs (for hinge loss), kernel logistic regression (for logistic loss),
  kernel linear regression (for squared loss), etc.

  Examples:

  A kernel multinomial logistic regression model with Gaussian kernel for MNIST:

  ```python
  model = keras.Sequential([
    keras.Input(shape=(784,)),
    RandomFourierFeatures(
        output_dim=4096,
        scale=10.,
        kernel_initializer='gaussian'),
    layers.Dense(units=10, activation='softmax'),
  ])
  model.compile(
      optimizer='adam',
      loss='categorical_crossentropy',
      metrics=['categorical_accuracy']
  )
  ```

  A quasi-SVM classifier for MNIST:

  ```python
  model = keras.Sequential([
    keras.Input(shape=(784,)),
    RandomFourierFeatures(
        output_dim=4096,
        scale=10.,
        kernel_initializer='gaussian'),
    layers.Dense(units=10),
  ])
  model.compile(
      optimizer='adam',
      loss='hinge',
      metrics=['categorical_accuracy']
  )
  ```

  To use another kernel, just replace the layer creation line with:

  ```python
  random_features_layer = RandomFourierFeatures(
      output_dim=500,
      kernel_initializer=<my_initializer>,
      scale=...,
      ...)
  ```

  Args:
    output_dim: Positive integer, the dimension of the layer's output, i.e., the
      number of random features used to approximate the kernel.
    kernel_initializer: Determines the distribution of the parameters of the
      random features map (and therefore the kernel approximated by the layer).
      It can be either a string identifier or a Keras `Initializer` instance.
      Currently only 'gaussian' and 'laplacian' are supported string
      identifiers (case insensitive). Note that the kernel matrix is not
      trainable.
    scale: For Gaussian and Laplacian kernels, this corresponds to a scaling
      factor of the corresponding kernel approximated by the layer (see concrete
      definitions above). When provided, it should be a positive float. If None,
      a default value is used: if the kernel initializer is set to "gaussian",
      `scale` defaults to `sqrt(input_dim / 2)`, otherwise, it defaults to 1.0.
      Both the approximation error of the kernel and the classification quality
      are sensitive to this parameter. If `trainable` is set to `True`, this
      parameter is learned end-to-end during training and the provided value
      serves as the initial value.
      **Note:** When features from this layer are fed to a linear model,
        by making `scale` trainable, the resulting optimization problem is
        no longer convex (even if the loss function used by the linear model
        is convex).
    trainable: Whether the scaling parameter of the layer should be trainable.
      Defaults to `False`.
    name: String, name to use for this layer.
  """

  def __init__(self,
               output_dim,
               kernel_initializer='gaussian',
               scale=None,
               trainable=False,
               name=None,
               **kwargs):
    """Validates the constructor arguments and stores the configuration.

    Raises:
      ValueError: If `output_dim` is not positive, if `kernel_initializer` is a
        string that is not one of the supported kernel types, or if `scale` is
        provided and is not positive.
    """
    if output_dim <= 0:
      raise ValueError(
          '`output_dim` should be a positive integer. Given: {}.'.format(
              output_dim))
    if isinstance(kernel_initializer, six.string_types):
      # String identifiers are matched case-insensitively.
      if kernel_initializer.lower() not in _SUPPORTED_RBF_KERNEL_TYPES:
        raise ValueError(
            'Unsupported kernel type: \'{}\'. Supported kernel types: {}.'
            .format(kernel_initializer, _SUPPORTED_RBF_KERNEL_TYPES))
    if scale is not None and scale <= 0.0:
      raise ValueError('When provided, `scale` should be a positive float. '
                       'Given: {}.'.format(scale))
    super(RandomFourierFeatures, self).__init__(
        trainable=trainable, name=name, **kwargs)
    self.output_dim = output_dim
    self.kernel_initializer = kernel_initializer
    self.scale = scale

  def build(self, input_shape):
    """Creates the layer's weights for inputs of shape `input_shape`.

    Three weights are added: `unscaled_kernel` with shape
    `(input_dim, output_dim)` and `bias` with shape `(output_dim,)`, both
    created as non-trainable, and `kernel_scale` with shape `(1,)`, which is
    constrained to be non-negative. If `scale` was not provided at
    construction time, `self.scale` is replaced here by the default value for
    the configured kernel initializer.

    Args:
      input_shape: Shape of the inputs; must have rank 2 with a defined last
        dimension.

    Raises:
      ValueError: If the rank of `input_shape` is not 2 or if its last
        dimension is undefined.
    """
    input_shape = tensor_shape.TensorShape(input_shape)
    # TODO(sibyl-vie3Poto): Allow higher dimension inputs. Currently the input is expected
    # to have shape [batch_size, dimension].
    if input_shape.rank != 2:
      raise ValueError(
          'The rank of the input tensor should be 2. Got {} instead.'.format(
              input_shape.rank))
    if input_shape.dims[1].value is None:
      raise ValueError(
          'The last dimension of the inputs to `RandomFourierFeatures` '
          'should be defined. Found `None`.')
    self.input_spec = input_spec.InputSpec(
        ndim=2, axes={1: input_shape.dims[1].value})
    input_dim = input_shape.dims[1].value

    kernel_initializer = _get_random_features_initializer(
        self.kernel_initializer, shape=(input_dim, self.output_dim))

    self.unscaled_kernel = self.add_weight(
        name='unscaled_kernel',
        shape=(input_dim, self.output_dim),
        dtype=dtypes.float32,
        initializer=kernel_initializer,
        trainable=False)

    # Random phase offsets drawn uniformly between 0 and 2 * pi.
    self.bias = self.add_weight(
        name='bias',
        shape=(self.output_dim,),
        dtype=dtypes.float32,
        initializer=init_ops.random_uniform_initializer(
            minval=0.0, maxval=2 * np.pi, dtype=dtypes.float32),
        trainable=False)

    if self.scale is None:
      self.scale = _get_default_scale(self.kernel_initializer, input_dim)
    # NOTE(review): this weight is always created with `trainable=True`;
    # presumably the layer-level `trainable` flag passed to `__init__` decides
    # whether it is actually updated during training -- confirm against
    # `base_layer.Layer`.
    self.kernel_scale = self.add_weight(
        name='kernel_scale',
        shape=(1,),
        dtype=dtypes.float32,
        initializer=init_ops.constant_initializer(self.scale),
        trainable=True,
        constraint='NonNeg')
    super(RandomFourierFeatures, self).build(input_shape)

  def call(self, inputs):
    """Maps `inputs` to random features.

    Computes `cos(matmul(inputs, unscaled_kernel / kernel_scale) + bias)` after
    casting the inputs to float32.

    Args:
      inputs: Value convertible to a tensor.

    Returns:
      The tensor of random features.
    """
    inputs = ops.convert_to_tensor_v2_with_dispatch(inputs, dtype=self.dtype)
    inputs = math_ops.cast(inputs, dtypes.float32)
    kernel = (1.0 / self.kernel_scale) * self.unscaled_kernel
    outputs = gen_math_ops.MatMul(a=inputs, b=kernel)
    outputs = nn.bias_add(outputs, self.bias)
    return gen_math_ops.cos(outputs)

  def compute_output_shape(self, input_shape):
    """Returns `input_shape` with its last dimension replaced by `output_dim`.

    Args:
      input_shape: Shape of the inputs; must be compatible with rank 2.

    Raises:
      ValueError: If the innermost dimension of `input_shape` is undefined.
    """
    input_shape = tensor_shape.TensorShape(input_shape)
    input_shape = input_shape.with_rank(2)
    if input_shape.dims[-1].value is None:
      raise ValueError(
          'The innermost dimension of input shape must be defined. Given: %s' %
          input_shape)
    return input_shape[:-1].concatenate(self.output_dim)

  def get_config(self):
    """Returns the layer configuration as a dict.

    String kernel initializers are stored as-is; any other initializer is
    serialized with `initializers.serialize`. The `scale` entry holds
    `self.scale`, i.e. the value given at construction time or, once `build`
    has run with no value given, the default chosen there.
    """
    kernel_initializer = self.kernel_initializer
    if not isinstance(kernel_initializer, six.string_types):
      kernel_initializer = initializers.serialize(kernel_initializer)
    config = {
        'output_dim': self.output_dim,
        'kernel_initializer': kernel_initializer,
        'scale': self.scale,
    }
    base_config = super(RandomFourierFeatures, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))


def _get_random_features_initializer(initializer, shape):
  """Returns Initializer object for random features.

  Args:
    initializer: Either a string kernel identifier (case insensitive) or any
      other object, which is returned unchanged.
    shape: Shape of the kernel matrix; only used to draw the samples for the
      'laplacian' kernel.

  Returns:
    A random normal initializer (stddev 1.0) for 'gaussian', a constant
    initializer holding Cauchy samples for 'laplacian', or `initializer` itself
    when it is not a string.

  Raises:
    ValueError: If `initializer` is a string that is not a supported kernel
      type.
  """

  def _get_cauchy_samples(loc, scale, shape):
    # Maps uniform samples through `loc + scale * tan(pi * (p - 0.5))`, the
    # inverse CDF of the Cauchy distribution.
    probs = np.random.uniform(low=0., high=1., size=shape)
    return loc + scale * np.tan(np.pi * (probs - 0.5))

  random_features_initializer = initializer
  if isinstance(initializer, six.string_types):
    kernel_type = initializer.lower()
    if kernel_type == 'gaussian':
      random_features_initializer = init_ops.random_normal_initializer(
          stddev=1.0)
    elif kernel_type == 'laplacian':
      # The samples are drawn with NumPy right here and wrapped in a constant
      # initializer.
      random_features_initializer = init_ops.constant_initializer(
          _get_cauchy_samples(loc=0.0, scale=1.0, shape=shape))
    else:
      raise ValueError(
          'Unsupported kernel type: \'{}\'. Supported kernel types: {}.'.format(
              random_features_initializer, _SUPPORTED_RBF_KERNEL_TYPES))
  return random_features_initializer


def _get_default_scale(initializer, input_dim):
  """Returns the default `scale` for the given kernel initializer.

  The default is `sqrt(input_dim / 2)` when `initializer` is the string
  'gaussian' (case insensitive) and 1.0 otherwise.
  """
  if (isinstance(initializer, six.string_types) and
      initializer.lower() == 'gaussian'):
    return np.sqrt(input_dim / 2.0)
  return 1.0