# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# pylint: disable=g-classes-have-attributes
"""Keras layers that implement explicit (approximate) kernel feature maps."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import six

from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.keras import initializers
from tensorflow.python.keras.engine import base_layer
from tensorflow.python.keras.engine import input_spec
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.util.tf_export import keras_export

_SUPPORTED_RBF_KERNEL_TYPES = ['gaussian', 'laplacian']


@keras_export('keras.layers.experimental.RandomFourierFeatures')
class RandomFourierFeatures(base_layer.Layer):
  r"""Layer that projects its inputs into a random feature space.

  This layer implements a mapping from input space to a space with `output_dim`
  dimensions, which approximates shift-invariant kernels. A kernel function
  `K(x, y)` is shift-invariant if `K(x, y) == k(x - y)` for some function `k`.
  Many popular Radial Basis Functions (RBF), including Gaussian and
  Laplacian kernels, are shift-invariant.

  The implementation of this layer is based on the following paper:
  ["Random Features for Large-Scale Kernel Machines"](
    https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf)
  by Ali Rahimi and Ben Recht.

  The distribution from which the parameters of the random features map (layer)
  are sampled determines which shift-invariant kernel the layer approximates
  (see paper for more details). You can use a distribution of your choice. The
  layer supports out-of-the-box approximations of the following two RBF
  kernels:

  - Gaussian: `K(x, y) == exp(- square(x - y) / (2 * square(scale)))`
  - Laplacian: `K(x, y) == exp(-abs(x - y) / scale)`

  **Note:** Unlike what is described in the paper and unlike what is used in
  the Scikit-Learn implementation, the output of this layer does not apply
  the `sqrt(2 / D)` normalization factor.
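
  A minimal sketch of how the kernel can still be recovered despite the
  missing factor (assuming `x` and `y` are batches of inputs and `tf` is
  TensorFlow):

  ```python
  layer = RandomFourierFeatures(output_dim=4096, kernel_initializer='gaussian')
  zx, zy = layer(x), layer(y)
  # Rescaling by `2 / output_dim` compensates for the omitted
  # `sqrt(2 / output_dim)` factor on each feature vector.
  approx_kernel_matrix = tf.matmul(zx, zy, transpose_b=True) * 2.0 / 4096
  ```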

  **Usage:** Typically, this layer is used to "kernelize" linear models by
  applying a non-linear transformation (this layer) to the input features and
  then training a linear model on top of the transformed features. Depending
  on the loss function of the linear model, the composition of this layer and
  the linear model is equivalent (up to approximation) to kernel SVMs (for
  hinge loss), kernel logistic regression (for logistic loss), kernel linear
  regression (for squared loss), etc.

  Examples:

  A kernel multinomial logistic regression model with a Gaussian kernel for
  MNIST:

  ```python
  model = tf.keras.Sequential([
    tf.keras.Input(shape=(784,)),
    tf.keras.layers.experimental.RandomFourierFeatures(
        output_dim=4096,
        scale=10.,
        kernel_initializer='gaussian'),
    tf.keras.layers.Dense(units=10, activation='softmax'),
  ])
  model.compile(
      optimizer='adam',
      loss='categorical_crossentropy',
      metrics=['categorical_accuracy']
  )
  ```

  A quasi-SVM classifier for MNIST:

  ```python
  model = tf.keras.Sequential([
    tf.keras.Input(shape=(784,)),
    tf.keras.layers.experimental.RandomFourierFeatures(
        output_dim=4096,
        scale=10.,
        kernel_initializer='gaussian'),
    tf.keras.layers.Dense(units=10),
  ])
  model.compile(
      optimizer='adam',
      loss='hinge',
      metrics=['categorical_accuracy']
  )
  ```

  To use another kernel, just replace the layer creation line with:

  ```python
  random_features_layer = RandomFourierFeatures(
      output_dim=500,
      kernel_initializer=<my_initializer>,
      scale=...,
      ...)
  ```
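
  For instance, to approximate the Laplacian kernel defined above, pass its
  string identifier (the values below are illustrative):

  ```python
  random_features_layer = RandomFourierFeatures(
      output_dim=500,
      kernel_initializer='laplacian',
      scale=5.0)
  ```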

  Args:
    output_dim: Positive integer, the dimension of the layer's output, i.e.,
      the number of random features used to approximate the kernel.
    kernel_initializer: Determines the distribution of the parameters of the
      random features map (and therefore the kernel approximated by the layer).
      It can be either a string identifier or a Keras `Initializer` instance.
      Currently only 'gaussian' and 'laplacian' are supported string
      identifiers (case insensitive). Note that the kernel matrix is not
      trainable.
    scale: For Gaussian and Laplacian kernels, this corresponds to a scaling
      factor of the corresponding kernel approximated by the layer (see
      concrete definitions above). When provided, it should be a positive
      float. If None, a default value is used: if the kernel initializer is
      set to "gaussian", `scale` defaults to `sqrt(input_dim / 2)`; otherwise,
      it defaults to 1.0. Both the approximation error of the kernel and the
      classification quality are sensitive to this parameter. If `trainable`
      is set to `True`, this parameter is learned end-to-end during training
      and the provided value serves as the initial value.
      **Note:** When features from this layer are fed to a linear model,
        making `scale` trainable renders the resulting optimization problem
        non-convex (even if the loss function used by the linear model is
        convex).
    trainable: Whether the scaling parameter of the layer should be trainable.
      Defaults to `False`.
    name: String, name to use for this layer.
  """

  def __init__(self,
               output_dim,
               kernel_initializer='gaussian',
               scale=None,
               trainable=False,
               name=None,
               **kwargs):
    if output_dim <= 0:
      raise ValueError(
          '`output_dim` should be a positive integer. Given: {}.'.format(
              output_dim))
    if isinstance(kernel_initializer, six.string_types):
      if kernel_initializer.lower() not in _SUPPORTED_RBF_KERNEL_TYPES:
        raise ValueError(
            'Unsupported kernel type: \'{}\'. Supported kernel types: {}.'
            .format(kernel_initializer, _SUPPORTED_RBF_KERNEL_TYPES))
    if scale is not None and scale <= 0.0:
      raise ValueError('When provided, `scale` should be a positive float. '
                       'Given: {}.'.format(scale))
    super(RandomFourierFeatures, self).__init__(
        trainable=trainable, name=name, **kwargs)
    self.output_dim = output_dim
    self.kernel_initializer = kernel_initializer
    self.scale = scale

  def build(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape)
    # TODO(sibyl-vie3Poto): Allow higher dimension inputs. Currently the input
    # is expected to have shape [batch_size, dimension].
    if input_shape.rank != 2:
      raise ValueError(
          'The rank of the input tensor should be 2. Got {} instead.'.format(
              input_shape.rank))
    if input_shape.dims[1].value is None:
      raise ValueError(
          'The last dimension of the inputs to `RandomFourierFeatures` '
          'should be defined. Found `None`.')
    self.input_spec = input_spec.InputSpec(
        ndim=2, axes={1: input_shape.dims[1].value})
    input_dim = input_shape.dims[1].value

    kernel_initializer = _get_random_features_initializer(
        self.kernel_initializer, shape=(input_dim, self.output_dim))

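    # The projection matrix is sampled once at build time from the
    # kernel-specific distribution and is kept fixed (it is not trained).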
    self.unscaled_kernel = self.add_weight(
        name='unscaled_kernel',
        shape=(input_dim, self.output_dim),
        dtype=dtypes.float32,
        initializer=kernel_initializer,
        trainable=False)

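    # Random phase offsets, drawn uniformly from [0, 2 * pi), as in the
    # random Fourier features construction.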
    self.bias = self.add_weight(
        name='bias',
        shape=(self.output_dim,),
        dtype=dtypes.float32,
        initializer=init_ops.random_uniform_initializer(
            minval=0.0, maxval=2 * np.pi, dtype=dtypes.float32),
        trainable=False)

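    # The scale is stored as a weight so that, when the layer is constructed
    # with `trainable=True`, it can be learned end-to-end; the `NonNeg`
    # constraint keeps it non-negative.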
    if self.scale is None:
      self.scale = _get_default_scale(self.kernel_initializer, input_dim)
    self.kernel_scale = self.add_weight(
        name='kernel_scale',
        shape=(1,),
        dtype=dtypes.float32,
        initializer=init_ops.constant_initializer(self.scale),
        trainable=True,
        constraint='NonNeg')
    super(RandomFourierFeatures, self).build(input_shape)

  def call(self, inputs):
    inputs = ops.convert_to_tensor_v2_with_dispatch(inputs, dtype=self.dtype)
    inputs = math_ops.cast(inputs, dtypes.float32)
    # Rescale the fixed random projection by the (possibly learned) scale.
    kernel = (1.0 / self.kernel_scale) * self.unscaled_kernel
    outputs = gen_math_ops.MatMul(a=inputs, b=kernel)
    outputs = nn.bias_add(outputs, self.bias)
    # Random Fourier feature map, cos(x . W / scale + b), without the
    # `sqrt(2 / output_dim)` normalization discussed in the class docstring.
    return gen_math_ops.cos(outputs)

  def compute_output_shape(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape)
    input_shape = input_shape.with_rank(2)
    if input_shape.dims[-1].value is None:
      raise ValueError(
          'The innermost dimension of input shape must be defined. '
          'Given: {}.'.format(input_shape))
    return input_shape[:-1].concatenate(self.output_dim)

  def get_config(self):
    kernel_initializer = self.kernel_initializer
    if not isinstance(kernel_initializer, six.string_types):
      kernel_initializer = initializers.serialize(kernel_initializer)
    config = {
        'output_dim': self.output_dim,
        'kernel_initializer': kernel_initializer,
        'scale': self.scale,
    }
    base_config = super(RandomFourierFeatures, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))


def _get_random_features_initializer(initializer, shape):
  """Returns an `Initializer` object for the random features matrix."""

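  # For the Laplacian kernel, the projection weights follow a Cauchy
  # distribution (the Fourier transform of the Laplacian kernel). Samples are
  # drawn via inverse-CDF sampling: tan(pi * (u - 0.5)) is standard Cauchy
  # for u uniform on (0, 1).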
  def _get_cauchy_samples(loc, scale, shape):
    probs = np.random.uniform(low=0., high=1., size=shape)
    return loc + scale * np.tan(np.pi * (probs - 0.5))

  random_features_initializer = initializer
  if isinstance(initializer, six.string_types):
    if initializer.lower() == 'gaussian':
      random_features_initializer = init_ops.random_normal_initializer(
          stddev=1.0)
    elif initializer.lower() == 'laplacian':
      random_features_initializer = init_ops.constant_initializer(
          _get_cauchy_samples(loc=0.0, scale=1.0, shape=shape))
    else:
      raise ValueError(
          'Unsupported kernel type: \'{}\'. Supported kernel types: {}.'.format(
              random_features_initializer, _SUPPORTED_RBF_KERNEL_TYPES))
  return random_features_initializer


def _get_default_scale(initializer, input_dim):
  if (isinstance(initializer, six.string_types) and
      initializer.lower() == 'gaussian'):
    return np.sqrt(input_dim / 2.0)
  return 1.0