# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
15"""Approximate kernel mapper for RBF kernel based on Random Fourier Features."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import math
22
23import numpy as np
24
25from tensorflow.contrib.kernel_methods.python.mappers import dense_kernel_mapper as dkm
26from tensorflow.python.framework import constant_op
27from tensorflow.python.framework import dtypes
28from tensorflow.python.ops import math_ops
29
30
31# TODO(sibyl-vie3Poto,felixyu): add an option to control whether the parameters in the
32# kernel map are trainable.
33class RandomFourierFeatureMapper(dkm.DenseKernelMapper):
  r"""Class that implements Random Fourier Feature Mapping (RFFM) in TensorFlow.

  The RFFM mapping is used to approximate the Gaussian (RBF) kernel:
  $$exp(-||x-y||_2^2 / (2 * \sigma^2))$$

  The implementation of RFFM is based on the following paper:
  "Random Features for Large-Scale Kernel Machines" by Ali Rahimi and Ben Recht.
  (link: https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf)

  The mapping uses a matrix \\(\Omega \in R^{d x D}\\) and a bias vector
  \\(b \in R^D\\) where \\(d\\) is the input dimension (number of dense input
  features) and \\(D\\) is the output dimension (i.e., dimension of the feature
  space the input is mapped to). Each entry of \\(\Omega\\) is sampled i.i.d.
  from a (scaled) Gaussian distribution and each entry of \\(b\\) is sampled
  independently and uniformly from \\([0, 2\pi]\\).

  For a single input feature vector \\(x \in R^d\\), its RFFM is defined as:
  $$\sqrt{2/D} * cos(x * \Omega + b)$$

  where \\(cos\\) is the element-wise cosine function and \\(x, b\\) are
  represented as row vectors. The aforementioned paper shows that the linear
  kernel of RFFM-mapped vectors approximates the Gaussian kernel of the initial
  vectors.
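
  A minimal usage sketch (the dimensions and values below are illustrative):

  ```python
  mapper = RandomFourierFeatureMapper(input_dim=4, output_dim=1000, stddev=2.0)
  features = constant_op.constant([[1.0, 2.0, 3.0, 4.0]], dtype=dtypes.float32)
  mapped_features = mapper.map(features)  # A `Tensor` of shape [1, 1000].
  ```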

  """

  def __init__(self, input_dim, output_dim, stddev=1.0, seed=1, name=None):
    r"""Constructs a RandomFourierFeatureMapper instance.

    Args:
      input_dim: The dimension (number of features) of the tensors to be mapped.
      output_dim: The output dimension of the mapping.
      stddev: The standard deviation of the Gaussian kernel to be approximated.
        The error of the classifier trained using this approximation is very
        sensitive to this parameter.
      seed: An integer used to initialize the parameters (\\(\Omega\\) and
        \\(b\\)) of the mapper. For repeatable sequences across different
        invocations of the mapper object (for instance, to ensure consistent
        mapping both at training and eval/inference if these happen in
        different invocations), set this to the same integer.
      name: Name for the mapper object.
    """
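    # Reproducibility sketch (illustrative): mappers constructed with identical
    # arguments define identical mappings, since `map` re-seeds numpy with
    # `self._seed` before drawing the parameters. For example:
    #   mapper_a = RandomFourierFeatureMapper(4, 100, seed=7)
    #   mapper_b = RandomFourierFeatureMapper(4, 100, seed=7)
    # mapper_a.map(x) and mapper_b.map(x) produce numerically equal tensors.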
    # TODO(sibyl-vie3Poto): Maybe infer input_dim and/or output_dim (if not explicitly
    # provided). input_dim can be inferred lazily, the first time map is called.
    # output_dim can be inferred from input_dim using heuristics on the error of
    # the approximation (and, by extension, the error of the classification
    # based on the approximation).
    self._input_dim = input_dim
    self._output_dim = output_dim
    self._stddev = stddev
    self._seed = seed
    self._name = name

  @property
  def name(self):
    """Returns a name for the `RandomFourierFeatureMapper` instance.

    If the name provided in the constructor is `None`, then the object's unique
    id is returned.

    Returns:
      A name for the `RandomFourierFeatureMapper` instance.
    """
    return self._name or str(id(self))

  @property
  def input_dim(self):
    return self._input_dim

  @property
  def output_dim(self):
    return self._output_dim

  def map(self, input_tensor):
    """Maps each row of input_tensor using random Fourier features.

    Args:
      input_tensor: A `Tensor` containing input features. Its shape is
        [batch_size, self._input_dim].

    Returns:
      A `Tensor` of shape [batch_size, self._output_dim] containing RFFM-mapped
      features.

    Raises:
      InvalidShapeError: if the shape of the `input_tensor` is inconsistent with
        the expected input dimension.
    """
    input_tensor_shape = input_tensor.get_shape()
    if len(input_tensor_shape) != 2:
      raise dkm.InvalidShapeError(
          'The rank of the input tensor should be 2. Got %d instead.' %
          len(input_tensor_shape))

    features_dim = input_tensor_shape[1]
    if features_dim != self._input_dim:
      raise dkm.InvalidShapeError(
          'Invalid dimension: expected %d input features, got %d instead.' %
          (self._input_dim, features_dim))

    # Add ops that compute (deterministically) omega_matrix and bias based on
    # the provided seed.
    # TODO(sibyl-vie3Poto): Storing the mapper's parameters (omega_matrix and bias) as
    # constants incurs no RPC calls to the parameter server during distributed
    # training. However, if the parameters grow too large (for instance if they
    # don't fit into memory or if they blow up the size of the GraphDef proto),
    # storing them as constants is no longer an option. In this case, we should
    # have a heuristic to choose one of the following alternatives:
    # a) store them as variables (in the parameter server)
    # b) store them as worker local variables
    # c) generate the omega matrix on the fly at each step
    np.random.seed(self._seed)
    omega_matrix_shape = [self._input_dim, self._output_dim]
    bias_shape = [self._output_dim]

    # Each entry of omega is drawn i.i.d. from N(0, 1 / stddev^2), matching the
    # Fourier transform of the RBF kernel with parameter stddev.
    omega_matrix = constant_op.constant(
        np.random.normal(
            scale=1.0 / self._stddev, size=omega_matrix_shape),
        dtype=dtypes.float32)
    # Each entry of the bias is drawn independently and uniformly from [0, 2*pi).
    bias = constant_op.constant(
        np.random.uniform(
            low=0.0, high=2 * np.pi, size=bias_shape),
        dtype=dtypes.float32)

    x_omega_plus_bias = math_ops.add(
        math_ops.matmul(input_tensor, omega_matrix), bias)
    # RFFM: sqrt(2/D) * cos(x * Omega + b), applied row-wise.
    return math.sqrt(2.0 / self._output_dim) * math_ops.cos(x_omega_plus_bias)
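

# A minimal verification sketch (illustrative, run as a script; not part of the
# library API, and `session` is imported here only for the demo). The inner
# product of RFFM-mapped vectors should approximate the RBF kernel value of the
# original vectors, and the approximation improves as output_dim grows.
if __name__ == '__main__':
  from tensorflow.python.client import session

  mapper = RandomFourierFeatureMapper(input_dim=2, output_dim=2000, stddev=1.0)
  x = constant_op.constant([[0.0, 0.0]], dtype=dtypes.float32)
  y = constant_op.constant([[1.0, 1.0]], dtype=dtypes.float32)
  # Linear kernel in the mapped space; `map` re-seeds numpy on each call, so
  # both mappings use the same omega_matrix and bias.
  approx = math_ops.matmul(mapper.map(x), mapper.map(y), transpose_b=True)
  # Exact RBF kernel: exp(-||x - y||^2 / (2 * stddev^2)) = exp(-1) ~= 0.368.
  exact = math.exp(-2.0 / 2.0)
  with session.Session() as sess:
    print('approximate kernel value:', sess.run(approx)[0, 0])
    print('exact kernel value:      ', exact)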