1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_KERNELS_FAKE_QUANT_OPS_FUNCTOR_H_
17 #define TENSORFLOW_CORE_KERNELS_FAKE_QUANT_OPS_FUNCTOR_H_
18 
19 #include <tuple>
20 
21 #define EIGEN_STACK_ALLOCATION_LIMIT 0
22 #define EIGEN_USE_THREADS
23 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
24 #include "tensorflow/core/framework/tensor_types.h"
25 #include "tensorflow/core/platform/types.h"
26 
// Rounds `input` to the nearest integral value and returns it as a float.
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float StdRound(float input) {
#if !defined(__ANDROID__)
  return std::round(input);
#else
  // Android does not provide std::round(), only the unqualified round().
  return round(input);
#endif
}
35 
36 namespace tensorflow {
37 
38 // Gymnastics with nudged zero point is to ensure that real zero maps to
39 // an integer, which is required for e.g. zero-padding in convolutional layers.
40 // Outputs nudged_min, nudged_max, nudged_scale.
Nudge(const float min,const float max,const int quant_min,const int quant_max,float * nudged_min,float * nudged_max,float * scale)41 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void Nudge(
42     const float min, const float max, const int quant_min, const int quant_max,
43     float* nudged_min, float* nudged_max, float* scale) {
44   const float quant_min_float = static_cast<float>(quant_min);
45   const float quant_max_float = static_cast<float>(quant_max);
46   *scale = (max - min) / (quant_max_float - quant_min_float);
47   const float zero_point_from_min = quant_min_float - min / *scale;
48   const uint16 nudged_zero_point = [zero_point_from_min, quant_min,
49                                     quant_min_float, quant_max,
50                                     quant_max_float] {
51     if (zero_point_from_min < quant_min_float) {
52       return static_cast<uint16>(quant_min);
53     }
54     if (zero_point_from_min > quant_max_float) {
55       return static_cast<uint16>(quant_max);
56     }
57     return static_cast<uint16>(StdRound(zero_point_from_min));
58   }();
59   *nudged_min = (quant_min_float - nudged_zero_point) * (*scale);
60   *nudged_max = (quant_max_float - nudged_zero_point) * (*scale);
61 }
62 
63 template <typename T>
64 using ConstScalar = typename tensorflow::TTypes<T>::ConstScalar;
65 template <typename T>
66 using Scalar = typename tensorflow::TTypes<T>::Scalar;
67 template <typename T>
68 using ConstVec = typename tensorflow::TTypes<T>::ConstVec;
69 template <typename T>
70 using Vec = typename tensorflow::TTypes<T>::Vec;
71 template <typename T>
72 using ConstFlat = typename tensorflow::TTypes<T>::ConstFlat;
73 template <typename T>
74 using Flat = typename tensorflow::TTypes<T>::Flat;
75 
76 // Functor called by FakeQuantWithMinMaxArgsOp to do the work.  Compiles both
77 // for CPU and GPU.
78 template <typename Device>
79 struct FakeQuantWithMinMaxArgsFunctor {
operatorFakeQuantWithMinMaxArgsFunctor80   void operator()(const Device& d, ConstFlat<float> inputs, const float min,
81                   const float max, const int quant_min, const int quant_max,
82                   Flat<float> outputs) {
83     eigen_assert(min <= 0.0f && "min should be <= 0.0");
84     eigen_assert(max >= 0.0f && "max should be >= 0.0");
85     eigen_assert(min < max && "min should be < max");
86 
87     float nudged_min, nudged_max, nudged_scale;
88     Nudge(min, max, quant_min, quant_max, &nudged_min, &nudged_max,
89           &nudged_scale);
90     const float inv_nudged_scale = 1.0f / nudged_scale;
91 
92     auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
93     auto clamped_shifted = clamped - nudged_min;
94     outputs.device(d) =
95         (clamped_shifted * inv_nudged_scale + 0.5f).floor() * nudged_scale +
96         nudged_min;
97   }
98 };
99 
100 // Functor called by FakeQuantWithMinMaxArgsGradientOp to do the work.  Compiles
101 // both for CPU and GPU.
102 template <typename Device>
103 struct FakeQuantWithMinMaxArgsGradientFunctor {
operatorFakeQuantWithMinMaxArgsGradientFunctor104   void operator()(const Device& d, ConstFlat<float> gradients,
105                   ConstFlat<float> inputs, const float min, const float max,
106                   const int quant_min, const int quant_max,
107                   Flat<float> backprops) {
108     eigen_assert(min <= 0.0f && "min should be <= 0.0");
109     eigen_assert(max >= 0.0f && "max should be >= 0.0");
110     eigen_assert(min < max && "min should be < max");
111 
112     float nudged_min, nudged_max, nudged_scale;
113     Nudge(min, max, quant_min, quant_max, &nudged_min, &nudged_max,
114           &nudged_scale);
115 
116     auto between_nudged_min_max =
117         (inputs >= nudged_min && inputs <= nudged_max)
118             .select(inputs.constant(1.0f), inputs.constant(0.0f));
119     backprops.device(d) = gradients * between_nudged_min_max;
120   }
121 };
122 
123 // Functor called by FakeQuantWithMinMaxVarsOp to do the work.  Compiles both
124 // for CPU and GPU.
125 template <typename Device>
126 struct FakeQuantWithMinMaxVarsFunctor {
operatorFakeQuantWithMinMaxVarsFunctor127   void operator()(const Device& d, ConstFlat<float> inputs,
128                   ConstScalar<float> min, ConstScalar<float> max,
129                   const int quant_min, const int quant_max,
130                   Flat<float> outputs) {
131     const float min_val = min();
132     const float max_val = max();
133     // If min and max are both zero, we should just return zero.
134     if (min_val == 0.0f && max_val == 0.0f) {
135       outputs.device(d) = outputs.constant(0.0f);
136       return;
137     }
138     float nudged_min, nudged_max, nudged_scale;
139     Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max,
140           &nudged_scale);
141     const auto nudged_scale_repl = inputs.constant(nudged_scale);
142 
143     const auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
144     const auto clamped_shifted = clamped - nudged_min;
145     outputs.device(d) = (clamped_shifted / nudged_scale_repl + 0.5f).floor() *
146                             nudged_scale_repl +
147                         nudged_min;
148   }
149 };
150 
151 // Functor called by FakeQuantWithMinMaxVarsGradientOp to do the work.  Compiles
152 // both for CPU and GPU.
153 template <typename Device>
154 struct FakeQuantWithMinMaxVarsGradientFunctor {
operatorFakeQuantWithMinMaxVarsGradientFunctor155   void operator()(const Device& d, ConstFlat<float> gradients,
156                   ConstFlat<float> inputs, ConstScalar<float> min,
157                   ConstScalar<float> max, const int quant_min,
158                   const int quant_max, Flat<float> backprops_wrt_input,
159                   Scalar<float> backprop_wrt_min,
160                   Scalar<float> backprop_wrt_max) {
161     const float min_val = min();
162     const float max_val = max();
163     // If min and max are both zero, we propagate everything to inputs.
164     if (min_val == 0.0f && max_val == 0.0f) {
165       backprops_wrt_input.device(d) = gradients;
166       backprop_wrt_min.device(d) = backprop_wrt_min.constant(0.0f);
167       backprop_wrt_max.device(d) = backprop_wrt_max.constant(0.0f);
168       return;
169     }
170     float nudged_min, nudged_max, nudged_scale;
171     Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max,
172           &nudged_scale);
173 
174     const auto between_min_max =
175         (inputs >= nudged_min && inputs <= nudged_max)
176             .select(inputs.constant(1.0f), inputs.constant(0.0f));
177     backprops_wrt_input.device(d) = gradients * between_min_max;
178 
179     const auto below_min =
180         (inputs < nudged_min)
181             .select(inputs.constant(1.0f), inputs.constant(0.0f));
182     backprop_wrt_min.device(d) = (gradients * below_min).sum();
183 
184     const auto above_max =
185         (inputs > nudged_max)
186             .select(inputs.constant(1.0f), inputs.constant(0.0f));
187     backprop_wrt_max.device(d) = (gradients * above_max).sum();
188   }
189 };
190 
191 using Index = typename tensorflow::TTypes<float>::ConstTensor::Index;
192 
193 // Functor called by FakeQuantWithMinMaxVarsPerChannelOp to do the work.
194 // Compiles both for CPU and GPU.
195 //
196 // Already verified: inputs, outputs are of shape [b, d], min, max are of shape
197 // [d].
198 template <typename Device>
199 struct FakeQuantWithMinMaxVarsPerChannelFunctor {
operatorFakeQuantWithMinMaxVarsPerChannelFunctor200   void operator()(const Device& d, TTypes<float>::ConstMatrix inputs,
201                   ConstVec<float> min, ConstVec<float> max, const int quant_min,
202                   const int quant_max, TTypes<float>::Matrix outputs) {
203     for (Index i = 0; i < min.size(); ++i) {
204       const float min_val = min(i);
205       const float max_val = max(i);
206       // If min and max are both zero, we should just return zero.
207       if (min_val == 0.0f && max_val == 0.0f) {
208         auto chip = outputs.chip<1>(i);
209         chip.device(d) = chip.constant(0.0f);
210         continue;
211       }
212       float nudged_min, nudged_max, nudged_scale;
213       Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max,
214             &nudged_scale);
215       const auto clamped =
216           inputs.chip<1>(i).cwiseMin(nudged_max).cwiseMax(nudged_min);
217       const auto clamped_shifted = clamped - nudged_min;
218 
219       outputs.chip<1>(i).device(d) =
220           (clamped_shifted / nudged_scale + 0.5f).floor() * nudged_scale +
221           nudged_min;
222     }
223   }
224 };
225 
226 // Functor called by FakeQuantWithMinMaxVarsPerChannelGradientOp to do the work.
227 // Compiles both for CPU and GPU.
228 //
229 // Already verified: gradients, inputs, backprops_wrt_input are of shape [b, d],
230 // min, max, backprop_wrt_min, backprop_wrt_max are of shape [d].
231 template <typename Device>
232 struct FakeQuantWithMinMaxVarsPerChannelGradientFunctor {
operatorFakeQuantWithMinMaxVarsPerChannelGradientFunctor233   void operator()(const Device& d, TTypes<float>::ConstMatrix gradients,
234                   TTypes<float>::ConstMatrix inputs, ConstVec<float> min,
235                   ConstVec<float> max, const int quant_min, const int quant_max,
236                   TTypes<float>::Matrix backprops_wrt_input,
237                   Vec<float> backprop_wrt_min, Vec<float> backprop_wrt_max) {
238     for (Index i = 0; i < min.size(); ++i) {
239       const float min_val = min(i);
240       const float max_val = max(i);
241       const auto gradients_chip = gradients.chip<1>(i);
242       const auto inputs_chip = inputs.chip<1>(i);
243       // If min and max are both zero, we propagate everything to inputs.
244       if (min_val == 0.0f && max_val == 0.0f) {
245         backprops_wrt_input.chip<1>(i).device(d) = gradients_chip;
246         auto min_chip = backprop_wrt_min.chip<0>(i);
247         auto max_chip = backprop_wrt_max.chip<0>(i);
248         min_chip.device(d) = min_chip.constant(0.0f);
249         max_chip.device(d) = max_chip.constant(0.0f);
250         continue;
251       }
252       float nudged_min, nudged_max, nudged_scale;
253       Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max,
254             &nudged_scale);
255 
256       const auto between_min_max =
257           (inputs_chip >= nudged_min && inputs_chip <= nudged_max)
258               .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
259       backprops_wrt_input.chip<1>(i).device(d) =
260           gradients_chip * between_min_max;
261 
262       const auto below_min =
263           (inputs_chip < nudged_min)
264               .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
265       Eigen::DSizes<Index, 1> reduce(0);
266       backprop_wrt_min.chip<0>(i).device(d) =
267           (gradients_chip * below_min).sum(reduce);
268 
269       const auto above_max =
270           (inputs_chip > nudged_max)
271               .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
272       backprop_wrt_max.chip<0>(i).device(d) =
273           (gradients_chip * above_max).sum(reduce);
274     }
275   }
276 };
277 
278 }  // namespace tensorflow
279 
280 #endif  // TENSORFLOW_CORE_KERNELS_FAKE_QUANT_OPS_FUNCTOR_H_
281