/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_KERNELS_FUSED_BATCH_NORM_OP_H_
#define TENSORFLOW_CORE_KERNELS_FUSED_BATCH_NORM_OP_H_

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/util/tensor_format.h"

namespace tensorflow {
namespace functor {
// FusedBatchNormEx op supports side inputs and activations:
//   (1) batch norm + activation
//   (2) batch norm + side input + activation
enum class FusedBatchNormActivationMode { kIdentity, kRelu };

std::string ToString(FusedBatchNormActivationMode activation_mode);

Status ParseActivationMode(OpKernelConstruction* context,
                           FusedBatchNormActivationMode* activation_mode);
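
// Usage sketch (illustrative, not part of this header): inside an op kernel
// constructor, assuming the op defines an "activation_mode" string attribute
// that ParseActivationMode reads:
//
//   FusedBatchNormActivationMode mode;
//   OP_REQUIRES_OK(context, ParseActivationMode(context, &mode));
//   if (mode == FusedBatchNormActivationMode::kRelu) {
//     // ReLU will be fused into the batch norm kernel.
//   }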

#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

// Functor that fills a GPU tensor with NaNs.
template <class T>
struct SetNanFunctor {
  void operator()(const Eigen::GpuDevice& d, typename TTypes<T>::Flat out);
};
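
// Usage sketch (illustrative): filling an output tensor with NaNs on the GPU,
// assuming `context` is an OpKernelContext* and `output` a float Tensor*:
//
//   functor::SetNanFunctor<float> set_nan;
//   set_nan(context->eigen_device<Eigen::GpuDevice>(), output->flat<float>());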

// Functor that launches a custom CUDA kernel for FusedBatchNorm with side
// input and activation when 'is_training=False'. In training we rely on cuDNN.
template <typename Device, typename T, typename U>
struct FusedBatchNormInferenceFunctor {
  void operator()(OpKernelContext* context, TensorFormat tensor_format,
                  typename TTypes<T, 4>::ConstTensor in,
                  typename TTypes<U>::ConstVec scale,
                  typename TTypes<U>::ConstVec offset,
                  typename TTypes<U>::ConstVec estimated_mean,
                  typename TTypes<U>::ConstVec estimated_variance,
                  typename TTypes<T, 4>::ConstTensor side_input, U epsilon,
                  FusedBatchNormActivationMode activation_mode,
                  typename TTypes<T, 4>::Tensor out);
};
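
// For reference, in inference mode the functor computes, per channel c (a
// sketch of the math, not a literal excerpt of the kernel):
//
//   scaling_factor[c] = scale[c] / sqrt(estimated_variance[c] + epsilon)
//   out = activation((in - estimated_mean[c]) * scaling_factor[c] + offset[c]
//                    + side_input)
//
// where side_input is optional and activation is identity or ReLU, selected by
// activation_mode.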

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

// Functor used by FusedBatchNormGradOp to do the computations when
// is_training=False.
template <typename Device, typename T, typename U>
struct FusedBatchNormFreezeGrad {
  void operator()(OpKernelContext* context, const Tensor& y_backprop_input,
                  const Tensor& x_input, const Tensor& scale_input,
                  const Tensor& pop_mean_input,
                  const Tensor& pop_variance_input, U epsilon,
                  Tensor* x_backprop_output, Tensor* scale_backprop_output,
                  Tensor* offset_backprop_output) {}
};
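
// For is_training=False the gradients reduce to the following (a sketch,
// assuming the standard batch norm formulas with population statistics; not a
// literal excerpt of the implementation):
//
//   rsqrt_var       = 1 / sqrt(pop_variance + epsilon)
//   x_backprop      = y_backprop * scale * rsqrt_var
//   scale_backprop  = sum(y_backprop * (x - pop_mean) * rsqrt_var)
//   offset_backprop = sum(y_backprop)
//
// where the sums run over all dimensions except the channel dimension.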

}  // namespace functor
}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_KERNELS_FUSED_BATCH_NORM_OP_H_