1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_GRAPH_MKL_GRAPH_UTIL_H_
17 #define TENSORFLOW_CORE_GRAPH_MKL_GRAPH_UTIL_H_
18 #ifdef INTEL_MKL
19 
20 #include "absl/base/call_once.h"
21 #include "tensorflow/core/framework/op_kernel.h"
22 #include "tensorflow/core/framework/types.pb.h"
23 #include "tensorflow/core/graph/graph.h"
24 #include "tensorflow/core/lib/core/status.h"
25 #include "tensorflow/core/platform/cpu_info.h"
26 #include "tensorflow/core/util/env_var.h"
27 
28 namespace tensorflow {
// Since our ops are going to produce and also consume N additional tensors
30 // (Mkl) for N Tensorflow tensors, we can have following different
31 // orderings among these 2N tensors.
32 //
33 // E.g., for Tensorflow tensors A, B, and C, our ops will produce and
34 // consume A_m, B_m, and C_m additionally.
35 //
36 // INTERLEAVED: in this case 2N tensors are interleaved. So for above
37 //              example, the ordering looks like: A, A_m, B, B_m, C, C_m.
38 //
// CONTIGUOUS: in this case N Tensorflow tensors are contiguous followed
40 //             by N Mkl tensors. So for above example, the ordering looks
41 //             like: A, B, C, A_m, B_m, C_m
42 //
43 // Following APIs map index of original Tensorflow tensors to their
44 // appropriate position based on selected ordering. For contiguous ordering,
45 // we need to know the total number of tensors (parameter total).
46 //
// The two supported orderings of the 2N (data + metadata) tensors.
typedef enum { TENSORS_INTERLEAVED, TENSORS_CONTIGUOUS } MklTfTensorOrdering;
// NOTE: Currently, we use contiguous ordering. If you change this, then you
// would need to change Mkl op definitions in nn_ops.cc.
static const MklTfTensorOrdering kTensorOrdering = TENSORS_CONTIGUOUS;
51 
52 // Get index of MetaData tensor from index 'n' of Data tensor.
DataIndexToMetaDataIndex(int n,int total_tensors)53 inline int DataIndexToMetaDataIndex(int n, int total_tensors) {
54   if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
55     // For interleaved ordering, Mkl tensor follows immediately after
56     // Tensorflow tensor.
57     return n + 1;
58   } else {
59     CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
60     // For contiguous ordering, Mkl tensor is n+total_tensors / 2 away.
61     return n + total_tensors / 2;
62   }
63 }
64 
GetTensorDataIndex(int n,int total_tensors)65 int inline GetTensorDataIndex(int n, int total_tensors) {
66   if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
67     return 2 * n;  // index corresponding to nth input/output tensor
68   } else {
69     CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
70     return n;
71   }
72 }
73 
GetTensorMetaDataIndex(int n,int total_tensors)74 int inline GetTensorMetaDataIndex(int n, int total_tensors) {
75   // Get index for TensorData first and then use mapping function
76   // to get TensorMetaData index from TensorData index.
77   int tidx = GetTensorDataIndex(n, total_tensors);
78   return DataIndexToMetaDataIndex(tidx, total_tensors);
79 }
80 
81 // check if the control between src and dst nodes already exists
DoesControlEdgeExist(const Node * src,const Node * dst)82 bool inline DoesControlEdgeExist(const Node* src, const Node* dst) {
83   for (const Edge* edge : src->out_edges()) {
84     if (edge->IsControlEdge() && edge->dst() == dst) {
85       return true;
86     }
87   }
88   return false;
89 }
90 
91 // Check if graph should run in layout-dependent mode or native format mode
92 // based on environment variable setting. User can set
93 // TF_ENABLE_MKL_NATIVE_FORMAT=1 to enable the native format mode.
NativeFormatEnabled()94 bool inline NativeFormatEnabled() {
95   static bool native_fmt_enabled = false;
96   static absl::once_flag once;
97   absl::call_once(once, [&] {
98     TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MKL_NATIVE_FORMAT",
99                                    /*default_value*/ false,
100                                    &native_fmt_enabled));
101   });
102   return native_fmt_enabled;
103 }
104 
105 // Check if the data_format attribute in the node def represents 5D tensor
Check5DFormat(const NodeDef & ndef)106 bool inline Check5DFormat(const NodeDef& ndef) {
107   string data_format;
108   TF_CHECK_OK(GetNodeAttr(ndef, "data_format", &data_format));
109   if (data_format.compare("NCDHW") == 0 || data_format.compare("NDHWC") == 0) {
110     return true;
111   }
112   return false;
113 }
114 
115 namespace mkl_op_registry {
// MKL operators whose kernels are registered with 'MklLayoutDependentOp' label
// (e.g., MklConv2D) understand input tensors in MKL layout. These operators
// get additional meta-tensors for actual input tensors.
static const char* kMklLayoutDependentOpLabel = "MklLayoutDependentOp";
// Search pattern matching a kernel registration line carrying the
// layout-dependent label (see KernelsRegisteredForOp output format).
static const char* kMklLayoutDependentOpLabelPattern =
    "label='MklLayoutDependentOp'";
// MKL operators whose kernels are registered with 'MklNameChangeOp' label
// (e.g., MklMatMul, MklTranspose) do not understand input tensors in MKL
// layout. These operators do not get additional meta-tensors. The signatures of
// these operators are the same as the original TensorFlow operators that they
// correspond to. So these ops just go through a name change during graph
// rewrite pass.
static const char* kMklNameChangeOpLabel = "MklNameChangeOp";
static const char* kMklNameChangeOpLabelPattern = "label='MklNameChangeOp'";
// Label (and its registration-line search pattern) for quantized MKL kernels.
static const char* kMklQuantizedOpLabel = "QuantizedMklOp";
static const char* kMklQuantizedOpLabelPattern = "label='QuantizedMklOp'";

// Prefix that we add to Tensorflow op name to construct Mkl op name.
static const char* const kMklOpPrefix = "_Mkl";
// TODO(intel-tf): PR review feedback (penpornk)
// Can we add eager_mode (or is_eager) as an op attribute instead?
// This way we don't need to rename the op just to pass eager_mode
// through template parameter.
static const char* const kMklEagerOpPrefix = "_MklEager";

// Prefix that we add to TF op name to construct MKL op that does not
// depend on layout propagation. It will be used in both Eager and graph
// modes unless there is a reason to have additional op name with
// _MklEager prefix.
static const char* const kMklNativeOpPrefix = "_MklNative";
146 
147 // Get the name of Mkl Native (does not depend on layout propagation) op
148 // from original TensorFlow op.
// Returns the name of the Mkl native-format op (does not depend on layout
// propagation) corresponding to the original TensorFlow op 'name'.
inline string GetMklNativeOpName(const string& name) {
  // A handful of layout-independent ops keep the historical _Mkl prefix
  // instead of _MklNative.
  const bool use_mkl_prefix =
      (name == "ConjugateTranspose" || name == "BatchMatMul" ||
       name == "BatchMatMulV2" || name == "MatMul" || name == "Transpose");
  return string(use_mkl_prefix ? kMklOpPrefix : kMklNativeOpPrefix) + name;
}
162 
163 // Get the name of Mkl op from original TensorFlow op
164 // We prefix the original op with _Mkl or _MklNative to get Mkl op.
// Returns the Mkl op name for the original TensorFlow op 'name': prefixed
// with _Mkl in layout-dependent mode, or routed through the native-format
// naming when TF_ENABLE_MKL_NATIVE_FORMAT is set.
inline string GetMklOpName(const string& name) {
  return NativeFormatEnabled() ? GetMklNativeOpName(name)
                               : string(kMklOpPrefix) + name;
}
172 
173 // Get the name of Mkl Eager op from original TensorFlow op
174 // We prefix 'MklEager' to the original op to get Mkl Eager op.
// Returns the Mkl eager-mode op name: the original op name prefixed with
// _MklEager.
inline string GetMklEagerOpName(const string& name) {
  string eager_name(kMklEagerOpPrefix);
  eager_name += name;
  return eager_name;
}
178 
#ifdef ENABLE_INTEL_MKL_BFLOAT16
// Returns true iff this CPU reports the AVX512F feature, which is what the
// bfloat16 code paths below require before enabling oneDNN bfloat16 kernels.
static inline bool IsBF16SupportedByOneDNNOnThisCPU() {
  return port::TestCPUFeature(port::CPUFeature::AVX512F);
}
#endif
184 
BF16UnsupportedWarning()185 static inline void BF16UnsupportedWarning() {
186   static absl::once_flag cpu_bfloat16_warn_once_flag;
187   absl::call_once(cpu_bfloat16_warn_once_flag, [] {
188     LOG(ERROR) << "oneDNN BFloat16 support are only on platforms with AVX512. "
189                   "Falling back to default implementation if present.";
190   });
191 }
192 
193 // Check whether opname with type T is registered as MKL operator
194 // that can accept input tensors in MKL layout.
195 //
196 // @input: name of the op
197 // @input: T datatype to be used for checking op
198 // @return: true if opname is registered as Mkl-layout dependent op;
199 // false otherwise
IsMklLayoutDependentOp(const string & op_name,DataType T)200 static inline bool IsMklLayoutDependentOp(const string& op_name, DataType T) {
201   string kernel = KernelsRegisteredForOp(op_name);
202 
203   // Restrict quantized ops to QUINT8 and QINT8 for now
204   if (kernel.find(kMklQuantizedOpLabelPattern) != string::npos) {
205     return (T == DT_QUINT8 || T == DT_QINT8 || T == DT_QINT32);
206   }
207 #ifdef ENABLE_INTEL_MKL_BFLOAT16
208   // Restrict regular ops to FLOAT and BFLOAT16
209   if (kernel.find(kMklLayoutDependentOpLabelPattern) != string::npos) {
210     if (T == DT_FLOAT) return true;
211     if (T == DT_BFLOAT16) {
212       if (IsBF16SupportedByOneDNNOnThisCPU()) {
213         return true;
214       } else {
215         // Restrict bfloat16 ops to platforms with at least AVX512 support, fall
216         // back to Eigen implementation otherwise.
217         BF16UnsupportedWarning();
218         return false;
219       }
220     }
221     return false;
222   }
223 #else
224   // Restrict regular ops to FLOAT
225   if (kernel.find(kMklLayoutDependentOpLabelPattern) != string::npos) {
226     return (T == DT_FLOAT);
227   }
228 #endif  // ENABLE_INTEL_MKL_BFLOAT16
229   return false;
230 }
231 
232 // TODO(mdfaijul): QuantizedConv2D is registered with input: QUINT8
233 // filter:QINT8 for mkldnn integration. First a dummy kernel is created
234 // and then it is replaced by an actual kernel.
IsMklLayoutDependentOp(const string & op_name,DataType Tinput,DataType Tfilter)235 static inline bool IsMklLayoutDependentOp(const string& op_name,
236                                           DataType Tinput, DataType Tfilter) {
237   string kernel = KernelsRegisteredForOp(op_name);
238 
239   // Restrict quantized ops to QUINT8 and QINT8 for now
240   if (kernel.find(kMklQuantizedOpLabelPattern) != string::npos) {
241     return (Tfilter == DT_QINT8);
242   }
243   return false;
244 }
245 
246 // Check whether opname with type T is registered as an MKL operator that
247 // will go through name change.
248 //
249 // @input: name of the op
250 // @input: T datatype to be used for checking op
251 // @return: true if opname is registered as MKL op that will go through name
252 // change; false otherwise
IsMklNameChangeOp(const string & op_name,DataType T)253 static inline bool IsMklNameChangeOp(const string& op_name, DataType T) {
254   string kernel = KernelsRegisteredForOp(op_name);
255   // String returned by KernelsRegisteredForOp looks like below:
256   //
257   // Op = _MklMatMul, kernels =
258   // device='CPU'; label='MklNameChangeOp'; T in [DT_COMPLEX128]
259   // device='CPU'; label='MklNameChangeOp'; T in [DT_COMPLEX64]
260   // device='CPU'; label='MklNameChangeOp'; T in [DT_DOUBLE]
261   // device='CPU'; label='MklNameChangeOp'; T in [DT_FLOAT]
262 
263   // Now we just construct a search string to match what we are looking for.
264   string search_string = kMklNameChangeOpLabelPattern;
265   search_string += string(";") + string(" T in [");
266   search_string += DataType_Name(T) + string("]");
267 
268   // Temporarily replacing earlier check by adding a type-specific check so
269   // that we can selectively decide which type is supported by MKL operators.
270   // That way kernel registration does not decide which operators we support.
271   // We are using this change to temporarily disable BFLOAT16 support. Once
272   // we want to enable it, we will go back to earlier check.
273   bool isTypeAllowed = false;
274   if (kernel.find(search_string) != string::npos) {
275     isTypeAllowed = (T == DT_COMPLEX128 || T == DT_COMPLEX64 ||
276                      T == DT_DOUBLE || T == DT_FLOAT);
277 #ifdef ENABLE_INTEL_MKL_BFLOAT16
278     if (!isTypeAllowed) {
279       if (T == DT_BFLOAT16) {
280         if (IsBF16SupportedByOneDNNOnThisCPU()) {
281           isTypeAllowed = true;
282         } else {
283           // Restrict bfloat16 ops to platforms with at least AVX512 support,
284           // fall back to Eigen implementation otherwise.
285           BF16UnsupportedWarning();
286           isTypeAllowed = false;
287         }
288       }
289     }
290 #endif
291     return isTypeAllowed;
292   }
293 
294   return false;
295 }
296 
297 // Check if the operator with 'op_name' and type 'T' is an MKL operator that
298 // will either understand input tensors in MKL layout or will go through name
299 // rewrite that some operators go through.
IsMklOp(const string & op_name,DataType T)300 static inline bool IsMklOp(const string& op_name, DataType T) {
301   return IsMklLayoutDependentOp(op_name, T) || IsMklNameChangeOp(op_name, T);
302 }
303 
IsMklOp(const Node * n)304 static inline bool IsMklOp(const Node* n) {
305   DataType T;
306   return GetNodeAttr(n->def(), "T", &T).ok() && IsMklOp(n->type_string(), T);
307 }
308 
309 // Check whether opname with type T is registered as MKL-compliant and
310 // is element-wise.
311 //
312 // @input: name of the op
313 // @input: T datatype to be used for checking op
314 // @return: true if opname is registered as element-wise Mkl op;
315 // false otherwise
IsMklElementWiseOp(const string & op_name,DataType T)316 static inline bool IsMklElementWiseOp(const string& op_name, DataType T) {
317   if (!IsMklOp(op_name, T)) {
318     return false;
319   }
320   bool result = (0 == op_name.compare(GetMklOpName("Add")) ||
321                  0 == op_name.compare(GetMklOpName("AddV2")) ||
322                  0 == op_name.compare(GetMklOpName("Sub")) ||
323                  0 == op_name.compare(GetMklOpName("Mul")) ||
324                  0 == op_name.compare(GetMklOpName("Maximum")) ||
325                  0 == op_name.compare(GetMklOpName("SquaredDifference")));
326 
327   return result;
328 }
329 }  // namespace mkl_op_registry
330 }  // namespace tensorflow
331 #endif  // INTEL_MKL
332 #endif  // TENSORFLOW_CORE_GRAPH_MKL_GRAPH_UTIL_H_
333