/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/array_ops.cc.

#define EIGEN_USE_THREADS

#if GOOGLE_CUDA
#define EIGEN_USE_GPU
#endif

#include "tensorflow/core/kernels/constant_op.h"

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor.pb.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/variant_op_registry.h"
#include "tensorflow/core/kernels/fill_functor.h"
#include "tensorflow/core/platform/macros.h"

#ifdef TENSORFLOW_USE_SYCL
#include "tensorflow/core/common_runtime/sycl/sycl_util.h"
#endif  // TENSORFLOW_USE_SYCL

namespace tensorflow {

namespace {

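// Returns a copy of `ctx->def()` with the (potentially large) "value" attr
// removed, so that the NodeDef retained by the OpKernel base class does not
// keep a second copy of the constant's data alive. Only the attrs the kernel
// actually needs (here, "dtype") are preserved.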
std::unique_ptr<const NodeDef> StripTensorDataFromNodeDef(
    OpKernelConstruction* ctx) {
#ifndef __ANDROID__
  DCHECK_EQ(NodeDef::descriptor()->field_count(), 6)
      << "The NodeDef format has changed, and the attr-stripping code may need "
      << "to be updated.";
#endif
  const NodeDef& original = ctx->def();
  NodeDef* ret = new NodeDef;
  ret->set_name(original.name());
  ret->set_op(original.op());
  ret->set_device(original.device());
  // Strip the "value" attr from the returned NodeDef.
  // NOTE(mrry): The present implementation of `OpKernel::OpKernel()` only uses
  // attrs that affect the cardinality of list-typed inputs and outputs, so it
  // is safe to drop other attrs from the NodeDef.
  AddNodeAttr("dtype", ctx->output_type(0), ret);
  MergeDebugInfo(original, ret);
  return std::unique_ptr<const NodeDef>(ret);
}

}  // namespace

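// ConstantOp materializes the tensor stored in the "value" attr exactly once,
// at kernel construction time, and re-emits that same tensor on every call to
// Compute().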
ConstantOp::ConstantOp(OpKernelConstruction* ctx)
    : OpKernel(ctx, StripTensorDataFromNodeDef(ctx)),
      tensor_(ctx->output_type(0)) {
  const TensorProto* proto = nullptr;
  OP_REQUIRES_OK(ctx, ctx->GetAttr("value", &proto));
  OP_REQUIRES_OK(ctx, ctx->device()->MakeTensorFromProto(
                          *proto, AllocatorAttributes(), &tensor_));
  OP_REQUIRES(
      ctx, ctx->output_type(0) == tensor_.dtype(),
      errors::InvalidArgument("Type mismatch between value (",
                              DataTypeString(tensor_.dtype()), ") and dtype (",
                              DataTypeString(ctx->output_type(0)), ")"));
}

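// Emitting the cached tensor performs no per-step allocation, so when
// allocation tracking is enabled it is reported as persistent memory.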
void ConstantOp::Compute(OpKernelContext* ctx) {
  ctx->set_output(0, tensor_);
  if (TF_PREDICT_FALSE(ctx->track_allocations())) {
    ctx->record_persistent_memory_allocation(tensor_.AllocatedBytes());
  }
}

ConstantOp::~ConstantOp() {}

REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_CPU), ConstantOp);

#if GOOGLE_CUDA
#define REGISTER_KERNEL(D, TYPE)                                      \
  REGISTER_KERNEL_BUILDER(                                            \
      Name("Const").Device(DEVICE_##D).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, qint8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, qint16);
REGISTER_KERNEL(GPU, quint16);
REGISTER_KERNEL(GPU, uint32);
REGISTER_KERNEL(GPU, qint32);
REGISTER_KERNEL(GPU, int64);
REGISTER_KERNEL(GPU, uint64);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, complex128);
REGISTER_KERNEL(GPU, bool);
REGISTER_KERNEL(GPU, Variant);
#undef REGISTER_KERNEL
#endif

#ifdef TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(D, TYPE)                                 \
  REGISTER_KERNEL_BUILDER(                                            \
      Name("Const").Device(DEVICE_##D).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
REGISTER_SYCL_KERNEL(SYCL, float);
REGISTER_SYCL_KERNEL(SYCL, double);
REGISTER_SYCL_KERNEL(SYCL, uint8);
REGISTER_SYCL_KERNEL(SYCL, int8);
REGISTER_SYCL_KERNEL(SYCL, qint8);
REGISTER_SYCL_KERNEL(SYCL, uint16);
REGISTER_SYCL_KERNEL(SYCL, int16);
REGISTER_SYCL_KERNEL(SYCL, qint16);
REGISTER_SYCL_KERNEL(SYCL, quint16);
REGISTER_SYCL_KERNEL(SYCL, uint32);
REGISTER_SYCL_KERNEL(SYCL, qint32);
REGISTER_SYCL_KERNEL(SYCL, int64);
REGISTER_SYCL_KERNEL(SYCL, uint64);
REGISTER_SYCL_KERNEL(SYCL, bool);
#undef REGISTER_SYCL_KERNEL
#endif

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;
#ifdef TENSORFLOW_USE_SYCL
typedef Eigen::SyclDevice SYCLDevice;
#endif  // TENSORFLOW_USE_SYCL

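// FillOp produces an output whose shape is given by the int32/int64 vector
// `dims` (input 0) and whose every element is a copy of the scalar `value`
// (input 1). For example, dims = [2, 3] with value = 9 yields a 2x3 tensor
// filled with 9s.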
template <typename Device, typename T, typename Index>
class FillOp : public OpKernel {
 public:
  explicit FillOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& Tdims = context->input(0);
    OP_REQUIRES(context, IsLegacyVector(Tdims.shape()),
                errors::InvalidArgument("dims must be a vector, got shape ",
                                        Tdims.shape().DebugString()));
    const Tensor& Tvalue = context->input(1);
    OP_REQUIRES(context, IsLegacyScalar(Tvalue.shape()),
                errors::InvalidArgument("value must be a scalar, got shape ",
                                        Tvalue.shape().DebugString()));
    auto dims = Tdims.flat<Index>();
    TensorShape shape;
    OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
                                reinterpret_cast<const Index*>(dims.data()),
                                dims.size(), &shape));
    Tensor* out = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, shape, &out));
    functor::FillFunctor<Device, T> functor;
    functor(context->eigen_device<Device>(), out->flat<T>(),
            Tvalue.scalar<T>());
  }
};

#define REGISTER_KERNEL(D, TYPE)                                   \
  REGISTER_KERNEL_BUILDER(Name("Fill")                             \
                              .Device(DEVICE_##D)                  \
                              .TypeConstraint<TYPE>("T")           \
                              .TypeConstraint<int32>("index_type") \
                              .HostMemory("dims"),                 \
                          FillOp<D##Device, TYPE, int32>);         \
  REGISTER_KERNEL_BUILDER(Name("Fill")                             \
                              .Device(DEVICE_##D)                  \
                              .TypeConstraint<TYPE>("T")           \
                              .TypeConstraint<int64>("index_type") \
                              .HostMemory("dims"),                 \
                          FillOp<D##Device, TYPE, int64>);

#define REGISTER_CPU_KERNEL(TYPE) REGISTER_KERNEL(CPU, TYPE)
TF_CALL_ALL_TYPES(REGISTER_CPU_KERNEL);
// TODO(b/28917570): Add a test for this. Currently python 3 is not happy about
// the conversion from uint8 to quint8.
REGISTER_KERNEL(CPU, quint8);
REGISTER_KERNEL(CPU, quint16);
#undef REGISTER_CPU_KERNEL

#ifdef TENSORFLOW_USE_SYCL
REGISTER_KERNEL(SYCL, float);
REGISTER_KERNEL(SYCL, double);
REGISTER_KERNEL(SYCL, uint8);
REGISTER_KERNEL(SYCL, int8);
REGISTER_KERNEL(SYCL, uint16);
REGISTER_KERNEL(SYCL, int16);
REGISTER_KERNEL(SYCL, int64);

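// A special SYCL kernel for int32. As with the GPU registration below, all
// int32 inputs and outputs are kept in host memory, so the CPU implementation
// of FillOp is used.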
REGISTER_KERNEL_BUILDER(Name("Fill")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("index_type")
                            .HostMemory("dims")
                            .HostMemory("value")
                            .HostMemory("output"),
                        FillOp<CPUDevice, int32, int32>);
#endif  // TENSORFLOW_USE_SYCL

#if GOOGLE_CUDA
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, complex128);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, int64);
REGISTER_KERNEL(GPU, bool);
// Currently we do not support filling strings on GPU

// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
REGISTER_KERNEL_BUILDER(Name("Fill")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("index_type")
                            .HostMemory("dims")
                            .HostMemory("value")
                            .HostMemory("output"),
                        FillOp<CPUDevice, int32, int32>);
#endif

#undef REGISTER_KERNEL

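// ZerosLikeOp produces a tensor of zeros with the same shape and dtype as its
// input. For DT_VARIANT the input must be a scalar and zeroing is delegated to
// the ZEROS_LIKE_VARIANT_UNARY_OP registered for the wrapped object; for all
// other types the output buffer is forwarded or allocated and then cleared
// with SetZeroFunctor.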
template <typename Device, typename T>
class ZerosLikeOp : public OpKernel {
 public:
  explicit ZerosLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    const Device& d = ctx->eigen_device<Device>();
    if (std::is_same<T, Variant>::value) {
      OP_REQUIRES(
          ctx, input.dims() == 0,
          errors::InvalidArgument("ZerosLike non-scalar Tensor with "
                                  "dtype=DT_VARIANT is not supported."));
      const Variant& v = input.scalar<Variant>()();
      // DT_VARIANT tensors must be allocated on CPU since they wrap C++
      // objects which can not be efficiently represented in GPU memory.
      int numa_node = DeviceNumaNode(ctx->device());
      Tensor out(cpu_allocator(numa_node), DT_VARIANT, TensorShape({}));
      Variant* out_v = &(out.scalar<Variant>()());
      OP_REQUIRES_OK(ctx, UnaryOpVariant<Device>(
                              ctx, ZEROS_LIKE_VARIANT_UNARY_OP, v, out_v));
      ctx->set_output(0, out);
    } else {
      Tensor* out = nullptr;
      OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                              {0}, 0, input.shape(), &out));
      functor::SetZeroFunctor<Device, T> f;
      f(d, out->flat<T>());
    }
  }
};

#define REGISTER_KERNEL(type, dev)                                      \
  REGISTER_KERNEL_BUILDER(                                              \
      Name("ZerosLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      ZerosLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_STRING_TYPES(REGISTER_CPU);
REGISTER_CPU(Variant);
#undef REGISTER_CPU

#ifdef TENSORFLOW_USE_SYCL
REGISTER_KERNEL(bool, SYCL);
REGISTER_KERNEL(float, SYCL);
REGISTER_KERNEL(double, SYCL);
REGISTER_KERNEL(int64, SYCL);
REGISTER_KERNEL_BUILDER(Name("ZerosLike")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        ZerosLikeOp<CPUDevice, int32>);
#endif  // TENSORFLOW_USE_SYCL

#if GOOGLE_CUDA
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
REGISTER_KERNEL(int64, GPU);
REGISTER_KERNEL(Variant, GPU);
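// int32 outputs are kept in host memory, so the CPU implementation is used
// even when the op is placed on a GPU device.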
REGISTER_KERNEL_BUILDER(Name("ZerosLike")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        ZerosLikeOp<CPUDevice, int32>);
#endif  // GOOGLE_CUDA

#undef REGISTER_KERNEL

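// OnesLikeOp produces a tensor of ones with the same shape and dtype as its
// input, forwarding the input buffer when possible and filling it with
// SetOneFunctor.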
template <typename Device, typename T>
class OnesLikeOp : public OpKernel {
 public:
  explicit OnesLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    Tensor* out = nullptr;
    OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                            {0}, 0, input.shape(), &out));
    functor::SetOneFunctor<Device, T> f;
    f(ctx->eigen_device<Device>(), out->flat<T>());
  }
};

#define REGISTER_KERNEL(type, dev)                                     \
  REGISTER_KERNEL_BUILDER(                                             \
      Name("OnesLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      OnesLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_TYPES(REGISTER_CPU);
#undef REGISTER_CPU

#ifdef TENSORFLOW_USE_SYCL
REGISTER_KERNEL(float, SYCL);
REGISTER_KERNEL(bool, SYCL);
REGISTER_KERNEL_BUILDER(Name("OnesLike")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        OnesLikeOp<CPUDevice, int32>);
#endif  // TENSORFLOW_USE_SYCL

#if GOOGLE_CUDA
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
REGISTER_KERNEL(int64, GPU);
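// As with ZerosLike above, int32 outputs stay in host memory and are handled
// by the CPU implementation.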
REGISTER_KERNEL_BUILDER(Name("OnesLike")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        OnesLikeOp<CPUDevice, int32>);
#endif  // GOOGLE_CUDA

#undef REGISTER_KERNEL

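// PlaceholderOp never produces a value: Compute() always fails with
// InvalidArgument, because a placeholder's value must be fed at run time. The
// "shape" attr is used only to make the error message more precise.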
PlaceholderOp::PlaceholderOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
  OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &expected_shape_));
}

void PlaceholderOp::Compute(OpKernelContext* ctx) {
  if (expected_shape_.dims() > 0) {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0)),
                    " and shape ", expected_shape_.DebugString()));
  } else {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0))));
  }
}

REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_CPU), PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_CPU),
                        PlaceholderOp);
// The following GPU kernel registration is used to address the situation where
// a placeholder is added in a GPU device context and soft placement is false.
// Since a placeholder should never be executed, adding these GPU kernels has
// no effect on graph execution.
REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_GPU), PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_GPU),
                        PlaceholderOp);

#ifdef TENSORFLOW_USE_SYCL
REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_SYCL), PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_SYCL),
                        PlaceholderOp);
#endif  // TENSORFLOW_USE_SYCL
}  // namespace tensorflow