/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/array_ops.cc.

#define EIGEN_USE_THREADS

#if GOOGLE_CUDA
#define EIGEN_USE_GPU
#endif

#include "tensorflow/core/kernels/constant_op.h"

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor.pb.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/variant_op_registry.h"
#include "tensorflow/core/kernels/fill_functor.h"
#include "tensorflow/core/platform/macros.h"

#ifdef TENSORFLOW_USE_SYCL
#include "tensorflow/core/common_runtime/sycl/sycl_util.h"
#endif  // TENSORFLOW_USE_SYCL

namespace tensorflow {

namespace {

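// Returns a copy of `ctx->def()` with the (potentially large) "value" attr
// removed, keeping only the fields OpKernel's constructor needs. This avoids
// retaining a second copy of the constant's data inside the kernel's NodeDef.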
std::unique_ptr<const NodeDef> StripTensorDataFromNodeDef(
    OpKernelConstruction* ctx) {
#ifndef __ANDROID__
  DCHECK_EQ(NodeDef::descriptor()->field_count(), 6)
      << "The NodeDef format has changed, and the attr-stripping code may need "
      << "to be updated.";
#endif
  const NodeDef& original = ctx->def();
  NodeDef* ret = new NodeDef;
  ret->set_name(original.name());
  ret->set_op(original.op());
  ret->set_device(original.device());
  // Strip the "value" attr from the returned NodeDef.
  // NOTE(mrry): The present implementation of `OpKernel::OpKernel()` only uses
  // attrs that affect the cardinality of list-typed inputs and outputs, so it
  // is safe to drop other attrs from the NodeDef.
  AddNodeAttr("dtype", ctx->output_type(0), ret);
  MergeDebugInfo(original, ret);
  return std::unique_ptr<const NodeDef>(ret);
}

}  // namespace

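// ConstantOp materializes the "value" TensorProto into `tensor_` once, at
// construction time; Compute() then emits that same tensor on every call.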
ConstantOp::ConstantOp(OpKernelConstruction* ctx)
    : OpKernel(ctx, StripTensorDataFromNodeDef(ctx)),
      tensor_(ctx->output_type(0)) {
  const TensorProto* proto = nullptr;
  OP_REQUIRES_OK(ctx, ctx->GetAttr("value", &proto));
  OP_REQUIRES_OK(ctx, ctx->device()->MakeTensorFromProto(
                          *proto, AllocatorAttributes(), &tensor_));
  OP_REQUIRES(
      ctx, ctx->output_type(0) == tensor_.dtype(),
      errors::InvalidArgument("Type mismatch between value (",
                              DataTypeString(tensor_.dtype()), ") and dtype (",
                              DataTypeString(ctx->output_type(0)), ")"));
}

void ConstantOp::Compute(OpKernelContext* ctx) {
  ctx->set_output(0, tensor_);
  if (TF_PREDICT_FALSE(ctx->track_allocations())) {
    ctx->record_persistent_memory_allocation(tensor_.AllocatedBytes());
  }
}

ConstantOp::~ConstantOp() {}

REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_CPU), ConstantOp);

#if GOOGLE_CUDA
#define REGISTER_KERNEL(D, TYPE)                                      \
  REGISTER_KERNEL_BUILDER(                                            \
      Name("Const").Device(DEVICE_##D).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, qint8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, qint16);
REGISTER_KERNEL(GPU, quint16);
REGISTER_KERNEL(GPU, uint32);
REGISTER_KERNEL(GPU, qint32);
REGISTER_KERNEL(GPU, int64);
REGISTER_KERNEL(GPU, uint64);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, complex128);
REGISTER_KERNEL(GPU, bool);
REGISTER_KERNEL(GPU, Variant);
#undef REGISTER_KERNEL
#endif

#ifdef TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(D, TYPE)                                 \
  REGISTER_KERNEL_BUILDER(                                            \
      Name("Const").Device(DEVICE_##D).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
REGISTER_SYCL_KERNEL(SYCL, float);
REGISTER_SYCL_KERNEL(SYCL, double);
REGISTER_SYCL_KERNEL(SYCL, uint8);
REGISTER_SYCL_KERNEL(SYCL, int8);
REGISTER_SYCL_KERNEL(SYCL, qint8);
REGISTER_SYCL_KERNEL(SYCL, uint16);
REGISTER_SYCL_KERNEL(SYCL, int16);
REGISTER_SYCL_KERNEL(SYCL, qint16);
REGISTER_SYCL_KERNEL(SYCL, quint16);
REGISTER_SYCL_KERNEL(SYCL, uint32);
REGISTER_SYCL_KERNEL(SYCL, qint32);
REGISTER_SYCL_KERNEL(SYCL, int64);
REGISTER_SYCL_KERNEL(SYCL, uint64);
REGISTER_SYCL_KERNEL(SYCL, bool);
#undef REGISTER_SYCL_KERNEL
#endif

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;
#ifdef TENSORFLOW_USE_SYCL
typedef Eigen::SyclDevice SYCLDevice;
#endif  // TENSORFLOW_USE_SYCL

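// Fill: creates a tensor whose shape is given by the 1-D "dims" input and
// whose elements are all set to the scalar "value" input. For example,
// dims = [2, 3] and value = 9 yield a 2x3 tensor filled with 9s.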
template <typename Device, typename T, typename Index>
class FillOp : public OpKernel {
 public:
  explicit FillOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& Tdims = context->input(0);
    OP_REQUIRES(context, IsLegacyVector(Tdims.shape()),
                errors::InvalidArgument("dims must be a vector, got shape ",
                                        Tdims.shape().DebugString()));
    const Tensor& Tvalue = context->input(1);
    OP_REQUIRES(context, IsLegacyScalar(Tvalue.shape()),
                errors::InvalidArgument("value must be a scalar, got shape ",
                                        Tvalue.shape().DebugString()));
    auto dims = Tdims.flat<Index>();
    TensorShape shape;
    OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
                                reinterpret_cast<const Index*>(dims.data()),
                                dims.size(), &shape));
    Tensor* out = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, shape, &out));
    functor::FillFunctor<Device, T> functor;
    functor(context->eigen_device<Device>(), out->flat<T>(),
            Tvalue.scalar<T>());
  }
};

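// Registers Fill for both int32 and int64 "index_type" (the dtype of "dims").
// "dims" is pinned to host memory so the output shape can be computed on the
// CPU before the kernel runs.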
#define REGISTER_KERNEL(D, TYPE)                                   \
  REGISTER_KERNEL_BUILDER(Name("Fill")                             \
                              .Device(DEVICE_##D)                  \
                              .TypeConstraint<TYPE>("T")           \
                              .TypeConstraint<int32>("index_type") \
                              .HostMemory("dims"),                 \
                          FillOp<D##Device, TYPE, int32>);         \
  REGISTER_KERNEL_BUILDER(Name("Fill")                             \
                              .Device(DEVICE_##D)                  \
                              .TypeConstraint<TYPE>("T")           \
                              .TypeConstraint<int64>("index_type") \
                              .HostMemory("dims"),                 \
                          FillOp<D##Device, TYPE, int64>);

#define REGISTER_CPU_KERNEL(TYPE) REGISTER_KERNEL(CPU, TYPE)
TF_CALL_ALL_TYPES(REGISTER_CPU_KERNEL);
// TODO(b/28917570): Add a test for this. Currently python 3 is not happy about
// the conversion from uint8 to quint8.
REGISTER_KERNEL(CPU, quint8);
REGISTER_KERNEL(CPU, quint16);
#undef REGISTER_CPU_KERNEL

#ifdef TENSORFLOW_USE_SYCL
REGISTER_KERNEL(SYCL, float);
REGISTER_KERNEL(SYCL, double);
REGISTER_KERNEL(SYCL, uint8);
REGISTER_KERNEL(SYCL, int8);
REGISTER_KERNEL(SYCL, uint16);
REGISTER_KERNEL(SYCL, int16);
REGISTER_KERNEL(SYCL, int64);

REGISTER_KERNEL_BUILDER(Name("Fill")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("index_type")
                            .HostMemory("dims")
                            .HostMemory("value")
                            .HostMemory("output"),
                        FillOp<CPUDevice, int32, int32>);
#undef REGISTER_KERNEL_SYCL
#endif  // TENSORFLOW_USE_SYCL

#if GOOGLE_CUDA
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, complex128);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, int64);
REGISTER_KERNEL(GPU, bool);
// Currently we do not support filling strings on GPU.

// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
REGISTER_KERNEL_BUILDER(Name("Fill")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("index_type")
                            .HostMemory("dims")
                            .HostMemory("value")
                            .HostMemory("output"),
                        FillOp<CPUDevice, int32, int32>);
#endif

#undef REGISTER_KERNEL

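// ZerosLike: produces a tensor of zeros with the same shape and dtype as its
// input. For DT_VARIANT the input must be a scalar; zeroing is delegated to
// the zeros_like function registered for the wrapped variant type, and the
// result is allocated on the CPU.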
template <typename Device, typename T>
class ZerosLikeOp : public OpKernel {
 public:
  explicit ZerosLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    const Device& d = ctx->eigen_device<Device>();
    if (std::is_same<T, Variant>::value) {
      OP_REQUIRES(
          ctx, input.dims() == 0,
          errors::InvalidArgument("ZerosLike non-scalar Tensor with "
                                  "dtype=DT_VARIANT is not supported."));
      const Variant& v = input.scalar<Variant>()();
      // DT_VARIANT tensors must be allocated on CPU since they wrap C++
      // objects which can not be efficiently represented in GPU memory.
      int numa_node = DeviceNumaNode(ctx->device());
      Tensor out(cpu_allocator(numa_node), DT_VARIANT, TensorShape({}));
      Variant* out_v = &(out.scalar<Variant>()());
      OP_REQUIRES_OK(ctx, UnaryOpVariant<Device>(
                              ctx, ZEROS_LIKE_VARIANT_UNARY_OP, v, out_v));
      ctx->set_output(0, out);
    } else {
      Tensor* out = nullptr;
      OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                              {0}, 0, input.shape(), &out));
      functor::SetZeroFunctor<Device, T> f;
      f(d, out->flat<T>());
    }
  }
};

#define REGISTER_KERNEL(type, dev)                                      \
  REGISTER_KERNEL_BUILDER(                                              \
      Name("ZerosLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      ZerosLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_STRING_TYPES(REGISTER_CPU);
REGISTER_CPU(Variant);
#undef REGISTER_CPU

#ifdef TENSORFLOW_USE_SYCL
REGISTER_KERNEL(bool, SYCL);
REGISTER_KERNEL(float, SYCL);
REGISTER_KERNEL(double, SYCL);
REGISTER_KERNEL(int64, SYCL);
REGISTER_KERNEL_BUILDER(Name("ZerosLike")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        ZerosLikeOp<CPUDevice, int32>);
#endif  // TENSORFLOW_USE_SYCL

#if GOOGLE_CUDA
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
REGISTER_KERNEL(int64, GPU);
REGISTER_KERNEL(Variant, GPU);
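// int32 is handled on the host: this registration targets DEVICE_GPU but runs
// the CPU functor and keeps "y" in host memory, mirroring the special int32
// Fill registration above.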
REGISTER_KERNEL_BUILDER(Name("ZerosLike")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        ZerosLikeOp<CPUDevice, int32>);
#endif  // GOOGLE_CUDA

#undef REGISTER_KERNEL

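// OnesLike: produces a tensor of ones with the same shape and dtype as its
// input, reusing the input buffer when it can be forwarded.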
template <typename Device, typename T>
class OnesLikeOp : public OpKernel {
 public:
  explicit OnesLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    Tensor* out = nullptr;
    OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                            {0}, 0, input.shape(), &out));
    functor::SetOneFunctor<Device, T> f;
    f(ctx->eigen_device<Device>(), out->flat<T>());
  }
};

#define REGISTER_KERNEL(type, dev)                                     \
  REGISTER_KERNEL_BUILDER(                                             \
      Name("OnesLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      OnesLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_TYPES(REGISTER_CPU);
#undef REGISTER_CPU

#ifdef TENSORFLOW_USE_SYCL
REGISTER_KERNEL(float, SYCL);
REGISTER_KERNEL(bool, SYCL);
REGISTER_KERNEL_BUILDER(Name("OnesLike")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        OnesLikeOp<CPUDevice, int32>);
#endif  // TENSORFLOW_USE_SYCL

#if GOOGLE_CUDA
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
REGISTER_KERNEL(int64, GPU);
REGISTER_KERNEL_BUILDER(Name("OnesLike")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        OnesLikeOp<CPUDevice, int32>);
#endif  // GOOGLE_CUDA

#undef REGISTER_KERNEL

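// Placeholder never produces a value: Compute() always fails with an error
// asking the caller to feed the tensor. It exists so that a graph can declare
// a typed (and optionally shaped) input.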
PlaceholderOp::PlaceholderOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
  OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &expected_shape_));
}

void PlaceholderOp::Compute(OpKernelContext* ctx) {
  if (expected_shape_.dims() > 0) {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0)),
                    " and shape ", expected_shape_.DebugString()));
  } else {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0))));
  }
}

REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_CPU), PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_CPU),
                        PlaceholderOp);
// The following GPU kernel registration is used to address the situation that
// a placeholder is added in a GPU device context and soft placement is false.
// Since a placeholder should never be executed, adding these GPU kernels has
// no effect on graph execution.
REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_GPU), PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_GPU),
                        PlaceholderOp);

#ifdef TENSORFLOW_USE_SYCL
REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_SYCL), PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_SYCL),
                        PlaceholderOp);
#endif  // TENSORFLOW_USE_SYCL

}  // namespace tensorflow