1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // See docs in ../ops/array_ops.cc.
17 
18 #include "tensorflow/core/kernels/shape_ops.h"
19 #include "tensorflow/core/framework/node_def.pb.h"
20 #include "tensorflow/core/framework/register_types.h"
21 
22 namespace tensorflow {
23 
24 // Shape ----------------------------------------
25 REGISTER_KERNEL_BUILDER(Name("Shape")
26                             .Device(DEVICE_CPU)
27                             .HostMemory("output")
28                             .TypeConstraint<int32>("out_type"),
29                         ShapeOp<int32>);
30 REGISTER_KERNEL_BUILDER(Name("Shape")
31                             .Device(DEVICE_CPU)
32                             .HostMemory("output")
33                             .TypeConstraint<int64>("out_type"),
34                         ShapeOp<int64>);
35 
36 #ifdef TENSORFLOW_USE_SYCL
37 #define REGISTER_SYCL_KERNEL(type)                               \
38   REGISTER_KERNEL_BUILDER(Name("Shape")                          \
39                               .Device(DEVICE_SYCL)               \
40                               .HostMemory("output")              \
41                               .TypeConstraint<int32>("out_type") \
42                               .TypeConstraint<type>("T"),        \
43                           ShapeOp<int32>);                       \
44   REGISTER_KERNEL_BUILDER(Name("Shape")                          \
45                               .Device(DEVICE_SYCL)               \
46                               .HostMemory("output")              \
47                               .TypeConstraint<int64>("out_type") \
48                               .TypeConstraint<type>("T"),        \
49                           ShapeOp<int64>);
50 
51 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
52 TF_CALL_bool(REGISTER_SYCL_KERNEL);
53 #undef REGISTER_SYCL_KERNEL
54 
55 REGISTER_KERNEL_BUILDER(Name("Shape")
56                             .Device(DEVICE_SYCL)
57                             .HostMemory("input")
58                             .HostMemory("output")
59                             .TypeConstraint<int32>("T")
60                             .TypeConstraint<int32>("out_type"),
61                         ShapeOp<int32>);
62 REGISTER_KERNEL_BUILDER(Name("Shape")
63                             .Device(DEVICE_SYCL)
64                             .HostMemory("input")
65                             .HostMemory("output")
66                             .TypeConstraint<int32>("T")
67                             .TypeConstraint<int64>("out_type"),
68                         ShapeOp<int64>);
69 #endif  // TENSORFLOW_USE_SYCL
70 
71 #if GOOGLE_CUDA
72 #define REGISTER_GPU_KERNEL(type)                                \
73   REGISTER_KERNEL_BUILDER(Name("Shape")                          \
74                               .Device(DEVICE_GPU)                \
75                               .HostMemory("output")              \
76                               .TypeConstraint<int32>("out_type") \
77                               .TypeConstraint<type>("T"),        \
78                           ShapeOp<int32>);                       \
79   REGISTER_KERNEL_BUILDER(Name("Shape")                          \
80                               .Device(DEVICE_GPU)                \
81                               .HostMemory("output")              \
82                               .TypeConstraint<int64>("out_type") \
83                               .TypeConstraint<type>("T"),        \
84                           ShapeOp<int64>);
85 
86 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
87 TF_CALL_bool(REGISTER_GPU_KERNEL);
88 TF_CALL_variant(REGISTER_GPU_KERNEL);
89 #undef REGISTER_GPU_KERNEL
90 
91 // A special GPU kernel for int32.
92 // TODO(b/25387198): Also enable int32 in device memory. This kernel
93 // registration requires all int32 inputs and outputs to be in host memory.
94 REGISTER_KERNEL_BUILDER(Name("Shape")
95                             .Device(DEVICE_GPU)
96                             .HostMemory("input")
97                             .HostMemory("output")
98                             .TypeConstraint<int32>("T")
99                             .TypeConstraint<int32>("out_type"),
100                         ShapeOp<int32>);
101 REGISTER_KERNEL_BUILDER(Name("Shape")
102                             .Device(DEVICE_GPU)
103                             .HostMemory("input")
104                             .HostMemory("output")
105                             .TypeConstraint<int32>("T")
106                             .TypeConstraint<int64>("out_type"),
107                         ShapeOp<int64>);
108 
109 #endif  // GOOGLE_CUDA
110 
111 // ShapeN ---------------------------------------
112 REGISTER_KERNEL_BUILDER(Name("ShapeN")
113                             .Device(DEVICE_CPU)
114                             .HostMemory("output")
115                             .TypeConstraint<int32>("out_type"),
116                         ShapeNOp<int32>);
117 REGISTER_KERNEL_BUILDER(Name("ShapeN")
118                             .Device(DEVICE_CPU)
119                             .HostMemory("output")
120                             .TypeConstraint<int64>("out_type"),
121                         ShapeNOp<int64>);
122 
123 #if GOOGLE_CUDA
124 #define REGISTER_GPU_KERNEL(type)                                \
125   REGISTER_KERNEL_BUILDER(Name("ShapeN")                         \
126                               .Device(DEVICE_GPU)                \
127                               .HostMemory("output")              \
128                               .TypeConstraint<int32>("out_type") \
129                               .TypeConstraint<type>("T"),        \
130                           ShapeNOp<int32>);                      \
131   REGISTER_KERNEL_BUILDER(Name("ShapeN")                         \
132                               .Device(DEVICE_GPU)                \
133                               .HostMemory("output")              \
134                               .TypeConstraint<int64>("out_type") \
135                               .TypeConstraint<type>("T"),        \
136                           ShapeNOp<int64>)
137 
138 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
139 TF_CALL_bool(REGISTER_GPU_KERNEL);
140 #undef REGISTER_GPU_KERNEL
141 
142 // A special GPU kernel for int32.
143 // TODO(b/25387198): Also enable int32 in device memory. This kernel
144 // registration requires all int32 inputs and outputs to be in host memory.
145 REGISTER_KERNEL_BUILDER(Name("ShapeN")
146                             .Device(DEVICE_GPU)
147                             .HostMemory("input")
148                             .HostMemory("output")
149                             .TypeConstraint<int32>("T")
150                             .TypeConstraint<int32>("out_type"),
151                         ShapeNOp<int32>);
152 REGISTER_KERNEL_BUILDER(Name("ShapeN")
153                             .Device(DEVICE_GPU)
154                             .HostMemory("input")
155                             .HostMemory("output")
156                             .TypeConstraint<int32>("T")
157                             .TypeConstraint<int64>("out_type"),
158                         ShapeNOp<int64>);
159 #endif  // GOOGLE_CUDA
160 
161 #ifdef TENSORFLOW_USE_SYCL
162 #define REGISTER_SYCL_KERNEL(type)                               \
163   REGISTER_KERNEL_BUILDER(Name("ShapeN")                         \
164                               .Device(DEVICE_SYCL)               \
165                               .HostMemory("output")              \
166                               .TypeConstraint<int32>("out_type") \
167                               .TypeConstraint<type>("T"),        \
168                           ShapeNOp<int32>);                      \
169   REGISTER_KERNEL_BUILDER(Name("ShapeN")                         \
170                               .Device(DEVICE_SYCL)               \
171                               .HostMemory("output")              \
172                               .TypeConstraint<int64>("out_type") \
173                               .TypeConstraint<type>("T"),        \
174                           ShapeNOp<int64>)
175 
176 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
177 TF_CALL_bool(REGISTER_SYCL_KERNEL);
178 #undef REGISTER_SYCL_KERNEL
179 
180 REGISTER_KERNEL_BUILDER(Name("ShapeN")
181                             .Device(DEVICE_SYCL)
182                             .HostMemory("input")
183                             .HostMemory("output")
184                             .TypeConstraint<int32>("T")
185                             .TypeConstraint<int32>("out_type"),
186                         ShapeNOp<int32>);
187 REGISTER_KERNEL_BUILDER(Name("ShapeN")
188                             .Device(DEVICE_SYCL)
189                             .HostMemory("input")
190                             .HostMemory("output")
191                             .TypeConstraint<int32>("T")
192                             .TypeConstraint<int64>("out_type"),
193                         ShapeNOp<int64>);
194 #endif  // TENSORFLOW_USE_SYCL
195 
196 // Rank ------------------------------------------
197 REGISTER_KERNEL_BUILDER(Name("Rank").Device(DEVICE_CPU).HostMemory("output"),
198                         RankOp);
199 
200 #ifdef TENSORFLOW_USE_SYCL
201 #define REGISTER_SYCL_KERNEL(type)                       \
202   REGISTER_KERNEL_BUILDER(Name("Rank")                   \
203                               .Device(DEVICE_SYCL)       \
204                               .TypeConstraint<type>("T") \
205                               .HostMemory("output"),     \
206                           RankOp);
207 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
208 #undef REGISTER_SYCL_KERNEL
209 
210 REGISTER_KERNEL_BUILDER(Name("Rank")
211                             .Device(DEVICE_SYCL)
212                             .TypeConstraint<int32>("T")
213                             .HostMemory("input")
214                             .HostMemory("output"),
215                         RankOp);
216 
217 REGISTER_KERNEL_BUILDER(Name("Rank")
218                             .Device(DEVICE_SYCL)
219                             .TypeConstraint<bool>("T")
220                             .HostMemory("input")
221                             .HostMemory("output"),
222                         RankOp);
223 #endif  // TENSORFLOW_USE_SYCL
224 
225 #if GOOGLE_CUDA
226 #define REGISTER_GPU_KERNEL(type)                        \
227   REGISTER_KERNEL_BUILDER(Name("Rank")                   \
228                               .Device(DEVICE_GPU)        \
229                               .TypeConstraint<type>("T") \
230                               .HostMemory("output"),     \
231                           RankOp);
232 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
233 TF_CALL_variant(REGISTER_GPU_KERNEL);
234 #undef REGISTER_GPU_KERNEL
235 
236 // A special GPU kernel for int32 and bool.
237 // TODO(b/25387198): Also enable int32 in device memory. This kernel
238 // registration requires all int32 inputs and outputs to be in host memory.
239 REGISTER_KERNEL_BUILDER(Name("Rank")
240                             .Device(DEVICE_GPU)
241                             .TypeConstraint<int32>("T")
242                             .HostMemory("input")
243                             .HostMemory("output"),
244                         RankOp);
245 
246 REGISTER_KERNEL_BUILDER(Name("Rank")
247                             .Device(DEVICE_GPU)
248                             .TypeConstraint<bool>("T")
249                             .HostMemory("input")
250                             .HostMemory("output"),
251                         RankOp);
252 
253 #endif  // GOOGLE_CUDA
254 
255 // Size ------------------------------------------
256 REGISTER_KERNEL_BUILDER(Name("Size")
257                             .Device(DEVICE_CPU)
258                             .HostMemory("output")
259                             .TypeConstraint<int32>("out_type"),
260                         SizeOp<int32>);
261 REGISTER_KERNEL_BUILDER(Name("Size")
262                             .Device(DEVICE_CPU)
263                             .HostMemory("output")
264                             .TypeConstraint<int64>("out_type"),
265                         SizeOp<int64>);
266 
267 #if GOOGLE_CUDA
268 #define REGISTER_GPU_KERNEL(type)                                \
269   REGISTER_KERNEL_BUILDER(Name("Size")                           \
270                               .Device(DEVICE_GPU)                \
271                               .TypeConstraint<type>("T")         \
272                               .TypeConstraint<int32>("out_type") \
273                               .HostMemory("output"),             \
274                           SizeOp<int32>);                        \
275   REGISTER_KERNEL_BUILDER(Name("Size")                           \
276                               .Device(DEVICE_GPU)                \
277                               .TypeConstraint<type>("T")         \
278                               .TypeConstraint<int64>("out_type") \
279                               .HostMemory("output"),             \
280                           SizeOp<int64>);
281 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
282 TF_CALL_bool(REGISTER_GPU_KERNEL);
283 TF_CALL_variant(REGISTER_GPU_KERNEL);
284 #undef REGISTER_GPU_KERNEL
285 
286 // A special GPU kernel for int32.
287 // TODO(b/25387198): Also enable int32 in device memory. This kernel
288 // registration requires all int32 inputs and outputs to be in host memory.
289 REGISTER_KERNEL_BUILDER(Name("Size")
290                             .Device(DEVICE_GPU)
291                             .TypeConstraint<int32>("T")
292                             .TypeConstraint<int32>("out_type")
293                             .HostMemory("input")
294                             .HostMemory("output"),
295                         SizeOp<int32>);
296 REGISTER_KERNEL_BUILDER(Name("Size")
297                             .Device(DEVICE_GPU)
298                             .TypeConstraint<int32>("T")
299                             .TypeConstraint<int64>("out_type")
300                             .HostMemory("input")
301                             .HostMemory("output"),
302                         SizeOp<int64>);
303 
304 #endif  // GOOGLE_CUDA
305 
306 #ifdef TENSORFLOW_USE_SYCL
307 #define REGISTER_SYCL_KERNEL(type)                               \
308   REGISTER_KERNEL_BUILDER(Name("Size")                           \
309                               .Device(DEVICE_SYCL)               \
310                               .TypeConstraint<type>("T")         \
311                               .TypeConstraint<int32>("out_type") \
312                               .HostMemory("output"),             \
313                           SizeOp<int32>);                        \
314   REGISTER_KERNEL_BUILDER(Name("Size")                           \
315                               .Device(DEVICE_SYCL)               \
316                               .TypeConstraint<type>("T")         \
317                               .TypeConstraint<int64>("out_type") \
318                               .HostMemory("output"),             \
319                           SizeOp<int64>);
320 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
321 TF_CALL_bool(REGISTER_SYCL_KERNEL);
322 #undef REGISTER_SYCL_KERNEL
323 
324 REGISTER_KERNEL_BUILDER(Name("Size")
325                             .Device(DEVICE_SYCL)
326                             .TypeConstraint<int32>("T")
327                             .TypeConstraint<int32>("out_type")
328                             .HostMemory("input")
329                             .HostMemory("output"),
330                         SizeOp<int32>);
331 REGISTER_KERNEL_BUILDER(Name("Size")
332                             .Device(DEVICE_SYCL)
333                             .TypeConstraint<int32>("T")
334                             .TypeConstraint<int64>("out_type")
335                             .HostMemory("input")
336                             .HostMemory("output"),
337                         SizeOp<int64>);
338 #endif  // TENSORFLOW_USE_SYCL
339 
340 // ExpandDims ------------------------------------
341 REGISTER_KERNEL_BUILDER(Name("ExpandDims")
342                             .Device(DEVICE_CPU)
343                             .HostMemory("dim")
344                             .TypeConstraint<int32>("Tdim"),
345                         ExpandDimsOp<int32>);
346 REGISTER_KERNEL_BUILDER(Name("ExpandDims")
347                             .Device(DEVICE_CPU)
348                             .HostMemory("dim")
349                             .TypeConstraint<int64>("Tdim"),
350                         ExpandDimsOp<int64>);
351 
352 #if GOOGLE_CUDA
353 #define REGISTER_GPU_KERNEL(type)                            \
354   REGISTER_KERNEL_BUILDER(Name("ExpandDims")                 \
355                               .Device(DEVICE_GPU)            \
356                               .TypeConstraint<type>("T")     \
357                               .TypeConstraint<int32>("Tdim") \
358                               .HostMemory("dim"),            \
359                           ExpandDimsOp<int32>);              \
360   REGISTER_KERNEL_BUILDER(Name("ExpandDims")                 \
361                               .Device(DEVICE_GPU)            \
362                               .TypeConstraint<type>("T")     \
363                               .TypeConstraint<int64>("Tdim") \
364                               .HostMemory("dim"),            \
365                           ExpandDimsOp<int64>);
366 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
367 TF_CALL_bool(REGISTER_GPU_KERNEL);
368 #undef REGISTER_GPU_KERNEL
369 
370 REGISTER_KERNEL_BUILDER(Name("ExpandDims")
371                             .Device(DEVICE_GPU)
372                             .TypeConstraint<int32>("T")
373                             .TypeConstraint<int32>("Tdim")
374                             .HostMemory("input")
375                             .HostMemory("dim")
376                             .HostMemory("output"),
377                         ExpandDimsOp<int32>);
378 REGISTER_KERNEL_BUILDER(Name("ExpandDims")
379                             .Device(DEVICE_GPU)
380                             .TypeConstraint<int32>("T")
381                             .TypeConstraint<int64>("Tdim")
382                             .HostMemory("input")
383                             .HostMemory("dim")
384                             .HostMemory("output"),
385                         ExpandDimsOp<int64>);
386 #endif  // GOOGLE_CUDA
387 
388 #ifdef TENSORFLOW_USE_SYCL
389 #define REGISTER_SYCL_KERNEL(type)                           \
390   REGISTER_KERNEL_BUILDER(Name("ExpandDims")                 \
391                               .Device(DEVICE_SYCL)           \
392                               .TypeConstraint<type>("T")     \
393                               .TypeConstraint<int32>("Tdim") \
394                               .HostMemory("dim"),            \
395                           ExpandDimsOp<int32>);              \
396   REGISTER_KERNEL_BUILDER(Name("ExpandDims")                 \
397                               .Device(DEVICE_SYCL)           \
398                               .TypeConstraint<type>("T")     \
399                               .TypeConstraint<int64>("Tdim") \
400                               .HostMemory("dim"),            \
401                           ExpandDimsOp<int64>);
402 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
403 TF_CALL_bool(REGISTER_SYCL_KERNEL);
404 #undef REGISTER_SYCL_KERNEL
405 
406 REGISTER_KERNEL_BUILDER(Name("ExpandDims")
407                             .Device(DEVICE_SYCL)
408                             .TypeConstraint<int32>("T")
409                             .TypeConstraint<int32>("Tdim")
410                             .HostMemory("input")
411                             .HostMemory("dim")
412                             .HostMemory("output"),
413                         ExpandDimsOp<int32>);
414 REGISTER_KERNEL_BUILDER(Name("ExpandDims")
415                             .Device(DEVICE_SYCL)
416                             .TypeConstraint<int32>("T")
417                             .TypeConstraint<int64>("Tdim")
418                             .HostMemory("input")
419                             .HostMemory("dim")
420                             .HostMemory("output"),
421                         ExpandDimsOp<int64>);
422 #endif  // TENSORFLOW_USE_SYCL
423 
424 // Squeeze ---------------------------------------
425 REGISTER_KERNEL_BUILDER(Name("Squeeze").Device(DEVICE_CPU), SqueezeOp);
426 
427 #if GOOGLE_CUDA
428 #define REGISTER_GPU_KERNEL(type)                                   \
429   REGISTER_KERNEL_BUILDER(                                          \
430       Name("Squeeze").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
431       SqueezeOp);
432 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
433 TF_CALL_bool(REGISTER_GPU_KERNEL);
434 #undef REGISTER_GPU_KERNEL
435 
436 // A special GPU kernel for int32.
437 // TODO(b/25387198): Also enable int32 in device memory. This kernel
438 // registration requires all int32 inputs and outputs to be in host memory.
439 REGISTER_KERNEL_BUILDER(Name("Squeeze")
440                             .Device(DEVICE_GPU)
441                             .TypeConstraint<int32>("T")
442                             .HostMemory("input")
443                             .HostMemory("output"),
444                         SqueezeOp);
445 #endif  // GOOGLE_CUDA
446 
447 #if TENSORFLOW_USE_SYCL
448 #define REGISTER_SYCL_KERNEL(type)                                   \
449   REGISTER_KERNEL_BUILDER(                                           \
450       Name("Squeeze").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
451       SqueezeOp);
452 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
453 TF_CALL_bool(REGISTER_SYCL_KERNEL);
454 #undef REGISTER_SYCL_KERNEL
455 
456 REGISTER_KERNEL_BUILDER(Name("Squeeze")
457                             .Device(DEVICE_SYCL)
458                             .TypeConstraint<int32>("T")
459                             .HostMemory("input")
460                             .HostMemory("output"),
461                         SqueezeOp);
462 #endif  // TENSORFLOW_USE_SYCL
463 
464 class EnsureShapeOp : public OpKernel {
465  public:
EnsureShapeOp(OpKernelConstruction * ctx)466   explicit EnsureShapeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
467     OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &expected_shape_));
468   }
469 
Compute(OpKernelContext * ctx)470   void Compute(OpKernelContext* ctx) override {
471     TensorShape shape;
472     OP_REQUIRES_OK(ctx, shape_op_helpers::GetShape(ctx, 0, &shape));
473 
474     if (!expected_shape_.IsCompatibleWith(shape)) {
475       ctx->SetStatus(errors::InvalidArgument(
476           "Shape of tensor ", this->def().input(0), " ", shape.DebugString(),
477           " is not compatible with expected shape ",
478           expected_shape_.DebugString(), "."));
479     }
480 
481     // If shape matches, outputs the tensor.
482     if (IsRefType(ctx->input_dtype(0))) {
483       ctx->forward_ref_input_to_ref_output(0, 0);
484     } else {
485       ctx->set_output(0, ctx->input(0));
486     }
487   }
488 
IsExpensive()489   bool IsExpensive() override { return false; }
490 
491  private:
492   PartialTensorShape expected_shape_;
493 };
494 
495 // NOTE(rachelim): The kernel registrations for EnsureShapeOp are identical to
496 // those of the identity op, since the ops have the same device type
497 // constraints.
498 REGISTER_KERNEL_BUILDER(Name("EnsureShape").Device(DEVICE_CPU), EnsureShapeOp);
499 
500 #if TENSORFLOW_USE_SYCL
501 #define REGISTER_SYCL_KERNEL(type)                                       \
502   REGISTER_KERNEL_BUILDER(                                               \
503       Name("EnsureShape").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
504       EnsureShapeOp)
505 
506 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL);
507 
508 #undef REGISTER_SYCL_KERNEL
509 
510 #define REGISTER_SYCL_HOST_KERNEL(type)                   \
511   REGISTER_KERNEL_BUILDER(Name("EnsureShape")             \
512                               .Device(DEVICE_SYCL)        \
513                               .HostMemory("input")        \
514                               .HostMemory("output")       \
515                               .TypeConstraint<type>("T"), \
516                           EnsureShapeOp)
517 
518 REGISTER_SYCL_HOST_KERNEL(int32);
519 REGISTER_SYCL_HOST_KERNEL(bool);
520 
521 #undef REGISTER_SYCL_HOST_KERNEL
522 
523 #endif  // TENSORFLOW_USE_SYCL
524 
525 #define REGISTER_GPU_KERNEL(type)                                       \
526   REGISTER_KERNEL_BUILDER(                                              \
527       Name("EnsureShape").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
528       EnsureShapeOp)
529 
530 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
531 REGISTER_GPU_KERNEL(Variant);
532 
533 #undef REGISTER_GPU_KERNEL
534 
535 #if GOOGLE_CUDA
536 // A special GPU kernel for int32 and bool.
537 // TODO(b/25387198): Also enable int32 in device memory. This kernel
538 // registration requires all int32 inputs and outputs to be in host memory.
539 #define REGISTER_GPU_HOST_KERNEL(type)                    \
540   REGISTER_KERNEL_BUILDER(Name("EnsureShape")             \
541                               .Device(DEVICE_GPU)         \
542                               .HostMemory("input")        \
543                               .HostMemory("output")       \
544                               .TypeConstraint<type>("T"), \
545                           EnsureShapeOp)
546 
547 REGISTER_GPU_HOST_KERNEL(int32);
548 REGISTER_GPU_HOST_KERNEL(bool);
549 REGISTER_GPU_HOST_KERNEL(string);
550 REGISTER_GPU_HOST_KERNEL(ResourceHandle);
551 
552 #undef REGISTER_GPU_HOST_KERNEL
553 
554 #endif
555 }  // namespace tensorflow
556