/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#define EIGEN_USE_THREADS

#include <stddef.h>

#include <algorithm>
#include <string>
#include <vector>

#include "tensorflow/core/framework/kernel_def_builder.h"
#include "tensorflow/core/framework/numeric_types.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_shape.pb.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/broadcast_to_op.h"
#include "tensorflow/core/kernels/list_kernels.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/bfloat16.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/bcast.h"
#include "tensorflow/core/util/ragged_to_dense_util.h"

namespace tensorflow {

namespace {
typedef Eigen::ThreadPoolDevice CPUDevice;
using ::std::vector;

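// Input positions for the RaggedTensorToTensor op: the requested output
// shape, the flat values, the default value, and then the row-partition
// tensors.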
const int kShapeInputIndex = 0;
const int kValueInputIndex = 1;
const int kDefaultValueInputIndex = 2;
const int kFirstPartitionInputIndex = 3;

template <typename INDEX_TYPE>
class RaggedTensorToTensorBaseOp : public OpKernel {
 public:
  typedef
      typename ::tensorflow::TTypes<const INDEX_TYPE>::Flat RowPartitionTensor;

  explicit RaggedTensorToTensorBaseOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, GetRowPartitionTypes<OpKernelConstruction>(
                                context, &row_partition_types_));
    ragged_rank_ = GetRaggedRank(row_partition_types_);
  }

  // Returns the relationship between dimension and dimension + 1.
  RowPartitionType GetRowPartitionTypeByDimension(int dimension) {
    if (row_partition_types_[0] == RowPartitionType::FIRST_DIM_SIZE) {
      return row_partition_types_[dimension + 1];
    } else {
      return row_partition_types_[dimension];
    }
  }

  // Returns the row-partition tensor describing the relationship between
  // dimension and dimension + 1.
  RowPartitionTensor GetRowPartitionTensor(OpKernelContext* c, int dimension) {
    if (row_partition_types_[0] == RowPartitionType::FIRST_DIM_SIZE) {
      return c->input(dimension + 1 + kFirstPartitionInputIndex)
          .flat<INDEX_TYPE>();
    } else {
      return c->input(dimension + kFirstPartitionInputIndex).flat<INDEX_TYPE>();
    }
  }

  Status GetMaxWidth(OpKernelContext* c, int dimension, INDEX_TYPE* result) {
    const RowPartitionTensor row_partition_tensor =
        GetRowPartitionTensor(c, dimension - 1);
    switch (GetRowPartitionTypeByDimension(dimension - 1)) {
      case RowPartitionType::VALUE_ROWIDS:
        *result = GetMaxWidthValueRowID(row_partition_tensor);
        return Status::OK();
      case RowPartitionType::ROW_SPLITS:
        *result = GetMaxWidthRowSplit(row_partition_tensor);
        return Status::OK();
      default:
        return errors::InvalidArgument(
            "Cannot handle partition type ",
            RowPartitionTypeToString(
                GetRowPartitionTypeByDimension(dimension - 1)));
    }
  }

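  // For example (illustrative): row_split = [0, 2, 3, 7] describes rows of
  // widths 2, 1, and 4, so GetMaxWidthRowSplit returns 4.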
  static INDEX_TYPE GetMaxWidthRowSplit(const RowPartitionTensor& row_split) {
    const INDEX_TYPE tensor_length = row_split.size();
    if (tensor_length == 0 || tensor_length == 1) {
      return 0;
    }
    INDEX_TYPE max_width = 0;
    for (INDEX_TYPE i = 0; i < tensor_length - 1; ++i) {
      const INDEX_TYPE current_width = row_split(i + 1) - row_split(i);
      if (current_width > max_width) {
        max_width = current_width;
      }
    }
    return max_width;
  }

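  // For example (illustrative): value_rowids = [0, 0, 0, 2, 2] places three
  // values in row 0 and two in row 2, so GetMaxWidthValueRowID returns 3.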
  static INDEX_TYPE GetMaxWidthValueRowID(
      const RowPartitionTensor& value_rowids) {
    const INDEX_TYPE index_length = value_rowids.size();
    if (index_length == 0) {
      return 0;
    }
    INDEX_TYPE first_equal_index = 0;
    INDEX_TYPE first_equal_index_value = value_rowids(0);
    INDEX_TYPE max_width = 0;
    for (INDEX_TYPE i = 1; i < index_length; ++i) {
      const INDEX_TYPE value = value_rowids(i);
      if (value != first_equal_index_value) {
        first_equal_index_value = value;
        max_width = std::max(i - first_equal_index, max_width);
        first_equal_index = i;
      }
    }
    return std::max(index_length - first_equal_index, max_width);
  }

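  // Computes the size of each output dimension: starts from the shapes given
  // by the `shape` input and the values tensor, then fills in any unknown
  // (-1) dimensions from `first_dim` and GetMaxWidth().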
  Status CalculateOutputSize(INDEX_TYPE first_dim, OpKernelContext* c,
                             vector<INDEX_TYPE>* result) {
    TensorShapeProto value_shape_proto;
    c->input(kValueInputIndex).shape().AsProto(&value_shape_proto);

    TensorShapeProto default_value_shape_proto;
    c->input(kDefaultValueInputIndex)
        .shape()
        .AsProto(&default_value_shape_proto);

    TensorShapeProto output_shape_proto;
    TF_RETURN_IF_ERROR(ValidateDefaultValueShape(default_value_shape_proto,
                                                 value_shape_proto));

    TensorShapeProto shape_proto;
    {
      PartialTensorShape partial_tensor_shape;
      TF_RETURN_IF_ERROR(TensorShapeFromTensor(c->input(kShapeInputIndex),
                                               &partial_tensor_shape));
      partial_tensor_shape.AsProto(&shape_proto);
    }

    TF_RETURN_IF_ERROR(CombineRaggedTensorToTensorShapes(
        ragged_rank_, shape_proto, value_shape_proto, &output_shape_proto));

    result->reserve(output_shape_proto.dim_size());
    for (const TensorShapeProto::Dim& dim : output_shape_proto.dim()) {
      // Note that this may be -1 (if dimension size is unknown).
      result->push_back(dim.size());
    }

    if ((*result)[0] < 0) {
      (*result)[0] = first_dim;
    }
    for (int i = 1; i <= ragged_rank_; ++i) {
      if ((*result)[i] < 0) {
        TF_RETURN_IF_ERROR(GetMaxWidth(c, i, &(*result)[i]));
      }
    }
    return Status::OK();
  }

  /**
   * The output_index represents the index in the output tensor
   * where the first element of a particular dimension would be written.
   * If it is -1, it indicates that the index is out of range.
   * For example, given first_dimension = 10, first_dimension_output = 6,
   * and output_index_multiplier = 100:
   * result = [0 100 200 300 400 500 -1 -1 -1 -1]
   * If first_dimension_output = 11 instead, then:
   * result = [0 100 200 300 400 500 600 700 800 900]
   */
  void CalculateFirstParentOutputIndex(INDEX_TYPE first_dimension,
                                       INDEX_TYPE output_index_multiplier,
                                       INDEX_TYPE first_dimension_output,
                                       vector<INDEX_TYPE>* result) {
    const INDEX_TYPE min_dimension =
        std::min(first_dimension, first_dimension_output);
    result->reserve(first_dimension);
    INDEX_TYPE current_output_index = 0;
    for (INDEX_TYPE i = 0; i < min_dimension;
         ++i, current_output_index += output_index_multiplier) {
      result->push_back(current_output_index);
    }
    for (INDEX_TYPE i = min_dimension; i < first_dimension; ++i) {
      result->push_back(-1);
    }
    DCHECK_EQ(result->size(), first_dimension);
  }

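  // Maps each value covered by `row_split` to the output index it should be
  // written to, given the output indices of its parent rows. For example
  // (illustrative): row_split = [0, 2, 3], parent_output_index = [100, 300],
  // output_index_multiplier = 10, and output_size = 2 yield
  // result = [100, 110, 300].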
  void CalculateOutputIndexRowSplit(
      const RowPartitionTensor& row_split,
      const vector<INDEX_TYPE>& parent_output_index,
      INDEX_TYPE output_index_multiplier, INDEX_TYPE output_size,
      vector<INDEX_TYPE>* result) {
    INDEX_TYPE row_split_size = row_split.size();
    if (row_split_size > 0) {
      result->reserve(row_split(row_split_size - 1));
    }
    for (INDEX_TYPE i = 0; i < row_split_size - 1; ++i) {
      INDEX_TYPE row_length = row_split(i + 1) - row_split(i);
      INDEX_TYPE real_length = std::min(output_size, row_length);
      INDEX_TYPE parent_output_index_current = parent_output_index[i];

      if (parent_output_index_current == -1) {
        real_length = 0;
      }
      for (INDEX_TYPE j = 0; j < real_length; ++j) {
        result->push_back(parent_output_index_current);
        parent_output_index_current += output_index_multiplier;
      }
      for (INDEX_TYPE j = 0; j < row_length - real_length; ++j) {
        result->push_back(-1);
      }
    }
    if (row_split_size > 0) {
      DCHECK_EQ(result->size(), row_split(row_split_size - 1));
    }
  }

  // Calculate the output index of the first element of a list.
  // The parent_output_index is the same computation for the previous list.
  // -1 indicates an element or list that is out of range.
  // The output_index_multiplier is the number of output indices one moves
  // forward for each column.
  // E.g., given:
  //   value_rowids: [0 1 2 2 2 3 5 5 6]
  //   parent_output_index: [1000 1100 2000 2100 2200 -1 3000]
  //   output_index_multiplier: 10
  //   output_size: 2
  // You get:
  //   result = [1000 1100 2000 2010 -1 2100 -1 -1 3000]
  //   result[0] = parent_output_index[value_rowids[0]]
  //   result[1] = parent_output_index[value_rowids[1]]
  //   result[2] = parent_output_index[value_rowids[2]]
  //   result[3] = parent_output_index[value_rowids[3]] + 10
  //   result[4] = -1 because it is the third element of its row and
  //               output_size is only 2.
  //   result[5] = parent_output_index[value_rowids[5]]
  //   result[6] = -1 because parent_output_index[value_rowids[6]] == -1
  //   result[7] = -1 because parent_output_index[value_rowids[7]] == -1
  //   result[8] = parent_output_index[value_rowids[8]]
  void CalculateOutputIndexValueRowID(
      const RowPartitionTensor& value_rowids,
      const vector<INDEX_TYPE>& parent_output_index,
      INDEX_TYPE output_index_multiplier, INDEX_TYPE output_size,
      vector<INDEX_TYPE>* result) {
    const INDEX_TYPE index_size = value_rowids.size();
    result->reserve(index_size);
    if (index_size == 0) {
      return;
    }

    INDEX_TYPE current_output_column = 0;
    INDEX_TYPE current_value_rowid = value_rowids(0);
    DCHECK_LT(current_value_rowid, parent_output_index.size());
    INDEX_TYPE current_output_index = parent_output_index[current_value_rowid];
    result->push_back(current_output_index);
    for (INDEX_TYPE i = 1; i < index_size; ++i) {
      INDEX_TYPE next_value_rowid = value_rowids(i);
      if (next_value_rowid == current_value_rowid) {
        if (current_output_index >= 0) {
          ++current_output_column;
          if (current_output_column < output_size) {
            current_output_index += output_index_multiplier;
          } else {
            current_output_index = -1;
          }
        }
      } else {
        current_output_column = 0;
        current_value_rowid = next_value_rowid;
        DCHECK_LT(next_value_rowid, parent_output_index.size());
        current_output_index = parent_output_index[next_value_rowid];
      }
      result->push_back(current_output_index);
    }
    DCHECK_EQ(result->size(), value_rowids.size());
  }

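  // Dispatches to the ROW_SPLITS or VALUE_ROWIDS implementation for the row
  // partition describing `dimension` -> `dimension + 1`.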
  Status CalculateOutputIndex(OpKernelContext* context, int dimension,
                              const vector<INDEX_TYPE>& parent_output_index,
                              INDEX_TYPE output_index_multiplier,
                              INDEX_TYPE output_size,
                              vector<INDEX_TYPE>* result) {
    const RowPartitionTensor row_partition_tensor =
        GetRowPartitionTensor(context, dimension);
    auto partition_type = GetRowPartitionTypeByDimension(dimension);
    switch (partition_type) {
      case RowPartitionType::VALUE_ROWIDS:
        CalculateOutputIndexValueRowID(
            row_partition_tensor, parent_output_index, output_index_multiplier,
            output_size, result);
        return tensorflow::Status::OK();
      case RowPartitionType::ROW_SPLITS:
        CalculateOutputIndexRowSplit(row_partition_tensor, parent_output_index,
                                     output_index_multiplier, output_size,
                                     result);
        return tensorflow::Status::OK();
      default:
        return errors::InvalidArgument(
            "Unsupported partition type: ",
            RowPartitionTypeToString(partition_type));
    }
  }

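  // Determines the size of the outermost dimension from the first
  // row-partition tensor: FIRST_DIM_SIZE stores it directly as a scalar,
  // while ROW_SPLITS of length n + 1 describes n rows.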
  Status GetFirstDimensionSize(OpKernelContext* context, INDEX_TYPE* result) {
    const Tensor first_partition_tensor =
        context->input(kFirstPartitionInputIndex);
    const RowPartitionType first_partition_type = row_partition_types_[0];
    switch (first_partition_type) {
      case RowPartitionType::FIRST_DIM_SIZE:
        *result = first_partition_tensor.scalar<INDEX_TYPE>()();
        return Status::OK();
      case RowPartitionType::VALUE_ROWIDS:
        return errors::InvalidArgument(
            "Cannot handle VALUE_ROWIDS in first dimension.");
      case RowPartitionType::ROW_SPLITS:
        *result = first_partition_tensor.shape().dim_size(0) - 1;
        return Status::OK();
      default:
        return errors::InvalidArgument(
            "Cannot handle type ",
            RowPartitionTypeToString(first_partition_type));
    }
  }

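  // Computes the dense output: determines the output shape, builds the
  // per-dimension stride multipliers, maps every ragged value to a flat
  // output index, and hands the final copy/fill off to SetOutput().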
  void Compute(OpKernelContext* context) override {
    INDEX_TYPE first_dimension;
    OP_REQUIRES_OK(context, GetFirstDimensionSize(context, &first_dimension));
    vector<INDEX_TYPE> output_size;
    OP_REQUIRES_OK(context,
                   CalculateOutputSize(first_dimension, context, &output_size));
    vector<INDEX_TYPE> multiplier;
    multiplier.resize(ragged_rank_ + 1);

    multiplier[multiplier.size() - 1] = 1;
    for (int i = multiplier.size() - 2; i >= 0; --i) {
      multiplier[i] = multiplier[i + 1] * output_size[i + 1];
    }
    // Full size of the tensor.
    TensorShape output_shape;
    OP_REQUIRES_OK(context,
                   TensorShapeUtils::MakeShape(output_size, &output_shape));
    Tensor* output_tensor = nullptr;

    OP_REQUIRES_OK(context,
                   context->allocate_output(0, output_shape, &output_tensor));
    const INDEX_TYPE full_size = multiplier[0] * output_size[0];
    if (full_size > 0) {
      vector<INDEX_TYPE> output_index, new_output_index;
      int nvals = context->input(kValueInputIndex).shape().dim_size(0);
      output_index.reserve(nvals);
      new_output_index.reserve(nvals);

      CalculateFirstParentOutputIndex(first_dimension, multiplier[0],
                                      output_size[0], &output_index);
      for (int i = 1; i <= ragged_rank_; ++i) {
        OP_REQUIRES_OK(context, CalculateOutputIndex(
                                    context, i - 1, output_index, multiplier[i],
                                    output_size[i], &new_output_index));
        output_index.swap(new_output_index);
        new_output_index.clear();
      }

      SetOutput(context, ragged_rank_, output_index, output_tensor);
    }
  }

  virtual void SetOutput(OpKernelContext* context, int ragged_rank,
                         const vector<INDEX_TYPE>& output_index,
                         Tensor* output_tensor) = 0;

 private:
  vector<RowPartitionType> row_partition_types_;
  int ragged_rank_;
};

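// Copies `size` elements one at a time; used (via the copy_array
// specializations below) for element types that are not trivially copyable.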
template <typename VALUE_TYPE, typename INDEX_TYPE>
void slow_copy_array(VALUE_TYPE* dst, const VALUE_TYPE* src, INDEX_TYPE size) {
  for (INDEX_TYPE index = 0; index < size; ++index) {
    dst[index] = src[index];
  }
}

template <typename VALUE_TYPE, typename INDEX_TYPE>
void copy_array(VALUE_TYPE* dst, const VALUE_TYPE* src, INDEX_TYPE size) {
  memcpy(dst, src, size * sizeof(VALUE_TYPE));
}

template <>
void copy_array<tstring, int64>(tstring* dst, const tstring* src, int64 size) {
  slow_copy_array(dst, src, size);
}

template <>
void copy_array<tstring, int32>(tstring* dst, const tstring* src, int32 size) {
  slow_copy_array(dst, src, size);
}

// If we don't specialize for Eigen::half, we get:
// undefined behavior, destination object type 'Eigen::half'
// is not TriviallyCopyable
template <>
void copy_array<Eigen::half, int64>(Eigen::half* dst, const Eigen::half* src,
                                    int64 size) {
  slow_copy_array(dst, src, size);
}

template <>
void copy_array<Eigen::half, int32>(Eigen::half* dst, const Eigen::half* src,
                                    int32 size) {
  slow_copy_array(dst, src, size);
}

template <typename VALUE_TYPE, typename INDEX_TYPE>
class RaggedTensorToTensorOp : public RaggedTensorToTensorBaseOp<INDEX_TYPE> {
 public:
  explicit RaggedTensorToTensorOp(OpKernelConstruction* context)
      : RaggedTensorToTensorBaseOp<INDEX_TYPE>(context) {}

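  // Writes the ragged values into the dense output tensor at the positions
  // given by `output_index`, and fills every remaining position with the
  // (possibly broadcast) default value.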
  void SetOutput(OpKernelContext* context, int ragged_rank,
                 const vector<INDEX_TYPE>& output_index,
                 Tensor* output_tensor) override {
    // Note: it's ok to use OP_REQUIRES_OK (rather than TF_RETURN_IF_ERROR)
    // in this function, but only because it's the last thing we do before
    // returning from Compute().

    if (output_tensor->NumElements() == 0) return;

    const auto& values_tensor = context->input(kValueInputIndex);
    const VALUE_TYPE* values_base = values_tensor.flat<VALUE_TYPE>().data();
    const auto& default_value_tensor = context->input(kDefaultValueInputIndex);
    VALUE_TYPE* output_base = output_tensor->flat<VALUE_TYPE>().data();

    TensorShape element_shape = output_tensor->shape();
    element_shape.RemoveDimRange(0, ragged_rank + 1);
    int value_element_size = element_shape.num_elements();
    size_t output_index_size = output_index.size();

    // Broadcast the default value to value_element_size.  (We can skip this
    // if default_value_tensor.NumElements() == 1, since we use std::fill
    // when that's true.)
    const VALUE_TYPE* default_value =
        default_value_tensor.flat<VALUE_TYPE>().data();
    Tensor bcast_default;  // Temporary tensor for result of broadcast
    if (default_value_tensor.NumElements() != value_element_size &&
        default_value_tensor.NumElements() != 1) {
      const auto& src_shape = default_value_tensor.shape();
      BCast bcast(BCast::FromShape(src_shape), BCast::FromShape(element_shape),
                  /*fewer_dims_optimization=*/true);
      // Note: bcast should always be valid, since we rejected any incompatible
      // shapes when we called ValidateDefaultValueShape().
      OP_REQUIRES(context, bcast.IsValid(),
                  errors::InvalidArgument("Error broadcasting default_value"));
      OP_REQUIRES_OK(context,
                     context->allocate_temp(default_value_tensor.dtype(),
                                            element_shape, &bcast_default));
      const CPUDevice& device = context->eigen_device<CPUDevice>();
      functor::BroadcastTo<CPUDevice, VALUE_TYPE>()(
          device, context, bcast_default, element_shape, default_value_tensor,
          src_shape, bcast);
      default_value = bcast_default.flat<VALUE_TYPE>().data();
    }

    // Loop through the output_index vector, finding contiguous regions that
    // should be copied.  Once we find the end of a contiguous region, copy it
    // and add any necessary padding (with default_value).
    INDEX_TYPE src_start = 0;  // Start of contiguous region (in values)
    INDEX_TYPE dst_start = 0;  // Destination for contiguous region (in output)
    INDEX_TYPE dst_end = 0;    // Destination for contiguous region (in output)
    for (int src_i = 0; src_i <= output_index_size; ++src_i) {
      // dst_i is the destination where the value at src_i should be copied.
      INDEX_TYPE dst_i = src_i < output_index_size ? output_index[src_i] : -1;

      // If we're still in a contiguous region, then update dst_end and go to
      // the next src_i.
      if (dst_i == dst_end) {
        ++dst_end;
        continue;
      }

      // We found the end of a contiguous region.  This can be because we
      // found a gap (dst_i > dst_end), or a source value that shouldn't be
      // copied because it's out-of-bounds (dst_i == -1), or the end of the
      // tensor (dst_i == -1).
      if (dst_start < dst_end) {
        // Copy the contiguous region.
        const VALUE_TYPE* src = values_base + src_start * value_element_size;
        VALUE_TYPE* dst = output_base + dst_start * value_element_size;
        INDEX_TYPE nvals = (dst_end - dst_start) * value_element_size;
        copy_array<VALUE_TYPE, INDEX_TYPE>(dst, src, nvals);
      }

      // Add any necessary padding (w/ default_value).
      if (src_i >= output_index_size) {
        // We reached the end of values: pad to the end of output.
        size_t output_size = output_tensor->NumElements();
        dst_i = output_size / value_element_size;
      }
      if (dst_i > dst_end) {
        if (default_value_tensor.NumElements() == 1) {
          std::fill(output_base + dst_end * value_element_size,
                    output_base + dst_i * value_element_size, *default_value);
          dst_end = dst_i;
        } else {
          while (dst_i > dst_end) {
            VALUE_TYPE* dst = output_base + dst_end * value_element_size;
            copy_array<VALUE_TYPE, INDEX_TYPE>(dst, default_value,
                                               value_element_size);
            ++dst_end;
          }
        }
      }

      // Update indices.
      if (dst_i < 0) {
        // src_i should be skipped -- leave it out of the contiguous region.
        src_start = src_i + 1;
        dst_start = dst_end;
      } else {
        // src_i should be copied -- include it in the contiguous region.
        src_start = src_i;
        dst_start = dst_end;
        dst_end = dst_start + 1;
      }
    }
  }
};

#define REGISTER_CPU_KERNEL_INDEX_TYPE(value_type, index_type)       \
  REGISTER_KERNEL_BUILDER(Name("RaggedTensorToTensor")               \
                              .Device(DEVICE_CPU)                    \
                              .TypeConstraint<value_type>("T")       \
                              .TypeConstraint<index_type>("Tindex"), \
                          RaggedTensorToTensorOp<value_type, index_type>);

#define REGISTER_CPU_KERNEL(value_type)                            \
  REGISTER_CPU_KERNEL_INDEX_TYPE(value_type, tensorflow::int64);   \
  REGISTER_CPU_KERNEL_INDEX_TYPE(value_type, tensorflow::int32);

TF_CALL_POD_TYPES(REGISTER_CPU_KERNEL);
TF_CALL_string(REGISTER_CPU_KERNEL);
TF_CALL_QUANTIZED_TYPES(REGISTER_CPU_KERNEL);
TF_CALL_quint16(REGISTER_CPU_KERNEL);
TF_CALL_qint16(REGISTER_CPU_KERNEL);

#undef REGISTER_CPU_KERNEL

}  // namespace
}  // namespace tensorflow