/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#define EIGEN_USE_THREADS

#include <stddef.h>
#include <string.h>  // for memcpy, used by copy_array below

#include <algorithm>
#include <string>
#include <vector>

#include "tensorflow/core/framework/kernel_def_builder.h"
#include "tensorflow/core/framework/numeric_types.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_shape.pb.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/broadcast_to_op.h"
#include "tensorflow/core/kernels/list_kernels.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/bfloat16.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/bcast.h"
#include "tensorflow/core/util/ragged_to_dense_util.h"

namespace tensorflow {

namespace {
typedef Eigen::ThreadPoolDevice CPUDevice;
using ::std::vector;

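// Fixed input indices for the op; the variadic row-partition tensors start
// at kFirstPartitionInputIndex.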
const int kShapeInputIndex = 0;
const int kValueInputIndex = 1;
const int kDefaultValueInputIndex = 2;
const int kFirstPartitionInputIndex = 3;

template <typename INDEX_TYPE>
class RaggedTensorToTensorBaseOp : public OpKernel {
 public:
  typedef
      typename ::tensorflow::TTypes<const INDEX_TYPE>::Flat RowPartitionTensor;

  explicit RaggedTensorToTensorBaseOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, GetRowPartitionTypes<OpKernelConstruction>(
                                context, &row_partition_types_));
    ragged_rank_ = GetRaggedRank(row_partition_types_);
  }

  // Returns the relationship between dimension and dimension + 1.
  RowPartitionType GetRowPartitionTypeByDimension(int dimension) {
    if (row_partition_types_[0] == RowPartitionType::FIRST_DIM_SIZE) {
      return row_partition_types_[dimension + 1];
    } else {
      return row_partition_types_[dimension];
    }
  }

  // Returns the row partition tensor describing the relationship between
  // dimension and dimension + 1.
  RowPartitionTensor GetRowPartitionTensor(OpKernelContext* c, int dimension) {
    if (row_partition_types_[0] == RowPartitionType::FIRST_DIM_SIZE) {
      return c->input(dimension + 1 + kFirstPartitionInputIndex)
          .flat<INDEX_TYPE>();
    } else {
      return c->input(dimension + kFirstPartitionInputIndex).flat<INDEX_TYPE>();
    }
  }

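  // Computes the maximum width (number of elements in the longest row) of
  // `dimension`, used to size any output dimension that was left unspecified
  // (-1) in the requested shape.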
  Status GetMaxWidth(OpKernelContext* c, int dimension, INDEX_TYPE* result) {
    const RowPartitionTensor row_partition_tensor =
        GetRowPartitionTensor(c, dimension - 1);
    switch (GetRowPartitionTypeByDimension(dimension - 1)) {
      case RowPartitionType::VALUE_ROWIDS:
        *result = GetMaxWidthValueRowID(row_partition_tensor);
        return Status::OK();
      case RowPartitionType::ROW_SPLITS:
        *result = GetMaxWidthRowSplit(row_partition_tensor);
        return Status::OK();
      default:
        return errors::InvalidArgument(
            "Cannot handle partition type ",
            RowPartitionTypeToString(
                GetRowPartitionTypeByDimension(dimension - 1)));
    }
  }

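  // Maximum row width for a ROW_SPLITS partition: the largest difference
  // between adjacent entries of row_split.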
  static INDEX_TYPE GetMaxWidthRowSplit(const RowPartitionTensor& row_split) {
    const INDEX_TYPE tensor_length = row_split.size();
    if (tensor_length == 0 || tensor_length == 1) {
      return 0;
    }
    INDEX_TYPE max_width = 0;
    for (INDEX_TYPE i = 0; i < tensor_length - 1; ++i) {
      const INDEX_TYPE current_width = row_split(i + 1) - row_split(i);
      if (current_width > max_width) {
        max_width = current_width;
      }
    }
    return max_width;
  }

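  // Maximum row width for a VALUE_ROWIDS partition: the longest run of equal
  // consecutive entries in value_rowids (which is assumed to be sorted).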
  static INDEX_TYPE GetMaxWidthValueRowID(
      const RowPartitionTensor& value_rowids) {
    const INDEX_TYPE index_length = value_rowids.size();
    if (index_length == 0) {
      return 0;
    }
    INDEX_TYPE first_equal_index = 0;
    INDEX_TYPE first_equal_index_value = value_rowids(0);
    INDEX_TYPE max_width = 0;
    for (INDEX_TYPE i = 1; i < index_length; ++i) {
      const INDEX_TYPE value = value_rowids(i);
      if (value != first_equal_index_value) {
        first_equal_index_value = value;
        max_width = std::max(i - first_equal_index, max_width);
        first_equal_index = i;
      }
    }
    return std::max(index_length - first_equal_index, max_width);
  }

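  // Computes the dense output shape, as a vector of dimension sizes: starts
  // from the requested `shape` input, substituting `first_dim` for an unknown
  // first dimension and the maximum row width for any unknown ragged
  // dimension.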
  Status CalculateOutputSize(INDEX_TYPE first_dim, OpKernelContext* c,
                             vector<INDEX_TYPE>* result) {
    TensorShapeProto value_shape_proto;
    c->input(kValueInputIndex).shape().AsProto(&value_shape_proto);

    TensorShapeProto default_value_shape_proto;
    c->input(kDefaultValueInputIndex)
        .shape()
        .AsProto(&default_value_shape_proto);

    TensorShapeProto output_shape_proto;
    TF_RETURN_IF_ERROR(ValidateDefaultValueShape(default_value_shape_proto,
                                                 value_shape_proto));

    TensorShapeProto shape_proto;
    {
      PartialTensorShape partial_tensor_shape;
      TF_RETURN_IF_ERROR(TensorShapeFromTensor(c->input(kShapeInputIndex),
                                               &partial_tensor_shape));
      partial_tensor_shape.AsProto(&shape_proto);
    }

    TF_RETURN_IF_ERROR(CombineRaggedTensorToTensorShapes(
        ragged_rank_, shape_proto, value_shape_proto, &output_shape_proto));

    result->reserve(output_shape_proto.dim_size());
    for (const TensorShapeProto::Dim& dim : output_shape_proto.dim()) {
      // Note that this may be -1 (if dimension size is unknown).
      result->push_back(dim.size());
    }

    if ((*result)[0] < 0) {
      (*result)[0] = first_dim;
    }
    for (int i = 1; i <= ragged_rank_; ++i) {
      if ((*result)[i] < 0) {
        TF_RETURN_IF_ERROR(GetMaxWidth(c, i, &(*result)[i]));
      }
    }
    return Status::OK();
  }

  /**
   * The output_index represents the index in the output tensor
   * where the first element of a particular dimension would be written.
   * If it is -1, it indicates that the index is out of range.
   * For example, given first_dimension = 10, first_dimension_output = 6,
   * and output_index_multiplier = 100:
   * result = [0 100 200 300 400 500 -1 -1 -1 -1]
   * If first_dimension_output = 11 instead, then:
   * result = [0 100 200 300 400 500 600 700 800 900]
   */
  void CalculateFirstParentOutputIndex(INDEX_TYPE first_dimension,
                                       INDEX_TYPE output_index_multiplier,
                                       INDEX_TYPE first_dimension_output,
                                       vector<INDEX_TYPE>* result) {
    const INDEX_TYPE min_dimension =
        std::min(first_dimension, first_dimension_output);
    result->reserve(first_dimension);
    int current_output_index = 0;
    for (INDEX_TYPE i = 0; i < min_dimension;
         ++i, current_output_index += output_index_multiplier) {
      result->push_back(current_output_index);
    }
    for (INDEX_TYPE i = min_dimension; i < first_dimension; ++i) {
      result->push_back(-1);
    }
    DCHECK_EQ(result->size(), first_dimension);
  }

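  // Computes the output index for each value in a dimension partitioned by
  // row_splits.  Within each row, the first min(row_length, output_size)
  // values are assigned consecutive indices (stepping by
  // output_index_multiplier) starting from the parent's output index; the
  // remainder, and all values in rows whose parent index is -1, get -1.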
  void CalculateOutputIndexRowSplit(
      const RowPartitionTensor& row_split,
      const vector<INDEX_TYPE>& parent_output_index,
      INDEX_TYPE output_index_multiplier, INDEX_TYPE output_size,
      vector<INDEX_TYPE>* result) {
    INDEX_TYPE row_split_size = row_split.size();
    if (row_split_size > 0) {
      result->reserve(row_split(row_split_size - 1));
    }
    for (INDEX_TYPE i = 0; i < row_split_size - 1; ++i) {
      INDEX_TYPE row_length = row_split(i + 1) - row_split(i);
      INDEX_TYPE real_length = std::min(output_size, row_length);
      INDEX_TYPE parent_output_index_current = parent_output_index[i];

      if (parent_output_index_current == -1) {
        real_length = 0;
      }
      for (INDEX_TYPE j = 0; j < real_length; ++j) {
        result->push_back(parent_output_index_current);
        parent_output_index_current += output_index_multiplier;
      }
      for (INDEX_TYPE j = 0; j < row_length - real_length; ++j) {
        result->push_back(-1);
      }
    }
    if (row_split_size > 0) {
      DCHECK_EQ(result->size(), row_split(row_split_size - 1));
    }
  }

  // Calculates the output index of each value in a dimension partitioned by
  // value_rowids.  The parent_output_index is the result of the same
  // computation for the previous dimension.
  // -1 indicates an element or list that is out of range.
  // The output_index_multiplier is the number of output indices one moves
  // forward for each column.
  // E.g., given:
  // value_rowids:[0 1 2 2 2 3 4 4 5]
  // parent_output_index:[1000 1100 2000 2100 -1 3000 4000]
  // output_index_multiplier: 10
  // output_size: 2
  // You get:
  // result = [1000 1100 2000 2010 -1 2100 -1 -1 3000]
  // result[0] = parent_output_index[value_rowids[0]]
  // result[1] = parent_output_index[value_rowids[1]]
  // result[2] = parent_output_index[value_rowids[2]]
  // result[3] = parent_output_index[value_rowids[3]] + 10
  // result[4] = -1 because it is the third element of a row, and output_size
  //             is only 2.
  // result[5] = parent_output_index[value_rowids[5]]
  // result[6] = -1 because parent_output_index[value_rowids[6]] == -1
  // result[7] = -1 because parent_output_index[value_rowids[7]] == -1
  // result[8] = parent_output_index[value_rowids[8]]
  void CalculateOutputIndexValueRowID(
      const RowPartitionTensor& value_rowids,
      const vector<INDEX_TYPE>& parent_output_index,
      INDEX_TYPE output_index_multiplier, INDEX_TYPE output_size,
      vector<INDEX_TYPE>* result) {
    const INDEX_TYPE index_size = value_rowids.size();
    result->reserve(index_size);
    if (index_size == 0) {
      return;
    }

    INDEX_TYPE current_output_column = 0;
    INDEX_TYPE current_value_rowid = value_rowids(0);
    DCHECK_LT(current_value_rowid, parent_output_index.size());
    INDEX_TYPE current_output_index = parent_output_index[current_value_rowid];
    result->push_back(current_output_index);
    for (INDEX_TYPE i = 1; i < index_size; ++i) {
      INDEX_TYPE next_value_rowid = value_rowids(i);
      if (next_value_rowid == current_value_rowid) {
        if (current_output_index >= 0) {
          ++current_output_column;
          if (current_output_column < output_size) {
            current_output_index += output_index_multiplier;
          } else {
            current_output_index = -1;
          }
        }
      } else {
        current_output_column = 0;
        current_value_rowid = next_value_rowid;
        DCHECK_LT(next_value_rowid, parent_output_index.size());
        current_output_index = parent_output_index[next_value_rowid];
      }
      result->push_back(current_output_index);
    }
    DCHECK_EQ(result->size(), value_rowids.size());
  }

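  // Computes the output index for each value in `dimension`, dispatching on
  // how that dimension is partitioned.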
  Status CalculateOutputIndex(OpKernelContext* context, int dimension,
                              const vector<INDEX_TYPE>& parent_output_index,
                              INDEX_TYPE output_index_multiplier,
                              INDEX_TYPE output_size,
                              vector<INDEX_TYPE>* result) {
    const RowPartitionTensor row_partition_tensor =
        GetRowPartitionTensor(context, dimension);
    auto partition_type = GetRowPartitionTypeByDimension(dimension);
    switch (partition_type) {
      case RowPartitionType::VALUE_ROWIDS:
        CalculateOutputIndexValueRowID(
            row_partition_tensor, parent_output_index, output_index_multiplier,
            output_size, result);
        return tensorflow::Status::OK();
      case RowPartitionType::ROW_SPLITS:
        CalculateOutputIndexRowSplit(row_partition_tensor, parent_output_index,
                                     output_index_multiplier, output_size,
                                     result);
        return tensorflow::Status::OK();
      default:
        return errors::InvalidArgument(
            "Unsupported partition type: ",
            RowPartitionTypeToString(partition_type));
    }
  }

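  // Returns the size of the outermost dimension, derived from the first
  // row-partition tensor.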
  Status GetFirstDimensionSize(OpKernelContext* context, INDEX_TYPE* result) {
    const Tensor first_partition_tensor =
        context->input(kFirstPartitionInputIndex);
    const RowPartitionType first_partition_type = row_partition_types_[0];
    switch (first_partition_type) {
      case RowPartitionType::FIRST_DIM_SIZE:
        *result = first_partition_tensor.scalar<INDEX_TYPE>()();
        return Status::OK();
      case RowPartitionType::VALUE_ROWIDS:
        return errors::InvalidArgument(
            "Cannot handle VALUE_ROWIDS in first dimension.");
      case RowPartitionType::ROW_SPLITS:
        *result = first_partition_tensor.shape().dim_size(0) - 1;
        return Status::OK();
      default:
        return errors::InvalidArgument(
            "Cannot handle type ",
            RowPartitionTypeToString(first_partition_type));
    }
  }

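  // Converts the ragged tensor to a dense tensor: computes the output shape
  // and row-major strides, maps each ragged value to a flat output index
  // (one partition at a time), then lets SetOutput scatter the values.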
  void Compute(OpKernelContext* context) override {
    INDEX_TYPE first_dimension;
    OP_REQUIRES_OK(context, GetFirstDimensionSize(context, &first_dimension));
    vector<INDEX_TYPE> output_size;
    OP_REQUIRES_OK(context,
                   CalculateOutputSize(first_dimension, context, &output_size));
    vector<INDEX_TYPE> multiplier;
    multiplier.resize(ragged_rank_ + 1);

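    // multiplier[i] is the row-major stride of dimension i: the number of
    // flat output elements spanned by one step along that dimension.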
    multiplier[multiplier.size() - 1] = 1;
    for (int i = multiplier.size() - 2; i >= 0; --i) {
      multiplier[i] = multiplier[i + 1] * output_size[i + 1];
    }
    // Full size of the tensor.
    TensorShape output_shape;
    OP_REQUIRES_OK(context,
                   TensorShapeUtils::MakeShape(output_size, &output_shape));
    Tensor* output_tensor = nullptr;

    OP_REQUIRES_OK(context,
                   context->allocate_output(0, output_shape, &output_tensor));
    const INDEX_TYPE full_size = multiplier[0] * output_size[0];
    if (full_size > 0) {
      vector<INDEX_TYPE> output_index, new_output_index;
      int nvals = context->input(kValueInputIndex).shape().dim_size(0);
      output_index.reserve(nvals);
      new_output_index.reserve(nvals);

      CalculateFirstParentOutputIndex(first_dimension, multiplier[0],
                                      output_size[0], &output_index);
      for (int i = 1; i <= ragged_rank_; ++i) {
        OP_REQUIRES_OK(context, CalculateOutputIndex(
                                    context, i - 1, output_index, multiplier[i],
                                    output_size[i], &new_output_index));
        output_index.swap(new_output_index);
        new_output_index.clear();
      }

      SetOutput(context, ragged_rank_, output_index, output_tensor);
    }
  }
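
  // Writes the values into output_tensor at the given per-value output
  // indices (-1 means "skip this value"), filling everything else with the
  // default value.  Implemented by the subclass, which knows the value dtype.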
  virtual void SetOutput(OpKernelContext* context, int ragged_rank,
                         const vector<INDEX_TYPE>& output_index,
                         Tensor* output_tensor) = 0;

 private:
  vector<RowPartitionType> row_partition_types_;
  int ragged_rank_;
};

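// Element-by-element copy, for value types that are not trivially copyable.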
template <typename VALUE_TYPE, typename INDEX_TYPE>
void slow_copy_array(VALUE_TYPE* dst, const VALUE_TYPE* src, INDEX_TYPE size) {
  for (INDEX_TYPE index = 0; index < size; ++index) {
    dst[index] = src[index];
  }
}

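// Fast path: bulk copy with memcpy.  Specialized below for tstring and
// Eigen::half, which must not be memcpy'd.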
template <typename VALUE_TYPE, typename INDEX_TYPE>
void copy_array(VALUE_TYPE* dst, const VALUE_TYPE* src, INDEX_TYPE size) {
  memcpy(dst, src, size * sizeof(VALUE_TYPE));
}

template <>
void copy_array<tstring, int64>(tstring* dst, const tstring* src, int64 size) {
  slow_copy_array(dst, src, size);
}

template <>
void copy_array<tstring, int32>(tstring* dst, const tstring* src, int32 size) {
  slow_copy_array(dst, src, size);
}

// If we don't specialize for Eigen::half, we get:
// undefined behavior, destination object type 'Eigen::half'
// is not TriviallyCopyable
template <>
void copy_array<Eigen::half, int64>(Eigen::half* dst, const Eigen::half* src,
                                    int64 size) {
  slow_copy_array(dst, src, size);
}

template <>
void copy_array<Eigen::half, int32>(Eigen::half* dst, const Eigen::half* src,
                                    int32 size) {
  slow_copy_array(dst, src, size);
}

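// Concrete kernel, parameterized by the value dtype.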
template <typename VALUE_TYPE, typename INDEX_TYPE>
class RaggedTensorToTensorOp : public RaggedTensorToTensorBaseOp<INDEX_TYPE> {
 public:
  explicit RaggedTensorToTensorOp(OpKernelConstruction* context)
      : RaggedTensorToTensorBaseOp<INDEX_TYPE>(context) {}

  void SetOutput(OpKernelContext* context, int ragged_rank,
                 const vector<INDEX_TYPE>& output_index,
                 Tensor* output_tensor) override {
    // Note: it's ok to use OP_REQUIRES_OK (rather than TF_RETURN_IF_ERROR)
    // in this function, but only because it's the last thing we do before
    // returning from Compute().

    if (output_tensor->NumElements() == 0) return;

    const auto& values_tensor = context->input(kValueInputIndex);
    const VALUE_TYPE* values_base = values_tensor.flat<VALUE_TYPE>().data();
    const auto& default_value_tensor = context->input(kDefaultValueInputIndex);
    VALUE_TYPE* output_base = output_tensor->flat<VALUE_TYPE>().data();

    TensorShape element_shape = output_tensor->shape();
    element_shape.RemoveDimRange(0, ragged_rank + 1);
    int value_element_size = element_shape.num_elements();
    size_t output_index_size = output_index.size();

    // Broadcast the default value to value_element_size.  (We can skip this
    // if default_value_tensor.NumElements() == 1, since we use std::fill
    // when that's true.)
    const VALUE_TYPE* default_value =
        default_value_tensor.flat<VALUE_TYPE>().data();
    Tensor bcast_default;  // Temporary tensor for result of broadcast
    if (default_value_tensor.NumElements() != value_element_size &&
        default_value_tensor.NumElements() != 1) {
      const auto& src_shape = default_value_tensor.shape();
      BCast bcast(BCast::FromShape(src_shape), BCast::FromShape(element_shape),
                  /*fewer_dims_optimization=*/true);
      // Note: bcast should always be valid, since we rejected any incompatible
      // shapes when we called ValidateDefaultValueShape().
      OP_REQUIRES(context, bcast.IsValid(),
                  errors::InvalidArgument("Error broadcasting default_value"));
      OP_REQUIRES_OK(context,
                     context->allocate_temp(default_value_tensor.dtype(),
                                            element_shape, &bcast_default));
      const CPUDevice& device = context->eigen_device<CPUDevice>();
      functor::BroadcastTo<CPUDevice, VALUE_TYPE>()(
          device, context, bcast_default, element_shape, default_value_tensor,
          src_shape, bcast);
      default_value = bcast_default.flat<VALUE_TYPE>().data();
    }

    // Loop through the output_index vector, finding contiguous regions that
    // should be copied.  Once we find the end of a contiguous region, copy it
    // and add any necessary padding (with default_value).
    INDEX_TYPE src_start = 0;  // Start of contiguous region (in values)
    INDEX_TYPE dst_start = 0;  // Destination for contiguous region (in output)
    INDEX_TYPE dst_end = 0;    // Destination for contiguous region (in output)
    for (int src_i = 0; src_i <= output_index_size; ++src_i) {
      // dst_i is the destination where the value at src_i should be copied.
      INDEX_TYPE dst_i = src_i < output_index_size ? output_index[src_i] : -1;

      // If we're still in a contiguous region, then update dst_end and go to
      // the next src_i.
      if (dst_i == dst_end) {
        ++dst_end;
        continue;
      }

      // We found the end of a contiguous region.  This can be because we
      // found a gap (dst_i > dst_end), a source value that shouldn't be
      // copied because it's out of bounds (dst_i == -1), or the end of the
      // tensor (dst_i == -1).
      if (dst_start < dst_end) {
        // Copy the contiguous region.
        const VALUE_TYPE* src = values_base + src_start * value_element_size;
        VALUE_TYPE* dst = output_base + dst_start * value_element_size;
        INDEX_TYPE nvals = (dst_end - dst_start) * value_element_size;
        copy_array<VALUE_TYPE, INDEX_TYPE>(dst, src, nvals);
      }

      // Add any necessary padding (w/ default_value).
      if (src_i >= output_index_size) {
        // We reached the end of values: pad to the end of output.
        size_t output_size = output_tensor->NumElements();
        dst_i = output_size / value_element_size;
      }
      if (dst_i > dst_end) {
        if (default_value_tensor.NumElements() == 1) {
          std::fill(output_base + dst_end * value_element_size,
                    output_base + dst_i * value_element_size, *default_value);
          dst_end = dst_i;
        } else {
          while (dst_i > dst_end) {
            VALUE_TYPE* dst = output_base + dst_end * value_element_size;
            copy_array<VALUE_TYPE, INDEX_TYPE>(dst, default_value,
                                               value_element_size);
            ++dst_end;
          }
        }
      }

      // Update indices.
      if (dst_i < 0) {
        // src_i should be skipped -- leave it out of the contiguous region.
        src_start = src_i + 1;
        dst_start = dst_end;
      } else {
        // src_i should be copied -- include it in the contiguous region.
        src_start = src_i;
        dst_start = dst_end;
        dst_end = dst_start + 1;
      }
    }
  }
};

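// Registers the CPU kernel for every supported value type, with both int32
// and int64 row-partition index types.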
#define REGISTER_CPU_KERNEL_INDEX_TYPE(value_type, index_type)       \
  REGISTER_KERNEL_BUILDER(Name("RaggedTensorToTensor")               \
                              .Device(DEVICE_CPU)                    \
                              .TypeConstraint<value_type>("T")       \
                              .TypeConstraint<index_type>("Tindex"), \
                          RaggedTensorToTensorOp<value_type, index_type>);

#define REGISTER_CPU_KERNEL(value_type)                          \
  REGISTER_CPU_KERNEL_INDEX_TYPE(value_type, tensorflow::int64); \
  REGISTER_CPU_KERNEL_INDEX_TYPE(value_type, tensorflow::int32);

TF_CALL_POD_TYPES(REGISTER_CPU_KERNEL);
TF_CALL_string(REGISTER_CPU_KERNEL);
TF_CALL_QUANTIZED_TYPES(REGISTER_CPU_KERNEL);
TF_CALL_quint16(REGISTER_CPU_KERNEL);
TF_CALL_qint16(REGISTER_CPU_KERNEL);

#undef REGISTER_CPU_KERNEL

}  // namespace
}  // namespace tensorflow