# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Parsing Ops."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_parsing_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import parsing_config
# go/tf-wildcard-import
# pylint: disable=wildcard-import,undefined-variable
from tensorflow.python.ops.gen_parsing_ops import *
# pylint: enable=wildcard-import,undefined-variable
from tensorflow.python.util import deprecation
from tensorflow.python.util import dispatch
from tensorflow.python.util.tf_export import tf_export


ops.NotDifferentiable("DecodeRaw")
ops.NotDifferentiable("DecodePaddedRaw")
ops.NotDifferentiable("ParseTensor")
ops.NotDifferentiable("SerializeTensor")
ops.NotDifferentiable("StringToNumber")


VarLenFeature = parsing_config.VarLenFeature
RaggedFeature = parsing_config.RaggedFeature
SparseFeature = parsing_config.SparseFeature
FixedLenFeature = parsing_config.FixedLenFeature
FixedLenSequenceFeature = parsing_config.FixedLenSequenceFeature
# pylint: disable=protected-access
_ParseOpParams = parsing_config._ParseOpParams
_construct_tensors_for_composite_features = (
    parsing_config._construct_tensors_for_composite_features)
# pylint: enable=protected-access


# TODO(b/122887740) Switch files that use this private symbol to use new name.
_construct_sparse_tensors_for_sparse_features = \
    _construct_tensors_for_composite_features


def _prepend_none_dimension(features):
  """Returns a copy of features with adjusted FixedLenSequenceFeature shapes."""
  if features:
    modified_features = dict(features)  # Create a copy to modify
    for key, feature in features.items():
      if isinstance(feature, FixedLenSequenceFeature):
        if not feature.allow_missing:
          raise ValueError("Unsupported: FixedLenSequenceFeature requires "
                           "allow_missing to be True.")
        modified_features[key] = FixedLenSequenceFeature(
            [None] + list(feature.shape),
            feature.dtype,
            feature.allow_missing,
            feature.default_value)
    return modified_features
  else:
    return features


@tf_export("io.parse_example", v1=[])
@dispatch.add_dispatch_support
def parse_example_v2(serialized, features, example_names=None, name=None):
  # pylint: disable=line-too-long
  """Parses `Example` protos into a `dict` of tensors.

  Parses a number of serialized [`Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  protos given in `serialized`. We refer to `serialized` as a batch with
  `batch_size` many entries of individual `Example` protos.

  `example_names` may contain descriptive names for the corresponding serialized
  protos. These may be useful for debugging purposes, but they have no effect on
  the output. If not `None`, `example_names` must be the same length as
  `serialized`.

  This op parses serialized examples into a dictionary mapping keys to `Tensor`,
  `SparseTensor`, and `RaggedTensor` objects. `features` is a dict from keys to
  `VarLenFeature`, `SparseFeature`, `RaggedFeature`, and `FixedLenFeature`
  objects. Each `VarLenFeature` and `SparseFeature` is mapped to a
  `SparseTensor`; each `FixedLenFeature` is mapped to a `Tensor`; and each
  `RaggedFeature` is mapped to a `RaggedTensor`.

  Each `VarLenFeature` maps to a `SparseTensor` of the specified type
  representing a ragged matrix. Its indices are `[batch, index]` where `batch`
  identifies the example in `serialized`, and `index` is the value's index in
  the list of values associated with that feature and example.

  Each `SparseFeature` maps to a `SparseTensor` of the specified type
  representing a Tensor of `dense_shape` `[batch_size] + SparseFeature.size`.
  Its `values` come from the feature in the examples with key `value_key`.
  A `values[i]` comes from a position `k` in the feature of an example at batch
  entry `batch`. This positional information is recorded in `indices[i]` as
  `[batch, index_0, index_1, ...]` where `index_j` is the `k-th` value of
  the feature in the example with key `SparseFeature.index_key[j]`.
  In other words, we split the indices (except the first index indicating the
  batch entry) of a `SparseTensor` by dimension into different features of the
  `Example`. Due to its complexity a `VarLenFeature` should be preferred over a
  `SparseFeature` whenever possible.

  Each `FixedLenFeature` `df` maps to a `Tensor` of the specified type (or
  `tf.float32` if not specified) and shape `(serialized.size(),) + df.shape`.

  `FixedLenFeature` entries with a `default_value` are optional. With no default
  value, we will fail if that `Feature` is missing from any example in
  `serialized`.

  Each `FixedLenSequenceFeature` `df` maps to a `Tensor` of the specified type
  (or `tf.float32` if not specified) and shape
  `(serialized.size(), None) + df.shape`.
  All examples in `serialized` will be padded with `default_value` along the
  second dimension.

  Each `RaggedFeature` maps to a `RaggedTensor` of the specified type.  It
  is formed by stacking the `RaggedTensor` for each example, where the
  `RaggedTensor` for each individual example is constructed using the tensors
  specified by `RaggedFeature.value_key` and `RaggedFeature.partitions`.  See
  the `tf.io.RaggedFeature` documentation for details and examples.

  Examples:

  For example, if one expects a `tf.float32` `VarLenFeature` `ft` and three
  serialized `Example`s are provided:

  ```
  serialized = [
    features
      { feature { key: "ft" value { float_list { value: [1.0, 2.0] } } } },
    features
      { feature {} },
    features
      { feature { key: "ft" value { float_list { value: [3.0] } } } }
  ]
  ```

  then the output will look like:

  ```python
  {"ft": SparseTensor(indices=[[0, 0], [0, 1], [2, 0]],
                      values=[1.0, 2.0, 3.0],
                      dense_shape=(3, 2)) }
  ```

  If instead a `FixedLenSequenceFeature` with `default_value = -1.0` and
  `shape=[]` is used then the output will look like:

  ```python
  {"ft": [[1.0, 2.0], [-1.0, -1.0], [3.0, -1.0]]}
  ```

  Given two `Example` input protos in `serialized`:

  ```
  [
    features {
      feature { key: "kw" value { bytes_list { value: [ "knit", "big" ] } } }
      feature { key: "gps" value { float_list { value: [] } } }
    },
    features {
      feature { key: "kw" value { bytes_list { value: [ "emmy" ] } } }
      feature { key: "dank" value { int64_list { value: [ 42 ] } } }
      feature { key: "gps" value { } }
    }
  ]
  ```

  And arguments

  ```
  example_names: ["input0", "input1"],
  features: {
      "kw": VarLenFeature(tf.string),
      "dank": VarLenFeature(tf.int64),
      "gps": VarLenFeature(tf.float32),
  }
  ```

  Then the output is a dictionary:

  ```python
  {
    "kw": SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=["knit", "big", "emmy"],
        dense_shape=[2, 2]),
    "dank": SparseTensor(
        indices=[[1, 0]],
        values=[42],
        dense_shape=[2, 1]),
    "gps": SparseTensor(
        indices=[],
        values=[],
        dense_shape=[2, 0]),
  }
  ```

  For dense results in two serialized `Example`s:

  ```
  [
    features {
      feature { key: "age" value { int64_list { value: [ 0 ] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
    },
    features {
      feature { key: "age" value { int64_list { value: [] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
    }
  ]
  ```

  We can use arguments:

  ```
  example_names: ["input0", "input1"],
  features: {
      "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
      "gender": FixedLenFeature([], dtype=tf.string),
  }
  ```

  And the expected output is:

  ```python
  {
    "age": [[0], [-1]],
    "gender": [["f"], ["f"]],
  }
  ```

  An alternative to `VarLenFeature` to obtain a `SparseTensor` is
  `SparseFeature`. For example, given two `Example` input protos in
  `serialized`:

  ```
  [
    features {
      feature { key: "val" value { float_list { value: [ 0.5, -1.0 ] } } }
      feature { key: "ix" value { int64_list { value: [ 3, 20 ] } } }
    },
    features {
      feature { key: "val" value { float_list { value: [ 0.0 ] } } }
      feature { key: "ix" value { int64_list { value: [ 42 ] } } }
    }
  ]
  ```

  And arguments

  ```
  example_names: ["input0", "input1"],
  features: {
      "sparse": SparseFeature(
          index_key="ix", value_key="val", dtype=tf.float32, size=100),
  }
  ```

  Then the output is a dictionary:

  ```python
  {
    "sparse": SparseTensor(
        indices=[[0, 3], [0, 20], [1, 42]],
        values=[0.5, -1.0, 0.0],
        dense_shape=[2, 100]),
  }
  ```

  See the `tf.io.RaggedFeature` documentation for examples showing how
  `RaggedFeature` can be used to obtain `RaggedTensor`s.
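
  As a complete, runnable sketch of the first example above (the feature key
  `"ft"` and the values are illustrative only):

  ```python
  def make_example(values):
    return tf.train.Example(features=tf.train.Features(feature={
        "ft": tf.train.Feature(float_list=tf.train.FloatList(value=values))
    })).SerializeToString()

  serialized = [make_example([1.0, 2.0]), make_example([]), make_example([3.0])]
  parsed = tf.io.parse_example(
      serialized, {"ft": tf.io.VarLenFeature(tf.float32)})
  # parsed["ft"] is a SparseTensor with values [1.0, 2.0, 3.0] and
  # dense_shape [3, 2], matching the first example above.
  ```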

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    features: A `dict` mapping feature keys to `FixedLenFeature`,
      `VarLenFeature`, `SparseFeature`, and `RaggedFeature` values.
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor`, `SparseTensor`, and
    `RaggedTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  if not features:
    raise ValueError("Missing: features was %s." % features)
  features = _prepend_none_dimension(features)
  params = _ParseOpParams.from_features(features, [
      VarLenFeature, SparseFeature, FixedLenFeature, FixedLenSequenceFeature,
      RaggedFeature
  ])

  outputs = _parse_example_raw(serialized, example_names, params, name=name)
  return _construct_tensors_for_composite_features(features, outputs)


@tf_export(v1=["io.parse_example", "parse_example"])
@dispatch.add_dispatch_support
def parse_example(serialized, features, name=None, example_names=None):
  return parse_example_v2(serialized, features, example_names, name)


parse_example.__doc__ = parse_example_v2.__doc__


def _parse_example_raw(serialized, names, params, name):
  """Parses `Example` protos.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    params: A `ParseOpParams` containing the parameters for the parse op.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping keys to `Tensor`s, `SparseTensor`s, and `RaggedTensor`s.

  """
  if params.num_features == 0:
    raise ValueError("Must provide at least one feature key")
  with ops.name_scope(name, "ParseExample", [serialized, names]):
    names = [] if names is None else names
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    if params.ragged_keys and serialized.shape.ndims is None:
      raise ValueError("serialized must have statically-known rank to "
                       "parse ragged features.")
    outputs = gen_parsing_ops.parse_example_v2(
        serialized=serialized,
        names=names,
        sparse_keys=params.sparse_keys,
        dense_keys=params.dense_keys,
        ragged_keys=params.ragged_keys,
        dense_defaults=params.dense_defaults_vec,
        num_sparse=len(params.sparse_keys),
        sparse_types=params.sparse_types,
        ragged_value_types=params.ragged_value_types,
        ragged_split_types=params.ragged_split_types,
        dense_shapes=params.dense_shapes_as_proto,
        name=name)
    (sparse_indices, sparse_values, sparse_shapes, dense_values,
     ragged_values, ragged_row_splits) = outputs
    # pylint: disable=protected-access
    ragged_tensors = parsing_config._build_ragged_tensors(
        serialized.shape, ragged_values, ragged_row_splits)

    sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
        in zip(sparse_indices, sparse_values, sparse_shapes)]

    return dict(
        zip(params.sparse_keys + params.dense_keys + params.ragged_keys,
            sparse_tensors + dense_values + ragged_tensors))


@tf_export(v1=["io.parse_single_example", "parse_single_example"])
@dispatch.add_dispatch_support
def parse_single_example(serialized, features, name=None, example_names=None):
  """Parses a single `Example` proto.

  Similar to `parse_example`, except:

  For dense tensors, the returned `Tensor` is identical to the output of
  `parse_example`, except there is no batch dimension; the output shape is the
  same as the shape given in `dense_shape`.

  For `SparseTensor`s, the first (batch) column of the indices matrix is removed
  (the indices matrix is a column vector), the values vector is unchanged, and
  the first (`batch_size`) entry of the shape vector is removed (it is now a
  single element vector).

  One might see performance advantages by batching `Example` protos with
  `parse_example` instead of using this function directly.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    name: A name for this operation (optional).
    example_names: (Optional) A scalar string Tensor, the associated name.

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  return parse_single_example_v2(serialized, features, example_names, name)


@tf_export("io.parse_single_example", v1=[])
@dispatch.add_dispatch_support
def parse_single_example_v2(
    serialized, features, example_names=None, name=None
    ):
  """Parses a single `Example` proto.

  Similar to `parse_example`, except:

  For dense tensors, the returned `Tensor` is identical to the output of
  `parse_example`, except there is no batch dimension; the output shape is the
  same as the shape given in `dense_shape`.

  For `SparseTensor`s, the first (batch) column of the indices matrix is removed
  (the indices matrix is a column vector), the values vector is unchanged, and
  the first (`batch_size`) entry of the shape vector is removed (it is now a
  single element vector).

  One might see performance advantages by batching `Example` protos with
  `parse_example` instead of using this function directly.
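
  A minimal sketch (the feature key `"x"` and its values are illustrative only):

  ```python
  example = tf.train.Example(features=tf.train.Features(feature={
      "x": tf.train.Feature(float_list=tf.train.FloatList(value=[1.0, 2.0]))
  }))
  parsed = tf.io.parse_single_example(
      example.SerializeToString(),
      {"x": tf.io.FixedLenFeature([2], tf.float32)})
  # parsed["x"] is a dense Tensor of shape [2]; note the absence of a batch
  # dimension.
  ```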

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    example_names: (Optional) A scalar string Tensor, the associated name.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  if not features:
    raise ValueError("Missing features.")
  with ops.name_scope(name, "ParseSingleExample", [serialized, example_names]):
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    serialized = _assert_scalar(serialized, "serialized")
    return parse_example_v2(serialized, features, example_names, name)


@tf_export("io.parse_sequence_example")
@dispatch.add_dispatch_support
def parse_sequence_example(serialized,
                           context_features=None,
                           sequence_features=None,
                           example_names=None,
                           name=None):
  # pylint: disable=line-too-long
  """Parses a batch of `SequenceExample` protos.

  Parses a vector of serialized
  [`SequenceExample`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  protos given in `serialized`.

  This op parses serialized sequence examples into a tuple of dictionaries,
  each mapping keys to `Tensor` and `SparseTensor` objects.
  The first dictionary contains mappings for keys appearing in
  `context_features`, and the second dictionary contains mappings for keys
  appearing in `sequence_features`.

  At least one of `context_features` and `sequence_features` must be provided
  and non-empty.

  The `context_features` keys are associated with a `SequenceExample` as a
  whole, independent of time / frame.  In contrast, the `sequence_features` keys
  provide a way to access variable-length data within the `FeatureList` section
  of the `SequenceExample` proto.  While the shapes of `context_features` values
  are fixed with respect to frame, the frame dimension (the first dimension)
  of `sequence_features` values may vary between `SequenceExample` protos,
  and even between `feature_list` keys within the same `SequenceExample`.

  `context_features` contains `VarLenFeature`, `RaggedFeature`, and
  `FixedLenFeature` objects. Each `VarLenFeature` is mapped to a
  `SparseTensor`; each `RaggedFeature` is mapped to a `RaggedTensor`; and each
  `FixedLenFeature` is mapped to a `Tensor`, of the specified type, shape, and
  default value.

  `sequence_features` contains `VarLenFeature`, `RaggedFeature`, and
  `FixedLenSequenceFeature` objects. Each `VarLenFeature` is mapped to a
  `SparseTensor`; each `RaggedFeature` is mapped to a `RaggedTensor`; and
  each `FixedLenSequenceFeature` is mapped to a `Tensor`, each of the specified
  type. The shape will be `(B,T,) + df.dense_shape` for
  `FixedLenSequenceFeature` `df`, where `B` is the batch size, and `T` is the
  length of the associated `FeatureList` in the `SequenceExample`. For instance,
  `FixedLenSequenceFeature([])` yields a scalar 2-D `Tensor` of static shape
  `[None, None]` and dynamic shape `[B, T]`, while
  `FixedLenSequenceFeature([k])` (for `int k >= 1`) yields a 3-D `Tensor`
  of static shape `[None, None, k]` and dynamic shape `[B, T, k]`.

  Like the input, the resulting output tensors have a batch dimension. This
  means that the original per-example shapes of `VarLenFeature`s and
  `FixedLenSequenceFeature`s can be lost. To handle that situation, this op also
  provides dicts of shape tensors as part of the output. There is one dict for
  the context features, and one for the feature_list features. Context features
  of type `FixedLenFeature`s will not be present, since their shapes are already
  known by the caller. In situations where the input `FixedLenFeature`s are of
  different lengths across examples, the shorter examples will be padded with
  default datatype values: 0 for numeric types, and the empty string for string
  types.

  Each `SparseTensor` corresponding to `sequence_features` represents a ragged
  vector.  Its indices are `[time, index]`, where `time` is the `FeatureList`
  entry and `index` is the value's index in the list of values associated with
  that time.

  `FixedLenFeature` entries with a `default_value` and `FixedLenSequenceFeature`
  entries with `allow_missing=True` are optional; otherwise, we will fail if
  that `Feature` or `FeatureList` is missing from any example in `serialized`.

  `example_names` may contain descriptive names for the corresponding serialized
  protos. These may be useful for debugging purposes, but they have no effect on
  the output. If not `None`, `example_names` must be the same length as
  `serialized`.
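
  A minimal runnable sketch (the keys `"id"` and `"tokens"` are illustrative
  only):

  ```python
  seq = tf.train.SequenceExample(
      context=tf.train.Features(feature={
          "id": tf.train.Feature(int64_list=tf.train.Int64List(value=[7]))}),
      feature_lists=tf.train.FeatureLists(feature_list={
          "tokens": tf.train.FeatureList(feature=[
              tf.train.Feature(int64_list=tf.train.Int64List(value=[1])),
              tf.train.Feature(int64_list=tf.train.Int64List(value=[2]))])}))
  context, sequences, lengths = tf.io.parse_sequence_example(
      [seq.SerializeToString()],
      context_features={"id": tf.io.FixedLenFeature([], tf.int64)},
      sequence_features={
          "tokens": tf.io.FixedLenSequenceFeature([], tf.int64)})
  # context["id"] has shape [1]; sequences["tokens"] has shape [1, 2]; and
  # lengths["tokens"] is [2].
  ```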

  Args:
    serialized: A vector (1-D Tensor) of type string containing binary
      serialized `SequenceExample` protos.
    context_features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` or `RaggedFeature` values. These features are associated
      with a `SequenceExample` as a whole.
    sequence_features: A `dict` mapping feature keys to
      `FixedLenSequenceFeature` or `VarLenFeature` or `RaggedFeature` values.
      These features are associated with data within the `FeatureList` section
      of the `SequenceExample` proto.
    example_names: A vector (1-D Tensor) of strings (optional), the names of the
      serialized protos.
    name: A name for this operation (optional).

  Returns:
    A tuple of three `dict`s, each mapping keys to `Tensor`s,
    `SparseTensor`s, and `RaggedTensor`s. The first dict contains the context
    key/values, the second dict contains the feature_list key/values, and the
    final dict contains the lengths of any dense feature_list features.

  Raises:
    ValueError: if any feature is invalid.
  """
  if not (context_features or sequence_features):
    raise ValueError("Missing features.")
  context_params = _ParseOpParams.from_features(
      context_features, [VarLenFeature, FixedLenFeature, RaggedFeature])
  feature_list_params = _ParseOpParams.from_features(
      sequence_features,
      [VarLenFeature, FixedLenSequenceFeature, RaggedFeature])

  with ops.name_scope(name, "ParseSequenceExample",
                      [serialized, example_names]):
    outputs = _parse_sequence_example_raw(serialized, example_names,
                                          context_params, feature_list_params,
                                          name)
    context_output, feature_list_output, feature_list_lengths = outputs

    if context_params.ragged_keys:
      context_output = _construct_tensors_for_composite_features(
          context_features, context_output)
    if feature_list_params.ragged_keys:
      feature_list_output = _construct_tensors_for_composite_features(
          sequence_features, feature_list_output)

    return context_output, feature_list_output, feature_list_lengths


def _parse_sequence_example_raw(serialized,
                                debug_name,
                                context,
                                feature_list,
                                name=None):
  """Parses a vector of `SequenceExample` protos.

  Args:
    serialized: A vector (1-D Tensor) of type string, containing binary
      serialized `SequenceExample` protos.
    debug_name: A vector (1-D Tensor) of strings (optional), the names of the
      serialized protos.
    context: A `ParseOpParams` containing the parameters for the parse
      op for the context features.
    feature_list: A `ParseOpParams` containing the parameters for the
      parse op for the feature_list features.
    name: A name for this operation (optional).

  Returns:
    A tuple of three `dict`s, each mapping keys to `Tensor`s, `SparseTensor`s,
    and `RaggedTensor`s. The first dict contains the context key/values, the
    second dict contains the feature_list key/values, and the final dict
    contains the lengths of any dense feature_list features.

  Raises:
    TypeError: if feature_list.dense_defaults is not either None or a dict.
  """
  if context.num_features + feature_list.num_features == 0:
    raise ValueError("Must provide at least one feature key")
  with ops.name_scope(name, "ParseSequenceExample", [serialized]):
    debug_name = [] if debug_name is None else debug_name

    # Internal
    feature_list_dense_missing_assumed_empty = []
    for k, v in feature_list.dense_defaults.items():
      if v is not None:
        raise ValueError("Value feature_list.dense_defaults[%s] must be None" %
                         k)
      feature_list_dense_missing_assumed_empty.append(k)

    has_ragged = context.ragged_keys or feature_list.ragged_keys
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    if has_ragged and serialized.shape.ndims is None:
      raise ValueError("serialized must have statically-known rank to "
                       "parse ragged features.")
    feature_list_dense_missing_assumed_empty_vector = [
        key in feature_list_dense_missing_assumed_empty
        for key in feature_list.dense_keys
    ]
    outputs = gen_parsing_ops.parse_sequence_example_v2(
        # Inputs
        serialized=serialized,
        debug_name=debug_name,
        context_sparse_keys=context.sparse_keys,
        context_dense_keys=context.dense_keys,
        context_ragged_keys=context.ragged_keys,
        feature_list_sparse_keys=feature_list.sparse_keys,
        feature_list_dense_keys=feature_list.dense_keys,
        feature_list_ragged_keys=feature_list.ragged_keys,
        feature_list_dense_missing_assumed_empty=(
            feature_list_dense_missing_assumed_empty_vector),
        context_dense_defaults=context.dense_defaults_vec,
        # Attrs
        Ncontext_sparse=len(context.sparse_keys),
        Nfeature_list_sparse=len(feature_list.sparse_keys),
        Nfeature_list_dense=len(feature_list.dense_keys),
        context_sparse_types=context.sparse_types,
        context_ragged_value_types=context.ragged_value_types,
        context_ragged_split_types=context.ragged_split_types,
        feature_list_dense_types=feature_list.dense_types,
        feature_list_sparse_types=feature_list.sparse_types,
        feature_list_ragged_value_types=feature_list.ragged_value_types,
        feature_list_ragged_split_types=feature_list.ragged_split_types,
        context_dense_shapes=context.dense_shapes_as_proto,
        feature_list_dense_shapes=feature_list.dense_shapes,
        name=name)
    (context_sparse_indices, context_sparse_values, context_sparse_shapes,
     context_dense_values, context_ragged_values, context_ragged_row_splits,
     feature_list_sparse_indices, feature_list_sparse_values,
     feature_list_sparse_shapes, feature_list_dense_values,
     feature_list_dense_lengths, feature_list_ragged_values,
     feature_list_ragged_outer_splits,
     feature_list_ragged_inner_splits) = outputs
    # pylint: disable=protected-access
    context_ragged_tensors = parsing_config._build_ragged_tensors(
        serialized.shape, context_ragged_values, context_ragged_row_splits)
    feature_list_ragged_tensors = parsing_config._build_ragged_tensors(
        serialized.shape, feature_list_ragged_values,
        feature_list_ragged_outer_splits, feature_list_ragged_inner_splits)

    # pylint: disable=g-complex-comprehension
    context_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape)
        for (ix, val,
             shape) in zip(context_sparse_indices, context_sparse_values,
                           context_sparse_shapes)
    ]

    feature_list_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape)
        for (ix, val, shape
            ) in zip(feature_list_sparse_indices, feature_list_sparse_values,
                     feature_list_sparse_shapes)
    ]
    # pylint: enable=g-complex-comprehension

    context_output = dict(
        zip(
            context.sparse_keys + context.dense_keys + context.ragged_keys,
            context_sparse_tensors + context_dense_values +
            context_ragged_tensors))
    feature_list_output = dict(
        zip(
            feature_list.sparse_keys + feature_list.dense_keys +
            feature_list.ragged_keys, feature_list_sparse_tensors +
            feature_list_dense_values + feature_list_ragged_tensors))
    feature_list_lengths = dict(
        zip(feature_list.dense_keys, feature_list_dense_lengths))

    return (context_output, feature_list_output, feature_list_lengths)


@tf_export("io.parse_single_sequence_example",
           v1=["io.parse_single_sequence_example",
               "parse_single_sequence_example"])
@dispatch.add_dispatch_support
def parse_single_sequence_example(
    serialized, context_features=None, sequence_features=None,
    example_name=None, name=None):
  # pylint: disable=line-too-long
  """Parses a single `SequenceExample` proto.

  Parses a single serialized [`SequenceExample`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  proto given in `serialized`.

  This op parses a serialized sequence example into a tuple of dictionaries,
  each mapping keys to `Tensor` and `SparseTensor` objects.
  The first dictionary contains mappings for keys appearing in
  `context_features`, and the second dictionary contains mappings for keys
  appearing in `sequence_features`.

  At least one of `context_features` and `sequence_features` must be provided
  and non-empty.

  The `context_features` keys are associated with a `SequenceExample` as a
  whole, independent of time / frame.  In contrast, the `sequence_features` keys
  provide a way to access variable-length data within the `FeatureList` section
  of the `SequenceExample` proto.  While the shapes of `context_features` values
  are fixed with respect to frame, the frame dimension (the first dimension)
  of `sequence_features` values may vary between `SequenceExample` protos,
  and even between `feature_list` keys within the same `SequenceExample`.

  `context_features` contains `VarLenFeature`, `RaggedFeature`, and
  `FixedLenFeature` objects. Each `VarLenFeature` is mapped to a `SparseTensor`;
  each `RaggedFeature` is mapped to a `RaggedTensor`; and each `FixedLenFeature`
  is mapped to a `Tensor`, of the specified type, shape, and default value.

  `sequence_features` contains `VarLenFeature`, `RaggedFeature`, and
  `FixedLenSequenceFeature` objects. Each `VarLenFeature` is mapped to a
  `SparseTensor`; each `RaggedFeature` is mapped to a `RaggedTensor`; and each
  `FixedLenSequenceFeature` is mapped to a `Tensor`, each of the specified type.
  The shape will be `(T,) + df.dense_shape` for `FixedLenSequenceFeature` `df`,
  where `T` is the length of the associated `FeatureList` in the
  `SequenceExample`. For instance, `FixedLenSequenceFeature([])` yields a scalar
  1-D `Tensor` of static shape `[None]` and dynamic shape `[T]`, while
  `FixedLenSequenceFeature([k])` (for `int k >= 1`) yields a 2-D `Tensor`
  of static shape `[None, k]` and dynamic shape `[T, k]`.

  Each `SparseTensor` corresponding to `sequence_features` represents a ragged
  vector.  Its indices are `[time, index]`, where `time` is the `FeatureList`
  entry and `index` is the value's index in the list of values associated with
  that time.

  `FixedLenFeature` entries with a `default_value` and `FixedLenSequenceFeature`
  entries with `allow_missing=True` are optional; otherwise, we will fail if
  that `Feature` or `FeatureList` is missing from any example in `serialized`.

  `example_name` may contain a descriptive name for the corresponding serialized
  proto. This may be useful for debugging purposes, but it has no effect on the
  output. If not `None`, `example_name` must be a scalar.

  Note that the batch version of this function, `tf.parse_sequence_example`,
  is written for better memory efficiency and will be faster on large
  `SequenceExample`s.
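
  A minimal sketch (`serialized_sequence_example` and the key `"tokens"` are
  illustrative placeholders):

  ```python
  # serialized_sequence_example: a scalar string, e.g. the result of
  # tf.train.SequenceExample(...).SerializeToString().
  context, sequences = tf.io.parse_single_sequence_example(
      serialized_sequence_example,
      sequence_features={
          "tokens": tf.io.FixedLenSequenceFeature([], tf.int64)})
  # sequences["tokens"] has shape [T], with no batch dimension.
  ```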

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary
      serialized `SequenceExample` proto.
    context_features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` or `RaggedFeature` values. These features are associated
      with a `SequenceExample` as a whole.
    sequence_features: A `dict` mapping feature keys to
      `FixedLenSequenceFeature` or `VarLenFeature` or `RaggedFeature` values.
      These features are associated with data within the `FeatureList` section
      of the `SequenceExample` proto.
    example_name: A scalar (0-D Tensor) of strings (optional), the name of
      the serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s, `SparseTensor`s,
    and `RaggedTensor`s.

    * The first dict contains the context key/values.
    * The second dict contains the feature_list key/values.

  Raises:
    ValueError: if any feature is invalid.
  """
  # pylint: enable=line-too-long
  if not (context_features or sequence_features):
    raise ValueError("Missing features.")
  context_params = _ParseOpParams.from_features(
      context_features, [VarLenFeature, FixedLenFeature, RaggedFeature])
  feature_list_params = _ParseOpParams.from_features(
      sequence_features,
      [VarLenFeature, FixedLenSequenceFeature, RaggedFeature])

  with ops.name_scope(name, "ParseSingleSequenceExample",
                      [serialized, example_name]):
    context_output, feature_list_output = (
        _parse_single_sequence_example_raw(serialized, context_params,
                                           feature_list_params, example_name,
                                           name))

    if context_params.ragged_keys:
      context_output = _construct_tensors_for_composite_features(
          context_features, context_output)
    if feature_list_params.ragged_keys:
      feature_list_output = _construct_tensors_for_composite_features(
          sequence_features, feature_list_output)

    return context_output, feature_list_output


def _parse_single_sequence_example_raw(serialized,
                                       context,
                                       feature_list,
                                       debug_name,
                                       name=None):
  """Parses a single `SequenceExample` proto.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary serialized
      `SequenceExample` proto.
    context: A `ParseOpParams` containing the parameters for the parse op for
      the context features.
    feature_list: A `ParseOpParams` containing the parameters for the parse op
      for the feature_list features.
    debug_name: A scalar (0-D Tensor) of strings (optional), the name of the
      serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
    The first dict contains the context key/values.
    The second dict contains the feature_list key/values.

  Raises:
    TypeError: if feature_list.dense_defaults is not either None or a dict.
  """
  with ops.name_scope(name, "ParseSingleExample", [serialized, debug_name]):
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    serialized = _assert_scalar(serialized, "serialized")
  return _parse_sequence_example_raw(serialized, debug_name, context,
                                     feature_list, name)[:2]


@tf_export("io.decode_raw", v1=[])
@dispatch.add_dispatch_support
def decode_raw(input_bytes,
               out_type,
               little_endian=True,
               fixed_length=None,
               name=None):
  """Convert raw byte strings into tensors.
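
  For example (the inputs here are illustrative; `"1234"` is simply four ASCII
  bytes):

  ```python
  tf.io.decode_raw(tf.constant(["1234"]), tf.uint8)
  # => [[49, 50, 51, 52]]

  # With `fixed_length`, each element is zero-padded or truncated to 4 bytes
  # and then reinterpreted as two uint16 values.
  tf.io.decode_raw(tf.constant(["12", "3456"]), tf.uint16, fixed_length=4)
  ```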

  Args:
    input_bytes:
      Each element of the input Tensor is converted to an array of bytes.
    out_type:
      `DType` of the output. Acceptable types are `half`, `float`, `double`,
      `int32`, `uint16`, `uint8`, `int16`, `int8`, `int64`.
    little_endian:
      Whether the `input_bytes` data is in little-endian format. Data will be
      converted into host byte order if necessary.
    fixed_length:
      If set, the first `fixed_length` bytes of each element will be converted.
      Data will be zero-padded or truncated to the specified length.

      `fixed_length` must be a multiple of the size of `out_type`.
      `fixed_length` must be specified if the elements of `input_bytes` are of
      variable length.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` object storing the decoded bytes.

  """
  if fixed_length is not None:
    return gen_parsing_ops.decode_padded_raw(
        input_bytes,
        fixed_length=fixed_length,
        out_type=out_type,
        little_endian=little_endian,
        name=name)
  else:
    return gen_parsing_ops.decode_raw(
        input_bytes, out_type, little_endian=little_endian, name=name)


@tf_export(v1=["decode_raw", "io.decode_raw"])
@dispatch.add_dispatch_support
@deprecation.deprecated_args(None,
                             "bytes is deprecated, use input_bytes instead",
                             "bytes")
def decode_raw_v1(
    input_bytes=None,
    out_type=None,
    little_endian=True,
    name=None,
    bytes=None  # pylint: disable=redefined-builtin
):
  """Convert raw byte strings into tensors.

  Args:
    input_bytes:
      Each element of the input Tensor is converted to an array of bytes.
    out_type:
      `DType` of the output. Acceptable types are `half`, `float`, `double`,
      `int32`, `uint16`, `uint8`, `int16`, `int8`, `int64`.
    little_endian:
      Whether the `input_bytes` data is in little-endian format. Data will be
      converted into host byte order if necessary.
    name: A name for the operation (optional).
    bytes: Deprecated parameter. Use `input_bytes` instead.

  Returns:
    A `Tensor` object storing the decoded bytes.
  """
  input_bytes = deprecation.deprecated_argument_lookup("input_bytes",
                                                       input_bytes, "bytes",
                                                       bytes)

  # out_type is a required positional argument in the original API, and had to
  # be changed to a keyword argument in order to facilitate the transition from
  # the reserved name `bytes` to `input_bytes`. Ensure it's still set.
  if out_type is None:
    raise ValueError(
        "decode_raw_v1() missing 1 positional argument: 'out_type'")

  return gen_parsing_ops.decode_raw(
      input_bytes, out_type, little_endian=little_endian, name=name)


# Swap `name` and `na_value` for backward compatibility.
@tf_export(v1=["io.decode_csv", "decode_csv"])
@dispatch.add_dispatch_support
@deprecation.deprecated_endpoints("decode_csv")
def decode_csv(records,
               record_defaults,
               field_delim=",",
               use_quote_delim=True,
               name=None,
               na_value="",
               select_cols=None):
  """Convert CSV records to tensors. Each column maps to one tensor.

  RFC 4180 format is expected for the CSV records.
  (https://tools.ietf.org/html/rfc4180)
  Note that we allow leading and trailing spaces in int or float fields.

  Args:
    records: A `Tensor` of type `string`.
      Each string is a record/row in the csv and all records should have
      the same format.
    record_defaults: A list of `Tensor` objects with specific types.
      Acceptable types are `float32`, `float64`, `int32`, `int64`, `string`.
      One tensor per column of the input record, with either a
      scalar default value for that column or an empty vector if the column is
      required.
    field_delim: An optional `string`. Defaults to `","`.
      char delimiter to separate fields in a record.
    use_quote_delim: An optional `bool`. Defaults to `True`.
      If false, treats double quotation marks as regular
      characters inside of the string fields (ignoring RFC 4180, Section 2,
      Bullet 5).
    name: A name for the operation (optional).
    na_value: Additional string to recognize as NA/NaN.
    select_cols: Optional sorted list of column indices to select. If specified,
      only this subset of columns will be parsed and returned.

  Returns:
    A list of `Tensor` objects. Has the same type as `record_defaults`.
    Each tensor will have the same shape as `records`.

  Raises:
    ValueError: If any of the arguments is malformed.
  """
  return decode_csv_v2(
      records, record_defaults,
      field_delim, use_quote_delim,
      na_value, select_cols, name
      )


@tf_export("io.decode_csv", v1=[])
@dispatch.add_dispatch_support
def decode_csv_v2(records,
                  record_defaults,
                  field_delim=",",
                  use_quote_delim=True,
                  na_value="",
                  select_cols=None,
                  name=None):
  """Convert CSV records to tensors. Each column maps to one tensor.

  RFC 4180 format is expected for the CSV records.
  (https://tools.ietf.org/html/rfc4180)
  Note that we allow leading and trailing spaces in int or float fields.
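
  For example (a small illustrative sketch; the rows and defaults are made up):

  ```python
  records = ["1,2.5,hello", "4,,world"]
  cols = tf.io.decode_csv(records, record_defaults=[[0], [0.0], [""]])
  # cols[0] == [1, 4]; cols[1] == [2.5, 0.0] (the empty field falls back to the
  # column default); cols[2] == [b"hello", b"world"].
  ```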

  Args:
    records: A `Tensor` of type `string`.
      Each string is a record/row in the csv and all records should have
      the same format.
    record_defaults: A list of `Tensor` objects with specific types.
      Acceptable types are `float32`, `float64`, `int32`, `int64`, `string`.
      One tensor per column of the input record, with either a
      scalar default value for that column or an empty vector if the column is
      required.
    field_delim: An optional `string`. Defaults to `","`.
      char delimiter to separate fields in a record.
    use_quote_delim: An optional `bool`. Defaults to `True`.
      If false, treats double quotation marks as regular
      characters inside of the string fields (ignoring RFC 4180, Section 2,
      Bullet 5).
    na_value: Additional string to recognize as NA/NaN.
    select_cols: Optional sorted list of column indices to select. If specified,
      only this subset of columns will be parsed and returned.
    name: A name for the operation (optional).

  Returns:
    A list of `Tensor` objects. Has the same type as `record_defaults`.
    Each tensor will have the same shape as `records`.

  Raises:
    ValueError: If any of the arguments is malformed.
  """
  if select_cols is not None and any(select_cols[i] >= select_cols[i + 1]
                                     for i in range(len(select_cols) - 1)):
    raise ValueError("select_cols is not strictly increasing.")
  if select_cols is not None and select_cols[0] < 0:
    raise ValueError("select_cols contains negative values.")
  if select_cols is not None and len(select_cols) != len(record_defaults):
    raise ValueError("Length of select_cols and record_defaults do not match.")
  return gen_parsing_ops.decode_csv(
      records=records,
      record_defaults=record_defaults,
      field_delim=field_delim,
      use_quote_delim=use_quote_delim,
      na_value=na_value,
      name=name,
      select_cols=select_cols,
  )


def _assert_scalar(value, name):
  """Asserts that `value` is scalar, and returns `value`."""
  value_rank = value.shape.rank
  if value_rank is None:
    check = control_flow_ops.Assert(
        math_ops.equal(array_ops.rank(value), 0),
        ["Input %s must be a scalar" % name],
        name="%sIsScalar" % name.capitalize())
    result = control_flow_ops.with_dependencies([check],
                                                value,
                                                name="%sDependencies" % name)
    result.set_shape([])
    return result
  elif value_rank == 0:
    return value
  else:
    raise ValueError("Input %s must be a scalar" % name)


@tf_export("io.decode_json_example",
           v1=["decode_json_example", "io.decode_json_example"])
def decode_json_example(json_examples, name=None):
  r"""Convert JSON-encoded Example records to binary protocol buffer strings.

  Note: This is **not** a general purpose JSON parsing op.

  This op converts JSON-serialized `tf.train.Example` (maybe created with
  `json_format.MessageToJson`, following the
  [standard JSON mapping](
  https://developers.google.com/protocol-buffers/docs/proto3#json))
  to a binary-serialized `tf.train.Example` (equivalent to
  `Example.SerializeToString()`) suitable for conversion to tensors with
  `tf.io.parse_example`.

  Here is a `tf.train.Example` proto:

  >>> example = tf.train.Example(
  ...   features=tf.train.Features(
  ...       feature={
  ...           "a": tf.train.Feature(
  ...               int64_list=tf.train.Int64List(
  ...                   value=[1, 1, 3]))}))

  Here it is converted to JSON:

  >>> from google.protobuf import json_format
  >>> example_json = json_format.MessageToJson(example)
  >>> print(example_json)
  {
    "features": {
      "feature": {
        "a": {
          "int64List": {
            "value": [
              "1",
              "1",
              "3"
            ]
          }
        }
      }
    }
  }

  This op converts the above json string to a binary proto:

  >>> example_binary = tf.io.decode_json_example(example_json)
  >>> example_binary.numpy()
  b'\n\x0f\n\r\n\x01a\x12\x08\x1a\x06\x08\x01\x08\x01\x08\x03'

  The op works on string tensors of any shape:

  >>> tf.io.decode_json_example([
  ...     [example_json, example_json],
  ...     [example_json, example_json]]).shape.as_list()
  [2, 2]

  The resulting binary string is equivalent to `Example.SerializeToString()`,
  and can be converted to Tensors using `tf.io.parse_example` and related
  functions:

  >>> tf.io.parse_example(
  ...   serialized=[example_binary.numpy(),
  ...              example.SerializeToString()],
  ...   features = {'a': tf.io.FixedLenFeature(shape=[3], dtype=tf.int64)})
  {'a': <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
   array([[1, 1, 3],
          [1, 1, 3]])>}

  Args:
    json_examples: A string tensor containing json-serialized `tf.train.Example`
      protos.
    name: A name for the op.

  Returns:
    A string Tensor containing the binary-serialized `tf.train.Example` protos.

  Raises:
    `tf.errors.InvalidArgumentError`: If the JSON could not be converted to a
    `tf.train.Example`.
  """
  return gen_parsing_ops.decode_json_example(json_examples, name=name)