1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15
16"""Parsing Ops."""
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import collections
22import re
23
24from tensorflow.python.framework import constant_op
25from tensorflow.python.framework import dtypes
26from tensorflow.python.framework import ops
27from tensorflow.python.framework import sparse_tensor
28from tensorflow.python.framework import tensor_shape
29from tensorflow.python.ops import array_ops
30from tensorflow.python.ops import control_flow_ops
31from tensorflow.python.ops import gen_parsing_ops
32from tensorflow.python.ops import math_ops
33from tensorflow.python.ops import sparse_ops
34# go/tf-wildcard-import
35# pylint: disable=wildcard-import,undefined-variable
36from tensorflow.python.ops.gen_parsing_ops import *
37# pylint: enable=wildcard-import,undefined-variable
38from tensorflow.python.platform import tf_logging
39from tensorflow.python.util import deprecation
40from tensorflow.python.util.tf_export import tf_export
41
42
# Register these parsing/decoding op types as not differentiable, i.e. no
# gradient function is defined for them.
ops.NotDifferentiable("DecodeRaw")
ops.NotDifferentiable("ParseTensor")
ops.NotDifferentiable("SerializeTensor")
ops.NotDifferentiable("StringToNumber")
47
48
@tf_export("io.VarLenFeature", v1=["VarLenFeature", "io.VarLenFeature"])
class VarLenFeature(collections.namedtuple("VarLenFeature", ["dtype"])):
  """Feature spec for a variable-length input feature.

  When used with `parse_example`, a feature configured this way is returned as
  a `SparseTensor`.

  Fields:
    dtype: Data type of the input feature.
  """
57
58
@tf_export("io.SparseFeature", v1=["io.SparseFeature", "SparseFeature"])
class SparseFeature(
    collections.namedtuple(
        "SparseFeature",
        ["index_key", "value_key", "dtype", "size", "already_sorted"])):
  """Configuration for parsing a sparse input feature from an `Example`.

  Note: prefer `VarLenFeature` (possibly in combination with a
  `SequenceExample`) for parsing out `SparseTensor`s, because it is simpler
  than `SparseFeature`.

  A `SparseFeature` closely mirrors the `SparseTensor` that results from
  parsing an `Example` with it. It contains a

  * `value_key`: The name of the key for a `Feature` in the `Example` whose
    parsed `Tensor` will be the resulting `SparseTensor.values`.

  * `index_key`: A list of names - one for each dimension in the resulting
    `SparseTensor`. `indices[i][dim]`, the position of the `i`-th value in the
    `dim` dimension, will be equal to the `i`-th value in the Feature with key
    named `index_key[dim]` in the `Example`.

  * `size`: A list of ints for the resulting `SparseTensor.dense_shape`.

  For example, we can represent the following 2D `SparseTensor`

  ```python
  SparseTensor(indices=[[3, 1], [20, 0]],
               values=[0.5, -1.0],
               dense_shape=[100, 3])
  ```

  with an `Example` input proto

  ```python
  features {
    feature { key: "val" value { float_list { value: [ 0.5, -1.0 ] } } }
    feature { key: "ix0" value { int64_list { value: [ 3, 20 ] } } }
    feature { key: "ix1" value { int64_list { value: [ 1, 0 ] } } }
  }
  ```

  and a `SparseFeature` config with 2 `index_key`s

  ```python
  SparseFeature(index_key=["ix0", "ix1"],
                value_key="val",
                dtype=tf.float32,
                size=[100, 3])
  ```

  Fields:
    index_key: A single string name or a list of string names of index features.
      For each key the underlying feature's type must be `int64` and its length
      must always match that of the `value_key` feature.
      To represent `SparseTensor`s with a `dense_shape` of `rank` higher than 1
      a list of length `rank` should be used.
    value_key: Name of value feature.  The underlying feature's type must
      be `dtype` and its length must always match that of all the `index_key`s'
      features.
    dtype: Data type of the `value_key` feature.
    size: A Python int or list thereof specifying the dense shape. Should be a
      list if and only if `index_key` is a list. In that case the length of the
      list must be equal to the length of `index_key`. For each entry `i`, all
      values in the `index_key[i]` feature must be in `[0, size[i])`.
    already_sorted: A Python boolean to specify whether the values in
      `value_key` are already sorted by their index position. If so skip
      sorting. False by default (optional).
  """

  def __new__(cls, index_key, value_key, dtype, size, already_sorted=False):
    # Only purpose of this override: make `already_sorted` optional.
    return super(SparseFeature, cls).__new__(
        cls,
        index_key=index_key,
        value_key=value_key,
        dtype=dtype,
        size=size,
        already_sorted=already_sorted)
132
133
@tf_export("io.FixedLenFeature", v1=["io.FixedLenFeature", "FixedLenFeature"])
class FixedLenFeature(collections.namedtuple(
    "FixedLenFeature", ["shape", "dtype", "default_value"])):
  """Feature spec for a fixed-length input feature.

  Supply a `default_value` to treat sparse input as dense; without one, the
  parse functions fail on any example that is missing this feature.

  Fields:
    shape: Shape of input data.
    dtype: Data type of input.
    default_value: Value to be used if an example is missing this feature. It
        must be compatible with `dtype` and of the specified `shape`.
  """

  def __new__(cls, shape, dtype, default_value=None):
    # Only purpose of this override: make `default_value` optional.
    return super(FixedLenFeature, cls).__new__(
        cls, shape=shape, dtype=dtype, default_value=default_value)
152
153
@tf_export("io.FixedLenSequenceFeature",
           v1=["io.FixedLenSequenceFeature", "FixedLenSequenceFeature"])
class FixedLenSequenceFeature(collections.namedtuple(
    "FixedLenSequenceFeature",
    ["shape", "dtype", "allow_missing", "default_value"])):
  """Feature spec for parsing a variable-length input feature into a `Tensor`.

  Parsing a single `SequenceExample` or `Example` yields a `Tensor` with a
  static `shape` of `[None] + shape` and the specified `dtype`.
  Parsing a batch of `batch_size` many `Example`s yields a `Tensor` with a
  static `shape` of `[batch_size, None] + shape` and the specified `dtype`.
  The entries in the `batch` from different `Examples` will be padded with
  `default_value` to the maximum length present in the `batch`.

  To treat a sparse input as dense, provide `allow_missing=True`; otherwise,
  the parse functions will fail on any examples missing this feature.

  Fields:
    shape: Shape of input data for dimension 2 and higher. First dimension is
      of variable length `None`.
    dtype: Data type of input.
    allow_missing: Whether to allow this feature to be missing from a feature
      list item. Is available only for parsing `SequenceExample` not for
      parsing `Examples`.
    default_value: Scalar value to be used to pad multiple `Example`s to their
      maximum length. Irrelevant for parsing a single `Example` or
      `SequenceExample`. Defaults to "" for dtype string and 0 otherwise
      (optional).
  """

  def __new__(cls, shape, dtype, allow_missing=False, default_value=None):
    # Only purpose of this override: make the last two fields optional.
    return super(FixedLenSequenceFeature, cls).__new__(
        cls,
        shape=shape,
        dtype=dtype,
        allow_missing=allow_missing,
        default_value=default_value)
187
188
def _features_to_raw_params(features, types):
  """Split feature tuples into raw params used by `gen_parsing_ops`.

  Feature keys are visited in sorted order so that the returned lists are
  deterministic. Every sparse key (the key of a `VarLenFeature`, or an
  `index_key` / `value_key` of a `SparseFeature`) is registered at most once:
  a key requested again with the same dtype is reused, and a key requested
  again with a different dtype is rejected.

  Args:
    features: A `dict` mapping feature keys to objects of a type in `types`.
      If empty or `None`, all returned collections are empty.
    types: Type of features to allow, among `FixedLenFeature`, `VarLenFeature`,
      `SparseFeature`, and `FixedLenSequenceFeature`.

  Returns:
    Tuple of `sparse_keys`, `sparse_types`, `dense_keys`, `dense_types`,
      `dense_defaults`, `dense_shapes`. `dense_defaults` is an `OrderedDict`
      keyed by dense feature key; the others are lists.

  Raises:
    ValueError: if `features` contains an item not in `types`, an invalid
        feature, or the same sparse key requested with conflicting dtypes.
  """
  sparse_keys = []
  sparse_types = []
  dense_keys = []
  dense_types = []
  # When the graph is built twice, multiple dense_defaults in a normal dict
  # could come out in different orders. This will fail the _e2e_test which
  # expects exactly the same graph.
  # OrderedDict which preserves the order can solve the problem.
  dense_defaults = collections.OrderedDict()
  dense_shapes = []

  def _add_sparse_key(sparse_key, dtype, dtype_text=None):
    """Registers `sparse_key` once; rejects a conflicting earlier dtype."""
    if sparse_key in sparse_keys:
      known_dtype = sparse_types[sparse_keys.index(sparse_key)]
      if known_dtype != dtype:
        raise ValueError(
            "Conflicting type %s vs %s for feature %s." %
            (known_dtype, dtype if dtype_text is None else dtype_text,
             sparse_key))
    else:
      sparse_keys.append(sparse_key)
      sparse_types.append(dtype)

  if features:
    # NOTE: We iterate over sorted keys to keep things deterministic.
    for key in sorted(features.keys()):
      feature = features[key]
      if isinstance(feature, VarLenFeature):
        if VarLenFeature not in types:
          raise ValueError("Unsupported VarLenFeature %s." % (feature,))
        if not feature.dtype:
          raise ValueError("Missing type for feature %s." % key)
        # A SparseFeature that sorts earlier may already have registered this
        # key as one of its index/value keys; go through the shared helper so
        # the key is not emitted twice and dtype conflicts are detected.
        _add_sparse_key(key, feature.dtype)
      elif isinstance(feature, SparseFeature):
        if SparseFeature not in types:
          raise ValueError("Unsupported SparseFeature %s." % (feature,))

        if not feature.index_key:
          raise ValueError(
              "Missing index_key for SparseFeature %s." % (feature,))
        if not feature.value_key:
          raise ValueError(
              "Missing value_key for SparseFeature %s." % (feature,))
        if not feature.dtype:
          raise ValueError("Missing type for feature %s." % key)
        index_keys = feature.index_key
        if isinstance(index_keys, str):
          index_keys = [index_keys]
        elif len(index_keys) > 1:
          tf_logging.warning("SparseFeature is a complicated feature config "
                             "and should only be used after careful "
                             "consideration of VarLenFeature.")
        for index_key in sorted(index_keys):
          # Index features are always parsed as int64; the literal "int64"
          # keeps the conflict message text unchanged.
          _add_sparse_key(index_key, dtypes.int64, "int64")
        _add_sparse_key(feature.value_key, feature.dtype)
      elif isinstance(feature, FixedLenFeature):
        if FixedLenFeature not in types:
          raise ValueError("Unsupported FixedLenFeature %s." % (feature,))
        if not feature.dtype:
          raise ValueError("Missing type for feature %s." % key)
        if feature.shape is None:
          raise ValueError("Missing shape for feature %s." % key)
        feature_tensor_shape = tensor_shape.as_shape(feature.shape)
        if (feature.shape and feature_tensor_shape.ndims and
            feature_tensor_shape.dims[0].value is None):
          raise ValueError("First dimension of shape for feature %s unknown. "
                           "Consider using FixedLenSequenceFeature." % key)
        # `feature.shape` is known to be non-None here (checked above).
        if not feature_tensor_shape.is_fully_defined():
          raise ValueError("All dimensions of shape for feature %s need to be "
                           "known but received %s." % (key, str(feature.shape)))
        dense_keys.append(key)
        dense_shapes.append(feature.shape)
        dense_types.append(feature.dtype)
        if feature.default_value is not None:
          dense_defaults[key] = feature.default_value
      elif isinstance(feature, FixedLenSequenceFeature):
        if FixedLenSequenceFeature not in types:
          raise ValueError("Unsupported FixedLenSequenceFeature %s." % (
              feature,))
        if not feature.dtype:
          raise ValueError("Missing type for feature %s." % key)
        if feature.shape is None:
          raise ValueError("Missing shape for feature %s." % key)
        dense_keys.append(key)
        dense_shapes.append(feature.shape)
        dense_types.append(feature.dtype)
        # `allow_missing` records a `None` placeholder default; an explicit
        # `default_value` then takes precedence over that placeholder.
        if feature.allow_missing:
          dense_defaults[key] = None
        if feature.default_value is not None:
          dense_defaults[key] = feature.default_value
      else:
        raise ValueError("Invalid feature %s:%s." % (key, feature))
  return (
      sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
      dense_shapes)
303
304
def _construct_sparse_tensors_for_sparse_features(features, tensor_dict):
  """Merges the index and value tensors of each `SparseFeature` into one.

  Builds a new dict from `tensor_dict`. For every `SparseFeature` among the
  values of `features`, the entries of `tensor_dict` stored under its
  `index_key`(s) and `value_key` are combined with `sparse_ops.sparse_merge`,
  and the result is stored under that feature's own key. Only keys that are
  also present in `features` are kept in the result.

  Args:
    features: A `dict` mapping feature keys to `SparseFeature` values.
      Values of other types will be ignored.
    tensor_dict: A `dict` mapping feature keys to `Tensor` and `SparseTensor`
      values. Expected to contain keys of the `SparseFeature`s' `index_key`s and
      `value_key`s and mapping them to `SparseTensor`s. It is not modified.
  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values. Similar
    to `tensor_dict` except each `SparseFeature` in `features` results in a
    single `SparseTensor`.
  """
  merged = dict(tensor_dict)  # Work on a copy; leave the caller's dict alone.
  # Walk the keys in sorted order and merge each SparseFeature's tensors.
  for name in sorted(features.keys()):
    spec = features[name]
    if not isinstance(spec, SparseFeature):
      continue
    index_key = spec.index_key
    if isinstance(index_key, str):
      sp_ids = merged[index_key]
    else:
      sp_ids = [merged[dim_key] for dim_key in index_key]
    merged[name] = sparse_ops.sparse_merge(
        sp_ids,
        merged[spec.value_key],
        vocab_size=spec.size,
        already_sorted=spec.already_sorted)
  # Drop tensors that only served as inputs for a SparseFeature merge, i.e.
  # everything whose key was not requested in `features`.
  requested = set(features)
  return {name: tensor for name, tensor in merged.items() if name in requested}
346
347
def _prepend_none_dimension(features):
  """Prepends an unknown dimension to each `FixedLenSequenceFeature` shape.

  Args:
    features: A `dict` mapping feature keys to feature configs, or a falsy
      value (e.g. `None` or an empty `dict`).

  Returns:
    `features` itself if it is falsy. Otherwise a shallow copy in which every
    `FixedLenSequenceFeature` is replaced by one whose shape is
    `[None] + list(shape)`; all other entries are kept as they are.

  Raises:
    ValueError: if a `FixedLenSequenceFeature` has a falsy `allow_missing`.
  """
  if not features:
    return features
  updated = dict(features)  # Copy so the caller's dict is left untouched.
  for name, spec in features.items():
    if not isinstance(spec, FixedLenSequenceFeature):
      continue
    if not spec.allow_missing:
      raise ValueError("Unsupported: FixedLenSequenceFeature requires "
                       "allow_missing to be True.")
    updated[name] = FixedLenSequenceFeature(
        shape=[None] + list(spec.shape),
        dtype=spec.dtype,
        allow_missing=spec.allow_missing,
        default_value=spec.default_value)
  return updated
364
365
@tf_export(v1=["io.parse_example", "parse_example"])
def parse_example(serialized, features, name=None, example_names=None):
  # pylint: disable=line-too-long
  """Parses `Example` protos into a `dict` of tensors.

  Parses a number of serialized [`Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  protos given in `serialized`. We refer to `serialized` as a batch with
  `batch_size` many entries of individual `Example` protos.

  `example_names` may contain descriptive names for the corresponding serialized
  protos. These may be useful for debugging purposes, but they have no effect on
  the output. If not `None`, `example_names` must be the same length as
  `serialized`.

  This op parses serialized examples into a dictionary mapping keys to `Tensor`
  and `SparseTensor` objects. `features` is a dict from keys to `VarLenFeature`,
  `SparseFeature`, `FixedLenFeature`, and `FixedLenSequenceFeature` objects.
  Each `VarLenFeature` and `SparseFeature` is mapped to a `SparseTensor`, and
  each `FixedLenFeature` and `FixedLenSequenceFeature` is mapped to a `Tensor`.

  Each `VarLenFeature` maps to a `SparseTensor` of the specified type
  representing a ragged matrix. Its indices are `[batch, index]` where `batch`
  identifies the example in `serialized`, and `index` is the value's index in
  the list of values associated with that feature and example.

  Each `SparseFeature` maps to a `SparseTensor` of the specified type
  representing a Tensor of `dense_shape` `[batch_size] + SparseFeature.size`.
  Its `values` come from the feature in the examples with key `value_key`.
  A `values[i]` comes from a position `k` in the feature of an example at batch
  entry `batch`. This positional information is recorded in `indices[i]` as
  `[batch, index_0, index_1, ...]` where `index_j` is the `k-th` value of
  the feature in the example with key `SparseFeature.index_key[j]`.
  In other words, we split the indices (except the first index indicating the
  batch entry) of a `SparseTensor` by dimension into different features of the
  `Example`. Due to its complexity a `VarLenFeature` should be preferred over a
  `SparseFeature` whenever possible.

  Each `FixedLenFeature` `df` maps to a `Tensor` of the specified type and
  shape `(serialized.size(),) + df.shape`.

  `FixedLenFeature` entries with a `default_value` are optional. With no default
  value, we will fail if that `Feature` is missing from any example in
  `serialized`.

  Each `FixedLenSequenceFeature` `df` maps to a `Tensor` of the specified type
  and shape `(serialized.size(), None) + df.shape`.
  All examples in `serialized` will be padded with `default_value` along the
  second dimension.

  Examples:

  For example, if one expects a `tf.float32` `VarLenFeature` `ft` and three
  serialized `Example`s are provided:

  ```
  serialized = [
    features
      { feature { key: "ft" value { float_list { value: [1.0, 2.0] } } } },
    features
      { feature []},
    features
      { feature { key: "ft" value { float_list { value: [3.0] } } } }
  ]
  ```

  then the output will look like:

  ```python
  {"ft": SparseTensor(indices=[[0, 0], [0, 1], [2, 0]],
                      values=[1.0, 2.0, 3.0],
                      dense_shape=(3, 2)) }
  ```

  If instead a `FixedLenSequenceFeature` with `default_value = -1.0` and
  `shape=[]` is used then the output will look like:

  ```python
  {"ft": [[1.0, 2.0], [3.0, -1.0]]}
  ```

  Given two `Example` input protos in `serialized`:

  ```
  [
    features {
      feature { key: "kw" value { bytes_list { value: [ "knit", "big" ] } } }
      feature { key: "gps" value { float_list { value: [] } } }
    },
    features {
      feature { key: "kw" value { bytes_list { value: [ "emmy" ] } } }
      feature { key: "dank" value { int64_list { value: [ 42 ] } } }
      feature { key: "gps" value { } }
    }
  ]
  ```

  And arguments

  ```
  example_names: ["input0", "input1"],
  features: {
      "kw": VarLenFeature(tf.string),
      "dank": VarLenFeature(tf.int64),
      "gps": VarLenFeature(tf.float32),
  }
  ```

  Then the output is a dictionary:

  ```python
  {
    "kw": SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=["knit", "big", "emmy"],
        dense_shape=[2, 2]),
    "dank": SparseTensor(
        indices=[[1, 0]],
        values=[42],
        dense_shape=[2, 1]),
    "gps": SparseTensor(
        indices=[],
        values=[],
        dense_shape=[2, 0]),
  }
  ```

  For dense results in two serialized `Example`s:

  ```
  [
    features {
      feature { key: "age" value { int64_list { value: [ 0 ] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
     },
     features {
      feature { key: "age" value { int64_list { value: [] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
    }
  ]
  ```

  We can use arguments:

  ```
  example_names: ["input0", "input1"],
  features: {
      "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
      "gender": FixedLenFeature([], dtype=tf.string),
  }
  ```

  And the expected output is:

  ```python
  {
    "age": [[0], [-1]],
    "gender": [["f"], ["f"]],
  }
  ```

  An alternative to `VarLenFeature` to obtain a `SparseTensor` is
  `SparseFeature`. For example, given two `Example` input protos in
  `serialized`:

  ```
  [
    features {
      feature { key: "val" value { float_list { value: [ 0.5, -1.0 ] } } }
      feature { key: "ix" value { int64_list { value: [ 3, 20 ] } } }
    },
    features {
      feature { key: "val" value { float_list { value: [ 0.0 ] } } }
      feature { key: "ix" value { int64_list { value: [ 42 ] } } }
    }
  ]
  ```

  And arguments

  ```
  example_names: ["input0", "input1"],
  features: {
      "sparse": SparseFeature(
          index_key="ix", value_key="val", dtype=tf.float32, size=100),
  }
  ```

  Then the output is a dictionary:

  ```python
  {
    "sparse": SparseTensor(
        indices=[[0, 3], [0, 20], [1, 42]],
        values=[0.5, -1.0, 0.0],
        dense_shape=[2, 100]),
  }
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    features: A `dict` mapping feature keys to `FixedLenFeature`,
      `FixedLenSequenceFeature`, `VarLenFeature`, and `SparseFeature` values.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  # Same behavior as `parse_example_v2`; only the order of the trailing
  # `name` / `example_names` parameters differs between the two signatures.
  return parse_example_v2(
      serialized=serialized,
      features=features,
      example_names=example_names,
      name=name)
581
582
@tf_export("io.parse_example", v1=[])
def parse_example_v2(serialized, features, example_names=None, name=None):
  # pylint: disable=line-too-long
  """Parses `Example` protos into a `dict` of tensors.

  Parses a number of serialized [`Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  protos given in `serialized`. We refer to `serialized` as a batch with
  `batch_size` many entries of individual `Example` protos.

  `example_names` may contain descriptive names for the corresponding serialized
  protos. These may be useful for debugging purposes, but they have no effect on
  the output. If not `None`, `example_names` must be the same length as
  `serialized`.

  This op parses serialized examples into a dictionary mapping keys to `Tensor`
  and `SparseTensor` objects. `features` is a dict from keys to `VarLenFeature`,
  `SparseFeature`, `FixedLenFeature`, and `FixedLenSequenceFeature` objects.
  Each `VarLenFeature` and `SparseFeature` is mapped to a `SparseTensor`, and
  each `FixedLenFeature` and `FixedLenSequenceFeature` is mapped to a `Tensor`.

  Each `VarLenFeature` maps to a `SparseTensor` of the specified type
  representing a ragged matrix. Its indices are `[batch, index]` where `batch`
  identifies the example in `serialized`, and `index` is the value's index in
  the list of values associated with that feature and example.

  Each `SparseFeature` maps to a `SparseTensor` of the specified type
  representing a Tensor of `dense_shape` `[batch_size] + SparseFeature.size`.
  Its `values` come from the feature in the examples with key `value_key`.
  A `values[i]` comes from a position `k` in the feature of an example at batch
  entry `batch`. This positional information is recorded in `indices[i]` as
  `[batch, index_0, index_1, ...]` where `index_j` is the `k-th` value of
  the feature in the example with key `SparseFeature.index_key[j]`.
  In other words, we split the indices (except the first index indicating the
  batch entry) of a `SparseTensor` by dimension into different features of the
  `Example`. Due to its complexity a `VarLenFeature` should be preferred over a
  `SparseFeature` whenever possible.

  Each `FixedLenFeature` `df` maps to a `Tensor` of the specified type and
  shape `(serialized.size(),) + df.shape`.

  `FixedLenFeature` entries with a `default_value` are optional. With no default
  value, we will fail if that `Feature` is missing from any example in
  `serialized`.

  Each `FixedLenSequenceFeature` `df` maps to a `Tensor` of the specified type
  and shape `(serialized.size(), None) + df.shape`.
  All examples in `serialized` will be padded with `default_value` along the
  second dimension. A `FixedLenSequenceFeature` passed to this function must
  have `allow_missing=True`.

  Examples:

  For example, if one expects a `tf.float32` `VarLenFeature` `ft` and three
  serialized `Example`s are provided:

  ```
  serialized = [
    features
      { feature { key: "ft" value { float_list { value: [1.0, 2.0] } } } },
    features
      { feature []},
    features
      { feature { key: "ft" value { float_list { value: [3.0] } } } }
  ]
  ```

  then the output will look like:

  ```python
  {"ft": SparseTensor(indices=[[0, 0], [0, 1], [2, 0]],
                      values=[1.0, 2.0, 3.0],
                      dense_shape=(3, 2)) }
  ```

  If instead a `FixedLenSequenceFeature` with `default_value = -1.0` and
  `shape=[]` is used then the output will look like:

  ```python
  {"ft": [[1.0, 2.0], [3.0, -1.0]]}
  ```

  Given two `Example` input protos in `serialized`:

  ```
  [
    features {
      feature { key: "kw" value { bytes_list { value: [ "knit", "big" ] } } }
      feature { key: "gps" value { float_list { value: [] } } }
    },
    features {
      feature { key: "kw" value { bytes_list { value: [ "emmy" ] } } }
      feature { key: "dank" value { int64_list { value: [ 42 ] } } }
      feature { key: "gps" value { } }
    }
  ]
  ```

  And arguments

  ```
  example_names: ["input0", "input1"],
  features: {
      "kw": VarLenFeature(tf.string),
      "dank": VarLenFeature(tf.int64),
      "gps": VarLenFeature(tf.float32),
  }
  ```

  Then the output is a dictionary:

  ```python
  {
    "kw": SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=["knit", "big", "emmy"],
        dense_shape=[2, 2]),
    "dank": SparseTensor(
        indices=[[1, 0]],
        values=[42],
        dense_shape=[2, 1]),
    "gps": SparseTensor(
        indices=[],
        values=[],
        dense_shape=[2, 0]),
  }
  ```

  For dense results in two serialized `Example`s:

  ```
  [
    features {
      feature { key: "age" value { int64_list { value: [ 0 ] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
     },
     features {
      feature { key: "age" value { int64_list { value: [] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
    }
  ]
  ```

  We can use arguments:

  ```
  example_names: ["input0", "input1"],
  features: {
      "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
      "gender": FixedLenFeature([], dtype=tf.string),
  }
  ```

  And the expected output is:

  ```python
  {
    "age": [[0], [-1]],
    "gender": [["f"], ["f"]],
  }
  ```

  An alternative to `VarLenFeature` to obtain a `SparseTensor` is
  `SparseFeature`. For example, given two `Example` input protos in
  `serialized`:

  ```
  [
    features {
      feature { key: "val" value { float_list { value: [ 0.5, -1.0 ] } } }
      feature { key: "ix" value { int64_list { value: [ 3, 20 ] } } }
    },
    features {
      feature { key: "val" value { float_list { value: [ 0.0 ] } } }
      feature { key: "ix" value { int64_list { value: [ 42 ] } } }
    }
  ]
  ```

  And arguments

  ```
  example_names: ["input0", "input1"],
  features: {
      "sparse": SparseFeature(
          index_key="ix", value_key="val", dtype=tf.float32, size=100),
  }
  ```

  Then the output is a dictionary:

  ```python
  {
    "sparse": SparseTensor(
        indices=[[0, 3], [0, 20], [1, 42]],
        values=[0.5, -1.0, 0.0],
        dense_shape=[2, 100]),
  }
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    features: A non-empty `dict` mapping feature keys to `FixedLenFeature`,
      `FixedLenSequenceFeature`, `VarLenFeature`, and `SparseFeature` values.
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if `features` is empty or `None`, or if any feature is invalid.
  """
  if not features:
    # Wrap in a 1-tuple: a bare empty tuple on the right of `%` would be taken
    # as the (missing) format arguments and raise TypeError instead.
    raise ValueError("Missing: features was %s." % (features,))
  # FixedLenSequenceFeature shapes get a leading unknown dimension first.
  features = _prepend_none_dimension(features)
  (sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
   dense_shapes) = _features_to_raw_params(
       features,
       [VarLenFeature, SparseFeature, FixedLenFeature, FixedLenSequenceFeature])
  outputs = _parse_example_raw(
      serialized, example_names, sparse_keys, sparse_types, dense_keys,
      dense_types, dense_defaults, dense_shapes, name)
  # Combine the raw index/value tensors of each SparseFeature into a single
  # SparseTensor and drop the intermediate entries.
  return _construct_sparse_tensors_for_sparse_features(features, outputs)
808
809
def _parse_example_raw(serialized,
                       names=None,
                       sparse_keys=None,
                       sparse_types=None,
                       dense_keys=None,
                       dense_types=None,
                       dense_defaults=None,
                       dense_shapes=None,
                       name=None):
  """Runs the `ParseExample` op on a batch of serialized `Example` protos.

  The raw arguments are first normalized and validated by
  `_process_raw_parameters`; the outputs of the op are then gathered into a
  single dictionary keyed by feature name.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    sparse_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `SparseTensor` objects.
    sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s.
    dense_types: A list of DTypes of the same length as `dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the dense_keys of the feature.
    dense_shapes: A list of tuples with the same length as `dense_keys`.
      The shape of the data for each dense feature referenced by `dense_keys`.
      Required for any input tensors identified by `dense_keys`.  Must be
      either fully defined, or may contain an unknown first dimension.
      An unknown first dimension means the feature is treated as having
      a variable number of blocks, and the output shape along this dimension
      is considered unknown at graph build time.  Padding is applied for
      minibatch elements smaller than the maximum number of blocks for the
      given feature along this dimension.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping keys to `Tensor`s and `SparseTensor`s: every sparse key
    maps to a `SparseTensor` and every dense key to a `Tensor`.

  """
  with ops.name_scope(name, "ParseExample", [serialized, names]):
    # The last element (the `TensorShape` form of the dense shapes) is not
    # needed here; the op consumes the proto form.
    (names, defaults_vec, sparse_keys, sparse_types, dense_keys,
     shape_protos, _) = _process_raw_parameters(
         names, dense_defaults, sparse_keys, sparse_types, dense_keys,
         dense_types, dense_shapes)

    (sparse_indices, sparse_values, sparse_shapes,
     dense_values) = gen_parsing_ops.parse_example(
         serialized=serialized,
         names=names,
         dense_defaults=defaults_vec,
         sparse_keys=sparse_keys,
         sparse_types=sparse_types,
         dense_keys=dense_keys,
         dense_shapes=shape_protos,
         name=name)

    # Bundle each (indices, values, shape) triple into one SparseTensor.
    sparse_tensors = []
    for indices, values, shape in zip(
        sparse_indices, sparse_values, sparse_shapes):
      sparse_tensors.append(sparse_tensor.SparseTensor(indices, values, shape))

    # Sparse results come first, then dense ones, matching the key order.
    all_keys = sparse_keys + dense_keys
    all_values = sparse_tensors + dense_values
    return dict(zip(all_keys, all_values))
876
877
878def _process_raw_parameters(names, dense_defaults, sparse_keys, sparse_types,
879                            dense_keys, dense_types, dense_shapes):
880  """Process raw parameters to params used by `gen_parsing_ops`.
881
882  Args:
883    names: A vector (1-D Tensor) of strings (optional), the names of
884      the serialized protos.
885    dense_defaults: A dict mapping string keys to `Tensor`s.
886      The keys of the dict must match the dense_keys of the feature.
887    sparse_keys: A list of string keys in the examples' features.
888      The results for these keys will be returned as `SparseTensor` objects.
889    sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
890      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
891      and `tf.string` (`BytesList`) are supported.
892    dense_keys: A list of string keys in the examples' features.
893      The results for these keys will be returned as `Tensor`s
894    dense_types: A list of DTypes of the same length as `dense_keys`.
895      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
896      and `tf.string` (`BytesList`) are supported.
897    dense_shapes: A list of tuples with the same length as `dense_keys`.
898      The shape of the data for each dense feature referenced by `dense_keys`.
899      Required for any input tensors identified by `dense_keys`.  Must be
900      either fully defined, or may contain an unknown first dimension.
901      An unknown first dimension means the feature is treated as having
902      a variable number of blocks, and the output shape along this dimension
903      is considered unknown at graph build time.  Padding is applied for
904      minibatch elements smaller than the maximum number of blocks for the
905      given feature along this dimension.
906
907  Returns:
908    Tuple of `names`, `dense_defaults_vec`, `sparse_keys`, `sparse_types`,
909    `dense_keys`, `dense_shapes`.
910
911  Raises:
912    ValueError: If sparse and dense key sets intersect, or input lengths do not
913      match up.
914  """
915  names = [] if names is None else names
916  dense_defaults = collections.OrderedDict(
917  ) if dense_defaults is None else dense_defaults
918  sparse_keys = [] if sparse_keys is None else sparse_keys
919  sparse_types = [] if sparse_types is None else sparse_types
920  dense_keys = [] if dense_keys is None else dense_keys
921  dense_types = [] if dense_types is None else dense_types
922  dense_shapes = ([[]] * len(dense_keys)
923                  if dense_shapes is None else dense_shapes)
924
925  num_dense = len(dense_keys)
926  num_sparse = len(sparse_keys)
927
928  if len(dense_shapes) != num_dense:
929    raise ValueError("len(dense_shapes) != len(dense_keys): %d vs. %d" %
930                     (len(dense_shapes), num_dense))
931  if len(dense_types) != num_dense:
932    raise ValueError("len(dense_types) != len(num_dense): %d vs. %d" %
933                     (len(dense_types), num_dense))
934  if len(sparse_types) != num_sparse:
935    raise ValueError("len(sparse_types) != len(sparse_keys): %d vs. %d" %
936                     (len(sparse_types), num_sparse))
937  if num_dense + num_sparse == 0:
938    raise ValueError("Must provide at least one sparse key or dense key")
939  if not set(dense_keys).isdisjoint(set(sparse_keys)):
940    raise ValueError(
941        "Dense and sparse keys must not intersect; intersection: %s" %
942        set(dense_keys).intersection(set(sparse_keys)))
943
944  # Convert dense_shapes to TensorShape object.
945  dense_shapes = [tensor_shape.as_shape(shape) for shape in dense_shapes]
946
947  dense_defaults_vec = []
948  for i, key in enumerate(dense_keys):
949    default_value = dense_defaults.get(key)
950    dense_shape = dense_shapes[i]
951    if (dense_shape.ndims is not None and dense_shape.ndims > 0 and
952        dense_shape.dims[0].value is None):
953      # Variable stride dense shape, the default value should be a
954      # scalar padding value
955      if default_value is None:
956        default_value = ops.convert_to_tensor(
957            "" if dense_types[i] == dtypes.string else 0, dtype=dense_types[i])
958      else:
959        # Reshape to a scalar to ensure user gets an error if they
960        # provide a tensor that's not intended to be a padding value
961        # (0 or 2+ elements).
962        key_name = "padding_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
963        default_value = ops.convert_to_tensor(
964            default_value, dtype=dense_types[i], name=key_name)
965        default_value = array_ops.reshape(default_value, [])
966    else:
967      if default_value is None:
968        default_value = constant_op.constant([], dtype=dense_types[i])
969      elif not isinstance(default_value, ops.Tensor):
970        key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
971        default_value = ops.convert_to_tensor(
972            default_value, dtype=dense_types[i], name=key_name)
973        default_value = array_ops.reshape(default_value, dense_shape)
974
975    dense_defaults_vec.append(default_value)
976
977  # Finally, convert dense_shapes to TensorShapeProto
978  dense_shapes_as_proto = [shape.as_proto() for shape in dense_shapes]
979
980  return (names, dense_defaults_vec, sparse_keys, sparse_types, dense_keys,
981          dense_shapes_as_proto, dense_shapes)
982
983
@tf_export(v1=["io.parse_single_example", "parse_single_example"])
def parse_single_example(serialized, features, name=None, example_names=None):
  """Parses a single `Example` proto.

  This is the v1 entry point: it takes `name` before `example_names` and
  forwards all of its arguments to `parse_single_example_v2_unoptimized`.

  Similar to `parse_example`, except:

  For dense tensors, the returned `Tensor` is identical to the output of
  `parse_example`, except there is no batch dimension, the output shape is the
  same as the shape given in `dense_shape`.

  For `SparseTensor`s, the first (batch) column of the indices matrix is removed
  (the indices matrix is a column vector), the values vector is unchanged, and
  the first (`batch_size`) entry of the shape vector is removed (it is now a
  single element vector).

  One might see performance advantages by batching `Example` protos with
  `parse_example` instead of using this function directly.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
      See `_parse_single_example_raw` documentation for more details.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    name: A name for this operation (optional).
    example_names: (Optional) A scalar string Tensor, the associated name.
      See `_parse_single_example_raw` documentation for more details.

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  # Forward by keyword so the differing argument order of the two entry
  # points is explicit at the call site.
  return parse_single_example_v2_unoptimized(
      serialized, features, example_names=example_names, name=name)
1020
1021
1022# TODO(b/70890287): Combine the implementation of this op and
1023# `parse_single_example_v2()` after 1/10/2018.
@tf_export("io.parse_single_example", v1=[])
def parse_single_example_v2_unoptimized(
    serialized, features, example_names=None, name=None):
  """Parses a single `Example` proto.

  When `example_names` is `None` the call is handed to
  `parse_single_example_v2`; otherwise the features are translated into raw
  parameters and parsed with `_parse_single_example_raw`.

  Similar to `parse_example`, except:

  For dense tensors, the returned `Tensor` is identical to the output of
  `parse_example`, except there is no batch dimension, the output shape is the
  same as the shape given in `dense_shape`.

  For `SparseTensor`s, the first (batch) column of the indices matrix is removed
  (the indices matrix is a column vector), the values vector is unchanged, and
  the first (`batch_size`) entry of the shape vector is removed (it is now a
  single element vector).

  One might see performance advantages by batching `Example` protos with
  `parse_example` instead of using this function directly.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
      See `_parse_single_example_raw` documentation for more details.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    example_names: (Optional) A scalar string Tensor, the associated name.
      See `_parse_single_example_raw` documentation for more details.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if `features` is empty or `None`, or if any feature is invalid.
  """
  if not features:
    raise ValueError("Missing features.")
  if example_names is None:
    # No example name to attach: use the other single-example implementation.
    return parse_single_example_v2(serialized, features, name)

  features = _prepend_none_dimension(features)
  feature_types = [
      VarLenFeature, FixedLenFeature, FixedLenSequenceFeature, SparseFeature]
  raw_params = _features_to_raw_params(features, feature_types)
  (sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
   dense_shapes) = raw_params

  parsed = _parse_single_example_raw(
      serialized,
      names=example_names,
      sparse_keys=sparse_keys,
      sparse_types=sparse_types,
      dense_keys=dense_keys,
      dense_types=dense_types,
      dense_defaults=dense_defaults,
      dense_shapes=dense_shapes,
      name=name)
  return _construct_sparse_tensors_for_sparse_features(features, parsed)
1072
1073
def _parse_single_example_raw(serialized,
                              names=None,
                              sparse_keys=None,
                              sparse_types=None,
                              dense_keys=None,
                              dense_types=None,
                              dense_defaults=None,
                              dense_shapes=None,
                              name=None):
  """Parses a single `Example` proto.

  The scalar inputs are given a leading dimension of size one, parsed with
  `_parse_example_raw`, and that leading dimension is removed again from every
  output.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
      See `_parse_example_raw` documentation for more details.
    names: (Optional) A scalar string Tensor, the associated name.
      See `_parse_example_raw` documentation for more details.
    sparse_keys: See `_parse_example_raw` documentation for more details.
    sparse_types: See `_parse_example_raw` documentation for more details.
    dense_keys: See `_parse_example_raw` documentation for more details.
    dense_types: See `_parse_example_raw` documentation for more details.
    dense_defaults: See `_parse_example_raw` documentation for more details.
    dense_shapes: See `_parse_example_raw` documentation for more details.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if `serialized` or `names` has a statically known rank other
      than 0, or if any feature is invalid.
  """

  def _scalar_as_batch(value, error_message, assert_name, dependencies_name):
    """Checks that `value` is a scalar and returns it expanded at axis 0."""
    value = ops.convert_to_tensor(value)
    static_rank = value.get_shape().ndims
    if static_rank is None:
      # Rank unknown at graph construction time: defer the check to run time.
      value = control_flow_ops.with_dependencies(
          [control_flow_ops.Assert(
              math_ops.equal(array_ops.rank(value), 0),
              [error_message],
              name=assert_name)],
          value,
          name=dependencies_name)
    elif static_rank != 0:
      raise ValueError(error_message)
    return array_ops.expand_dims(value, 0)

  # Characters outside this set are replaced by "_" when a feature key is
  # embedded in an op name.
  invalid_name_chars = "[^A-Za-z0-9_.\\-/]"

  with ops.name_scope(name, "ParseSingleExample", [serialized, names]):
    serialized = _scalar_as_batch(
        serialized, "Input serialized must be a scalar",
        "SerializedIsScalar", "SerializedDependencies")
    if names is not None:
      names = _scalar_as_batch(
          names, "Input names must be a scalar",
          "NamesIsScalar", "NamesDependencies")

    outputs = _parse_example_raw(
        serialized,
        names=names,
        sparse_keys=sparse_keys,
        sparse_types=sparse_types,
        dense_keys=dense_keys,
        dense_types=dense_types,
        dense_defaults=dense_defaults,
        dense_shapes=dense_shapes,
        name=name)

    if dense_keys is not None:
      # Drop the size-one leading dimension from every dense output.
      for dense_key in dense_keys:
        suffix = re.sub(invalid_name_chars, "_", dense_key)
        outputs[dense_key] = array_ops.squeeze(
            outputs[dense_key], [0], name="Squeeze_%s" % suffix)

    if sparse_keys is not None:
      # Drop the first column of the indices and the first entry of the dense
      # shape from every sparse output; the values are reused as they are.
      for sparse_key in sparse_keys:
        suffix = re.sub(invalid_name_chars, "_", sparse_key)
        batched = outputs[sparse_key]
        indices = array_ops.slice(
            batched.indices, [0, 1], [-1, -1],
            name="Slice_Indices_%s" % suffix)
        values = batched.values
        dense_shape = array_ops.slice(
            batched.dense_shape, [1], [-1],
            name="Squeeze_Shape_%s" % suffix)
        outputs[sparse_key] = sparse_tensor.SparseTensor(
            indices, values, dense_shape)

    return outputs
1160
1161
@tf_export("io.parse_sequence_example")
def parse_sequence_example(serialized,
                           context_features=None,
                           sequence_features=None,
                           example_names=None,
                           name=None):
  # pylint: disable=line-too-long
  """Parses a batch of `SequenceExample` protos.

  Parses a vector of serialized
  [`SequenceExample`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  protos given in `serialized`.

  The serialized sequence examples are parsed into a tuple of dictionaries
  mapping keys to `Tensor` and `SparseTensor` objects.  The first dictionary
  holds the keys appearing in `context_features`, the second dictionary holds
  the keys appearing in `sequence_features`.

  At least one of `context_features` and `sequence_features` must be provided
  and non-empty.

  The `context_features` keys are associated with a `SequenceExample` as a
  whole, independent of time / frame.  In contrast, the `sequence_features` keys
  provide a way to access variable-length data within the `FeatureList` section
  of the `SequenceExample` proto.  While the shapes of `context_features` values
  are fixed with respect to frame, the frame dimension (the first dimension)
  of `sequence_features` values may vary between `SequenceExample` protos,
  and even between `feature_list` keys within the same `SequenceExample`.

  `context_features` contains `VarLenFeature` and `FixedLenFeature` objects.
  Each `VarLenFeature` is mapped to a `SparseTensor`, and each `FixedLenFeature`
  is mapped to a `Tensor`, of the specified type, shape, and default value.

  `sequence_features` contains `VarLenFeature` and `FixedLenSequenceFeature`
  objects. Each `VarLenFeature` is mapped to a `SparseTensor`, and each
  `FixedLenSequenceFeature` is mapped to a `Tensor`, each of the specified type.
  The shape will be `(B,T,) + df.dense_shape` for `FixedLenSequenceFeature`
  `df`, where `B` is the batch size, and `T` is the length of the associated
  `FeatureList` in the `SequenceExample`. For instance,
  `FixedLenSequenceFeature([])` yields a scalar 2-D `Tensor` of static shape
  `[None, None]` and dynamic shape `[B, T]`, while
  `FixedLenSequenceFeature([k])` (for `int k >= 1`) yields a 3-D matrix `Tensor`
  of static shape `[None, None, k]` and dynamic shape `[B, T, k]`.

  Like the input, the resulting output tensors have a batch dimension. This
  means that the original per-example shapes of `VarLenFeature`s and
  `FixedLenSequenceFeature`s can be lost. To handle that situation, this op also
  provides dicts of shape tensors as part of the output. There is one dict for
  the context features, and one for the feature_list features. Context features
  of type `FixedLenFeature` will not be present, since their shapes are already
  known by the caller. In situations where the input `FixedLenFeature`s are of
  different lengths across examples, the shorter examples will be padded with
  default datatype values: 0 for numeric types, and the empty string for string
  types.

  Each `SparseTensor` corresponding to `sequence_features` represents a ragged
  vector.  Its indices are `[time, index]`, where `time` is the `FeatureList`
  entry and `index` is the value's index in the list of values associated with
  that time.

  `FixedLenFeature` entries with a `default_value` and `FixedLenSequenceFeature`
  entries with `allow_missing=True` are optional; otherwise, we will fail if
  that `Feature` or `FeatureList` is missing from any example in `serialized`.

  `example_names` may contain descriptive names for the corresponding
  serialized protos. This may be useful for debugging purposes, but it has no
  effect on the output.

  Args:
    serialized: A vector (1-D Tensor) of type string containing binary
      serialized `SequenceExample` protos.
    context_features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values. These features are associated with a
      `SequenceExample` as a whole.
    sequence_features: A `dict` mapping feature keys to
      `FixedLenSequenceFeature` or `VarLenFeature` values. These features are
      associated with data within the `FeatureList` section of the
      `SequenceExample` proto.
    example_names: A vector (1-D Tensor) of strings (optional), the name of the
      serialized protos.
    name: A name for this operation (optional).

  Returns:
    A tuple of three `dict`s, each mapping keys to `Tensor`s and
    `SparseTensor`s. The first dict contains the context key/values,
    the second dict contains the feature_list key/values, and the final dict
    contains the lengths of any dense feature_list features.

  Raises:
    ValueError: if both `context_features` and `sequence_features` are empty
      or `None`, or if any feature is invalid.
  """
  if not context_features and not sequence_features:
    raise ValueError("Missing features.")

  # Raw parameters for the features that describe the example as a whole.
  (ctx_sparse_keys, ctx_sparse_types, ctx_dense_keys, ctx_dense_types,
   ctx_dense_defaults, ctx_dense_shapes) = _features_to_raw_params(
       context_features, [VarLenFeature, FixedLenFeature])

  # Raw parameters for the features stored in the `FeatureList` section.
  (seq_sparse_keys, seq_sparse_types, seq_dense_keys, seq_dense_types,
   seq_dense_defaults, seq_dense_shapes) = _features_to_raw_params(
       sequence_features, [VarLenFeature, FixedLenSequenceFeature])

  return _parse_sequence_example_raw(
      serialized,
      debug_name=example_names,
      context_sparse_keys=ctx_sparse_keys,
      context_sparse_types=ctx_sparse_types,
      context_dense_keys=ctx_dense_keys,
      context_dense_types=ctx_dense_types,
      context_dense_defaults=ctx_dense_defaults,
      context_dense_shapes=ctx_dense_shapes,
      feature_list_sparse_keys=seq_sparse_keys,
      feature_list_sparse_types=seq_sparse_types,
      feature_list_dense_keys=seq_dense_keys,
      feature_list_dense_types=seq_dense_types,
      feature_list_dense_shapes=seq_dense_shapes,
      feature_list_dense_defaults=seq_dense_defaults,
      name=name)
1270
1271
1272def _parse_sequence_example_raw(serialized,
1273                                debug_name=None,
1274                                context_sparse_keys=None,
1275                                context_sparse_types=None,
1276                                context_dense_keys=None,
1277                                context_dense_types=None,
1278                                context_dense_defaults=None,
1279                                context_dense_shapes=None,
1280                                feature_list_sparse_keys=None,
1281                                feature_list_sparse_types=None,
1282                                feature_list_dense_keys=None,
1283                                feature_list_dense_types=None,
1284                                feature_list_dense_shapes=None,
1285                                feature_list_dense_defaults=None,
1286                                name=None):
1287  """Parses a vector of `SequenceExample` protos.
1288
1289  Args:
1290    serialized: A vector (1-D Tensor) of type string, containing binary
1291      serialized `SequenceExample` protos.
1292    debug_name: A vector (1-D Tensor) of strings (optional), the names of the
1293      serialized protos.
1294    context_sparse_keys: A list of string keys in the `SequenceExample`'s
1295      features.  The results for these keys will be returned as `SparseTensor`
1296      objects.
1297    context_sparse_types: A list of `DTypes`, the same length as `sparse_keys`.
1298      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), and `tf.string`
1299      (`BytesList`) are supported.
1300    context_dense_keys: A list of string keys in the examples' features. The
1301      results for these keys will be returned as `Tensor`s
1302    context_dense_types: A list of DTypes, same length as `context_dense_keys`.
1303      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), and `tf.string`
1304      (`BytesList`) are supported.
1305    context_dense_defaults: A dict mapping string keys to `Tensor`s. The keys of
1306      the dict must match the context_dense_keys of the feature.
1307    context_dense_shapes: A list of tuples, same length as `context_dense_keys`.
1308      The shape of the data for each context_dense feature referenced by
1309      `context_dense_keys`.  Required for any input tensors identified by
1310      `context_dense_keys` whose shapes are anything other than `[]` or `[1]`.
1311    feature_list_sparse_keys: A list of string keys in the `SequenceExample`'s
1312      feature_lists.  The results for these keys will be returned as
1313      `SparseTensor` objects.
1314    feature_list_sparse_types: A list of `DTypes`, same length as `sparse_keys`.
1315      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), and `tf.string`
1316      (`BytesList`) are supported.
1317    feature_list_dense_keys: A list of string keys in the `SequenceExample`'s
1318      features_lists. The results for these keys will be returned as `Tensor`s.
1319    feature_list_dense_types: A list of `DTypes`, same length as
1320      `feature_list_dense_keys`.  Only `tf.float32` (`FloatList`), `tf.int64`
1321      (`Int64List`), and `tf.string` (`BytesList`) are supported.
1322    feature_list_dense_shapes: A list of tuples, same length as
1323      `feature_list_dense_keys`.  The shape of the data for each `FeatureList`
1324      feature referenced by `feature_list_dense_keys`.
1325    feature_list_dense_defaults: A dict mapping key strings to values. The only
1326      currently allowed value is `None`.  Any key appearing in this dict with
1327      value `None` is allowed to be missing from the `SequenceExample`.  If
1328      missing, the key is treated as zero-length.
1329    name: A name for this operation (optional).
1330
1331  Returns:
1332    A tuple of three `dict`s, each mapping keys to `Tensor`s and
1333    `SparseTensor`s. The first dict contains the context key/values,
1334    the second dict contains the feature_list key/values, and the final dict
1335    contains the lengths of any dense feature_list features.
1336
1337  Raises:
1338    ValueError: If context_sparse and context_dense key sets intersect,
1339      if feature_list_sparse and feature_list_dense key sets intersect,
1340      if input lengths do not match up, or if a value in
1341      feature_list_dense_defaults is not None.
1342    TypeError: if feature_list_dense_defaults is not either None or a dict.
1343  """
1344  with ops.name_scope(name, "ParseSequenceExample", [serialized]):
1345    context_dense_defaults = ({} if context_dense_defaults is None else
1346                              context_dense_defaults)
1347    context_sparse_keys = ([] if context_sparse_keys is None else
1348                           context_sparse_keys)
1349    context_sparse_types = ([] if context_sparse_types is None else
1350                            context_sparse_types)
1351    context_dense_keys = ([]
1352                          if context_dense_keys is None else context_dense_keys)
1353    context_dense_types = ([] if context_dense_types is None else
1354                           context_dense_types)
1355    context_dense_shapes = ([[]] * len(context_dense_keys)
1356                            if context_dense_shapes is None else
1357                            context_dense_shapes)
1358    feature_list_sparse_keys = ([] if feature_list_sparse_keys is None else
1359                                feature_list_sparse_keys)
1360    feature_list_sparse_types = ([] if feature_list_sparse_types is None else
1361                                 feature_list_sparse_types)
1362    feature_list_dense_keys = ([] if feature_list_dense_keys is None else
1363                               feature_list_dense_keys)
1364    feature_list_dense_types = ([] if feature_list_dense_types is None else
1365                                feature_list_dense_types)
1366    feature_list_dense_shapes = ([[]] * len(feature_list_dense_keys)
1367                                 if feature_list_dense_shapes is None else
1368                                 feature_list_dense_shapes)
1369    feature_list_dense_defaults = (
1370        dict()
1371        if feature_list_dense_defaults is None else feature_list_dense_defaults)
1372    debug_name = [] if debug_name is None else debug_name
1373
1374    # Internal
1375    feature_list_dense_missing_assumed_empty = []
1376
1377    num_context_dense = len(context_dense_keys)
1378    num_feature_list_dense = len(feature_list_dense_keys)
1379    num_context_sparse = len(context_sparse_keys)
1380    num_feature_list_sparse = len(feature_list_sparse_keys)
1381
1382    if len(context_dense_shapes) != num_context_dense:
1383      raise ValueError(
1384          "len(context_dense_shapes) != len(context_dense_keys): %d vs. %d" %
1385          (len(context_dense_shapes), num_context_dense))
1386    if len(context_dense_types) != num_context_dense:
1387      raise ValueError(
1388          "len(context_dense_types) != len(num_context_dense): %d vs. %d" %
1389          (len(context_dense_types), num_context_dense))
1390    if len(feature_list_dense_shapes) != num_feature_list_dense:
1391      raise ValueError(
1392          "len(feature_list_dense_shapes) != len(feature_list_dense_keys): "
1393          "%d vs. %d" % (len(feature_list_dense_shapes),
1394                         num_feature_list_dense))
1395    if len(feature_list_dense_types) != num_feature_list_dense:
1396      raise ValueError(
1397          "len(feature_list_dense_types) != len(num_feature_list_dense):"
1398          "%d vs. %d" % (len(feature_list_dense_types), num_feature_list_dense))
1399    if len(context_sparse_types) != num_context_sparse:
1400      raise ValueError(
1401          "len(context_sparse_types) != len(context_sparse_keys): %d vs. %d" %
1402          (len(context_sparse_types), num_context_sparse))
1403    if len(feature_list_sparse_types) != num_feature_list_sparse:
1404      raise ValueError(
1405          "len(feature_list_sparse_types) != len(feature_list_sparse_keys): "
1406          "%d vs. %d" % (len(feature_list_sparse_types),
1407                         num_feature_list_sparse))
1408    if (num_context_dense + num_context_sparse + num_feature_list_dense +
1409        num_feature_list_sparse) == 0:
1410      raise ValueError(
1411          "Must provide at least one context_sparse key, context_dense key, "
1412          ", feature_list_sparse key, or feature_list_dense key")
1413    if not set(context_dense_keys).isdisjoint(set(context_sparse_keys)):
1414      raise ValueError(
1415          "context_dense and context_sparse keys must not intersect; "
1416          "intersection: %s" % set(context_dense_keys).intersection(
1417              set(context_sparse_keys)))
1418    if not set(feature_list_dense_keys).isdisjoint(
1419        set(feature_list_sparse_keys)):
1420      raise ValueError(
1421          "feature_list_dense and feature_list_sparse keys must not intersect; "
1422          "intersection: %s" % set(feature_list_dense_keys).intersection(
1423              set(feature_list_sparse_keys)))
1424    if not isinstance(feature_list_dense_defaults, dict):
1425      raise TypeError("feature_list_dense_defaults must be a dict")
1426    for k, v in feature_list_dense_defaults.items():
1427      if v is not None:
1428        raise ValueError(
1429            "Value feature_list_dense_defaults[%s] must be None" % k)
1430      feature_list_dense_missing_assumed_empty.append(k)
1431
1432    context_dense_defaults_vec = []
1433    for i, key in enumerate(context_dense_keys):
1434      default_value = context_dense_defaults.get(key)
1435      if default_value is None:
1436        default_value = constant_op.constant([], dtype=context_dense_types[i])
1437      elif not isinstance(default_value, ops.Tensor):
1438        key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
1439        default_value = ops.convert_to_tensor(
1440            default_value, dtype=context_dense_types[i], name=key_name)
1441
1442      context_dense_defaults_vec.append(default_value)
1443
1444    context_dense_shapes = [
1445        tensor_shape.as_shape(shape).as_proto()
1446        for shape in context_dense_shapes
1447    ]
1448    feature_list_dense_shapes = [
1449        tensor_shape.as_shape(shape).as_proto()
1450        for shape in feature_list_dense_shapes
1451    ]
1452
1453    # pylint: disable=protected-access
1454    outputs = gen_parsing_ops.parse_sequence_example(
1455        serialized=serialized,
1456        debug_name=debug_name,
1457        Ncontext_sparse=num_context_sparse,
1458        Ncontext_dense=num_context_dense,
1459        Nfeature_list_sparse=num_feature_list_sparse,
1460        Nfeature_list_dense=num_feature_list_dense,
1461        context_dense_defaults=context_dense_defaults_vec,
1462        context_sparse_keys=context_sparse_keys,
1463        context_sparse_types=context_sparse_types,
1464        context_dense_keys=context_dense_keys,
1465        context_dense_shapes=context_dense_shapes,
1466        feature_list_sparse_keys=feature_list_sparse_keys,
1467        feature_list_sparse_types=feature_list_sparse_types,
1468        feature_list_dense_keys=feature_list_dense_keys,
1469        feature_list_dense_types=feature_list_dense_types,
1470        feature_list_dense_shapes=feature_list_dense_shapes,
1471        feature_list_dense_missing_assumed_empty=(
1472            feature_list_dense_missing_assumed_empty),
1473        name=name)
1474    # pylint: enable=protected-access
1475
1476    (context_sparse_indices, context_sparse_values, context_sparse_shapes,
1477     context_dense_values, feature_list_sparse_indices,
1478     feature_list_sparse_values, feature_list_sparse_shapes,
1479     feature_list_dense_values, feature_list_dense_lengths) = outputs
1480
1481    context_sparse_tensors = [
1482        sparse_tensor.SparseTensor(ix, val, shape)
1483        for (ix, val,
1484             shape) in zip(context_sparse_indices, context_sparse_values,
1485                           context_sparse_shapes)
1486    ]
1487
1488    feature_list_sparse_tensors = [
1489        sparse_tensor.SparseTensor(ix, val, shape)
1490        for (ix, val, shape
1491            ) in zip(feature_list_sparse_indices, feature_list_sparse_values,
1492                     feature_list_sparse_shapes)
1493    ]
1494
1495    context_output = dict(
1496        zip(context_sparse_keys + context_dense_keys,
1497            context_sparse_tensors + context_dense_values))
1498    feature_list_output = dict(
1499        zip(feature_list_sparse_keys + feature_list_dense_keys,
1500            feature_list_sparse_tensors + feature_list_dense_values))
1501    feature_list_lengths = dict(
1502        zip(feature_list_dense_keys, feature_list_dense_lengths))
1503
1504    return (context_output, feature_list_output, feature_list_lengths)
1505
1506
1507# TODO(sundberg): rewrite this method to call the batch version, which is more
1508# efficient especially for large inputs.
@tf_export("io.parse_single_sequence_example",
           v1=["io.parse_single_sequence_example",
               "parse_single_sequence_example"])
def parse_single_sequence_example(
    serialized, context_features=None, sequence_features=None,
    example_name=None, name=None):
  # pylint: disable=line-too-long
  """Parses one serialized `SequenceExample` proto into dicts of tensors.

  `serialized` holds a single serialized
  [`SequenceExample`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  proto.  The result is a pair of dictionaries whose values are `Tensor` and
  `SparseTensor` objects: the first has one entry per key of
  `context_features`, the second one entry per key of `sequence_features`.

  `context_features` and `sequence_features` must not both be missing or
  empty.

  Context features describe the `SequenceExample` as a whole and do not depend
  on time / frame.  Sequence features give access to the variable-length data
  stored in the `FeatureList` section of the proto.  The shape of a context
  value is fixed with respect to frame, whereas the frame dimension (the first
  dimension) of a sequence value may differ from one `SequenceExample` to the
  next, and even between `feature_list` keys of the same `SequenceExample`.

  Entries of `context_features` are `VarLenFeature` or `FixedLenFeature`
  objects.  A `VarLenFeature` produces a `SparseTensor`; a `FixedLenFeature`
  produces a `Tensor` with the requested type, shape, and default value.

  Entries of `sequence_features` are `VarLenFeature` or
  `FixedLenSequenceFeature` objects.  A `VarLenFeature` produces a
  `SparseTensor`; a `FixedLenSequenceFeature` `df` produces a `Tensor` of the
  requested type with shape `(T,) + df.shape`, `T` being the length of the
  matching `FeatureList` in the `SequenceExample`.  For example,
  `FixedLenSequenceFeature([])` gives a 1-D `Tensor` of scalars with static
  shape `[None]` and dynamic shape `[T]`, and `FixedLenSequenceFeature([k])`
  (for `int k >= 1`) gives a 2-D `Tensor` with static shape `[None, k]` and
  dynamic shape `[T, k]`.

  A `SparseTensor` produced for a key of `sequence_features` represents a
  ragged vector.  Its indices have the form `[time, index]`: `time` selects the
  `FeatureList` entry and `index` is the position of the value inside the list
  of values stored for that time.

  A `FixedLenFeature` that has a `default_value`, and a
  `FixedLenSequenceFeature` created with `allow_missing=True`, are optional.
  Any other `Feature` or `FeatureList` that is absent from `serialized` causes
  a failure.

  `example_name` can carry a descriptive name for the serialized proto.  It is
  meant as a debugging aid and does not influence the output.  When it is not
  `None` it must be a scalar.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary
      serialized `SequenceExample` proto.
    context_features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values. These features are associated with a
      `SequenceExample` as a whole.
    sequence_features: A `dict` mapping feature keys to
      `FixedLenSequenceFeature` or `VarLenFeature` values. These features are
      associated with data within the `FeatureList` section of the
      `SequenceExample` proto.
    example_name: A scalar (0-D Tensor) of strings (optional), the name of
      the serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
    The first dict contains the context key/values.
    The second dict contains the feature_list key/values.

  Raises:
    ValueError: if any feature is invalid.
  """
  # pylint: enable=line-too-long
  # Guard clause: there must be something to parse.
  if not context_features and not sequence_features:
    raise ValueError("Missing features.")

  # Flatten the context feature configs into the parallel lists and dicts that
  # the raw parser expects.
  context_params = _features_to_raw_params(
      context_features, [VarLenFeature, FixedLenFeature])
  (ctx_sparse_keys, ctx_sparse_types, ctx_dense_keys, ctx_dense_types,
   ctx_dense_defaults, ctx_dense_shapes) = context_params

  # Same flattening for the per-frame (feature_list) configs.
  sequence_params = _features_to_raw_params(
      sequence_features, [VarLenFeature, FixedLenSequenceFeature])
  (seq_sparse_keys, seq_sparse_types, seq_dense_keys, seq_dense_types,
   seq_dense_defaults, seq_dense_shapes) = sequence_params

  return _parse_single_sequence_example_raw(
      serialized,
      context_sparse_keys=ctx_sparse_keys,
      context_sparse_types=ctx_sparse_types,
      context_dense_keys=ctx_dense_keys,
      context_dense_types=ctx_dense_types,
      context_dense_defaults=ctx_dense_defaults,
      context_dense_shapes=ctx_dense_shapes,
      feature_list_sparse_keys=seq_sparse_keys,
      feature_list_sparse_types=seq_sparse_types,
      feature_list_dense_keys=seq_dense_keys,
      feature_list_dense_types=seq_dense_types,
      feature_list_dense_shapes=seq_dense_shapes,
      feature_list_dense_defaults=seq_dense_defaults,
      debug_name=example_name,
      name=name)
1606
1607
def _parse_single_sequence_example_raw(serialized,
                                       context_sparse_keys=None,
                                       context_sparse_types=None,
                                       context_dense_keys=None,
                                       context_dense_types=None,
                                       context_dense_defaults=None,
                                       context_dense_shapes=None,
                                       feature_list_sparse_keys=None,
                                       feature_list_sparse_types=None,
                                       feature_list_dense_keys=None,
                                       feature_list_dense_types=None,
                                       feature_list_dense_shapes=None,
                                       feature_list_dense_defaults=None,
                                       debug_name=None,
                                       name=None):
  """Parses a single `SequenceExample` proto.

  Validates the flattened feature configuration, builds the default-value
  tensors for the dense context features, invokes the
  `parse_single_sequence_example` op and packs its outputs into two dicts.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary
      serialized `SequenceExample` proto.
    context_sparse_keys: A list of string keys in the `SequenceExample`'s
      features.  The results for these keys will be returned as
      `SparseTensor` objects.
    context_sparse_types: A list of `DTypes`, the same length as
      `context_sparse_keys`.  Only `tf.float32` (`FloatList`), `tf.int64`
      (`Int64List`), and `tf.string` (`BytesList`) are supported.
    context_dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s.
    context_dense_types: A list of DTypes, same length as `context_dense_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    context_dense_defaults: A dict mapping string keys to `Tensor`s.
      The keys of the dict must match the context_dense_keys of the feature.
      Values that are not already `Tensor`s are converted to the feature's
      dtype and reshaped to the feature's shape.
    context_dense_shapes: A list of tuples, same length as `context_dense_keys`.
      The shape of the data for each context_dense feature referenced by
      `context_dense_keys`.  Required for any input tensors identified by
      `context_dense_keys` whose shapes are anything other than `[]` or `[1]`.
      Defaults to `[]` for every key when omitted.
    feature_list_sparse_keys: A list of string keys in the `SequenceExample`'s
      feature_lists.  The results for these keys will be returned as
      `SparseTensor` objects.
    feature_list_sparse_types: A list of `DTypes`, same length as
      `feature_list_sparse_keys`.  Only `tf.float32` (`FloatList`), `tf.int64`
      (`Int64List`), and `tf.string` (`BytesList`) are supported.
    feature_list_dense_keys: A list of string keys in the `SequenceExample`'s
      features_lists. The results for these keys will be returned as `Tensor`s.
    feature_list_dense_types: A list of `DTypes`, same length as
      `feature_list_dense_keys`.  Only `tf.float32` (`FloatList`),
      `tf.int64` (`Int64List`), and `tf.string` (`BytesList`) are supported.
    feature_list_dense_shapes: A list of tuples, same length as
      `feature_list_dense_keys`.  The shape of the data for each
      `FeatureList` feature referenced by `feature_list_dense_keys`.
      Defaults to `[]` for every key when omitted.
    feature_list_dense_defaults: A dict mapping key strings to values.
      The only currently allowed value is `None`.  Any key appearing
      in this dict with value `None` is allowed to be missing from the
      `SequenceExample`.  If missing, the key is treated as zero-length.
    debug_name: A scalar (0-D Tensor) of strings (optional), the name of
      the serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
    The first dict contains the context key/values.
    The second dict contains the feature_list key/values.

  Raises:
    ValueError: If context_sparse and context_dense key sets intersect,
      if feature_list_sparse and feature_list_dense key sets intersect,
      if input lengths do not match up, if no key at all is provided, or if a
      value in feature_list_dense_defaults is not None.
    TypeError: if feature_list_dense_defaults is not either None or a dict.
  """
  with ops.name_scope(name, "ParseSingleSequenceExample", [serialized]):
    # Replace omitted arguments by empty containers so that the validation
    # below can treat every argument uniformly.
    context_dense_defaults = (
        {} if context_dense_defaults is None else context_dense_defaults)
    context_sparse_keys = (
        [] if context_sparse_keys is None else context_sparse_keys)
    context_sparse_types = (
        [] if context_sparse_types is None else context_sparse_types)
    context_dense_keys = (
        [] if context_dense_keys is None else context_dense_keys)
    context_dense_types = (
        [] if context_dense_types is None else context_dense_types)
    # An omitted shape list means "scalar shape `[]` for every dense key".
    context_dense_shapes = (
        [[]] * len(context_dense_keys)
        if context_dense_shapes is None else context_dense_shapes)
    feature_list_sparse_keys = (
        [] if feature_list_sparse_keys is None else feature_list_sparse_keys)
    feature_list_sparse_types = (
        [] if feature_list_sparse_types is None else feature_list_sparse_types)
    feature_list_dense_keys = (
        [] if feature_list_dense_keys is None else feature_list_dense_keys)
    feature_list_dense_types = (
        [] if feature_list_dense_types is None else feature_list_dense_types)
    feature_list_dense_shapes = (
        [[]] * len(feature_list_dense_keys)
        if feature_list_dense_shapes is None else feature_list_dense_shapes)
    feature_list_dense_defaults = (
        dict() if feature_list_dense_defaults is None
        else feature_list_dense_defaults)
    debug_name = "" if debug_name is None else debug_name

    # Internal: keys of feature_list_dense_defaults, forwarded to the op as
    # `feature_list_dense_missing_assumed_empty`.
    feature_list_dense_missing_assumed_empty = []

    num_context_dense = len(context_dense_keys)
    num_feature_list_dense = len(feature_list_dense_keys)
    num_context_sparse = len(context_sparse_keys)
    num_feature_list_sparse = len(feature_list_sparse_keys)

    # Every per-key list must be parallel to its key list.
    if len(context_dense_shapes) != num_context_dense:
      raise ValueError(
          "len(context_dense_shapes) != len(context_dense_keys): %d vs. %d"
          % (len(context_dense_shapes), num_context_dense))
    if len(context_dense_types) != num_context_dense:
      raise ValueError(
          "len(context_dense_types) != len(context_dense_keys): %d vs. %d"
          % (len(context_dense_types), num_context_dense))
    if len(feature_list_dense_shapes) != num_feature_list_dense:
      raise ValueError(
          "len(feature_list_dense_shapes) != len(feature_list_dense_keys): "
          "%d vs. %d" % (len(feature_list_dense_shapes),
                         num_feature_list_dense))
    if len(feature_list_dense_types) != num_feature_list_dense:
      raise ValueError(
          "len(feature_list_dense_types) != len(feature_list_dense_keys): "
          "%d vs. %d" % (len(feature_list_dense_types), num_feature_list_dense))
    if len(context_sparse_types) != num_context_sparse:
      raise ValueError(
          "len(context_sparse_types) != len(context_sparse_keys): %d vs. %d"
          % (len(context_sparse_types), num_context_sparse))
    if len(feature_list_sparse_types) != num_feature_list_sparse:
      raise ValueError(
          "len(feature_list_sparse_types) != len(feature_list_sparse_keys): "
          "%d vs. %d"
          % (len(feature_list_sparse_types), num_feature_list_sparse))
    # At least one key of any kind is required.
    if (num_context_dense + num_context_sparse
        + num_feature_list_dense + num_feature_list_sparse) == 0:
      raise ValueError(
          "Must provide at least one context_sparse key, context_dense key, "
          "feature_list_sparse key, or feature_list_dense key")
    # A key may be requested as dense or as sparse, never both.
    if not set(context_dense_keys).isdisjoint(set(context_sparse_keys)):
      raise ValueError(
          "context_dense and context_sparse keys must not intersect; "
          "intersection: %s" %
          set(context_dense_keys).intersection(set(context_sparse_keys)))
    if not set(feature_list_dense_keys).isdisjoint(
        set(feature_list_sparse_keys)):
      raise ValueError(
          "feature_list_dense and feature_list_sparse keys must not intersect; "
          "intersection: %s" %
          set(feature_list_dense_keys).intersection(
              set(feature_list_sparse_keys)))
    if not isinstance(feature_list_dense_defaults, dict):
      raise TypeError("feature_list_dense_defaults must be a dict")
    # Only `None` is accepted as a default; each such key is recorded as
    # "allowed to be missing".
    for k, v in feature_list_dense_defaults.items():
      if v is not None:
        raise ValueError("Value feature_list_dense_defaults[%s] must be None"
                         % k)
      feature_list_dense_missing_assumed_empty.append(k)

    # Build one default tensor per dense context key, in key order.
    context_dense_defaults_vec = []
    for i, key in enumerate(context_dense_keys):
      default_value = context_dense_defaults.get(key)
      if default_value is None:
        # No default supplied: pass an empty constant of the feature's dtype.
        # NOTE(review): presumably the op interprets an empty default as
        # "feature is required" -- confirm against the op documentation.
        default_value = constant_op.constant([], dtype=context_dense_types[i])
      elif not isinstance(default_value, ops.Tensor):
        # Derive an op name from the key, replacing every character outside
        # [A-Za-z0-9_.\-/] by an underscore.
        key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
        default_value = ops.convert_to_tensor(
            default_value, dtype=context_dense_types[i], name=key_name)
        default_value = array_ops.reshape(
            default_value, context_dense_shapes[i])

      context_dense_defaults_vec.append(default_value)

    # The op takes shapes as `TensorShapeProto`s.
    context_dense_shapes = [tensor_shape.as_shape(shape).as_proto()
                            for shape in context_dense_shapes]
    feature_list_dense_shapes = [tensor_shape.as_shape(shape).as_proto()
                                 for shape in feature_list_dense_shapes]

    outputs = gen_parsing_ops.parse_single_sequence_example(
        serialized=serialized,
        debug_name=debug_name,
        context_dense_defaults=context_dense_defaults_vec,
        context_sparse_keys=context_sparse_keys,
        context_sparse_types=context_sparse_types,
        context_dense_keys=context_dense_keys,
        context_dense_shapes=context_dense_shapes,
        feature_list_sparse_keys=feature_list_sparse_keys,
        feature_list_sparse_types=feature_list_sparse_types,
        feature_list_dense_keys=feature_list_dense_keys,
        feature_list_dense_types=feature_list_dense_types,
        feature_list_dense_shapes=feature_list_dense_shapes,
        feature_list_dense_missing_assumed_empty=(
            feature_list_dense_missing_assumed_empty),
        name=name)

    (context_sparse_indices, context_sparse_values,
     context_sparse_shapes, context_dense_values,
     feature_list_sparse_indices, feature_list_sparse_values,
     feature_list_sparse_shapes, feature_list_dense_values) = outputs

    # Reassemble each (indices, values, shape) triple into a `SparseTensor`.
    context_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
        in zip(context_sparse_indices,
               context_sparse_values,
               context_sparse_shapes)]

    feature_list_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
        in zip(feature_list_sparse_indices,
               feature_list_sparse_values,
               feature_list_sparse_shapes)]

    # Pair the keys (sparse first, then dense) with the matching outputs.
    context_output = dict(
        zip(context_sparse_keys + context_dense_keys,
            context_sparse_tensors + context_dense_values))
    feature_list_output = dict(
        zip(feature_list_sparse_keys + feature_list_dense_keys,
            feature_list_sparse_tensors + feature_list_dense_values))

    return (context_output, feature_list_output)
1828
1829
1830# Swap `name` and `na_value` for backward compatibility.
@tf_export(v1=["io.decode_csv", "decode_csv"])
@deprecation.deprecated_endpoints("decode_csv")
def decode_csv(records,
               record_defaults,
               field_delim=",",
               use_quote_delim=True,
               name=None,
               na_value="",
               select_cols=None):
  """Convert CSV records to tensors. Each column maps to one tensor.

  The records are expected to follow RFC 4180
  (https://tools.ietf.org/html/rfc4180), except that leading and trailing
  spaces are tolerated in int and float fields.

  This is the v1 entry point; it forwards every argument unchanged to
  `decode_csv_v2`, which takes `name` as its last parameter.

  Args:
    records: A `Tensor` of type `string`.
      Each string is a record/row in the csv and all records should have
      the same format.
    record_defaults: A list of `Tensor` objects with specific types.
      Acceptable types are `float32`, `float64`, `int32`, `int64`, `string`.
      One tensor per column of the input record, with either a
      scalar default value for that column or an empty vector if the column is
      required.
    field_delim: An optional `string`. Defaults to `","`.
      char delimiter to separate fields in a record.
    use_quote_delim: An optional `bool`. Defaults to `True`.
      If false, treats double quotation marks as regular
      characters inside of the string fields (ignoring RFC 4180, Section 2,
      Bullet 5).
    name: A name for the operation (optional).
    na_value: Additional string to recognize as NA/NaN.
    select_cols: Optional sorted list of column indices to select. If specified,
      only this subset of columns will be parsed and returned.

  Returns:
    A list of `Tensor` objects. Has the same type as `record_defaults`.
    Each tensor will have the same shape as records.

  Raises:
    ValueError: If any of the arguments is malformed.
  """
  # Forward by keyword so the differing position of `name` in the two
  # signatures cannot cause a mix-up.
  return decode_csv_v2(
      records=records,
      record_defaults=record_defaults,
      field_delim=field_delim,
      use_quote_delim=use_quote_delim,
      na_value=na_value,
      select_cols=select_cols,
      name=name)
1878
1879
@tf_export("io.decode_csv", v1=[])
def decode_csv_v2(records,
                  record_defaults,
                  field_delim=",",
                  use_quote_delim=True,
                  na_value="",
                  select_cols=None,
                  name=None):
  """Convert CSV records to tensors. Each column maps to one tensor.

  RFC 4180 format is expected for the CSV records.
  (https://tools.ietf.org/html/rfc4180)
  Note that we allow leading and trailing spaces with int or float field.

  Args:
    records: A `Tensor` of type `string`.
      Each string is a record/row in the csv and all records should have
      the same format.
    record_defaults: A list of `Tensor` objects with specific types.
      Acceptable types are `float32`, `float64`, `int32`, `int64`, `string`.
      One tensor per column of the input record, with either a
      scalar default value for that column or an empty vector if the column is
      required.
    field_delim: An optional `string`. Defaults to `","`.
      char delimiter to separate fields in a record.
    use_quote_delim: An optional `bool`. Defaults to `True`.
      If false, treats double quotation marks as regular
      characters inside of the string fields (ignoring RFC 4180, Section 2,
      Bullet 5).
    na_value: Additional string to recognize as NA/NaN.
    select_cols: Optional sorted list of column indices to select. If specified,
      only this subset of columns will be parsed and returned.  Must be
      strictly increasing, non-negative, and of the same length as
      `record_defaults`.
    name: A name for the operation (optional).

  Returns:
    A list of `Tensor` objects. Has the same type as `record_defaults`.
    Each tensor will have the same shape as records.

  Raises:
    ValueError: If `select_cols` is not strictly increasing, contains negative
      values, or does not have the same length as `record_defaults`.
  """
  if select_cols is not None:
    if any(select_cols[i] >= select_cols[i + 1]
           for i in range(len(select_cols) - 1)):
      raise ValueError("select_cols is not strictly increasing.")
    # The sequence is strictly increasing here, so its first entry is the
    # smallest one and is the only one that needs a sign check.  The explicit
    # length test keeps an empty `select_cols` from raising IndexError; such
    # input is then handled by the length comparison below.
    if len(select_cols) > 0 and select_cols[0] < 0:  # pylint: disable=g-explicit-length-test
      raise ValueError("select_cols contains negative values.")
    if len(select_cols) != len(record_defaults):
      raise ValueError(
          "Length of select_cols and record_defaults do not match.")
  return gen_parsing_ops.decode_csv(
      records=records,
      record_defaults=record_defaults,
      field_delim=field_delim,
      use_quote_delim=use_quote_delim,
      na_value=na_value,
      name=name,
      select_cols=select_cols,
  )
1937
1938
1939# TODO(b/70890287): Combine the implementation of this op and
1940# `parse_single_example()` after 1/10/2018.
def parse_single_example_v2(serialized, features, name=None):
  # pylint: disable=line-too-long
  """Parses an `Example` proto into a `dict` of tensors.

  `serialized` holds one serialized
  [`Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  proto.

  The result maps every key of `features` to a `Tensor` or a `SparseTensor`.
  `features` itself maps keys to `VarLenFeature`, `SparseFeature`, and
  `FixedLenFeature` objects: a `VarLenFeature` or a `SparseFeature` produces a
  `SparseTensor`, a `FixedLenFeature` produces a `Tensor`.

  A `VarLenFeature` yields a `SparseTensor` of the requested type that
  represents a ragged matrix.  Its indices are `[index]`, where `index` is the
  position of the value within the list of values stored for that feature and
  example.

  A `SparseFeature` yields a `SparseTensor` of the requested type that
  represents a Tensor of `dense_shape` `SparseFeature.size`.  Its `values` are
  read from the feature stored under `value_key`.  A `values[i]` comes from a
  position `k` in the feature of an example at batch entry `batch`; that
  position is recorded in `indices[i]` as `[batch, index_0, index_1, ...]`,
  where `index_j` is the `k-th` value of the feature stored under
  `SparseFeature.index_key[j]`.  Put differently, the indices of a
  `SparseTensor` (apart from the first one, which identifies the batch entry)
  are split by dimension into separate features of the `Example`.  Because of
  this complexity, prefer a `VarLenFeature` over a `SparseFeature` whenever
  possible.

  A `FixedLenFeature` `df` yields a `Tensor` of the requested type (or
  `tf.float32` if not specified) with shape `df.shape`.

  A `FixedLenFeature` that has a `default_value` is optional.  Without a
  default value, parsing fails if that `Feature` is missing from any example in
  `serialized`.

  A `FixedLenSequenceFeature` `df` yields a `Tensor` of the requested type
  (or `tf.float32` if not specified) with shape `(None,) + df.shape`.

  Args:
    serialized: A scalar (0-D Tensor) string, a serialized `Example` proto.
    features: A `dict` mapping feature keys to `FixedLenFeature`,
      `VarLenFeature`, and `SparseFeature` values.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  # Guard clause: an empty or missing feature spec cannot be parsed.
  if not features:
    raise ValueError("Missing: features was %s." % features)

  features = _prepend_none_dimension(features)

  # Feature config classes accepted by this parser.
  accepted_feature_types = [
      VarLenFeature, SparseFeature, FixedLenFeature, FixedLenSequenceFeature
  ]
  raw_params = _features_to_raw_params(features, accepted_feature_types)
  (sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
   dense_shapes) = raw_params

  parsed = _parse_single_example_v2_raw(
      serialized=serialized,
      sparse_keys=sparse_keys,
      sparse_types=sparse_types,
      dense_keys=dense_keys,
      dense_types=dense_types,
      dense_defaults=dense_defaults,
      dense_shapes=dense_shapes,
      name=name)

  # Combine the raw per-key outputs into the tensors requested by any
  # `SparseFeature` entries.
  return _construct_sparse_tensors_for_sparse_features(features, parsed)
2005
2006
2007def _parse_single_example_v2_raw(serialized, sparse_keys, sparse_types,
2008                                 dense_keys, dense_types, dense_defaults,
2009                                 dense_shapes, name):
2010  """Parses `Example` protos.
2011
2012  Args:
2013    serialized: A scalar (0-D Tensor) string, containing a binary
2014      serialized `Example` proto.
2015    sparse_keys: A list of string keys in the examples' features.
2016      The results for these keys will be returned as `SparseTensor` objects.
2017    sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
2018      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
2019      and `tf.string` (`BytesList`) are supported.
2020    dense_keys: A list of string keys in the examples' features.
2021      The results for these keys will be returned as `Tensor`s
2022    dense_types: A list of DTypes of the same length as `dense_keys`.
2023      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
2024      and `tf.string` (`BytesList`) are supported.
2025    dense_defaults: A dict mapping string keys to `Tensor`s.
2026      The keys of the dict must match the dense_keys of the feature.
2027    dense_shapes: A list of tuples with the same length as `dense_keys`.
2028      The shape of the data for each dense feature referenced by `dense_keys`.
2029      Required for any input tensors identified by `dense_keys`.  Must be
2030      either fully defined, or may contain an unknown first dimension.
2031      An unknown first dimension means the feature is treated as having
2032      a variable number of blocks, and the output shape along this dimension
2033      is considered unknown at graph build time.  Padding is applied for
2034      minibatch elements smaller than the maximum number of blocks for the
2035      given feature along this dimension.
2036    name: A name for this operation (optional).
2037
2038  Returns:
2039    A `dict` mapping keys to `Tensor`s and `SparseTensor`s.
2040
2041  Raises:
2042    ValueError: If sparse and dense key sets intersect, or input lengths do not
2043      match up.
2044  """
2045  with ops.name_scope(name, "ParseSingleExample", [serialized]):
2046    serialized = ops.convert_to_tensor(serialized, name="serialized")
2047    dense_defaults = collections.OrderedDict(
2048    ) if dense_defaults is None else dense_defaults
2049    sparse_keys = [] if sparse_keys is None else sparse_keys
2050    sparse_types = [] if sparse_types is None else sparse_types
2051    dense_keys = [] if dense_keys is None else dense_keys
2052    dense_types = [] if dense_types is None else dense_types
2053    dense_shapes = ([[]] * len(dense_keys)
2054                    if dense_shapes is None else dense_shapes)
2055
2056    num_dense = len(dense_keys)
2057    num_sparse = len(sparse_keys)
2058
2059    if len(dense_shapes) != num_dense:
2060      raise ValueError("len(dense_shapes) != len(dense_keys): %d vs. %d" %
2061                       (len(dense_shapes), num_dense))
2062    if len(dense_types) != num_dense:
2063      raise ValueError("len(dense_types) != len(num_dense): %d vs. %d" %
2064                       (len(dense_types), num_dense))
2065    if len(sparse_types) != num_sparse:
2066      raise ValueError("len(sparse_types) != len(sparse_keys): %d vs. %d" %
2067                       (len(sparse_types), num_sparse))
2068    if num_dense + num_sparse == 0:
2069      raise ValueError("Must provide at least one sparse key or dense key")
2070    if not set(dense_keys).isdisjoint(set(sparse_keys)):
2071      raise ValueError(
2072          "Dense and sparse keys must not intersect; intersection: %s" %
2073          set(dense_keys).intersection(set(sparse_keys)))
2074
2075    # Convert dense_shapes to TensorShape object.
2076    dense_shapes = [tensor_shape.as_shape(shape) for shape in dense_shapes]
2077
2078    dense_defaults_vec = []
2079    for i, key in enumerate(dense_keys):
2080      default_value = dense_defaults.get(key)
2081      dense_shape = dense_shapes[i]
2082      if (dense_shape.ndims is not None and dense_shape.ndims > 0 and
2083          dense_shape.dims[0].value is None):
2084        # Variable stride dense shape, the default value should be a
2085        # scalar padding value
2086        if default_value is None:
2087          default_value = ops.convert_to_tensor(
2088              "" if dense_types[i] == dtypes.string else 0,
2089              dtype=dense_types[i])
2090        else:
2091          # Reshape to a scalar to ensure user gets an error if they
2092          # provide a tensor that's not intended to be a padding value
2093          # (0 or 2+ elements).
2094          key_name = "padding_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
2095          default_value = ops.convert_to_tensor(
2096              default_value, dtype=dense_types[i], name=key_name)
2097          default_value = array_ops.reshape(default_value, [])
2098      else:
2099        if default_value is None:
2100          default_value = constant_op.constant([], dtype=dense_types[i])
2101        elif not isinstance(default_value, ops.Tensor):
2102          key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
2103          default_value = ops.convert_to_tensor(
2104              default_value, dtype=dense_types[i], name=key_name)
2105          default_value = array_ops.reshape(default_value, dense_shape)
2106
2107      dense_defaults_vec.append(default_value)
2108
2109    # Finally, convert dense_shapes to TensorShapeProto
2110    dense_shapes = [shape.as_proto() for shape in dense_shapes]
2111
2112    outputs = gen_parsing_ops.parse_single_example(
2113        serialized=serialized,
2114        dense_defaults=dense_defaults_vec,
2115        num_sparse=len(sparse_keys),
2116        sparse_keys=sparse_keys,
2117        sparse_types=sparse_types,
2118        dense_keys=dense_keys,
2119        dense_shapes=dense_shapes,
2120        name=name)
2121
2122    (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs
2123
2124    sparse_tensors = [
2125        sparse_tensor.SparseTensor(ix, val, shape)
2126        for (ix, val,
2127             shape) in zip(sparse_indices, sparse_values, sparse_shapes)
2128    ]
2129
2130    return dict(zip(sparse_keys + dense_keys, sparse_tensors + dense_values))
2131