1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Tests for tensorflow.ops.parsing_ops."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import copy
22import itertools
23
24import numpy as np
25
26from google.protobuf import json_format
27
28from tensorflow.core.example import example_pb2
29from tensorflow.core.example import feature_pb2
30from tensorflow.python.framework import constant_op
31from tensorflow.python.framework import dtypes
32from tensorflow.python.framework import errors_impl
33from tensorflow.python.framework import ops
34from tensorflow.python.framework import sparse_tensor
35from tensorflow.python.framework import tensor_shape
36from tensorflow.python.framework import tensor_util
37from tensorflow.python.ops import array_ops
38from tensorflow.python.ops import parsing_ops
39from tensorflow.python.platform import test
40from tensorflow.python.platform import tf_logging
41
# Helpers for creating Example objects.
example = example_pb2.Example
feature = feature_pb2.Feature


def features(d):
  """Returns a `Features` proto whose `feature` field is built from `d`."""
  return feature_pb2.Features(feature=d)


def bytes_feature(v):
  """Returns a `Feature` whose `bytes_list` holds the values `v`."""
  return feature(bytes_list=feature_pb2.BytesList(value=v))


def int64_feature(v):
  """Returns a `Feature` whose `int64_list` holds the values `v`."""
  return feature(int64_list=feature_pb2.Int64List(value=v))


def float_feature(v):
  """Returns a `Feature` whose `float_list` holds the values `v`."""
  return feature(float_list=feature_pb2.FloatList(value=v))


# Helpers for creating SequenceExample objects.
def feature_list(l):
  """Returns a `FeatureList` proto whose `feature` field is built from `l`."""
  return feature_pb2.FeatureList(feature=l)


def feature_lists(d):
  """Returns a `FeatureLists` proto whose `feature_list` field is `d`."""
  return feature_pb2.FeatureLists(feature_list=d)


sequence_example = example_pb2.SequenceExample
53
54
def flatten(list_of_lists):
  """Lazily yields the items of each inner iterable (one nesting level)."""
  for inner in list_of_lists:
    for element in inner:
      yield element
58
59
def flatten_values_tensors_or_sparse(tensors_list):
  """Expands every SparseTensor into its 3 component Tensors for session.run().

  Args:
    tensors_list: Iterable of Tensor and/or SparseTensor objects.

  Returns:
    A flat list in input order, where each SparseTensor is replaced by its
    `indices`, `values` and `dense_shape`, and any other item is kept as is.
  """
  flat = []
  for item in tensors_list:
    if isinstance(item, sparse_tensor.SparseTensor):
      flat.extend([item.indices, item.values, item.dense_shape])
    else:
      flat.append(item)
  return flat
65
66
def _compare_output_to_expected(tester, dict_tensors, expected_tensors,
                                flat_output):
  """Asserts that flattened session output matches the expected values.

  Args:
    tester: Test case object providing `assertEqual` and `assertAllEqual`.
    dict_tensors: Dict of key -> Tensor or SparseTensor; its iteration order
      determines how `flat_output` is consumed.
    expected_tensors: Dict with the same keys. For a SparseTensor entry the
      value is a 3-element sequence (indices, values, shape); otherwise it is
      the expected dense value.
    flat_output: Flat sequence of evaluated outputs, three consecutive items
      per SparseTensor and one per dense Tensor.
  """
  tester.assertEqual(set(dict_tensors.keys()), set(expected_tensors.keys()))

  cursor = 0  # Position in the flattened output of session.run().
  for key, tensor in dict_tensors.items():
    expected = expected_tensors[key]
    tf_logging.info("Comparing key: %s", key)
    if not isinstance(tensor, sparse_tensor.SparseTensor):
      # A standard Tensor occupies a single slot.
      tester.assertAllEqual(expected, flat_output[cursor])
      cursor += 1
      continue

    # A SparseTensor occupies three slots: indices, values, shape. The key is
    # part of the compared lists so that it shows up in a failure message.
    tester.assertEqual([key, len(expected)], [key, 3])
    for offset in range(3):
      tester.assertAllEqual(expected[offset], flat_output[cursor + offset])
    cursor += 3
86
87
88class ParseExampleTest(test.TestCase):
89
90  def _test(self, kwargs, expected_values=None, expected_err=None):
91    with self.test_session() as sess:
92      if expected_err:
93        with self.assertRaisesWithPredicateMatch(expected_err[0],
94                                                 expected_err[1]):
95          out = parsing_ops.parse_example(**kwargs)
96          sess.run(flatten_values_tensors_or_sparse(out.values()))
97        return
98      else:
99        # Returns dict w/ Tensors and SparseTensors.
100        out = parsing_ops.parse_example(**kwargs)
101        result = flatten_values_tensors_or_sparse(out.values())
102        # Check values.
103        tf_result = sess.run(result)
104        _compare_output_to_expected(self, out, expected_values, tf_result)
105
106      # Check shapes; if serialized is a Tensor we need its size to
107      # properly check.
108      serialized = kwargs["serialized"]
109      batch_size = (serialized.eval().size if isinstance(serialized, ops.Tensor)
110                    else np.asarray(serialized).size)
111      for k, f in kwargs["features"].items():
112        if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
113          self.assertEqual(
114              tuple(out[k].get_shape().as_list()), (batch_size,) + f.shape)
115        elif isinstance(f, parsing_ops.VarLenFeature):
116          self.assertEqual(
117              tuple(out[k].indices.get_shape().as_list()), (None, 2))
118          self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
119          self.assertEqual(
120              tuple(out[k].dense_shape.get_shape().as_list()), (2,))
121
122  def testEmptySerializedWithAllDefaults(self):
123    sparse_name = "st_a"
124    a_name = "a"
125    b_name = "b"
126    c_name = "c:has_a_tricky_name"
127    a_default = [0, 42, 0]
128    b_default = np.random.rand(3, 3).astype(bytes)
129    c_default = np.random.rand(2).astype(np.float32)
130
131    expected_st_a = (  # indices, values, shape
132        np.empty(
133            (0, 2), dtype=np.int64),  # indices
134        np.empty(
135            (0,), dtype=np.int64),  # sp_a is DT_INT64
136        np.array(
137            [2, 0], dtype=np.int64))  # batch == 2, max_elems = 0
138
139    expected_output = {
140        sparse_name: expected_st_a,
141        a_name: np.array(2 * [[a_default]]),
142        b_name: np.array(2 * [b_default]),
143        c_name: np.array(2 * [c_default]),
144    }
145
146    self._test(
147        {
148            "example_names":
149                np.empty(
150                    (0,), dtype=bytes),
151            "serialized":
152                ops.convert_to_tensor(["", ""]),
153            "features": {
154                sparse_name:
155                    parsing_ops.VarLenFeature(dtypes.int64),
156                a_name:
157                    parsing_ops.FixedLenFeature(
158                        (1, 3), dtypes.int64, default_value=a_default),
159                b_name:
160                    parsing_ops.FixedLenFeature(
161                        (3, 3), dtypes.string, default_value=b_default),
162                c_name:
163                    parsing_ops.FixedLenFeature(
164                        (2,), dtypes.float32, default_value=c_default),
165            }
166        },
167        expected_output)
168
169  def testEmptySerializedWithoutDefaultsShouldFail(self):
170    input_features = {
171        "st_a":
172            parsing_ops.VarLenFeature(dtypes.int64),
173        "a":
174            parsing_ops.FixedLenFeature(
175                (1, 3), dtypes.int64, default_value=[0, 42, 0]),
176        "b":
177            parsing_ops.FixedLenFeature(
178                (3, 3),
179                dtypes.string,
180                default_value=np.random.rand(3, 3).astype(bytes)),
181        # Feature "c" is missing a default, this gap will cause failure.
182        "c":
183            parsing_ops.FixedLenFeature(
184                (2,), dtype=dtypes.float32),
185    }
186
187    # Edge case where the key is there but the feature value is empty
188    original = example(features=features({"c": feature()}))
189    self._test(
190        {
191            "example_names": ["in1"],
192            "serialized": [original.SerializeToString()],
193            "features": input_features,
194        },
195        expected_err=(
196            errors_impl.OpError,
197            "Name: in1, Feature: c \\(data type: float\\) is required"))
198
199    # Standard case of missing key and value.
200    self._test(
201        {
202            "example_names": ["in1", "in2"],
203            "serialized": ["", ""],
204            "features": input_features,
205        },
206        expected_err=(
207            errors_impl.OpError,
208            "Name: in1, Feature: c \\(data type: float\\) is required"))
209
210  def testDenseNotMatchingShapeShouldFail(self):
211    original = [
212        example(features=features({
213            "a": float_feature([1, 1, 3]),
214        })), example(features=features({
215            "a": float_feature([-1, -1]),
216        }))
217    ]
218
219    names = ["passing", "failing"]
220    serialized = [m.SerializeToString() for m in original]
221
222    self._test(
223        {
224            "example_names": names,
225            "serialized": ops.convert_to_tensor(serialized),
226            "features": {
227                "a": parsing_ops.FixedLenFeature((1, 3), dtypes.float32)
228            }
229        },
230        expected_err=(errors_impl.OpError,
231                      "Name: failing, Key: a, Index: 1.  Number of float val"))
232
233  def testDenseDefaultNoShapeShouldFail(self):
234    original = [example(features=features({"a": float_feature([1, 1, 3]),})),]
235
236    serialized = [m.SerializeToString() for m in original]
237
238    self._test(
239        {
240            "example_names": ["failing"],
241            "serialized": ops.convert_to_tensor(serialized),
242            "features": {
243                "a": parsing_ops.FixedLenFeature(None, dtypes.float32)
244            }
245        },
246        expected_err=(ValueError, "Missing shape for feature a"))
247
248  def testSerializedContainingSparse(self):
249    original = [
250        example(features=features({
251            "st_c": float_feature([3, 4])
252        })),
253        example(features=features({
254            "st_c": float_feature([]),  # empty float list
255        })),
256        example(features=features({
257            "st_d": feature(),  # feature with nothing in it
258        })),
259        example(features=features({
260            "st_c": float_feature([1, 2, -1]),
261            "st_d": bytes_feature([b"hi"])
262        }))
263    ]
264
265    serialized = [m.SerializeToString() for m in original]
266
267    expected_st_c = (  # indices, values, shape
268        np.array(
269            [[0, 0], [0, 1], [3, 0], [3, 1], [3, 2]], dtype=np.int64), np.array(
270                [3.0, 4.0, 1.0, 2.0, -1.0], dtype=np.float32), np.array(
271                    [4, 3], dtype=np.int64))  # batch == 2, max_elems = 3
272
273    expected_st_d = (  # indices, values, shape
274        np.array(
275            [[3, 0]], dtype=np.int64), np.array(
276                ["hi"], dtype=bytes), np.array(
277                    [4, 1], dtype=np.int64))  # batch == 2, max_elems = 1
278
279    expected_output = {
280        "st_c": expected_st_c,
281        "st_d": expected_st_d,
282    }
283
284    self._test({
285        "serialized": ops.convert_to_tensor(serialized),
286        "features": {
287            "st_c": parsing_ops.VarLenFeature(dtypes.float32),
288            "st_d": parsing_ops.VarLenFeature(dtypes.string)
289        }
290    }, expected_output)
291
292  def testSerializedContainingSparseFeature(self):
293    original = [
294        example(features=features({
295            "val": float_feature([3, 4]),
296            "idx": int64_feature([5, 10])
297        })),
298        example(features=features({
299            "val": float_feature([]),  # empty float list
300            "idx": int64_feature([])
301        })),
302        example(features=features({
303            "val": feature(),  # feature with nothing in it
304            # missing idx feature
305        })),
306        example(features=features({
307            "val": float_feature([1, 2, -1]),
308            "idx":
309                int64_feature([0, 9, 3])  # unsorted
310        }))
311    ]
312
313    serialized = [m.SerializeToString() for m in original]
314
315    expected_sp = (  # indices, values, shape
316        np.array(
317            [[0, 5], [0, 10], [3, 0], [3, 3], [3, 9]], dtype=np.int64),
318        np.array(
319            [3.0, 4.0, 1.0, -1.0, 2.0], dtype=np.float32), np.array(
320                [4, 13], dtype=np.int64))  # batch == 4, max_elems = 13
321
322    expected_output = {"sp": expected_sp,}
323
324    self._test({
325        "serialized": ops.convert_to_tensor(serialized),
326        "features": {
327            "sp": parsing_ops.SparseFeature(
328                ["idx"], "val", dtypes.float32, [13])
329        }
330    }, expected_output)
331
332  def testSerializedContainingSparseFeatureReuse(self):
333    original = [
334        example(features=features({
335            "val1": float_feature([3, 4]),
336            "val2": float_feature([5, 6]),
337            "idx": int64_feature([5, 10])
338        })),
339        example(features=features({
340            "val1": float_feature([]),  # empty float list
341            "idx": int64_feature([])
342        })),
343    ]
344
345    serialized = [m.SerializeToString() for m in original]
346
347    expected_sp1 = (  # indices, values, shape
348        np.array(
349            [[0, 5], [0, 10]], dtype=np.int64), np.array(
350                [3.0, 4.0], dtype=np.float32), np.array(
351                    [2, 13], dtype=np.int64))  # batch == 2, max_elems = 13
352
353    expected_sp2 = (  # indices, values, shape
354        np.array(
355            [[0, 5], [0, 10]], dtype=np.int64), np.array(
356                [5.0, 6.0], dtype=np.float32), np.array(
357                    [2, 7], dtype=np.int64))  # batch == 2, max_elems = 13
358
359    expected_output = {
360        "sp1": expected_sp1,
361        "sp2": expected_sp2,
362    }
363
364    self._test({
365        "serialized": ops.convert_to_tensor(serialized),
366        "features": {
367            "sp1":
368                parsing_ops.SparseFeature("idx", "val1", dtypes.float32, 13),
369            "sp2":
370                parsing_ops.SparseFeature(
371                    "idx", "val2", dtypes.float32, size=7, already_sorted=True)
372        }
373    }, expected_output)
374
375  def testSerializedContaining3DSparseFeature(self):
376    original = [
377        example(features=features({
378            "val": float_feature([3, 4]),
379            "idx0": int64_feature([5, 10]),
380            "idx1": int64_feature([0, 2]),
381        })),
382        example(features=features({
383            "val": float_feature([]),  # empty float list
384            "idx0": int64_feature([]),
385            "idx1": int64_feature([]),
386        })),
387        example(features=features({
388            "val": feature(),  # feature with nothing in it
389            # missing idx feature
390        })),
391        example(features=features({
392            "val": float_feature([1, 2, -1]),
393            "idx0": int64_feature([0, 9, 3]),  # unsorted
394            "idx1": int64_feature([1, 0, 2]),
395        }))
396    ]
397
398    serialized = [m.SerializeToString() for m in original]
399
400    expected_sp = (
401        # indices
402        np.array(
403            [[0, 5, 0], [0, 10, 2], [3, 0, 1], [3, 3, 2], [3, 9, 0]],
404            dtype=np.int64),
405        # values
406        np.array([3.0, 4.0, 1.0, -1.0, 2.0], dtype=np.float32),
407        # shape batch == 4, max_elems = 13
408        np.array([4, 13, 3], dtype=np.int64))
409
410    expected_output = {"sp": expected_sp,}
411
412    self._test({
413        "serialized": ops.convert_to_tensor(serialized),
414        "features": {
415            "sp": parsing_ops.SparseFeature(
416                ["idx0", "idx1"], "val", dtypes.float32, [13, 3])
417        }
418    }, expected_output)
419
420  def testSerializedContainingDense(self):
421    aname = "a"
422    bname = "b*has+a:tricky_name"
423    original = [
424        example(features=features({
425            aname: float_feature([1, 1]),
426            bname: bytes_feature([b"b0_str"]),
427        })), example(features=features({
428            aname: float_feature([-1, -1]),
429            bname: bytes_feature([b""]),
430        }))
431    ]
432
433    serialized = [m.SerializeToString() for m in original]
434
435    expected_output = {
436        aname:
437            np.array(
438                [[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
439        bname:
440            np.array(
441                ["b0_str", ""], dtype=bytes).reshape(2, 1, 1, 1, 1),
442    }
443
444    # No defaults, values required
445    self._test(
446        {
447            "serialized":
448                ops.convert_to_tensor(serialized),
449            "features": {
450                aname:
451                    parsing_ops.FixedLenFeature(
452                        (1, 2, 1), dtype=dtypes.float32),
453                bname:
454                    parsing_ops.FixedLenFeature(
455                        (1, 1, 1, 1), dtype=dtypes.string),
456            }
457        },
458        expected_output)
459
  # This test is identical to the previous one except
  # for the creation of 'serialized'.
462  def testSerializedContainingDenseWithConcat(self):
463    aname = "a"
464    bname = "b*has+a:tricky_name"
465    # TODO(lew): Feature appearing twice should be an error in future.
466    original = [
467        (example(features=features({
468            aname: float_feature([10, 10]),
469        })), example(features=features({
470            aname: float_feature([1, 1]),
471            bname: bytes_feature([b"b0_str"]),
472        }))),
473        (
474            example(features=features({
475                bname: bytes_feature([b"b100"]),
476            })),
477            example(features=features({
478                aname: float_feature([-1, -1]),
479                bname: bytes_feature([b"b1"]),
480            })),),
481    ]
482
483    serialized = [
484        m.SerializeToString() + n.SerializeToString() for (m, n) in original
485    ]
486
487    expected_output = {
488        aname:
489            np.array(
490                [[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
491        bname:
492            np.array(
493                ["b0_str", "b1"], dtype=bytes).reshape(2, 1, 1, 1, 1),
494    }
495
496    # No defaults, values required
497    self._test(
498        {
499            "serialized":
500                ops.convert_to_tensor(serialized),
501            "features": {
502                aname:
503                    parsing_ops.FixedLenFeature(
504                        (1, 2, 1), dtype=dtypes.float32),
505                bname:
506                    parsing_ops.FixedLenFeature(
507                        (1, 1, 1, 1), dtype=dtypes.string),
508            }
509        },
510        expected_output)
511
512  def testSerializedContainingDenseScalar(self):
513    original = [
514        example(features=features({
515            "a": float_feature([1]),
516        })), example(features=features({}))
517    ]
518
519    serialized = [m.SerializeToString() for m in original]
520
521    expected_output = {
522        "a":
523            np.array(
524                [[1], [-1]], dtype=np.float32)  # 2x1 (column vector)
525    }
526
527    self._test(
528        {
529            "serialized":
530                ops.convert_to_tensor(serialized),
531            "features": {
532                "a":
533                    parsing_ops.FixedLenFeature(
534                        (1,), dtype=dtypes.float32, default_value=-1),
535            }
536        },
537        expected_output)
538
539  def testSerializedContainingDenseWithDefaults(self):
540    original = [
541        example(features=features({
542            "a": float_feature([1, 1]),
543        })),
544        example(features=features({
545            "b": bytes_feature([b"b1"]),
546        })),
547        example(features=features({
548            "b": feature()
549        })),
550    ]
551
552    serialized = [m.SerializeToString() for m in original]
553
554    expected_output = {
555        "a":
556            np.array(
557                [[1, 1], [3, -3], [3, -3]], dtype=np.float32).reshape(3, 1, 2,
558                                                                      1),
559        "b":
560            np.array(
561                ["tmp_str", "b1", "tmp_str"], dtype=bytes).reshape(3, 1, 1, 1,
562                                                                   1),
563    }
564
565    self._test(
566        {
567            "serialized":
568                ops.convert_to_tensor(serialized),
569            "features": {
570                "a":
571                    parsing_ops.FixedLenFeature(
572                        (1, 2, 1),
573                        dtype=dtypes.float32,
574                        default_value=[3.0, -3.0]),
575                "b":
576                    parsing_ops.FixedLenFeature(
577                        (1, 1, 1, 1),
578                        dtype=dtypes.string,
579                        default_value="tmp_str"),
580            }
581        },
582        expected_output)
583
584  def testSerializedContainingSparseAndSparseFeatureAndDenseWithNoDefault(self):
585    expected_st_a = (  # indices, values, shape
586        np.empty(
587            (0, 2), dtype=np.int64),  # indices
588        np.empty(
589            (0,), dtype=np.int64),  # sp_a is DT_INT64
590        np.array(
591            [2, 0], dtype=np.int64))  # batch == 2, max_elems = 0
592    expected_sp = (  # indices, values, shape
593        np.array(
594            [[0, 0], [0, 3], [1, 7]], dtype=np.int64), np.array(
595                ["a", "b", "c"], dtype="|S"), np.array(
596                    [2, 13], dtype=np.int64))  # batch == 4, max_elems = 13
597
598    original = [
599        example(features=features({
600            "c": float_feature([3, 4]),
601            "val": bytes_feature([b"a", b"b"]),
602            "idx": int64_feature([0, 3])
603        })), example(features=features({
604            "c": float_feature([1, 2]),
605            "val": bytes_feature([b"c"]),
606            "idx": int64_feature([7])
607        }))
608    ]
609
610    names = ["in1", "in2"]
611    serialized = [m.SerializeToString() for m in original]
612
613    a_default = [1, 2, 3]
614    b_default = np.random.rand(3, 3).astype(bytes)
615    expected_output = {
616        "st_a": expected_st_a,
617        "sp": expected_sp,
618        "a": np.array(2 * [[a_default]]),
619        "b": np.array(2 * [b_default]),
620        "c": np.array(
621            [[3, 4], [1, 2]], dtype=np.float32),
622    }
623
624    self._test(
625        {
626            "example_names":
627                names,
628            "serialized":
629                ops.convert_to_tensor(serialized),
630            "features": {
631                "st_a":
632                    parsing_ops.VarLenFeature(dtypes.int64),
633                "sp":
634                    parsing_ops.SparseFeature("idx", "val", dtypes.string, 13),
635                "a":
636                    parsing_ops.FixedLenFeature(
637                        (1, 3), dtypes.int64, default_value=a_default),
638                "b":
639                    parsing_ops.FixedLenFeature(
640                        (3, 3), dtypes.string, default_value=b_default),
641                # Feature "c" must be provided, since it has no default_value.
642                "c":
643                    parsing_ops.FixedLenFeature((2,), dtypes.float32),
644            }
645        },
646        expected_output)
647
648  def testSerializedContainingSparseAndSparseFeatureWithReuse(self):
649    expected_idx = (  # indices, values, shape
650        np.array(
651            [[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.int64),
652        np.array([0, 3, 7, 1]), np.array(
653            [2, 2], dtype=np.int64))  # batch == 4, max_elems = 2
654
655    expected_sp = (  # indices, values, shape
656        np.array(
657            [[0, 0], [0, 3], [1, 1], [1, 7]], dtype=np.int64), np.array(
658                ["a", "b", "d", "c"], dtype="|S"), np.array(
659                    [2, 13], dtype=np.int64))  # batch == 4, max_elems = 13
660
661    original = [
662        example(features=features({
663            "val": bytes_feature([b"a", b"b"]),
664            "idx": int64_feature([0, 3])
665        })), example(features=features({
666            "val": bytes_feature([b"c", b"d"]),
667            "idx": int64_feature([7, 1])
668        }))
669    ]
670
671    names = ["in1", "in2"]
672    serialized = [m.SerializeToString() for m in original]
673
674    expected_output = {
675        "idx": expected_idx,
676        "sp": expected_sp,
677    }
678
679    self._test({
680        "example_names": names,
681        "serialized": ops.convert_to_tensor(serialized),
682        "features": {
683            "idx": parsing_ops.VarLenFeature(dtypes.int64),
684            "sp": parsing_ops.SparseFeature(
685                ["idx"], "val", dtypes.string, [13]),
686        }
687    }, expected_output)
688
689  def _testSerializedContainingVarLenDenseLargerBatch(self, batch_size):
690    # During parsing, data read from the serialized proto is stored in buffers.
691    # For small batch sizes, a buffer will contain one minibatch entry.
692    # For larger batch sizes, a buffer may contain several minibatch
693    # entries.  This test identified a bug where the code that copied
694    # data out of the buffers and into the output tensors assumed each
695    # buffer only contained one minibatch entry.  The bug has since been fixed.
696    truth_int = [i for i in range(batch_size)]
697    truth_str = [[("foo%d" % i).encode(), ("bar%d" % i).encode()]
698                 for i in range(batch_size)]
699
700    expected_str = copy.deepcopy(truth_str)
701
702    # Delete some intermediate entries
703    for i in range(batch_size):
704      col = 1
705      if np.random.rand() < 0.25:
706        # w.p. 25%, drop out the second entry
707        expected_str[i][col] = b"default"
708        col -= 1
709        truth_str[i].pop()
710      if np.random.rand() < 0.25:
711        # w.p. 25%, drop out the second entry (possibly again)
712        expected_str[i][col] = b"default"
713        truth_str[i].pop()
714
715    expected_output = {
716        # Batch size batch_size, 1 time step.
717        "a": np.array(truth_int, dtype=np.int64).reshape(batch_size, 1),
718        # Batch size batch_size, 2 time steps.
719        "b": np.array(expected_str, dtype="|S").reshape(batch_size, 2),
720    }
721
722    original = [
723        example(features=features(
724            {"a": int64_feature([truth_int[i]]),
725             "b": bytes_feature(truth_str[i])}))
726        for i in range(batch_size)
727    ]
728
729    serialized = [m.SerializeToString() for m in original]
730
731    self._test({
732        "serialized": ops.convert_to_tensor(serialized, dtype=dtypes.string),
733        "features": {
734            "a": parsing_ops.FixedLenSequenceFeature(
735                shape=(), dtype=dtypes.int64, allow_missing=True,
736                default_value=-1),
737            "b": parsing_ops.FixedLenSequenceFeature(
738                shape=[], dtype=dtypes.string, allow_missing=True,
739                default_value="default"),
740        }
741    }, expected_output)
742
743  def testSerializedContainingVarLenDenseLargerBatch(self):
744    np.random.seed(3456)
745    for batch_size in (1, 10, 20, 100, 256):
746      self._testSerializedContainingVarLenDenseLargerBatch(batch_size)
747
748  def testSerializedContainingVarLenDense(self):
749    aname = "a"
750    bname = "b"
751    cname = "c"
752    dname = "d"
753    example_names = ["in1", "in2", "in3", "in4"]
754    original = [
755        example(features=features({
756            cname: int64_feature([2]),
757        })),
758        example(features=features({
759            aname: float_feature([1, 1]),
760            bname: bytes_feature([b"b0_str", b"b1_str"]),
761        })),
762        example(features=features({
763            aname: float_feature([-1, -1, 2, 2]),
764            bname: bytes_feature([b"b1"]),
765        })),
766        example(features=features({
767            aname: float_feature([]),
768            cname: int64_feature([3]),
769        })),
770    ]
771
772    serialized = [m.SerializeToString() for m in original]
773
774    expected_output = {
775        aname:
776            np.array(
777                [
778                    [0, 0, 0, 0],
779                    [1, 1, 0, 0],
780                    [-1, -1, 2, 2],
781                    [0, 0, 0, 0],
782                ],
783                dtype=np.float32).reshape(4, 2, 2, 1),
784        bname:
785            np.array(
786                [["", ""], ["b0_str", "b1_str"], ["b1", ""], ["", ""]],
787                dtype=bytes).reshape(4, 2, 1, 1, 1),
788        cname:
789            np.array([2, 0, 0, 3], dtype=np.int64).reshape(4, 1),
790        dname:
791            np.empty(shape=(4, 0), dtype=bytes),
792    }
793
794    self._test({
795        "example_names": example_names,
796        "serialized": ops.convert_to_tensor(serialized),
797        "features": {
798            aname:
799                parsing_ops.FixedLenSequenceFeature(
800                    (2, 1), dtype=dtypes.float32, allow_missing=True),
801            bname:
802                parsing_ops.FixedLenSequenceFeature(
803                    (1, 1, 1), dtype=dtypes.string, allow_missing=True),
804            cname:
805                parsing_ops.FixedLenSequenceFeature(
806                    shape=[], dtype=dtypes.int64, allow_missing=True),
807            dname:
808                parsing_ops.FixedLenSequenceFeature(
809                    shape=[], dtype=dtypes.string, allow_missing=True),
810        }
811    }, expected_output)
812
813    # Test with padding values.
814    expected_output_custom_padding = dict(expected_output)
815    expected_output_custom_padding[aname] = np.array(
816        [
817            [-2, -2, -2, -2],
818            [1, 1, -2, -2],
819            [-1, -1, 2, 2],
820            [-2, -2, -2, -2],
821        ],
822        dtype=np.float32).reshape(4, 2, 2, 1)
823
824    self._test({
825        "example_names": example_names,
826        "serialized": ops.convert_to_tensor(serialized),
827        "features": {
828            aname:
829                parsing_ops.FixedLenSequenceFeature(
830                    (2, 1), dtype=dtypes.float32, allow_missing=True,
831                    default_value=-2.0),
832            bname:
833                parsing_ops.FixedLenSequenceFeature(
834                    (1, 1, 1), dtype=dtypes.string, allow_missing=True),
835            cname:
836                parsing_ops.FixedLenSequenceFeature(
837                    shape=[], dtype=dtypes.int64, allow_missing=True),
838            dname:
839                parsing_ops.FixedLenSequenceFeature(
840                    shape=[], dtype=dtypes.string, allow_missing=True),
841        }
842    }, expected_output_custom_padding)
843
844    # Change number of required values so the inputs are not a
845    # multiple of this size.
846    self._test(
847        {
848            "example_names": example_names,
849            "serialized": ops.convert_to_tensor(serialized),
850            "features": {
851                aname:
852                    parsing_ops.FixedLenSequenceFeature(
853                        (2, 1), dtype=dtypes.float32, allow_missing=True),
854                bname:
855                    parsing_ops.FixedLenSequenceFeature(
856                        (2, 1, 1), dtype=dtypes.string, allow_missing=True),
857            }
858        },
859        expected_err=(
860            errors_impl.OpError, "Name: in3, Key: b, Index: 2.  "
861            "Number of bytes values is not a multiple of stride length."))
862
863    self._test(
864        {
865            "example_names": example_names,
866            "serialized": ops.convert_to_tensor(serialized),
867            "features": {
868                aname:
869                    parsing_ops.FixedLenSequenceFeature(
870                        (2, 1), dtype=dtypes.float32, allow_missing=True,
871                        default_value=[]),
872                bname:
873                    parsing_ops.FixedLenSequenceFeature(
874                        (2, 1, 1), dtype=dtypes.string, allow_missing=True),
875            }
876        },
877        expected_err=(ValueError,
878                      "Cannot reshape a tensor with 0 elements to shape"))
879
880    self._test(
881        {
882            "example_names": example_names,
883            "serialized": ops.convert_to_tensor(serialized),
884            "features": {
885                aname:
886                    parsing_ops.FixedLenFeature(
887                        (None, 2, 1), dtype=dtypes.float32),
888                bname:
889                    parsing_ops.FixedLenSequenceFeature(
890                        (2, 1, 1), dtype=dtypes.string, allow_missing=True),
891            }
892        },
893        expected_err=(ValueError,
894                      "First dimension of shape for feature a unknown. "
895                      "Consider using FixedLenSequenceFeature."))
896
897    self._test(
898        {
899            "example_names": example_names,
900            "serialized": ops.convert_to_tensor(serialized),
901            "features": {
902                cname:
903                    parsing_ops.FixedLenFeature(
904                        (1, None), dtype=dtypes.int64, default_value=[[1]]),
905            }
906        },
907        expected_err=(ValueError,
908                      "All dimensions of shape for feature c need to be known "
909                      r"but received \(1, None\)."))
910
911    self._test({
912        "example_names": example_names,
913        "serialized": ops.convert_to_tensor(serialized),
914        "features": {
915            aname:
916                parsing_ops.FixedLenSequenceFeature(
917                    (2, 1), dtype=dtypes.float32, allow_missing=True),
918            bname:
919                parsing_ops.FixedLenSequenceFeature(
920                    (1, 1, 1), dtype=dtypes.string, allow_missing=True),
921            cname:
922                parsing_ops.FixedLenSequenceFeature(
923                    shape=[], dtype=dtypes.int64, allow_missing=False),
924            dname:
925                parsing_ops.FixedLenSequenceFeature(
926                    shape=[], dtype=dtypes.string, allow_missing=True),
927        }
928    }, expected_err=(ValueError,
929                     "Unsupported: FixedLenSequenceFeature requires "
930                     "allow_missing to be True."))
931
932
class ParseSingleExampleTest(test.TestCase):
  """Tests for `parsing_ops.parse_single_example`."""

  def _test(self, kwargs, expected_values=None, expected_err=None):
    """Runs `parse_single_example` and checks its values, shapes or error.

    Args:
      kwargs: Keyword arguments forwarded to
        `parsing_ops.parse_single_example`.  Must contain a "features" dict,
        which is also used to drive the static shape checks.
      expected_values: Expected outputs, handed to
        `_compare_output_to_expected`.  Only used when `expected_err` is unset.
      expected_err: Optional pair whose two items are passed, in order, to
        `assertRaisesWithPredicateMatch`.  When set, building or running the
        parse op must raise a matching error and nothing else is checked.
    """
    with self.test_session() as sess:
      if expected_err:
        with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                 expected_err[1]):
          out = parsing_ops.parse_single_example(**kwargs)
          sess.run(flatten_values_tensors_or_sparse(out.values()))
        # The error may be raised before `out` is bound, so there is no
        # output whose shape could be checked; stop here.
        return

      # Returns dict w/ Tensors and SparseTensors.
      out = parsing_ops.parse_single_example(**kwargs)
      # Check values.
      tf_result = sess.run(flatten_values_tensors_or_sparse(out.values()))
      _compare_output_to_expected(self, out, expected_values, tf_result)

      # Check shapes.
      for k, f in kwargs["features"].items():
        if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
          self.assertEqual(tuple(out[k].get_shape()),
                           tensor_shape.as_shape(f.shape))
        elif isinstance(f, parsing_ops.VarLenFeature):
          self.assertEqual(
              tuple(out[k].indices.get_shape().as_list()), (None, 1))
          self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
          self.assertEqual(
              tuple(out[k].dense_shape.get_shape().as_list()), (1,))

  def testSingleExampleWithSparseAndSparseFeatureAndDense(self):
    """Parses one example mixing VarLen, Sparse, FixedLen and sequence specs.

    Features "a" and "b" are absent from the serialized example, so their
    expected values are the supplied defaults.
    """
    original = example(features=features({
        "c": float_feature([3, 4]),
        "d": float_feature([0.0, 1.0]),
        "val": bytes_feature([b"a", b"b"]),
        "idx": int64_feature([0, 3]),
        "st_a": float_feature([3.0, 4.0])
    }))

    serialized = original.SerializeToString()

    expected_st_a = (
        np.array(
            [[0], [1]], dtype=np.int64),  # indices
        np.array(
            [3.0, 4.0], dtype=np.float32),  # values
        np.array(
            [2], dtype=np.int64))  # shape: max_values = 2

    expected_sp = (  # indices, values, shape
        np.array(
            [[0], [3]], dtype=np.int64), np.array(
                ["a", "b"], dtype="|S"), np.array(
                    [13], dtype=np.int64))  # max_values = 13

    a_default = [1, 2, 3]
    b_default = np.random.rand(3, 3).astype(bytes)
    expected_output = {
        "st_a": expected_st_a,
        "sp": expected_sp,
        "a": [a_default],
        "b": b_default,
        "c": np.array([3, 4], dtype=np.float32),
        "d": np.array([0.0, 1.0], dtype=np.float32),
    }

    self._test(
        {
            "example_names":
                ops.convert_to_tensor("in1"),
            "serialized":
                ops.convert_to_tensor(serialized),
            "features": {
                "st_a":
                    parsing_ops.VarLenFeature(dtypes.float32),
                "sp":
                    parsing_ops.SparseFeature(
                        ["idx"], "val", dtypes.string, [13]),
                "a":
                    parsing_ops.FixedLenFeature(
                        (1, 3), dtypes.int64, default_value=a_default),
                "b":
                    parsing_ops.FixedLenFeature(
                        (3, 3), dtypes.string, default_value=b_default),
                # Feature "c" must be provided, since it has no default_value.
                "c":
                    parsing_ops.FixedLenFeature(2, dtypes.float32),
                "d":
                    parsing_ops.FixedLenSequenceFeature([],
                                                        dtypes.float32,
                                                        allow_missing=True)
            }
        },
        expected_output)
1025
1026
class ParseSequenceExampleTest(test.TestCase):
  """Tests for `parsing_ops.parse_single_sequence_example`."""

  def testCreateSequenceExample(self):
    """Builds a SequenceExample with context and feature lists; smoke test."""
    value = sequence_example(
        context=features({
            "global_feature": float_feature([1, 2, 3]),
        }),
        feature_lists=feature_lists({
            "repeated_feature_2_frames":
                feature_list([
                    bytes_feature([b"a", b"b", b"c"]),
                    bytes_feature([b"a", b"d", b"e"])
                ]),
            "repeated_feature_3_frames":
                feature_list([
                    int64_feature([3, 4, 5, 6, 7]),
                    int64_feature([-1, 0, 0, 0, 0]),
                    int64_feature([1, 2, 3, 4, 5])
                ])
        }))
    value.SerializeToString()  # Smoke test

  def _test(self,
            kwargs,
            expected_context_values=None,
            expected_feat_list_values=None,
            expected_err=None):
    """Runs `parse_single_sequence_example` and checks its outputs or error.

    Args:
      kwargs: Keyword arguments forwarded to
        `parsing_ops.parse_single_sequence_example`.  If it contains
        "context_features", the static shapes of the context outputs are
        checked against those specs.
      expected_context_values: Expected context outputs, handed to
        `_compare_output_to_expected`.  Defaults to an empty dict.
      expected_feat_list_values: Expected feature-list outputs, handed to
        `_compare_output_to_expected`.  Defaults to an empty dict.
      expected_err: Optional pair whose two items are passed, in order, to
        `assertRaisesWithPredicateMatch`.  When set, building or running the
        parse op must raise a matching error and nothing else is checked.
    """
    expected_context_values = expected_context_values or {}
    expected_feat_list_values = expected_feat_list_values or {}

    with self.test_session() as sess:
      if expected_err:
        with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                 expected_err[1]):
          c_out, fl_out = parsing_ops.parse_single_sequence_example(**kwargs)
          if c_out:
            sess.run(flatten_values_tensors_or_sparse(c_out.values()))
          if fl_out:
            sess.run(flatten_values_tensors_or_sparse(fl_out.values()))
        # `context_out` is never bound on the error path, so the shape checks
        # below cannot run; stop here.
        return

      # Returns dicts w/ Tensors and SparseTensors.
      context_out, feat_list_out = parsing_ops.parse_single_sequence_example(
          **kwargs)
      # Empty output dicts are skipped: there is nothing to evaluate.
      context_result = (
          sess.run(flatten_values_tensors_or_sparse(context_out.values()))
          if context_out else [])
      feat_list_result = (
          sess.run(flatten_values_tensors_or_sparse(feat_list_out.values()))
          if feat_list_out else [])
      # Check values.
      _compare_output_to_expected(self, context_out, expected_context_values,
                                  context_result)
      _compare_output_to_expected(self, feat_list_out,
                                  expected_feat_list_values, feat_list_result)

      # Check shapes; if serialized is a Tensor we need its size to
      # properly check.
      if "context_features" in kwargs:
        for k, f in kwargs["context_features"].items():
          if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
            self.assertEqual(
                tuple(context_out[k].get_shape().as_list()), f.shape)
          elif isinstance(f, parsing_ops.VarLenFeature):
            self.assertEqual(
                tuple(context_out[k].indices.get_shape().as_list()), (None, 1))
            self.assertEqual(
                tuple(context_out[k].values.get_shape().as_list()), (None,))
            self.assertEqual(
                tuple(context_out[k].dense_shape.get_shape().as_list()), (1,))

  def testSequenceExampleWithSparseAndDenseContext(self):
    """Parses context features only; "a" and "b" come from their defaults."""
    original = sequence_example(context=features({
        "c": float_feature([3, 4]),
        "st_a": float_feature([3.0, 4.0])
    }))

    serialized = original.SerializeToString()

    expected_st_a = (
        np.array(
            [[0], [1]], dtype=np.int64),  # indices
        np.array(
            [3.0, 4.0], dtype=np.float32),  # values
        np.array(
            [2], dtype=np.int64))  # shape: num_features = 2

    a_default = [1, 2, 3]
    b_default = np.random.rand(3, 3).astype(bytes)
    expected_context_output = {
        "st_a": expected_st_a,
        "a": [a_default],
        "b": b_default,
        "c": np.array(
            [3, 4], dtype=np.float32),
    }

    self._test(
        {
            "example_name":
                "in1",
            "serialized":
                ops.convert_to_tensor(serialized),
            "context_features": {
                "st_a":
                    parsing_ops.VarLenFeature(dtypes.float32),
                "a":
                    parsing_ops.FixedLenFeature(
                        (1, 3), dtypes.int64, default_value=a_default),
                "b":
                    parsing_ops.FixedLenFeature(
                        (3, 3), dtypes.string, default_value=b_default),
                # Feature "c" must be provided, since it has no default_value.
                "c":
                    parsing_ops.FixedLenFeature((2,), dtypes.float32),
            }
        },
        expected_context_values=expected_context_output)

  def testSequenceExampleWithMultipleSizeFeatureLists(self):
    """Parses dense feature lists of different lengths plus a missing one.

    Feature list "d" is absent from the input but declared with
    `allow_missing=True`, so an empty (0, 5) output is expected for it.
    """
    original = sequence_example(feature_lists=feature_lists({
        "a":
            feature_list([
                int64_feature([-1, 0, 1]),
                int64_feature([2, 3, 4]),
                int64_feature([5, 6, 7]),
                int64_feature([8, 9, 10]),
            ]),
        "b":
            feature_list([bytes_feature([b"r00", b"r01", b"r10", b"r11"])]),
        "c":
            feature_list([float_feature([3, 4]), float_feature([-1, 2])]),
    }))

    serialized = original.SerializeToString()

    expected_feature_list_output = {
        "a": np.array(
            [  # outer dimension is time.
                [[-1, 0, 1]],  # inside are 1x3 matrices
                [[2, 3, 4]],
                [[5, 6, 7]],
                [[8, 9, 10]]
            ],
            dtype=np.int64),
        "b": np.array(
            [  # outer dimension is time, inside are 2x2 matrices
                [[b"r00", b"r01"], [b"r10", b"r11"]]
            ],
            dtype=bytes),
        "c": np.array(
            [  # outer dimension is time, inside are 2-vectors
                [3, 4], [-1, 2]
            ],
            dtype=np.float32),
        "d": np.empty(
            shape=(0, 5), dtype=np.float32),  # empty_allowed_missing
    }

    self._test(
        {
            "example_name":
                "in1",
            "serialized":
                ops.convert_to_tensor(serialized),
            "sequence_features": {
                "a":
                    parsing_ops.FixedLenSequenceFeature((1, 3), dtypes.int64),
                "b":
                    parsing_ops.FixedLenSequenceFeature((2, 2), dtypes.string),
                "c":
                    parsing_ops.FixedLenSequenceFeature(2, dtypes.float32),
                "d":
                    parsing_ops.FixedLenSequenceFeature(
                        (5,), dtypes.float32, allow_missing=True),
            }
        },
        expected_feat_list_values=expected_feature_list_output)

  def testSequenceExampleWithoutDebugName(self):
    """Parses sparse and dense feature lists without passing `example_name`."""
    original = sequence_example(feature_lists=feature_lists({
        "a":
            feature_list([int64_feature([3, 4]), int64_feature([1, 0])]),
        "st_a":
            feature_list([
                float_feature([3.0, 4.0]), float_feature([5.0]),
                float_feature([])
            ]),
        "st_b":
            feature_list([
                bytes_feature([b"a"]), bytes_feature([]), bytes_feature([]),
                bytes_feature([b"b", b"c"])
            ])
    }))

    serialized = original.SerializeToString()

    expected_st_a = (
        np.array(
            [[0, 0], [0, 1], [1, 0]], dtype=np.int64),  # indices
        np.array(
            [3.0, 4.0, 5.0], dtype=np.float32),  # values
        np.array(
            [3, 2], dtype=np.int64))  # shape: num_time = 3, max_feat = 2

    expected_st_b = (
        np.array(
            [[0, 0], [3, 0], [3, 1]], dtype=np.int64),  # indices
        np.array(
            ["a", "b", "c"], dtype="|S"),  # values
        np.array(
            [4, 2], dtype=np.int64))  # shape: num_time = 4, max_feat = 2

    expected_st_c = (
        np.empty(
            (0, 2), dtype=np.int64),  # indices
        np.empty(
            (0,), dtype=np.int64),  # values
        np.array(
            [0, 0], dtype=np.int64))  # shape: num_time = 0, max_feat = 0

    expected_feature_list_output = {
        "a": np.array(
            [[3, 4], [1, 0]], dtype=np.int64),
        "st_a": expected_st_a,
        "st_b": expected_st_b,
        "st_c": expected_st_c,
    }

    self._test(
        {
            "serialized": ops.convert_to_tensor(serialized),
            "sequence_features": {
                "st_a": parsing_ops.VarLenFeature(dtypes.float32),
                "st_b": parsing_ops.VarLenFeature(dtypes.string),
                "st_c": parsing_ops.VarLenFeature(dtypes.int64),
                "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64),
            }
        },
        expected_feat_list_values=expected_feature_list_output)

  def testSequenceExampleWithSparseAndDenseFeatureLists(self):
    """Parses sparse and dense feature lists with `example_name` set.

    "st_c" is requested but absent from the input, so an empty sparse result
    is expected for it.
    """
    original = sequence_example(feature_lists=feature_lists({
        "a":
            feature_list([int64_feature([3, 4]), int64_feature([1, 0])]),
        "st_a":
            feature_list([
                float_feature([3.0, 4.0]), float_feature([5.0]),
                float_feature([])
            ]),
        "st_b":
            feature_list([
                bytes_feature([b"a"]), bytes_feature([]), bytes_feature([]),
                bytes_feature([b"b", b"c"])
            ])
    }))

    serialized = original.SerializeToString()

    expected_st_a = (
        np.array(
            [[0, 0], [0, 1], [1, 0]], dtype=np.int64),  # indices
        np.array(
            [3.0, 4.0, 5.0], dtype=np.float32),  # values
        np.array(
            [3, 2], dtype=np.int64))  # shape: num_time = 3, max_feat = 2

    expected_st_b = (
        np.array(
            [[0, 0], [3, 0], [3, 1]], dtype=np.int64),  # indices
        np.array(
            ["a", "b", "c"], dtype="|S"),  # values
        np.array(
            [4, 2], dtype=np.int64))  # shape: num_time = 4, max_feat = 2

    expected_st_c = (
        np.empty(
            (0, 2), dtype=np.int64),  # indices
        np.empty(
            (0,), dtype=np.int64),  # values
        np.array(
            [0, 0], dtype=np.int64))  # shape: num_time = 0, max_feat = 0

    expected_feature_list_output = {
        "a": np.array(
            [[3, 4], [1, 0]], dtype=np.int64),
        "st_a": expected_st_a,
        "st_b": expected_st_b,
        "st_c": expected_st_c,
    }

    self._test(
        {
            "example_name": "in1",
            "serialized": ops.convert_to_tensor(serialized),
            "sequence_features": {
                "st_a": parsing_ops.VarLenFeature(dtypes.float32),
                "st_b": parsing_ops.VarLenFeature(dtypes.string),
                "st_c": parsing_ops.VarLenFeature(dtypes.int64),
                "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64),
            }
        },
        expected_feat_list_values=expected_feature_list_output)

  def testSequenceExampleWithEmptyFeatureInFeatureLists(self):
    """Parses a sparse feature list whose middle step is an unset `feature()`.

    The expected indices skip time step 1, i.e. the unset feature contributes
    no values.
    """
    original = sequence_example(feature_lists=feature_lists({
        "st_a":
            feature_list([
                float_feature([3.0, 4.0]),
                feature(),
                float_feature([5.0]),
            ]),
    }))

    serialized = original.SerializeToString()

    expected_st_a = (
        np.array(
            [[0, 0], [0, 1], [2, 0]], dtype=np.int64),  # indices
        np.array(
            [3.0, 4.0, 5.0], dtype=np.float32),  # values
        np.array(
            [3, 2], dtype=np.int64))  # shape: num_time = 3, max_feat = 2

    expected_feature_list_output = {
        "st_a": expected_st_a,
    }

    self._test(
        {
            "example_name": "in1",
            "serialized": ops.convert_to_tensor(serialized),
            "sequence_features": {
                "st_a": parsing_ops.VarLenFeature(dtypes.float32),
            }
        },
        expected_feat_list_values=expected_feature_list_output)

  def testSequenceExampleListWithInconsistentDataFails(self):
    """A feature list mixing int64 and float entries fails as int64."""
    original = sequence_example(feature_lists=feature_lists({
        "a": feature_list([int64_feature([-1, 0]), float_feature([2, 3])])
    }))

    serialized = original.SerializeToString()

    self._test(
        {
            "example_name": "in1",
            "serialized": ops.convert_to_tensor(serialized),
            "sequence_features": {
                "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
            }
        },
        expected_err=(errors_impl.OpError, "Feature list: a, Index: 1."
                      "  Data types don't match. Expected type: int64"))

  def testSequenceExampleListWithWrongDataTypeFails(self):
    """A float feature list requested as int64 fails at index 0."""
    original = sequence_example(feature_lists=feature_lists({
        "a": feature_list([float_feature([2, 3])])
    }))

    serialized = original.SerializeToString()

    self._test(
        {
            "example_name": "in1",
            "serialized": ops.convert_to_tensor(serialized),
            "sequence_features": {
                "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
            }
        },
        expected_err=(errors_impl.OpError,
                      "Feature list: a, Index: 0.  Data types don't match."
                      " Expected type: int64"))

  def testSequenceExampleListWithWrongSparseDataTypeFails(self):
    """A float entry at index 2 of an int64 feature list fails with its kind."""
    original = sequence_example(feature_lists=feature_lists({
        "a":
            feature_list([
                int64_feature([3, 4]), int64_feature([1, 2]),
                float_feature([2.0, 3.0])
            ])
    }))

    serialized = original.SerializeToString()

    self._test(
        {
            "example_name": "in1",
            "serialized": ops.convert_to_tensor(serialized),
            "sequence_features": {
                "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
            }
        },
        expected_err=(errors_impl.OpError,
                      "Name: in1, Feature list: a, Index: 2."
                      "  Data types don't match. Expected type: int64"
                      "  Feature is: float_list"))

  def testSequenceExampleListWithWrongShapeFails(self):
    """A step with 3 values fails when the declared step shape is (2,)."""
    original = sequence_example(feature_lists=feature_lists({
        "a": feature_list([int64_feature([2, 3]), int64_feature([2, 3, 4])]),
    }))

    serialized = original.SerializeToString()

    self._test(
        {
            "example_name": "in1",
            "serialized": ops.convert_to_tensor(serialized),
            "sequence_features": {
                "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
            }
        },
        expected_err=(errors_impl.OpError, r"Name: in1, Key: a, Index: 1."
                      r"  Number of int64 values != expected."
                      r"  values size: 3 but output shape: \[2\]"))

  def testSequenceExampleWithMissingFeatureListFails(self):
    """A required feature list that is absent from the input is an error."""
    original = sequence_example(feature_lists=feature_lists({}))

    # Test fails because we didn't add:
    #  feature_list_dense_defaults = {"a": None}
    self._test(
        {
            "example_name": "in1",
            "serialized": ops.convert_to_tensor(original.SerializeToString()),
            "sequence_features": {
                "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
            }
        },
        expected_err=(
            errors_impl.OpError,
            "Name: in1, Feature list 'a' is required but could not be found."
            "  Did you mean to include it in"
            " feature_list_dense_missing_assumed_empty or"
            " feature_list_dense_defaults?"))
1463
1464
class DecodeJSONExampleTest(test.TestCase):
  """Tests for `parsing_ops.decode_json_example`."""

  def _testRoundTrip(self, examples):
    """Checks that Example -> JSON -> binary -> Example is lossless.

    Args:
      examples: A single `Example` proto or an arbitrarily nested list of
        them.  The nesting determines the shape of the JSON string tensor fed
        to `decode_json_example`.
    """
    with self.test_session() as sess:
      # Use the builtin `object`: the `np.object` alias was deprecated in
      # NumPy 1.20 and removed in NumPy 1.24.
      examples = np.array(examples, dtype=object)

      json_tensor = constant_op.constant(
          [json_format.MessageToJson(m) for m in examples.flatten()],
          shape=examples.shape,
          dtype=dtypes.string)
      binary_tensor = parsing_ops.decode_json_example(json_tensor)
      binary_val = sess.run(binary_tensor)

      if examples.shape:
        # Non-scalar input: compare element by element in flattened order.
        self.assertShapeEqual(binary_val, json_tensor)
        for input_example, output_binary in zip(examples.flatten(),
                                                binary_val.flatten()):
          output_example = example_pb2.Example()
          output_example.ParseFromString(output_binary)
          self.assertProtoEquals(input_example, output_example)
      else:
        # Scalar input: `examples` is a 0-d array holding a single proto.
        output_example = example_pb2.Example()
        output_example.ParseFromString(binary_val)
        self.assertProtoEquals(examples.item(), output_example)

  def testEmptyTensor(self):
    """Round-trips inputs that contain no examples at all."""
    self._testRoundTrip([])
    self._testRoundTrip([[], [], []])

  def testEmptyExamples(self):
    """Round-trips a vector of examples that have no features."""
    self._testRoundTrip([example(), example(), example()])

  def testDenseFeaturesScalar(self):
    """Round-trips a single example passed as a scalar."""
    self._testRoundTrip(
        example(features=features({
            "a": float_feature([1, 1, 3])
        })))

  def testDenseFeaturesVector(self):
    """Round-trips a vector of two examples."""
    self._testRoundTrip([
        example(features=features({
            "a": float_feature([1, 1, 3])
        })),
        example(features=features({
            "a": float_feature([-1, -1, 2])
        })),
    ])

  def testDenseFeaturesMatrix(self):
    """Round-trips a 2x1 matrix of examples."""
    self._testRoundTrip([
        [example(features=features({
            "a": float_feature([1, 1, 3])
        }))],
        [example(features=features({
            "a": float_feature([-1, -1, 2])
        }))],
    ])

  def testSparseFeatures(self):
    """Round-trips examples with empty, unset and differing feature sets."""
    self._testRoundTrip([
        example(features=features({
            "st_c": float_feature([3, 4])
        })),
        example(features=features({
            "st_c": float_feature([])
        })),
        example(features=features({
            "st_d": feature()
        })),
        example(features=features({
            "st_c": float_feature([1, 2, -1]),
            "st_d": bytes_feature([b"hi"])
        })),
    ])

  def testSerializedContainingBytes(self):
    """Round-trips bytes features stored under a key with special characters."""
    aname = "a"
    bname = "b*has+a:tricky_name"
    self._testRoundTrip([
        example(features=features({
            aname: float_feature([1, 1]),
            bname: bytes_feature([b"b0_str"])
        })),
        example(features=features({
            aname: float_feature([-1, -1]),
            bname: bytes_feature([b"b1"])
        })),
    ])

  def testInvalidSyntax(self):
    """Malformed JSON makes the op fail when it is run."""
    with self.test_session() as sess:
      json_tensor = constant_op.constant(["{]"])
      binary_tensor = parsing_ops.decode_json_example(json_tensor)
      with self.assertRaisesOpError("Error while parsing JSON"):
        sess.run(binary_tensor)
1560
1561
class ParseTensorOpTest(test.TestCase):
  """Tests for `parsing_ops.parse_tensor`."""

  def testToFloat32(self):
    """A serialized float32 TensorProto parses back to the same values."""
    with self.test_session():
      expected = np.random.rand(3, 4, 5).astype(np.float32)
      tensor_proto = tensor_util.make_tensor_proto(expected)

      serialized = array_ops.placeholder(dtypes.string)
      tensor = parsing_ops.parse_tensor(serialized, dtypes.float32)

      result = tensor.eval(
          feed_dict={serialized: tensor_proto.SerializeToString()})

      self.assertAllEqual(expected, result)

  def testToUint8(self):
    """A serialized uint8 TensorProto parses back to the same values."""
    with self.test_session():
      # `np.random.rand` yields values in [0, 1), which all truncate to 0 as
      # uint8; draw from the full uint8 range so the comparison is meaningful.
      expected = np.random.randint(0, 256, size=(3, 4, 5)).astype(np.uint8)
      tensor_proto = tensor_util.make_tensor_proto(expected)

      serialized = array_ops.placeholder(dtypes.string)
      tensor = parsing_ops.parse_tensor(serialized, dtypes.uint8)

      result = tensor.eval(
          feed_dict={serialized: tensor_proto.SerializeToString()})

      self.assertAllEqual(expected, result)

  def testTypeMismatch(self):
    """Parsing a uint8 TensorProto as uint16 raises a type-mismatch error."""
    with self.test_session():
      expected = np.random.rand(3, 4, 5).astype(np.uint8)
      tensor_proto = tensor_util.make_tensor_proto(expected)

      serialized = array_ops.placeholder(dtypes.string)
      tensor = parsing_ops.parse_tensor(serialized, dtypes.uint16)

      with self.assertRaisesOpError(
          r"Type mismatch between parsed tensor \(uint8\) and dtype "
          r"\(uint16\)"):
        tensor.eval(feed_dict={serialized: tensor_proto.SerializeToString()})

  def testInvalidInput(self):
    """Unparseable bytes and non-scalar inputs are both rejected."""
    with self.test_session():
      serialized = array_ops.placeholder(dtypes.string)
      tensor = parsing_ops.parse_tensor(serialized, dtypes.uint16)

      with self.assertRaisesOpError(
          "Could not parse `serialized` as TensorProto: 'bogus'"):
        tensor.eval(feed_dict={serialized: "bogus"})

      with self.assertRaisesOpError(
          r"Expected `serialized` to be a scalar, got shape: \[1\]"):
        tensor.eval(feed_dict={serialized: ["bogus"]})
1615
1616
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
  test.main()
1619