# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow.ops.parsing_ops."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import itertools

import numpy as np

from google.protobuf import json_format

from tensorflow.core.example import example_pb2
from tensorflow.core.example import feature_pb2
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import parsing_ops
from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging

# Helpers for creating Example objects.
# Each lambda wraps its argument in the corresponding feature_pb2 message so
# the tests below can build protos with compact, nested literals.
example = example_pb2.Example
feature = feature_pb2.Feature
features = lambda d: feature_pb2.Features(feature=d)
bytes_feature = lambda v: feature(bytes_list=feature_pb2.BytesList(value=v))
int64_feature = lambda v: feature(int64_list=feature_pb2.Int64List(value=v))
float_feature = lambda v: feature(float_list=feature_pb2.FloatList(value=v))
# Helpers for creating SequenceExample objects
feature_list = lambda l: feature_pb2.FeatureList(feature=l)
feature_lists = lambda d: feature_pb2.FeatureLists(feature_list=d)
sequence_example = example_pb2.SequenceExample


def flatten(list_of_lists):
  """Flatten one level of nesting.

  Args:
    list_of_lists: an iterable of iterables.

  Returns:
    A lazy `itertools.chain` iterator over the inner elements (not a list).
  """
  return itertools.chain.from_iterable(list_of_lists)


def flatten_values_tensors_or_sparse(tensors_list):
  """Flatten each SparseTensor object into 3 Tensors for session.run().

  Args:
    tensors_list: an iterable of `Tensor` and/or `SparseTensor` objects.

  Returns:
    A list in which every `SparseTensor` is replaced, in place, by its
    `indices`, `values` and `dense_shape` (in that order) and every other
    element is kept as is.
  """
  return list(
      flatten([[v.indices, v.values, v.dense_shape] if isinstance(
          v, sparse_tensor.SparseTensor) else [v] for v in tensors_list]))


def _compare_output_to_expected(tester, dict_tensors, expected_tensors,
                                flat_output):
  """Asserts that evaluated parser outputs match the expected values.

  Args:
    tester: object providing `assertEqual` and `assertAllEqual`; the callers
      in this file pass the running `test.TestCase`.
    dict_tensors: dict mapping feature key to the `Tensor` or `SparseTensor`
      returned by the parsing op. Its iteration order is used to walk
      `flat_output`.
    expected_tensors: dict with exactly the same keys. For `SparseTensor`
      entries the value must be a length-3 sequence
      `(indices, values, shape)`; for dense entries it is the expected array.
    flat_output: evaluated values laid out the way
      `flatten_values_tensors_or_sparse(dict_tensors.values())` orders them:
      three consecutive entries per `SparseTensor`, one per dense `Tensor`.
  """
  tester.assertEqual(set(dict_tensors.keys()), set(expected_tensors.keys()))

  i = 0  # Index into the flattened output of session.run()
  for k, v in dict_tensors.items():
    expected_v = expected_tensors[k]
    tf_logging.info("Comparing key: %s", k)
    if isinstance(v, sparse_tensor.SparseTensor):
      # Three outputs for SparseTensor : indices, values, shape.
      # The key is included on both sides only so that a failure message
      # names the offending feature.
      tester.assertEqual([k, len(expected_v)], [k, 3])
      tester.assertAllEqual(expected_v[0], flat_output[i])
      tester.assertAllEqual(expected_v[1], flat_output[i + 1])
      tester.assertAllEqual(expected_v[2], flat_output[i + 2])
      i += 3
    else:
      # One output for standard Tensor.
      tester.assertAllEqual(expected_v, flat_output[i])
      i += 1


class ParseExampleTest(test.TestCase):

  def _test(self, kwargs, expected_values=None, expected_err=None):
    """Runs `parsing_ops.parse_example` and checks its values or its error.

    Args:
      kwargs: keyword arguments forwarded verbatim to
        `parsing_ops.parse_example`. The success path additionally reads
        `kwargs["serialized"]` and `kwargs["features"]` for shape checks.
      expected_values: dict of expected outputs, in the format accepted by
        `_compare_output_to_expected`. Used only when `expected_err` is
        falsy.
      expected_err: optional `(exception_type, message_pattern)` pair handed
        to `assertRaisesWithPredicateMatch`. When given, only the error is
        checked and the method returns early.
    """
    with self.test_session() as sess:
      if expected_err:
        # The error may surface either at graph construction or at run time,
        # so both steps sit inside the assertion context.
        with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                 expected_err[1]):
          out = parsing_ops.parse_example(**kwargs)
          sess.run(flatten_values_tensors_or_sparse(out.values()))
        return
      else:
        # Returns dict w/ Tensors and SparseTensors.
        out = parsing_ops.parse_example(**kwargs)
        result = flatten_values_tensors_or_sparse(out.values())
        # Check values.
        tf_result = sess.run(result)
        _compare_output_to_expected(self, out, expected_values, tf_result)

      # Check shapes; if serialized is a Tensor we need its size to
      # properly check.
      serialized = kwargs["serialized"]
      batch_size = (serialized.eval().size if isinstance(serialized, ops.Tensor)
                    else np.asarray(serialized).size)
      for k, f in kwargs["features"].items():
        if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
          # Dense outputs: static shape is the batch size followed by the
          # declared feature shape.
          self.assertEqual(
              tuple(out[k].get_shape().as_list()), (batch_size,) + f.shape)
        elif isinstance(f, parsing_ops.VarLenFeature):
          # Sparse outputs: rank-2 (batch, element) with unknown nnz.
          self.assertEqual(
              tuple(out[k].indices.get_shape().as_list()), (None, 2))
          self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
          self.assertEqual(
              tuple(out[k].dense_shape.get_shape().as_list()), (2,))

  def testEmptySerializedWithAllDefaults(self):
    # Two empty serialized protos: every FixedLenFeature is expected to come
    # back as its default_value and the VarLenFeature as an empty sparse
    # tensor.
    sparse_name = "st_a"
    a_name = "a"
    b_name = "b"
    c_name = "c:has_a_tricky_name"
    a_default = [0, 42, 0]
    b_default = np.random.rand(3, 3).astype(bytes)
    c_default = np.random.rand(2).astype(np.float32)

    expected_st_a = (  # indices, values, shape
        np.empty(
            (0, 2), dtype=np.int64),  # indices
        np.empty(
            (0,), dtype=np.int64),  # sp_a is DT_INT64
        np.array(
            [2, 0], dtype=np.int64))  # batch == 2, max_elems = 0

    expected_output = {
        sparse_name: expected_st_a,
        a_name: np.array(2 * [[a_default]]),
        b_name: np.array(2 * [b_default]),
        c_name: np.array(2 * [c_default]),
    }

    self._test(
        {
            "example_names":
                np.empty(
                    (0,), dtype=bytes),
            "serialized":
                ops.convert_to_tensor(["", ""]),
            "features": {
                sparse_name:
                    parsing_ops.VarLenFeature(dtypes.int64),
                a_name:
                    parsing_ops.FixedLenFeature(
                        (1, 3), dtypes.int64, default_value=a_default),
                b_name:
                    parsing_ops.FixedLenFeature(
                        (3, 3), dtypes.string, default_value=b_default),
                c_name:
                    parsing_ops.FixedLenFeature(
                        (2,), dtypes.float32, default_value=c_default),
            }
        },
        expected_output)

  def testEmptySerializedWithoutDefaultsShouldFail(self):
    # A FixedLenFeature without default_value must be present in the input;
    # both cases below expect an OpError naming feature "c".
    input_features = {
        "st_a":
            parsing_ops.VarLenFeature(dtypes.int64),
        "a":
            parsing_ops.FixedLenFeature(
                (1, 3), dtypes.int64, default_value=[0, 42, 0]),
        "b":
            parsing_ops.FixedLenFeature(
                (3, 3),
                dtypes.string,
                default_value=np.random.rand(3, 3).astype(bytes)),
        # Feature "c" is missing a default, this gap will cause failure.
        "c":
            parsing_ops.FixedLenFeature(
                (2,), dtype=dtypes.float32),
    }

    # Edge case where the key is there but the feature value is empty
    original = example(features=features({"c": feature()}))
    self._test(
        {
            "example_names": ["in1"],
            "serialized": [original.SerializeToString()],
            "features": input_features,
        },
        expected_err=(
            errors_impl.OpError,
            "Name: in1, Feature: c \\(data type: float\\) is required"))

    # Standard case of missing key and value.
    self._test(
        {
            "example_names": ["in1", "in2"],
            "serialized": ["", ""],
            "features": input_features,
        },
        expected_err=(
            errors_impl.OpError,
            "Name: in1, Feature: c \\(data type: float\\) is required"))

  def testDenseNotMatchingShapeShouldFail(self):
    # The second example supplies 2 floats for a feature declared with shape
    # (1, 3); the expected error names that example ("failing").
    original = [
        example(features=features({
            "a": float_feature([1, 1, 3]),
        })), example(features=features({
            "a": float_feature([-1, -1]),
        }))
    ]

    names = ["passing", "failing"]
    serialized = [m.SerializeToString() for m in original]

    self._test(
        {
            "example_names": names,
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                "a": parsing_ops.FixedLenFeature((1, 3), dtypes.float32)
            }
        },
        expected_err=(errors_impl.OpError,
                      "Name: failing, Key: a, Index: 1.  Number of float val"))

  def testDenseDefaultNoShapeShouldFail(self):
    # A FixedLenFeature declared with shape=None is expected to raise
    # ValueError.
    original = [example(features=features({"a": float_feature([1, 1, 3]),})),]

    serialized = [m.SerializeToString() for m in original]

    self._test(
        {
            "example_names": ["failing"],
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                "a": parsing_ops.FixedLenFeature(None, dtypes.float32)
            }
        },
        expected_err=(ValueError, "Missing shape for feature a"))

  def testSerializedContainingSparse(self):
    # Four examples mixing populated, empty and absent VarLenFeatures; empty
    # and absent ones are expected to contribute no sparse entries.
    original = [
        example(features=features({
            "st_c": float_feature([3, 4])
        })),
        example(features=features({
            "st_c": float_feature([]),  # empty float list
        })),
        example(features=features({
            "st_d": feature(),  # feature with nothing in it
        })),
        example(features=features({
            "st_c": float_feature([1, 2, -1]),
            "st_d": bytes_feature([b"hi"])
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_st_c = (  # indices, values, shape
        np.array(
            [[0, 0], [0, 1], [3, 0], [3, 1], [3, 2]], dtype=np.int64), np.array(
                [3.0, 4.0, 1.0, 2.0, -1.0], dtype=np.float32), np.array(
                    [4, 3], dtype=np.int64))  # batch == 4, max_elems = 3

    expected_st_d = (  # indices, values, shape
        np.array(
            [[3, 0]], dtype=np.int64), np.array(
                ["hi"], dtype=bytes), np.array(
                    [4, 1], dtype=np.int64))  # batch == 4, max_elems = 1

    expected_output = {
        "st_c": expected_st_c,
        "st_d": expected_st_d,
    }

    self._test({
        "serialized": ops.convert_to_tensor(serialized),
        "features": {
            "st_c": parsing_ops.VarLenFeature(dtypes.float32),
            "st_d": parsing_ops.VarLenFeature(dtypes.string)
        }
    }, expected_output)

  def testSerializedContainingSparseFeature(self):
    # SparseFeature pairs an index feature ("idx") with a value feature
    # ("val"). The last example has unsorted indices; the expected output
    # below lists its entries ordered by index.
    original = [
        example(features=features({
            "val": float_feature([3, 4]),
            "idx": int64_feature([5, 10])
        })),
        example(features=features({
            "val": float_feature([]),  # empty float list
            "idx": int64_feature([])
        })),
        example(features=features({
            "val": feature(),  # feature with nothing in it
            # missing idx feature
        })),
        example(features=features({
            "val": float_feature([1, 2, -1]),
            "idx":
                int64_feature([0, 9, 3])  # unsorted
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_sp = (  # indices, values, shape
        np.array(
            [[0, 5], [0, 10], [3, 0], [3, 3], [3, 9]], dtype=np.int64),
        np.array(
            [3.0, 4.0, 1.0, -1.0, 2.0], dtype=np.float32), np.array(
                [4, 13], dtype=np.int64))  # batch == 4, max_elems = 13

    expected_output = {"sp": expected_sp,}

    self._test({
        "serialized": ops.convert_to_tensor(serialized),
        "features": {
            "sp": parsing_ops.SparseFeature(
                ["idx"], "val", dtypes.float32, [13])
        }
    }, expected_output)

  def testSerializedContainingSparseFeatureReuse(self):
    # Two SparseFeatures share the same index key "idx" but read different
    # value keys and declare different sizes (13 and 7).
    original = [
        example(features=features({
            "val1": float_feature([3, 4]),
            "val2": float_feature([5, 6]),
            "idx": int64_feature([5, 10])
        })),
        example(features=features({
            "val1": float_feature([]),  # empty float list
            "idx": int64_feature([])
        })),
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_sp1 = (  # indices, values, shape
        np.array(
            [[0, 5], [0, 10]], dtype=np.int64), np.array(
                [3.0, 4.0], dtype=np.float32), np.array(
                    [2, 13], dtype=np.int64))  # batch == 2, max_elems = 13

    expected_sp2 = (  # indices, values, shape
        np.array(
            [[0, 5], [0, 10]], dtype=np.int64), np.array(
                [5.0, 6.0], dtype=np.float32), np.array(
                    [2, 7], dtype=np.int64))  # batch == 2, max_elems = 7

    expected_output = {
        "sp1": expected_sp1,
        "sp2": expected_sp2,
    }

    self._test({
        "serialized": ops.convert_to_tensor(serialized),
        "features": {
            "sp1":
                parsing_ops.SparseFeature("idx", "val1", dtypes.float32, 13),
            "sp2":
                parsing_ops.SparseFeature(
                    "idx", "val2", dtypes.float32, size=7, already_sorted=True)
        }
    }, expected_output)

  def testSerializedContaining3DSparseFeature(self):
    # Same as the 2-D SparseFeature test but with two index features
    # ("idx0", "idx1"), giving rank-3 sparse output (batch, 13, 3).
    original = [
        example(features=features({
            "val": float_feature([3, 4]),
            "idx0": int64_feature([5, 10]),
            "idx1": int64_feature([0, 2]),
        })),
        example(features=features({
            "val": float_feature([]),  # empty float list
            "idx0": int64_feature([]),
            "idx1": int64_feature([]),
        })),
        example(features=features({
            "val": feature(),  # feature with nothing in it
            # missing idx feature
        })),
        example(features=features({
            "val": float_feature([1, 2, -1]),
            "idx0": int64_feature([0, 9, 3]),  # unsorted
            "idx1": int64_feature([1, 0, 2]),
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_sp = (
        # indices
        np.array(
            [[0, 5, 0], [0, 10, 2], [3, 0, 1], [3, 3, 2], [3, 9, 0]],
            dtype=np.int64),
        # values
        np.array([3.0, 4.0, 1.0, -1.0, 2.0], dtype=np.float32),
        # shape: batch == 4, followed by the declared sizes (13, 3)
        np.array([4, 13, 3], dtype=np.int64))

    expected_output = {"sp": expected_sp,}

    self._test({
        "serialized": ops.convert_to_tensor(serialized),
        "features": {
            "sp": parsing_ops.SparseFeature(
                ["idx0", "idx1"], "val", dtypes.float32, [13, 3])
        }
    }, expected_output)

  def testSerializedContainingDense(self):
    # Dense features without defaults; every example supplies every feature.
    aname = "a"
    bname = "b*has+a:tricky_name"
    original = [
        example(features=features({
            aname: float_feature([1, 1]),
            bname: bytes_feature([b"b0_str"]),
        })), example(features=features({
            aname: float_feature([-1, -1]),
            bname: bytes_feature([b""]),
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_output = {
        aname:
            np.array(
                [[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
        bname:
            np.array(
                ["b0_str", ""], dtype=bytes).reshape(2, 1, 1, 1, 1),
    }

    # No defaults, values required
    self._test(
        {
            "serialized":
                ops.convert_to_tensor(serialized),
            "features": {
                aname:
                    parsing_ops.FixedLenFeature(
                        (1, 2, 1), dtype=dtypes.float32),
                bname:
                    parsing_ops.FixedLenFeature(
                        (1, 1, 1, 1), dtype=dtypes.string),
            }
        },
        expected_output)

  # This test is identical to the previous one except
  # for the creation of 'serialized'.
  def testSerializedContainingDenseWithConcat(self):
    # Each serialized entry is the byte concatenation of two Example protos.
    # Per the expected output below, when both protos carry the same key the
    # value from the second one is what gets parsed.
    aname = "a"
    bname = "b*has+a:tricky_name"
    # TODO(lew): Feature appearing twice should be an error in future.
    original = [
        (example(features=features({
            aname: float_feature([10, 10]),
        })), example(features=features({
            aname: float_feature([1, 1]),
            bname: bytes_feature([b"b0_str"]),
        }))),
        (
            example(features=features({
                bname: bytes_feature([b"b100"]),
            })),
            example(features=features({
                aname: float_feature([-1, -1]),
                bname: bytes_feature([b"b1"]),
            })),),
    ]

    serialized = [
        m.SerializeToString() + n.SerializeToString() for (m, n) in original
    ]

    expected_output = {
        aname:
            np.array(
                [[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
        bname:
            np.array(
                ["b0_str", "b1"], dtype=bytes).reshape(2, 1, 1, 1, 1),
    }

    # No defaults, values required
    self._test(
        {
            "serialized":
                ops.convert_to_tensor(serialized),
            "features": {
                aname:
                    parsing_ops.FixedLenFeature(
                        (1, 2, 1), dtype=dtypes.float32),
                bname:
                    parsing_ops.FixedLenFeature(
                        (1, 1, 1, 1), dtype=dtypes.string),
            }
        },
        expected_output)

  def testSerializedContainingDenseScalar(self):
    # The second example has no features at all, so "a" is expected to take
    # its scalar default_value (-1).
    original = [
        example(features=features({
            "a": float_feature([1]),
        })), example(features=features({}))
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_output = {
        "a":
            np.array(
                [[1], [-1]], dtype=np.float32)  # 2x1 (column vector)
    }

    self._test(
        {
            "serialized":
                ops.convert_to_tensor(serialized),
            "features": {
                "a":
                    parsing_ops.FixedLenFeature(
                        (1,), dtype=dtypes.float32, default_value=-1),
            }
        },
        expected_output)

  def testSerializedContainingDenseWithDefaults(self):
    # Each example omits (or leaves empty) at least one dense feature; the
    # missing ones are expected to be filled from default_value.
    original = [
        example(features=features({
            "a": float_feature([1, 1]),
        })),
        example(features=features({
            "b": bytes_feature([b"b1"]),
        })),
        example(features=features({
            "b": feature()
        })),
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_output = {
        "a":
            np.array(
                [[1, 1], [3, -3], [3, -3]], dtype=np.float32).reshape(3, 1, 2,
                                                                      1),
        "b":
            np.array(
                ["tmp_str", "b1", "tmp_str"], dtype=bytes).reshape(3, 1, 1, 1,
                                                                   1),
    }

    self._test(
        {
            "serialized":
                ops.convert_to_tensor(serialized),
            "features": {
                "a":
                    parsing_ops.FixedLenFeature(
                        (1, 2, 1),
                        dtype=dtypes.float32,
                        default_value=[3.0, -3.0]),
                "b":
                    parsing_ops.FixedLenFeature(
                        (1, 1, 1, 1),
                        dtype=dtypes.string,
                        default_value="tmp_str"),
            }
        },
        expected_output)

  def testSerializedContainingSparseAndSparseFeatureAndDenseWithNoDefault(self):
    # Mixes all three feature kinds in one call: an absent VarLenFeature
    # ("st_a"), a SparseFeature ("sp"), dense features with defaults
    # ("a", "b") and a required dense feature ("c").
    expected_st_a = (  # indices, values, shape
        np.empty(
            (0, 2), dtype=np.int64),  # indices
        np.empty(
            (0,), dtype=np.int64),  # sp_a is DT_INT64
        np.array(
            [2, 0], dtype=np.int64))  # batch == 2, max_elems = 0
    expected_sp = (  # indices, values, shape
        np.array(
            [[0, 0], [0, 3], [1, 7]], dtype=np.int64), np.array(
                ["a", "b", "c"], dtype="|S"), np.array(
                    [2, 13], dtype=np.int64))  # batch == 2, max_elems = 13

    original = [
        example(features=features({
            "c": float_feature([3, 4]),
            "val": bytes_feature([b"a", b"b"]),
            "idx": int64_feature([0, 3])
        })), example(features=features({
            "c": float_feature([1, 2]),
            "val": bytes_feature([b"c"]),
            "idx": int64_feature([7])
        }))
    ]

    names = ["in1", "in2"]
    serialized = [m.SerializeToString() for m in original]

    a_default = [1, 2, 3]
    b_default = np.random.rand(3, 3).astype(bytes)
    expected_output = {
        "st_a": expected_st_a,
        "sp": expected_sp,
        "a": np.array(2 * [[a_default]]),
        "b": np.array(2 * [b_default]),
        "c": np.array(
            [[3, 4], [1, 2]], dtype=np.float32),
    }

    self._test(
        {
            "example_names":
                names,
            "serialized":
                ops.convert_to_tensor(serialized),
            "features": {
                "st_a":
                    parsing_ops.VarLenFeature(dtypes.int64),
                "sp":
                    parsing_ops.SparseFeature("idx", "val", dtypes.string, 13),
                "a":
                    parsing_ops.FixedLenFeature(
                        (1, 3), dtypes.int64, default_value=a_default),
                "b":
                    parsing_ops.FixedLenFeature(
                        (3, 3), dtypes.string, default_value=b_default),
                # Feature "c" must be provided, since it has no default_value.
                "c":
                    parsing_ops.FixedLenFeature((2,), dtypes.float32),
            }
        },
        expected_output)

  def testSerializedContainingSparseAndSparseFeatureWithReuse(self):
    # The raw "idx" feature is parsed twice: once on its own as a
    # VarLenFeature and once as the index of SparseFeature "sp".
    expected_idx = (  # indices, values, shape
        np.array(
            [[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.int64),
        np.array([0, 3, 7, 1]), np.array(
            [2, 2], dtype=np.int64))  # batch == 2, max_elems = 2

    expected_sp = (  # indices, values, shape
        np.array(
            [[0, 0], [0, 3], [1, 1], [1, 7]], dtype=np.int64), np.array(
                ["a", "b", "d", "c"], dtype="|S"), np.array(
                    [2, 13], dtype=np.int64))  # batch == 2, max_elems = 13

    original = [
        example(features=features({
            "val": bytes_feature([b"a", b"b"]),
            "idx": int64_feature([0, 3])
        })), example(features=features({
            "val": bytes_feature([b"c", b"d"]),
            "idx": int64_feature([7, 1])
        }))
    ]

    names = ["in1", "in2"]
    serialized = [m.SerializeToString() for m in original]

    expected_output = {
        "idx": expected_idx,
        "sp": expected_sp,
    }

    self._test({
        "example_names": names,
        "serialized": ops.convert_to_tensor(serialized),
        "features": {
            "idx": parsing_ops.VarLenFeature(dtypes.int64),
            "sp": parsing_ops.SparseFeature(
                ["idx"], "val", dtypes.string, [13]),
        }
    }, expected_output)

  def _testSerializedContainingVarLenDenseLargerBatch(self, batch_size):
    # During parsing, data read from the serialized proto is stored in buffers.
    # For small batch sizes, a buffer will contain one minibatch entry.
    # For larger batch sizes, a buffer may contain several minibatch
    # entries.  This test identified a bug where the code that copied
    # data out of the buffers and into the output tensors assumed each
    # buffer only contained one minibatch entry.  The bug has since been fixed.
    truth_int = [i for i in range(batch_size)]
    truth_str = [[("foo%d" % i).encode(), ("bar%d" % i).encode()]
                 for i in range(batch_size)]

    # expected_str keeps two columns per row; dropped entries are replaced by
    # the padding value, while truth_str (the actual input) gets shorter.
    expected_str = copy.deepcopy(truth_str)

    # Delete some intermediate entries
    for i in range(batch_size):
      col = 1
      if np.random.rand() < 0.25:
        # w.p. 25%, drop out the second entry
        expected_str[i][col] = b"default"
        col -= 1
        truth_str[i].pop()
      if np.random.rand() < 0.25:
        # w.p. 25%, drop the last remaining entry: the first one if the second
        # was already dropped above (col == 0), otherwise the second.
        expected_str[i][col] = b"default"
        truth_str[i].pop()

    expected_output = {
        # Batch size batch_size, 1 time step.
        "a": np.array(truth_int, dtype=np.int64).reshape(batch_size, 1),
        # Batch size batch_size, 2 time steps.
        "b": np.array(expected_str, dtype="|S").reshape(batch_size, 2),
    }

    original = [
        example(features=features(
            {"a": int64_feature([truth_int[i]]),
             "b": bytes_feature(truth_str[i])}))
        for i in range(batch_size)
    ]

    serialized = [m.SerializeToString() for m in original]

    self._test({
        "serialized": ops.convert_to_tensor(serialized, dtype=dtypes.string),
        "features": {
            "a": parsing_ops.FixedLenSequenceFeature(
                shape=(), dtype=dtypes.int64, allow_missing=True,
                default_value=-1),
            "b": parsing_ops.FixedLenSequenceFeature(
                shape=[], dtype=dtypes.string, allow_missing=True,
                default_value="default"),
        }
    }, expected_output)

  def testSerializedContainingVarLenDenseLargerBatch(self):
    # Fixed seed so the random drop-outs in the helper are reproducible.
    np.random.seed(3456)
    for batch_size in (1, 10, 20, 100, 256):
      self._testSerializedContainingVarLenDenseLargerBatch(batch_size)

  def testSerializedContainingVarLenDense(self):
    # Exercises FixedLenSequenceFeature with allow_missing=True: default
    # padding, custom padding, and several invalid configurations.
    aname = "a"
    bname = "b"
    cname = "c"
    dname = "d"
    example_names = ["in1", "in2", "in3", "in4"]
    original = [
        example(features=features({
            cname: int64_feature([2]),
        })),
        example(features=features({
            aname: float_feature([1, 1]),
            bname: bytes_feature([b"b0_str", b"b1_str"]),
        })),
        example(features=features({
            aname: float_feature([-1, -1, 2, 2]),
            bname: bytes_feature([b"b1"]),
        })),
        example(features=features({
            aname: float_feature([]),
            cname: int64_feature([3]),
        })),
    ]

    serialized = [m.SerializeToString() for m in original]

    # Rows shorter than the longest one in the batch are padded (with 0 / ""
    # here, since no default_value is given); "d" never appears, so its
    # sequence dimension is 0.
    expected_output = {
        aname:
            np.array(
                [
                    [0, 0, 0, 0],
                    [1, 1, 0, 0],
                    [-1, -1, 2, 2],
                    [0, 0, 0, 0],
                ],
                dtype=np.float32).reshape(4, 2, 2, 1),
        bname:
            np.array(
                [["", ""], ["b0_str", "b1_str"], ["b1", ""], ["", ""]],
                dtype=bytes).reshape(4, 2, 1, 1, 1),
        cname:
            np.array([2, 0, 0, 3], dtype=np.int64).reshape(4, 1),
        dname:
            np.empty(shape=(4, 0), dtype=bytes),
    }

    self._test({
        "example_names": example_names,
        "serialized": ops.convert_to_tensor(serialized),
        "features": {
            aname:
                parsing_ops.FixedLenSequenceFeature(
                    (2, 1), dtype=dtypes.float32, allow_missing=True),
            bname:
                parsing_ops.FixedLenSequenceFeature(
                    (1, 1, 1), dtype=dtypes.string, allow_missing=True),
            cname:
                parsing_ops.FixedLenSequenceFeature(
                    shape=[], dtype=dtypes.int64, allow_missing=True),
            dname:
                parsing_ops.FixedLenSequenceFeature(
                    shape=[], dtype=dtypes.string, allow_missing=True),
        }
    }, expected_output)

    # Test with padding values.
    expected_output_custom_padding = dict(expected_output)
    expected_output_custom_padding[aname] = np.array(
        [
            [-2, -2, -2, -2],
            [1, 1, -2, -2],
            [-1, -1, 2, 2],
            [-2, -2, -2, -2],
        ],
        dtype=np.float32).reshape(4, 2, 2, 1)

    self._test({
        "example_names": example_names,
        "serialized": ops.convert_to_tensor(serialized),
        "features": {
            aname:
                parsing_ops.FixedLenSequenceFeature(
                    (2, 1), dtype=dtypes.float32, allow_missing=True,
                    default_value=-2.0),
            bname:
                parsing_ops.FixedLenSequenceFeature(
                    (1, 1, 1), dtype=dtypes.string, allow_missing=True),
            cname:
                parsing_ops.FixedLenSequenceFeature(
                    shape=[], dtype=dtypes.int64, allow_missing=True),
            dname:
                parsing_ops.FixedLenSequenceFeature(
                    shape=[], dtype=dtypes.string, allow_missing=True),
        }
    }, expected_output_custom_padding)

    # Change number of required values so the inputs are not a
    # multiple of this size.
    self._test(
        {
            "example_names": example_names,
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                aname:
                    parsing_ops.FixedLenSequenceFeature(
                        (2, 1), dtype=dtypes.float32, allow_missing=True),
                bname:
                    parsing_ops.FixedLenSequenceFeature(
                        (2, 1, 1), dtype=dtypes.string, allow_missing=True),
            }
        },
        expected_err=(
            errors_impl.OpError, "Name: in3, Key: b, Index: 2.  "
            "Number of bytes values is not a multiple of stride length."))

    # An empty default_value cannot be reshaped into a padding element.
    self._test(
        {
            "example_names": example_names,
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                aname:
                    parsing_ops.FixedLenSequenceFeature(
                        (2, 1), dtype=dtypes.float32, allow_missing=True,
                        default_value=[]),
                bname:
                    parsing_ops.FixedLenSequenceFeature(
                        (2, 1, 1), dtype=dtypes.string, allow_missing=True),
            }
        },
        expected_err=(ValueError,
                      "Cannot reshape a tensor with 0 elements to shape"))

    # FixedLenFeature does not accept an unknown leading dimension.
    self._test(
        {
            "example_names": example_names,
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                aname:
                    parsing_ops.FixedLenFeature(
                        (None, 2, 1), dtype=dtypes.float32),
                bname:
                    parsing_ops.FixedLenSequenceFeature(
                        (2, 1, 1), dtype=dtypes.string, allow_missing=True),
            }
        },
        expected_err=(ValueError,
                      "First dimension of shape for feature a unknown. "
                      "Consider using FixedLenSequenceFeature."))

    # FixedLenFeature does not accept an unknown non-leading dimension either.
    self._test(
        {
            "example_names": example_names,
            "serialized": ops.convert_to_tensor(serialized),
            "features": {
                cname:
                    parsing_ops.FixedLenFeature(
                        (1, None), dtype=dtypes.int64, default_value=[[1]]),
            }
        },
        expected_err=(ValueError,
                      "All dimensions of shape for feature c need to be known "
                      r"but received \(1, None\)."))

    # allow_missing=False on "c" is rejected by parse_example.
    self._test({
        "example_names": example_names,
        "serialized": ops.convert_to_tensor(serialized),
        "features": {
            aname:
                parsing_ops.FixedLenSequenceFeature(
                    (2, 1), dtype=dtypes.float32, allow_missing=True),
            bname:
                parsing_ops.FixedLenSequenceFeature(
                    (1, 1, 1), dtype=dtypes.string, allow_missing=True),
            cname:
                parsing_ops.FixedLenSequenceFeature(
                    shape=[], dtype=dtypes.int64, allow_missing=False),
            dname:
                parsing_ops.FixedLenSequenceFeature(
                    shape=[], dtype=dtypes.string, allow_missing=True),
        }
    }, expected_err=(ValueError,
                     "Unsupported: FixedLenSequenceFeature requires "
                     "allow_missing to be True."))


class ParseSingleExampleTest(test.TestCase):

  def _test(self, kwargs, expected_values=None, expected_err=None):
    """Runs `parsing_ops.parse_single_example` and checks values or error.

    Args:
      kwargs: keyword arguments forwarded verbatim to
        `parsing_ops.parse_single_example`; `kwargs["features"]` is also read
        for the shape checks.
      expected_values: dict of expected outputs, in the format accepted by
        `_compare_output_to_expected`. Used only when `expected_err` is
        falsy.
      expected_err: optional `(exception_type, message_pattern)` pair handed
        to `assertRaisesWithPredicateMatch`.
    """
    with self.test_session() as sess:
      if expected_err:
        with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                 expected_err[1]):
          out = parsing_ops.parse_single_example(**kwargs)
          sess.run(flatten_values_tensors_or_sparse(out.values()))
      else:
        # Returns dict w/ Tensors and SparseTensors.
        out = parsing_ops.parse_single_example(**kwargs)
        # Check values.
        tf_result = sess.run(flatten_values_tensors_or_sparse(out.values()))
        _compare_output_to_expected(self, out, expected_values, tf_result)

      # Check shapes.
      # Unlike parse_example there is no batch dimension here, so sparse
      # outputs are rank 1.
      for k, f in kwargs["features"].items():
        if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
          self.assertEqual(tuple(out[k].get_shape()),
                           tensor_shape.as_shape(f.shape))
        elif isinstance(f, parsing_ops.VarLenFeature):
          self.assertEqual(
              tuple(out[k].indices.get_shape().as_list()), (None, 1))
          self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
          self.assertEqual(
              tuple(out[k].dense_shape.get_shape().as_list()), (1,))

  def testSingleExampleWithSparseAndSparseFeatureAndDense(self):
    # One example parsed with every feature kind; "a" and "b" are absent from
    # the proto and are expected to come back as their defaults.
    original = example(features=features({
        "c": float_feature([3, 4]),
        "d": float_feature([0.0, 1.0]),
        "val": bytes_feature([b"a", b"b"]),
        "idx": int64_feature([0, 3]),
        "st_a": float_feature([3.0, 4.0])
    }))

    serialized = original.SerializeToString()

    expected_st_a = (
        np.array(
            [[0], [1]], dtype=np.int64),  # indices
        np.array(
            [3.0, 4.0], dtype=np.float32),  # values
        np.array(
            [2], dtype=np.int64))  # shape: max_values = 2

    expected_sp = (  # indices, values, shape
        np.array(
            [[0], [3]], dtype=np.int64), np.array(
                ["a", "b"], dtype="|S"), np.array(
                    [13], dtype=np.int64))  # max_values = 13

    a_default = [1, 2, 3]
    b_default = np.random.rand(3, 3).astype(bytes)
    expected_output = {
        "st_a": expected_st_a,
        "sp": expected_sp,
        "a": [a_default],
        "b": b_default,
        "c": np.array([3, 4], dtype=np.float32),
        "d": np.array([0.0, 1.0], dtype=np.float32),
    }

    self._test(
        {
            "example_names":
                ops.convert_to_tensor("in1"),
            "serialized":
                ops.convert_to_tensor(serialized),
            "features": {
                "st_a":
                    parsing_ops.VarLenFeature(dtypes.float32),
                "sp":
                    parsing_ops.SparseFeature(
                        ["idx"], "val", dtypes.string, [13]),
                "a":
                    parsing_ops.FixedLenFeature(
                        (1, 3), dtypes.int64, default_value=a_default),
                "b":
                    parsing_ops.FixedLenFeature(
                        (3, 3), dtypes.string, default_value=b_default),
                # Feature "c" must be provided, since it has no default_value.
                "c":
                    parsing_ops.FixedLenFeature(2, dtypes.float32),
                "d":
                    parsing_ops.FixedLenSequenceFeature([],
                                                        dtypes.float32,
                                                        allow_missing=True)
            }
        },
        expected_output)


class ParseSequenceExampleTest(test.TestCase):

  def testCreateSequenceExample(self):
    # Only checks that the proto helpers can build and serialize a
    # SequenceExample; no parsing op is involved.
    value = sequence_example(
        context=features({
            "global_feature": float_feature([1, 2, 3]),
        }),
        feature_lists=feature_lists({
            "repeated_feature_2_frames":
                feature_list([
                    bytes_feature([b"a", b"b", b"c"]),
                    bytes_feature([b"a", b"d", b"e"])
                ]),
            "repeated_feature_3_frames":
                feature_list([
                    int64_feature([3, 4, 5, 6, 7]),
                    int64_feature([-1, 0, 0, 0, 0]),
                    int64_feature([1, 2, 3, 4, 5])
                ])
        }))
    value.SerializeToString()  # Smoke test

  def _test(self,
            kwargs,
            expected_context_values=None,
            expected_feat_list_values=None,
            expected_err=None):
    """Runs `parse_single_sequence_example` and checks values or error.

    Args:
      kwargs: keyword arguments forwarded verbatim to
        `parsing_ops.parse_single_sequence_example`. If it contains
        "context_features", those are also used for static shape checks.
      expected_context_values: expected context outputs, in the format
        accepted by `_compare_output_to_expected`; `None` is treated as `{}`.
      expected_feat_list_values: expected feature-list outputs, same format;
        `None` is treated as `{}`.
      expected_err: optional `(exception_type, message_pattern)` pair handed
        to `assertRaisesWithPredicateMatch`.
    """
    expected_context_values = expected_context_values or {}
    expected_feat_list_values = expected_feat_list_values or {}

    with self.test_session() as sess:
      if expected_err:
        with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                 expected_err[1]):
          c_out, fl_out = parsing_ops.parse_single_sequence_example(**kwargs)
          # Either output dict may be empty; only run the non-empty ones.
          if c_out:
            sess.run(flatten_values_tensors_or_sparse(c_out.values()))
          if fl_out:
            sess.run(flatten_values_tensors_or_sparse(fl_out.values()))
      else:
        # Returns dicts w/ Tensors and SparseTensors.
        context_out, feat_list_out = parsing_ops.parse_single_sequence_example(
            **kwargs)
        context_result = sess.run(
            flatten_values_tensors_or_sparse(context_out.values(
            ))) if context_out else []
        feat_list_result = sess.run(
            flatten_values_tensors_or_sparse(feat_list_out.values(
            ))) if feat_list_out else []
        # Check values.
        _compare_output_to_expected(self, context_out, expected_context_values,
                                    context_result)
        _compare_output_to_expected(self, feat_list_out,
                                    expected_feat_list_values, feat_list_result)

      # Check static shapes of the context outputs (feature-list outputs are
      # not shape-checked here).
      if "context_features" in kwargs:
        for k, f in kwargs["context_features"].items():
          if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
            self.assertEqual(
                tuple(context_out[k].get_shape().as_list()), f.shape)
          elif isinstance(f, parsing_ops.VarLenFeature):
            self.assertEqual(
                tuple(context_out[k].indices.get_shape().as_list()), (None, 1))
            self.assertEqual(
                tuple(context_out[k].values.get_shape().as_list()), (None,))
            self.assertEqual(
                tuple(context_out[k].dense_shape.get_shape().as_list()), (1,))

  def testSequenceExampleWithSparseAndDenseContext(self):
    # Context-only SequenceExample; "a" and "b" are absent from the proto and
    # are expected to come back as their defaults.
    original = sequence_example(context=features({
        "c": float_feature([3, 4]),
        "st_a": float_feature([3.0, 4.0])
    }))

    serialized = original.SerializeToString()

    expected_st_a = (
        np.array(
            [[0], [1]], dtype=np.int64),  # indices
        np.array(
            [3.0, 4.0], dtype=np.float32),  # values
        np.array(
            [2], dtype=np.int64))  # shape: num_features = 2

    a_default = [1, 2, 3]
    b_default = np.random.rand(3, 3).astype(bytes)
    expected_context_output = {
        "st_a": expected_st_a,
        "a": [a_default],
        "b": b_default,
        "c": np.array(
            [3, 4], dtype=np.float32),
    }

    self._test(
        {
            "example_name":
                "in1",
            "serialized":
                ops.convert_to_tensor(serialized),
            "context_features": {
                "st_a":
                    parsing_ops.VarLenFeature(dtypes.float32),
                "a":
                    parsing_ops.FixedLenFeature(
                        (1, 3), dtypes.int64, default_value=a_default),
                "b":
                    parsing_ops.FixedLenFeature(
                        (3, 3), dtypes.string, default_value=b_default),
                # Feature "c" must be provided, since it has no default_value.
                "c":
                    parsing_ops.FixedLenFeature((2,), dtypes.float32),
            }
        },
        expected_context_values=expected_context_output)

  def testSequenceExampleWithMultipleSizeFeatureLists(self):
    # Feature lists of different lengths (4, 1 and 2 time steps) plus a
    # missing list "d" that is allowed via allow_missing=True.
    original = sequence_example(feature_lists=feature_lists({
        "a":
            feature_list([
                int64_feature([-1, 0, 1]),
                int64_feature([2, 3, 4]),
                int64_feature([5, 6, 7]),
                int64_feature([8, 9, 10]),
            ]),
        "b":
            feature_list([bytes_feature([b"r00", b"r01", b"r10", b"r11"])]),
        "c":
            feature_list([float_feature([3, 4]), float_feature([-1, 2])]),
    }))

    serialized = original.SerializeToString()

    expected_feature_list_output = {
        "a": np.array(
            [  # outer dimension is time.
                [[-1, 0, 1]],  # inside are 1x3 matrices
                [[2, 3, 4]],
                [[5, 6, 7]],
                [[8, 9, 10]]
            ],
            dtype=np.int64),
        "b": np.array(
            [  # outer dimension is time, inside are 2x2 matrices
                [[b"r00", b"r01"], [b"r10", b"r11"]]
            ],
            dtype=bytes),
        "c": np.array(
            [  # outer dimension is time, inside are 2-vectors
                [3, 4], [-1, 2]
            ],
            dtype=np.float32),
        "d": np.empty(
            shape=(0, 5), dtype=np.float32),  # empty_allowed_missing
    }

    self._test(
        {
            "example_name":
                "in1",
            "serialized":
                ops.convert_to_tensor(serialized),
            "sequence_features": {
                "a":
                    parsing_ops.FixedLenSequenceFeature((1, 3), dtypes.int64),
                "b":
                    parsing_ops.FixedLenSequenceFeature((2, 2), dtypes.string),
                "c":
                    parsing_ops.FixedLenSequenceFeature(2, dtypes.float32),
                "d":
                    parsing_ops.FixedLenSequenceFeature(
                        (5,), dtypes.float32, allow_missing=True),
            }
        },
        expected_feat_list_values=expected_feature_list_output)

  def testSequenceExampleWithoutDebugName(self):
    original = sequence_example(feature_lists=feature_lists({
        "a":
            feature_list([int64_feature([3, 4]), int64_feature([1, 0])]),
        "st_a":
            feature_list([
                float_feature([3.0, 4.0]), float_feature([5.0]),
                float_feature([])
            ]),
        "st_b":
            feature_list([
                bytes_feature([b"a"]), bytes_feature([]), bytes_feature([]),
                bytes_feature([b"b", b"c"])
            ])
    }))

    serialized = original.SerializeToString()

    expected_st_a = (
        np.array(
            [[0, 0], [0, 1], [1, 0]], dtype=np.int64),  # indices
        np.array(
            [3.0, 4.0, 5.0], dtype=np.float32),  # values
        np.array(
            [3, 2], dtype=np.int64))  # shape: num_time = 3, max_feat = 2

    expected_st_b = (
        np.array(
            [[0, 0], [3, 0], [3, 1]], dtype=np.int64),  # indices
        np.array(
            ["a", "b", "c"], dtype="|S"),  # values
        np.array(
            [4, 2],
dtype=np.int64)) # shape: num_time = 4, max_feat = 2 1238 1239 expected_st_c = ( 1240 np.empty( 1241 (0, 2), dtype=np.int64), # indices 1242 np.empty( 1243 (0,), dtype=np.int64), # values 1244 np.array( 1245 [0, 0], dtype=np.int64)) # shape: num_time = 0, max_feat = 0 1246 1247 expected_feature_list_output = { 1248 "a": np.array( 1249 [[3, 4], [1, 0]], dtype=np.int64), 1250 "st_a": expected_st_a, 1251 "st_b": expected_st_b, 1252 "st_c": expected_st_c, 1253 } 1254 1255 self._test( 1256 { 1257 "serialized": ops.convert_to_tensor(serialized), 1258 "sequence_features": { 1259 "st_a": parsing_ops.VarLenFeature(dtypes.float32), 1260 "st_b": parsing_ops.VarLenFeature(dtypes.string), 1261 "st_c": parsing_ops.VarLenFeature(dtypes.int64), 1262 "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64), 1263 } 1264 }, 1265 expected_feat_list_values=expected_feature_list_output) 1266 1267 def testSequenceExampleWithSparseAndDenseFeatureLists(self): 1268 original = sequence_example(feature_lists=feature_lists({ 1269 "a": 1270 feature_list([int64_feature([3, 4]), int64_feature([1, 0])]), 1271 "st_a": 1272 feature_list([ 1273 float_feature([3.0, 4.0]), float_feature([5.0]), 1274 float_feature([]) 1275 ]), 1276 "st_b": 1277 feature_list([ 1278 bytes_feature([b"a"]), bytes_feature([]), bytes_feature([]), 1279 bytes_feature([b"b", b"c"]) 1280 ]) 1281 })) 1282 1283 serialized = original.SerializeToString() 1284 1285 expected_st_a = ( 1286 np.array( 1287 [[0, 0], [0, 1], [1, 0]], dtype=np.int64), # indices 1288 np.array( 1289 [3.0, 4.0, 5.0], dtype=np.float32), # values 1290 np.array( 1291 [3, 2], dtype=np.int64)) # shape: num_time = 3, max_feat = 2 1292 1293 expected_st_b = ( 1294 np.array( 1295 [[0, 0], [3, 0], [3, 1]], dtype=np.int64), # indices 1296 np.array( 1297 ["a", "b", "c"], dtype="|S"), # values 1298 np.array( 1299 [4, 2], dtype=np.int64)) # shape: num_time = 4, max_feat = 2 1300 1301 expected_st_c = ( 1302 np.empty( 1303 (0, 2), dtype=np.int64), # indices 1304 np.empty( 
1305 (0,), dtype=np.int64), # values 1306 np.array( 1307 [0, 0], dtype=np.int64)) # shape: num_time = 0, max_feat = 0 1308 1309 expected_feature_list_output = { 1310 "a": np.array( 1311 [[3, 4], [1, 0]], dtype=np.int64), 1312 "st_a": expected_st_a, 1313 "st_b": expected_st_b, 1314 "st_c": expected_st_c, 1315 } 1316 1317 self._test( 1318 { 1319 "example_name": "in1", 1320 "serialized": ops.convert_to_tensor(serialized), 1321 "sequence_features": { 1322 "st_a": parsing_ops.VarLenFeature(dtypes.float32), 1323 "st_b": parsing_ops.VarLenFeature(dtypes.string), 1324 "st_c": parsing_ops.VarLenFeature(dtypes.int64), 1325 "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64), 1326 } 1327 }, 1328 expected_feat_list_values=expected_feature_list_output) 1329 1330 def testSequenceExampleWithEmptyFeatureInFeatureLists(self): 1331 original = sequence_example(feature_lists=feature_lists({ 1332 "st_a": 1333 feature_list([ 1334 float_feature([3.0, 4.0]), 1335 feature(), 1336 float_feature([5.0]), 1337 ]), 1338 })) 1339 1340 serialized = original.SerializeToString() 1341 1342 expected_st_a = ( 1343 np.array( 1344 [[0, 0], [0, 1], [2, 0]], dtype=np.int64), # indices 1345 np.array( 1346 [3.0, 4.0, 5.0], dtype=np.float32), # values 1347 np.array( 1348 [3, 2], dtype=np.int64)) # shape: num_time = 3, max_feat = 2 1349 1350 expected_feature_list_output = { 1351 "st_a": expected_st_a, 1352 } 1353 1354 self._test( 1355 { 1356 "example_name": "in1", 1357 "serialized": ops.convert_to_tensor(serialized), 1358 "sequence_features": { 1359 "st_a": parsing_ops.VarLenFeature(dtypes.float32), 1360 } 1361 }, 1362 expected_feat_list_values=expected_feature_list_output) 1363 1364 def testSequenceExampleListWithInconsistentDataFails(self): 1365 original = sequence_example(feature_lists=feature_lists({ 1366 "a": feature_list([int64_feature([-1, 0]), float_feature([2, 3])]) 1367 })) 1368 1369 serialized = original.SerializeToString() 1370 1371 self._test( 1372 { 1373 "example_name": "in1", 1374 
"serialized": ops.convert_to_tensor(serialized), 1375 "sequence_features": { 1376 "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64) 1377 } 1378 }, 1379 expected_err=(errors_impl.OpError, "Feature list: a, Index: 1." 1380 " Data types don't match. Expected type: int64")) 1381 1382 def testSequenceExampleListWithWrongDataTypeFails(self): 1383 original = sequence_example(feature_lists=feature_lists({ 1384 "a": feature_list([float_feature([2, 3])]) 1385 })) 1386 1387 serialized = original.SerializeToString() 1388 1389 self._test( 1390 { 1391 "example_name": "in1", 1392 "serialized": ops.convert_to_tensor(serialized), 1393 "sequence_features": { 1394 "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64) 1395 } 1396 }, 1397 expected_err=(errors_impl.OpError, 1398 "Feature list: a, Index: 0. Data types don't match." 1399 " Expected type: int64")) 1400 1401 def testSequenceExampleListWithWrongSparseDataTypeFails(self): 1402 original = sequence_example(feature_lists=feature_lists({ 1403 "a": 1404 feature_list([ 1405 int64_feature([3, 4]), int64_feature([1, 2]), 1406 float_feature([2.0, 3.0]) 1407 ]) 1408 })) 1409 1410 serialized = original.SerializeToString() 1411 1412 self._test( 1413 { 1414 "example_name": "in1", 1415 "serialized": ops.convert_to_tensor(serialized), 1416 "sequence_features": { 1417 "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64) 1418 } 1419 }, 1420 expected_err=(errors_impl.OpError, 1421 "Name: in1, Feature list: a, Index: 2." 1422 " Data types don't match. 
Expected type: int64" 1423 " Feature is: float_list")) 1424 1425 def testSequenceExampleListWithWrongShapeFails(self): 1426 original = sequence_example(feature_lists=feature_lists({ 1427 "a": feature_list([int64_feature([2, 3]), int64_feature([2, 3, 4])]), 1428 })) 1429 1430 serialized = original.SerializeToString() 1431 1432 self._test( 1433 { 1434 "example_name": "in1", 1435 "serialized": ops.convert_to_tensor(serialized), 1436 "sequence_features": { 1437 "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64) 1438 } 1439 }, 1440 expected_err=(errors_impl.OpError, r"Name: in1, Key: a, Index: 1." 1441 r" Number of int64 values != expected." 1442 r" values size: 3 but output shape: \[2\]")) 1443 1444 def testSequenceExampleWithMissingFeatureListFails(self): 1445 original = sequence_example(feature_lists=feature_lists({})) 1446 1447 # Test fails because we didn't add: 1448 # feature_list_dense_defaults = {"a": None} 1449 self._test( 1450 { 1451 "example_name": "in1", 1452 "serialized": ops.convert_to_tensor(original.SerializeToString()), 1453 "sequence_features": { 1454 "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64) 1455 } 1456 }, 1457 expected_err=( 1458 errors_impl.OpError, 1459 "Name: in1, Feature list 'a' is required but could not be found." 
1460 " Did you mean to include it in" 1461 " feature_list_dense_missing_assumed_empty or" 1462 " feature_list_dense_defaults?")) 1463 1464 1465class DecodeJSONExampleTest(test.TestCase): 1466 1467 def _testRoundTrip(self, examples): 1468 with self.test_session() as sess: 1469 examples = np.array(examples, dtype=np.object) 1470 1471 json_tensor = constant_op.constant( 1472 [json_format.MessageToJson(m) for m in examples.flatten()], 1473 shape=examples.shape, 1474 dtype=dtypes.string) 1475 binary_tensor = parsing_ops.decode_json_example(json_tensor) 1476 binary_val = sess.run(binary_tensor) 1477 1478 if examples.shape: 1479 self.assertShapeEqual(binary_val, json_tensor) 1480 for input_example, output_binary in zip( 1481 np.array(examples).flatten(), binary_val.flatten()): 1482 output_example = example_pb2.Example() 1483 output_example.ParseFromString(output_binary) 1484 self.assertProtoEquals(input_example, output_example) 1485 else: 1486 output_example = example_pb2.Example() 1487 output_example.ParseFromString(binary_val) 1488 self.assertProtoEquals(examples.item(), output_example) 1489 1490 def testEmptyTensor(self): 1491 self._testRoundTrip([]) 1492 self._testRoundTrip([[], [], []]) 1493 1494 def testEmptyExamples(self): 1495 self._testRoundTrip([example(), example(), example()]) 1496 1497 def testDenseFeaturesScalar(self): 1498 self._testRoundTrip( 1499 example(features=features({ 1500 "a": float_feature([1, 1, 3]) 1501 }))) 1502 1503 def testDenseFeaturesVector(self): 1504 self._testRoundTrip([ 1505 example(features=features({ 1506 "a": float_feature([1, 1, 3]) 1507 })), 1508 example(features=features({ 1509 "a": float_feature([-1, -1, 2]) 1510 })), 1511 ]) 1512 1513 def testDenseFeaturesMatrix(self): 1514 self._testRoundTrip([ 1515 [example(features=features({ 1516 "a": float_feature([1, 1, 3]) 1517 }))], 1518 [example(features=features({ 1519 "a": float_feature([-1, -1, 2]) 1520 }))], 1521 ]) 1522 1523 def testSparseFeatures(self): 1524 self._testRoundTrip([ 
1525 example(features=features({ 1526 "st_c": float_feature([3, 4]) 1527 })), 1528 example(features=features({ 1529 "st_c": float_feature([]) 1530 })), 1531 example(features=features({ 1532 "st_d": feature() 1533 })), 1534 example(features=features({ 1535 "st_c": float_feature([1, 2, -1]), 1536 "st_d": bytes_feature([b"hi"]) 1537 })), 1538 ]) 1539 1540 def testSerializedContainingBytes(self): 1541 aname = "a" 1542 bname = "b*has+a:tricky_name" 1543 self._testRoundTrip([ 1544 example(features=features({ 1545 aname: float_feature([1, 1]), 1546 bname: bytes_feature([b"b0_str"]) 1547 })), 1548 example(features=features({ 1549 aname: float_feature([-1, -1]), 1550 bname: bytes_feature([b"b1"]) 1551 })), 1552 ]) 1553 1554 def testInvalidSyntax(self): 1555 with self.test_session() as sess: 1556 json_tensor = constant_op.constant(["{]"]) 1557 binary_tensor = parsing_ops.decode_json_example(json_tensor) 1558 with self.assertRaisesOpError("Error while parsing JSON"): 1559 sess.run(binary_tensor) 1560 1561 1562class ParseTensorOpTest(test.TestCase): 1563 1564 def testToFloat32(self): 1565 with self.test_session(): 1566 expected = np.random.rand(3, 4, 5).astype(np.float32) 1567 tensor_proto = tensor_util.make_tensor_proto(expected) 1568 1569 serialized = array_ops.placeholder(dtypes.string) 1570 tensor = parsing_ops.parse_tensor(serialized, dtypes.float32) 1571 1572 result = tensor.eval( 1573 feed_dict={serialized: tensor_proto.SerializeToString()}) 1574 1575 self.assertAllEqual(expected, result) 1576 1577 def testToUint8(self): 1578 with self.test_session(): 1579 expected = np.random.rand(3, 4, 5).astype(np.uint8) 1580 tensor_proto = tensor_util.make_tensor_proto(expected) 1581 1582 serialized = array_ops.placeholder(dtypes.string) 1583 tensor = parsing_ops.parse_tensor(serialized, dtypes.uint8) 1584 1585 result = tensor.eval( 1586 feed_dict={serialized: tensor_proto.SerializeToString()}) 1587 1588 self.assertAllEqual(expected, result) 1589 1590 def testTypeMismatch(self): 
1591 with self.test_session(): 1592 expected = np.random.rand(3, 4, 5).astype(np.uint8) 1593 tensor_proto = tensor_util.make_tensor_proto(expected) 1594 1595 serialized = array_ops.placeholder(dtypes.string) 1596 tensor = parsing_ops.parse_tensor(serialized, dtypes.uint16) 1597 1598 with self.assertRaisesOpError( 1599 r"Type mismatch between parsed tensor \(uint8\) and dtype " 1600 r"\(uint16\)"): 1601 tensor.eval(feed_dict={serialized: tensor_proto.SerializeToString()}) 1602 1603 def testInvalidInput(self): 1604 with self.test_session(): 1605 serialized = array_ops.placeholder(dtypes.string) 1606 tensor = parsing_ops.parse_tensor(serialized, dtypes.uint16) 1607 1608 with self.assertRaisesOpError( 1609 "Could not parse `serialized` as TensorProto: 'bogus'"): 1610 tensor.eval(feed_dict={serialized: "bogus"}) 1611 1612 with self.assertRaisesOpError( 1613 r"Expected `serialized` to be a scalar, got shape: \[1\]"): 1614 tensor.eval(feed_dict={serialized: ["bogus"]}) 1615 1616 1617if __name__ == "__main__": 1618 test.main() 1619