1# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Implementation of image ops.""" 16 17from __future__ import absolute_import 18from __future__ import division 19from __future__ import print_function 20 21import functools 22import numpy as np 23 24from tensorflow.python.compat import compat 25from tensorflow.python.eager import def_function 26from tensorflow.python.framework import constant_op 27from tensorflow.python.framework import dtypes 28from tensorflow.python.framework import ops 29from tensorflow.python.framework import random_seed 30from tensorflow.python.framework import tensor_shape 31from tensorflow.python.framework import tensor_util 32from tensorflow.python.ops import array_ops 33from tensorflow.python.ops import check_ops 34from tensorflow.python.ops import control_flow_ops 35from tensorflow.python.ops import gen_image_ops 36from tensorflow.python.ops import math_ops 37from tensorflow.python.ops import nn 38from tensorflow.python.ops import nn_ops 39from tensorflow.python.ops import random_ops 40from tensorflow.python.ops import sort_ops 41from tensorflow.python.ops import stateless_random_ops 42from tensorflow.python.ops import string_ops 43from tensorflow.python.ops import variables 44from tensorflow.python.util import deprecation 45from tensorflow.python.util import dispatch 46from tensorflow.python.util.tf_export import tf_export 47 48ops.NotDifferentiable('RandomCrop') 49# TODO(b/31222613): This op may be differentiable, and there may be 50# latent bugs here. 51ops.NotDifferentiable('HSVToRGB') 52ops.NotDifferentiable('DrawBoundingBoxes') 53ops.NotDifferentiable('SampleDistortedBoundingBox') 54ops.NotDifferentiable('SampleDistortedBoundingBoxV2') 55# TODO(bsteiner): Implement the gradient function for extract_glimpse 56# TODO(b/31222613): This op may be differentiable, and there may be 57# latent bugs here. 58ops.NotDifferentiable('ExtractGlimpse') 59ops.NotDifferentiable('NonMaxSuppression') 60ops.NotDifferentiable('NonMaxSuppressionV2') 61ops.NotDifferentiable('NonMaxSuppressionWithOverlaps') 62ops.NotDifferentiable('GenerateBoundingBoxProposals') 63 64 65# pylint: disable=invalid-name 66def _assert(cond, ex_type, msg): 67 """A polymorphic assert, works with tensors and boolean expressions. 68 69 If `cond` is not a tensor, behave like an ordinary assert statement, except 70 that a empty list is returned. If `cond` is a tensor, return a list 71 containing a single TensorFlow assert op. 72 73 Args: 74 cond: Something evaluates to a boolean value. May be a tensor. 75 ex_type: The exception class to use. 76 msg: The error message. 77 78 Returns: 79 A list, containing at most one assert op. 80 """ 81 if _is_tensor(cond): 82 return [control_flow_ops.Assert(cond, [msg])] 83 else: 84 if not cond: 85 raise ex_type(msg) 86 else: 87 return [] 88 89 90def _is_tensor(x): 91 """Returns `True` if `x` is a symbolic tensor-like object. 92 93 Args: 94 x: A python object to check. 95 96 Returns: 97 `True` if `x` is a `tf.Tensor` or `tf.Variable`, otherwise `False`. 98 """ 99 return isinstance(x, (ops.Tensor, variables.Variable)) 100 101 102def _ImageDimensions(image, rank): 103 """Returns the dimensions of an image tensor. 104 105 Args: 106 image: A rank-D Tensor. For 3-D of shape: `[height, width, channels]`. 107 rank: The expected rank of the image 108 109 Returns: 110 A list of corresponding to the dimensions of the 111 input image. Dimensions that are statically known are python integers, 112 otherwise, they are integer scalar tensors. 113 """ 114 if image.get_shape().is_fully_defined(): 115 return image.get_shape().as_list() 116 else: 117 static_shape = image.get_shape().with_rank(rank).as_list() 118 dynamic_shape = array_ops.unstack(array_ops.shape(image), rank) 119 return [ 120 s if s is not None else d for s, d in zip(static_shape, dynamic_shape) 121 ] 122 123 124def _Check3DImage(image, require_static=True): 125 """Assert that we are working with a properly shaped image. 126 127 Args: 128 image: 3-D Tensor of shape [height, width, channels] 129 require_static: If `True`, requires that all dimensions of `image` are known 130 and non-zero. 131 132 Raises: 133 ValueError: if `image.shape` is not a 3-vector. 134 135 Returns: 136 An empty list, if `image` has fully defined dimensions. Otherwise, a list 137 containing an assert op is returned. 138 """ 139 try: 140 image_shape = image.get_shape().with_rank(3) 141 except ValueError: 142 raise ValueError("'image' (shape %s) must be three-dimensional." % 143 image.shape) 144 if require_static and not image_shape.is_fully_defined(): 145 raise ValueError("'image' (shape %s) must be fully defined." % image_shape) 146 if any(x == 0 for x in image_shape): 147 raise ValueError("all dims of 'image.shape' must be > 0: %s" % image_shape) 148 if not image_shape.is_fully_defined(): 149 return [ 150 check_ops.assert_positive( 151 array_ops.shape(image), 152 ["all dims of 'image.shape' " 153 'must be > 0.']) 154 ] 155 else: 156 return [] 157 158 159def _Assert3DImage(image): 160 """Assert that we are working with a properly shaped image. 161 162 Performs the check statically if possible (i.e. if the shape 163 is statically known). Otherwise adds a control dependency 164 to an assert op that checks the dynamic shape. 165 166 Args: 167 image: 3-D Tensor of shape [height, width, channels] 168 169 Raises: 170 ValueError: if `image.shape` is not a 3-vector. 171 172 Returns: 173 If the shape of `image` could be verified statically, `image` is 174 returned unchanged, otherwise there will be a control dependency 175 added that asserts the correct dynamic shape. 176 """ 177 return control_flow_ops.with_dependencies( 178 _Check3DImage(image, require_static=False), image) 179 180 181def _AssertAtLeast3DImage(image): 182 """Assert that we are working with a properly shaped image. 183 184 Performs the check statically if possible (i.e. if the shape 185 is statically known). Otherwise adds a control dependency 186 to an assert op that checks the dynamic shape. 187 188 Args: 189 image: >= 3-D Tensor of size [*, height, width, depth] 190 191 Raises: 192 ValueError: if image.shape is not a [>= 3] vector. 193 194 Returns: 195 If the shape of `image` could be verified statically, `image` is 196 returned unchanged, otherwise there will be a control dependency 197 added that asserts the correct dynamic shape. 198 """ 199 return control_flow_ops.with_dependencies( 200 _CheckAtLeast3DImage(image, require_static=False), image) 201 202 203def _CheckAtLeast3DImage(image, require_static=True): 204 """Assert that we are working with a properly shaped image. 205 206 Args: 207 image: >= 3-D Tensor of size [*, height, width, depth] 208 require_static: If `True`, requires that all dimensions of `image` are known 209 and non-zero. 210 211 Raises: 212 ValueError: if image.shape is not a [>= 3] vector. 213 214 Returns: 215 An empty list, if `image` has fully defined dimensions. Otherwise, a list 216 containing an assert op is returned. 217 """ 218 try: 219 if image.get_shape().ndims is None: 220 image_shape = image.get_shape().with_rank(3) 221 else: 222 image_shape = image.get_shape().with_rank_at_least(3) 223 except ValueError: 224 raise ValueError("'image' (shape %s) must be at least three-dimensional." % 225 image.shape) 226 if require_static and not image_shape.is_fully_defined(): 227 raise ValueError('\'image\' must be fully defined.') 228 if any(x == 0 for x in image_shape[-3:]): 229 raise ValueError('inner 3 dims of \'image.shape\' must be > 0: %s' % 230 image_shape) 231 if not image_shape[-3:].is_fully_defined(): 232 return [ 233 check_ops.assert_positive( 234 array_ops.shape(image)[-3:], 235 ["inner 3 dims of 'image.shape' " 236 'must be > 0.']), 237 check_ops.assert_greater_equal( 238 array_ops.rank(image), 239 3, 240 message="'image' must be at least three-dimensional.") 241 ] 242 else: 243 return [] 244 245 246def _AssertGrayscaleImage(image): 247 """Assert that we are working with a properly shaped grayscale image. 248 249 Performs the check statically if possible (i.e. if the shape 250 is statically known). Otherwise adds a control dependency 251 to an assert op that checks the dynamic shape. 252 253 Args: 254 image: >= 2-D Tensor of size [*, 1] 255 256 Raises: 257 ValueError: if image.shape is not a [>= 2] vector or if 258 last dimension is not size 1. 259 260 Returns: 261 If the shape of `image` could be verified statically, `image` is 262 returned unchanged, otherwise there will be a control dependency 263 added that asserts the correct dynamic shape. 264 """ 265 return control_flow_ops.with_dependencies( 266 _CheckGrayscaleImage(image, require_static=False), image) 267 268 269def _CheckGrayscaleImage(image, require_static=True): 270 """Assert that we are working with properly shaped grayscale image. 271 272 Args: 273 image: >= 2-D Tensor of size [*, 1] 274 require_static: Boolean, whether static shape is required. 275 276 Raises: 277 ValueError: if image.shape is not a [>= 2] vector or if 278 last dimension is not size 1. 279 280 Returns: 281 An empty list, if `image` has fully defined dimensions. Otherwise, a list 282 containing an assert op is returned. 283 """ 284 try: 285 if image.get_shape().ndims is None: 286 image_shape = image.get_shape().with_rank(2) 287 else: 288 image_shape = image.get_shape().with_rank_at_least(2) 289 except ValueError: 290 raise ValueError('A grayscale image (shape %s) must be at least ' 291 'two-dimensional.' % image.shape) 292 if require_static and not image_shape.is_fully_defined(): 293 raise ValueError('\'image\' must be fully defined.') 294 if image_shape.is_fully_defined(): 295 if image_shape[-1] != 1: 296 raise ValueError('Last dimension of a grayscale image should be size 1.') 297 if not image_shape.is_fully_defined(): 298 return [ 299 check_ops.assert_equal( 300 array_ops.shape(image)[-1], 301 1, 302 message='Last dimension of a grayscale image should be size 1.'), 303 check_ops.assert_greater_equal( 304 array_ops.rank(image), 305 3, 306 message='A grayscale image must be at least two-dimensional.') 307 ] 308 else: 309 return [] 310 311 312def fix_image_flip_shape(image, result): 313 """Set the shape to 3 dimensional if we don't know anything else. 314 315 Args: 316 image: original image size 317 result: flipped or transformed image 318 319 Returns: 320 An image whose shape is at least (None, None, None). 321 """ 322 323 image_shape = image.get_shape() 324 if image_shape == tensor_shape.unknown_shape(): 325 result.set_shape([None, None, None]) 326 else: 327 result.set_shape(image_shape) 328 return result 329 330 331@tf_export('image.random_flip_up_down') 332@dispatch.add_dispatch_support 333def random_flip_up_down(image, seed=None): 334 """Randomly flips an image vertically (upside down). 335 336 With a 1 in 2 chance, outputs the contents of `image` flipped along the first 337 dimension, which is `height`. Otherwise, output the image as-is. 338 When passing a batch of images, each image will be randomly flipped 339 independent of other images. 340 341 Example usage: 342 343 >>> image = np.array([[[1], [2]], [[3], [4]]]) 344 >>> tf.image.random_flip_up_down(image, 3).numpy().tolist() 345 [[[3], [4]], [[1], [2]]] 346 347 Randomly flip multiple images. 348 349 >>> images = np.array( 350 ... [ 351 ... [[[1], [2]], [[3], [4]]], 352 ... [[[5], [6]], [[7], [8]]] 353 ... ]) 354 >>> tf.image.random_flip_up_down(images, 4).numpy().tolist() 355 [[[[3], [4]], [[1], [2]]], [[[5], [6]], [[7], [8]]]] 356 357 For producing deterministic results given a `seed` value, use 358 `tf.image.stateless_random_flip_up_down`. Unlike using the `seed` param 359 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 360 same results given the same seed independent of how many times the function is 361 called, and independent of global seed settings (e.g. tf.random.set_seed). 362 363 Args: 364 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 365 of shape `[height, width, channels]`. 366 seed: A Python integer. Used to create a random seed. See 367 `tf.compat.v1.set_random_seed` for behavior. 368 369 Returns: 370 A tensor of the same type and shape as `image`. 371 Raises: 372 ValueError: if the shape of `image` not supported. 373 """ 374 random_func = functools.partial(random_ops.random_uniform, seed=seed) 375 return _random_flip(image, 0, random_func, 'random_flip_up_down') 376 377 378@tf_export('image.random_flip_left_right') 379@dispatch.add_dispatch_support 380def random_flip_left_right(image, seed=None): 381 """Randomly flip an image horizontally (left to right). 382 383 With a 1 in 2 chance, outputs the contents of `image` flipped along the 384 second dimension, which is `width`. Otherwise output the image as-is. 385 When passing a batch of images, each image will be randomly flipped 386 independent of other images. 387 388 Example usage: 389 390 >>> image = np.array([[[1], [2]], [[3], [4]]]) 391 >>> tf.image.random_flip_left_right(image, 5).numpy().tolist() 392 [[[2], [1]], [[4], [3]]] 393 394 Randomly flip multiple images. 395 396 >>> images = np.array( 397 ... [ 398 ... [[[1], [2]], [[3], [4]]], 399 ... [[[5], [6]], [[7], [8]]] 400 ... ]) 401 >>> tf.image.random_flip_left_right(images, 6).numpy().tolist() 402 [[[[2], [1]], [[4], [3]]], [[[5], [6]], [[7], [8]]]] 403 404 For producing deterministic results given a `seed` value, use 405 `tf.image.stateless_random_flip_left_right`. Unlike using the `seed` param 406 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 407 same results given the same seed independent of how many times the function is 408 called, and independent of global seed settings (e.g. tf.random.set_seed). 409 410 Args: 411 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 412 of shape `[height, width, channels]`. 413 seed: A Python integer. Used to create a random seed. See 414 `tf.compat.v1.set_random_seed` for behavior. 415 416 Returns: 417 A tensor of the same type and shape as `image`. 418 419 Raises: 420 ValueError: if the shape of `image` not supported. 421 """ 422 random_func = functools.partial(random_ops.random_uniform, seed=seed) 423 return _random_flip(image, 1, random_func, 'random_flip_left_right') 424 425 426@tf_export('image.stateless_random_flip_left_right', v1=[]) 427@dispatch.add_dispatch_support 428def stateless_random_flip_left_right(image, seed): 429 """Randomly flip an image horizontally (left to right) deterministically. 430 431 Guarantees the same results given the same `seed` independent of how many 432 times the function is called, and independent of global seed settings (e.g. 433 `tf.random.set_seed`). 434 435 Example usage: 436 437 >>> image = np.array([[[1], [2]], [[3], [4]]]) 438 >>> seed = (2, 3) 439 >>> tf.image.stateless_random_flip_left_right(image, seed).numpy().tolist() 440 [[[2], [1]], [[4], [3]]] 441 442 Args: 443 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 444 of shape `[height, width, channels]`. 445 seed: A shape [2] Tensor, the seed to the random number generator. Must have 446 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 447 448 Returns: 449 A tensor of the same type and shape as `image`. 450 """ 451 random_func = functools.partial( 452 stateless_random_ops.stateless_random_uniform, seed=seed) 453 return _random_flip( 454 image, 1, random_func, 'stateless_random_flip_left_right') 455 456 457@tf_export('image.stateless_random_flip_up_down', v1=[]) 458@dispatch.add_dispatch_support 459def stateless_random_flip_up_down(image, seed): 460 """Randomly flip an image vertically (upside down) deterministically. 461 462 Guarantees the same results given the same `seed` independent of how many 463 times the function is called, and independent of global seed settings (e.g. 464 `tf.random.set_seed`). 465 466 Example usage: 467 468 >>> image = np.array([[[1], [2]], [[3], [4]]]) 469 >>> seed = (2, 3) 470 >>> tf.image.stateless_random_flip_up_down(image, seed).numpy().tolist() 471 [[[3], [4]], [[1], [2]]] 472 473 Args: 474 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 475 of shape `[height, width, channels]`. 476 seed: A shape [2] Tensor, the seed to the random number generator. Must have 477 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 478 479 Returns: 480 A tensor of the same type and shape as `image`. 481 """ 482 random_func = functools.partial( 483 stateless_random_ops.stateless_random_uniform, seed=seed) 484 return _random_flip( 485 image, 0, random_func, 'stateless_random_flip_up_down') 486 487 488def _random_flip(image, flip_index, random_func, scope_name): 489 """Randomly (50% chance) flip an image along axis `flip_index`. 490 491 Args: 492 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 493 of shape `[height, width, channels]`. 494 flip_index: Dimension along which to flip the image. 495 Vertical is 0, Horizontal is 1. 496 random_func: partial function for calling either stateful or stateless 497 random ops with `seed` parameter specified. 498 scope_name: Name of the scope in which the ops are added. 499 500 Returns: 501 A tensor of the same type and shape as `image`. 502 503 Raises: 504 ValueError: if the shape of `image` not supported. 505 """ 506 with ops.name_scope(None, scope_name, [image]) as scope: 507 image = ops.convert_to_tensor(image, name='image') 508 image = _AssertAtLeast3DImage(image) 509 shape = image.get_shape() 510 511 def f_rank3(): 512 uniform_random = random_func(shape=[], minval=0, maxval=1.0) 513 mirror_cond = math_ops.less(uniform_random, .5) 514 result = control_flow_ops.cond( 515 mirror_cond, 516 lambda: array_ops.reverse(image, [flip_index]), 517 lambda: image, 518 name=scope) 519 return fix_image_flip_shape(image, result) 520 521 def f_rank4(): 522 batch_size = array_ops.shape(image)[0] 523 uniform_random = random_func(shape=[batch_size], minval=0, maxval=1.0) 524 flips = math_ops.round( 525 array_ops.reshape(uniform_random, [batch_size, 1, 1, 1])) 526 flips = math_ops.cast(flips, image.dtype) 527 flipped_input = array_ops.reverse(image, [flip_index + 1]) 528 return flips * flipped_input + (1 - flips) * image 529 530 if shape.ndims is None: 531 rank = array_ops.rank(image) 532 return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4) 533 if shape.ndims == 3: 534 return f_rank3() 535 elif shape.ndims == 4: 536 return f_rank4() 537 else: 538 raise ValueError( 539 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape) 540 541 542@tf_export('image.flip_left_right') 543@dispatch.add_dispatch_support 544def flip_left_right(image): 545 """Flip an image horizontally (left to right). 546 547 Outputs the contents of `image` flipped along the width dimension. 548 549 See also `tf.reverse`. 550 551 Usage Example: 552 553 >>> x = [[[1.0, 2.0, 3.0], 554 ... [4.0, 5.0, 6.0]], 555 ... [[7.0, 8.0, 9.0], 556 ... [10.0, 11.0, 12.0]]] 557 >>> tf.image.flip_left_right(x) 558 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 559 array([[[ 4., 5., 6.], 560 [ 1., 2., 3.]], 561 [[10., 11., 12.], 562 [ 7., 8., 9.]]], dtype=float32)> 563 564 Args: 565 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 566 of shape `[height, width, channels]`. 567 568 Returns: 569 A tensor of the same type and shape as `image`. 570 571 Raises: 572 ValueError: if the shape of `image` not supported. 573 """ 574 return _flip(image, 1, 'flip_left_right') 575 576 577@tf_export('image.flip_up_down') 578@dispatch.add_dispatch_support 579def flip_up_down(image): 580 """Flip an image vertically (upside down). 581 582 Outputs the contents of `image` flipped along the height dimension. 583 584 See also `reverse()`. 585 586 Usage Example: 587 588 >>> x = [[[1.0, 2.0, 3.0], 589 ... [4.0, 5.0, 6.0]], 590 ... [[7.0, 8.0, 9.0], 591 ... [10.0, 11.0, 12.0]]] 592 >>> tf.image.flip_up_down(x) 593 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 594 array([[[ 7., 8., 9.], 595 [10., 11., 12.]], 596 [[ 1., 2., 3.], 597 [ 4., 5., 6.]]], dtype=float32)> 598 599 Args: 600 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 601 of shape `[height, width, channels]`. 602 603 Returns: 604 A `Tensor` of the same type and shape as `image`. 605 606 Raises: 607 ValueError: if the shape of `image` not supported. 608 """ 609 return _flip(image, 0, 'flip_up_down') 610 611 612def _flip(image, flip_index, scope_name): 613 """Flip an image either horizontally or vertically. 614 615 Outputs the contents of `image` flipped along the dimension `flip_index`. 616 617 See also `reverse()`. 618 619 Args: 620 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 621 of shape `[height, width, channels]`. 622 flip_index: 0 For vertical, 1 for horizontal. 623 scope_name: string, scope name. 624 625 Returns: 626 A `Tensor` of the same type and shape as `image`. 627 628 Raises: 629 ValueError: if the shape of `image` not supported. 630 """ 631 with ops.name_scope(None, scope_name, [image]): 632 image = ops.convert_to_tensor(image, name='image') 633 image = _AssertAtLeast3DImage(image) 634 shape = image.get_shape() 635 636 def f_rank3(): 637 return fix_image_flip_shape(image, array_ops.reverse(image, [flip_index])) 638 639 def f_rank4(): 640 return array_ops.reverse(image, [flip_index + 1]) 641 642 if shape.ndims is None: 643 rank = array_ops.rank(image) 644 return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4) 645 elif shape.ndims == 3: 646 return f_rank3() 647 elif shape.ndims == 4: 648 return f_rank4() 649 else: 650 raise ValueError( 651 '\'image\' (shape %s)must have either 3 or 4 dimensions.' % shape) 652 653 654@tf_export('image.rot90') 655@dispatch.add_dispatch_support 656def rot90(image, k=1, name=None): 657 """Rotate image(s) counter-clockwise by 90 degrees. 658 659 660 For example: 661 662 >>> a=tf.constant([[[1],[2]], 663 ... [[3],[4]]]) 664 >>> # rotating `a` counter clockwise by 90 degrees 665 >>> a_rot=tf.image.rot90(a) 666 >>> print(a_rot[...,0].numpy()) 667 [[2 4] 668 [1 3]] 669 >>> # rotating `a` counter clockwise by 270 degrees 670 >>> a_rot=tf.image.rot90(a, k=3) 671 >>> print(a_rot[...,0].numpy()) 672 [[3 1] 673 [4 2]] 674 675 Args: 676 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 677 of shape `[height, width, channels]`. 678 k: A scalar integer. The number of times the image is rotated by 90 degrees. 679 name: A name for this operation (optional). 680 681 Returns: 682 A rotated tensor of the same type and shape as `image`. 683 684 Raises: 685 ValueError: if the shape of `image` not supported. 686 """ 687 with ops.name_scope(name, 'rot90', [image, k]) as scope: 688 image = ops.convert_to_tensor(image, name='image') 689 image = _AssertAtLeast3DImage(image) 690 k = ops.convert_to_tensor(k, dtype=dtypes.int32, name='k') 691 k.get_shape().assert_has_rank(0) 692 k = math_ops.mod(k, 4) 693 694 shape = image.get_shape() 695 if shape.ndims is None: 696 rank = array_ops.rank(image) 697 698 def f_rank3(): 699 return _rot90_3D(image, k, scope) 700 701 def f_rank4(): 702 return _rot90_4D(image, k, scope) 703 704 return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4) 705 elif shape.ndims == 3: 706 return _rot90_3D(image, k, scope) 707 elif shape.ndims == 4: 708 return _rot90_4D(image, k, scope) 709 else: 710 raise ValueError( 711 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape) 712 713 714def _rot90_3D(image, k, name_scope): 715 """Rotate image counter-clockwise by 90 degrees `k` times. 716 717 Args: 718 image: 3-D Tensor of shape `[height, width, channels]`. 719 k: A scalar integer. The number of times the image is rotated by 90 degrees. 720 name_scope: A valid TensorFlow name scope. 721 722 Returns: 723 A 3-D tensor of the same type and shape as `image`. 724 725 """ 726 727 def _rot90(): 728 return array_ops.transpose(array_ops.reverse_v2(image, [1]), [1, 0, 2]) 729 730 def _rot180(): 731 return array_ops.reverse_v2(image, [0, 1]) 732 733 def _rot270(): 734 return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]), [1]) 735 736 cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180), 737 (math_ops.equal(k, 3), _rot270)] 738 739 result = control_flow_ops.case( 740 cases, default=lambda: image, exclusive=True, name=name_scope) 741 result.set_shape([None, None, image.get_shape()[2]]) 742 return result 743 744 745def _rot90_4D(images, k, name_scope): 746 """Rotate batch of images counter-clockwise by 90 degrees `k` times. 747 748 Args: 749 images: 4-D Tensor of shape `[height, width, channels]`. 750 k: A scalar integer. The number of times the images are rotated by 90 751 degrees. 752 name_scope: A valid TensorFlow name scope. 753 754 Returns: 755 A 4-D `Tensor` of the same type and shape as `images`. 756 """ 757 758 def _rot90(): 759 return array_ops.transpose(array_ops.reverse_v2(images, [2]), [0, 2, 1, 3]) 760 761 def _rot180(): 762 return array_ops.reverse_v2(images, [1, 2]) 763 764 def _rot270(): 765 return array_ops.reverse_v2(array_ops.transpose(images, [0, 2, 1, 3]), [2]) 766 767 cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180), 768 (math_ops.equal(k, 3), _rot270)] 769 770 result = control_flow_ops.case( 771 cases, default=lambda: images, exclusive=True, name=name_scope) 772 shape = result.get_shape() 773 result.set_shape([shape[0], None, None, shape[3]]) 774 return result 775 776 777@tf_export('image.transpose', v1=['image.transpose', 'image.transpose_image']) 778@dispatch.add_dispatch_support 779def transpose(image, name=None): 780 """Transpose image(s) by swapping the height and width dimension. 781 782 Usage Example: 783 784 >>> x = [[[1.0, 2.0, 3.0], 785 ... [4.0, 5.0, 6.0]], 786 ... [[7.0, 8.0, 9.0], 787 ... [10.0, 11.0, 12.0]]] 788 >>> tf.image.transpose(x) 789 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 790 array([[[ 1., 2., 3.], 791 [ 7., 8., 9.]], 792 [[ 4., 5., 6.], 793 [10., 11., 12.]]], dtype=float32)> 794 795 Args: 796 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 797 of shape `[height, width, channels]`. 798 name: A name for this operation (optional). 799 800 Returns: 801 If `image` was 4-D, a 4-D float Tensor of shape 802 `[batch, width, height, channels]` 803 If `image` was 3-D, a 3-D float Tensor of shape 804 `[width, height, channels]` 805 806 Raises: 807 ValueError: if the shape of `image` not supported. 808 809 Usage Example: 810 811 >>> image = [[[1, 2], [3, 4]], 812 ... [[5, 6], [7, 8]], 813 ... [[9, 10], [11, 12]]] 814 >>> image = tf.constant(image) 815 >>> tf.image.transpose(image) 816 <tf.Tensor: shape=(2, 3, 2), dtype=int32, numpy= 817 array([[[ 1, 2], 818 [ 5, 6], 819 [ 9, 10]], 820 [[ 3, 4], 821 [ 7, 8], 822 [11, 12]]], dtype=int32)> 823 """ 824 with ops.name_scope(name, 'transpose', [image]): 825 image = ops.convert_to_tensor(image, name='image') 826 image = _AssertAtLeast3DImage(image) 827 shape = image.get_shape() 828 if shape.ndims is None: 829 rank = array_ops.rank(image) 830 831 def f_rank3(): 832 return array_ops.transpose(image, [1, 0, 2], name=name) 833 834 def f_rank4(): 835 return array_ops.transpose(image, [0, 2, 1, 3], name=name) 836 837 return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4) 838 elif shape.ndims == 3: 839 return array_ops.transpose(image, [1, 0, 2], name=name) 840 elif shape.ndims == 4: 841 return array_ops.transpose(image, [0, 2, 1, 3], name=name) 842 else: 843 raise ValueError( 844 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape) 845 846 847@tf_export('image.central_crop') 848@dispatch.add_dispatch_support 849def central_crop(image, central_fraction): 850 """Crop the central region of the image(s). 851 852 Remove the outer parts of an image but retain the central region of the image 853 along each dimension. If we specify central_fraction = 0.5, this function 854 returns the region marked with "X" in the below diagram. 855 856 -------- 857 | | 858 | XXXX | 859 | XXXX | 860 | | where "X" is the central 50% of the image. 861 -------- 862 863 This function works on either a single image (`image` is a 3-D Tensor), or a 864 batch of images (`image` is a 4-D Tensor). 865 866 Usage Example: 867 868 >>> x = [[[1.0, 2.0, 3.0], 869 ... [4.0, 5.0, 6.0], 870 ... [7.0, 8.0, 9.0], 871 ... [10.0, 11.0, 12.0]], 872 ... [[13.0, 14.0, 15.0], 873 ... [16.0, 17.0, 18.0], 874 ... [19.0, 20.0, 21.0], 875 ... [22.0, 23.0, 24.0]], 876 ... [[25.0, 26.0, 27.0], 877 ... [28.0, 29.0, 30.0], 878 ... [31.0, 32.0, 33.0], 879 ... [34.0, 35.0, 36.0]], 880 ... [[37.0, 38.0, 39.0], 881 ... [40.0, 41.0, 42.0], 882 ... [43.0, 44.0, 45.0], 883 ... [46.0, 47.0, 48.0]]] 884 >>> tf.image.central_crop(x, 0.5) 885 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 886 array([[[16., 17., 18.], 887 [19., 20., 21.]], 888 [[28., 29., 30.], 889 [31., 32., 33.]]], dtype=float32)> 890 891 Args: 892 image: Either a 3-D float Tensor of shape [height, width, depth], or a 4-D 893 Tensor of shape [batch_size, height, width, depth]. 894 central_fraction: float (0, 1], fraction of size to crop 895 896 Raises: 897 ValueError: if central_crop_fraction is not within (0, 1]. 898 899 Returns: 900 3-D / 4-D float Tensor, as per the input. 901 """ 902 with ops.name_scope(None, 'central_crop', [image]): 903 image = ops.convert_to_tensor(image, name='image') 904 central_fraction_static = tensor_util.constant_value(central_fraction) 905 if central_fraction_static is not None: 906 if central_fraction_static <= 0.0 or central_fraction_static > 1.0: 907 raise ValueError('central_fraction must be within (0, 1]') 908 if central_fraction_static == 1.0: 909 return image 910 else: 911 assert_ops = _assert( 912 math_ops.logical_or(central_fraction > 0.0, central_fraction <= 1.0), 913 ValueError, 'central_fraction must be within (0, 1]') 914 image = control_flow_ops.with_dependencies(assert_ops, image) 915 916 _AssertAtLeast3DImage(image) 917 rank = image.get_shape().ndims 918 if rank != 3 and rank != 4: 919 raise ValueError('`image` should either be a Tensor with rank = 3 or ' 920 'rank = 4. Had rank = {}.'.format(rank)) 921 922 # Helper method to return the `idx`-th dimension of `tensor`, along with 923 # a boolean signifying if the dimension is dynamic. 924 def _get_dim(tensor, idx): 925 static_shape = tensor.get_shape().dims[idx].value 926 if static_shape is not None: 927 return static_shape, False 928 return array_ops.shape(tensor)[idx], True 929 930 # Get the height, width, depth (and batch size, if the image is a 4-D 931 # tensor). 932 if rank == 3: 933 img_h, dynamic_h = _get_dim(image, 0) 934 img_w, dynamic_w = _get_dim(image, 1) 935 img_d = image.get_shape()[2] 936 else: 937 img_bs = image.get_shape()[0] 938 img_h, dynamic_h = _get_dim(image, 1) 939 img_w, dynamic_w = _get_dim(image, 2) 940 img_d = image.get_shape()[3] 941 942 dynamic_h = dynamic_h or (central_fraction_static is None) 943 dynamic_w = dynamic_w or (central_fraction_static is None) 944 945 # Compute the bounding boxes for the crop. The type and value of the 946 # bounding boxes depend on the `image` tensor's rank and whether / not the 947 # dimensions are statically defined. 948 if dynamic_h: 949 img_hd = math_ops.cast(img_h, dtypes.float64) 950 bbox_h_start = math_ops.cast( 951 (img_hd - img_hd * math_ops.cast(central_fraction, dtypes.float64)) / 952 2, dtypes.int32) 953 else: 954 img_hd = float(img_h) 955 bbox_h_start = int((img_hd - img_hd * central_fraction_static) / 2) 956 957 if dynamic_w: 958 img_wd = math_ops.cast(img_w, dtypes.float64) 959 bbox_w_start = math_ops.cast( 960 (img_wd - img_wd * math_ops.cast(central_fraction, dtypes.float64)) / 961 2, dtypes.int32) 962 else: 963 img_wd = float(img_w) 964 bbox_w_start = int((img_wd - img_wd * central_fraction_static) / 2) 965 966 bbox_h_size = img_h - bbox_h_start * 2 967 bbox_w_size = img_w - bbox_w_start * 2 968 969 if rank == 3: 970 bbox_begin = array_ops.stack([bbox_h_start, bbox_w_start, 0]) 971 bbox_size = array_ops.stack([bbox_h_size, bbox_w_size, -1]) 972 else: 973 bbox_begin = array_ops.stack([0, bbox_h_start, bbox_w_start, 0]) 974 bbox_size = array_ops.stack([-1, bbox_h_size, bbox_w_size, -1]) 975 976 image = array_ops.slice(image, bbox_begin, bbox_size) 977 978 # Reshape the `image` tensor to the desired size. 979 if rank == 3: 980 image.set_shape([ 981 None if dynamic_h else bbox_h_size, 982 None if dynamic_w else bbox_w_size, img_d 983 ]) 984 else: 985 image.set_shape([ 986 img_bs, None if dynamic_h else bbox_h_size, 987 None if dynamic_w else bbox_w_size, img_d 988 ]) 989 return image 990 991 992@tf_export('image.pad_to_bounding_box') 993@dispatch.add_dispatch_support 994def pad_to_bounding_box(image, offset_height, offset_width, target_height, 995 target_width): 996 """Pad `image` with zeros to the specified `height` and `width`. 997 998 Adds `offset_height` rows of zeros on top, `offset_width` columns of 999 zeros on the left, and then pads the image on the bottom and right 1000 with zeros until it has dimensions `target_height`, `target_width`. 1001 1002 This op does nothing if `offset_*` is zero and the image already has size 1003 `target_height` by `target_width`. 1004 1005 Usage Example: 1006 1007 >>> x = [[[1., 2., 3.], 1008 ... [4., 5., 6.]], 1009 ... [[7., 8., 9.], 1010 ... [10., 11., 12.]]] 1011 >>> padded_image = tf.image.pad_to_bounding_box(x, 1, 1, 4, 4) 1012 >>> padded_image 1013 <tf.Tensor: shape=(4, 4, 3), dtype=float32, numpy= 1014 array([[[ 0., 0., 0.], 1015 [ 0., 0., 0.], 1016 [ 0., 0., 0.], 1017 [ 0., 0., 0.]], 1018 [[ 0., 0., 0.], 1019 [ 1., 2., 3.], 1020 [ 4., 5., 6.], 1021 [ 0., 0., 0.]], 1022 [[ 0., 0., 0.], 1023 [ 7., 8., 9.], 1024 [10., 11., 12.], 1025 [ 0., 0., 0.]], 1026 [[ 0., 0., 0.], 1027 [ 0., 0., 0.], 1028 [ 0., 0., 0.], 1029 [ 0., 0., 0.]]], dtype=float32)> 1030 1031 Args: 1032 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1033 of shape `[height, width, channels]`. 1034 offset_height: Number of rows of zeros to add on top. 1035 offset_width: Number of columns of zeros to add on the left. 1036 target_height: Height of output image. 1037 target_width: Width of output image. 1038 1039 Returns: 1040 If `image` was 4-D, a 4-D float Tensor of shape 1041 `[batch, target_height, target_width, channels]` 1042 If `image` was 3-D, a 3-D float Tensor of shape 1043 `[target_height, target_width, channels]` 1044 1045 Raises: 1046 ValueError: If the shape of `image` is incompatible with the `offset_*` or 1047 `target_*` arguments, or either `offset_height` or `offset_width` is 1048 negative. 1049 """ 1050 with ops.name_scope(None, 'pad_to_bounding_box', [image]): 1051 image = ops.convert_to_tensor(image, name='image') 1052 1053 is_batch = True 1054 image_shape = image.get_shape() 1055 if image_shape.ndims == 3: 1056 is_batch = False 1057 image = array_ops.expand_dims(image, 0) 1058 elif image_shape.ndims is None: 1059 is_batch = False 1060 image = array_ops.expand_dims(image, 0) 1061 image.set_shape([None] * 4) 1062 elif image_shape.ndims != 4: 1063 raise ValueError( 1064 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % 1065 image_shape) 1066 1067 assert_ops = _CheckAtLeast3DImage(image, require_static=False) 1068 batch, height, width, depth = _ImageDimensions(image, rank=4) 1069 1070 after_padding_width = target_width - offset_width - width 1071 1072 after_padding_height = target_height - offset_height - height 1073 1074 assert_ops += _assert(offset_height >= 0, ValueError, 1075 'offset_height must be >= 0') 1076 assert_ops += _assert(offset_width >= 0, ValueError, 1077 'offset_width must be >= 0') 1078 assert_ops += _assert(after_padding_width >= 0, ValueError, 1079 'width must be <= target - offset') 1080 assert_ops += _assert(after_padding_height >= 0, ValueError, 1081 'height must be <= target - offset') 1082 image = control_flow_ops.with_dependencies(assert_ops, image) 1083 1084 # Do not pad on the depth dimensions. 1085 paddings = array_ops.reshape( 1086 array_ops.stack([ 1087 0, 0, offset_height, after_padding_height, offset_width, 1088 after_padding_width, 0, 0 1089 ]), [4, 2]) 1090 padded = array_ops.pad(image, paddings) 1091 1092 padded_shape = [ 1093 None if _is_tensor(i) else i 1094 for i in [batch, target_height, target_width, depth] 1095 ] 1096 padded.set_shape(padded_shape) 1097 1098 if not is_batch: 1099 padded = array_ops.squeeze(padded, axis=[0]) 1100 1101 return padded 1102 1103 1104@tf_export('image.crop_to_bounding_box') 1105@dispatch.add_dispatch_support 1106def crop_to_bounding_box(image, offset_height, offset_width, target_height, 1107 target_width): 1108 """Crops an image to a specified bounding box. 1109 1110 This op cuts a rectangular part out of `image`. The top-left corner of the 1111 returned image is at `offset_height, offset_width` in `image`, and its 1112 lower-right corner is at 1113 `offset_height + target_height, offset_width + target_width`. 1114 1115 Args: 1116 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1117 of shape `[height, width, channels]`. 1118 offset_height: Vertical coordinate of the top-left corner of the result in 1119 the input. 1120 offset_width: Horizontal coordinate of the top-left corner of the result in 1121 the input. 1122 target_height: Height of the result. 1123 target_width: Width of the result. 1124 1125 Returns: 1126 If `image` was 4-D, a 4-D float Tensor of shape 1127 `[batch, target_height, target_width, channels]` 1128 If `image` was 3-D, a 3-D float Tensor of shape 1129 `[target_height, target_width, channels]` 1130 1131 Raises: 1132 ValueError: If the shape of `image` is incompatible with the `offset_*` or 1133 `target_*` arguments, or either `offset_height` or `offset_width` is 1134 negative, or either `target_height` or `target_width` is not positive. 1135 """ 1136 with ops.name_scope(None, 'crop_to_bounding_box', [image]): 1137 image = ops.convert_to_tensor(image, name='image') 1138 1139 is_batch = True 1140 image_shape = image.get_shape() 1141 if image_shape.ndims == 3: 1142 is_batch = False 1143 image = array_ops.expand_dims(image, 0) 1144 elif image_shape.ndims is None: 1145 is_batch = False 1146 image = array_ops.expand_dims(image, 0) 1147 image.set_shape([None] * 4) 1148 elif image_shape.ndims != 4: 1149 raise ValueError( 1150 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % 1151 image_shape) 1152 1153 assert_ops = _CheckAtLeast3DImage(image, require_static=False) 1154 1155 batch, height, width, depth = _ImageDimensions(image, rank=4) 1156 1157 assert_ops += _assert(offset_width >= 0, ValueError, 1158 'offset_width must be >= 0.') 1159 assert_ops += _assert(offset_height >= 0, ValueError, 1160 'offset_height must be >= 0.') 1161 assert_ops += _assert(target_width > 0, ValueError, 1162 'target_width must be > 0.') 1163 assert_ops += _assert(target_height > 0, ValueError, 1164 'target_height must be > 0.') 1165 assert_ops += _assert(width >= (target_width + offset_width), ValueError, 1166 'width must be >= target + offset.') 1167 assert_ops += _assert(height >= (target_height + offset_height), ValueError, 1168 'height must be >= target + offset.') 1169 image = control_flow_ops.with_dependencies(assert_ops, image) 1170 1171 cropped = array_ops.slice( 1172 image, array_ops.stack([0, offset_height, offset_width, 0]), 1173 array_ops.stack([-1, target_height, target_width, -1])) 1174 1175 cropped_shape = [ 1176 None if _is_tensor(i) else i 1177 for i in [batch, target_height, target_width, depth] 1178 ] 1179 cropped.set_shape(cropped_shape) 1180 1181 if not is_batch: 1182 cropped = array_ops.squeeze(cropped, axis=[0]) 1183 1184 return cropped 1185 1186 1187@tf_export( 1188 'image.resize_with_crop_or_pad', 1189 v1=['image.resize_with_crop_or_pad', 'image.resize_image_with_crop_or_pad']) 1190@dispatch.add_dispatch_support 1191def resize_image_with_crop_or_pad(image, target_height, target_width): 1192 """Crops and/or pads an image to a target width and height. 1193 1194 Resizes an image to a target width and height by either centrally 1195 cropping the image or padding it evenly with zeros. 1196 1197 If `width` or `height` is greater than the specified `target_width` or 1198 `target_height` respectively, this op centrally crops along that dimension. 1199 If `width` or `height` is smaller than the specified `target_width` or 1200 `target_height` respectively, this op centrally pads with 0 along that 1201 dimension. 1202 1203 Args: 1204 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1205 of shape `[height, width, channels]`. 1206 target_height: Target height. 1207 target_width: Target width. 1208 1209 Raises: 1210 ValueError: if `target_height` or `target_width` are zero or negative. 1211 1212 Returns: 1213 Cropped and/or padded image. 1214 If `images` was 4-D, a 4-D float Tensor of shape 1215 `[batch, new_height, new_width, channels]`. 1216 If `images` was 3-D, a 3-D float Tensor of shape 1217 `[new_height, new_width, channels]`. 1218 """ 1219 with ops.name_scope(None, 'resize_image_with_crop_or_pad', [image]): 1220 image = ops.convert_to_tensor(image, name='image') 1221 image_shape = image.get_shape() 1222 is_batch = True 1223 if image_shape.ndims == 3: 1224 is_batch = False 1225 image = array_ops.expand_dims(image, 0) 1226 elif image_shape.ndims is None: 1227 is_batch = False 1228 image = array_ops.expand_dims(image, 0) 1229 image.set_shape([None] * 4) 1230 elif image_shape.ndims != 4: 1231 raise ValueError( 1232 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % 1233 image_shape) 1234 1235 assert_ops = _CheckAtLeast3DImage(image, require_static=False) 1236 assert_ops += _assert(target_width > 0, ValueError, 1237 'target_width must be > 0.') 1238 assert_ops += _assert(target_height > 0, ValueError, 1239 'target_height must be > 0.') 1240 1241 image = control_flow_ops.with_dependencies(assert_ops, image) 1242 # `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks. 1243 # Make sure our checks come first, so that error messages are clearer. 1244 if _is_tensor(target_height): 1245 target_height = control_flow_ops.with_dependencies( 1246 assert_ops, target_height) 1247 if _is_tensor(target_width): 1248 target_width = control_flow_ops.with_dependencies(assert_ops, 1249 target_width) 1250 1251 def max_(x, y): 1252 if _is_tensor(x) or _is_tensor(y): 1253 return math_ops.maximum(x, y) 1254 else: 1255 return max(x, y) 1256 1257 def min_(x, y): 1258 if _is_tensor(x) or _is_tensor(y): 1259 return math_ops.minimum(x, y) 1260 else: 1261 return min(x, y) 1262 1263 def equal_(x, y): 1264 if _is_tensor(x) or _is_tensor(y): 1265 return math_ops.equal(x, y) 1266 else: 1267 return x == y 1268 1269 _, height, width, _ = _ImageDimensions(image, rank=4) 1270 width_diff = target_width - width 1271 offset_crop_width = max_(-width_diff // 2, 0) 1272 offset_pad_width = max_(width_diff // 2, 0) 1273 1274 height_diff = target_height - height 1275 offset_crop_height = max_(-height_diff // 2, 0) 1276 offset_pad_height = max_(height_diff // 2, 0) 1277 1278 # Maybe crop if needed. 1279 cropped = crop_to_bounding_box(image, offset_crop_height, offset_crop_width, 1280 min_(target_height, height), 1281 min_(target_width, width)) 1282 1283 # Maybe pad if needed. 1284 resized = pad_to_bounding_box(cropped, offset_pad_height, offset_pad_width, 1285 target_height, target_width) 1286 1287 # In theory all the checks below are redundant. 1288 if resized.get_shape().ndims is None: 1289 raise ValueError('resized contains no shape.') 1290 1291 _, resized_height, resized_width, _ = _ImageDimensions(resized, rank=4) 1292 1293 assert_ops = [] 1294 assert_ops += _assert( 1295 equal_(resized_height, target_height), ValueError, 1296 'resized height is not correct.') 1297 assert_ops += _assert( 1298 equal_(resized_width, target_width), ValueError, 1299 'resized width is not correct.') 1300 1301 resized = control_flow_ops.with_dependencies(assert_ops, resized) 1302 1303 if not is_batch: 1304 resized = array_ops.squeeze(resized, axis=[0]) 1305 1306 return resized 1307 1308 1309@tf_export(v1=['image.ResizeMethod']) 1310class ResizeMethodV1(object): 1311 """See `v1.image.resize` for details.""" 1312 BILINEAR = 0 1313 NEAREST_NEIGHBOR = 1 1314 BICUBIC = 2 1315 AREA = 3 1316 1317 1318@tf_export('image.ResizeMethod', v1=[]) 1319class ResizeMethod(object): 1320 """See `tf.image.resize` for details.""" 1321 BILINEAR = 'bilinear' 1322 NEAREST_NEIGHBOR = 'nearest' 1323 BICUBIC = 'bicubic' 1324 AREA = 'area' 1325 LANCZOS3 = 'lanczos3' 1326 LANCZOS5 = 'lanczos5' 1327 GAUSSIAN = 'gaussian' 1328 MITCHELLCUBIC = 'mitchellcubic' 1329 1330 1331def _resize_images_common(images, resizer_fn, size, preserve_aspect_ratio, name, 1332 skip_resize_if_same): 1333 """Core functionality for v1 and v2 resize functions.""" 1334 with ops.name_scope(name, 'resize', [images, size]): 1335 images = ops.convert_to_tensor(images, name='images') 1336 if images.get_shape().ndims is None: 1337 raise ValueError('\'images\' contains no shape.') 1338 # TODO(shlens): Migrate this functionality to the underlying Op's. 1339 is_batch = True 1340 if images.get_shape().ndims == 3: 1341 is_batch = False 1342 images = array_ops.expand_dims(images, 0) 1343 elif images.get_shape().ndims != 4: 1344 raise ValueError('\'images\' must have either 3 or 4 dimensions.') 1345 1346 _, height, width, _ = images.get_shape().as_list() 1347 1348 try: 1349 size = ops.convert_to_tensor(size, dtypes.int32, name='size') 1350 except (TypeError, ValueError): 1351 raise ValueError('\'size\' must be a 1-D int32 Tensor') 1352 if not size.get_shape().is_compatible_with([2]): 1353 raise ValueError('\'size\' must be a 1-D Tensor of 2 elements: ' 1354 'new_height, new_width') 1355 1356 if preserve_aspect_ratio: 1357 # Get the current shapes of the image, even if dynamic. 1358 _, current_height, current_width, _ = _ImageDimensions(images, rank=4) 1359 1360 # do the computation to find the right scale and height/width. 1361 scale_factor_height = ( 1362 math_ops.cast(size[0], dtypes.float32) / 1363 math_ops.cast(current_height, dtypes.float32)) 1364 scale_factor_width = ( 1365 math_ops.cast(size[1], dtypes.float32) / 1366 math_ops.cast(current_width, dtypes.float32)) 1367 scale_factor = math_ops.minimum(scale_factor_height, scale_factor_width) 1368 scaled_height_const = math_ops.cast( 1369 math_ops.round(scale_factor * 1370 math_ops.cast(current_height, dtypes.float32)), 1371 dtypes.int32) 1372 scaled_width_const = math_ops.cast( 1373 math_ops.round(scale_factor * 1374 math_ops.cast(current_width, dtypes.float32)), 1375 dtypes.int32) 1376 1377 # NOTE: Reset the size and other constants used later. 1378 size = ops.convert_to_tensor([scaled_height_const, scaled_width_const], 1379 dtypes.int32, 1380 name='size') 1381 1382 size_const_as_shape = tensor_util.constant_value_as_shape(size) 1383 new_height_const = tensor_shape.dimension_at_index(size_const_as_shape, 1384 0).value 1385 new_width_const = tensor_shape.dimension_at_index(size_const_as_shape, 1386 1).value 1387 1388 # If we can determine that the height and width will be unmodified by this 1389 # transformation, we avoid performing the resize. 1390 if skip_resize_if_same and all( 1391 x is not None 1392 for x in [new_width_const, width, new_height_const, height]) and ( 1393 width == new_width_const and height == new_height_const): 1394 if not is_batch: 1395 images = array_ops.squeeze(images, axis=[0]) 1396 return images 1397 1398 images = resizer_fn(images, size) 1399 1400 # NOTE(mrry): The shape functions for the resize ops cannot unpack 1401 # the packed values in `new_size`, so set the shape here. 1402 images.set_shape([None, new_height_const, new_width_const, None]) 1403 1404 if not is_batch: 1405 images = array_ops.squeeze(images, axis=[0]) 1406 return images 1407 1408 1409@tf_export(v1=['image.resize_images', 'image.resize']) 1410@dispatch.add_dispatch_support 1411def resize_images(images, 1412 size, 1413 method=ResizeMethodV1.BILINEAR, 1414 align_corners=False, 1415 preserve_aspect_ratio=False, 1416 name=None): 1417 """Resize `images` to `size` using the specified `method`. 1418 1419 Resized images will be distorted if their original aspect ratio is not 1420 the same as `size`. To avoid distortions see 1421 `tf.image.resize_with_pad` or `tf.image.resize_with_crop_or_pad`. 1422 1423 The `method` can be one of: 1424 1425 * <b>`tf.image.ResizeMethod.BILINEAR`</b>: [Bilinear interpolation.]( 1426 https://en.wikipedia.org/wiki/Bilinear_interpolation) 1427 * <b>`tf.image.ResizeMethod.NEAREST_NEIGHBOR`</b>: [ 1428 Nearest neighbor interpolation.]( 1429 https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation) 1430 * <b>`tf.image.ResizeMethod.BICUBIC`</b>: [Bicubic interpolation.]( 1431 https://en.wikipedia.org/wiki/Bicubic_interpolation) 1432 * <b>`tf.image.ResizeMethod.AREA`</b>: Area interpolation. 1433 1434 The return value has the same type as `images` if `method` is 1435 `tf.image.ResizeMethod.NEAREST_NEIGHBOR`. It will also have the same type 1436 as `images` if the size of `images` can be statically determined to be the 1437 same as `size`, because `images` is returned in this case. Otherwise, the 1438 return value has type `float32`. 1439 1440 Args: 1441 images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1442 of shape `[height, width, channels]`. 1443 size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The new 1444 size for the images. 1445 method: ResizeMethod. Defaults to `tf.image.ResizeMethod.BILINEAR`. 1446 align_corners: bool. If True, the centers of the 4 corner pixels of the 1447 input and output tensors are aligned, preserving the values at the corner 1448 pixels. Defaults to `False`. 1449 preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set, 1450 then `images` will be resized to a size that fits in `size` while 1451 preserving the aspect ratio of the original image. Scales up the image if 1452 `size` is bigger than the current size of the `image`. Defaults to False. 1453 name: A name for this operation (optional). 1454 1455 Raises: 1456 ValueError: if the shape of `images` is incompatible with the 1457 shape arguments to this function 1458 ValueError: if `size` has invalid shape or type. 1459 ValueError: if an unsupported resize method is specified. 1460 1461 Returns: 1462 If `images` was 4-D, a 4-D float Tensor of shape 1463 `[batch, new_height, new_width, channels]`. 1464 If `images` was 3-D, a 3-D float Tensor of shape 1465 `[new_height, new_width, channels]`. 1466 """ 1467 1468 def resize_fn(images_t, new_size): 1469 """Legacy resize core function, passed to _resize_images_common.""" 1470 if method == ResizeMethodV1.BILINEAR or method == ResizeMethod.BILINEAR: 1471 return gen_image_ops.resize_bilinear( 1472 images_t, new_size, align_corners=align_corners) 1473 elif (method == ResizeMethodV1.NEAREST_NEIGHBOR or 1474 method == ResizeMethod.NEAREST_NEIGHBOR): 1475 return gen_image_ops.resize_nearest_neighbor( 1476 images_t, new_size, align_corners=align_corners) 1477 elif method == ResizeMethodV1.BICUBIC or method == ResizeMethod.BICUBIC: 1478 return gen_image_ops.resize_bicubic( 1479 images_t, new_size, align_corners=align_corners) 1480 elif method == ResizeMethodV1.AREA or method == ResizeMethod.AREA: 1481 return gen_image_ops.resize_area( 1482 images_t, new_size, align_corners=align_corners) 1483 else: 1484 raise ValueError('Resize method is not implemented: {}'.format(method)) 1485 1486 return _resize_images_common( 1487 images, 1488 resize_fn, 1489 size, 1490 preserve_aspect_ratio=preserve_aspect_ratio, 1491 name=name, 1492 skip_resize_if_same=True) 1493 1494 1495@tf_export('image.resize', v1=[]) 1496@dispatch.add_dispatch_support 1497def resize_images_v2(images, 1498 size, 1499 method=ResizeMethod.BILINEAR, 1500 preserve_aspect_ratio=False, 1501 antialias=False, 1502 name=None): 1503 """Resize `images` to `size` using the specified `method`. 1504 1505 Resized images will be distorted if their original aspect ratio is not 1506 the same as `size`. To avoid distortions see 1507 `tf.image.resize_with_pad`. 1508 1509 >>> image = tf.constant([ 1510 ... [1,0,0,0,0], 1511 ... [0,1,0,0,0], 1512 ... [0,0,1,0,0], 1513 ... [0,0,0,1,0], 1514 ... [0,0,0,0,1], 1515 ... ]) 1516 >>> # Add "batch" and "channels" dimensions 1517 >>> image = image[tf.newaxis, ..., tf.newaxis] 1518 >>> image.shape.as_list() # [batch, height, width, channels] 1519 [1, 5, 5, 1] 1520 >>> tf.image.resize(image, [3,5])[0,...,0].numpy() 1521 array([[0.6666667, 0.3333333, 0. , 0. , 0. ], 1522 [0. , 0. , 1. , 0. , 0. ], 1523 [0. , 0. , 0. , 0.3333335, 0.6666665]], 1524 dtype=float32) 1525 1526 It works equally well with a single image instead of a batch of images: 1527 1528 >>> tf.image.resize(image[0], [3,5]).shape.as_list() 1529 [3, 5, 1] 1530 1531 When `antialias` is true, the sampling filter will anti-alias the input image 1532 as well as interpolate. When downsampling an image with [anti-aliasing]( 1533 https://en.wikipedia.org/wiki/Spatial_anti-aliasing) the sampling filter 1534 kernel is scaled in order to properly anti-alias the input image signal. 1535 `antialias` has no effect when upsampling an image: 1536 1537 >>> a = tf.image.resize(image, [5,10]) 1538 >>> b = tf.image.resize(image, [5,10], antialias=True) 1539 >>> tf.reduce_max(abs(a - b)).numpy() 1540 0.0 1541 1542 The `method` argument expects an item from the `image.ResizeMethod` enum, or 1543 the string equivalent. The options are: 1544 1545 * <b>`bilinear`</b>: [Bilinear interpolation.]( 1546 https://en.wikipedia.org/wiki/Bilinear_interpolation) If `antialias` is 1547 true, becomes a hat/tent filter function with radius 1 when downsampling. 1548 * <b>`lanczos3`</b>: [Lanczos kernel]( 1549 https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 3. 1550 High-quality practical filter but may have some ringing, especially on 1551 synthetic images. 1552 * <b>`lanczos5`</b>: [Lanczos kernel] ( 1553 https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 5. 1554 Very-high-quality filter but may have stronger ringing. 1555 * <b>`bicubic`</b>: [Cubic interpolant]( 1556 https://en.wikipedia.org/wiki/Bicubic_interpolation) of Keys. Equivalent to 1557 Catmull-Rom kernel. Reasonably good quality and faster than Lanczos3Kernel, 1558 particularly when upsampling. 1559 * <b>`gaussian`</b>: [Gaussian kernel]( 1560 https://en.wikipedia.org/wiki/Gaussian_filter) with radius 3, 1561 sigma = 1.5 / 3.0. 1562 * <b>`nearest`</b>: [Nearest neighbor interpolation.]( 1563 https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation) 1564 `antialias` has no effect when used with nearest neighbor interpolation. 1565 * <b>`area`</b>: Anti-aliased resampling with area interpolation. 1566 `antialias` has no effect when used with area interpolation; it 1567 always anti-aliases. 1568 * <b>`mitchellcubic`</b>: Mitchell-Netravali Cubic non-interpolating filter. 1569 For synthetic images (especially those lacking proper prefiltering), less 1570 ringing than Keys cubic kernel but less sharp. 1571 1572 Note: Near image edges the filtering kernel may be partially outside the 1573 image boundaries. For these pixels, only input pixels inside the image will be 1574 included in the filter sum, and the output value will be appropriately 1575 normalized. 1576 1577 The return value has type `float32`, unless the `method` is 1578 `ResizeMethod.NEAREST_NEIGHBOR`, then the return dtype is the dtype 1579 of `images`: 1580 1581 >>> nn = tf.image.resize(image, [5,7], method='nearest') 1582 >>> nn[0,...,0].numpy() 1583 array([[1, 0, 0, 0, 0, 0, 0], 1584 [0, 1, 1, 0, 0, 0, 0], 1585 [0, 0, 0, 1, 0, 0, 0], 1586 [0, 0, 0, 0, 1, 1, 0], 1587 [0, 0, 0, 0, 0, 0, 1]], dtype=int32) 1588 1589 With `preserve_aspect_ratio=True`, the aspect ratio is preserved, so `size` 1590 is the maximum for each dimension: 1591 1592 >>> max_10_20 = tf.image.resize(image, [10,20], preserve_aspect_ratio=True) 1593 >>> max_10_20.shape.as_list() 1594 [1, 10, 10, 1] 1595 1596 Args: 1597 images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1598 of shape `[height, width, channels]`. 1599 size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The new 1600 size for the images. 1601 method: An `image.ResizeMethod`, or string equivalent. Defaults to 1602 `bilinear`. 1603 preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set, 1604 then `images` will be resized to a size that fits in `size` while 1605 preserving the aspect ratio of the original image. Scales up the image if 1606 `size` is bigger than the current size of the `image`. Defaults to False. 1607 antialias: Whether to use an anti-aliasing filter when downsampling an 1608 image. 1609 name: A name for this operation (optional). 1610 1611 Raises: 1612 ValueError: if the shape of `images` is incompatible with the 1613 shape arguments to this function 1614 ValueError: if `size` has an invalid shape or type. 1615 ValueError: if an unsupported resize method is specified. 1616 1617 Returns: 1618 If `images` was 4-D, a 4-D float Tensor of shape 1619 `[batch, new_height, new_width, channels]`. 1620 If `images` was 3-D, a 3-D float Tensor of shape 1621 `[new_height, new_width, channels]`. 1622 """ 1623 1624 def resize_fn(images_t, new_size): 1625 """Resize core function, passed to _resize_images_common.""" 1626 scale_and_translate_methods = [ 1627 ResizeMethod.LANCZOS3, ResizeMethod.LANCZOS5, ResizeMethod.GAUSSIAN, 1628 ResizeMethod.MITCHELLCUBIC 1629 ] 1630 1631 def resize_with_scale_and_translate(method): 1632 scale = ( 1633 math_ops.cast(new_size, dtype=dtypes.float32) / 1634 math_ops.cast(array_ops.shape(images_t)[1:3], dtype=dtypes.float32)) 1635 return gen_image_ops.scale_and_translate( 1636 images_t, 1637 new_size, 1638 scale, 1639 array_ops.zeros([2]), 1640 kernel_type=method, 1641 antialias=antialias) 1642 1643 if method == ResizeMethod.BILINEAR: 1644 if antialias: 1645 return resize_with_scale_and_translate('triangle') 1646 else: 1647 return gen_image_ops.resize_bilinear( 1648 images_t, new_size, half_pixel_centers=True) 1649 elif method == ResizeMethod.NEAREST_NEIGHBOR: 1650 return gen_image_ops.resize_nearest_neighbor( 1651 images_t, new_size, half_pixel_centers=True) 1652 elif method == ResizeMethod.BICUBIC: 1653 if antialias: 1654 return resize_with_scale_and_translate('keyscubic') 1655 else: 1656 return gen_image_ops.resize_bicubic( 1657 images_t, new_size, half_pixel_centers=True) 1658 elif method == ResizeMethod.AREA: 1659 return gen_image_ops.resize_area(images_t, new_size) 1660 elif method in scale_and_translate_methods: 1661 return resize_with_scale_and_translate(method) 1662 else: 1663 raise ValueError('Resize method is not implemented: {}'.format(method)) 1664 1665 return _resize_images_common( 1666 images, 1667 resize_fn, 1668 size, 1669 preserve_aspect_ratio=preserve_aspect_ratio, 1670 name=name, 1671 skip_resize_if_same=False) 1672 1673 1674def _resize_image_with_pad_common(image, target_height, target_width, 1675 resize_fn): 1676 """Core functionality for v1 and v2 resize_image_with_pad functions.""" 1677 with ops.name_scope(None, 'resize_image_with_pad', [image]): 1678 image = ops.convert_to_tensor(image, name='image') 1679 image_shape = image.get_shape() 1680 is_batch = True 1681 if image_shape.ndims == 3: 1682 is_batch = False 1683 image = array_ops.expand_dims(image, 0) 1684 elif image_shape.ndims is None: 1685 is_batch = False 1686 image = array_ops.expand_dims(image, 0) 1687 image.set_shape([None] * 4) 1688 elif image_shape.ndims != 4: 1689 raise ValueError( 1690 '\'image\' (shape %s) must have either 3 or 4 dimensions.' % 1691 image_shape) 1692 1693 assert_ops = _CheckAtLeast3DImage(image, require_static=False) 1694 assert_ops += _assert(target_width > 0, ValueError, 1695 'target_width must be > 0.') 1696 assert_ops += _assert(target_height > 0, ValueError, 1697 'target_height must be > 0.') 1698 1699 image = control_flow_ops.with_dependencies(assert_ops, image) 1700 1701 def max_(x, y): 1702 if _is_tensor(x) or _is_tensor(y): 1703 return math_ops.maximum(x, y) 1704 else: 1705 return max(x, y) 1706 1707 _, height, width, _ = _ImageDimensions(image, rank=4) 1708 1709 # convert values to float, to ease divisions 1710 f_height = math_ops.cast(height, dtype=dtypes.float32) 1711 f_width = math_ops.cast(width, dtype=dtypes.float32) 1712 f_target_height = math_ops.cast(target_height, dtype=dtypes.float32) 1713 f_target_width = math_ops.cast(target_width, dtype=dtypes.float32) 1714 1715 # Find the ratio by which the image must be adjusted 1716 # to fit within the target 1717 ratio = max_(f_width / f_target_width, f_height / f_target_height) 1718 resized_height_float = f_height / ratio 1719 resized_width_float = f_width / ratio 1720 resized_height = math_ops.cast( 1721 math_ops.floor(resized_height_float), dtype=dtypes.int32) 1722 resized_width = math_ops.cast( 1723 math_ops.floor(resized_width_float), dtype=dtypes.int32) 1724 1725 padding_height = (f_target_height - resized_height_float) / 2 1726 padding_width = (f_target_width - resized_width_float) / 2 1727 f_padding_height = math_ops.floor(padding_height) 1728 f_padding_width = math_ops.floor(padding_width) 1729 p_height = max_(0, math_ops.cast(f_padding_height, dtype=dtypes.int32)) 1730 p_width = max_(0, math_ops.cast(f_padding_width, dtype=dtypes.int32)) 1731 1732 # Resize first, then pad to meet requested dimensions 1733 resized = resize_fn(image, [resized_height, resized_width]) 1734 1735 padded = pad_to_bounding_box(resized, p_height, p_width, target_height, 1736 target_width) 1737 1738 if padded.get_shape().ndims is None: 1739 raise ValueError('padded contains no shape.') 1740 1741 _ImageDimensions(padded, rank=4) 1742 1743 if not is_batch: 1744 padded = array_ops.squeeze(padded, axis=[0]) 1745 1746 return padded 1747 1748 1749@tf_export(v1=['image.resize_image_with_pad']) 1750@dispatch.add_dispatch_support 1751def resize_image_with_pad_v1(image, 1752 target_height, 1753 target_width, 1754 method=ResizeMethodV1.BILINEAR, 1755 align_corners=False): 1756 """Resizes and pads an image to a target width and height. 1757 1758 Resizes an image to a target width and height by keeping 1759 the aspect ratio the same without distortion. If the target 1760 dimensions don't match the image dimensions, the image 1761 is resized and then padded with zeroes to match requested 1762 dimensions. 1763 1764 Args: 1765 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1766 of shape `[height, width, channels]`. 1767 target_height: Target height. 1768 target_width: Target width. 1769 method: Method to use for resizing image. See `resize_images()` 1770 align_corners: bool. If True, the centers of the 4 corner pixels of the 1771 input and output tensors are aligned, preserving the values at the corner 1772 pixels. Defaults to `False`. 1773 1774 Raises: 1775 ValueError: if `target_height` or `target_width` are zero or negative. 1776 1777 Returns: 1778 Resized and padded image. 1779 If `images` was 4-D, a 4-D float Tensor of shape 1780 `[batch, new_height, new_width, channels]`. 1781 If `images` was 3-D, a 3-D float Tensor of shape 1782 `[new_height, new_width, channels]`. 1783 """ 1784 1785 def _resize_fn(im, new_size): 1786 return resize_images(im, new_size, method, align_corners=align_corners) 1787 1788 return _resize_image_with_pad_common(image, target_height, target_width, 1789 _resize_fn) 1790 1791 1792@tf_export('image.resize_with_pad', v1=[]) 1793@dispatch.add_dispatch_support 1794def resize_image_with_pad_v2(image, 1795 target_height, 1796 target_width, 1797 method=ResizeMethod.BILINEAR, 1798 antialias=False): 1799 """Resizes and pads an image to a target width and height. 1800 1801 Resizes an image to a target width and height by keeping 1802 the aspect ratio the same without distortion. If the target 1803 dimensions don't match the image dimensions, the image 1804 is resized and then padded with zeroes to match requested 1805 dimensions. 1806 1807 Args: 1808 image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 1809 of shape `[height, width, channels]`. 1810 target_height: Target height. 1811 target_width: Target width. 1812 method: Method to use for resizing image. See `image.resize()` 1813 antialias: Whether to use anti-aliasing when resizing. See 'image.resize()'. 1814 1815 Raises: 1816 ValueError: if `target_height` or `target_width` are zero or negative. 1817 1818 Returns: 1819 Resized and padded image. 1820 If `images` was 4-D, a 4-D float Tensor of shape 1821 `[batch, new_height, new_width, channels]`. 1822 If `images` was 3-D, a 3-D float Tensor of shape 1823 `[new_height, new_width, channels]`. 1824 """ 1825 1826 def _resize_fn(im, new_size): 1827 return resize_images_v2(im, new_size, method, antialias=antialias) 1828 1829 return _resize_image_with_pad_common(image, target_height, target_width, 1830 _resize_fn) 1831 1832 1833@tf_export('image.per_image_standardization') 1834@dispatch.add_dispatch_support 1835def per_image_standardization(image): 1836 """Linearly scales each image in `image` to have mean 0 and variance 1. 1837 1838 For each 3-D image `x` in `image`, computes `(x - mean) / adjusted_stddev`, 1839 where 1840 1841 - `mean` is the average of all values in `x` 1842 - `adjusted_stddev = max(stddev, 1.0/sqrt(N))` is capped away from 0 to 1843 protect against division by 0 when handling uniform images 1844 - `N` is the number of elements in `x` 1845 - `stddev` is the standard deviation of all values in `x` 1846 1847 Args: 1848 image: An n-D Tensor with at least 3 dimensions, the last 3 of which are the 1849 dimensions of each image. 1850 1851 Returns: 1852 A `Tensor` with the same shape as `image`. 1853 1854 Raises: 1855 ValueError: if the shape of 'image' is incompatible with this function. 1856 """ 1857 with ops.name_scope(None, 'per_image_standardization', [image]) as scope: 1858 image = ops.convert_to_tensor(image, name='image') 1859 image = _AssertAtLeast3DImage(image) 1860 1861 image = math_ops.cast(image, dtype=dtypes.float32) 1862 num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-3:]) 1863 image_mean = math_ops.reduce_mean(image, axis=[-1, -2, -3], keepdims=True) 1864 1865 # Apply a minimum normalization that protects us against uniform images. 1866 stddev = math_ops.reduce_std(image, axis=[-1, -2, -3], keepdims=True) 1867 min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32)) 1868 adjusted_stddev = math_ops.maximum(stddev, min_stddev) 1869 1870 image -= image_mean 1871 image = math_ops.divide(image, adjusted_stddev, name=scope) 1872 return image 1873 1874 1875@tf_export('image.random_brightness') 1876@dispatch.add_dispatch_support 1877def random_brightness(image, max_delta, seed=None): 1878 """Adjust the brightness of images by a random factor. 1879 1880 Equivalent to `adjust_brightness()` using a `delta` randomly picked in the 1881 interval `[-max_delta, max_delta)`. 1882 1883 For producing deterministic results given a `seed` value, use 1884 `tf.image.stateless_random_brightness`. Unlike using the `seed` param 1885 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 1886 same results given the same seed independent of how many times the function is 1887 called, and independent of global seed settings (e.g. tf.random.set_seed). 1888 1889 Args: 1890 image: An image or images to adjust. 1891 max_delta: float, must be non-negative. 1892 seed: A Python integer. Used to create a random seed. See 1893 `tf.compat.v1.set_random_seed` for behavior. 1894 1895 Usage Example: 1896 1897 >>> x = [[[1.0, 2.0, 3.0], 1898 ... [4.0, 5.0, 6.0]], 1899 ... [[7.0, 8.0, 9.0], 1900 ... [10.0, 11.0, 12.0]]] 1901 >>> tf.image.random_brightness(x, 0.2) 1902 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...> 1903 1904 Returns: 1905 The brightness-adjusted image(s). 1906 1907 Raises: 1908 ValueError: if `max_delta` is negative. 1909 """ 1910 if max_delta < 0: 1911 raise ValueError('max_delta must be non-negative.') 1912 1913 delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed) 1914 return adjust_brightness(image, delta) 1915 1916 1917@tf_export('image.stateless_random_brightness', v1=[]) 1918@dispatch.add_dispatch_support 1919def stateless_random_brightness(image, max_delta, seed): 1920 """Adjust the brightness of images by a random factor deterministically. 1921 1922 Equivalent to `adjust_brightness()` using a `delta` randomly picked in the 1923 interval `[-max_delta, max_delta)`. 1924 1925 Guarantees the same results given the same `seed` independent of how many 1926 times the function is called, and independent of global seed settings (e.g. 1927 `tf.random.set_seed`). 1928 1929 Usage Example: 1930 1931 >>> x = [[[1.0, 2.0, 3.0], 1932 ... [4.0, 5.0, 6.0]], 1933 ... [[7.0, 8.0, 9.0], 1934 ... [10.0, 11.0, 12.0]]] 1935 >>> seed = (1, 2) 1936 >>> tf.image.stateless_random_brightness(x, 0.2, seed) 1937 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 1938 array([[[ 1.1376241, 2.1376243, 3.1376243], 1939 [ 4.1376243, 5.1376243, 6.1376243]], 1940 [[ 7.1376243, 8.137624 , 9.137624 ], 1941 [10.137624 , 11.137624 , 12.137624 ]]], dtype=float32)> 1942 1943 Args: 1944 image: An image or images to adjust. 1945 max_delta: float, must be non-negative. 1946 seed: A shape [2] Tensor, the seed to the random number generator. Must have 1947 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 1948 1949 Returns: 1950 The brightness-adjusted image(s). 1951 1952 Raises: 1953 ValueError: if `max_delta` is negative. 1954 """ 1955 if max_delta < 0: 1956 raise ValueError('max_delta must be non-negative.') 1957 1958 delta = stateless_random_ops.stateless_random_uniform( 1959 shape=[], minval=-max_delta, maxval=max_delta, seed=seed) 1960 return adjust_brightness(image, delta) 1961 1962 1963@tf_export('image.random_contrast') 1964@dispatch.add_dispatch_support 1965def random_contrast(image, lower, upper, seed=None): 1966 """Adjust the contrast of an image or images by a random factor. 1967 1968 Equivalent to `adjust_contrast()` but uses a `contrast_factor` randomly 1969 picked in the interval `[lower, upper)`. 1970 1971 For producing deterministic results given a `seed` value, use 1972 `tf.image.stateless_random_contrast`. Unlike using the `seed` param 1973 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 1974 same results given the same seed independent of how many times the function is 1975 called, and independent of global seed settings (e.g. tf.random.set_seed). 1976 1977 Args: 1978 image: An image tensor with 3 or more dimensions. 1979 lower: float. Lower bound for the random contrast factor. 1980 upper: float. Upper bound for the random contrast factor. 1981 seed: A Python integer. Used to create a random seed. See 1982 `tf.compat.v1.set_random_seed` for behavior. 1983 1984 Usage Example: 1985 1986 >>> x = [[[1.0, 2.0, 3.0], 1987 ... [4.0, 5.0, 6.0]], 1988 ... [[7.0, 8.0, 9.0], 1989 ... [10.0, 11.0, 12.0]]] 1990 >>> tf.image.random_contrast(x, 0.2, 0.5) 1991 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...> 1992 1993 Returns: 1994 The contrast-adjusted image(s). 1995 1996 Raises: 1997 ValueError: if `upper <= lower` or if `lower < 0`. 1998 """ 1999 if upper <= lower: 2000 raise ValueError('upper must be > lower.') 2001 2002 if lower < 0: 2003 raise ValueError('lower must be non-negative.') 2004 2005 contrast_factor = random_ops.random_uniform([], lower, upper, seed=seed) 2006 return adjust_contrast(image, contrast_factor) 2007 2008 2009@tf_export('image.stateless_random_contrast', v1=[]) 2010@dispatch.add_dispatch_support 2011def stateless_random_contrast(image, lower, upper, seed): 2012 """Adjust the contrast of images by a random factor deterministically. 2013 2014 Guarantees the same results given the same `seed` independent of how many 2015 times the function is called, and independent of global seed settings (e.g. 2016 `tf.random.set_seed`). 2017 2018 Args: 2019 image: An image tensor with 3 or more dimensions. 2020 lower: float. Lower bound for the random contrast factor. 2021 upper: float. Upper bound for the random contrast factor. 2022 seed: A shape [2] Tensor, the seed to the random number generator. Must have 2023 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 2024 2025 Usage Example: 2026 2027 >>> x = [[[1.0, 2.0, 3.0], 2028 ... [4.0, 5.0, 6.0]], 2029 ... [[7.0, 8.0, 9.0], 2030 ... [10.0, 11.0, 12.0]]] 2031 >>> seed = (1, 2) 2032 >>> tf.image.stateless_random_contrast(x, 0.2, 0.5, seed) 2033 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2034 array([[[3.4605184, 4.4605184, 5.4605184], 2035 [4.820173 , 5.820173 , 6.820173 ]], 2036 [[6.179827 , 7.179827 , 8.179828 ], 2037 [7.5394816, 8.539482 , 9.539482 ]]], dtype=float32)> 2038 2039 Returns: 2040 The contrast-adjusted image(s). 2041 2042 Raises: 2043 ValueError: if `upper <= lower` or if `lower < 0`. 2044 """ 2045 if upper <= lower: 2046 raise ValueError('upper must be > lower.') 2047 2048 if lower < 0: 2049 raise ValueError('lower must be non-negative.') 2050 2051 contrast_factor = stateless_random_ops.stateless_random_uniform( 2052 shape=[], minval=lower, maxval=upper, seed=seed) 2053 return adjust_contrast(image, contrast_factor) 2054 2055 2056@tf_export('image.adjust_brightness') 2057@dispatch.add_dispatch_support 2058def adjust_brightness(image, delta): 2059 """Adjust the brightness of RGB or Grayscale images. 2060 2061 This is a convenience method that converts RGB images to float 2062 representation, adjusts their brightness, and then converts them back to the 2063 original data type. If several adjustments are chained, it is advisable to 2064 minimize the number of redundant conversions. 2065 2066 The value `delta` is added to all components of the tensor `image`. `image` is 2067 converted to `float` and scaled appropriately if it is in fixed-point 2068 representation, and `delta` is converted to the same data type. For regular 2069 images, `delta` should be in the range `(-1,1)`, as it is added to the image 2070 in floating point representation, where pixel values are in the `[0,1)` range. 2071 2072 Usage Example: 2073 2074 >>> x = [[[1.0, 2.0, 3.0], 2075 ... [4.0, 5.0, 6.0]], 2076 ... [[7.0, 8.0, 9.0], 2077 ... [10.0, 11.0, 12.0]]] 2078 >>> tf.image.adjust_brightness(x, delta=0.1) 2079 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2080 array([[[ 1.1, 2.1, 3.1], 2081 [ 4.1, 5.1, 6.1]], 2082 [[ 7.1, 8.1, 9.1], 2083 [10.1, 11.1, 12.1]]], dtype=float32)> 2084 2085 Args: 2086 image: RGB image or images to adjust. 2087 delta: A scalar. Amount to add to the pixel values. 2088 2089 Returns: 2090 A brightness-adjusted tensor of the same shape and type as `image`. 2091 """ 2092 with ops.name_scope(None, 'adjust_brightness', [image, delta]) as name: 2093 image = ops.convert_to_tensor(image, name='image') 2094 # Remember original dtype to so we can convert back if needed 2095 orig_dtype = image.dtype 2096 2097 if orig_dtype in [dtypes.float16, dtypes.float32]: 2098 flt_image = image 2099 else: 2100 flt_image = convert_image_dtype(image, dtypes.float32) 2101 2102 adjusted = math_ops.add( 2103 flt_image, math_ops.cast(delta, flt_image.dtype), name=name) 2104 2105 return convert_image_dtype(adjusted, orig_dtype, saturate=True) 2106 2107 2108@tf_export('image.adjust_contrast') 2109@dispatch.add_dispatch_support 2110def adjust_contrast(images, contrast_factor): 2111 """Adjust contrast of RGB or grayscale images. 2112 2113 This is a convenience method that converts RGB images to float 2114 representation, adjusts their contrast, and then converts them back to the 2115 original data type. If several adjustments are chained, it is advisable to 2116 minimize the number of redundant conversions. 2117 2118 `images` is a tensor of at least 3 dimensions. The last 3 dimensions are 2119 interpreted as `[height, width, channels]`. The other dimensions only 2120 represent a collection of images, such as `[batch, height, width, channels].` 2121 2122 Contrast is adjusted independently for each channel of each image. 2123 2124 For each channel, this Op computes the mean of the image pixels in the 2125 channel and then adjusts each component `x` of each pixel to 2126 `(x - mean) * contrast_factor + mean`. 2127 2128 Usage Example: 2129 2130 >>> x = [[[1.0, 2.0, 3.0], 2131 ... [4.0, 5.0, 6.0]], 2132 ... [[7.0, 8.0, 9.0], 2133 ... [10.0, 11.0, 12.0]]] 2134 >>> tf.image.adjust_contrast(x, 2) 2135 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2136 array([[[-3.5, -2.5, -1.5], 2137 [ 2.5, 3.5, 4.5]], 2138 [[ 8.5, 9.5, 10.5], 2139 [14.5, 15.5, 16.5]]], dtype=float32)> 2140 2141 Args: 2142 images: Images to adjust. At least 3-D. 2143 contrast_factor: A float multiplier for adjusting contrast. 2144 2145 Returns: 2146 The contrast-adjusted image or images. 2147 """ 2148 with ops.name_scope(None, 'adjust_contrast', 2149 [images, contrast_factor]) as name: 2150 images = ops.convert_to_tensor(images, name='images') 2151 # Remember original dtype to so we can convert back if needed 2152 orig_dtype = images.dtype 2153 2154 if orig_dtype in (dtypes.float16, dtypes.float32): 2155 flt_images = images 2156 else: 2157 flt_images = convert_image_dtype(images, dtypes.float32) 2158 2159 adjusted = gen_image_ops.adjust_contrastv2( 2160 flt_images, contrast_factor=contrast_factor, name=name) 2161 2162 return convert_image_dtype(adjusted, orig_dtype, saturate=True) 2163 2164 2165@tf_export('image.adjust_gamma') 2166@dispatch.add_dispatch_support 2167def adjust_gamma(image, gamma=1, gain=1): 2168 """Performs [Gamma Correction](http://en.wikipedia.org/wiki/Gamma_correction). 2169 2170 on the input image. 2171 2172 Also known as Power Law Transform. This function converts the 2173 input images at first to float representation, then transforms them 2174 pixelwise according to the equation `Out = gain * In**gamma`, 2175 and then converts the back to the original data type. 2176 2177 Usage Example: 2178 2179 >>> x = [[[1.0, 2.0, 3.0], 2180 ... [4.0, 5.0, 6.0]], 2181 ... [[7.0, 8.0, 9.0], 2182 ... [10.0, 11.0, 12.0]]] 2183 >>> tf.image.adjust_gamma(x, 0.2) 2184 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2185 array([[[1. , 1.1486983, 1.2457309], 2186 [1.319508 , 1.3797297, 1.4309691]], 2187 [[1.4757731, 1.5157166, 1.5518456], 2188 [1.5848932, 1.6153942, 1.6437519]]], dtype=float32)> 2189 2190 Args: 2191 image : RGB image or images to adjust. 2192 gamma : A scalar or tensor. Non-negative real number. 2193 gain : A scalar or tensor. The constant multiplier. 2194 2195 Returns: 2196 A Tensor. A Gamma-adjusted tensor of the same shape and type as `image`. 2197 2198 Raises: 2199 ValueError: If gamma is negative. 2200 Notes: 2201 For gamma greater than 1, the histogram will shift towards left and 2202 the output image will be darker than the input image. 2203 For gamma less than 1, the histogram will shift towards right and 2204 the output image will be brighter than the input image. 2205 References: 2206 [Wikipedia](http://en.wikipedia.org/wiki/Gamma_correction) 2207 """ 2208 2209 with ops.name_scope(None, 'adjust_gamma', [image, gamma, gain]) as name: 2210 image = ops.convert_to_tensor(image, name='image') 2211 # Remember original dtype to so we can convert back if needed 2212 orig_dtype = image.dtype 2213 2214 if orig_dtype in [dtypes.float16, dtypes.float32]: 2215 flt_image = image 2216 else: 2217 flt_image = convert_image_dtype(image, dtypes.float32) 2218 2219 assert_op = _assert(gamma >= 0, ValueError, 2220 'Gamma should be a non-negative real number.') 2221 if assert_op: 2222 gamma = control_flow_ops.with_dependencies(assert_op, gamma) 2223 2224 # According to the definition of gamma correction. 2225 adjusted_img = gain * flt_image**gamma 2226 2227 return convert_image_dtype(adjusted_img, orig_dtype, saturate=True) 2228 2229 2230@tf_export('image.convert_image_dtype') 2231@dispatch.add_dispatch_support 2232def convert_image_dtype(image, dtype, saturate=False, name=None): 2233 """Convert `image` to `dtype`, scaling its values if needed. 2234 2235 The operation supports data types (for `image` and `dtype`) of 2236 `uint8`, `uint16`, `uint32`, `uint64`, `int8`, `int16`, `int32`, `int64`, 2237 `float16`, `float32`, `float64`, `bfloat16`. 2238 2239 Images that are represented using floating point values are expected to have 2240 values in the range [0,1). Image data stored in integer data types are 2241 expected to have values in the range `[0,MAX]`, where `MAX` is the largest 2242 positive representable number for the data type. 2243 2244 This op converts between data types, scaling the values appropriately before 2245 casting. 2246 2247 Usage Example: 2248 2249 >>> x = [[[1, 2, 3], [4, 5, 6]], 2250 ... [[7, 8, 9], [10, 11, 12]]] 2251 >>> x_int8 = tf.convert_to_tensor(x, dtype=tf.int8) 2252 >>> tf.image.convert_image_dtype(x_int8, dtype=tf.float16, saturate=False) 2253 <tf.Tensor: shape=(2, 2, 3), dtype=float16, numpy= 2254 array([[[0.00787, 0.01575, 0.02362], 2255 [0.0315 , 0.03937, 0.04724]], 2256 [[0.0551 , 0.063 , 0.07086], 2257 [0.07874, 0.0866 , 0.0945 ]]], dtype=float16)> 2258 2259 Converting integer types to floating point types returns normalized floating 2260 point values in the range [0, 1); the values are normalized by the `MAX` value 2261 of the input dtype. Consider the following two examples: 2262 2263 >>> a = [[[1], [2]], [[3], [4]]] 2264 >>> a_int8 = tf.convert_to_tensor(a, dtype=tf.int8) 2265 >>> tf.image.convert_image_dtype(a_int8, dtype=tf.float32) 2266 <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy= 2267 array([[[0.00787402], 2268 [0.01574803]], 2269 [[0.02362205], 2270 [0.03149606]]], dtype=float32)> 2271 2272 >>> a_int32 = tf.convert_to_tensor(a, dtype=tf.int32) 2273 >>> tf.image.convert_image_dtype(a_int32, dtype=tf.float32) 2274 <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy= 2275 array([[[4.6566129e-10], 2276 [9.3132257e-10]], 2277 [[1.3969839e-09], 2278 [1.8626451e-09]]], dtype=float32)> 2279 2280 Despite having identical values of `a` and output dtype of `float32`, the 2281 outputs differ due to the different input dtypes (`int8` vs. `int32`). This 2282 is, again, because the values are normalized by the `MAX` value of the input 2283 dtype. 2284 2285 Note that converting floating point values to integer type may lose precision. 2286 In the example below, an image tensor `b` of dtype `float32` is converted to 2287 `int8` and back to `float32`. The final output, however, is different from 2288 the original input `b` due to precision loss. 2289 2290 >>> b = [[[0.12], [0.34]], [[0.56], [0.78]]] 2291 >>> b_float32 = tf.convert_to_tensor(b, dtype=tf.float32) 2292 >>> b_int8 = tf.image.convert_image_dtype(b_float32, dtype=tf.int8) 2293 >>> tf.image.convert_image_dtype(b_int8, dtype=tf.float32) 2294 <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy= 2295 array([[[0.11811024], 2296 [0.33858266]], 2297 [[0.5590551 ], 2298 [0.77952754]]], dtype=float32)> 2299 2300 Scaling up from an integer type (input dtype) to another integer type (output 2301 dtype) will not map input dtype's `MAX` to output dtype's `MAX` but converting 2302 back and forth should result in no change. For example, as shown below, the 2303 `MAX` value of int8 (=127) is not mapped to the `MAX` value of int16 (=32,767) 2304 but, when scaled back, we get the same, original values of `c`. 2305 2306 >>> c = [[[1], [2]], [[127], [127]]] 2307 >>> c_int8 = tf.convert_to_tensor(c, dtype=tf.int8) 2308 >>> c_int16 = tf.image.convert_image_dtype(c_int8, dtype=tf.int16) 2309 >>> print(c_int16) 2310 tf.Tensor( 2311 [[[ 256] 2312 [ 512]] 2313 [[32512] 2314 [32512]]], shape=(2, 2, 1), dtype=int16) 2315 >>> c_int8_back = tf.image.convert_image_dtype(c_int16, dtype=tf.int8) 2316 >>> print(c_int8_back) 2317 tf.Tensor( 2318 [[[ 1] 2319 [ 2]] 2320 [[127] 2321 [127]]], shape=(2, 2, 1), dtype=int8) 2322 2323 Scaling down from an integer type to another integer type can be a lossy 2324 conversion. Notice in the example below that converting `int16` to `uint8` and 2325 back to `int16` has lost precision. 2326 2327 >>> d = [[[1000], [2000]], [[3000], [4000]]] 2328 >>> d_int16 = tf.convert_to_tensor(d, dtype=tf.int16) 2329 >>> d_uint8 = tf.image.convert_image_dtype(d_int16, dtype=tf.uint8) 2330 >>> d_int16_back = tf.image.convert_image_dtype(d_uint8, dtype=tf.int16) 2331 >>> print(d_int16_back) 2332 tf.Tensor( 2333 [[[ 896] 2334 [1920]] 2335 [[2944] 2336 [3968]]], shape=(2, 2, 1), dtype=int16) 2337 2338 Note that converting from floating point inputs to integer types may lead to 2339 over/underflow problems. Set saturate to `True` to avoid such problem in 2340 problematic conversions. If enabled, saturation will clip the output into the 2341 allowed range before performing a potentially dangerous cast (and only before 2342 performing such a cast, i.e., when casting from a floating point to an integer 2343 type, and when casting from a signed to an unsigned type; `saturate` has no 2344 effect on casts between floats, or on casts that increase the type's range). 2345 2346 Args: 2347 image: An image. 2348 dtype: A `DType` to convert `image` to. 2349 saturate: If `True`, clip the input before casting (if necessary). 2350 name: A name for this operation (optional). 2351 2352 Returns: 2353 `image`, converted to `dtype`. 2354 2355 Raises: 2356 AttributeError: Raises an attribute error when dtype is neither 2357 float nor integer 2358 """ 2359 image = ops.convert_to_tensor(image, name='image') 2360 dtype = dtypes.as_dtype(dtype) 2361 if not dtype.is_floating and not dtype.is_integer: 2362 raise AttributeError('dtype must be either floating point or integer') 2363 if dtype == image.dtype: 2364 return array_ops.identity(image, name=name) 2365 2366 with ops.name_scope(name, 'convert_image', [image]) as name: 2367 # Both integer: use integer multiplication in the larger range 2368 if image.dtype.is_integer and dtype.is_integer: 2369 scale_in = image.dtype.max 2370 scale_out = dtype.max 2371 if scale_in > scale_out: 2372 # Scaling down, scale first, then cast. The scaling factor will 2373 # cause in.max to be mapped to above out.max but below out.max+1, 2374 # so that the output is safely in the supported range. 2375 scale = (scale_in + 1) // (scale_out + 1) 2376 scaled = math_ops.floordiv(image, scale) 2377 2378 if saturate: 2379 return math_ops.saturate_cast(scaled, dtype, name=name) 2380 else: 2381 return math_ops.cast(scaled, dtype, name=name) 2382 else: 2383 # Scaling up, cast first, then scale. The scale will not map in.max to 2384 # out.max, but converting back and forth should result in no change. 2385 if saturate: 2386 cast = math_ops.saturate_cast(image, dtype) 2387 else: 2388 cast = math_ops.cast(image, dtype) 2389 scale = (scale_out + 1) // (scale_in + 1) 2390 return math_ops.multiply(cast, scale, name=name) 2391 elif image.dtype.is_floating and dtype.is_floating: 2392 # Both float: Just cast, no possible overflows in the allowed ranges. 2393 # Note: We're ignoring float overflows. If your image dynamic range 2394 # exceeds float range, you're on your own. 2395 return math_ops.cast(image, dtype, name=name) 2396 else: 2397 if image.dtype.is_integer: 2398 # Converting to float: first cast, then scale. No saturation possible. 2399 cast = math_ops.cast(image, dtype) 2400 scale = 1. / image.dtype.max 2401 return math_ops.multiply(cast, scale, name=name) 2402 else: 2403 # Converting from float: first scale, then cast 2404 scale = dtype.max + 0.5 # avoid rounding problems in the cast 2405 scaled = math_ops.multiply(image, scale) 2406 if saturate: 2407 return math_ops.saturate_cast(scaled, dtype, name=name) 2408 else: 2409 return math_ops.cast(scaled, dtype, name=name) 2410 2411 2412@tf_export('image.rgb_to_grayscale') 2413@dispatch.add_dispatch_support 2414def rgb_to_grayscale(images, name=None): 2415 """Converts one or more images from RGB to Grayscale. 2416 2417 Outputs a tensor of the same `DType` and rank as `images`. The size of the 2418 last dimension of the output is 1, containing the Grayscale value of the 2419 pixels. 2420 2421 >>> original = tf.constant([[[1.0, 2.0, 3.0]]]) 2422 >>> converted = tf.image.rgb_to_grayscale(original) 2423 >>> print(converted.numpy()) 2424 [[[1.81...]]] 2425 2426 Args: 2427 images: The RGB tensor to convert. The last dimension must have size 3 and 2428 should contain RGB values. 2429 name: A name for the operation (optional). 2430 2431 Returns: 2432 The converted grayscale image(s). 2433 """ 2434 with ops.name_scope(name, 'rgb_to_grayscale', [images]) as name: 2435 images = ops.convert_to_tensor(images, name='images') 2436 # Remember original dtype to so we can convert back if needed 2437 orig_dtype = images.dtype 2438 flt_image = convert_image_dtype(images, dtypes.float32) 2439 2440 # Reference for converting between RGB and grayscale. 2441 # https://en.wikipedia.org/wiki/Luma_%28video%29 2442 rgb_weights = [0.2989, 0.5870, 0.1140] 2443 gray_float = math_ops.tensordot(flt_image, rgb_weights, [-1, -1]) 2444 gray_float = array_ops.expand_dims(gray_float, -1) 2445 return convert_image_dtype(gray_float, orig_dtype, name=name) 2446 2447 2448@tf_export('image.grayscale_to_rgb') 2449@dispatch.add_dispatch_support 2450def grayscale_to_rgb(images, name=None): 2451 """Converts one or more images from Grayscale to RGB. 2452 2453 Outputs a tensor of the same `DType` and rank as `images`. The size of the 2454 last dimension of the output is 3, containing the RGB value of the pixels. 2455 The input images' last dimension must be size 1. 2456 2457 >>> original = tf.constant([[[1.0], [2.0], [3.0]]]) 2458 >>> converted = tf.image.grayscale_to_rgb(original) 2459 >>> print(converted.numpy()) 2460 [[[1. 1. 1.] 2461 [2. 2. 2.] 2462 [3. 3. 3.]]] 2463 2464 Args: 2465 images: The Grayscale tensor to convert. The last dimension must be size 1. 2466 name: A name for the operation (optional). 2467 2468 Returns: 2469 The converted grayscale image(s). 2470 """ 2471 with ops.name_scope(name, 'grayscale_to_rgb', [images]) as name: 2472 images = _AssertGrayscaleImage(images) 2473 2474 images = ops.convert_to_tensor(images, name='images') 2475 rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0) 2476 shape_list = ([array_ops.ones(rank_1, dtype=dtypes.int32)] + 2477 [array_ops.expand_dims(3, 0)]) 2478 multiples = array_ops.concat(shape_list, 0) 2479 rgb = array_ops.tile(images, multiples, name=name) 2480 rgb.set_shape(images.get_shape()[:-1].concatenate([3])) 2481 return rgb 2482 2483 2484# pylint: disable=invalid-name 2485@tf_export('image.random_hue') 2486@dispatch.add_dispatch_support 2487def random_hue(image, max_delta, seed=None): 2488 """Adjust the hue of RGB images by a random factor. 2489 2490 Equivalent to `adjust_hue()` but uses a `delta` randomly 2491 picked in the interval `[-max_delta, max_delta)`. 2492 2493 `max_delta` must be in the interval `[0, 0.5]`. 2494 2495 Usage Example: 2496 2497 >>> x = [[[1.0, 2.0, 3.0], 2498 ... [4.0, 5.0, 6.0]], 2499 ... [[7.0, 8.0, 9.0], 2500 ... [10.0, 11.0, 12.0]]] 2501 >>> tf.image.random_hue(x, 0.2) 2502 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...> 2503 2504 For producing deterministic results given a `seed` value, use 2505 `tf.image.stateless_random_hue`. Unlike using the `seed` param with 2506 `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the same 2507 results given the same seed independent of how many times the function is 2508 called, and independent of global seed settings (e.g. tf.random.set_seed). 2509 2510 Args: 2511 image: RGB image or images. The size of the last dimension must be 3. 2512 max_delta: float. The maximum value for the random delta. 2513 seed: An operation-specific seed. It will be used in conjunction with the 2514 graph-level seed to determine the real seeds that will be used in this 2515 operation. Please see the documentation of set_random_seed for its 2516 interaction with the graph-level random seed. 2517 2518 Returns: 2519 Adjusted image(s), same shape and DType as `image`. 2520 2521 Raises: 2522 ValueError: if `max_delta` is invalid. 2523 """ 2524 if max_delta > 0.5: 2525 raise ValueError('max_delta must be <= 0.5.') 2526 2527 if max_delta < 0: 2528 raise ValueError('max_delta must be non-negative.') 2529 2530 delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed) 2531 return adjust_hue(image, delta) 2532 2533 2534@tf_export('image.stateless_random_hue', v1=[]) 2535@dispatch.add_dispatch_support 2536def stateless_random_hue(image, max_delta, seed): 2537 """Adjust the hue of RGB images by a random factor deterministically. 2538 2539 Equivalent to `adjust_hue()` but uses a `delta` randomly picked in the 2540 interval `[-max_delta, max_delta)`. 2541 2542 Guarantees the same results given the same `seed` independent of how many 2543 times the function is called, and independent of global seed settings (e.g. 2544 `tf.random.set_seed`). 2545 2546 `max_delta` must be in the interval `[0, 0.5]`. 2547 2548 Usage Example: 2549 2550 >>> x = [[[1.0, 2.0, 3.0], 2551 ... [4.0, 5.0, 6.0]], 2552 ... [[7.0, 8.0, 9.0], 2553 ... [10.0, 11.0, 12.0]]] 2554 >>> seed = (1, 2) 2555 >>> tf.image.stateless_random_hue(x, 0.2, seed) 2556 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2557 array([[[ 1.6514902, 1. , 3. ], 2558 [ 4.65149 , 4. , 6. ]], 2559 [[ 7.65149 , 7. , 9. ], 2560 [10.65149 , 10. , 12. ]]], dtype=float32)> 2561 2562 Args: 2563 image: RGB image or images. The size of the last dimension must be 3. 2564 max_delta: float. The maximum value for the random delta. 2565 seed: A shape [2] Tensor, the seed to the random number generator. Must have 2566 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 2567 2568 Returns: 2569 Adjusted image(s), same shape and DType as `image`. 2570 2571 Raises: 2572 ValueError: if `max_delta` is invalid. 2573 """ 2574 if max_delta > 0.5: 2575 raise ValueError('max_delta must be <= 0.5.') 2576 2577 if max_delta < 0: 2578 raise ValueError('max_delta must be non-negative.') 2579 2580 delta = stateless_random_ops.stateless_random_uniform( 2581 shape=[], minval=-max_delta, maxval=max_delta, seed=seed) 2582 return adjust_hue(image, delta) 2583 2584 2585@tf_export('image.adjust_hue') 2586@dispatch.add_dispatch_support 2587def adjust_hue(image, delta, name=None): 2588 """Adjust hue of RGB images. 2589 2590 This is a convenience method that converts an RGB image to float 2591 representation, converts it to HSV, adds an offset to the 2592 hue channel, converts back to RGB and then back to the original 2593 data type. If several adjustments are chained it is advisable to minimize 2594 the number of redundant conversions. 2595 2596 `image` is an RGB image. The image hue is adjusted by converting the 2597 image(s) to HSV and rotating the hue channel (H) by 2598 `delta`. The image is then converted back to RGB. 2599 2600 `delta` must be in the interval `[-1, 1]`. 2601 2602 Usage Example: 2603 2604 >>> x = [[[1.0, 2.0, 3.0], 2605 ... [4.0, 5.0, 6.0]], 2606 ... [[7.0, 8.0, 9.0], 2607 ... [10.0, 11.0, 12.0]]] 2608 >>> tf.image.adjust_hue(x, 0.2) 2609 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2610 array([[[ 2.3999996, 1. , 3. ], 2611 [ 5.3999996, 4. , 6. ]], 2612 [[ 8.4 , 7. , 9. ], 2613 [11.4 , 10. , 12. ]]], dtype=float32)> 2614 2615 Args: 2616 image: RGB image or images. The size of the last dimension must be 3. 2617 delta: float. How much to add to the hue channel. 2618 name: A name for this operation (optional). 2619 2620 Returns: 2621 Adjusted image(s), same shape and DType as `image`. 2622 2623 Usage Example: 2624 2625 >>> image = [[[1, 2, 3], [4, 5, 6]], 2626 ... [[7, 8, 9], [10, 11, 12]], 2627 ... [[13, 14, 15], [16, 17, 18]]] 2628 >>> image = tf.constant(image) 2629 >>> tf.image.adjust_hue(image, 0.2) 2630 <tf.Tensor: shape=(3, 2, 3), dtype=int32, numpy= 2631 array([[[ 2, 1, 3], 2632 [ 5, 4, 6]], 2633 [[ 8, 7, 9], 2634 [11, 10, 12]], 2635 [[14, 13, 15], 2636 [17, 16, 18]]], dtype=int32)> 2637 """ 2638 with ops.name_scope(name, 'adjust_hue', [image]) as name: 2639 image = ops.convert_to_tensor(image, name='image') 2640 # Remember original dtype to so we can convert back if needed 2641 orig_dtype = image.dtype 2642 if orig_dtype in (dtypes.float16, dtypes.float32): 2643 flt_image = image 2644 else: 2645 flt_image = convert_image_dtype(image, dtypes.float32) 2646 2647 rgb_altered = gen_image_ops.adjust_hue(flt_image, delta) 2648 2649 return convert_image_dtype(rgb_altered, orig_dtype) 2650 2651 2652# pylint: disable=invalid-name 2653@tf_export('image.random_jpeg_quality') 2654@dispatch.add_dispatch_support 2655def random_jpeg_quality(image, min_jpeg_quality, max_jpeg_quality, seed=None): 2656 """Randomly changes jpeg encoding quality for inducing jpeg noise. 2657 2658 `min_jpeg_quality` must be in the interval `[0, 100]` and less than 2659 `max_jpeg_quality`. 2660 `max_jpeg_quality` must be in the interval `[0, 100]`. 2661 2662 Usage Example: 2663 2664 >>> x = [[[1.0, 2.0, 3.0], 2665 ... [4.0, 5.0, 6.0]], 2666 ... [[7.0, 8.0, 9.0], 2667 ... [10.0, 11.0, 12.0]]] 2668 >>> tf.image.random_jpeg_quality(x, 75, 95) 2669 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...> 2670 2671 For producing deterministic results given a `seed` value, use 2672 `tf.image.stateless_random_jpeg_quality`. Unlike using the `seed` param 2673 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 2674 same results given the same seed independent of how many times the function is 2675 called, and independent of global seed settings (e.g. tf.random.set_seed). 2676 2677 Args: 2678 image: 3D image. Size of the last dimension must be 1 or 3. 2679 min_jpeg_quality: Minimum jpeg encoding quality to use. 2680 max_jpeg_quality: Maximum jpeg encoding quality to use. 2681 seed: An operation-specific seed. It will be used in conjunction with the 2682 graph-level seed to determine the real seeds that will be used in this 2683 operation. Please see the documentation of set_random_seed for its 2684 interaction with the graph-level random seed. 2685 2686 Returns: 2687 Adjusted image(s), same shape and DType as `image`. 2688 2689 Raises: 2690 ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid. 2691 """ 2692 if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or min_jpeg_quality > 100 or 2693 max_jpeg_quality > 100): 2694 raise ValueError('jpeg encoding range must be between 0 and 100.') 2695 2696 if min_jpeg_quality >= max_jpeg_quality: 2697 raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.') 2698 2699 jpeg_quality = random_ops.random_uniform([], 2700 min_jpeg_quality, 2701 max_jpeg_quality, 2702 seed=seed, 2703 dtype=dtypes.int32) 2704 return adjust_jpeg_quality(image, jpeg_quality) 2705 2706 2707@tf_export('image.stateless_random_jpeg_quality', v1=[]) 2708@dispatch.add_dispatch_support 2709def stateless_random_jpeg_quality(image, 2710 min_jpeg_quality, 2711 max_jpeg_quality, 2712 seed): 2713 """Deterministically radomize jpeg encoding quality for inducing jpeg noise. 2714 2715 Guarantees the same results given the same `seed` independent of how many 2716 times the function is called, and independent of global seed settings (e.g. 2717 `tf.random.set_seed`). 2718 2719 `min_jpeg_quality` must be in the interval `[0, 100]` and less than 2720 `max_jpeg_quality`. 2721 `max_jpeg_quality` must be in the interval `[0, 100]`. 2722 2723 Usage Example: 2724 2725 >>> x = [[[1, 2, 3], 2726 ... [4, 5, 6]], 2727 ... [[7, 8, 9], 2728 ... [10, 11, 12]]] 2729 >>> x_uint8 = tf.cast(x, tf.uint8) 2730 >>> seed = (1, 2) 2731 >>> tf.image.stateless_random_jpeg_quality(x_uint8, 75, 95, seed) 2732 <tf.Tensor: shape=(2, 2, 3), dtype=uint8, numpy= 2733 array([[[ 0, 4, 5], 2734 [ 1, 5, 6]], 2735 [[ 5, 9, 10], 2736 [ 5, 9, 10]]], dtype=uint8)> 2737 2738 Args: 2739 image: 3D image. Size of the last dimension must be 1 or 3. 2740 min_jpeg_quality: Minimum jpeg encoding quality to use. 2741 max_jpeg_quality: Maximum jpeg encoding quality to use. 2742 seed: A shape [2] Tensor, the seed to the random number generator. Must have 2743 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 2744 2745 Returns: 2746 Adjusted image(s), same shape and DType as `image`. 2747 2748 Raises: 2749 ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid. 2750 """ 2751 if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or min_jpeg_quality > 100 or 2752 max_jpeg_quality > 100): 2753 raise ValueError('jpeg encoding range must be between 0 and 100.') 2754 2755 if min_jpeg_quality >= max_jpeg_quality: 2756 raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.') 2757 2758 jpeg_quality = stateless_random_ops.stateless_random_uniform( 2759 shape=[], minval=min_jpeg_quality, maxval=max_jpeg_quality, seed=seed, 2760 dtype=dtypes.int32) 2761 return adjust_jpeg_quality(image, jpeg_quality) 2762 2763 2764@tf_export('image.adjust_jpeg_quality') 2765@dispatch.add_dispatch_support 2766def adjust_jpeg_quality(image, jpeg_quality, name=None): 2767 """Adjust jpeg encoding quality of an image. 2768 2769 This is a convenience method that converts an image to uint8 representation, 2770 encodes it to jpeg with `jpeg_quality`, decodes it, and then converts back 2771 to the original data type. 2772 2773 `jpeg_quality` must be in the interval `[0, 100]`. 2774 2775 Usage Example: 2776 2777 >>> x = [[[1.0, 2.0, 3.0], 2778 ... [4.0, 5.0, 6.0]], 2779 ... [[7.0, 8.0, 9.0], 2780 ... [10.0, 11.0, 12.0]]] 2781 >>> tf.image.adjust_jpeg_quality(x, 75) 2782 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2783 array([[[1., 1., 1.], 2784 [1., 1., 1.]], 2785 [[1., 1., 1.], 2786 [1., 1., 1.]]], dtype=float32)> 2787 2788 Args: 2789 image: 3D image. The size of the last dimension must be None, 1 or 3. 2790 jpeg_quality: Python int or Tensor of type int32. jpeg encoding quality. 2791 name: A name for this operation (optional). 2792 2793 Returns: 2794 Adjusted image, same shape and DType as `image`. 2795 2796 Raises: 2797 InvalidArgumentError: quality must be in [0,100] 2798 InvalidArgumentError: image must have 1 or 3 channels 2799 """ 2800 with ops.name_scope(name, 'adjust_jpeg_quality', [image]): 2801 image = ops.convert_to_tensor(image, name='image') 2802 channels = image.shape.as_list()[-1] 2803 # Remember original dtype to so we can convert back if needed 2804 orig_dtype = image.dtype 2805 image = convert_image_dtype(image, dtypes.uint8, saturate=True) 2806 if not _is_tensor(jpeg_quality): 2807 # If jpeg_quality is a int (not tensor). 2808 jpeg_quality = ops.convert_to_tensor(jpeg_quality, dtype=dtypes.int32) 2809 image = gen_image_ops.encode_jpeg_variable_quality(image, jpeg_quality) 2810 2811 image = gen_image_ops.decode_jpeg(image, channels=channels) 2812 return convert_image_dtype(image, orig_dtype, saturate=True) 2813 2814 2815@tf_export('image.random_saturation') 2816@dispatch.add_dispatch_support 2817def random_saturation(image, lower, upper, seed=None): 2818 """Adjust the saturation of RGB images by a random factor. 2819 2820 Equivalent to `adjust_saturation()` but uses a `saturation_factor` randomly 2821 picked in the interval `[lower, upper)`. 2822 2823 Usage Example: 2824 2825 >>> x = [[[1.0, 2.0, 3.0], 2826 ... [4.0, 5.0, 6.0]], 2827 ... [[7.0, 8.0, 9.0], 2828 ... [10.0, 11.0, 12.0]]] 2829 >>> tf.image.random_saturation(x, 5, 10) 2830 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2831 array([[[ 0. , 1.5, 3. ], 2832 [ 0. , 3. , 6. ]], 2833 [[ 0. , 4.5, 9. ], 2834 [ 0. , 6. , 12. ]]], dtype=float32)> 2835 2836 For producing deterministic results given a `seed` value, use 2837 `tf.image.stateless_random_saturation`. Unlike using the `seed` param 2838 with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the 2839 same results given the same seed independent of how many times the function is 2840 called, and independent of global seed settings (e.g. tf.random.set_seed). 2841 2842 Args: 2843 image: RGB image or images. The size of the last dimension must be 3. 2844 lower: float. Lower bound for the random saturation factor. 2845 upper: float. Upper bound for the random saturation factor. 2846 seed: An operation-specific seed. It will be used in conjunction with the 2847 graph-level seed to determine the real seeds that will be used in this 2848 operation. Please see the documentation of set_random_seed for its 2849 interaction with the graph-level random seed. 2850 2851 Returns: 2852 Adjusted image(s), same shape and DType as `image`. 2853 2854 Raises: 2855 ValueError: if `upper <= lower` or if `lower < 0`. 2856 """ 2857 if upper <= lower: 2858 raise ValueError('upper must be > lower.') 2859 2860 if lower < 0: 2861 raise ValueError('lower must be non-negative.') 2862 2863 saturation_factor = random_ops.random_uniform([], lower, upper, seed=seed) 2864 return adjust_saturation(image, saturation_factor) 2865 2866 2867@tf_export('image.stateless_random_saturation', v1=[]) 2868@dispatch.add_dispatch_support 2869def stateless_random_saturation(image, lower, upper, seed=None): 2870 """Adjust the saturation of RGB images by a random factor deterministically. 2871 2872 Equivalent to `adjust_saturation()` but uses a `saturation_factor` randomly 2873 picked in the interval `[lower, upper)`. 2874 2875 Guarantees the same results given the same `seed` independent of how many 2876 times the function is called, and independent of global seed settings (e.g. 2877 `tf.random.set_seed`). 2878 2879 Usage Example: 2880 2881 >>> x = [[[1.0, 2.0, 3.0], 2882 ... [4.0, 5.0, 6.0]], 2883 ... [[7.0, 8.0, 9.0], 2884 ... [10.0, 11.0, 12.0]]] 2885 >>> seed = (1, 2) 2886 >>> tf.image.stateless_random_saturation(x, 0.5, 1.0, seed) 2887 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2888 array([[[ 1.1559395, 2.0779698, 3. ], 2889 [ 4.1559396, 5.07797 , 6. ]], 2890 [[ 7.1559396, 8.07797 , 9. ], 2891 [10.155939 , 11.07797 , 12. ]]], dtype=float32)> 2892 2893 Args: 2894 image: RGB image or images. The size of the last dimension must be 3. 2895 lower: float. Lower bound for the random saturation factor. 2896 upper: float. Upper bound for the random saturation factor. 2897 seed: A shape [2] Tensor, the seed to the random number generator. Must have 2898 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 2899 2900 Returns: 2901 Adjusted image(s), same shape and DType as `image`. 2902 2903 Raises: 2904 ValueError: if `upper <= lower` or if `lower < 0`. 2905 """ 2906 if upper <= lower: 2907 raise ValueError('upper must be > lower.') 2908 2909 if lower < 0: 2910 raise ValueError('lower must be non-negative.') 2911 2912 saturation_factor = stateless_random_ops.stateless_random_uniform( 2913 shape=[], minval=lower, maxval=upper, seed=seed) 2914 return adjust_saturation(image, saturation_factor) 2915 2916 2917@tf_export('image.adjust_saturation') 2918@dispatch.add_dispatch_support 2919def adjust_saturation(image, saturation_factor, name=None): 2920 """Adjust saturation of RGB images. 2921 2922 This is a convenience method that converts RGB images to float 2923 representation, converts them to HSV, adds an offset to the 2924 saturation channel, converts back to RGB and then back to the original 2925 data type. If several adjustments are chained it is advisable to minimize 2926 the number of redundant conversions. 2927 2928 `image` is an RGB image or images. The image saturation is adjusted by 2929 converting the images to HSV and multiplying the saturation (S) channel by 2930 `saturation_factor` and clipping. The images are then converted back to RGB. 2931 2932 Usage Example: 2933 2934 >>> x = [[[1.0, 2.0, 3.0], 2935 ... [4.0, 5.0, 6.0]], 2936 ... [[7.0, 8.0, 9.0], 2937 ... [10.0, 11.0, 12.0]]] 2938 >>> tf.image.adjust_saturation(x, 0.5) 2939 <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy= 2940 array([[[ 2. , 2.5, 3. ], 2941 [ 5. , 5.5, 6. ]], 2942 [[ 8. , 8.5, 9. ], 2943 [11. , 11.5, 12. ]]], dtype=float32)> 2944 2945 Args: 2946 image: RGB image or images. The size of the last dimension must be 3. 2947 saturation_factor: float. Factor to multiply the saturation by. 2948 name: A name for this operation (optional). 2949 2950 Returns: 2951 Adjusted image(s), same shape and DType as `image`. 2952 2953 Raises: 2954 InvalidArgumentError: input must have 3 channels 2955 """ 2956 with ops.name_scope(name, 'adjust_saturation', [image]) as name: 2957 image = ops.convert_to_tensor(image, name='image') 2958 # Remember original dtype to so we can convert back if needed 2959 orig_dtype = image.dtype 2960 if orig_dtype in (dtypes.float16, dtypes.float32): 2961 flt_image = image 2962 else: 2963 flt_image = convert_image_dtype(image, dtypes.float32) 2964 2965 adjusted = gen_image_ops.adjust_saturation(flt_image, saturation_factor) 2966 2967 return convert_image_dtype(adjusted, orig_dtype) 2968 2969 2970@tf_export('io.is_jpeg', 'image.is_jpeg', v1=['io.is_jpeg', 'image.is_jpeg']) 2971def is_jpeg(contents, name=None): 2972 r"""Convenience function to check if the 'contents' encodes a JPEG image. 2973 2974 Args: 2975 contents: 0-D `string`. The encoded image bytes. 2976 name: A name for the operation (optional) 2977 2978 Returns: 2979 A scalar boolean tensor indicating if 'contents' may be a JPEG image. 2980 is_jpeg is susceptible to false positives. 2981 """ 2982 # Normal JPEGs start with \xff\xd8\xff\xe0 2983 # JPEG with EXIF starts with \xff\xd8\xff\xe1 2984 # Use \xff\xd8\xff to cover both. 2985 with ops.name_scope(name, 'is_jpeg'): 2986 substr = string_ops.substr(contents, 0, 3) 2987 return math_ops.equal(substr, b'\xff\xd8\xff', name=name) 2988 2989 2990def _is_png(contents, name=None): 2991 r"""Convenience function to check if the 'contents' encodes a PNG image. 2992 2993 Args: 2994 contents: 0-D `string`. The encoded image bytes. 2995 name: A name for the operation (optional) 2996 2997 Returns: 2998 A scalar boolean tensor indicating if 'contents' may be a PNG image. 2999 is_png is susceptible to false positives. 3000 """ 3001 with ops.name_scope(name, 'is_png'): 3002 substr = string_ops.substr(contents, 0, 3) 3003 return math_ops.equal(substr, b'\211PN', name=name) 3004 3005 3006tf_export( 3007 'io.decode_and_crop_jpeg', 3008 'image.decode_and_crop_jpeg', 3009 v1=['io.decode_and_crop_jpeg', 'image.decode_and_crop_jpeg'])( 3010 dispatch.add_dispatch_support(gen_image_ops.decode_and_crop_jpeg)) 3011 3012tf_export( 3013 'io.decode_bmp', 3014 'image.decode_bmp', 3015 v1=['io.decode_bmp', 'image.decode_bmp'])( 3016 dispatch.add_dispatch_support(gen_image_ops.decode_bmp)) 3017tf_export( 3018 'io.decode_gif', 3019 'image.decode_gif', 3020 v1=['io.decode_gif', 'image.decode_gif'])( 3021 dispatch.add_dispatch_support(gen_image_ops.decode_gif)) 3022tf_export( 3023 'io.decode_jpeg', 3024 'image.decode_jpeg', 3025 v1=['io.decode_jpeg', 'image.decode_jpeg'])( 3026 dispatch.add_dispatch_support(gen_image_ops.decode_jpeg)) 3027tf_export( 3028 'io.decode_png', 3029 'image.decode_png', 3030 v1=['io.decode_png', 'image.decode_png'])( 3031 dispatch.add_dispatch_support(gen_image_ops.decode_png)) 3032 3033tf_export( 3034 'io.encode_jpeg', 3035 'image.encode_jpeg', 3036 v1=['io.encode_jpeg', 'image.encode_jpeg'])( 3037 dispatch.add_dispatch_support(gen_image_ops.encode_jpeg)) 3038tf_export( 3039 'io.extract_jpeg_shape', 3040 'image.extract_jpeg_shape', 3041 v1=['io.extract_jpeg_shape', 'image.extract_jpeg_shape'])( 3042 dispatch.add_dispatch_support(gen_image_ops.extract_jpeg_shape)) 3043 3044 3045@tf_export('io.encode_png', 'image.encode_png') 3046@dispatch.add_dispatch_support 3047def encode_png(image, compression=-1, name=None): 3048 r"""PNG-encode an image. 3049 3050 `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]` 3051 where `channels` is: 3052 3053 * 1: for grayscale. 3054 * 2: for grayscale + alpha. 3055 * 3: for RGB. 3056 * 4: for RGBA. 3057 3058 The ZLIB compression level, `compression`, can be -1 for the PNG-encoder 3059 default or a value from 0 to 9. 9 is the highest compression level, 3060 generating the smallest output, but is slower. 3061 3062 Args: 3063 image: A `Tensor`. Must be one of the following types: `uint8`, `uint16`. 3064 3-D with shape `[height, width, channels]`. 3065 compression: An optional `int`. Defaults to `-1`. Compression level. 3066 name: A name for the operation (optional). 3067 3068 Returns: 3069 A `Tensor` of type `string`. 3070 """ 3071 return gen_image_ops.encode_png( 3072 ops.convert_to_tensor(image), compression, name) 3073 3074 3075@tf_export( 3076 'io.decode_image', 3077 'image.decode_image', 3078 v1=['io.decode_image', 'image.decode_image']) 3079@dispatch.add_dispatch_support 3080def decode_image(contents, 3081 channels=None, 3082 dtype=dtypes.uint8, 3083 name=None, 3084 expand_animations=True): 3085 """Function for `decode_bmp`, `decode_gif`, `decode_jpeg`, and `decode_png`. 3086 3087 Detects whether an image is a BMP, GIF, JPEG, or PNG, and performs the 3088 appropriate operation to convert the input bytes `string` into a `Tensor` 3089 of type `dtype`. 3090 3091 Note: `decode_gif` returns a 4-D array `[num_frames, height, width, 3]`, as 3092 opposed to `decode_bmp`, `decode_jpeg` and `decode_png`, which return 3-D 3093 arrays `[height, width, num_channels]`. Make sure to take this into account 3094 when constructing your graph if you are intermixing GIF files with BMP, JPEG, 3095 and/or PNG files. Alternately, set the `expand_animations` argument of this 3096 function to `False`, in which case the op will return 3-dimensional tensors 3097 and will truncate animated GIF files to the first frame. 3098 3099 NOTE: If the first frame of an animated GIF does not occupy the entire 3100 canvas (maximum frame width x maximum frame height), then it fills the 3101 unoccupied areas (in the first frame) with zeros (black). For frames after the 3102 first frame that does not occupy the entire canvas, it uses the previous 3103 frame to fill the unoccupied areas. 3104 3105 Args: 3106 contents: A `Tensor` of type `string`. 0-D. The encoded image bytes. 3107 channels: An optional `int`. Defaults to `0`. Number of color channels for 3108 the decoded image. 3109 dtype: The desired DType of the returned `Tensor`. 3110 name: A name for the operation (optional) 3111 expand_animations: An optional `bool`. Defaults to `True`. Controls the 3112 shape of the returned op's output. If `True`, the returned op will produce 3113 a 3-D tensor for PNG, JPEG, and BMP files; and a 4-D tensor for all GIFs, 3114 whether animated or not. If, `False`, the returned op will produce a 3-D 3115 tensor for all file types and will truncate animated GIFs to the first 3116 frame. 3117 3118 Returns: 3119 `Tensor` with type `dtype` and a 3- or 4-dimensional shape, depending on 3120 the file type and the value of the `expand_animations` parameter. 3121 3122 Raises: 3123 ValueError: On incorrect number of channels. 3124 """ 3125 with ops.name_scope(name, 'decode_image'): 3126 if compat.forward_compatible(2020, 8, 14): 3127 channels = 0 if channels is None else channels 3128 if dtype not in [dtypes.float32, dtypes.uint8, dtypes.uint16]: 3129 dest_dtype = dtype 3130 dtype = dtypes.uint16 3131 return convert_image_dtype(gen_image_ops.decode_image( 3132 contents=contents, 3133 channels=channels, 3134 expand_animations=expand_animations, 3135 dtype=dtype), dest_dtype) 3136 else: 3137 return gen_image_ops.decode_image( 3138 contents=contents, 3139 channels=channels, 3140 expand_animations=expand_animations, 3141 dtype=dtype) 3142 3143 if channels not in (None, 0, 1, 3, 4): 3144 raise ValueError('channels must be in (None, 0, 1, 3, 4)') 3145 substr = string_ops.substr(contents, 0, 3) 3146 3147 def _bmp(): 3148 """Decodes a BMP image.""" 3149 signature = string_ops.substr(contents, 0, 2) 3150 # Create assert op to check that bytes are BMP decodable 3151 is_bmp = math_ops.equal(signature, 'BM', name='is_bmp') 3152 decode_msg = 'Unable to decode bytes as JPEG, PNG, GIF, or BMP' 3153 assert_decode = control_flow_ops.Assert(is_bmp, [decode_msg]) 3154 bmp_channels = 0 if channels is None else channels 3155 good_channels = math_ops.not_equal(bmp_channels, 1, name='check_channels') 3156 channels_msg = ('Channels must be in (None, 0, 3, 4) when decoding BMP ' 3157 'images') 3158 assert_channels = control_flow_ops.Assert(good_channels, [channels_msg]) 3159 with ops.control_dependencies([assert_decode, assert_channels]): 3160 return convert_image_dtype( 3161 gen_image_ops.decode_bmp(contents, channels=bmp_channels), dtype) 3162 3163 def _gif(): 3164 """Decodes a GIF image.""" 3165 # Create assert to make sure that channels is not set to 1 3166 # Already checked above that channels is in (None, 0, 1, 3) 3167 gif_channels = 0 if channels is None else channels 3168 good_channels = math_ops.logical_and( 3169 math_ops.not_equal(gif_channels, 1, name='check_gif_channels'), 3170 math_ops.not_equal(gif_channels, 4, name='check_gif_channels')) 3171 channels_msg = 'Channels must be in (None, 0, 3) when decoding GIF images' 3172 assert_channels = control_flow_ops.Assert(good_channels, [channels_msg]) 3173 with ops.control_dependencies([assert_channels]): 3174 result = convert_image_dtype(gen_image_ops.decode_gif(contents), dtype) 3175 if not expand_animations: 3176 # For now we decode animated GIFs fully and toss out all but the 3177 # first frame when expand_animations is False 3178 result = array_ops.gather(result, 0) 3179 return result 3180 3181 def check_gif(): 3182 # Create assert op to check that bytes are GIF decodable 3183 is_gif = math_ops.equal(substr, b'\x47\x49\x46', name='is_gif') 3184 return control_flow_ops.cond(is_gif, _gif, _bmp, name='cond_gif') 3185 3186 def _png(): 3187 """Decodes a PNG image.""" 3188 return convert_image_dtype( 3189 gen_image_ops.decode_png( 3190 contents, 3191 channels, 3192 dtype=dtypes.uint8 if dtype == dtypes.uint8 else dtypes.uint16), 3193 dtype) 3194 3195 def check_png(): 3196 """Checks if an image is PNG.""" 3197 return control_flow_ops.cond( 3198 _is_png(contents), _png, check_gif, name='cond_png') 3199 3200 def _jpeg(): 3201 """Decodes a jpeg image.""" 3202 jpeg_channels = 0 if channels is None else channels 3203 good_channels = math_ops.not_equal( 3204 jpeg_channels, 4, name='check_jpeg_channels') 3205 channels_msg = ('Channels must be in (None, 0, 1, 3) when decoding JPEG ' 3206 'images') 3207 assert_channels = control_flow_ops.Assert(good_channels, [channels_msg]) 3208 with ops.control_dependencies([assert_channels]): 3209 return convert_image_dtype( 3210 gen_image_ops.decode_jpeg(contents, channels), dtype) 3211 3212 # Decode normal JPEG images (start with \xff\xd8\xff\xe0) 3213 # as well as JPEG images with EXIF data (start with \xff\xd8\xff\xe1). 3214 return control_flow_ops.cond( 3215 is_jpeg(contents), _jpeg, check_png, name='cond_jpeg') 3216 3217 3218@tf_export('image.total_variation') 3219@dispatch.add_dispatch_support 3220def total_variation(images, name=None): 3221 """Calculate and return the total variation for one or more images. 3222 3223 The total variation is the sum of the absolute differences for neighboring 3224 pixel-values in the input images. This measures how much noise is in the 3225 images. 3226 3227 This can be used as a loss-function during optimization so as to suppress 3228 noise in images. If you have a batch of images, then you should calculate 3229 the scalar loss-value as the sum: 3230 `loss = tf.reduce_sum(tf.image.total_variation(images))` 3231 3232 This implements the anisotropic 2-D version of the formula described here: 3233 3234 https://en.wikipedia.org/wiki/Total_variation_denoising 3235 3236 Args: 3237 images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor 3238 of shape `[height, width, channels]`. 3239 name: A name for the operation (optional). 3240 3241 Raises: 3242 ValueError: if images.shape is not a 3-D or 4-D vector. 3243 3244 Returns: 3245 The total variation of `images`. 3246 3247 If `images` was 4-D, return a 1-D float Tensor of shape `[batch]` with the 3248 total variation for each image in the batch. 3249 If `images` was 3-D, return a scalar float with the total variation for 3250 that image. 3251 """ 3252 3253 with ops.name_scope(name, 'total_variation'): 3254 ndims = images.get_shape().ndims 3255 3256 if ndims == 3: 3257 # The input is a single image with shape [height, width, channels]. 3258 3259 # Calculate the difference of neighboring pixel-values. 3260 # The images are shifted one pixel along the height and width by slicing. 3261 pixel_dif1 = images[1:, :, :] - images[:-1, :, :] 3262 pixel_dif2 = images[:, 1:, :] - images[:, :-1, :] 3263 3264 # Sum for all axis. (None is an alias for all axis.) 3265 sum_axis = None 3266 elif ndims == 4: 3267 # The input is a batch of images with shape: 3268 # [batch, height, width, channels]. 3269 3270 # Calculate the difference of neighboring pixel-values. 3271 # The images are shifted one pixel along the height and width by slicing. 3272 pixel_dif1 = images[:, 1:, :, :] - images[:, :-1, :, :] 3273 pixel_dif2 = images[:, :, 1:, :] - images[:, :, :-1, :] 3274 3275 # Only sum for the last 3 axis. 3276 # This results in a 1-D tensor with the total variation for each image. 3277 sum_axis = [1, 2, 3] 3278 else: 3279 raise ValueError('\'images\' must be either 3 or 4-dimensional.') 3280 3281 # Calculate the total variation by taking the absolute value of the 3282 # pixel-differences and summing over the appropriate axis. 3283 tot_var = ( 3284 math_ops.reduce_sum(math_ops.abs(pixel_dif1), axis=sum_axis) + 3285 math_ops.reduce_sum(math_ops.abs(pixel_dif2), axis=sum_axis)) 3286 3287 return tot_var 3288 3289 3290@tf_export('image.sample_distorted_bounding_box', v1=[]) 3291@dispatch.add_dispatch_support 3292def sample_distorted_bounding_box_v2(image_size, 3293 bounding_boxes, 3294 seed=0, 3295 min_object_covered=0.1, 3296 aspect_ratio_range=None, 3297 area_range=None, 3298 max_attempts=None, 3299 use_image_if_no_bounding_boxes=None, 3300 name=None): 3301 """Generate a single randomly distorted bounding box for an image. 3302 3303 Bounding box annotations are often supplied in addition to ground-truth labels 3304 in image recognition or object localization tasks. A common technique for 3305 training such a system is to randomly distort an image while preserving 3306 its content, i.e. *data augmentation*. This Op outputs a randomly distorted 3307 localization of an object, i.e. bounding box, given an `image_size`, 3308 `bounding_boxes` and a series of constraints. 3309 3310 The output of this Op is a single bounding box that may be used to crop the 3311 original image. The output is returned as 3 tensors: `begin`, `size` and 3312 `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the 3313 image. The latter may be supplied to `tf.image.draw_bounding_boxes` to 3314 visualize what the bounding box looks like. 3315 3316 Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. 3317 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width 3318 and the height of the underlying image. 3319 3320 For example, 3321 3322 ```python 3323 # Generate a single distorted bounding box. 3324 begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( 3325 tf.shape(image), 3326 bounding_boxes=bounding_boxes, 3327 min_object_covered=0.1) 3328 3329 # Draw the bounding box in an image summary. 3330 image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), 3331 bbox_for_draw) 3332 tf.compat.v1.summary.image('images_with_box', image_with_box) 3333 3334 # Employ the bounding box to distort the image. 3335 distorted_image = tf.slice(image, begin, size) 3336 ``` 3337 3338 Note that if no bounding box information is available, setting 3339 `use_image_if_no_bounding_boxes = true` will assume there is a single implicit 3340 bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is 3341 false and no bounding boxes are supplied, an error is raised. 3342 3343 For producing deterministic results given a `seed` value, use 3344 `tf.image.stateless_sample_distorted_bounding_box`. Unlike using the `seed` 3345 param with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops 3346 guarantee the same results given the same seed independent of how many times 3347 the function is called, and independent of global seed settings 3348 (e.g. tf.random.set_seed). 3349 3350 Args: 3351 image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`, 3352 `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`. 3353 bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]` 3354 describing the N bounding boxes associated with the image. 3355 seed: An optional `int`. Defaults to `0`. If `seed` is set to non-zero, the 3356 random number generator is seeded by the given `seed`. Otherwise, it is 3357 seeded by a random seed. 3358 min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The 3359 cropped area of the image must contain at least this fraction of any 3360 bounding box supplied. The value of this parameter should be non-negative. 3361 In the case of 0, the cropped area does not need to overlap any of the 3362 bounding boxes supplied. 3363 aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75, 3364 1.33]`. The cropped area of the image must have an aspect `ratio = width / 3365 height` within this range. 3366 area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The 3367 cropped area of the image must contain a fraction of the supplied image 3368 within this range. 3369 max_attempts: An optional `int`. Defaults to `100`. Number of attempts at 3370 generating a cropped region of the image of the specified constraints. 3371 After `max_attempts` failures, return the entire image. 3372 use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`. 3373 Controls behavior if no bounding boxes supplied. If true, assume an 3374 implicit bounding box covering the whole input. If false, raise an error. 3375 name: A name for the operation (optional). 3376 3377 Returns: 3378 A tuple of `Tensor` objects (begin, size, bboxes). 3379 3380 begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing 3381 `[offset_height, offset_width, 0]`. Provide as input to 3382 `tf.slice`. 3383 size: A `Tensor`. Has the same type as `image_size`. 1-D, containing 3384 `[target_height, target_width, -1]`. Provide as input to 3385 `tf.slice`. 3386 bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing 3387 the distorted bounding box. 3388 Provide as input to `tf.image.draw_bounding_boxes`. 3389 """ 3390 seed1, seed2 = random_seed.get_seed(seed) if seed else (0, 0) 3391 with ops.name_scope(name, 'sample_distorted_bounding_box'): 3392 return gen_image_ops.sample_distorted_bounding_box_v2( 3393 image_size, 3394 bounding_boxes, 3395 seed=seed1, 3396 seed2=seed2, 3397 min_object_covered=min_object_covered, 3398 aspect_ratio_range=aspect_ratio_range, 3399 area_range=area_range, 3400 max_attempts=max_attempts, 3401 use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes, 3402 name=name) 3403 3404 3405@tf_export('image.stateless_sample_distorted_bounding_box', v1=[]) 3406@dispatch.add_dispatch_support 3407def stateless_sample_distorted_bounding_box(image_size, 3408 bounding_boxes, 3409 seed, 3410 min_object_covered=0.1, 3411 aspect_ratio_range=None, 3412 area_range=None, 3413 max_attempts=None, 3414 use_image_if_no_bounding_boxes=None, 3415 name=None): 3416 """Generate a randomly distorted bounding box for an image deterministically. 3417 3418 Bounding box annotations are often supplied in addition to ground-truth labels 3419 in image recognition or object localization tasks. A common technique for 3420 training such a system is to randomly distort an image while preserving 3421 its content, i.e. *data augmentation*. This Op, given the same `seed`, 3422 deterministically outputs a randomly distorted localization of an object, i.e. 3423 bounding box, given an `image_size`, `bounding_boxes` and a series of 3424 constraints. 3425 3426 The output of this Op is a single bounding box that may be used to crop the 3427 original image. The output is returned as 3 tensors: `begin`, `size` and 3428 `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the 3429 image. The latter may be supplied to `tf.image.draw_bounding_boxes` to 3430 visualize what the bounding box looks like. 3431 3432 Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. 3433 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width 3434 and the height of the underlying image. 3435 3436 The output of this Op is guaranteed to be the same given the same `seed` and 3437 is independent of how many times the function is called, and independent of 3438 global seed settings (e.g. `tf.random.set_seed`). 3439 3440 Example usage: 3441 3442 >>> image = np.array([[[1], [2], [3]], [[4], [5], [6]], [[7], [8], [9]]]) 3443 >>> bbox = tf.constant( 3444 ... [0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]) 3445 >>> seed = (1, 2) 3446 >>> # Generate a single distorted bounding box. 3447 >>> bbox_begin, bbox_size, bbox_draw = ( 3448 ... tf.image.stateless_sample_distorted_bounding_box( 3449 ... tf.shape(image), bounding_boxes=bbox, seed=seed)) 3450 >>> # Employ the bounding box to distort the image. 3451 >>> tf.slice(image, bbox_begin, bbox_size) 3452 <tf.Tensor: shape=(2, 2, 1), dtype=int64, numpy= 3453 array([[[1], 3454 [2]], 3455 [[4], 3456 [5]]])> 3457 >>> # Draw the bounding box in an image summary. 3458 >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) 3459 >>> tf.image.draw_bounding_boxes( 3460 ... tf.expand_dims(tf.cast(image, tf.float32),0), bbox_draw, colors) 3461 <tf.Tensor: shape=(1, 3, 3, 1), dtype=float32, numpy= 3462 array([[[[1.], 3463 [1.], 3464 [3.]], 3465 [[1.], 3466 [1.], 3467 [6.]], 3468 [[7.], 3469 [8.], 3470 [9.]]]], dtype=float32)> 3471 3472 Note that if no bounding box information is available, setting 3473 `use_image_if_no_bounding_boxes = true` will assume there is a single implicit 3474 bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is 3475 false and no bounding boxes are supplied, an error is raised. 3476 3477 Args: 3478 image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`, 3479 `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`. 3480 bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]` 3481 describing the N bounding boxes associated with the image. 3482 seed: A shape [2] Tensor, the seed to the random number generator. Must have 3483 dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) 3484 min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The 3485 cropped area of the image must contain at least this fraction of any 3486 bounding box supplied. The value of this parameter should be non-negative. 3487 In the case of 0, the cropped area does not need to overlap any of the 3488 bounding boxes supplied. 3489 aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75, 3490 1.33]`. The cropped area of the image must have an aspect `ratio = width / 3491 height` within this range. 3492 area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The 3493 cropped area of the image must contain a fraction of the supplied image 3494 within this range. 3495 max_attempts: An optional `int`. Defaults to `100`. Number of attempts at 3496 generating a cropped region of the image of the specified constraints. 3497 After `max_attempts` failures, return the entire image. 3498 use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`. 3499 Controls behavior if no bounding boxes supplied. If true, assume an 3500 implicit bounding box covering the whole input. If false, raise an error. 3501 name: A name for the operation (optional). 3502 3503 Returns: 3504 A tuple of `Tensor` objects (begin, size, bboxes). 3505 3506 begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing 3507 `[offset_height, offset_width, 0]`. Provide as input to 3508 `tf.slice`. 3509 size: A `Tensor`. Has the same type as `image_size`. 1-D, containing 3510 `[target_height, target_width, -1]`. Provide as input to 3511 `tf.slice`. 3512 bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing 3513 the distorted bounding box. 3514 Provide as input to `tf.image.draw_bounding_boxes`. 3515 """ 3516 with ops.name_scope(name, 'stateless_sample_distorted_bounding_box'): 3517 return gen_image_ops.stateless_sample_distorted_bounding_box( 3518 image_size=image_size, 3519 bounding_boxes=bounding_boxes, 3520 seed=seed, 3521 min_object_covered=min_object_covered, 3522 aspect_ratio_range=aspect_ratio_range, 3523 area_range=area_range, 3524 max_attempts=max_attempts, 3525 use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes, 3526 name=name) 3527 3528 3529@tf_export(v1=['image.sample_distorted_bounding_box']) 3530@dispatch.add_dispatch_support 3531@deprecation.deprecated( 3532 date=None, 3533 instructions='`seed2` arg is deprecated.' 3534 'Use sample_distorted_bounding_box_v2 instead.') 3535def sample_distorted_bounding_box(image_size, 3536 bounding_boxes, 3537 seed=None, 3538 seed2=None, 3539 min_object_covered=0.1, 3540 aspect_ratio_range=None, 3541 area_range=None, 3542 max_attempts=None, 3543 use_image_if_no_bounding_boxes=None, 3544 name=None): 3545 """Generate a single randomly distorted bounding box for an image. 3546 3547 Bounding box annotations are often supplied in addition to ground-truth labels 3548 in image recognition or object localization tasks. A common technique for 3549 training such a system is to randomly distort an image while preserving 3550 its content, i.e. *data augmentation*. This Op outputs a randomly distorted 3551 localization of an object, i.e. bounding box, given an `image_size`, 3552 `bounding_boxes` and a series of constraints. 3553 3554 The output of this Op is a single bounding box that may be used to crop the 3555 original image. The output is returned as 3 tensors: `begin`, `size` and 3556 `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the 3557 image. The latter may be supplied to `tf.image.draw_bounding_boxes` to 3558 visualize what the bounding box looks like. 3559 3560 Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. 3561 The 3562 bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and 3563 height of the underlying image. 3564 3565 For example, 3566 3567 ```python 3568 # Generate a single distorted bounding box. 3569 begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( 3570 tf.shape(image), 3571 bounding_boxes=bounding_boxes, 3572 min_object_covered=0.1) 3573 3574 # Draw the bounding box in an image summary. 3575 image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), 3576 bbox_for_draw) 3577 tf.compat.v1.summary.image('images_with_box', image_with_box) 3578 3579 # Employ the bounding box to distort the image. 3580 distorted_image = tf.slice(image, begin, size) 3581 ``` 3582 3583 Note that if no bounding box information is available, setting 3584 `use_image_if_no_bounding_boxes = True` will assume there is a single implicit 3585 bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is 3586 false and no bounding boxes are supplied, an error is raised. 3587 3588 Args: 3589 image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`, 3590 `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`. 3591 bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]` 3592 describing the N bounding boxes associated with the image. 3593 seed: An optional `int`. Defaults to `0`. If either `seed` or `seed2` are 3594 set to non-zero, the random number generator is seeded by the given 3595 `seed`. Otherwise, it is seeded by a random seed. 3596 seed2: An optional `int`. Defaults to `0`. A second seed to avoid seed 3597 collision. 3598 min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The 3599 cropped area of the image must contain at least this fraction of any 3600 bounding box supplied. The value of this parameter should be non-negative. 3601 In the case of 0, the cropped area does not need to overlap any of the 3602 bounding boxes supplied. 3603 aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75, 3604 1.33]`. The cropped area of the image must have an aspect ratio = width / 3605 height within this range. 3606 area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The 3607 cropped area of the image must contain a fraction of the supplied image 3608 within this range. 3609 max_attempts: An optional `int`. Defaults to `100`. Number of attempts at 3610 generating a cropped region of the image of the specified constraints. 3611 After `max_attempts` failures, return the entire image. 3612 use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`. 3613 Controls behavior if no bounding boxes supplied. If true, assume an 3614 implicit bounding box covering the whole input. If false, raise an error. 3615 name: A name for the operation (optional). 3616 3617 Returns: 3618 A tuple of `Tensor` objects (begin, size, bboxes). 3619 3620 begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing 3621 `[offset_height, offset_width, 0]`. Provide as input to 3622 `tf.slice`. 3623 size: A `Tensor`. Has the same type as `image_size`. 1-D, containing 3624 `[target_height, target_width, -1]`. Provide as input to 3625 `tf.slice`. 3626 bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing 3627 the distorted bounding box. 3628 Provide as input to `tf.image.draw_bounding_boxes`. 3629 """ 3630 with ops.name_scope(name, 'sample_distorted_bounding_box'): 3631 return gen_image_ops.sample_distorted_bounding_box_v2( 3632 image_size, 3633 bounding_boxes, 3634 seed=seed, 3635 seed2=seed2, 3636 min_object_covered=min_object_covered, 3637 aspect_ratio_range=aspect_ratio_range, 3638 area_range=area_range, 3639 max_attempts=max_attempts, 3640 use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes, 3641 name=name) 3642 3643 3644@tf_export('image.non_max_suppression') 3645@dispatch.add_dispatch_support 3646def non_max_suppression(boxes, 3647 scores, 3648 max_output_size, 3649 iou_threshold=0.5, 3650 score_threshold=float('-inf'), 3651 name=None): 3652 """Greedily selects a subset of bounding boxes in descending order of score. 3653 3654 Prunes away boxes that have high intersection-over-union (IOU) overlap 3655 with previously selected boxes. Bounding boxes are supplied as 3656 `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any 3657 diagonal pair of box corners and the coordinates can be provided as normalized 3658 (i.e., lying in the interval `[0, 1]`) or absolute. Note that this algorithm 3659 is agnostic to where the origin is in the coordinate system. Note that this 3660 algorithm is invariant to orthogonal transformations and translations 3661 of the coordinate system; thus translating or reflections of the coordinate 3662 system result in the same boxes being selected by the algorithm. 3663 The output of this operation is a set of integers indexing into the input 3664 collection of bounding boxes representing the selected boxes. The bounding 3665 box coordinates corresponding to the selected indices can then be obtained 3666 using the `tf.gather` operation. For example: 3667 ```python 3668 selected_indices = tf.image.non_max_suppression( 3669 boxes, scores, max_output_size, iou_threshold) 3670 selected_boxes = tf.gather(boxes, selected_indices) 3671 ``` 3672 3673 Args: 3674 boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`. 3675 scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single 3676 score corresponding to each box (each row of boxes). 3677 max_output_size: A scalar integer `Tensor` representing the maximum number 3678 of boxes to be selected by non-max suppression. 3679 iou_threshold: A 0-D float tensor representing the threshold for deciding 3680 whether boxes overlap too much with respect to IOU. 3681 score_threshold: A 0-D float tensor representing the threshold for deciding 3682 when to remove boxes based on score. 3683 name: A name for the operation (optional). 3684 3685 Returns: 3686 selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the 3687 selected indices from the boxes tensor, where `M <= max_output_size`. 3688 """ 3689 with ops.name_scope(name, 'non_max_suppression'): 3690 iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold') 3691 score_threshold = ops.convert_to_tensor( 3692 score_threshold, name='score_threshold') 3693 return gen_image_ops.non_max_suppression_v3(boxes, scores, max_output_size, 3694 iou_threshold, score_threshold) 3695 3696 3697@tf_export('image.non_max_suppression_with_scores') 3698@dispatch.add_dispatch_support 3699def non_max_suppression_with_scores(boxes, 3700 scores, 3701 max_output_size, 3702 iou_threshold=0.5, 3703 score_threshold=float('-inf'), 3704 soft_nms_sigma=0.0, 3705 name=None): 3706 """Greedily selects a subset of bounding boxes in descending order of score. 3707 3708 Prunes away boxes that have high intersection-over-union (IOU) overlap 3709 with previously selected boxes. Bounding boxes are supplied as 3710 `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any 3711 diagonal pair of box corners and the coordinates can be provided as normalized 3712 (i.e., lying in the interval `[0, 1]`) or absolute. Note that this algorithm 3713 is agnostic to where the origin is in the coordinate system. Note that this 3714 algorithm is invariant to orthogonal transformations and translations 3715 of the coordinate system; thus translating or reflections of the coordinate 3716 system result in the same boxes being selected by the algorithm. 3717 The output of this operation is a set of integers indexing into the input 3718 collection of bounding boxes representing the selected boxes. The bounding 3719 box coordinates corresponding to the selected indices can then be obtained 3720 using the `tf.gather` operation. For example: 3721 ```python 3722 selected_indices, selected_scores = tf.image.non_max_suppression_padded( 3723 boxes, scores, max_output_size, iou_threshold=1.0, score_threshold=0.1, 3724 soft_nms_sigma=0.5) 3725 selected_boxes = tf.gather(boxes, selected_indices) 3726 ``` 3727 3728 This function generalizes the `tf.image.non_max_suppression` op by also 3729 supporting a Soft-NMS (with Gaussian weighting) mode (c.f. 3730 Bodla et al, https://arxiv.org/abs/1704.04503) where boxes reduce the score 3731 of other overlapping boxes instead of directly causing them to be pruned. 3732 Consequently, in contrast to `tf.image.non_max_suppression`, 3733 `tf.image.non_max_suppression_padded` returns the new scores of each input box 3734 in the second output, `selected_scores`. 3735 3736 To enable this Soft-NMS mode, set the `soft_nms_sigma` parameter to be 3737 larger than 0. When `soft_nms_sigma` equals 0, the behavior of 3738 `tf.image.non_max_suppression_padded` is identical to that of 3739 `tf.image.non_max_suppression` (except for the extra output) both in function 3740 and in running time. 3741 3742 Args: 3743 boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`. 3744 scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single 3745 score corresponding to each box (each row of boxes). 3746 max_output_size: A scalar integer `Tensor` representing the maximum number 3747 of boxes to be selected by non-max suppression. 3748 iou_threshold: A 0-D float tensor representing the threshold for deciding 3749 whether boxes overlap too much with respect to IOU. 3750 score_threshold: A 0-D float tensor representing the threshold for deciding 3751 when to remove boxes based on score. 3752 soft_nms_sigma: A 0-D float tensor representing the sigma parameter for Soft 3753 NMS; see Bodla et al (c.f. https://arxiv.org/abs/1704.04503). When 3754 `soft_nms_sigma=0.0` (which is default), we fall back to standard (hard) 3755 NMS. 3756 name: A name for the operation (optional). 3757 3758 Returns: 3759 selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the 3760 selected indices from the boxes tensor, where `M <= max_output_size`. 3761 selected_scores: A 1-D float tensor of shape `[M]` representing the 3762 corresponding scores for each selected box, where `M <= max_output_size`. 3763 Scores only differ from corresponding input scores when using Soft NMS 3764 (i.e. when `soft_nms_sigma>0`) 3765 """ 3766 with ops.name_scope(name, 'non_max_suppression_with_scores'): 3767 iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold') 3768 score_threshold = ops.convert_to_tensor( 3769 score_threshold, name='score_threshold') 3770 soft_nms_sigma = ops.convert_to_tensor( 3771 soft_nms_sigma, name='soft_nms_sigma') 3772 (selected_indices, selected_scores, 3773 _) = gen_image_ops.non_max_suppression_v5( 3774 boxes, 3775 scores, 3776 max_output_size, 3777 iou_threshold, 3778 score_threshold, 3779 soft_nms_sigma, 3780 pad_to_max_output_size=False) 3781 return selected_indices, selected_scores 3782 3783 3784@tf_export('image.non_max_suppression_overlaps') 3785@dispatch.add_dispatch_support 3786def non_max_suppression_with_overlaps(overlaps, 3787 scores, 3788 max_output_size, 3789 overlap_threshold=0.5, 3790 score_threshold=float('-inf'), 3791 name=None): 3792 """Greedily selects a subset of bounding boxes in descending order of score. 3793 3794 Prunes away boxes that have high overlap with previously selected boxes. 3795 N-by-n overlap values are supplied as square matrix. 3796 The output of this operation is a set of integers indexing into the input 3797 collection of bounding boxes representing the selected boxes. The bounding 3798 box coordinates corresponding to the selected indices can then be obtained 3799 using the `tf.gather` operation. For example: 3800 ```python 3801 selected_indices = tf.image.non_max_suppression_overlaps( 3802 overlaps, scores, max_output_size, iou_threshold) 3803 selected_boxes = tf.gather(boxes, selected_indices) 3804 ``` 3805 3806 Args: 3807 overlaps: A 2-D float `Tensor` of shape `[num_boxes, num_boxes]` 3808 representing the n-by-n box overlap values. 3809 scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single 3810 score corresponding to each box (each row of boxes). 3811 max_output_size: A scalar integer `Tensor` representing the maximum number 3812 of boxes to be selected by non-max suppression. 3813 overlap_threshold: A 0-D float tensor representing the threshold for 3814 deciding whether boxes overlap too much with respect to the provided 3815 overlap values. 3816 score_threshold: A 0-D float tensor representing the threshold for deciding 3817 when to remove boxes based on score. 3818 name: A name for the operation (optional). 3819 3820 Returns: 3821 selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the 3822 selected indices from the overlaps tensor, where `M <= max_output_size`. 3823 """ 3824 with ops.name_scope(name, 'non_max_suppression_overlaps'): 3825 overlap_threshold = ops.convert_to_tensor( 3826 overlap_threshold, name='overlap_threshold') 3827 # pylint: disable=protected-access 3828 return gen_image_ops.non_max_suppression_with_overlaps( 3829 overlaps, scores, max_output_size, overlap_threshold, score_threshold) 3830 # pylint: enable=protected-access 3831 3832 3833_rgb_to_yiq_kernel = [[0.299, 0.59590059, 0.2115], 3834 [0.587, -0.27455667, -0.52273617], 3835 [0.114, -0.32134392, 0.31119955]] 3836 3837 3838@tf_export('image.rgb_to_yiq') 3839@dispatch.add_dispatch_support 3840def rgb_to_yiq(images): 3841 """Converts one or more images from RGB to YIQ. 3842 3843 Outputs a tensor of the same shape as the `images` tensor, containing the YIQ 3844 value of the pixels. 3845 The output is only well defined if the value in images are in [0,1]. 3846 3847 Usage Example: 3848 3849 >>> x = tf.constant([[[1.0, 2.0, 3.0]]]) 3850 >>> tf.image.rgb_to_yiq(x) 3851 <tf.Tensor: shape=(1, 1, 3), dtype=float32, 3852 numpy=array([[[ 1.815 , -0.91724455, 0.09962624]]], dtype=float32)> 3853 3854 Args: 3855 images: 2-D or higher rank. Image data to convert. Last dimension must be 3856 size 3. 3857 3858 Returns: 3859 images: tensor with the same shape as `images`. 3860 """ 3861 images = ops.convert_to_tensor(images, name='images') 3862 kernel = ops.convert_to_tensor( 3863 _rgb_to_yiq_kernel, dtype=images.dtype, name='kernel') 3864 ndims = images.get_shape().ndims 3865 return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]]) 3866 3867 3868_yiq_to_rgb_kernel = [[1, 1, 1], [0.95598634, -0.27201283, -1.10674021], 3869 [0.6208248, -0.64720424, 1.70423049]] 3870 3871 3872@tf_export('image.yiq_to_rgb') 3873@dispatch.add_dispatch_support 3874def yiq_to_rgb(images): 3875 """Converts one or more images from YIQ to RGB. 3876 3877 Outputs a tensor of the same shape as the `images` tensor, containing the RGB 3878 value of the pixels. 3879 The output is only well defined if the Y value in images are in [0,1], 3880 I value are in [-0.5957,0.5957] and Q value are in [-0.5226,0.5226]. 3881 3882 Args: 3883 images: 2-D or higher rank. Image data to convert. Last dimension must be 3884 size 3. 3885 3886 Returns: 3887 images: tensor with the same shape as `images`. 3888 """ 3889 images = ops.convert_to_tensor(images, name='images') 3890 kernel = ops.convert_to_tensor( 3891 _yiq_to_rgb_kernel, dtype=images.dtype, name='kernel') 3892 ndims = images.get_shape().ndims 3893 return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]]) 3894 3895 3896_rgb_to_yuv_kernel = [[0.299, -0.14714119, 0.61497538], 3897 [0.587, -0.28886916, -0.51496512], 3898 [0.114, 0.43601035, -0.10001026]] 3899 3900 3901@tf_export('image.rgb_to_yuv') 3902@dispatch.add_dispatch_support 3903def rgb_to_yuv(images): 3904 """Converts one or more images from RGB to YUV. 3905 3906 Outputs a tensor of the same shape as the `images` tensor, containing the YUV 3907 value of the pixels. 3908 The output is only well defined if the value in images are in [0, 1]. 3909 There are two ways of representing an image: [0, 255] pixel values range or 3910 [0, 1] (as float) pixel values range. Users need to convert the input image 3911 into a float [0, 1] range. 3912 3913 Args: 3914 images: 2-D or higher rank. Image data to convert. Last dimension must be 3915 size 3. 3916 3917 Returns: 3918 images: tensor with the same shape as `images`. 3919 """ 3920 images = ops.convert_to_tensor(images, name='images') 3921 kernel = ops.convert_to_tensor( 3922 _rgb_to_yuv_kernel, dtype=images.dtype, name='kernel') 3923 ndims = images.get_shape().ndims 3924 return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]]) 3925 3926 3927_yuv_to_rgb_kernel = [[1, 1, 1], [0, -0.394642334, 2.03206185], 3928 [1.13988303, -0.58062185, 0]] 3929 3930 3931@tf_export('image.yuv_to_rgb') 3932@dispatch.add_dispatch_support 3933def yuv_to_rgb(images): 3934 """Converts one or more images from YUV to RGB. 3935 3936 Outputs a tensor of the same shape as the `images` tensor, containing the RGB 3937 value of the pixels. 3938 The output is only well defined if the Y value in images are in [0,1], 3939 U and V value are in [-0.5,0.5]. 3940 3941 As per the above description, you need to scale your YUV images if their 3942 pixel values are not in the required range. Below given example illustrates 3943 preprocessing of each channel of images before feeding them to `yuv_to_rgb`. 3944 3945 ```python 3946 yuv_images = tf.random.uniform(shape=[100, 64, 64, 3], maxval=255) 3947 last_dimension_axis = len(yuv_images.shape) - 1 3948 yuv_tensor_images = tf.truediv( 3949 tf.subtract( 3950 yuv_images, 3951 tf.reduce_min(yuv_images) 3952 ), 3953 tf.subtract( 3954 tf.reduce_max(yuv_images), 3955 tf.reduce_min(yuv_images) 3956 ) 3957 ) 3958 y, u, v = tf.split(yuv_tensor_images, 3, axis=last_dimension_axis) 3959 target_uv_min, target_uv_max = -0.5, 0.5 3960 u = u * (target_uv_max - target_uv_min) + target_uv_min 3961 v = v * (target_uv_max - target_uv_min) + target_uv_min 3962 preprocessed_yuv_images = tf.concat([y, u, v], axis=last_dimension_axis) 3963 rgb_tensor_images = tf.image.yuv_to_rgb(preprocessed_yuv_images) 3964 ``` 3965 3966 Args: 3967 images: 2-D or higher rank. Image data to convert. Last dimension must be 3968 size 3. 3969 3970 Returns: 3971 images: tensor with the same shape as `images`. 3972 """ 3973 images = ops.convert_to_tensor(images, name='images') 3974 kernel = ops.convert_to_tensor( 3975 _yuv_to_rgb_kernel, dtype=images.dtype, name='kernel') 3976 ndims = images.get_shape().ndims 3977 return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]]) 3978 3979 3980def _verify_compatible_image_shapes(img1, img2): 3981 """Checks if two image tensors are compatible for applying SSIM or PSNR. 3982 3983 This function checks if two sets of images have ranks at least 3, and if the 3984 last three dimensions match. 3985 3986 Args: 3987 img1: Tensor containing the first image batch. 3988 img2: Tensor containing the second image batch. 3989 3990 Returns: 3991 A tuple containing: the first tensor shape, the second tensor shape, and a 3992 list of control_flow_ops.Assert() ops implementing the checks. 3993 3994 Raises: 3995 ValueError: When static shape check fails. 3996 """ 3997 shape1 = img1.get_shape().with_rank_at_least(3) 3998 shape2 = img2.get_shape().with_rank_at_least(3) 3999 shape1[-3:].assert_is_compatible_with(shape2[-3:]) 4000 4001 if shape1.ndims is not None and shape2.ndims is not None: 4002 for dim1, dim2 in zip( 4003 reversed(shape1.dims[:-3]), reversed(shape2.dims[:-3])): 4004 if not (dim1 == 1 or dim2 == 1 or dim1.is_compatible_with(dim2)): 4005 raise ValueError('Two images are not compatible: %s and %s' % 4006 (shape1, shape2)) 4007 4008 # Now assign shape tensors. 4009 shape1, shape2 = array_ops.shape_n([img1, img2]) 4010 4011 # TODO(sjhwang): Check if shape1[:-3] and shape2[:-3] are broadcastable. 4012 checks = [] 4013 checks.append( 4014 control_flow_ops.Assert( 4015 math_ops.greater_equal(array_ops.size(shape1), 3), [shape1, shape2], 4016 summarize=10)) 4017 checks.append( 4018 control_flow_ops.Assert( 4019 math_ops.reduce_all(math_ops.equal(shape1[-3:], shape2[-3:])), 4020 [shape1, shape2], 4021 summarize=10)) 4022 return shape1, shape2, checks 4023 4024 4025@tf_export('image.psnr') 4026@dispatch.add_dispatch_support 4027def psnr(a, b, max_val, name=None): 4028 """Returns the Peak Signal-to-Noise Ratio between a and b. 4029 4030 This is intended to be used on signals (or images). Produces a PSNR value for 4031 each image in batch. 4032 4033 The last three dimensions of input are expected to be [height, width, depth]. 4034 4035 Example: 4036 4037 ```python 4038 # Read images from file. 4039 im1 = tf.decode_png('path/to/im1.png') 4040 im2 = tf.decode_png('path/to/im2.png') 4041 # Compute PSNR over tf.uint8 Tensors. 4042 psnr1 = tf.image.psnr(im1, im2, max_val=255) 4043 4044 # Compute PSNR over tf.float32 Tensors. 4045 im1 = tf.image.convert_image_dtype(im1, tf.float32) 4046 im2 = tf.image.convert_image_dtype(im2, tf.float32) 4047 psnr2 = tf.image.psnr(im1, im2, max_val=1.0) 4048 # psnr1 and psnr2 both have type tf.float32 and are almost equal. 4049 ``` 4050 4051 Args: 4052 a: First set of images. 4053 b: Second set of images. 4054 max_val: The dynamic range of the images (i.e., the difference between the 4055 maximum the and minimum allowed values). 4056 name: Namespace to embed the computation in. 4057 4058 Returns: 4059 The scalar PSNR between a and b. The returned tensor has type `tf.float32` 4060 and shape [batch_size, 1]. 4061 """ 4062 with ops.name_scope(name, 'PSNR', [a, b]): 4063 # Need to convert the images to float32. Scale max_val accordingly so that 4064 # PSNR is computed correctly. 4065 max_val = math_ops.cast(max_val, a.dtype) 4066 max_val = convert_image_dtype(max_val, dtypes.float32) 4067 a = convert_image_dtype(a, dtypes.float32) 4068 b = convert_image_dtype(b, dtypes.float32) 4069 mse = math_ops.reduce_mean(math_ops.squared_difference(a, b), [-3, -2, -1]) 4070 psnr_val = math_ops.subtract( 4071 20 * math_ops.log(max_val) / math_ops.log(10.0), 4072 np.float32(10 / np.log(10)) * math_ops.log(mse), 4073 name='psnr') 4074 4075 _, _, checks = _verify_compatible_image_shapes(a, b) 4076 with ops.control_dependencies(checks): 4077 return array_ops.identity(psnr_val) 4078 4079 4080def _ssim_helper(x, y, reducer, max_val, compensation=1.0, k1=0.01, k2=0.03): 4081 r"""Helper function for computing SSIM. 4082 4083 SSIM estimates covariances with weighted sums. The default parameters 4084 use a biased estimate of the covariance: 4085 Suppose `reducer` is a weighted sum, then the mean estimators are 4086 \mu_x = \sum_i w_i x_i, 4087 \mu_y = \sum_i w_i y_i, 4088 where w_i's are the weighted-sum weights, and covariance estimator is 4089 cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y) 4090 with assumption \sum_i w_i = 1. This covariance estimator is biased, since 4091 E[cov_{xy}] = (1 - \sum_i w_i ^ 2) Cov(X, Y). 4092 For SSIM measure with unbiased covariance estimators, pass as `compensation` 4093 argument (1 - \sum_i w_i ^ 2). 4094 4095 Args: 4096 x: First set of images. 4097 y: Second set of images. 4098 reducer: Function that computes 'local' averages from the set of images. For 4099 non-convolutional version, this is usually tf.reduce_mean(x, [1, 2]), and 4100 for convolutional version, this is usually tf.nn.avg_pool2d or 4101 tf.nn.conv2d with weighted-sum kernel. 4102 max_val: The dynamic range (i.e., the difference between the maximum 4103 possible allowed value and the minimum allowed value). 4104 compensation: Compensation factor. See above. 4105 k1: Default value 0.01 4106 k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so 4107 it would be better if we took the values in the range of 0 < K2 < 0.4). 4108 4109 Returns: 4110 A pair containing the luminance measure, and the contrast-structure measure. 4111 """ 4112 4113 c1 = (k1 * max_val)**2 4114 c2 = (k2 * max_val)**2 4115 4116 # SSIM luminance measure is 4117 # (2 * mu_x * mu_y + c1) / (mu_x ** 2 + mu_y ** 2 + c1). 4118 mean0 = reducer(x) 4119 mean1 = reducer(y) 4120 num0 = mean0 * mean1 * 2.0 4121 den0 = math_ops.square(mean0) + math_ops.square(mean1) 4122 luminance = (num0 + c1) / (den0 + c1) 4123 4124 # SSIM contrast-structure measure is 4125 # (2 * cov_{xy} + c2) / (cov_{xx} + cov_{yy} + c2). 4126 # Note that `reducer` is a weighted sum with weight w_k, \sum_i w_i = 1, then 4127 # cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y) 4128 # = \sum_i w_i x_i y_i - (\sum_i w_i x_i) (\sum_j w_j y_j). 4129 num1 = reducer(x * y) * 2.0 4130 den1 = reducer(math_ops.square(x) + math_ops.square(y)) 4131 c2 *= compensation 4132 cs = (num1 - num0 + c2) / (den1 - den0 + c2) 4133 4134 # SSIM score is the product of the luminance and contrast-structure measures. 4135 return luminance, cs 4136 4137 4138def _fspecial_gauss(size, sigma): 4139 """Function to mimic the 'fspecial' gaussian MATLAB function.""" 4140 size = ops.convert_to_tensor(size, dtypes.int32) 4141 sigma = ops.convert_to_tensor(sigma) 4142 4143 coords = math_ops.cast(math_ops.range(size), sigma.dtype) 4144 coords -= math_ops.cast(size - 1, sigma.dtype) / 2.0 4145 4146 g = math_ops.square(coords) 4147 g *= -0.5 / math_ops.square(sigma) 4148 4149 g = array_ops.reshape(g, shape=[1, -1]) + array_ops.reshape(g, shape=[-1, 1]) 4150 g = array_ops.reshape(g, shape=[1, -1]) # For tf.nn.softmax(). 4151 g = nn_ops.softmax(g) 4152 return array_ops.reshape(g, shape=[size, size, 1, 1]) 4153 4154 4155def _ssim_per_channel(img1, 4156 img2, 4157 max_val=1.0, 4158 filter_size=11, 4159 filter_sigma=1.5, 4160 k1=0.01, 4161 k2=0.03): 4162 """Computes SSIM index between img1 and img2 per color channel. 4163 4164 This function matches the standard SSIM implementation from: 4165 Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image 4166 quality assessment: from error visibility to structural similarity. IEEE 4167 transactions on image processing. 4168 4169 Details: 4170 - 11x11 Gaussian filter of width 1.5 is used. 4171 - k1 = 0.01, k2 = 0.03 as in the original paper. 4172 4173 Args: 4174 img1: First image batch. 4175 img2: Second image batch. 4176 max_val: The dynamic range of the images (i.e., the difference between the 4177 maximum the and minimum allowed values). 4178 filter_size: Default value 11 (size of gaussian filter). 4179 filter_sigma: Default value 1.5 (width of gaussian filter). 4180 k1: Default value 0.01 4181 k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so 4182 it would be better if we took the values in the range of 0 < K2 < 0.4). 4183 4184 Returns: 4185 A pair of tensors containing and channel-wise SSIM and contrast-structure 4186 values. The shape is [..., channels]. 4187 """ 4188 filter_size = constant_op.constant(filter_size, dtype=dtypes.int32) 4189 filter_sigma = constant_op.constant(filter_sigma, dtype=img1.dtype) 4190 4191 shape1, shape2 = array_ops.shape_n([img1, img2]) 4192 checks = [ 4193 control_flow_ops.Assert( 4194 math_ops.reduce_all( 4195 math_ops.greater_equal(shape1[-3:-1], filter_size)), 4196 [shape1, filter_size], 4197 summarize=8), 4198 control_flow_ops.Assert( 4199 math_ops.reduce_all( 4200 math_ops.greater_equal(shape2[-3:-1], filter_size)), 4201 [shape2, filter_size], 4202 summarize=8) 4203 ] 4204 4205 # Enforce the check to run before computation. 4206 with ops.control_dependencies(checks): 4207 img1 = array_ops.identity(img1) 4208 4209 # TODO(sjhwang): Try to cache kernels and compensation factor. 4210 kernel = _fspecial_gauss(filter_size, filter_sigma) 4211 kernel = array_ops.tile(kernel, multiples=[1, 1, shape1[-1], 1]) 4212 4213 # The correct compensation factor is `1.0 - tf.reduce_sum(tf.square(kernel))`, 4214 # but to match MATLAB implementation of MS-SSIM, we use 1.0 instead. 4215 compensation = 1.0 4216 4217 # TODO(sjhwang): Try FFT. 4218 # TODO(sjhwang): Gaussian kernel is separable in space. Consider applying 4219 # 1-by-n and n-by-1 Gaussian filters instead of an n-by-n filter. 4220 def reducer(x): 4221 shape = array_ops.shape(x) 4222 x = array_ops.reshape(x, shape=array_ops.concat([[-1], shape[-3:]], 0)) 4223 y = nn.depthwise_conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID') 4224 return array_ops.reshape( 4225 y, array_ops.concat([shape[:-3], array_ops.shape(y)[1:]], 0)) 4226 4227 luminance, cs = _ssim_helper(img1, img2, reducer, max_val, compensation, k1, 4228 k2) 4229 4230 # Average over the second and the third from the last: height, width. 4231 axes = constant_op.constant([-3, -2], dtype=dtypes.int32) 4232 ssim_val = math_ops.reduce_mean(luminance * cs, axes) 4233 cs = math_ops.reduce_mean(cs, axes) 4234 return ssim_val, cs 4235 4236 4237@tf_export('image.ssim') 4238@dispatch.add_dispatch_support 4239def ssim(img1, 4240 img2, 4241 max_val, 4242 filter_size=11, 4243 filter_sigma=1.5, 4244 k1=0.01, 4245 k2=0.03): 4246 """Computes SSIM index between img1 and img2. 4247 4248 This function is based on the standard SSIM implementation from: 4249 Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image 4250 quality assessment: from error visibility to structural similarity. IEEE 4251 transactions on image processing. 4252 4253 Note: The true SSIM is only defined on grayscale. This function does not 4254 perform any colorspace transform. (If the input is already YUV, then it will 4255 compute YUV SSIM average.) 4256 4257 Details: 4258 - 11x11 Gaussian filter of width 1.5 is used. 4259 - k1 = 0.01, k2 = 0.03 as in the original paper. 4260 4261 The image sizes must be at least 11x11 because of the filter size. 4262 4263 Example: 4264 4265 ```python 4266 # Read images (of size 255 x 255) from file. 4267 im1 = tf.image.decode_image(tf.io.read_file('path/to/im1.png')) 4268 im2 = tf.image.decode_image(tf.io.read_file('path/to/im2.png')) 4269 tf.shape(im1) # `img1.png` has 3 channels; shape is `(255, 255, 3)` 4270 tf.shape(im2) # `img2.png` has 3 channels; shape is `(255, 255, 3)` 4271 # Add an outer batch for each image. 4272 im1 = tf.expand_dims(im1, axis=0) 4273 im2 = tf.expand_dims(im2, axis=0) 4274 # Compute SSIM over tf.uint8 Tensors. 4275 ssim1 = tf.image.ssim(im1, im2, max_val=255, filter_size=11, 4276 filter_sigma=1.5, k1=0.01, k2=0.03) 4277 4278 # Compute SSIM over tf.float32 Tensors. 4279 im1 = tf.image.convert_image_dtype(im1, tf.float32) 4280 im2 = tf.image.convert_image_dtype(im2, tf.float32) 4281 ssim2 = tf.image.ssim(im1, im2, max_val=1.0, filter_size=11, 4282 filter_sigma=1.5, k1=0.01, k2=0.03) 4283 # ssim1 and ssim2 both have type tf.float32 and are almost equal. 4284 ``` 4285 4286 Args: 4287 img1: First image batch. 4-D Tensor of shape `[batch, height, width, 4288 channels]`. 4289 img2: Second image batch. 4-D Tensor of shape `[batch, height, width, 4290 channels]`. 4291 max_val: The dynamic range of the images (i.e., the difference between the 4292 maximum the and minimum allowed values). 4293 filter_size: Default value 11 (size of gaussian filter). 4294 filter_sigma: Default value 1.5 (width of gaussian filter). 4295 k1: Default value 0.01 4296 k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so 4297 it would be better if we took the values in the range of 0 < K2 < 0.4). 4298 4299 Returns: 4300 A tensor containing an SSIM value for each image in batch. Returned SSIM 4301 values are in range (-1, 1], when pixel values are non-negative. Returns 4302 a tensor with shape: broadcast(img1.shape[:-3], img2.shape[:-3]). 4303 """ 4304 with ops.name_scope(None, 'SSIM', [img1, img2]): 4305 # Convert to tensor if needed. 4306 img1 = ops.convert_to_tensor(img1, name='img1') 4307 img2 = ops.convert_to_tensor(img2, name='img2') 4308 # Shape checking. 4309 _, _, checks = _verify_compatible_image_shapes(img1, img2) 4310 with ops.control_dependencies(checks): 4311 img1 = array_ops.identity(img1) 4312 4313 # Need to convert the images to float32. Scale max_val accordingly so that 4314 # SSIM is computed correctly. 4315 max_val = math_ops.cast(max_val, img1.dtype) 4316 max_val = convert_image_dtype(max_val, dtypes.float32) 4317 img1 = convert_image_dtype(img1, dtypes.float32) 4318 img2 = convert_image_dtype(img2, dtypes.float32) 4319 ssim_per_channel, _ = _ssim_per_channel(img1, img2, max_val, filter_size, 4320 filter_sigma, k1, k2) 4321 # Compute average over color channels. 4322 return math_ops.reduce_mean(ssim_per_channel, [-1]) 4323 4324 4325# Default values obtained by Wang et al. 4326_MSSSIM_WEIGHTS = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333) 4327 4328 4329@tf_export('image.ssim_multiscale') 4330@dispatch.add_dispatch_support 4331def ssim_multiscale(img1, 4332 img2, 4333 max_val, 4334 power_factors=_MSSSIM_WEIGHTS, 4335 filter_size=11, 4336 filter_sigma=1.5, 4337 k1=0.01, 4338 k2=0.03): 4339 """Computes the MS-SSIM between img1 and img2. 4340 4341 This function assumes that `img1` and `img2` are image batches, i.e. the last 4342 three dimensions are [height, width, channels]. 4343 4344 Note: The true SSIM is only defined on grayscale. This function does not 4345 perform any colorspace transform. (If the input is already YUV, then it will 4346 compute YUV SSIM average.) 4347 4348 Original paper: Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. "Multiscale 4349 structural similarity for image quality assessment." Signals, Systems and 4350 Computers, 2004. 4351 4352 Args: 4353 img1: First image batch. 4354 img2: Second image batch. Must have the same rank as img1. 4355 max_val: The dynamic range of the images (i.e., the difference between the 4356 maximum the and minimum allowed values). 4357 power_factors: Iterable of weights for each of the scales. The number of 4358 scales used is the length of the list. Index 0 is the unscaled 4359 resolution's weight and each increasing scale corresponds to the image 4360 being downsampled by 2. Defaults to (0.0448, 0.2856, 0.3001, 0.2363, 4361 0.1333), which are the values obtained in the original paper. 4362 filter_size: Default value 11 (size of gaussian filter). 4363 filter_sigma: Default value 1.5 (width of gaussian filter). 4364 k1: Default value 0.01 4365 k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so 4366 it would be better if we took the values in the range of 0 < K2 < 0.4). 4367 4368 Returns: 4369 A tensor containing an MS-SSIM value for each image in batch. The values 4370 are in range [0, 1]. Returns a tensor with shape: 4371 broadcast(img1.shape[:-3], img2.shape[:-3]). 4372 """ 4373 with ops.name_scope(None, 'MS-SSIM', [img1, img2]): 4374 # Convert to tensor if needed. 4375 img1 = ops.convert_to_tensor(img1, name='img1') 4376 img2 = ops.convert_to_tensor(img2, name='img2') 4377 # Shape checking. 4378 shape1, shape2, checks = _verify_compatible_image_shapes(img1, img2) 4379 with ops.control_dependencies(checks): 4380 img1 = array_ops.identity(img1) 4381 4382 # Need to convert the images to float32. Scale max_val accordingly so that 4383 # SSIM is computed correctly. 4384 max_val = math_ops.cast(max_val, img1.dtype) 4385 max_val = convert_image_dtype(max_val, dtypes.float32) 4386 img1 = convert_image_dtype(img1, dtypes.float32) 4387 img2 = convert_image_dtype(img2, dtypes.float32) 4388 4389 imgs = [img1, img2] 4390 shapes = [shape1, shape2] 4391 4392 # img1 and img2 are assumed to be a (multi-dimensional) batch of 4393 # 3-dimensional images (height, width, channels). `heads` contain the batch 4394 # dimensions, and `tails` contain the image dimensions. 4395 heads = [s[:-3] for s in shapes] 4396 tails = [s[-3:] for s in shapes] 4397 4398 divisor = [1, 2, 2, 1] 4399 divisor_tensor = constant_op.constant(divisor[1:], dtype=dtypes.int32) 4400 4401 def do_pad(images, remainder): 4402 padding = array_ops.expand_dims(remainder, -1) 4403 padding = array_ops.pad(padding, [[1, 0], [1, 0]]) 4404 return [array_ops.pad(x, padding, mode='SYMMETRIC') for x in images] 4405 4406 mcs = [] 4407 for k in range(len(power_factors)): 4408 with ops.name_scope(None, 'Scale%d' % k, imgs): 4409 if k > 0: 4410 # Avg pool takes rank 4 tensors. Flatten leading dimensions. 4411 flat_imgs = [ 4412 array_ops.reshape(x, array_ops.concat([[-1], t], 0)) 4413 for x, t in zip(imgs, tails) 4414 ] 4415 4416 remainder = tails[0] % divisor_tensor 4417 need_padding = math_ops.reduce_any(math_ops.not_equal(remainder, 0)) 4418 # pylint: disable=cell-var-from-loop 4419 padded = control_flow_ops.cond(need_padding, 4420 lambda: do_pad(flat_imgs, remainder), 4421 lambda: flat_imgs) 4422 # pylint: enable=cell-var-from-loop 4423 4424 downscaled = [ 4425 nn_ops.avg_pool( 4426 x, ksize=divisor, strides=divisor, padding='VALID') 4427 for x in padded 4428 ] 4429 tails = [x[1:] for x in array_ops.shape_n(downscaled)] 4430 imgs = [ 4431 array_ops.reshape(x, array_ops.concat([h, t], 0)) 4432 for x, h, t in zip(downscaled, heads, tails) 4433 ] 4434 4435 # Overwrite previous ssim value since we only need the last one. 4436 ssim_per_channel, cs = _ssim_per_channel( 4437 *imgs, 4438 max_val=max_val, 4439 filter_size=filter_size, 4440 filter_sigma=filter_sigma, 4441 k1=k1, 4442 k2=k2) 4443 mcs.append(nn_ops.relu(cs)) 4444 4445 # Remove the cs score for the last scale. In the MS-SSIM calculation, 4446 # we use the l(p) at the highest scale. l(p) * cs(p) is ssim(p). 4447 mcs.pop() # Remove the cs score for the last scale. 4448 mcs_and_ssim = array_ops.stack( 4449 mcs + [nn_ops.relu(ssim_per_channel)], axis=-1) 4450 # Take weighted geometric mean across the scale axis. 4451 ms_ssim = math_ops.reduce_prod( 4452 math_ops.pow(mcs_and_ssim, power_factors), [-1]) 4453 4454 return math_ops.reduce_mean(ms_ssim, [-1]) # Avg over color channels. 4455 4456 4457@tf_export('image.image_gradients') 4458@dispatch.add_dispatch_support 4459def image_gradients(image): 4460 """Returns image gradients (dy, dx) for each color channel. 4461 4462 Both output tensors have the same shape as the input: [batch_size, h, w, 4463 d]. The gradient values are organized so that [I(x+1, y) - I(x, y)] is in 4464 location (x, y). That means that dy will always have zeros in the last row, 4465 and dx will always have zeros in the last column. 4466 4467 Usage Example: 4468 ```python 4469 BATCH_SIZE = 1 4470 IMAGE_HEIGHT = 5 4471 IMAGE_WIDTH = 5 4472 CHANNELS = 1 4473 image = tf.reshape(tf.range(IMAGE_HEIGHT * IMAGE_WIDTH * CHANNELS, 4474 delta=1, dtype=tf.float32), 4475 shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS)) 4476 dy, dx = tf.image.image_gradients(image) 4477 print(image[0, :,:,0]) 4478 tf.Tensor( 4479 [[ 0. 1. 2. 3. 4.] 4480 [ 5. 6. 7. 8. 9.] 4481 [10. 11. 12. 13. 14.] 4482 [15. 16. 17. 18. 19.] 4483 [20. 21. 22. 23. 24.]], shape=(5, 5), dtype=float32) 4484 print(dy[0, :,:,0]) 4485 tf.Tensor( 4486 [[5. 5. 5. 5. 5.] 4487 [5. 5. 5. 5. 5.] 4488 [5. 5. 5. 5. 5.] 4489 [5. 5. 5. 5. 5.] 4490 [0. 0. 0. 0. 0.]], shape=(5, 5), dtype=float32) 4491 print(dx[0, :,:,0]) 4492 tf.Tensor( 4493 [[1. 1. 1. 1. 0.] 4494 [1. 1. 1. 1. 0.] 4495 [1. 1. 1. 1. 0.] 4496 [1. 1. 1. 1. 0.] 4497 [1. 1. 1. 1. 0.]], shape=(5, 5), dtype=float32) 4498 ``` 4499 4500 Args: 4501 image: Tensor with shape [batch_size, h, w, d]. 4502 4503 Returns: 4504 Pair of tensors (dy, dx) holding the vertical and horizontal image 4505 gradients (1-step finite difference). 4506 4507 Raises: 4508 ValueError: If `image` is not a 4D tensor. 4509 """ 4510 if image.get_shape().ndims != 4: 4511 raise ValueError('image_gradients expects a 4D tensor ' 4512 '[batch_size, h, w, d], not {}.'.format(image.get_shape())) 4513 image_shape = array_ops.shape(image) 4514 batch_size, height, width, depth = array_ops.unstack(image_shape) 4515 dy = image[:, 1:, :, :] - image[:, :-1, :, :] 4516 dx = image[:, :, 1:, :] - image[:, :, :-1, :] 4517 4518 # Return tensors with same size as original image by concatenating 4519 # zeros. Place the gradient [I(x+1,y) - I(x,y)] on the base pixel (x, y). 4520 shape = array_ops.stack([batch_size, 1, width, depth]) 4521 dy = array_ops.concat([dy, array_ops.zeros(shape, image.dtype)], 1) 4522 dy = array_ops.reshape(dy, image_shape) 4523 4524 shape = array_ops.stack([batch_size, height, 1, depth]) 4525 dx = array_ops.concat([dx, array_ops.zeros(shape, image.dtype)], 2) 4526 dx = array_ops.reshape(dx, image_shape) 4527 4528 return dy, dx 4529 4530 4531@tf_export('image.sobel_edges') 4532@dispatch.add_dispatch_support 4533def sobel_edges(image): 4534 """Returns a tensor holding Sobel edge maps. 4535 4536 Example usage: 4537 4538 For general usage, `image` would be loaded from a file as below: 4539 4540 ```python 4541 image_bytes = tf.io.read_file(path_to_image_file) 4542 image = tf.image.decode_image(image_bytes) 4543 image = tf.cast(image, tf.float32) 4544 image = tf.expand_dims(image, 0) 4545 ``` 4546 But for demo purposes, we are using randomly generated values for `image`: 4547 4548 >>> image = tf.random.uniform( 4549 ... maxval=255, shape=[1, 28, 28, 3], dtype=tf.float32) 4550 >>> sobel = tf.image.sobel_edges(image) 4551 >>> sobel_y = np.asarray(sobel[0, :, :, :, 0]) # sobel in y-direction 4552 >>> sobel_x = np.asarray(sobel[0, :, :, :, 1]) # sobel in x-direction 4553 4554 For displaying the sobel results, PIL's [Image Module]( 4555 https://pillow.readthedocs.io/en/stable/reference/Image.html) can be used: 4556 4557 ```python 4558 # Display edge maps for the first channel (at index 0) 4559 Image.fromarray(sobel_y[..., 0] / 4 + 0.5).show() 4560 Image.fromarray(sobel_x[..., 0] / 4 + 0.5).show() 4561 ``` 4562 4563 Args: 4564 image: Image tensor with shape [batch_size, h, w, d] and type float32 or 4565 float64. The image(s) must be 2x2 or larger. 4566 4567 Returns: 4568 Tensor holding edge maps for each channel. Returns a tensor with shape 4569 [batch_size, h, w, d, 2] where the last two dimensions hold [[dy[0], dx[0]], 4570 [dy[1], dx[1]], ..., [dy[d-1], dx[d-1]]] calculated using the Sobel filter. 4571 """ 4572 # Define vertical and horizontal Sobel filters. 4573 static_image_shape = image.get_shape() 4574 image_shape = array_ops.shape(image) 4575 kernels = [[[-1, -2, -1], [0, 0, 0], [1, 2, 1]], 4576 [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]] 4577 num_kernels = len(kernels) 4578 kernels = np.transpose(np.asarray(kernels), (1, 2, 0)) 4579 kernels = np.expand_dims(kernels, -2) 4580 kernels_tf = constant_op.constant(kernels, dtype=image.dtype) 4581 4582 kernels_tf = array_ops.tile( 4583 kernels_tf, [1, 1, image_shape[-1], 1], name='sobel_filters') 4584 4585 # Use depth-wise convolution to calculate edge maps per channel. 4586 pad_sizes = [[0, 0], [1, 1], [1, 1], [0, 0]] 4587 padded = array_ops.pad(image, pad_sizes, mode='REFLECT') 4588 4589 # Output tensor has shape [batch_size, h, w, d * num_kernels]. 4590 strides = [1, 1, 1, 1] 4591 output = nn.depthwise_conv2d(padded, kernels_tf, strides, 'VALID') 4592 4593 # Reshape to [batch_size, h, w, d, num_kernels]. 4594 shape = array_ops.concat([image_shape, [num_kernels]], 0) 4595 output = array_ops.reshape(output, shape=shape) 4596 output.set_shape(static_image_shape.concatenate([num_kernels])) 4597 return output 4598 4599 4600def resize_bicubic(images, 4601 size, 4602 align_corners=False, 4603 name=None, 4604 half_pixel_centers=False): 4605 return gen_image_ops.resize_bicubic( 4606 images=images, 4607 size=size, 4608 align_corners=align_corners, 4609 half_pixel_centers=half_pixel_centers, 4610 name=name) 4611 4612 4613def resize_bilinear(images, 4614 size, 4615 align_corners=False, 4616 name=None, 4617 half_pixel_centers=False): 4618 return gen_image_ops.resize_bilinear( 4619 images=images, 4620 size=size, 4621 align_corners=align_corners, 4622 half_pixel_centers=half_pixel_centers, 4623 name=name) 4624 4625 4626def resize_nearest_neighbor(images, 4627 size, 4628 align_corners=False, 4629 name=None, 4630 half_pixel_centers=False): 4631 return gen_image_ops.resize_nearest_neighbor( 4632 images=images, 4633 size=size, 4634 align_corners=align_corners, 4635 half_pixel_centers=half_pixel_centers, 4636 name=name) 4637 4638 4639resize_area_deprecation = deprecation.deprecated( 4640 date=None, 4641 instructions=( 4642 'Use `tf.image.resize(...method=ResizeMethod.AREA...)` instead.')) 4643tf_export(v1=['image.resize_area'])( 4644 resize_area_deprecation( 4645 dispatch.add_dispatch_support(gen_image_ops.resize_area))) 4646 4647resize_bicubic_deprecation = deprecation.deprecated( 4648 date=None, 4649 instructions=( 4650 'Use `tf.image.resize(...method=ResizeMethod.BICUBIC...)` instead.')) 4651tf_export(v1=['image.resize_bicubic'])( 4652 dispatch.add_dispatch_support(resize_bicubic_deprecation(resize_bicubic))) 4653 4654resize_bilinear_deprecation = deprecation.deprecated( 4655 date=None, 4656 instructions=( 4657 'Use `tf.image.resize(...method=ResizeMethod.BILINEAR...)` instead.')) 4658tf_export(v1=['image.resize_bilinear'])( 4659 dispatch.add_dispatch_support(resize_bilinear_deprecation(resize_bilinear))) 4660 4661resize_nearest_neighbor_deprecation = deprecation.deprecated( 4662 date=None, 4663 instructions=( 4664 'Use `tf.image.resize(...method=ResizeMethod.NEAREST_NEIGHBOR...)` ' 4665 'instead.')) 4666tf_export(v1=['image.resize_nearest_neighbor'])( 4667 dispatch.add_dispatch_support( 4668 resize_nearest_neighbor_deprecation(resize_nearest_neighbor))) 4669 4670 4671@tf_export('image.crop_and_resize', v1=[]) 4672@dispatch.add_dispatch_support 4673def crop_and_resize_v2(image, 4674 boxes, 4675 box_indices, 4676 crop_size, 4677 method='bilinear', 4678 extrapolation_value=0, 4679 name=None): 4680 """Extracts crops from the input image tensor and resizes them. 4681 4682 Extracts crops from the input image tensor and resizes them using bilinear 4683 sampling or nearest neighbor sampling (possibly with aspect ratio change) to a 4684 common output size specified by `crop_size`. This is more general than the 4685 `crop_to_bounding_box` op which extracts a fixed size slice from the input 4686 image and does not allow resizing or aspect ratio change. 4687 4688 Returns a tensor with `crops` from the input `image` at positions defined at 4689 the bounding box locations in `boxes`. The cropped boxes are all resized (with 4690 bilinear or nearest neighbor interpolation) to a fixed 4691 `size = [crop_height, crop_width]`. The result is a 4-D tensor 4692 `[num_boxes, crop_height, crop_width, depth]`. The resizing is corner aligned. 4693 In particular, if `boxes = [[0, 0, 1, 1]]`, the method will give identical 4694 results to using `tf.compat.v1.image.resize_bilinear()` or 4695 `tf.compat.v1.image.resize_nearest_neighbor()`(depends on the `method` 4696 argument) with 4697 `align_corners=True`. 4698 4699 Args: 4700 image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. 4701 Both `image_height` and `image_width` need to be positive. 4702 boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor 4703 specifies the coordinates of a box in the `box_ind[i]` image and is 4704 specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized 4705 coordinate value of `y` is mapped to the image coordinate at `y * 4706 (image_height - 1)`, so as the `[0, 1]` interval of normalized image 4707 height is mapped to `[0, image_height - 1]` in image height coordinates. 4708 We do allow `y1` > `y2`, in which case the sampled crop is an up-down 4709 flipped version of the original image. The width dimension is treated 4710 similarly. Normalized coordinates outside the `[0, 1]` range are allowed, 4711 in which case we use `extrapolation_value` to extrapolate the input image 4712 values. 4713 box_indices: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, 4714 batch)`. The value of `box_ind[i]` specifies the image that the `i`-th box 4715 refers to. 4716 crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. 4717 All cropped image patches are resized to this size. The aspect ratio of 4718 the image content is not preserved. Both `crop_height` and `crop_width` 4719 need to be positive. 4720 method: An optional string specifying the sampling method for resizing. It 4721 can be either `"bilinear"` or `"nearest"` and default to `"bilinear"`. 4722 Currently two sampling methods are supported: Bilinear and Nearest 4723 Neighbor. 4724 extrapolation_value: An optional `float`. Defaults to `0`. Value used for 4725 extrapolation, when applicable. 4726 name: A name for the operation (optional). 4727 4728 Returns: 4729 A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. 4730 4731 Example: 4732 4733 ```python 4734 import tensorflow as tf 4735 BATCH_SIZE = 1 4736 NUM_BOXES = 5 4737 IMAGE_HEIGHT = 256 4738 IMAGE_WIDTH = 256 4739 CHANNELS = 3 4740 CROP_SIZE = (24, 24) 4741 4742 image = tf.random.normal(shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, 4743 CHANNELS) ) 4744 boxes = tf.random.uniform(shape=(NUM_BOXES, 4)) 4745 box_indices = tf.random.uniform(shape=(NUM_BOXES,), minval=0, 4746 maxval=BATCH_SIZE, dtype=tf.int32) 4747 output = tf.image.crop_and_resize(image, boxes, box_indices, CROP_SIZE) 4748 output.shape #=> (5, 24, 24, 3) 4749 ``` 4750 """ 4751 return gen_image_ops.crop_and_resize(image, boxes, box_indices, crop_size, 4752 method, extrapolation_value, name) 4753 4754 4755@tf_export(v1=['image.crop_and_resize']) 4756@dispatch.add_dispatch_support 4757@deprecation.deprecated_args(None, 4758 'box_ind is deprecated, use box_indices instead', 4759 'box_ind') 4760def crop_and_resize_v1( # pylint: disable=missing-docstring 4761 image, 4762 boxes, 4763 box_ind=None, 4764 crop_size=None, 4765 method='bilinear', 4766 extrapolation_value=0, 4767 name=None, 4768 box_indices=None): 4769 box_ind = deprecation.deprecated_argument_lookup('box_indices', box_indices, 4770 'box_ind', box_ind) 4771 return gen_image_ops.crop_and_resize(image, boxes, box_ind, crop_size, method, 4772 extrapolation_value, name) 4773 4774 4775crop_and_resize_v1.__doc__ = gen_image_ops.crop_and_resize.__doc__ 4776 4777 4778@tf_export(v1=['image.extract_glimpse']) 4779@dispatch.add_dispatch_support 4780def extract_glimpse( 4781 input, # pylint: disable=redefined-builtin 4782 size, 4783 offsets, 4784 centered=True, 4785 normalized=True, 4786 uniform_noise=True, 4787 name=None): 4788 """Extracts a glimpse from the input tensor. 4789 4790 Returns a set of windows called glimpses extracted at location 4791 `offsets` from the input tensor. If the windows only partially 4792 overlaps the inputs, the non-overlapping areas will be filled with 4793 random noise. 4794 4795 The result is a 4-D tensor of shape `[batch_size, glimpse_height, 4796 glimpse_width, channels]`. The channels and batch dimensions are the 4797 same as that of the input tensor. The height and width of the output 4798 windows are specified in the `size` parameter. 4799 4800 The argument `normalized` and `centered` controls how the windows are built: 4801 4802 * If the coordinates are normalized but not centered, 0.0 and 1.0 4803 correspond to the minimum and maximum of each height and width 4804 dimension. 4805 * If the coordinates are both normalized and centered, they range from 4806 -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper 4807 left corner, the lower right corner is located at (1.0, 1.0) and the 4808 center is at (0, 0). 4809 * If the coordinates are not normalized they are interpreted as 4810 numbers of pixels. 4811 4812 Usage Example: 4813 4814 >>> x = [[[[0.0], 4815 ... [1.0], 4816 ... [2.0]], 4817 ... [[3.0], 4818 ... [4.0], 4819 ... [5.0]], 4820 ... [[6.0], 4821 ... [7.0], 4822 ... [8.0]]]] 4823 >>> tf.compat.v1.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]], 4824 ... centered=False, normalized=False) 4825 <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy= 4826 array([[[[0.], 4827 [1.]], 4828 [[3.], 4829 [4.]]]], dtype=float32)> 4830 4831 Args: 4832 input: A `Tensor` of type `float32`. A 4-D float tensor of shape 4833 `[batch_size, height, width, channels]`. 4834 size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the 4835 size of the glimpses to extract. The glimpse height must be specified 4836 first, following by the glimpse width. 4837 offsets: A `Tensor` of type `float32`. A 2-D integer tensor of shape 4838 `[batch_size, 2]` containing the y, x locations of the center of each 4839 window. 4840 centered: An optional `bool`. Defaults to `True`. indicates if the offset 4841 coordinates are centered relative to the image, in which case the (0, 0) 4842 offset is relative to the center of the input images. If false, the (0,0) 4843 offset corresponds to the upper left corner of the input images. 4844 normalized: An optional `bool`. Defaults to `True`. indicates if the offset 4845 coordinates are normalized. 4846 uniform_noise: An optional `bool`. Defaults to `True`. indicates if the 4847 noise should be generated using a uniform distribution or a Gaussian 4848 distribution. 4849 name: A name for the operation (optional). 4850 4851 Returns: 4852 A `Tensor` of type `float32`. 4853 """ 4854 return gen_image_ops.extract_glimpse( 4855 input=input, 4856 size=size, 4857 offsets=offsets, 4858 centered=centered, 4859 normalized=normalized, 4860 uniform_noise=uniform_noise, 4861 name=name) 4862 4863 4864@tf_export('image.extract_glimpse', v1=[]) 4865@dispatch.add_dispatch_support 4866def extract_glimpse_v2( 4867 input, # pylint: disable=redefined-builtin 4868 size, 4869 offsets, 4870 centered=True, 4871 normalized=True, 4872 noise='uniform', 4873 name=None): 4874 """Extracts a glimpse from the input tensor. 4875 4876 Returns a set of windows called glimpses extracted at location 4877 `offsets` from the input tensor. If the windows only partially 4878 overlaps the inputs, the non-overlapping areas will be filled with 4879 random noise. 4880 4881 The result is a 4-D tensor of shape `[batch_size, glimpse_height, 4882 glimpse_width, channels]`. The channels and batch dimensions are the 4883 same as that of the input tensor. The height and width of the output 4884 windows are specified in the `size` parameter. 4885 4886 The argument `normalized` and `centered` controls how the windows are built: 4887 4888 * If the coordinates are normalized but not centered, 0.0 and 1.0 4889 correspond to the minimum and maximum of each height and width 4890 dimension. 4891 * If the coordinates are both normalized and centered, they range from 4892 -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper 4893 left corner, the lower right corner is located at (1.0, 1.0) and the 4894 center is at (0, 0). 4895 * If the coordinates are not normalized they are interpreted as 4896 numbers of pixels. 4897 4898 Usage Example: 4899 4900 >>> x = [[[[0.0], 4901 ... [1.0], 4902 ... [2.0]], 4903 ... [[3.0], 4904 ... [4.0], 4905 ... [5.0]], 4906 ... [[6.0], 4907 ... [7.0], 4908 ... [8.0]]]] 4909 >>> tf.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]], 4910 ... centered=False, normalized=False) 4911 <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy= 4912 array([[[[4.], 4913 [5.]], 4914 [[7.], 4915 [8.]]]], dtype=float32)> 4916 4917 Args: 4918 input: A `Tensor` of type `float32`. A 4-D float tensor of shape 4919 `[batch_size, height, width, channels]`. 4920 size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the 4921 size of the glimpses to extract. The glimpse height must be specified 4922 first, following by the glimpse width. 4923 offsets: A `Tensor` of type `float32`. A 2-D integer tensor of shape 4924 `[batch_size, 2]` containing the y, x locations of the center of each 4925 window. 4926 centered: An optional `bool`. Defaults to `True`. indicates if the offset 4927 coordinates are centered relative to the image, in which case the (0, 0) 4928 offset is relative to the center of the input images. If false, the (0,0) 4929 offset corresponds to the upper left corner of the input images. 4930 normalized: An optional `bool`. Defaults to `True`. indicates if the offset 4931 coordinates are normalized. 4932 noise: An optional `string`. Defaults to `uniform`. indicates if the noise 4933 should be `uniform` (uniform distribution), `gaussian` (gaussian 4934 distribution), or `zero` (zero padding). 4935 name: A name for the operation (optional). 4936 4937 Returns: 4938 A `Tensor` of type `float32`. 4939 """ 4940 return gen_image_ops.extract_glimpse_v2( 4941 input=input, 4942 size=size, 4943 offsets=offsets, 4944 centered=centered, 4945 normalized=normalized, 4946 noise=noise, 4947 uniform_noise=False, 4948 name=name) 4949 4950 4951@tf_export('image.combined_non_max_suppression') 4952@dispatch.add_dispatch_support 4953def combined_non_max_suppression(boxes, 4954 scores, 4955 max_output_size_per_class, 4956 max_total_size, 4957 iou_threshold=0.5, 4958 score_threshold=float('-inf'), 4959 pad_per_class=False, 4960 clip_boxes=True, 4961 name=None): 4962 """Greedily selects a subset of bounding boxes in descending order of score. 4963 4964 This operation performs non_max_suppression on the inputs per batch, across 4965 all classes. 4966 Prunes away boxes that have high intersection-over-union (IOU) overlap 4967 with previously selected boxes. Bounding boxes are supplied as 4968 [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any 4969 diagonal pair of box corners and the coordinates can be provided as normalized 4970 (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm 4971 is agnostic to where the origin is in the coordinate system. Also note that 4972 this algorithm is invariant to orthogonal transformations and translations 4973 of the coordinate system; thus translating or reflections of the coordinate 4974 system result in the same boxes being selected by the algorithm. 4975 The output of this operation is the final boxes, scores and classes tensor 4976 returned after performing non_max_suppression. 4977 4978 Args: 4979 boxes: A 4-D float `Tensor` of shape `[batch_size, num_boxes, q, 4]`. If `q` 4980 is 1 then same boxes are used for all classes otherwise, if `q` is equal 4981 to number of classes, class-specific boxes are used. 4982 scores: A 3-D float `Tensor` of shape `[batch_size, num_boxes, num_classes]` 4983 representing a single score corresponding to each box (each row of boxes). 4984 max_output_size_per_class: A scalar integer `Tensor` representing the 4985 maximum number of boxes to be selected by non-max suppression per class 4986 max_total_size: A int32 scalar representing maximum number of boxes retained 4987 over all classes. Note that setting this value to a large number may 4988 result in OOM error depending on the system workload. 4989 iou_threshold: A float representing the threshold for deciding whether boxes 4990 overlap too much with respect to IOU. 4991 score_threshold: A float representing the threshold for deciding when to 4992 remove boxes based on score. 4993 pad_per_class: If false, the output nmsed boxes, scores and classes are 4994 padded/clipped to `max_total_size`. If true, the output nmsed boxes, 4995 scores and classes are padded to be of length 4996 `max_size_per_class`*`num_classes`, unless it exceeds `max_total_size` in 4997 which case it is clipped to `max_total_size`. Defaults to false. 4998 clip_boxes: If true, the coordinates of output nmsed boxes will be clipped 4999 to [0, 1]. If false, output the box coordinates as it is. Defaults to 5000 true. 5001 name: A name for the operation (optional). 5002 5003 Returns: 5004 'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor 5005 containing the non-max suppressed boxes. 5006 'nmsed_scores': A [batch_size, max_detections] float32 tensor containing 5007 the scores for the boxes. 5008 'nmsed_classes': A [batch_size, max_detections] float32 tensor 5009 containing the class for boxes. 5010 'valid_detections': A [batch_size] int32 tensor indicating the number of 5011 valid detections per batch item. Only the top valid_detections[i] entries 5012 in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the 5013 entries are zero paddings. 5014 """ 5015 with ops.name_scope(name, 'combined_non_max_suppression'): 5016 iou_threshold = ops.convert_to_tensor( 5017 iou_threshold, dtype=dtypes.float32, name='iou_threshold') 5018 score_threshold = ops.convert_to_tensor( 5019 score_threshold, dtype=dtypes.float32, name='score_threshold') 5020 5021 # Convert `max_total_size` to tensor *without* setting the `dtype` param. 5022 # This allows us to catch `int32` overflow case with `max_total_size` 5023 # whose expected dtype is `int32` by the op registration. Any number within 5024 # `int32` will get converted to `int32` tensor. Anything larger will get 5025 # converted to `int64`. Passing in `int64` for `max_total_size` to the op 5026 # will throw dtype mismatch exception. 5027 # TODO(b/173251596): Once there is a more general solution to warn against 5028 # int overflow conversions, revisit this check. 5029 max_total_size = ops.convert_to_tensor(max_total_size) 5030 5031 return gen_image_ops.combined_non_max_suppression( 5032 boxes, scores, max_output_size_per_class, max_total_size, iou_threshold, 5033 score_threshold, pad_per_class, clip_boxes) 5034 5035 5036def _bbox_overlap(boxes_a, boxes_b): 5037 """Calculates the overlap (iou - intersection over union) between boxes_a and boxes_b. 5038 5039 Args: 5040 boxes_a: a tensor with a shape of [batch_size, N, 4]. N is the number of 5041 boxes per image. The last dimension is the pixel coordinates in 5042 [ymin, xmin, ymax, xmax] form. 5043 boxes_b: a tensor with a shape of [batch_size, M, 4]. M is the number of 5044 boxes. The last dimension is the pixel coordinates in 5045 [ymin, xmin, ymax, xmax] form. 5046 Returns: 5047 intersection_over_union: a tensor with as a shape of [batch_size, N, M], 5048 representing the ratio of intersection area over union area (IoU) between 5049 two boxes 5050 """ 5051 with ops.name_scope('bbox_overlap'): 5052 a_y_min, a_x_min, a_y_max, a_x_max = array_ops.split( 5053 value=boxes_a, num_or_size_splits=4, axis=2) 5054 b_y_min, b_x_min, b_y_max, b_x_max = array_ops.split( 5055 value=boxes_b, num_or_size_splits=4, axis=2) 5056 5057 # Calculates the intersection area. 5058 i_xmin = math_ops.maximum( 5059 a_x_min, array_ops.transpose(b_x_min, [0, 2, 1])) 5060 i_xmax = math_ops.minimum( 5061 a_x_max, array_ops.transpose(b_x_max, [0, 2, 1])) 5062 i_ymin = math_ops.maximum( 5063 a_y_min, array_ops.transpose(b_y_min, [0, 2, 1])) 5064 i_ymax = math_ops.minimum( 5065 a_y_max, array_ops.transpose(b_y_max, [0, 2, 1])) 5066 i_area = math_ops.maximum( 5067 (i_xmax - i_xmin), 0) * math_ops.maximum((i_ymax - i_ymin), 0) 5068 5069 # Calculates the union area. 5070 a_area = (a_y_max - a_y_min) * (a_x_max - a_x_min) 5071 b_area = (b_y_max - b_y_min) * (b_x_max - b_x_min) 5072 EPSILON = 1e-8 5073 # Adds a small epsilon to avoid divide-by-zero. 5074 u_area = a_area + array_ops.transpose(b_area, [0, 2, 1]) - i_area + EPSILON 5075 5076 # Calculates IoU. 5077 intersection_over_union = i_area / u_area 5078 5079 return intersection_over_union 5080 5081 5082def _self_suppression(iou, _, iou_sum, iou_threshold): 5083 """Suppress boxes in the same tile. 5084 5085 Compute boxes that cannot be suppressed by others (i.e., 5086 can_suppress_others), and then use them to suppress boxes in the same tile. 5087 5088 Args: 5089 iou: a tensor of shape [batch_size, num_boxes_with_padding] representing 5090 intersection over union. 5091 iou_sum: a scalar tensor. 5092 iou_threshold: a scalar tensor. 5093 5094 Returns: 5095 iou_suppressed: a tensor of shape [batch_size, num_boxes_with_padding]. 5096 iou_diff: a scalar tensor representing whether any box is supressed in 5097 this step. 5098 iou_sum_new: a scalar tensor of shape [batch_size] that represents 5099 the iou sum after suppression. 5100 iou_threshold: a scalar tensor. 5101 """ 5102 batch_size = array_ops.shape(iou)[0] 5103 can_suppress_others = math_ops.cast( 5104 array_ops.reshape( 5105 math_ops.reduce_max(iou, 1) < iou_threshold, [batch_size, -1, 1]), 5106 iou.dtype) 5107 iou_after_suppression = array_ops.reshape( 5108 math_ops.cast( 5109 math_ops.reduce_max(can_suppress_others * iou, 1) < iou_threshold, 5110 iou.dtype), 5111 [batch_size, -1, 1]) * iou 5112 iou_sum_new = math_ops.reduce_sum(iou_after_suppression, [1, 2]) 5113 return [ 5114 iou_after_suppression, 5115 math_ops.reduce_any(iou_sum - iou_sum_new > iou_threshold), iou_sum_new, 5116 iou_threshold 5117 ] 5118 5119 5120def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx, tile_size): 5121 """Suppress boxes between different tiles. 5122 5123 Args: 5124 boxes: a tensor of shape [batch_size, num_boxes_with_padding, 4] 5125 box_slice: a tensor of shape [batch_size, tile_size, 4] 5126 iou_threshold: a scalar tensor 5127 inner_idx: a scalar tensor representing the tile index of the tile 5128 that is used to supress box_slice 5129 tile_size: an integer representing the number of boxes in a tile 5130 5131 Returns: 5132 boxes: unchanged boxes as input 5133 box_slice_after_suppression: box_slice after suppression 5134 iou_threshold: unchanged 5135 """ 5136 batch_size = array_ops.shape(boxes)[0] 5137 new_slice = array_ops.slice( 5138 boxes, [0, inner_idx * tile_size, 0], 5139 [batch_size, tile_size, 4]) 5140 iou = _bbox_overlap(new_slice, box_slice) 5141 box_slice_after_suppression = array_ops.expand_dims( 5142 math_ops.cast(math_ops.reduce_all(iou < iou_threshold, [1]), 5143 box_slice.dtype), 5144 2) * box_slice 5145 return boxes, box_slice_after_suppression, iou_threshold, inner_idx + 1 5146 5147 5148def _suppression_loop_body(boxes, iou_threshold, output_size, idx, tile_size): 5149 """Process boxes in the range [idx*tile_size, (idx+1)*tile_size). 5150 5151 Args: 5152 boxes: a tensor with a shape of [batch_size, anchors, 4]. 5153 iou_threshold: a float representing the threshold for deciding whether boxes 5154 overlap too much with respect to IOU. 5155 output_size: an int32 tensor of size [batch_size]. Representing the number 5156 of selected boxes for each batch. 5157 idx: an integer scalar representing induction variable. 5158 tile_size: an integer representing the number of boxes in a tile 5159 5160 Returns: 5161 boxes: updated boxes. 5162 iou_threshold: pass down iou_threshold to the next iteration. 5163 output_size: the updated output_size. 5164 idx: the updated induction variable. 5165 """ 5166 with ops.name_scope('suppression_loop_body'): 5167 num_tiles = array_ops.shape(boxes)[1] // tile_size 5168 batch_size = array_ops.shape(boxes)[0] 5169 5170 def cross_suppression_func(boxes, box_slice, iou_threshold, inner_idx): 5171 return _cross_suppression(boxes, box_slice, iou_threshold, inner_idx, 5172 tile_size) 5173 5174 # Iterates over tiles that can possibly suppress the current tile. 5175 box_slice = array_ops.slice(boxes, [0, idx * tile_size, 0], 5176 [batch_size, tile_size, 4]) 5177 _, box_slice, _, _ = control_flow_ops.while_loop( 5178 lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx, 5179 cross_suppression_func, 5180 [boxes, box_slice, iou_threshold, constant_op.constant(0)]) 5181 5182 # Iterates over the current tile to compute self-suppression. 5183 iou = _bbox_overlap(box_slice, box_slice) 5184 mask = array_ops.expand_dims( 5185 array_ops.reshape( 5186 math_ops.range(tile_size), [1, -1]) > array_ops.reshape( 5187 math_ops.range(tile_size), [-1, 1]), 0) 5188 iou *= math_ops.cast( 5189 math_ops.logical_and(mask, iou >= iou_threshold), iou.dtype) 5190 suppressed_iou, _, _, _ = control_flow_ops.while_loop( 5191 lambda _iou, loop_condition, _iou_sum, _: loop_condition, 5192 _self_suppression, 5193 [iou, constant_op.constant(True), math_ops.reduce_sum(iou, [1, 2]), 5194 iou_threshold]) 5195 suppressed_box = math_ops.reduce_sum(suppressed_iou, 1) > 0 5196 box_slice *= array_ops.expand_dims( 5197 1.0 - math_ops.cast(suppressed_box, box_slice.dtype), 2) 5198 5199 # Uses box_slice to update the input boxes. 5200 mask = array_ops.reshape( 5201 math_ops.cast( 5202 math_ops.equal(math_ops.range(num_tiles), idx), boxes.dtype), 5203 [1, -1, 1, 1]) 5204 boxes = array_ops.tile(array_ops.expand_dims( 5205 box_slice, [1]), [1, num_tiles, 1, 1]) * mask + array_ops.reshape( 5206 boxes, [batch_size, num_tiles, tile_size, 4]) * (1 - mask) 5207 boxes = array_ops.reshape(boxes, [batch_size, -1, 4]) 5208 5209 # Updates output_size. 5210 output_size += math_ops.reduce_sum( 5211 math_ops.cast( 5212 math_ops.reduce_any(box_slice > 0, [2]), dtypes.int32), [1]) 5213 return boxes, iou_threshold, output_size, idx + 1 5214 5215 5216@tf_export('image.non_max_suppression_padded') 5217@dispatch.add_dispatch_support 5218def non_max_suppression_padded(boxes, 5219 scores, 5220 max_output_size, 5221 iou_threshold=0.5, 5222 score_threshold=float('-inf'), 5223 pad_to_max_output_size=False, 5224 name=None, 5225 sorted_input=False, 5226 canonicalized_coordinates=False, 5227 tile_size=512): 5228 """Greedily selects a subset of bounding boxes in descending order of score. 5229 5230 Performs algorithmically equivalent operation to tf.image.non_max_suppression, 5231 with the addition of an optional parameter which zero-pads the output to 5232 be of size `max_output_size`. 5233 The output of this operation is a tuple containing the set of integers 5234 indexing into the input collection of bounding boxes representing the selected 5235 boxes and the number of valid indices in the index set. The bounding box 5236 coordinates corresponding to the selected indices can then be obtained using 5237 the `tf.slice` and `tf.gather` operations. For example: 5238 ```python 5239 selected_indices_padded, num_valid = tf.image.non_max_suppression_padded( 5240 boxes, scores, max_output_size, iou_threshold, 5241 score_threshold, pad_to_max_output_size=True) 5242 selected_indices = tf.slice( 5243 selected_indices_padded, tf.constant([0]), num_valid) 5244 selected_boxes = tf.gather(boxes, selected_indices) 5245 ``` 5246 5247 Args: 5248 boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4]. 5249 Dimensions except the last two are batch dimensions. 5250 scores: a tensor of rank 1 or higher with a shape of [..., num_boxes]. 5251 max_output_size: a scalar integer `Tensor` representing the maximum number 5252 of boxes to be selected by non max suppression. Note that setting this 5253 value to a large number may result in OOM error depending on the system 5254 workload. 5255 iou_threshold: a float representing the threshold for deciding whether boxes 5256 overlap too much with respect to IoU (intersection over union). 5257 score_threshold: a float representing the threshold for box scores. Boxes 5258 with a score that is not larger than this threshold will be suppressed. 5259 pad_to_max_output_size: whether to pad the output idx to max_output_size. 5260 Must be set to True when the input is a batch of images. 5261 name: name of operation. 5262 sorted_input: a boolean indicating whether the input boxes and scores 5263 are sorted in descending order by the score. 5264 canonicalized_coordinates: if box coordinates are given as 5265 `[y_min, x_min, y_max, x_max]`, setting to True eliminate redundant 5266 computation to canonicalize box coordinates. 5267 tile_size: an integer representing the number of boxes in a tile, i.e., 5268 the maximum number of boxes per image that can be used to suppress other 5269 boxes in parallel; larger tile_size means larger parallelism and 5270 potentially more redundant work. 5271 Returns: 5272 idx: a tensor with a shape of [..., num_boxes] representing the 5273 indices selected by non-max suppression. The leading dimensions 5274 are the batch dimensions of the input boxes. All numbers are within 5275 [0, num_boxes). For each image (i.e., idx[i]), only the first num_valid[i] 5276 indices (i.e., idx[i][:num_valid[i]]) are valid. 5277 num_valid: a tensor of rank 0 or higher with a shape of [...] 5278 representing the number of valid indices in idx. Its dimensions are the 5279 batch dimensions of the input boxes. 5280 Raises: 5281 ValueError: When set pad_to_max_output_size to False for batched input. 5282 """ 5283 # if no new arguments are used and no later than 2020/6/23, use the old 5284 # version to give us time to fix TFLite conversion after the TF 2.3 release. 5285 if (not sorted_input) and \ 5286 (not canonicalized_coordinates) and \ 5287 tile_size == 512 and not compat.forward_compatible(2020, 6, 23): 5288 return non_max_suppression_padded_v1( 5289 boxes, scores, max_output_size, iou_threshold, score_threshold, 5290 pad_to_max_output_size, name) 5291 else: 5292 with ops.name_scope(name, 'non_max_suppression_padded'): 5293 if not pad_to_max_output_size: 5294 # pad_to_max_output_size may be set to False only when the shape of 5295 # boxes is [num_boxes, 4], i.e., a single image. We make best effort to 5296 # detect violations at compile time. If `boxes` does not have a static 5297 # rank, the check allows computation to proceed. 5298 if boxes.get_shape().rank is not None and boxes.get_shape().rank > 2: 5299 raise ValueError( 5300 "'pad_to_max_output_size' (value {}) must be True for " 5301 'batched input'.format(pad_to_max_output_size)) 5302 if name is None: 5303 name = '' 5304 idx, num_valid = non_max_suppression_padded_v2( 5305 boxes, scores, max_output_size, iou_threshold, score_threshold, 5306 sorted_input, canonicalized_coordinates, tile_size) 5307 # def_function.function seems to lose shape information, so set it here. 5308 if not pad_to_max_output_size: 5309 idx = idx[0, :num_valid] 5310 else: 5311 batch_dims = array_ops.concat([ 5312 array_ops.shape(boxes)[:-2], 5313 array_ops.expand_dims(max_output_size, 0) 5314 ], 0) 5315 idx = array_ops.reshape(idx, batch_dims) 5316 return idx, num_valid 5317 5318 5319# TODO(b/158709815): Improve performance regression due to 5320# def_function.function. 5321@def_function.function( 5322 experimental_implements='non_max_suppression_padded_v2') 5323def non_max_suppression_padded_v2(boxes, 5324 scores, 5325 max_output_size, 5326 iou_threshold=0.5, 5327 score_threshold=float('-inf'), 5328 sorted_input=False, 5329 canonicalized_coordinates=False, 5330 tile_size=512): 5331 """Non-maximum suppression. 5332 5333 Prunes away boxes that have high intersection-over-union (IOU) overlap 5334 with previously selected boxes. Bounding boxes are supplied as 5335 `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any 5336 diagonal pair of box corners and the coordinates can be provided as normalized 5337 (i.e., lying in the interval `[0, 1]`) or absolute. The bounding box 5338 coordinates are cannonicalized to `[y_min, x_min, y_max, x_max]`, 5339 where `(y_min, x_min)` and `(y_max, x_mas)` are the coordinates of the lower 5340 left and upper right corner. User may indiciate the input box coordinates are 5341 already canonicalized to eliminate redundant work by setting 5342 canonicalized_coordinates to `True`. Note that this algorithm is agnostic to 5343 where the origin is in the coordinate system. Note that this algorithm is 5344 invariant to orthogonal transformations and translations of the coordinate 5345 system; thus translating or reflections of the coordinate system result in the 5346 same boxes being selected by the algorithm. 5347 5348 Similar to tf.image.non_max_suppression, non_max_suppression_padded 5349 implements hard NMS but can operate on a batch of images and improves 5350 performance by titling the bounding boxes. Non_max_suppression_padded should 5351 be preferred over tf.image_non_max_suppression when running on devices with 5352 abundant parallelsim for higher computation speed. For soft NMS, refer to 5353 tf.image.non_max_suppression_with_scores. 5354 5355 While a serial NMS algorithm iteratively uses the highest-scored unprocessed 5356 box to suppress boxes, this algorithm uses many boxes to suppress other boxes 5357 in parallel. The key idea is to partition boxes into tiles based on their 5358 score and suppresses boxes tile by tile, thus achieving parallelism within a 5359 tile. The tile size determines the degree of parallelism. 5360 5361 In cross suppression (using boxes of tile A to suppress boxes of tile B), 5362 all boxes in A can independently suppress boxes in B. 5363 5364 Self suppression (suppressing boxes of the same tile) needs to be iteratively 5365 applied until there's no more suppression. In each iteration, boxes that 5366 cannot be suppressed are used to suppress boxes in the same tile. 5367 5368 boxes = boxes.pad_to_multiply_of(tile_size) 5369 num_tiles = len(boxes) // tile_size 5370 output_boxes = [] 5371 for i in range(num_tiles): 5372 box_tile = boxes[i*tile_size : (i+1)*tile_size] 5373 for j in range(i - 1): 5374 # in parallel suppress boxes in box_tile using boxes from suppressing_tile 5375 suppressing_tile = boxes[j*tile_size : (j+1)*tile_size] 5376 iou = _bbox_overlap(box_tile, suppressing_tile) 5377 # if the box is suppressed in iou, clear it to a dot 5378 box_tile *= _update_boxes(iou) 5379 # Iteratively handle the diagnal tile. 5380 iou = _box_overlap(box_tile, box_tile) 5381 iou_changed = True 5382 while iou_changed: 5383 # boxes that are not suppressed by anything else 5384 suppressing_boxes = _get_suppressing_boxes(iou) 5385 # boxes that are suppressed by suppressing_boxes 5386 suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes) 5387 # clear iou to 0 for boxes that are suppressed, as they cannot be used 5388 # to suppress other boxes any more 5389 new_iou = _clear_iou(iou, suppressed_boxes) 5390 iou_changed = (new_iou != iou) 5391 iou = new_iou 5392 # remaining boxes that can still suppress others, are selected boxes. 5393 output_boxes.append(_get_suppressing_boxes(iou)) 5394 if len(output_boxes) >= max_output_size: 5395 break 5396 5397 Args: 5398 boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4]. 5399 Dimensions except the last two are batch dimensions. The last dimension 5400 represents box coordinates, given as [y_1, x_1, y_2, x_2]. The coordinates 5401 on each dimension can be given in any order 5402 (see also `canonicalized_coordinates`) but must describe a box with 5403 a positive area. 5404 scores: a tensor of rank 1 or higher with a shape of [..., num_boxes]. 5405 max_output_size: a scalar integer `Tensor` representing the maximum number 5406 of boxes to be selected by non max suppression. 5407 iou_threshold: a float representing the threshold for deciding whether boxes 5408 overlap too much with respect to IoU (intersection over union). 5409 score_threshold: a float representing the threshold for box scores. Boxes 5410 with a score that is not larger than this threshold will be suppressed. 5411 sorted_input: a boolean indicating whether the input boxes and scores 5412 are sorted in descending order by the score. 5413 canonicalized_coordinates: if box coordinates are given as 5414 `[y_min, x_min, y_max, x_max]`, setting to True eliminate redundant 5415 computation to canonicalize box coordinates. 5416 tile_size: an integer representing the number of boxes in a tile, i.e., 5417 the maximum number of boxes per image that can be used to suppress other 5418 boxes in parallel; larger tile_size means larger parallelism and 5419 potentially more redundant work. 5420 Returns: 5421 idx: a tensor with a shape of [..., num_boxes] representing the 5422 indices selected by non-max suppression. The leading dimensions 5423 are the batch dimensions of the input boxes. All numbers are within 5424 [0, num_boxes). For each image (i.e., idx[i]), only the first num_valid[i] 5425 indices (i.e., idx[i][:num_valid[i]]) are valid. 5426 num_valid: a tensor of rank 0 or higher with a shape of [...] 5427 representing the number of valid indices in idx. Its dimensions are the 5428 batch dimensions of the input boxes. 5429 Raises: 5430 ValueError: When set pad_to_max_output_size to False for batched input. 5431 """ 5432 def _sort_scores_and_boxes(scores, boxes): 5433 """Sort boxes based their score from highest to lowest. 5434 5435 Args: 5436 scores: a tensor with a shape of [batch_size, num_boxes] representing 5437 the scores of boxes. 5438 boxes: a tensor with a shape of [batch_size, num_boxes, 4] representing 5439 the boxes. 5440 Returns: 5441 sorted_scores: a tensor with a shape of [batch_size, num_boxes] 5442 representing the sorted scores. 5443 sorted_boxes: a tensor representing the sorted boxes. 5444 sorted_scores_indices: a tensor with a shape of [batch_size, num_boxes] 5445 representing the index of the scores in a sorted descending order. 5446 """ 5447 with ops.name_scope('sort_scores_and_boxes'): 5448 batch_size = array_ops.shape(boxes)[0] 5449 num_boxes = array_ops.shape(boxes)[1] 5450 sorted_scores_indices = sort_ops.argsort( 5451 scores, axis=1, direction='DESCENDING') 5452 index_offsets = math_ops.range(batch_size) * num_boxes 5453 indices = array_ops.reshape( 5454 sorted_scores_indices + array_ops.expand_dims(index_offsets, 1), [-1]) 5455 sorted_scores = array_ops.reshape( 5456 array_ops.gather(array_ops.reshape(scores, [-1]), indices), 5457 [batch_size, -1]) 5458 sorted_boxes = array_ops.reshape( 5459 array_ops.gather(array_ops.reshape(boxes, [-1, 4]), indices), 5460 [batch_size, -1, 4]) 5461 return sorted_scores, sorted_boxes, sorted_scores_indices 5462 5463 batch_dims = array_ops.shape(boxes)[:-2] 5464 num_boxes = array_ops.shape(boxes)[-2] 5465 boxes = array_ops.reshape(boxes, [-1, num_boxes, 4]) 5466 scores = array_ops.reshape(scores, [-1, num_boxes]) 5467 batch_size = array_ops.shape(boxes)[0] 5468 if score_threshold != float('-inf'): 5469 with ops.name_scope('filter_by_score'): 5470 score_mask = math_ops.cast(scores > score_threshold, scores.dtype) 5471 scores *= score_mask 5472 box_mask = array_ops.expand_dims( 5473 math_ops.cast(score_mask, boxes.dtype), 2) 5474 boxes *= box_mask 5475 5476 if not canonicalized_coordinates: 5477 with ops.name_scope('canonicalize_coordinates'): 5478 y_1, x_1, y_2, x_2 = array_ops.split( 5479 value=boxes, num_or_size_splits=4, axis=2) 5480 y_1_is_min = math_ops.reduce_all( 5481 math_ops.less_equal(y_1[0, 0, 0], y_2[0, 0, 0])) 5482 y_min, y_max = control_flow_ops.cond( 5483 y_1_is_min, lambda: (y_1, y_2), lambda: (y_2, y_1)) 5484 x_1_is_min = math_ops.reduce_all( 5485 math_ops.less_equal(x_1[0, 0, 0], x_2[0, 0, 0])) 5486 x_min, x_max = control_flow_ops.cond( 5487 x_1_is_min, lambda: (x_1, x_2), lambda: (x_2, x_1)) 5488 boxes = array_ops.concat([y_min, x_min, y_max, x_max], axis=2) 5489 5490 if not sorted_input: 5491 scores, boxes, sorted_indices = _sort_scores_and_boxes(scores, boxes) 5492 else: 5493 # Default value required for Autograph. 5494 sorted_indices = array_ops.zeros_like(scores, dtype=dtypes.int32) 5495 5496 pad = math_ops.cast( 5497 math_ops.ceil( 5498 math_ops.cast( 5499 math_ops.maximum(num_boxes, max_output_size), dtypes.float32) / 5500 math_ops.cast(tile_size, dtypes.float32)), 5501 dtypes.int32) * tile_size - num_boxes 5502 boxes = array_ops.pad( 5503 math_ops.cast(boxes, dtypes.float32), [[0, 0], [0, pad], [0, 0]]) 5504 scores = array_ops.pad( 5505 math_ops.cast(scores, dtypes.float32), [[0, 0], [0, pad]]) 5506 num_boxes_after_padding = num_boxes + pad 5507 num_iterations = num_boxes_after_padding // tile_size 5508 def _loop_cond(unused_boxes, unused_threshold, output_size, idx): 5509 return math_ops.logical_and( 5510 math_ops.reduce_min(output_size) < max_output_size, 5511 idx < num_iterations) 5512 5513 def suppression_loop_body(boxes, iou_threshold, output_size, idx): 5514 return _suppression_loop_body( 5515 boxes, iou_threshold, output_size, idx, tile_size) 5516 5517 selected_boxes, _, output_size, _ = control_flow_ops.while_loop( 5518 _loop_cond, 5519 suppression_loop_body, 5520 [ 5521 boxes, iou_threshold, 5522 array_ops.zeros([batch_size], dtypes.int32), 5523 constant_op.constant(0) 5524 ], 5525 shape_invariants=[ 5526 tensor_shape.TensorShape([None, None, 4]), 5527 tensor_shape.TensorShape([]), 5528 tensor_shape.TensorShape([None]), 5529 tensor_shape.TensorShape([]), 5530 ], 5531 ) 5532 num_valid = math_ops.minimum(output_size, max_output_size) 5533 idx = num_boxes_after_padding - math_ops.cast( 5534 nn_ops.top_k( 5535 math_ops.cast(math_ops.reduce_any( 5536 selected_boxes > 0, [2]), dtypes.int32) * 5537 array_ops.expand_dims( 5538 math_ops.range(num_boxes_after_padding, 0, -1), 0), 5539 max_output_size)[0], dtypes.int32) 5540 idx = math_ops.minimum(idx, num_boxes - 1) 5541 5542 if not sorted_input: 5543 index_offsets = math_ops.range(batch_size) * num_boxes 5544 gather_idx = array_ops.reshape( 5545 idx + array_ops.expand_dims(index_offsets, 1), [-1]) 5546 idx = array_ops.reshape( 5547 array_ops.gather(array_ops.reshape(sorted_indices, [-1]), 5548 gather_idx), 5549 [batch_size, -1]) 5550 invalid_index = array_ops.fill([batch_size, max_output_size], 0) 5551 idx_index = array_ops.expand_dims(math_ops.range(max_output_size), 0) 5552 num_valid_expanded = array_ops.expand_dims(num_valid, 1) 5553 idx = array_ops.where(idx_index < num_valid_expanded, 5554 idx, invalid_index) 5555 5556 num_valid = array_ops.reshape(num_valid, batch_dims) 5557 return idx, num_valid 5558 5559 5560def non_max_suppression_padded_v1(boxes, 5561 scores, 5562 max_output_size, 5563 iou_threshold=0.5, 5564 score_threshold=float('-inf'), 5565 pad_to_max_output_size=False, 5566 name=None): 5567 """Greedily selects a subset of bounding boxes in descending order of score. 5568 5569 Performs algorithmically equivalent operation to tf.image.non_max_suppression, 5570 with the addition of an optional parameter which zero-pads the output to 5571 be of size `max_output_size`. 5572 The output of this operation is a tuple containing the set of integers 5573 indexing into the input collection of bounding boxes representing the selected 5574 boxes and the number of valid indices in the index set. The bounding box 5575 coordinates corresponding to the selected indices can then be obtained using 5576 the `tf.slice` and `tf.gather` operations. For example: 5577 ```python 5578 selected_indices_padded, num_valid = tf.image.non_max_suppression_padded( 5579 boxes, scores, max_output_size, iou_threshold, 5580 score_threshold, pad_to_max_output_size=True) 5581 selected_indices = tf.slice( 5582 selected_indices_padded, tf.constant([0]), num_valid) 5583 selected_boxes = tf.gather(boxes, selected_indices) 5584 ``` 5585 5586 Args: 5587 boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`. 5588 scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single 5589 score corresponding to each box (each row of boxes). 5590 max_output_size: A scalar integer `Tensor` representing the maximum number 5591 of boxes to be selected by non-max suppression. 5592 iou_threshold: A float representing the threshold for deciding whether boxes 5593 overlap too much with respect to IOU. 5594 score_threshold: A float representing the threshold for deciding when to 5595 remove boxes based on score. 5596 pad_to_max_output_size: bool. If True, size of `selected_indices` output is 5597 padded to `max_output_size`. 5598 name: A name for the operation (optional). 5599 5600 Returns: 5601 selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the 5602 selected indices from the boxes tensor, where `M <= max_output_size`. 5603 valid_outputs: A scalar integer `Tensor` denoting how many elements in 5604 `selected_indices` are valid. Valid elements occur first, then padding. 5605 """ 5606 with ops.name_scope(name, 'non_max_suppression_padded'): 5607 iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold') 5608 score_threshold = ops.convert_to_tensor( 5609 score_threshold, name='score_threshold') 5610 return gen_image_ops.non_max_suppression_v4(boxes, scores, max_output_size, 5611 iou_threshold, score_threshold, 5612 pad_to_max_output_size) 5613 5614 5615@tf_export('image.draw_bounding_boxes', v1=[]) 5616@dispatch.add_dispatch_support 5617def draw_bounding_boxes_v2(images, boxes, colors, name=None): 5618 """Draw bounding boxes on a batch of images. 5619 5620 Outputs a copy of `images` but draws on top of the pixels zero or more 5621 bounding boxes specified by the locations in `boxes`. The coordinates of the 5622 each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. 5623 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width 5624 and the height of the underlying image. 5625 5626 For example, if an image is 100 x 200 pixels (height x width) and the bounding 5627 box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of 5628 the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates). 5629 5630 Parts of the bounding box may fall outside the image. 5631 5632 Args: 5633 images: A `Tensor`. Must be one of the following types: `float32`, `half`. 5634 4-D with shape `[batch, height, width, depth]`. A batch of images. 5635 boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, 5636 num_bounding_boxes, 4]` containing bounding boxes. 5637 colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle 5638 through for the boxes. 5639 name: A name for the operation (optional). 5640 5641 Returns: 5642 A `Tensor`. Has the same type as `images`. 5643 5644 Usage Example: 5645 5646 >>> # create an empty image 5647 >>> img = tf.zeros([1, 3, 3, 3]) 5648 >>> # draw a box around the image 5649 >>> box = np.array([0, 0, 1, 1]) 5650 >>> boxes = box.reshape([1, 1, 4]) 5651 >>> # alternate between red and blue 5652 >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) 5653 >>> tf.image.draw_bounding_boxes(img, boxes, colors) 5654 <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy= 5655 array([[[[1., 0., 0.], 5656 [1., 0., 0.], 5657 [1., 0., 0.]], 5658 [[1., 0., 0.], 5659 [0., 0., 0.], 5660 [1., 0., 0.]], 5661 [[1., 0., 0.], 5662 [1., 0., 0.], 5663 [1., 0., 0.]]]], dtype=float32)> 5664 """ 5665 if colors is None: 5666 return gen_image_ops.draw_bounding_boxes(images, boxes, name) 5667 return gen_image_ops.draw_bounding_boxes_v2(images, boxes, colors, name) 5668 5669 5670@tf_export(v1=['image.draw_bounding_boxes']) 5671@dispatch.add_dispatch_support 5672def draw_bounding_boxes(images, boxes, name=None, colors=None): 5673 """Draw bounding boxes on a batch of images. 5674 5675 Outputs a copy of `images` but draws on top of the pixels zero or more 5676 bounding boxes specified by the locations in `boxes`. The coordinates of the 5677 each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. 5678 The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width 5679 and the height of the underlying image. 5680 5681 For example, if an image is 100 x 200 pixels (height x width) and the bounding 5682 box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of 5683 the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates). 5684 5685 Parts of the bounding box may fall outside the image. 5686 5687 Args: 5688 images: A `Tensor`. Must be one of the following types: `float32`, `half`. 5689 4-D with shape `[batch, height, width, depth]`. A batch of images. 5690 boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, 5691 num_bounding_boxes, 4]` containing bounding boxes. 5692 name: A name for the operation (optional). 5693 colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle 5694 through for the boxes. 5695 5696 Returns: 5697 A `Tensor`. Has the same type as `images`. 5698 5699 Usage Example: 5700 5701 >>> # create an empty image 5702 >>> img = tf.zeros([1, 3, 3, 3]) 5703 >>> # draw a box around the image 5704 >>> box = np.array([0, 0, 1, 1]) 5705 >>> boxes = box.reshape([1, 1, 4]) 5706 >>> # alternate between red and blue 5707 >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) 5708 >>> tf.image.draw_bounding_boxes(img, boxes, colors) 5709 <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy= 5710 array([[[[1., 0., 0.], 5711 [1., 0., 0.], 5712 [1., 0., 0.]], 5713 [[1., 0., 0.], 5714 [0., 0., 0.], 5715 [1., 0., 0.]], 5716 [[1., 0., 0.], 5717 [1., 0., 0.], 5718 [1., 0., 0.]]]], dtype=float32)> 5719 """ 5720 return draw_bounding_boxes_v2(images, boxes, colors, name) 5721 5722 5723@tf_export('image.generate_bounding_box_proposals') 5724@dispatch.add_dispatch_support 5725def generate_bounding_box_proposals(scores, 5726 bbox_deltas, 5727 image_info, 5728 anchors, 5729 nms_threshold=0.7, 5730 pre_nms_topn=6000, 5731 min_size=16, 5732 post_nms_topn=300, 5733 name=None): 5734 """Generate bounding box proposals from encoded bounding boxes. 5735 5736 Args: 5737 scores: A 4-D float `Tensor` of shape 5738 `[num_images, height, width, num_achors]` containing scores of 5739 the boxes for given anchors, can be unsorted. 5740 bbox_deltas: A 4-D float `Tensor` of shape 5741 `[num_images, height, width, 4 x num_anchors]` encoding boxes 5742 with respect to each anchor. Coordinates are given 5743 in the form `[dy, dx, dh, dw]`. 5744 image_info: A 2-D float `Tensor` of shape `[num_images, 5]` 5745 containing image information Height, Width, Scale. 5746 anchors: A 2-D float `Tensor` of shape `[num_anchors, 4]` 5747 describing the anchor boxes. 5748 Boxes are formatted in the form `[y1, x1, y2, x2]`. 5749 nms_threshold: A scalar float `Tensor` for non-maximal-suppression 5750 threshold. Defaults to 0.7. 5751 pre_nms_topn: A scalar int `Tensor` for the number of 5752 top scoring boxes to be used as input. Defaults to 6000. 5753 min_size: A scalar float `Tensor`. Any box that has a smaller size 5754 than min_size will be discarded. Defaults to 16. 5755 post_nms_topn: An integer. Maximum number of rois in the output. 5756 name: A name for this operation (optional). 5757 5758 Returns: 5759 rois: Region of interest boxes sorted by their scores. 5760 roi_probabilities: scores of the ROI boxes in the ROIs' `Tensor`. 5761 """ 5762 return gen_image_ops.generate_bounding_box_proposals( 5763 scores=scores, 5764 bbox_deltas=bbox_deltas, 5765 image_info=image_info, 5766 anchors=anchors, 5767 nms_threshold=nms_threshold, 5768 pre_nms_topn=pre_nms_topn, 5769 min_size=min_size, 5770 post_nms_topn=post_nms_topn, 5771 name=name) 5772