1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Implementation of image ops."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import functools
22import numpy as np
23
24from tensorflow.python.compat import compat
25from tensorflow.python.eager import def_function
26from tensorflow.python.framework import constant_op
27from tensorflow.python.framework import dtypes
28from tensorflow.python.framework import ops
29from tensorflow.python.framework import random_seed
30from tensorflow.python.framework import tensor_shape
31from tensorflow.python.framework import tensor_util
32from tensorflow.python.ops import array_ops
33from tensorflow.python.ops import check_ops
34from tensorflow.python.ops import control_flow_ops
35from tensorflow.python.ops import gen_image_ops
36from tensorflow.python.ops import math_ops
37from tensorflow.python.ops import nn
38from tensorflow.python.ops import nn_ops
39from tensorflow.python.ops import random_ops
40from tensorflow.python.ops import sort_ops
41from tensorflow.python.ops import stateless_random_ops
42from tensorflow.python.ops import string_ops
43from tensorflow.python.ops import variables
44from tensorflow.python.util import deprecation
45from tensorflow.python.util import dispatch
46from tensorflow.python.util.tf_export import tf_export
47
48ops.NotDifferentiable('RandomCrop')
49# TODO(b/31222613): This op may be differentiable, and there may be
50# latent bugs here.
51ops.NotDifferentiable('HSVToRGB')
52ops.NotDifferentiable('DrawBoundingBoxes')
53ops.NotDifferentiable('SampleDistortedBoundingBox')
54ops.NotDifferentiable('SampleDistortedBoundingBoxV2')
55# TODO(bsteiner): Implement the gradient function for extract_glimpse
56# TODO(b/31222613): This op may be differentiable, and there may be
57# latent bugs here.
58ops.NotDifferentiable('ExtractGlimpse')
59ops.NotDifferentiable('NonMaxSuppression')
60ops.NotDifferentiable('NonMaxSuppressionV2')
61ops.NotDifferentiable('NonMaxSuppressionWithOverlaps')
62ops.NotDifferentiable('GenerateBoundingBoxProposals')
63
64
65# pylint: disable=invalid-name
66def _assert(cond, ex_type, msg):
67  """A polymorphic assert, works with tensors and boolean expressions.
68
69  If `cond` is not a tensor, behave like an ordinary assert statement, except
70  that a empty list is returned. If `cond` is a tensor, return a list
71  containing a single TensorFlow assert op.
72
73  Args:
74    cond: Something evaluates to a boolean value. May be a tensor.
75    ex_type: The exception class to use.
76    msg: The error message.
77
78  Returns:
79    A list, containing at most one assert op.
80  """
81  if _is_tensor(cond):
82    return [control_flow_ops.Assert(cond, [msg])]
83  else:
84    if not cond:
85      raise ex_type(msg)
86    else:
87      return []
88
89
90def _is_tensor(x):
91  """Returns `True` if `x` is a symbolic tensor-like object.
92
93  Args:
94    x: A python object to check.
95
96  Returns:
97    `True` if `x` is a `tf.Tensor` or `tf.Variable`, otherwise `False`.
98  """
99  return isinstance(x, (ops.Tensor, variables.Variable))
100
101
102def _ImageDimensions(image, rank):
103  """Returns the dimensions of an image tensor.
104
105  Args:
106    image: A rank-D Tensor. For 3-D  of shape: `[height, width, channels]`.
107    rank: The expected rank of the image
108
109  Returns:
110    A list of corresponding to the dimensions of the
111    input image.  Dimensions that are statically known are python integers,
112    otherwise, they are integer scalar tensors.
113  """
114  if image.get_shape().is_fully_defined():
115    return image.get_shape().as_list()
116  else:
117    static_shape = image.get_shape().with_rank(rank).as_list()
118    dynamic_shape = array_ops.unstack(array_ops.shape(image), rank)
119    return [
120        s if s is not None else d for s, d in zip(static_shape, dynamic_shape)
121    ]
122
123
124def _Check3DImage(image, require_static=True):
125  """Assert that we are working with a properly shaped image.
126
127  Args:
128    image: 3-D Tensor of shape [height, width, channels]
129    require_static: If `True`, requires that all dimensions of `image` are known
130      and non-zero.
131
132  Raises:
133    ValueError: if `image.shape` is not a 3-vector.
134
135  Returns:
136    An empty list, if `image` has fully defined dimensions. Otherwise, a list
137    containing an assert op is returned.
138  """
139  try:
140    image_shape = image.get_shape().with_rank(3)
141  except ValueError:
142    raise ValueError("'image' (shape %s) must be three-dimensional." %
143                     image.shape)
144  if require_static and not image_shape.is_fully_defined():
145    raise ValueError("'image' (shape %s) must be fully defined." % image_shape)
146  if any(x == 0 for x in image_shape):
147    raise ValueError("all dims of 'image.shape' must be > 0: %s" % image_shape)
148  if not image_shape.is_fully_defined():
149    return [
150        check_ops.assert_positive(
151            array_ops.shape(image),
152            ["all dims of 'image.shape' "
153             'must be > 0.'])
154    ]
155  else:
156    return []
157
158
159def _Assert3DImage(image):
160  """Assert that we are working with a properly shaped image.
161
162  Performs the check statically if possible (i.e. if the shape
163  is statically known). Otherwise adds a control dependency
164  to an assert op that checks the dynamic shape.
165
166  Args:
167    image: 3-D Tensor of shape [height, width, channels]
168
169  Raises:
170    ValueError: if `image.shape` is not a 3-vector.
171
172  Returns:
173    If the shape of `image` could be verified statically, `image` is
174    returned unchanged, otherwise there will be a control dependency
175    added that asserts the correct dynamic shape.
176  """
177  return control_flow_ops.with_dependencies(
178      _Check3DImage(image, require_static=False), image)
179
180
181def _AssertAtLeast3DImage(image):
182  """Assert that we are working with a properly shaped image.
183
184  Performs the check statically if possible (i.e. if the shape
185  is statically known). Otherwise adds a control dependency
186  to an assert op that checks the dynamic shape.
187
188  Args:
189    image: >= 3-D Tensor of size [*, height, width, depth]
190
191  Raises:
192    ValueError: if image.shape is not a [>= 3] vector.
193
194  Returns:
195    If the shape of `image` could be verified statically, `image` is
196    returned unchanged, otherwise there will be a control dependency
197    added that asserts the correct dynamic shape.
198  """
199  return control_flow_ops.with_dependencies(
200      _CheckAtLeast3DImage(image, require_static=False), image)
201
202
203def _CheckAtLeast3DImage(image, require_static=True):
204  """Assert that we are working with a properly shaped image.
205
206  Args:
207    image: >= 3-D Tensor of size [*, height, width, depth]
208    require_static: If `True`, requires that all dimensions of `image` are known
209      and non-zero.
210
211  Raises:
212    ValueError: if image.shape is not a [>= 3] vector.
213
214  Returns:
215    An empty list, if `image` has fully defined dimensions. Otherwise, a list
216    containing an assert op is returned.
217  """
218  try:
219    if image.get_shape().ndims is None:
220      image_shape = image.get_shape().with_rank(3)
221    else:
222      image_shape = image.get_shape().with_rank_at_least(3)
223  except ValueError:
224    raise ValueError("'image' (shape %s) must be at least three-dimensional." %
225                     image.shape)
226  if require_static and not image_shape.is_fully_defined():
227    raise ValueError('\'image\' must be fully defined.')
228  if any(x == 0 for x in image_shape[-3:]):
229    raise ValueError('inner 3 dims of \'image.shape\' must be > 0: %s' %
230                     image_shape)
231  if not image_shape[-3:].is_fully_defined():
232    return [
233        check_ops.assert_positive(
234            array_ops.shape(image)[-3:],
235            ["inner 3 dims of 'image.shape' "
236             'must be > 0.']),
237        check_ops.assert_greater_equal(
238            array_ops.rank(image),
239            3,
240            message="'image' must be at least three-dimensional.")
241    ]
242  else:
243    return []
244
245
246def _AssertGrayscaleImage(image):
247  """Assert that we are working with a properly shaped grayscale image.
248
249  Performs the check statically if possible (i.e. if the shape
250  is statically known). Otherwise adds a control dependency
251  to an assert op that checks the dynamic shape.
252
253  Args:
254    image: >= 2-D Tensor of size [*, 1]
255
256  Raises:
257    ValueError: if image.shape is not a [>= 2] vector or if
258              last dimension is not size 1.
259
260  Returns:
261    If the shape of `image` could be verified statically, `image` is
262    returned unchanged, otherwise there will be a control dependency
263    added that asserts the correct dynamic shape.
264  """
265  return control_flow_ops.with_dependencies(
266      _CheckGrayscaleImage(image, require_static=False), image)
267
268
269def _CheckGrayscaleImage(image, require_static=True):
270  """Assert that we are working with properly shaped grayscale image.
271
272  Args:
273    image: >= 2-D Tensor of size [*, 1]
274    require_static: Boolean, whether static shape is required.
275
276  Raises:
277    ValueError: if image.shape is not a [>= 2] vector or if
278              last dimension is not size 1.
279
280  Returns:
281    An empty list, if `image` has fully defined dimensions. Otherwise, a list
282    containing an assert op is returned.
283  """
284  try:
285    if image.get_shape().ndims is None:
286      image_shape = image.get_shape().with_rank(2)
287    else:
288      image_shape = image.get_shape().with_rank_at_least(2)
289  except ValueError:
290    raise ValueError('A grayscale image (shape %s) must be at least '
291                     'two-dimensional.' % image.shape)
292  if require_static and not image_shape.is_fully_defined():
293    raise ValueError('\'image\' must be fully defined.')
294  if image_shape.is_fully_defined():
295    if image_shape[-1] != 1:
296      raise ValueError('Last dimension of a grayscale image should be size 1.')
297  if not image_shape.is_fully_defined():
298    return [
299        check_ops.assert_equal(
300            array_ops.shape(image)[-1],
301            1,
302            message='Last dimension of a grayscale image should be size 1.'),
303        check_ops.assert_greater_equal(
304            array_ops.rank(image),
305            3,
306            message='A grayscale image must be at least two-dimensional.')
307    ]
308  else:
309    return []
310
311
312def fix_image_flip_shape(image, result):
313  """Set the shape to 3 dimensional if we don't know anything else.
314
315  Args:
316    image: original image size
317    result: flipped or transformed image
318
319  Returns:
320    An image whose shape is at least (None, None, None).
321  """
322
323  image_shape = image.get_shape()
324  if image_shape == tensor_shape.unknown_shape():
325    result.set_shape([None, None, None])
326  else:
327    result.set_shape(image_shape)
328  return result
329
330
331@tf_export('image.random_flip_up_down')
332@dispatch.add_dispatch_support
333def random_flip_up_down(image, seed=None):
334  """Randomly flips an image vertically (upside down).
335
336  With a 1 in 2 chance, outputs the contents of `image` flipped along the first
337  dimension, which is `height`.  Otherwise, output the image as-is.
338  When passing a batch of images, each image will be randomly flipped
339  independent of other images.
340
341  Example usage:
342
343  >>> image = np.array([[[1], [2]], [[3], [4]]])
344  >>> tf.image.random_flip_up_down(image, 3).numpy().tolist()
345  [[[3], [4]], [[1], [2]]]
346
347  Randomly flip multiple images.
348
349  >>> images = np.array(
350  ... [
351  ...     [[[1], [2]], [[3], [4]]],
352  ...     [[[5], [6]], [[7], [8]]]
353  ... ])
354  >>> tf.image.random_flip_up_down(images, 4).numpy().tolist()
355  [[[[3], [4]], [[1], [2]]], [[[5], [6]], [[7], [8]]]]
356
357  For producing deterministic results given a `seed` value, use
358  `tf.image.stateless_random_flip_up_down`. Unlike using the `seed` param
359  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
360  same results given the same seed independent of how many times the function is
361  called, and independent of global seed settings (e.g. tf.random.set_seed).
362
363  Args:
364    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
365      of shape `[height, width, channels]`.
366    seed: A Python integer. Used to create a random seed. See
367      `tf.compat.v1.set_random_seed` for behavior.
368
369  Returns:
370    A tensor of the same type and shape as `image`.
371  Raises:
372    ValueError: if the shape of `image` not supported.
373  """
374  random_func = functools.partial(random_ops.random_uniform, seed=seed)
375  return _random_flip(image, 0, random_func, 'random_flip_up_down')
376
377
378@tf_export('image.random_flip_left_right')
379@dispatch.add_dispatch_support
380def random_flip_left_right(image, seed=None):
381  """Randomly flip an image horizontally (left to right).
382
383  With a 1 in 2 chance, outputs the contents of `image` flipped along the
384  second dimension, which is `width`.  Otherwise output the image as-is.
385  When passing a batch of images, each image will be randomly flipped
386  independent of other images.
387
388  Example usage:
389
390  >>> image = np.array([[[1], [2]], [[3], [4]]])
391  >>> tf.image.random_flip_left_right(image, 5).numpy().tolist()
392  [[[2], [1]], [[4], [3]]]
393
394  Randomly flip multiple images.
395
396  >>> images = np.array(
397  ... [
398  ...     [[[1], [2]], [[3], [4]]],
399  ...     [[[5], [6]], [[7], [8]]]
400  ... ])
401  >>> tf.image.random_flip_left_right(images, 6).numpy().tolist()
402  [[[[2], [1]], [[4], [3]]], [[[5], [6]], [[7], [8]]]]
403
404  For producing deterministic results given a `seed` value, use
405  `tf.image.stateless_random_flip_left_right`. Unlike using the `seed` param
406  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
407  same results given the same seed independent of how many times the function is
408  called, and independent of global seed settings (e.g. tf.random.set_seed).
409
410  Args:
411    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
412      of shape `[height, width, channels]`.
413    seed: A Python integer. Used to create a random seed. See
414      `tf.compat.v1.set_random_seed` for behavior.
415
416  Returns:
417    A tensor of the same type and shape as `image`.
418
419  Raises:
420    ValueError: if the shape of `image` not supported.
421  """
422  random_func = functools.partial(random_ops.random_uniform, seed=seed)
423  return _random_flip(image, 1, random_func, 'random_flip_left_right')
424
425
426@tf_export('image.stateless_random_flip_left_right', v1=[])
427@dispatch.add_dispatch_support
428def stateless_random_flip_left_right(image, seed):
429  """Randomly flip an image horizontally (left to right) deterministically.
430
431  Guarantees the same results given the same `seed` independent of how many
432  times the function is called, and independent of global seed settings (e.g.
433  `tf.random.set_seed`).
434
435  Example usage:
436
437  >>> image = np.array([[[1], [2]], [[3], [4]]])
438  >>> seed = (2, 3)
439  >>> tf.image.stateless_random_flip_left_right(image, seed).numpy().tolist()
440  [[[2], [1]], [[4], [3]]]
441
442  Args:
443    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
444      of shape `[height, width, channels]`.
445    seed: A shape [2] Tensor, the seed to the random number generator. Must have
446      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
447
448  Returns:
449    A tensor of the same type and shape as `image`.
450  """
451  random_func = functools.partial(
452      stateless_random_ops.stateless_random_uniform, seed=seed)
453  return _random_flip(
454      image, 1, random_func, 'stateless_random_flip_left_right')
455
456
457@tf_export('image.stateless_random_flip_up_down', v1=[])
458@dispatch.add_dispatch_support
459def stateless_random_flip_up_down(image, seed):
460  """Randomly flip an image vertically (upside down) deterministically.
461
462  Guarantees the same results given the same `seed` independent of how many
463  times the function is called, and independent of global seed settings (e.g.
464  `tf.random.set_seed`).
465
466  Example usage:
467
468  >>> image = np.array([[[1], [2]], [[3], [4]]])
469  >>> seed = (2, 3)
470  >>> tf.image.stateless_random_flip_up_down(image, seed).numpy().tolist()
471  [[[3], [4]], [[1], [2]]]
472
473  Args:
474    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
475      of shape `[height, width, channels]`.
476    seed: A shape [2] Tensor, the seed to the random number generator. Must have
477      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
478
479  Returns:
480    A tensor of the same type and shape as `image`.
481  """
482  random_func = functools.partial(
483      stateless_random_ops.stateless_random_uniform, seed=seed)
484  return _random_flip(
485      image, 0, random_func, 'stateless_random_flip_up_down')
486
487
488def _random_flip(image, flip_index, random_func, scope_name):
489  """Randomly (50% chance) flip an image along axis `flip_index`.
490
491  Args:
492    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
493      of shape `[height, width, channels]`.
494    flip_index: Dimension along which to flip the image.
495      Vertical is 0, Horizontal is 1.
496    random_func: partial function for calling either stateful or stateless
497      random ops with `seed` parameter specified.
498    scope_name: Name of the scope in which the ops are added.
499
500  Returns:
501    A tensor of the same type and shape as `image`.
502
503  Raises:
504    ValueError: if the shape of `image` not supported.
505  """
506  with ops.name_scope(None, scope_name, [image]) as scope:
507    image = ops.convert_to_tensor(image, name='image')
508    image = _AssertAtLeast3DImage(image)
509    shape = image.get_shape()
510
511    def f_rank3():
512      uniform_random = random_func(shape=[], minval=0, maxval=1.0)
513      mirror_cond = math_ops.less(uniform_random, .5)
514      result = control_flow_ops.cond(
515          mirror_cond,
516          lambda: array_ops.reverse(image, [flip_index]),
517          lambda: image,
518          name=scope)
519      return fix_image_flip_shape(image, result)
520
521    def f_rank4():
522      batch_size = array_ops.shape(image)[0]
523      uniform_random = random_func(shape=[batch_size], minval=0, maxval=1.0)
524      flips = math_ops.round(
525          array_ops.reshape(uniform_random, [batch_size, 1, 1, 1]))
526      flips = math_ops.cast(flips, image.dtype)
527      flipped_input = array_ops.reverse(image, [flip_index + 1])
528      return flips * flipped_input + (1 - flips) * image
529
530    if shape.ndims is None:
531      rank = array_ops.rank(image)
532      return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
533    if shape.ndims == 3:
534      return f_rank3()
535    elif shape.ndims == 4:
536      return f_rank4()
537    else:
538      raise ValueError(
539          '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape)
540
541
542@tf_export('image.flip_left_right')
543@dispatch.add_dispatch_support
544def flip_left_right(image):
545  """Flip an image horizontally (left to right).
546
547  Outputs the contents of `image` flipped along the width dimension.
548
549  See also `tf.reverse`.
550
551  Usage Example:
552
553  >>> x = [[[1.0, 2.0, 3.0],
554  ...       [4.0, 5.0, 6.0]],
555  ...     [[7.0, 8.0, 9.0],
556  ...       [10.0, 11.0, 12.0]]]
557  >>> tf.image.flip_left_right(x)
558  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
559  array([[[ 4.,  5.,  6.],
560          [ 1.,  2.,  3.]],
561         [[10., 11., 12.],
562          [ 7.,  8.,  9.]]], dtype=float32)>
563
564  Args:
565    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
566      of shape `[height, width, channels]`.
567
568  Returns:
569    A tensor of the same type and shape as `image`.
570
571  Raises:
572    ValueError: if the shape of `image` not supported.
573  """
574  return _flip(image, 1, 'flip_left_right')
575
576
577@tf_export('image.flip_up_down')
578@dispatch.add_dispatch_support
579def flip_up_down(image):
580  """Flip an image vertically (upside down).
581
582  Outputs the contents of `image` flipped along the height dimension.
583
584  See also `reverse()`.
585
586  Usage Example:
587
588  >>> x = [[[1.0, 2.0, 3.0],
589  ...       [4.0, 5.0, 6.0]],
590  ...     [[7.0, 8.0, 9.0],
591  ...       [10.0, 11.0, 12.0]]]
592  >>> tf.image.flip_up_down(x)
593  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
594  array([[[ 7.,  8.,  9.],
595          [10., 11., 12.]],
596         [[ 1.,  2.,  3.],
597          [ 4.,  5.,  6.]]], dtype=float32)>
598
599  Args:
600    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
601      of shape `[height, width, channels]`.
602
603  Returns:
604    A `Tensor` of the same type and shape as `image`.
605
606  Raises:
607    ValueError: if the shape of `image` not supported.
608  """
609  return _flip(image, 0, 'flip_up_down')
610
611
612def _flip(image, flip_index, scope_name):
613  """Flip an image either horizontally or vertically.
614
615  Outputs the contents of `image` flipped along the dimension `flip_index`.
616
617  See also `reverse()`.
618
619  Args:
620    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
621      of shape `[height, width, channels]`.
622    flip_index: 0 For vertical, 1 for horizontal.
623    scope_name: string, scope name.
624
625  Returns:
626    A `Tensor` of the same type and shape as `image`.
627
628  Raises:
629    ValueError: if the shape of `image` not supported.
630  """
631  with ops.name_scope(None, scope_name, [image]):
632    image = ops.convert_to_tensor(image, name='image')
633    image = _AssertAtLeast3DImage(image)
634    shape = image.get_shape()
635
636    def f_rank3():
637      return fix_image_flip_shape(image, array_ops.reverse(image, [flip_index]))
638
639    def f_rank4():
640      return array_ops.reverse(image, [flip_index + 1])
641
642    if shape.ndims is None:
643      rank = array_ops.rank(image)
644      return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
645    elif shape.ndims == 3:
646      return f_rank3()
647    elif shape.ndims == 4:
648      return f_rank4()
649    else:
650      raise ValueError(
651          '\'image\' (shape %s)must have either 3 or 4 dimensions.' % shape)
652
653
654@tf_export('image.rot90')
655@dispatch.add_dispatch_support
656def rot90(image, k=1, name=None):
657  """Rotate image(s) counter-clockwise by 90 degrees.
658
659
660  For example:
661
662  >>> a=tf.constant([[[1],[2]],
663  ...                [[3],[4]]])
664  >>> # rotating `a` counter clockwise by 90 degrees
665  >>> a_rot=tf.image.rot90(a)
666  >>> print(a_rot[...,0].numpy())
667  [[2 4]
668   [1 3]]
669  >>> # rotating `a` counter clockwise by 270 degrees
670  >>> a_rot=tf.image.rot90(a, k=3)
671  >>> print(a_rot[...,0].numpy())
672  [[3 1]
673   [4 2]]
674
675  Args:
676    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
677      of shape `[height, width, channels]`.
678    k: A scalar integer. The number of times the image is rotated by 90 degrees.
679    name: A name for this operation (optional).
680
681  Returns:
682    A rotated tensor of the same type and shape as `image`.
683
684  Raises:
685    ValueError: if the shape of `image` not supported.
686  """
687  with ops.name_scope(name, 'rot90', [image, k]) as scope:
688    image = ops.convert_to_tensor(image, name='image')
689    image = _AssertAtLeast3DImage(image)
690    k = ops.convert_to_tensor(k, dtype=dtypes.int32, name='k')
691    k.get_shape().assert_has_rank(0)
692    k = math_ops.mod(k, 4)
693
694    shape = image.get_shape()
695    if shape.ndims is None:
696      rank = array_ops.rank(image)
697
698      def f_rank3():
699        return _rot90_3D(image, k, scope)
700
701      def f_rank4():
702        return _rot90_4D(image, k, scope)
703
704      return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
705    elif shape.ndims == 3:
706      return _rot90_3D(image, k, scope)
707    elif shape.ndims == 4:
708      return _rot90_4D(image, k, scope)
709    else:
710      raise ValueError(
711          '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape)
712
713
714def _rot90_3D(image, k, name_scope):
715  """Rotate image counter-clockwise by 90 degrees `k` times.
716
717  Args:
718    image: 3-D Tensor of shape `[height, width, channels]`.
719    k: A scalar integer. The number of times the image is rotated by 90 degrees.
720    name_scope: A valid TensorFlow name scope.
721
722  Returns:
723    A 3-D tensor of the same type and shape as `image`.
724
725  """
726
727  def _rot90():
728    return array_ops.transpose(array_ops.reverse_v2(image, [1]), [1, 0, 2])
729
730  def _rot180():
731    return array_ops.reverse_v2(image, [0, 1])
732
733  def _rot270():
734    return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]), [1])
735
736  cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180),
737           (math_ops.equal(k, 3), _rot270)]
738
739  result = control_flow_ops.case(
740      cases, default=lambda: image, exclusive=True, name=name_scope)
741  result.set_shape([None, None, image.get_shape()[2]])
742  return result
743
744
745def _rot90_4D(images, k, name_scope):
746  """Rotate batch of images counter-clockwise by 90 degrees `k` times.
747
748  Args:
749    images: 4-D Tensor of shape `[height, width, channels]`.
750    k: A scalar integer. The number of times the images are rotated by 90
751      degrees.
752    name_scope: A valid TensorFlow name scope.
753
754  Returns:
755    A 4-D `Tensor` of the same type and shape as `images`.
756  """
757
758  def _rot90():
759    return array_ops.transpose(array_ops.reverse_v2(images, [2]), [0, 2, 1, 3])
760
761  def _rot180():
762    return array_ops.reverse_v2(images, [1, 2])
763
764  def _rot270():
765    return array_ops.reverse_v2(array_ops.transpose(images, [0, 2, 1, 3]), [2])
766
767  cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180),
768           (math_ops.equal(k, 3), _rot270)]
769
770  result = control_flow_ops.case(
771      cases, default=lambda: images, exclusive=True, name=name_scope)
772  shape = result.get_shape()
773  result.set_shape([shape[0], None, None, shape[3]])
774  return result
775
776
777@tf_export('image.transpose', v1=['image.transpose', 'image.transpose_image'])
778@dispatch.add_dispatch_support
779def transpose(image, name=None):
780  """Transpose image(s) by swapping the height and width dimension.
781
782  Usage Example:
783
784  >>> x = [[[1.0, 2.0, 3.0],
785  ...       [4.0, 5.0, 6.0]],
786  ...     [[7.0, 8.0, 9.0],
787  ...       [10.0, 11.0, 12.0]]]
788  >>> tf.image.transpose(x)
789  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
790  array([[[ 1.,  2.,  3.],
791          [ 7.,  8.,  9.]],
792         [[ 4.,  5.,  6.],
793          [10., 11., 12.]]], dtype=float32)>
794
795  Args:
796    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
797      of shape `[height, width, channels]`.
798    name: A name for this operation (optional).
799
800  Returns:
801    If `image` was 4-D, a 4-D float Tensor of shape
802   `[batch, width, height, channels]`
803    If `image` was 3-D, a 3-D float Tensor of shape
804   `[width, height, channels]`
805
806  Raises:
807    ValueError: if the shape of `image` not supported.
808
809  Usage Example:
810
811  >>> image = [[[1, 2], [3, 4]],
812  ...         [[5, 6], [7, 8]],
813  ...         [[9, 10], [11, 12]]]
814  >>> image = tf.constant(image)
815  >>> tf.image.transpose(image)
816  <tf.Tensor: shape=(2, 3, 2), dtype=int32, numpy=
817  array([[[ 1,  2],
818         [ 5,  6],
819         [ 9, 10]],
820        [[ 3,  4],
821         [ 7,  8],
822         [11, 12]]], dtype=int32)>
823  """
824  with ops.name_scope(name, 'transpose', [image]):
825    image = ops.convert_to_tensor(image, name='image')
826    image = _AssertAtLeast3DImage(image)
827    shape = image.get_shape()
828    if shape.ndims is None:
829      rank = array_ops.rank(image)
830
831      def f_rank3():
832        return array_ops.transpose(image, [1, 0, 2], name=name)
833
834      def f_rank4():
835        return array_ops.transpose(image, [0, 2, 1, 3], name=name)
836
837      return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
838    elif shape.ndims == 3:
839      return array_ops.transpose(image, [1, 0, 2], name=name)
840    elif shape.ndims == 4:
841      return array_ops.transpose(image, [0, 2, 1, 3], name=name)
842    else:
843      raise ValueError(
844          '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape)
845
846
847@tf_export('image.central_crop')
848@dispatch.add_dispatch_support
849def central_crop(image, central_fraction):
850  """Crop the central region of the image(s).
851
852  Remove the outer parts of an image but retain the central region of the image
853  along each dimension. If we specify central_fraction = 0.5, this function
854  returns the region marked with "X" in the below diagram.
855
856       --------
857      |        |
858      |  XXXX  |
859      |  XXXX  |
860      |        |   where "X" is the central 50% of the image.
861       --------
862
863  This function works on either a single image (`image` is a 3-D Tensor), or a
864  batch of images (`image` is a 4-D Tensor).
865
866  Usage Example:
867
868  >>> x = [[[1.0, 2.0, 3.0],
869  ...       [4.0, 5.0, 6.0],
870  ...       [7.0, 8.0, 9.0],
871  ...       [10.0, 11.0, 12.0]],
872  ...     [[13.0, 14.0, 15.0],
873  ...       [16.0, 17.0, 18.0],
874  ...       [19.0, 20.0, 21.0],
875  ...       [22.0, 23.0, 24.0]],
876  ...     [[25.0, 26.0, 27.0],
877  ...       [28.0, 29.0, 30.0],
878  ...       [31.0, 32.0, 33.0],
879  ...       [34.0, 35.0, 36.0]],
880  ...     [[37.0, 38.0, 39.0],
881  ...       [40.0, 41.0, 42.0],
882  ...       [43.0, 44.0, 45.0],
883  ...       [46.0, 47.0, 48.0]]]
884  >>> tf.image.central_crop(x, 0.5)
885  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
886  array([[[16., 17., 18.],
887          [19., 20., 21.]],
888         [[28., 29., 30.],
889          [31., 32., 33.]]], dtype=float32)>
890
891  Args:
892    image: Either a 3-D float Tensor of shape [height, width, depth], or a 4-D
893      Tensor of shape [batch_size, height, width, depth].
894    central_fraction: float (0, 1], fraction of size to crop
895
896  Raises:
897    ValueError: if central_crop_fraction is not within (0, 1].
898
899  Returns:
900    3-D / 4-D float Tensor, as per the input.
901  """
902  with ops.name_scope(None, 'central_crop', [image]):
903    image = ops.convert_to_tensor(image, name='image')
904    central_fraction_static = tensor_util.constant_value(central_fraction)
905    if central_fraction_static is not None:
906      if central_fraction_static <= 0.0 or central_fraction_static > 1.0:
907        raise ValueError('central_fraction must be within (0, 1]')
908      if central_fraction_static == 1.0:
909        return image
910    else:
911      assert_ops = _assert(
912          math_ops.logical_or(central_fraction > 0.0, central_fraction <= 1.0),
913          ValueError, 'central_fraction must be within (0, 1]')
914      image = control_flow_ops.with_dependencies(assert_ops, image)
915
916    _AssertAtLeast3DImage(image)
917    rank = image.get_shape().ndims
918    if rank != 3 and rank != 4:
919      raise ValueError('`image` should either be a Tensor with rank = 3 or '
920                       'rank = 4. Had rank = {}.'.format(rank))
921
922    # Helper method to return the `idx`-th dimension of `tensor`, along with
923    # a boolean signifying if the dimension is dynamic.
924    def _get_dim(tensor, idx):
925      static_shape = tensor.get_shape().dims[idx].value
926      if static_shape is not None:
927        return static_shape, False
928      return array_ops.shape(tensor)[idx], True
929
930    # Get the height, width, depth (and batch size, if the image is a 4-D
931    # tensor).
932    if rank == 3:
933      img_h, dynamic_h = _get_dim(image, 0)
934      img_w, dynamic_w = _get_dim(image, 1)
935      img_d = image.get_shape()[2]
936    else:
937      img_bs = image.get_shape()[0]
938      img_h, dynamic_h = _get_dim(image, 1)
939      img_w, dynamic_w = _get_dim(image, 2)
940      img_d = image.get_shape()[3]
941
942    dynamic_h = dynamic_h or (central_fraction_static is None)
943    dynamic_w = dynamic_w or (central_fraction_static is None)
944
945    # Compute the bounding boxes for the crop. The type and value of the
946    # bounding boxes depend on the `image` tensor's rank and whether / not the
947    # dimensions are statically defined.
948    if dynamic_h:
949      img_hd = math_ops.cast(img_h, dtypes.float64)
950      bbox_h_start = math_ops.cast(
951          (img_hd - img_hd * math_ops.cast(central_fraction, dtypes.float64)) /
952          2, dtypes.int32)
953    else:
954      img_hd = float(img_h)
955      bbox_h_start = int((img_hd - img_hd * central_fraction_static) / 2)
956
957    if dynamic_w:
958      img_wd = math_ops.cast(img_w, dtypes.float64)
959      bbox_w_start = math_ops.cast(
960          (img_wd - img_wd * math_ops.cast(central_fraction, dtypes.float64)) /
961          2, dtypes.int32)
962    else:
963      img_wd = float(img_w)
964      bbox_w_start = int((img_wd - img_wd * central_fraction_static) / 2)
965
966    bbox_h_size = img_h - bbox_h_start * 2
967    bbox_w_size = img_w - bbox_w_start * 2
968
969    if rank == 3:
970      bbox_begin = array_ops.stack([bbox_h_start, bbox_w_start, 0])
971      bbox_size = array_ops.stack([bbox_h_size, bbox_w_size, -1])
972    else:
973      bbox_begin = array_ops.stack([0, bbox_h_start, bbox_w_start, 0])
974      bbox_size = array_ops.stack([-1, bbox_h_size, bbox_w_size, -1])
975
976    image = array_ops.slice(image, bbox_begin, bbox_size)
977
978    # Reshape the `image` tensor to the desired size.
979    if rank == 3:
980      image.set_shape([
981          None if dynamic_h else bbox_h_size,
982          None if dynamic_w else bbox_w_size, img_d
983      ])
984    else:
985      image.set_shape([
986          img_bs, None if dynamic_h else bbox_h_size,
987          None if dynamic_w else bbox_w_size, img_d
988      ])
989    return image
990
991
992@tf_export('image.pad_to_bounding_box')
993@dispatch.add_dispatch_support
994def pad_to_bounding_box(image, offset_height, offset_width, target_height,
995                        target_width):
996  """Pad `image` with zeros to the specified `height` and `width`.
997
998  Adds `offset_height` rows of zeros on top, `offset_width` columns of
999  zeros on the left, and then pads the image on the bottom and right
1000  with zeros until it has dimensions `target_height`, `target_width`.
1001
1002  This op does nothing if `offset_*` is zero and the image already has size
1003  `target_height` by `target_width`.
1004
1005  Usage Example:
1006
1007  >>> x = [[[1., 2., 3.],
1008  ...       [4., 5., 6.]],
1009  ...       [[7., 8., 9.],
1010  ...       [10., 11., 12.]]]
1011  >>> padded_image = tf.image.pad_to_bounding_box(x, 1, 1, 4, 4)
1012  >>> padded_image
1013  <tf.Tensor: shape=(4, 4, 3), dtype=float32, numpy=
1014  array([[[ 0.,  0.,  0.],
1015  [ 0.,  0.,  0.],
1016  [ 0.,  0.,  0.],
1017  [ 0.,  0.,  0.]],
1018  [[ 0.,  0.,  0.],
1019  [ 1.,  2.,  3.],
1020  [ 4.,  5.,  6.],
1021  [ 0.,  0.,  0.]],
1022  [[ 0.,  0.,  0.],
1023  [ 7.,  8.,  9.],
1024  [10., 11., 12.],
1025  [ 0.,  0.,  0.]],
1026  [[ 0.,  0.,  0.],
1027  [ 0.,  0.,  0.],
1028  [ 0.,  0.,  0.],
1029  [ 0.,  0.,  0.]]], dtype=float32)>
1030
1031  Args:
1032    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
1033      of shape `[height, width, channels]`.
1034    offset_height: Number of rows of zeros to add on top.
1035    offset_width: Number of columns of zeros to add on the left.
1036    target_height: Height of output image.
1037    target_width: Width of output image.
1038
1039  Returns:
1040    If `image` was 4-D, a 4-D float Tensor of shape
1041    `[batch, target_height, target_width, channels]`
1042    If `image` was 3-D, a 3-D float Tensor of shape
1043    `[target_height, target_width, channels]`
1044
1045  Raises:
1046    ValueError: If the shape of `image` is incompatible with the `offset_*` or
1047      `target_*` arguments, or either `offset_height` or `offset_width` is
1048      negative.
1049  """
1050  with ops.name_scope(None, 'pad_to_bounding_box', [image]):
1051    image = ops.convert_to_tensor(image, name='image')
1052
1053    is_batch = True
1054    image_shape = image.get_shape()
1055    if image_shape.ndims == 3:
1056      is_batch = False
1057      image = array_ops.expand_dims(image, 0)
1058    elif image_shape.ndims is None:
1059      is_batch = False
1060      image = array_ops.expand_dims(image, 0)
1061      image.set_shape([None] * 4)
1062    elif image_shape.ndims != 4:
1063      raise ValueError(
1064          '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
1065          image_shape)
1066
1067    assert_ops = _CheckAtLeast3DImage(image, require_static=False)
1068    batch, height, width, depth = _ImageDimensions(image, rank=4)
1069
1070    after_padding_width = target_width - offset_width - width
1071
1072    after_padding_height = target_height - offset_height - height
1073
1074    assert_ops += _assert(offset_height >= 0, ValueError,
1075                          'offset_height must be >= 0')
1076    assert_ops += _assert(offset_width >= 0, ValueError,
1077                          'offset_width must be >= 0')
1078    assert_ops += _assert(after_padding_width >= 0, ValueError,
1079                          'width must be <= target - offset')
1080    assert_ops += _assert(after_padding_height >= 0, ValueError,
1081                          'height must be <= target - offset')
1082    image = control_flow_ops.with_dependencies(assert_ops, image)
1083
1084    # Do not pad on the depth dimensions.
1085    paddings = array_ops.reshape(
1086        array_ops.stack([
1087            0, 0, offset_height, after_padding_height, offset_width,
1088            after_padding_width, 0, 0
1089        ]), [4, 2])
1090    padded = array_ops.pad(image, paddings)
1091
1092    padded_shape = [
1093        None if _is_tensor(i) else i
1094        for i in [batch, target_height, target_width, depth]
1095    ]
1096    padded.set_shape(padded_shape)
1097
1098    if not is_batch:
1099      padded = array_ops.squeeze(padded, axis=[0])
1100
1101    return padded
1102
1103
1104@tf_export('image.crop_to_bounding_box')
1105@dispatch.add_dispatch_support
1106def crop_to_bounding_box(image, offset_height, offset_width, target_height,
1107                         target_width):
1108  """Crops an image to a specified bounding box.
1109
1110  This op cuts a rectangular part out of `image`. The top-left corner of the
1111  returned image is at `offset_height, offset_width` in `image`, and its
1112  lower-right corner is at
1113  `offset_height + target_height, offset_width + target_width`.
1114
1115  Args:
1116    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
1117      of shape `[height, width, channels]`.
1118    offset_height: Vertical coordinate of the top-left corner of the result in
1119      the input.
1120    offset_width: Horizontal coordinate of the top-left corner of the result in
1121      the input.
1122    target_height: Height of the result.
1123    target_width: Width of the result.
1124
1125  Returns:
1126    If `image` was 4-D, a 4-D float Tensor of shape
1127    `[batch, target_height, target_width, channels]`
1128    If `image` was 3-D, a 3-D float Tensor of shape
1129    `[target_height, target_width, channels]`
1130
1131  Raises:
1132    ValueError: If the shape of `image` is incompatible with the `offset_*` or
1133      `target_*` arguments, or either `offset_height` or `offset_width` is
1134      negative, or either `target_height` or `target_width` is not positive.
1135  """
1136  with ops.name_scope(None, 'crop_to_bounding_box', [image]):
1137    image = ops.convert_to_tensor(image, name='image')
1138
1139    is_batch = True
1140    image_shape = image.get_shape()
1141    if image_shape.ndims == 3:
1142      is_batch = False
1143      image = array_ops.expand_dims(image, 0)
1144    elif image_shape.ndims is None:
1145      is_batch = False
1146      image = array_ops.expand_dims(image, 0)
1147      image.set_shape([None] * 4)
1148    elif image_shape.ndims != 4:
1149      raise ValueError(
1150          '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
1151          image_shape)
1152
1153    assert_ops = _CheckAtLeast3DImage(image, require_static=False)
1154
1155    batch, height, width, depth = _ImageDimensions(image, rank=4)
1156
1157    assert_ops += _assert(offset_width >= 0, ValueError,
1158                          'offset_width must be >= 0.')
1159    assert_ops += _assert(offset_height >= 0, ValueError,
1160                          'offset_height must be >= 0.')
1161    assert_ops += _assert(target_width > 0, ValueError,
1162                          'target_width must be > 0.')
1163    assert_ops += _assert(target_height > 0, ValueError,
1164                          'target_height must be > 0.')
1165    assert_ops += _assert(width >= (target_width + offset_width), ValueError,
1166                          'width must be >= target + offset.')
1167    assert_ops += _assert(height >= (target_height + offset_height), ValueError,
1168                          'height must be >= target + offset.')
1169    image = control_flow_ops.with_dependencies(assert_ops, image)
1170
1171    cropped = array_ops.slice(
1172        image, array_ops.stack([0, offset_height, offset_width, 0]),
1173        array_ops.stack([-1, target_height, target_width, -1]))
1174
1175    cropped_shape = [
1176        None if _is_tensor(i) else i
1177        for i in [batch, target_height, target_width, depth]
1178    ]
1179    cropped.set_shape(cropped_shape)
1180
1181    if not is_batch:
1182      cropped = array_ops.squeeze(cropped, axis=[0])
1183
1184    return cropped
1185
1186
1187@tf_export(
1188    'image.resize_with_crop_or_pad',
1189    v1=['image.resize_with_crop_or_pad', 'image.resize_image_with_crop_or_pad'])
1190@dispatch.add_dispatch_support
1191def resize_image_with_crop_or_pad(image, target_height, target_width):
1192  """Crops and/or pads an image to a target width and height.
1193
1194  Resizes an image to a target width and height by either centrally
1195  cropping the image or padding it evenly with zeros.
1196
1197  If `width` or `height` is greater than the specified `target_width` or
1198  `target_height` respectively, this op centrally crops along that dimension.
1199  If `width` or `height` is smaller than the specified `target_width` or
1200  `target_height` respectively, this op centrally pads with 0 along that
1201  dimension.
1202
1203  Args:
1204    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
1205      of shape `[height, width, channels]`.
1206    target_height: Target height.
1207    target_width: Target width.
1208
1209  Raises:
1210    ValueError: if `target_height` or `target_width` are zero or negative.
1211
1212  Returns:
1213    Cropped and/or padded image.
1214    If `images` was 4-D, a 4-D float Tensor of shape
1215    `[batch, new_height, new_width, channels]`.
1216    If `images` was 3-D, a 3-D float Tensor of shape
1217    `[new_height, new_width, channels]`.
1218  """
1219  with ops.name_scope(None, 'resize_image_with_crop_or_pad', [image]):
1220    image = ops.convert_to_tensor(image, name='image')
1221    image_shape = image.get_shape()
1222    is_batch = True
1223    if image_shape.ndims == 3:
1224      is_batch = False
1225      image = array_ops.expand_dims(image, 0)
1226    elif image_shape.ndims is None:
1227      is_batch = False
1228      image = array_ops.expand_dims(image, 0)
1229      image.set_shape([None] * 4)
1230    elif image_shape.ndims != 4:
1231      raise ValueError(
1232          '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
1233          image_shape)
1234
1235    assert_ops = _CheckAtLeast3DImage(image, require_static=False)
1236    assert_ops += _assert(target_width > 0, ValueError,
1237                          'target_width must be > 0.')
1238    assert_ops += _assert(target_height > 0, ValueError,
1239                          'target_height must be > 0.')
1240
1241    image = control_flow_ops.with_dependencies(assert_ops, image)
1242    # `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks.
1243    # Make sure our checks come first, so that error messages are clearer.
1244    if _is_tensor(target_height):
1245      target_height = control_flow_ops.with_dependencies(
1246          assert_ops, target_height)
1247    if _is_tensor(target_width):
1248      target_width = control_flow_ops.with_dependencies(assert_ops,
1249                                                        target_width)
1250
1251    def max_(x, y):
1252      if _is_tensor(x) or _is_tensor(y):
1253        return math_ops.maximum(x, y)
1254      else:
1255        return max(x, y)
1256
1257    def min_(x, y):
1258      if _is_tensor(x) or _is_tensor(y):
1259        return math_ops.minimum(x, y)
1260      else:
1261        return min(x, y)
1262
1263    def equal_(x, y):
1264      if _is_tensor(x) or _is_tensor(y):
1265        return math_ops.equal(x, y)
1266      else:
1267        return x == y
1268
1269    _, height, width, _ = _ImageDimensions(image, rank=4)
1270    width_diff = target_width - width
1271    offset_crop_width = max_(-width_diff // 2, 0)
1272    offset_pad_width = max_(width_diff // 2, 0)
1273
1274    height_diff = target_height - height
1275    offset_crop_height = max_(-height_diff // 2, 0)
1276    offset_pad_height = max_(height_diff // 2, 0)
1277
1278    # Maybe crop if needed.
1279    cropped = crop_to_bounding_box(image, offset_crop_height, offset_crop_width,
1280                                   min_(target_height, height),
1281                                   min_(target_width, width))
1282
1283    # Maybe pad if needed.
1284    resized = pad_to_bounding_box(cropped, offset_pad_height, offset_pad_width,
1285                                  target_height, target_width)
1286
1287    # In theory all the checks below are redundant.
1288    if resized.get_shape().ndims is None:
1289      raise ValueError('resized contains no shape.')
1290
1291    _, resized_height, resized_width, _ = _ImageDimensions(resized, rank=4)
1292
1293    assert_ops = []
1294    assert_ops += _assert(
1295        equal_(resized_height, target_height), ValueError,
1296        'resized height is not correct.')
1297    assert_ops += _assert(
1298        equal_(resized_width, target_width), ValueError,
1299        'resized width is not correct.')
1300
1301    resized = control_flow_ops.with_dependencies(assert_ops, resized)
1302
1303    if not is_batch:
1304      resized = array_ops.squeeze(resized, axis=[0])
1305
1306    return resized
1307
1308
1309@tf_export(v1=['image.ResizeMethod'])
1310class ResizeMethodV1(object):
1311  """See `v1.image.resize` for details."""
1312  BILINEAR = 0
1313  NEAREST_NEIGHBOR = 1
1314  BICUBIC = 2
1315  AREA = 3
1316
1317
1318@tf_export('image.ResizeMethod', v1=[])
1319class ResizeMethod(object):
1320  """See `tf.image.resize` for details."""
1321  BILINEAR = 'bilinear'
1322  NEAREST_NEIGHBOR = 'nearest'
1323  BICUBIC = 'bicubic'
1324  AREA = 'area'
1325  LANCZOS3 = 'lanczos3'
1326  LANCZOS5 = 'lanczos5'
1327  GAUSSIAN = 'gaussian'
1328  MITCHELLCUBIC = 'mitchellcubic'
1329
1330
1331def _resize_images_common(images, resizer_fn, size, preserve_aspect_ratio, name,
1332                          skip_resize_if_same):
1333  """Core functionality for v1 and v2 resize functions."""
1334  with ops.name_scope(name, 'resize', [images, size]):
1335    images = ops.convert_to_tensor(images, name='images')
1336    if images.get_shape().ndims is None:
1337      raise ValueError('\'images\' contains no shape.')
1338    # TODO(shlens): Migrate this functionality to the underlying Op's.
1339    is_batch = True
1340    if images.get_shape().ndims == 3:
1341      is_batch = False
1342      images = array_ops.expand_dims(images, 0)
1343    elif images.get_shape().ndims != 4:
1344      raise ValueError('\'images\' must have either 3 or 4 dimensions.')
1345
1346    _, height, width, _ = images.get_shape().as_list()
1347
1348    try:
1349      size = ops.convert_to_tensor(size, dtypes.int32, name='size')
1350    except (TypeError, ValueError):
1351      raise ValueError('\'size\' must be a 1-D int32 Tensor')
1352    if not size.get_shape().is_compatible_with([2]):
1353      raise ValueError('\'size\' must be a 1-D Tensor of 2 elements: '
1354                       'new_height, new_width')
1355
1356    if preserve_aspect_ratio:
1357      # Get the current shapes of the image, even if dynamic.
1358      _, current_height, current_width, _ = _ImageDimensions(images, rank=4)
1359
1360      # do the computation to find the right scale and height/width.
1361      scale_factor_height = (
1362          math_ops.cast(size[0], dtypes.float32) /
1363          math_ops.cast(current_height, dtypes.float32))
1364      scale_factor_width = (
1365          math_ops.cast(size[1], dtypes.float32) /
1366          math_ops.cast(current_width, dtypes.float32))
1367      scale_factor = math_ops.minimum(scale_factor_height, scale_factor_width)
1368      scaled_height_const = math_ops.cast(
1369          math_ops.round(scale_factor *
1370                         math_ops.cast(current_height, dtypes.float32)),
1371          dtypes.int32)
1372      scaled_width_const = math_ops.cast(
1373          math_ops.round(scale_factor *
1374                         math_ops.cast(current_width, dtypes.float32)),
1375          dtypes.int32)
1376
1377      # NOTE: Reset the size and other constants used later.
1378      size = ops.convert_to_tensor([scaled_height_const, scaled_width_const],
1379                                   dtypes.int32,
1380                                   name='size')
1381
1382    size_const_as_shape = tensor_util.constant_value_as_shape(size)
1383    new_height_const = tensor_shape.dimension_at_index(size_const_as_shape,
1384                                                       0).value
1385    new_width_const = tensor_shape.dimension_at_index(size_const_as_shape,
1386                                                      1).value
1387
1388    # If we can determine that the height and width will be unmodified by this
1389    # transformation, we avoid performing the resize.
1390    if skip_resize_if_same and all(
1391        x is not None
1392        for x in [new_width_const, width, new_height_const, height]) and (
1393            width == new_width_const and height == new_height_const):
1394      if not is_batch:
1395        images = array_ops.squeeze(images, axis=[0])
1396      return images
1397
1398    images = resizer_fn(images, size)
1399
1400    # NOTE(mrry): The shape functions for the resize ops cannot unpack
1401    # the packed values in `new_size`, so set the shape here.
1402    images.set_shape([None, new_height_const, new_width_const, None])
1403
1404    if not is_batch:
1405      images = array_ops.squeeze(images, axis=[0])
1406    return images
1407
1408
1409@tf_export(v1=['image.resize_images', 'image.resize'])
1410@dispatch.add_dispatch_support
1411def resize_images(images,
1412                  size,
1413                  method=ResizeMethodV1.BILINEAR,
1414                  align_corners=False,
1415                  preserve_aspect_ratio=False,
1416                  name=None):
1417  """Resize `images` to `size` using the specified `method`.
1418
1419  Resized images will be distorted if their original aspect ratio is not
1420  the same as `size`.  To avoid distortions see
1421  `tf.image.resize_with_pad` or `tf.image.resize_with_crop_or_pad`.
1422
1423  The `method` can be one of:
1424
1425  *   <b>`tf.image.ResizeMethod.BILINEAR`</b>: [Bilinear interpolation.](
1426    https://en.wikipedia.org/wiki/Bilinear_interpolation)
1427  *   <b>`tf.image.ResizeMethod.NEAREST_NEIGHBOR`</b>: [
1428    Nearest neighbor interpolation.](
1429    https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
1430  *   <b>`tf.image.ResizeMethod.BICUBIC`</b>: [Bicubic interpolation.](
1431    https://en.wikipedia.org/wiki/Bicubic_interpolation)
1432  *   <b>`tf.image.ResizeMethod.AREA`</b>: Area interpolation.
1433
1434  The return value has the same type as `images` if `method` is
1435  `tf.image.ResizeMethod.NEAREST_NEIGHBOR`. It will also have the same type
1436  as `images` if the size of `images` can be statically determined to be the
1437  same as `size`, because `images` is returned in this case. Otherwise, the
1438  return value has type `float32`.
1439
1440  Args:
1441    images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
1442      of shape `[height, width, channels]`.
1443    size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The new
1444      size for the images.
1445    method: ResizeMethod.  Defaults to `tf.image.ResizeMethod.BILINEAR`.
1446    align_corners: bool.  If True, the centers of the 4 corner pixels of the
1447      input and output tensors are aligned, preserving the values at the corner
1448      pixels. Defaults to `False`.
1449    preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set,
1450      then `images` will be resized to a size that fits in `size` while
1451      preserving the aspect ratio of the original image. Scales up the image if
1452      `size` is bigger than the current size of the `image`. Defaults to False.
1453    name: A name for this operation (optional).
1454
1455  Raises:
1456    ValueError: if the shape of `images` is incompatible with the
1457      shape arguments to this function
1458    ValueError: if `size` has invalid shape or type.
1459    ValueError: if an unsupported resize method is specified.
1460
1461  Returns:
1462    If `images` was 4-D, a 4-D float Tensor of shape
1463    `[batch, new_height, new_width, channels]`.
1464    If `images` was 3-D, a 3-D float Tensor of shape
1465    `[new_height, new_width, channels]`.
1466  """
1467
1468  def resize_fn(images_t, new_size):
1469    """Legacy resize core function, passed to _resize_images_common."""
1470    if method == ResizeMethodV1.BILINEAR or method == ResizeMethod.BILINEAR:
1471      return gen_image_ops.resize_bilinear(
1472          images_t, new_size, align_corners=align_corners)
1473    elif (method == ResizeMethodV1.NEAREST_NEIGHBOR or
1474          method == ResizeMethod.NEAREST_NEIGHBOR):
1475      return gen_image_ops.resize_nearest_neighbor(
1476          images_t, new_size, align_corners=align_corners)
1477    elif method == ResizeMethodV1.BICUBIC or method == ResizeMethod.BICUBIC:
1478      return gen_image_ops.resize_bicubic(
1479          images_t, new_size, align_corners=align_corners)
1480    elif method == ResizeMethodV1.AREA or method == ResizeMethod.AREA:
1481      return gen_image_ops.resize_area(
1482          images_t, new_size, align_corners=align_corners)
1483    else:
1484      raise ValueError('Resize method is not implemented: {}'.format(method))
1485
1486  return _resize_images_common(
1487      images,
1488      resize_fn,
1489      size,
1490      preserve_aspect_ratio=preserve_aspect_ratio,
1491      name=name,
1492      skip_resize_if_same=True)
1493
1494
1495@tf_export('image.resize', v1=[])
1496@dispatch.add_dispatch_support
1497def resize_images_v2(images,
1498                     size,
1499                     method=ResizeMethod.BILINEAR,
1500                     preserve_aspect_ratio=False,
1501                     antialias=False,
1502                     name=None):
1503  """Resize `images` to `size` using the specified `method`.
1504
1505  Resized images will be distorted if their original aspect ratio is not
1506  the same as `size`.  To avoid distortions see
1507  `tf.image.resize_with_pad`.
1508
1509  >>> image = tf.constant([
1510  ...  [1,0,0,0,0],
1511  ...  [0,1,0,0,0],
1512  ...  [0,0,1,0,0],
1513  ...  [0,0,0,1,0],
1514  ...  [0,0,0,0,1],
1515  ... ])
1516  >>> # Add "batch" and "channels" dimensions
1517  >>> image = image[tf.newaxis, ..., tf.newaxis]
1518  >>> image.shape.as_list()  # [batch, height, width, channels]
1519  [1, 5, 5, 1]
1520  >>> tf.image.resize(image, [3,5])[0,...,0].numpy()
1521  array([[0.6666667, 0.3333333, 0.       , 0.       , 0.       ],
1522         [0.       , 0.       , 1.       , 0.       , 0.       ],
1523         [0.       , 0.       , 0.       , 0.3333335, 0.6666665]],
1524        dtype=float32)
1525
1526  It works equally well with a single image instead of a batch of images:
1527
1528  >>> tf.image.resize(image[0], [3,5]).shape.as_list()
1529  [3, 5, 1]
1530
1531  When `antialias` is true, the sampling filter will anti-alias the input image
1532  as well as interpolate.  When downsampling an image with [anti-aliasing](
1533  https://en.wikipedia.org/wiki/Spatial_anti-aliasing) the sampling filter
1534  kernel is scaled in order to properly anti-alias the input image signal.
1535  `antialias` has no effect when upsampling an image:
1536
1537  >>> a = tf.image.resize(image, [5,10])
1538  >>> b = tf.image.resize(image, [5,10], antialias=True)
1539  >>> tf.reduce_max(abs(a - b)).numpy()
1540  0.0
1541
1542  The `method` argument expects an item from the `image.ResizeMethod` enum, or
1543  the string equivalent. The options are:
1544
1545  *   <b>`bilinear`</b>: [Bilinear interpolation.](
1546    https://en.wikipedia.org/wiki/Bilinear_interpolation) If `antialias` is
1547    true, becomes a hat/tent filter function with radius 1 when downsampling.
1548  *   <b>`lanczos3`</b>:  [Lanczos kernel](
1549    https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 3.
1550    High-quality practical filter but may have some ringing, especially on
1551    synthetic images.
1552  *   <b>`lanczos5`</b>: [Lanczos kernel] (
1553    https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 5.
1554    Very-high-quality filter but may have stronger ringing.
1555  *   <b>`bicubic`</b>: [Cubic interpolant](
1556    https://en.wikipedia.org/wiki/Bicubic_interpolation) of Keys. Equivalent to
1557    Catmull-Rom kernel. Reasonably good quality and faster than Lanczos3Kernel,
1558    particularly when upsampling.
1559  *   <b>`gaussian`</b>: [Gaussian kernel](
1560    https://en.wikipedia.org/wiki/Gaussian_filter) with radius 3,
1561    sigma = 1.5 / 3.0.
1562  *   <b>`nearest`</b>: [Nearest neighbor interpolation.](
1563    https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
1564    `antialias` has no effect when used with nearest neighbor interpolation.
1565  *   <b>`area`</b>: Anti-aliased resampling with area interpolation.
1566    `antialias` has no effect when used with area interpolation; it
1567    always anti-aliases.
1568  *   <b>`mitchellcubic`</b>: Mitchell-Netravali Cubic non-interpolating filter.
1569    For synthetic images (especially those lacking proper prefiltering), less
1570    ringing than Keys cubic kernel but less sharp.
1571
1572  Note: Near image edges the filtering kernel may be partially outside the
1573  image boundaries. For these pixels, only input pixels inside the image will be
1574  included in the filter sum, and the output value will be appropriately
1575  normalized.
1576
1577  The return value has type `float32`, unless the `method` is
1578  `ResizeMethod.NEAREST_NEIGHBOR`, then the return dtype is the dtype
1579  of `images`:
1580
1581  >>> nn = tf.image.resize(image, [5,7], method='nearest')
1582  >>> nn[0,...,0].numpy()
1583  array([[1, 0, 0, 0, 0, 0, 0],
1584         [0, 1, 1, 0, 0, 0, 0],
1585         [0, 0, 0, 1, 0, 0, 0],
1586         [0, 0, 0, 0, 1, 1, 0],
1587         [0, 0, 0, 0, 0, 0, 1]], dtype=int32)
1588
1589  With `preserve_aspect_ratio=True`, the aspect ratio is preserved, so `size`
1590  is the maximum for each dimension:
1591
1592  >>> max_10_20 = tf.image.resize(image, [10,20], preserve_aspect_ratio=True)
1593  >>> max_10_20.shape.as_list()
1594  [1, 10, 10, 1]
1595
1596  Args:
1597    images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
1598      of shape `[height, width, channels]`.
1599    size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The new
1600      size for the images.
1601    method: An `image.ResizeMethod`, or string equivalent.  Defaults to
1602      `bilinear`.
1603    preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set,
1604      then `images` will be resized to a size that fits in `size` while
1605      preserving the aspect ratio of the original image. Scales up the image if
1606      `size` is bigger than the current size of the `image`. Defaults to False.
1607    antialias: Whether to use an anti-aliasing filter when downsampling an
1608      image.
1609    name: A name for this operation (optional).
1610
1611  Raises:
1612    ValueError: if the shape of `images` is incompatible with the
1613      shape arguments to this function
1614    ValueError: if `size` has an invalid shape or type.
1615    ValueError: if an unsupported resize method is specified.
1616
1617  Returns:
1618    If `images` was 4-D, a 4-D float Tensor of shape
1619    `[batch, new_height, new_width, channels]`.
1620    If `images` was 3-D, a 3-D float Tensor of shape
1621    `[new_height, new_width, channels]`.
1622  """
1623
1624  def resize_fn(images_t, new_size):
1625    """Resize core function, passed to _resize_images_common."""
1626    scale_and_translate_methods = [
1627        ResizeMethod.LANCZOS3, ResizeMethod.LANCZOS5, ResizeMethod.GAUSSIAN,
1628        ResizeMethod.MITCHELLCUBIC
1629    ]
1630
1631    def resize_with_scale_and_translate(method):
1632      scale = (
1633          math_ops.cast(new_size, dtype=dtypes.float32) /
1634          math_ops.cast(array_ops.shape(images_t)[1:3], dtype=dtypes.float32))
1635      return gen_image_ops.scale_and_translate(
1636          images_t,
1637          new_size,
1638          scale,
1639          array_ops.zeros([2]),
1640          kernel_type=method,
1641          antialias=antialias)
1642
1643    if method == ResizeMethod.BILINEAR:
1644      if antialias:
1645        return resize_with_scale_and_translate('triangle')
1646      else:
1647        return gen_image_ops.resize_bilinear(
1648            images_t, new_size, half_pixel_centers=True)
1649    elif method == ResizeMethod.NEAREST_NEIGHBOR:
1650      return gen_image_ops.resize_nearest_neighbor(
1651          images_t, new_size, half_pixel_centers=True)
1652    elif method == ResizeMethod.BICUBIC:
1653      if antialias:
1654        return resize_with_scale_and_translate('keyscubic')
1655      else:
1656        return gen_image_ops.resize_bicubic(
1657            images_t, new_size, half_pixel_centers=True)
1658    elif method == ResizeMethod.AREA:
1659      return gen_image_ops.resize_area(images_t, new_size)
1660    elif method in scale_and_translate_methods:
1661      return resize_with_scale_and_translate(method)
1662    else:
1663      raise ValueError('Resize method is not implemented: {}'.format(method))
1664
1665  return _resize_images_common(
1666      images,
1667      resize_fn,
1668      size,
1669      preserve_aspect_ratio=preserve_aspect_ratio,
1670      name=name,
1671      skip_resize_if_same=False)
1672
1673
1674def _resize_image_with_pad_common(image, target_height, target_width,
1675                                  resize_fn):
1676  """Core functionality for v1 and v2 resize_image_with_pad functions."""
1677  with ops.name_scope(None, 'resize_image_with_pad', [image]):
1678    image = ops.convert_to_tensor(image, name='image')
1679    image_shape = image.get_shape()
1680    is_batch = True
1681    if image_shape.ndims == 3:
1682      is_batch = False
1683      image = array_ops.expand_dims(image, 0)
1684    elif image_shape.ndims is None:
1685      is_batch = False
1686      image = array_ops.expand_dims(image, 0)
1687      image.set_shape([None] * 4)
1688    elif image_shape.ndims != 4:
1689      raise ValueError(
1690          '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
1691          image_shape)
1692
1693    assert_ops = _CheckAtLeast3DImage(image, require_static=False)
1694    assert_ops += _assert(target_width > 0, ValueError,
1695                          'target_width must be > 0.')
1696    assert_ops += _assert(target_height > 0, ValueError,
1697                          'target_height must be > 0.')
1698
1699    image = control_flow_ops.with_dependencies(assert_ops, image)
1700
1701    def max_(x, y):
1702      if _is_tensor(x) or _is_tensor(y):
1703        return math_ops.maximum(x, y)
1704      else:
1705        return max(x, y)
1706
1707    _, height, width, _ = _ImageDimensions(image, rank=4)
1708
1709    # convert values to float, to ease divisions
1710    f_height = math_ops.cast(height, dtype=dtypes.float32)
1711    f_width = math_ops.cast(width, dtype=dtypes.float32)
1712    f_target_height = math_ops.cast(target_height, dtype=dtypes.float32)
1713    f_target_width = math_ops.cast(target_width, dtype=dtypes.float32)
1714
1715    # Find the ratio by which the image must be adjusted
1716    # to fit within the target
1717    ratio = max_(f_width / f_target_width, f_height / f_target_height)
1718    resized_height_float = f_height / ratio
1719    resized_width_float = f_width / ratio
1720    resized_height = math_ops.cast(
1721        math_ops.floor(resized_height_float), dtype=dtypes.int32)
1722    resized_width = math_ops.cast(
1723        math_ops.floor(resized_width_float), dtype=dtypes.int32)
1724
1725    padding_height = (f_target_height - resized_height_float) / 2
1726    padding_width = (f_target_width - resized_width_float) / 2
1727    f_padding_height = math_ops.floor(padding_height)
1728    f_padding_width = math_ops.floor(padding_width)
1729    p_height = max_(0, math_ops.cast(f_padding_height, dtype=dtypes.int32))
1730    p_width = max_(0, math_ops.cast(f_padding_width, dtype=dtypes.int32))
1731
1732    # Resize first, then pad to meet requested dimensions
1733    resized = resize_fn(image, [resized_height, resized_width])
1734
1735    padded = pad_to_bounding_box(resized, p_height, p_width, target_height,
1736                                 target_width)
1737
1738    if padded.get_shape().ndims is None:
1739      raise ValueError('padded contains no shape.')
1740
1741    _ImageDimensions(padded, rank=4)
1742
1743    if not is_batch:
1744      padded = array_ops.squeeze(padded, axis=[0])
1745
1746    return padded
1747
1748
1749@tf_export(v1=['image.resize_image_with_pad'])
1750@dispatch.add_dispatch_support
1751def resize_image_with_pad_v1(image,
1752                             target_height,
1753                             target_width,
1754                             method=ResizeMethodV1.BILINEAR,
1755                             align_corners=False):
1756  """Resizes and pads an image to a target width and height.
1757
1758  Resizes an image to a target width and height by keeping
1759  the aspect ratio the same without distortion. If the target
1760  dimensions don't match the image dimensions, the image
1761  is resized and then padded with zeroes to match requested
1762  dimensions.
1763
1764  Args:
1765    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
1766      of shape `[height, width, channels]`.
1767    target_height: Target height.
1768    target_width: Target width.
1769    method: Method to use for resizing image. See `resize_images()`
1770    align_corners: bool.  If True, the centers of the 4 corner pixels of the
1771      input and output tensors are aligned, preserving the values at the corner
1772      pixels. Defaults to `False`.
1773
1774  Raises:
1775    ValueError: if `target_height` or `target_width` are zero or negative.
1776
1777  Returns:
1778    Resized and padded image.
1779    If `images` was 4-D, a 4-D float Tensor of shape
1780    `[batch, new_height, new_width, channels]`.
1781    If `images` was 3-D, a 3-D float Tensor of shape
1782    `[new_height, new_width, channels]`.
1783  """
1784
1785  def _resize_fn(im, new_size):
1786    return resize_images(im, new_size, method, align_corners=align_corners)
1787
1788  return _resize_image_with_pad_common(image, target_height, target_width,
1789                                       _resize_fn)
1790
1791
1792@tf_export('image.resize_with_pad', v1=[])
1793@dispatch.add_dispatch_support
1794def resize_image_with_pad_v2(image,
1795                             target_height,
1796                             target_width,
1797                             method=ResizeMethod.BILINEAR,
1798                             antialias=False):
1799  """Resizes and pads an image to a target width and height.
1800
1801  Resizes an image to a target width and height by keeping
1802  the aspect ratio the same without distortion. If the target
1803  dimensions don't match the image dimensions, the image
1804  is resized and then padded with zeroes to match requested
1805  dimensions.
1806
1807  Args:
1808    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
1809      of shape `[height, width, channels]`.
1810    target_height: Target height.
1811    target_width: Target width.
1812    method: Method to use for resizing image. See `image.resize()`
1813    antialias: Whether to use anti-aliasing when resizing. See 'image.resize()'.
1814
1815  Raises:
1816    ValueError: if `target_height` or `target_width` are zero or negative.
1817
1818  Returns:
1819    Resized and padded image.
1820    If `images` was 4-D, a 4-D float Tensor of shape
1821    `[batch, new_height, new_width, channels]`.
1822    If `images` was 3-D, a 3-D float Tensor of shape
1823    `[new_height, new_width, channels]`.
1824  """
1825
1826  def _resize_fn(im, new_size):
1827    return resize_images_v2(im, new_size, method, antialias=antialias)
1828
1829  return _resize_image_with_pad_common(image, target_height, target_width,
1830                                       _resize_fn)
1831
1832
1833@tf_export('image.per_image_standardization')
1834@dispatch.add_dispatch_support
1835def per_image_standardization(image):
1836  """Linearly scales each image in `image` to have mean 0 and variance 1.
1837
1838  For each 3-D image `x` in `image`, computes `(x - mean) / adjusted_stddev`,
1839  where
1840
1841  - `mean` is the average of all values in `x`
1842  - `adjusted_stddev = max(stddev, 1.0/sqrt(N))` is capped away from 0 to
1843    protect against division by 0 when handling uniform images
1844    - `N` is the number of elements in `x`
1845    - `stddev` is the standard deviation of all values in `x`
1846
1847  Args:
1848    image: An n-D Tensor with at least 3 dimensions, the last 3 of which are the
1849      dimensions of each image.
1850
1851  Returns:
1852    A `Tensor` with the same shape as `image`.
1853
1854  Raises:
1855    ValueError: if the shape of 'image' is incompatible with this function.
1856  """
1857  with ops.name_scope(None, 'per_image_standardization', [image]) as scope:
1858    image = ops.convert_to_tensor(image, name='image')
1859    image = _AssertAtLeast3DImage(image)
1860
1861    image = math_ops.cast(image, dtype=dtypes.float32)
1862    num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-3:])
1863    image_mean = math_ops.reduce_mean(image, axis=[-1, -2, -3], keepdims=True)
1864
1865    # Apply a minimum normalization that protects us against uniform images.
1866    stddev = math_ops.reduce_std(image, axis=[-1, -2, -3], keepdims=True)
1867    min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, dtypes.float32))
1868    adjusted_stddev = math_ops.maximum(stddev, min_stddev)
1869
1870    image -= image_mean
1871    image = math_ops.divide(image, adjusted_stddev, name=scope)
1872    return image
1873
1874
1875@tf_export('image.random_brightness')
1876@dispatch.add_dispatch_support
1877def random_brightness(image, max_delta, seed=None):
1878  """Adjust the brightness of images by a random factor.
1879
1880  Equivalent to `adjust_brightness()` using a `delta` randomly picked in the
1881  interval `[-max_delta, max_delta)`.
1882
1883  For producing deterministic results given a `seed` value, use
1884  `tf.image.stateless_random_brightness`. Unlike using the `seed` param
1885  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
1886  same results given the same seed independent of how many times the function is
1887  called, and independent of global seed settings (e.g. tf.random.set_seed).
1888
1889  Args:
1890    image: An image or images to adjust.
1891    max_delta: float, must be non-negative.
1892    seed: A Python integer. Used to create a random seed. See
1893      `tf.compat.v1.set_random_seed` for behavior.
1894
1895  Usage Example:
1896
1897  >>> x = [[[1.0, 2.0, 3.0],
1898  ...       [4.0, 5.0, 6.0]],
1899  ...      [[7.0, 8.0, 9.0],
1900  ...       [10.0, 11.0, 12.0]]]
1901  >>> tf.image.random_brightness(x, 0.2)
1902  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>
1903
1904  Returns:
1905    The brightness-adjusted image(s).
1906
1907  Raises:
1908    ValueError: if `max_delta` is negative.
1909  """
1910  if max_delta < 0:
1911    raise ValueError('max_delta must be non-negative.')
1912
1913  delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed)
1914  return adjust_brightness(image, delta)
1915
1916
1917@tf_export('image.stateless_random_brightness', v1=[])
1918@dispatch.add_dispatch_support
1919def stateless_random_brightness(image, max_delta, seed):
1920  """Adjust the brightness of images by a random factor deterministically.
1921
1922  Equivalent to `adjust_brightness()` using a `delta` randomly picked in the
1923  interval `[-max_delta, max_delta)`.
1924
1925  Guarantees the same results given the same `seed` independent of how many
1926  times the function is called, and independent of global seed settings (e.g.
1927  `tf.random.set_seed`).
1928
1929  Usage Example:
1930
1931  >>> x = [[[1.0, 2.0, 3.0],
1932  ...       [4.0, 5.0, 6.0]],
1933  ...      [[7.0, 8.0, 9.0],
1934  ...       [10.0, 11.0, 12.0]]]
1935  >>> seed = (1, 2)
1936  >>> tf.image.stateless_random_brightness(x, 0.2, seed)
1937  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
1938  array([[[ 1.1376241,  2.1376243,  3.1376243],
1939          [ 4.1376243,  5.1376243,  6.1376243]],
1940         [[ 7.1376243,  8.137624 ,  9.137624 ],
1941          [10.137624 , 11.137624 , 12.137624 ]]], dtype=float32)>
1942
1943  Args:
1944    image: An image or images to adjust.
1945    max_delta: float, must be non-negative.
1946    seed: A shape [2] Tensor, the seed to the random number generator. Must have
1947      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
1948
1949  Returns:
1950    The brightness-adjusted image(s).
1951
1952  Raises:
1953    ValueError: if `max_delta` is negative.
1954  """
1955  if max_delta < 0:
1956    raise ValueError('max_delta must be non-negative.')
1957
1958  delta = stateless_random_ops.stateless_random_uniform(
1959      shape=[], minval=-max_delta, maxval=max_delta, seed=seed)
1960  return adjust_brightness(image, delta)
1961
1962
1963@tf_export('image.random_contrast')
1964@dispatch.add_dispatch_support
1965def random_contrast(image, lower, upper, seed=None):
1966  """Adjust the contrast of an image or images by a random factor.
1967
1968  Equivalent to `adjust_contrast()` but uses a `contrast_factor` randomly
1969  picked in the interval `[lower, upper)`.
1970
1971  For producing deterministic results given a `seed` value, use
1972  `tf.image.stateless_random_contrast`. Unlike using the `seed` param
1973  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
1974  same results given the same seed independent of how many times the function is
1975  called, and independent of global seed settings (e.g. tf.random.set_seed).
1976
1977  Args:
1978    image: An image tensor with 3 or more dimensions.
1979    lower: float.  Lower bound for the random contrast factor.
1980    upper: float.  Upper bound for the random contrast factor.
1981    seed: A Python integer. Used to create a random seed. See
1982      `tf.compat.v1.set_random_seed` for behavior.
1983
1984  Usage Example:
1985
1986  >>> x = [[[1.0, 2.0, 3.0],
1987  ...       [4.0, 5.0, 6.0]],
1988  ...     [[7.0, 8.0, 9.0],
1989  ...       [10.0, 11.0, 12.0]]]
1990  >>> tf.image.random_contrast(x, 0.2, 0.5)
1991  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>
1992
1993  Returns:
1994    The contrast-adjusted image(s).
1995
1996  Raises:
1997    ValueError: if `upper <= lower` or if `lower < 0`.
1998  """
1999  if upper <= lower:
2000    raise ValueError('upper must be > lower.')
2001
2002  if lower < 0:
2003    raise ValueError('lower must be non-negative.')
2004
2005  contrast_factor = random_ops.random_uniform([], lower, upper, seed=seed)
2006  return adjust_contrast(image, contrast_factor)
2007
2008
2009@tf_export('image.stateless_random_contrast', v1=[])
2010@dispatch.add_dispatch_support
2011def stateless_random_contrast(image, lower, upper, seed):
2012  """Adjust the contrast of images by a random factor deterministically.
2013
2014  Guarantees the same results given the same `seed` independent of how many
2015  times the function is called, and independent of global seed settings (e.g.
2016  `tf.random.set_seed`).
2017
2018  Args:
2019    image: An image tensor with 3 or more dimensions.
2020    lower: float.  Lower bound for the random contrast factor.
2021    upper: float.  Upper bound for the random contrast factor.
2022    seed: A shape [2] Tensor, the seed to the random number generator. Must have
2023      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
2024
2025  Usage Example:
2026
2027  >>> x = [[[1.0, 2.0, 3.0],
2028  ...       [4.0, 5.0, 6.0]],
2029  ...      [[7.0, 8.0, 9.0],
2030  ...       [10.0, 11.0, 12.0]]]
2031  >>> seed = (1, 2)
2032  >>> tf.image.stateless_random_contrast(x, 0.2, 0.5, seed)
2033  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2034  array([[[3.4605184, 4.4605184, 5.4605184],
2035          [4.820173 , 5.820173 , 6.820173 ]],
2036         [[6.179827 , 7.179827 , 8.179828 ],
2037          [7.5394816, 8.539482 , 9.539482 ]]], dtype=float32)>
2038
2039  Returns:
2040    The contrast-adjusted image(s).
2041
2042  Raises:
2043    ValueError: if `upper <= lower` or if `lower < 0`.
2044  """
2045  if upper <= lower:
2046    raise ValueError('upper must be > lower.')
2047
2048  if lower < 0:
2049    raise ValueError('lower must be non-negative.')
2050
2051  contrast_factor = stateless_random_ops.stateless_random_uniform(
2052      shape=[], minval=lower, maxval=upper, seed=seed)
2053  return adjust_contrast(image, contrast_factor)
2054
2055
2056@tf_export('image.adjust_brightness')
2057@dispatch.add_dispatch_support
2058def adjust_brightness(image, delta):
2059  """Adjust the brightness of RGB or Grayscale images.
2060
2061  This is a convenience method that converts RGB images to float
2062  representation, adjusts their brightness, and then converts them back to the
2063  original data type. If several adjustments are chained, it is advisable to
2064  minimize the number of redundant conversions.
2065
2066  The value `delta` is added to all components of the tensor `image`. `image` is
2067  converted to `float` and scaled appropriately if it is in fixed-point
2068  representation, and `delta` is converted to the same data type. For regular
2069  images, `delta` should be in the range `(-1,1)`, as it is added to the image
2070  in floating point representation, where pixel values are in the `[0,1)` range.
2071
2072  Usage Example:
2073
2074  >>> x = [[[1.0, 2.0, 3.0],
2075  ...       [4.0, 5.0, 6.0]],
2076  ...     [[7.0, 8.0, 9.0],
2077  ...       [10.0, 11.0, 12.0]]]
2078  >>> tf.image.adjust_brightness(x, delta=0.1)
2079  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2080  array([[[ 1.1,  2.1,  3.1],
2081          [ 4.1,  5.1,  6.1]],
2082         [[ 7.1,  8.1,  9.1],
2083          [10.1, 11.1, 12.1]]], dtype=float32)>
2084
2085  Args:
2086    image: RGB image or images to adjust.
2087    delta: A scalar. Amount to add to the pixel values.
2088
2089  Returns:
2090    A brightness-adjusted tensor of the same shape and type as `image`.
2091  """
2092  with ops.name_scope(None, 'adjust_brightness', [image, delta]) as name:
2093    image = ops.convert_to_tensor(image, name='image')
2094    # Remember original dtype to so we can convert back if needed
2095    orig_dtype = image.dtype
2096
2097    if orig_dtype in [dtypes.float16, dtypes.float32]:
2098      flt_image = image
2099    else:
2100      flt_image = convert_image_dtype(image, dtypes.float32)
2101
2102    adjusted = math_ops.add(
2103        flt_image, math_ops.cast(delta, flt_image.dtype), name=name)
2104
2105    return convert_image_dtype(adjusted, orig_dtype, saturate=True)
2106
2107
2108@tf_export('image.adjust_contrast')
2109@dispatch.add_dispatch_support
2110def adjust_contrast(images, contrast_factor):
2111  """Adjust contrast of RGB or grayscale images.
2112
2113  This is a convenience method that converts RGB images to float
2114  representation, adjusts their contrast, and then converts them back to the
2115  original data type. If several adjustments are chained, it is advisable to
2116  minimize the number of redundant conversions.
2117
2118  `images` is a tensor of at least 3 dimensions.  The last 3 dimensions are
2119  interpreted as `[height, width, channels]`.  The other dimensions only
2120  represent a collection of images, such as `[batch, height, width, channels].`
2121
2122  Contrast is adjusted independently for each channel of each image.
2123
2124  For each channel, this Op computes the mean of the image pixels in the
2125  channel and then adjusts each component `x` of each pixel to
2126  `(x - mean) * contrast_factor + mean`.
2127
2128  Usage Example:
2129
2130  >>> x = [[[1.0, 2.0, 3.0],
2131  ...       [4.0, 5.0, 6.0]],
2132  ...     [[7.0, 8.0, 9.0],
2133  ...       [10.0, 11.0, 12.0]]]
2134  >>> tf.image.adjust_contrast(x, 2)
2135  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2136  array([[[-3.5, -2.5, -1.5],
2137          [ 2.5,  3.5,  4.5]],
2138         [[ 8.5,  9.5, 10.5],
2139          [14.5, 15.5, 16.5]]], dtype=float32)>
2140
2141  Args:
2142    images: Images to adjust.  At least 3-D.
2143    contrast_factor: A float multiplier for adjusting contrast.
2144
2145  Returns:
2146    The contrast-adjusted image or images.
2147  """
2148  with ops.name_scope(None, 'adjust_contrast',
2149                      [images, contrast_factor]) as name:
2150    images = ops.convert_to_tensor(images, name='images')
2151    # Remember original dtype to so we can convert back if needed
2152    orig_dtype = images.dtype
2153
2154    if orig_dtype in (dtypes.float16, dtypes.float32):
2155      flt_images = images
2156    else:
2157      flt_images = convert_image_dtype(images, dtypes.float32)
2158
2159    adjusted = gen_image_ops.adjust_contrastv2(
2160        flt_images, contrast_factor=contrast_factor, name=name)
2161
2162    return convert_image_dtype(adjusted, orig_dtype, saturate=True)
2163
2164
2165@tf_export('image.adjust_gamma')
2166@dispatch.add_dispatch_support
2167def adjust_gamma(image, gamma=1, gain=1):
2168  """Performs [Gamma Correction](http://en.wikipedia.org/wiki/Gamma_correction).
2169
2170  on the input image.
2171
2172  Also known as Power Law Transform. This function converts the
2173  input images at first to float representation, then transforms them
2174  pixelwise according to the equation `Out = gain * In**gamma`,
2175  and then converts the back to the original data type.
2176
2177  Usage Example:
2178
2179  >>> x = [[[1.0, 2.0, 3.0],
2180  ...       [4.0, 5.0, 6.0]],
2181  ...     [[7.0, 8.0, 9.0],
2182  ...       [10.0, 11.0, 12.0]]]
2183  >>> tf.image.adjust_gamma(x, 0.2)
2184  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2185  array([[[1.       , 1.1486983, 1.2457309],
2186          [1.319508 , 1.3797297, 1.4309691]],
2187         [[1.4757731, 1.5157166, 1.5518456],
2188          [1.5848932, 1.6153942, 1.6437519]]], dtype=float32)>
2189
2190  Args:
2191    image : RGB image or images to adjust.
2192    gamma : A scalar or tensor. Non-negative real number.
2193    gain  : A scalar or tensor. The constant multiplier.
2194
2195  Returns:
2196    A Tensor. A Gamma-adjusted tensor of the same shape and type as `image`.
2197
2198  Raises:
2199    ValueError: If gamma is negative.
2200  Notes:
2201    For gamma greater than 1, the histogram will shift towards left and
2202    the output image will be darker than the input image.
2203    For gamma less than 1, the histogram will shift towards right and
2204    the output image will be brighter than the input image.
2205  References:
2206    [Wikipedia](http://en.wikipedia.org/wiki/Gamma_correction)
2207  """
2208
2209  with ops.name_scope(None, 'adjust_gamma', [image, gamma, gain]) as name:
2210    image = ops.convert_to_tensor(image, name='image')
2211    # Remember original dtype to so we can convert back if needed
2212    orig_dtype = image.dtype
2213
2214    if orig_dtype in [dtypes.float16, dtypes.float32]:
2215      flt_image = image
2216    else:
2217      flt_image = convert_image_dtype(image, dtypes.float32)
2218
2219    assert_op = _assert(gamma >= 0, ValueError,
2220                        'Gamma should be a non-negative real number.')
2221    if assert_op:
2222      gamma = control_flow_ops.with_dependencies(assert_op, gamma)
2223
2224    # According to the definition of gamma correction.
2225    adjusted_img = gain * flt_image**gamma
2226
2227    return convert_image_dtype(adjusted_img, orig_dtype, saturate=True)
2228
2229
2230@tf_export('image.convert_image_dtype')
2231@dispatch.add_dispatch_support
2232def convert_image_dtype(image, dtype, saturate=False, name=None):
2233  """Convert `image` to `dtype`, scaling its values if needed.
2234
2235  The operation supports data types (for `image` and `dtype`) of
2236  `uint8`, `uint16`, `uint32`, `uint64`, `int8`, `int16`, `int32`, `int64`,
2237  `float16`, `float32`, `float64`, `bfloat16`.
2238
2239  Images that are represented using floating point values are expected to have
2240  values in the range [0,1). Image data stored in integer data types are
2241  expected to have values in the range `[0,MAX]`, where `MAX` is the largest
2242  positive representable number for the data type.
2243
2244  This op converts between data types, scaling the values appropriately before
2245  casting.
2246
2247  Usage Example:
2248
2249  >>> x = [[[1, 2, 3], [4, 5, 6]],
2250  ...      [[7, 8, 9], [10, 11, 12]]]
2251  >>> x_int8 = tf.convert_to_tensor(x, dtype=tf.int8)
2252  >>> tf.image.convert_image_dtype(x_int8, dtype=tf.float16, saturate=False)
2253  <tf.Tensor: shape=(2, 2, 3), dtype=float16, numpy=
2254  array([[[0.00787, 0.01575, 0.02362],
2255          [0.0315 , 0.03937, 0.04724]],
2256         [[0.0551 , 0.063  , 0.07086],
2257          [0.07874, 0.0866 , 0.0945 ]]], dtype=float16)>
2258
2259  Converting integer types to floating point types returns normalized floating
2260  point values in the range [0, 1); the values are normalized by the `MAX` value
2261  of the input dtype. Consider the following two examples:
2262
2263  >>> a = [[[1], [2]], [[3], [4]]]
2264  >>> a_int8 = tf.convert_to_tensor(a, dtype=tf.int8)
2265  >>> tf.image.convert_image_dtype(a_int8, dtype=tf.float32)
2266  <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
2267  array([[[0.00787402],
2268          [0.01574803]],
2269         [[0.02362205],
2270          [0.03149606]]], dtype=float32)>
2271
2272  >>> a_int32 = tf.convert_to_tensor(a, dtype=tf.int32)
2273  >>> tf.image.convert_image_dtype(a_int32, dtype=tf.float32)
2274  <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
2275  array([[[4.6566129e-10],
2276          [9.3132257e-10]],
2277         [[1.3969839e-09],
2278          [1.8626451e-09]]], dtype=float32)>
2279
2280  Despite having identical values of `a` and output dtype of `float32`, the
2281  outputs differ due to the different input dtypes (`int8` vs. `int32`). This
2282  is, again, because the values are normalized by the `MAX` value of the input
2283  dtype.
2284
2285  Note that converting floating point values to integer type may lose precision.
2286  In the example below, an image tensor `b` of dtype `float32` is converted to
2287  `int8` and back to `float32`. The final output, however, is different from
2288  the original input `b` due to precision loss.
2289
2290  >>> b = [[[0.12], [0.34]], [[0.56], [0.78]]]
2291  >>> b_float32 = tf.convert_to_tensor(b, dtype=tf.float32)
2292  >>> b_int8 = tf.image.convert_image_dtype(b_float32, dtype=tf.int8)
2293  >>> tf.image.convert_image_dtype(b_int8, dtype=tf.float32)
2294  <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
2295  array([[[0.11811024],
2296          [0.33858266]],
2297         [[0.5590551 ],
2298          [0.77952754]]], dtype=float32)>
2299
2300  Scaling up from an integer type (input dtype) to another integer type (output
2301  dtype) will not map input dtype's `MAX` to output dtype's `MAX` but converting
2302  back and forth should result in no change. For example, as shown below, the
2303  `MAX` value of int8 (=127) is not mapped to the `MAX` value of int16 (=32,767)
2304  but, when scaled back, we get the same, original values of `c`.
2305
2306  >>> c = [[[1], [2]], [[127], [127]]]
2307  >>> c_int8 = tf.convert_to_tensor(c, dtype=tf.int8)
2308  >>> c_int16 = tf.image.convert_image_dtype(c_int8, dtype=tf.int16)
2309  >>> print(c_int16)
2310  tf.Tensor(
2311  [[[  256]
2312    [  512]]
2313   [[32512]
2314    [32512]]], shape=(2, 2, 1), dtype=int16)
2315  >>> c_int8_back = tf.image.convert_image_dtype(c_int16, dtype=tf.int8)
2316  >>> print(c_int8_back)
2317  tf.Tensor(
2318  [[[  1]
2319    [  2]]
2320   [[127]
2321    [127]]], shape=(2, 2, 1), dtype=int8)
2322
2323  Scaling down from an integer type to another integer type can be a lossy
2324  conversion. Notice in the example below that converting `int16` to `uint8` and
2325  back to `int16` has lost precision.
2326
2327  >>> d = [[[1000], [2000]], [[3000], [4000]]]
2328  >>> d_int16 = tf.convert_to_tensor(d, dtype=tf.int16)
2329  >>> d_uint8 = tf.image.convert_image_dtype(d_int16, dtype=tf.uint8)
2330  >>> d_int16_back = tf.image.convert_image_dtype(d_uint8, dtype=tf.int16)
2331  >>> print(d_int16_back)
2332  tf.Tensor(
2333  [[[ 896]
2334    [1920]]
2335   [[2944]
2336    [3968]]], shape=(2, 2, 1), dtype=int16)
2337
2338  Note that converting from floating point inputs to integer types may lead to
2339  over/underflow problems. Set saturate to `True` to avoid such problem in
2340  problematic conversions. If enabled, saturation will clip the output into the
2341  allowed range before performing a potentially dangerous cast (and only before
2342  performing such a cast, i.e., when casting from a floating point to an integer
2343  type, and when casting from a signed to an unsigned type; `saturate` has no
2344  effect on casts between floats, or on casts that increase the type's range).
2345
2346  Args:
2347    image: An image.
2348    dtype: A `DType` to convert `image` to.
2349    saturate: If `True`, clip the input before casting (if necessary).
2350    name: A name for this operation (optional).
2351
2352  Returns:
2353    `image`, converted to `dtype`.
2354
2355  Raises:
2356    AttributeError: Raises an attribute error when dtype is neither
2357    float nor integer
2358  """
2359  image = ops.convert_to_tensor(image, name='image')
2360  dtype = dtypes.as_dtype(dtype)
2361  if not dtype.is_floating and not dtype.is_integer:
2362    raise AttributeError('dtype must be either floating point or integer')
2363  if dtype == image.dtype:
2364    return array_ops.identity(image, name=name)
2365
2366  with ops.name_scope(name, 'convert_image', [image]) as name:
2367    # Both integer: use integer multiplication in the larger range
2368    if image.dtype.is_integer and dtype.is_integer:
2369      scale_in = image.dtype.max
2370      scale_out = dtype.max
2371      if scale_in > scale_out:
2372        # Scaling down, scale first, then cast. The scaling factor will
2373        # cause in.max to be mapped to above out.max but below out.max+1,
2374        # so that the output is safely in the supported range.
2375        scale = (scale_in + 1) // (scale_out + 1)
2376        scaled = math_ops.floordiv(image, scale)
2377
2378        if saturate:
2379          return math_ops.saturate_cast(scaled, dtype, name=name)
2380        else:
2381          return math_ops.cast(scaled, dtype, name=name)
2382      else:
2383        # Scaling up, cast first, then scale. The scale will not map in.max to
2384        # out.max, but converting back and forth should result in no change.
2385        if saturate:
2386          cast = math_ops.saturate_cast(image, dtype)
2387        else:
2388          cast = math_ops.cast(image, dtype)
2389        scale = (scale_out + 1) // (scale_in + 1)
2390        return math_ops.multiply(cast, scale, name=name)
2391    elif image.dtype.is_floating and dtype.is_floating:
2392      # Both float: Just cast, no possible overflows in the allowed ranges.
2393      # Note: We're ignoring float overflows. If your image dynamic range
2394      # exceeds float range, you're on your own.
2395      return math_ops.cast(image, dtype, name=name)
2396    else:
2397      if image.dtype.is_integer:
2398        # Converting to float: first cast, then scale. No saturation possible.
2399        cast = math_ops.cast(image, dtype)
2400        scale = 1. / image.dtype.max
2401        return math_ops.multiply(cast, scale, name=name)
2402      else:
2403        # Converting from float: first scale, then cast
2404        scale = dtype.max + 0.5  # avoid rounding problems in the cast
2405        scaled = math_ops.multiply(image, scale)
2406        if saturate:
2407          return math_ops.saturate_cast(scaled, dtype, name=name)
2408        else:
2409          return math_ops.cast(scaled, dtype, name=name)
2410
2411
2412@tf_export('image.rgb_to_grayscale')
2413@dispatch.add_dispatch_support
2414def rgb_to_grayscale(images, name=None):
2415  """Converts one or more images from RGB to Grayscale.
2416
2417  Outputs a tensor of the same `DType` and rank as `images`.  The size of the
2418  last dimension of the output is 1, containing the Grayscale value of the
2419  pixels.
2420
2421  >>> original = tf.constant([[[1.0, 2.0, 3.0]]])
2422  >>> converted = tf.image.rgb_to_grayscale(original)
2423  >>> print(converted.numpy())
2424  [[[1.81...]]]
2425
2426  Args:
2427    images: The RGB tensor to convert. The last dimension must have size 3 and
2428      should contain RGB values.
2429    name: A name for the operation (optional).
2430
2431  Returns:
2432    The converted grayscale image(s).
2433  """
2434  with ops.name_scope(name, 'rgb_to_grayscale', [images]) as name:
2435    images = ops.convert_to_tensor(images, name='images')
2436    # Remember original dtype to so we can convert back if needed
2437    orig_dtype = images.dtype
2438    flt_image = convert_image_dtype(images, dtypes.float32)
2439
2440    # Reference for converting between RGB and grayscale.
2441    # https://en.wikipedia.org/wiki/Luma_%28video%29
2442    rgb_weights = [0.2989, 0.5870, 0.1140]
2443    gray_float = math_ops.tensordot(flt_image, rgb_weights, [-1, -1])
2444    gray_float = array_ops.expand_dims(gray_float, -1)
2445    return convert_image_dtype(gray_float, orig_dtype, name=name)
2446
2447
2448@tf_export('image.grayscale_to_rgb')
2449@dispatch.add_dispatch_support
2450def grayscale_to_rgb(images, name=None):
2451  """Converts one or more images from Grayscale to RGB.
2452
2453  Outputs a tensor of the same `DType` and rank as `images`.  The size of the
2454  last dimension of the output is 3, containing the RGB value of the pixels.
2455  The input images' last dimension must be size 1.
2456
2457  >>> original = tf.constant([[[1.0], [2.0], [3.0]]])
2458  >>> converted = tf.image.grayscale_to_rgb(original)
2459  >>> print(converted.numpy())
2460  [[[1. 1. 1.]
2461    [2. 2. 2.]
2462    [3. 3. 3.]]]
2463
2464  Args:
2465    images: The Grayscale tensor to convert. The last dimension must be size 1.
2466    name: A name for the operation (optional).
2467
2468  Returns:
2469    The converted grayscale image(s).
2470  """
2471  with ops.name_scope(name, 'grayscale_to_rgb', [images]) as name:
2472    images = _AssertGrayscaleImage(images)
2473
2474    images = ops.convert_to_tensor(images, name='images')
2475    rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0)
2476    shape_list = ([array_ops.ones(rank_1, dtype=dtypes.int32)] +
2477                  [array_ops.expand_dims(3, 0)])
2478    multiples = array_ops.concat(shape_list, 0)
2479    rgb = array_ops.tile(images, multiples, name=name)
2480    rgb.set_shape(images.get_shape()[:-1].concatenate([3]))
2481    return rgb
2482
2483
2484# pylint: disable=invalid-name
2485@tf_export('image.random_hue')
2486@dispatch.add_dispatch_support
2487def random_hue(image, max_delta, seed=None):
2488  """Adjust the hue of RGB images by a random factor.
2489
2490  Equivalent to `adjust_hue()` but uses a `delta` randomly
2491  picked in the interval `[-max_delta, max_delta)`.
2492
2493  `max_delta` must be in the interval `[0, 0.5]`.
2494
2495  Usage Example:
2496
2497  >>> x = [[[1.0, 2.0, 3.0],
2498  ...       [4.0, 5.0, 6.0]],
2499  ...     [[7.0, 8.0, 9.0],
2500  ...       [10.0, 11.0, 12.0]]]
2501  >>> tf.image.random_hue(x, 0.2)
2502  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>
2503
2504  For producing deterministic results given a `seed` value, use
2505  `tf.image.stateless_random_hue`. Unlike using the `seed` param with
2506  `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the same
2507  results given the same seed independent of how many times the function is
2508  called, and independent of global seed settings (e.g. tf.random.set_seed).
2509
2510  Args:
2511    image: RGB image or images. The size of the last dimension must be 3.
2512    max_delta: float. The maximum value for the random delta.
2513    seed: An operation-specific seed. It will be used in conjunction with the
2514      graph-level seed to determine the real seeds that will be used in this
2515      operation. Please see the documentation of set_random_seed for its
2516      interaction with the graph-level random seed.
2517
2518  Returns:
2519    Adjusted image(s), same shape and DType as `image`.
2520
2521  Raises:
2522    ValueError: if `max_delta` is invalid.
2523  """
2524  if max_delta > 0.5:
2525    raise ValueError('max_delta must be <= 0.5.')
2526
2527  if max_delta < 0:
2528    raise ValueError('max_delta must be non-negative.')
2529
2530  delta = random_ops.random_uniform([], -max_delta, max_delta, seed=seed)
2531  return adjust_hue(image, delta)
2532
2533
2534@tf_export('image.stateless_random_hue', v1=[])
2535@dispatch.add_dispatch_support
2536def stateless_random_hue(image, max_delta, seed):
2537  """Adjust the hue of RGB images by a random factor deterministically.
2538
2539  Equivalent to `adjust_hue()` but uses a `delta` randomly picked in the
2540  interval `[-max_delta, max_delta)`.
2541
2542  Guarantees the same results given the same `seed` independent of how many
2543  times the function is called, and independent of global seed settings (e.g.
2544  `tf.random.set_seed`).
2545
2546  `max_delta` must be in the interval `[0, 0.5]`.
2547
2548  Usage Example:
2549
2550  >>> x = [[[1.0, 2.0, 3.0],
2551  ...       [4.0, 5.0, 6.0]],
2552  ...      [[7.0, 8.0, 9.0],
2553  ...       [10.0, 11.0, 12.0]]]
2554  >>> seed = (1, 2)
2555  >>> tf.image.stateless_random_hue(x, 0.2, seed)
2556  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2557  array([[[ 1.6514902,  1.       ,  3.       ],
2558          [ 4.65149  ,  4.       ,  6.       ]],
2559         [[ 7.65149  ,  7.       ,  9.       ],
2560          [10.65149  , 10.       , 12.       ]]], dtype=float32)>
2561
2562  Args:
2563    image: RGB image or images. The size of the last dimension must be 3.
2564    max_delta: float. The maximum value for the random delta.
2565    seed: A shape [2] Tensor, the seed to the random number generator. Must have
2566      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
2567
2568  Returns:
2569    Adjusted image(s), same shape and DType as `image`.
2570
2571  Raises:
2572    ValueError: if `max_delta` is invalid.
2573  """
2574  if max_delta > 0.5:
2575    raise ValueError('max_delta must be <= 0.5.')
2576
2577  if max_delta < 0:
2578    raise ValueError('max_delta must be non-negative.')
2579
2580  delta = stateless_random_ops.stateless_random_uniform(
2581      shape=[], minval=-max_delta, maxval=max_delta, seed=seed)
2582  return adjust_hue(image, delta)
2583
2584
2585@tf_export('image.adjust_hue')
2586@dispatch.add_dispatch_support
2587def adjust_hue(image, delta, name=None):
2588  """Adjust hue of RGB images.
2589
2590  This is a convenience method that converts an RGB image to float
2591  representation, converts it to HSV, adds an offset to the
2592  hue channel, converts back to RGB and then back to the original
2593  data type. If several adjustments are chained it is advisable to minimize
2594  the number of redundant conversions.
2595
2596  `image` is an RGB image.  The image hue is adjusted by converting the
2597  image(s) to HSV and rotating the hue channel (H) by
2598  `delta`.  The image is then converted back to RGB.
2599
2600  `delta` must be in the interval `[-1, 1]`.
2601
2602  Usage Example:
2603
2604  >>> x = [[[1.0, 2.0, 3.0],
2605  ...       [4.0, 5.0, 6.0]],
2606  ...     [[7.0, 8.0, 9.0],
2607  ...       [10.0, 11.0, 12.0]]]
2608  >>> tf.image.adjust_hue(x, 0.2)
2609  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2610  array([[[ 2.3999996,  1.       ,  3.       ],
2611          [ 5.3999996,  4.       ,  6.       ]],
2612        [[ 8.4      ,  7.       ,  9.       ],
2613          [11.4      , 10.       , 12.       ]]], dtype=float32)>
2614
2615  Args:
2616    image: RGB image or images. The size of the last dimension must be 3.
2617    delta: float.  How much to add to the hue channel.
2618    name: A name for this operation (optional).
2619
2620  Returns:
2621    Adjusted image(s), same shape and DType as `image`.
2622
2623  Usage Example:
2624
2625  >>> image = [[[1, 2, 3], [4, 5, 6]],
2626  ...          [[7, 8, 9], [10, 11, 12]],
2627  ...          [[13, 14, 15], [16, 17, 18]]]
2628  >>> image = tf.constant(image)
2629  >>> tf.image.adjust_hue(image, 0.2)
2630  <tf.Tensor: shape=(3, 2, 3), dtype=int32, numpy=
2631  array([[[ 2,  1,  3],
2632        [ 5,  4,  6]],
2633       [[ 8,  7,  9],
2634        [11, 10, 12]],
2635       [[14, 13, 15],
2636        [17, 16, 18]]], dtype=int32)>
2637  """
2638  with ops.name_scope(name, 'adjust_hue', [image]) as name:
2639    image = ops.convert_to_tensor(image, name='image')
2640    # Remember original dtype to so we can convert back if needed
2641    orig_dtype = image.dtype
2642    if orig_dtype in (dtypes.float16, dtypes.float32):
2643      flt_image = image
2644    else:
2645      flt_image = convert_image_dtype(image, dtypes.float32)
2646
2647    rgb_altered = gen_image_ops.adjust_hue(flt_image, delta)
2648
2649    return convert_image_dtype(rgb_altered, orig_dtype)
2650
2651
2652# pylint: disable=invalid-name
2653@tf_export('image.random_jpeg_quality')
2654@dispatch.add_dispatch_support
2655def random_jpeg_quality(image, min_jpeg_quality, max_jpeg_quality, seed=None):
2656  """Randomly changes jpeg encoding quality for inducing jpeg noise.
2657
2658  `min_jpeg_quality` must be in the interval `[0, 100]` and less than
2659  `max_jpeg_quality`.
2660  `max_jpeg_quality` must be in the interval `[0, 100]`.
2661
2662  Usage Example:
2663
2664  >>> x = [[[1.0, 2.0, 3.0],
2665  ...       [4.0, 5.0, 6.0]],
2666  ...     [[7.0, 8.0, 9.0],
2667  ...       [10.0, 11.0, 12.0]]]
2668  >>> tf.image.random_jpeg_quality(x, 75, 95)
2669  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>
2670
2671  For producing deterministic results given a `seed` value, use
2672  `tf.image.stateless_random_jpeg_quality`. Unlike using the `seed` param
2673  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
2674  same results given the same seed independent of how many times the function is
2675  called, and independent of global seed settings (e.g. tf.random.set_seed).
2676
2677  Args:
2678    image: 3D image. Size of the last dimension must be 1 or 3.
2679    min_jpeg_quality: Minimum jpeg encoding quality to use.
2680    max_jpeg_quality: Maximum jpeg encoding quality to use.
2681    seed: An operation-specific seed. It will be used in conjunction with the
2682      graph-level seed to determine the real seeds that will be used in this
2683      operation. Please see the documentation of set_random_seed for its
2684      interaction with the graph-level random seed.
2685
2686  Returns:
2687    Adjusted image(s), same shape and DType as `image`.
2688
2689  Raises:
2690    ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid.
2691  """
2692  if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or min_jpeg_quality > 100 or
2693      max_jpeg_quality > 100):
2694    raise ValueError('jpeg encoding range must be between 0 and 100.')
2695
2696  if min_jpeg_quality >= max_jpeg_quality:
2697    raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.')
2698
2699  jpeg_quality = random_ops.random_uniform([],
2700                                           min_jpeg_quality,
2701                                           max_jpeg_quality,
2702                                           seed=seed,
2703                                           dtype=dtypes.int32)
2704  return adjust_jpeg_quality(image, jpeg_quality)
2705
2706
2707@tf_export('image.stateless_random_jpeg_quality', v1=[])
2708@dispatch.add_dispatch_support
2709def stateless_random_jpeg_quality(image,
2710                                  min_jpeg_quality,
2711                                  max_jpeg_quality,
2712                                  seed):
2713  """Deterministically radomize jpeg encoding quality for inducing jpeg noise.
2714
2715  Guarantees the same results given the same `seed` independent of how many
2716  times the function is called, and independent of global seed settings (e.g.
2717  `tf.random.set_seed`).
2718
2719  `min_jpeg_quality` must be in the interval `[0, 100]` and less than
2720  `max_jpeg_quality`.
2721  `max_jpeg_quality` must be in the interval `[0, 100]`.
2722
2723  Usage Example:
2724
2725  >>> x = [[[1, 2, 3],
2726  ...       [4, 5, 6]],
2727  ...      [[7, 8, 9],
2728  ...       [10, 11, 12]]]
2729  >>> x_uint8 = tf.cast(x, tf.uint8)
2730  >>> seed = (1, 2)
2731  >>> tf.image.stateless_random_jpeg_quality(x_uint8, 75, 95, seed)
2732  <tf.Tensor: shape=(2, 2, 3), dtype=uint8, numpy=
2733  array([[[ 0,  4,  5],
2734          [ 1,  5,  6]],
2735         [[ 5,  9, 10],
2736          [ 5,  9, 10]]], dtype=uint8)>
2737
2738  Args:
2739    image: 3D image. Size of the last dimension must be 1 or 3.
2740    min_jpeg_quality: Minimum jpeg encoding quality to use.
2741    max_jpeg_quality: Maximum jpeg encoding quality to use.
2742    seed: A shape [2] Tensor, the seed to the random number generator. Must have
2743      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
2744
2745  Returns:
2746    Adjusted image(s), same shape and DType as `image`.
2747
2748  Raises:
2749    ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid.
2750  """
2751  if (min_jpeg_quality < 0 or max_jpeg_quality < 0 or min_jpeg_quality > 100 or
2752      max_jpeg_quality > 100):
2753    raise ValueError('jpeg encoding range must be between 0 and 100.')
2754
2755  if min_jpeg_quality >= max_jpeg_quality:
2756    raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.')
2757
2758  jpeg_quality = stateless_random_ops.stateless_random_uniform(
2759      shape=[], minval=min_jpeg_quality, maxval=max_jpeg_quality, seed=seed,
2760      dtype=dtypes.int32)
2761  return adjust_jpeg_quality(image, jpeg_quality)
2762
2763
2764@tf_export('image.adjust_jpeg_quality')
2765@dispatch.add_dispatch_support
2766def adjust_jpeg_quality(image, jpeg_quality, name=None):
2767  """Adjust jpeg encoding quality of an image.
2768
2769  This is a convenience method that converts an image to uint8 representation,
2770  encodes it to jpeg with `jpeg_quality`, decodes it, and then converts back
2771  to the original data type.
2772
2773  `jpeg_quality` must be in the interval `[0, 100]`.
2774
2775  Usage Example:
2776
2777  >>> x = [[[1.0, 2.0, 3.0],
2778  ...       [4.0, 5.0, 6.0]],
2779  ...     [[7.0, 8.0, 9.0],
2780  ...       [10.0, 11.0, 12.0]]]
2781  >>> tf.image.adjust_jpeg_quality(x, 75)
2782  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2783  array([[[1., 1., 1.],
2784          [1., 1., 1.]],
2785         [[1., 1., 1.],
2786          [1., 1., 1.]]], dtype=float32)>
2787
2788  Args:
2789    image: 3D image. The size of the last dimension must be None, 1 or 3.
2790    jpeg_quality: Python int or Tensor of type int32. jpeg encoding quality.
2791    name: A name for this operation (optional).
2792
2793  Returns:
2794    Adjusted image, same shape and DType as `image`.
2795
2796  Raises:
2797    InvalidArgumentError: quality must be in [0,100]
2798    InvalidArgumentError: image must have 1 or 3 channels
2799  """
2800  with ops.name_scope(name, 'adjust_jpeg_quality', [image]):
2801    image = ops.convert_to_tensor(image, name='image')
2802    channels = image.shape.as_list()[-1]
2803    # Remember original dtype to so we can convert back if needed
2804    orig_dtype = image.dtype
2805    image = convert_image_dtype(image, dtypes.uint8, saturate=True)
2806    if not _is_tensor(jpeg_quality):
2807      # If jpeg_quality is a int (not tensor).
2808      jpeg_quality = ops.convert_to_tensor(jpeg_quality, dtype=dtypes.int32)
2809    image = gen_image_ops.encode_jpeg_variable_quality(image, jpeg_quality)
2810
2811    image = gen_image_ops.decode_jpeg(image, channels=channels)
2812    return convert_image_dtype(image, orig_dtype, saturate=True)
2813
2814
2815@tf_export('image.random_saturation')
2816@dispatch.add_dispatch_support
2817def random_saturation(image, lower, upper, seed=None):
2818  """Adjust the saturation of RGB images by a random factor.
2819
2820  Equivalent to `adjust_saturation()` but uses a `saturation_factor` randomly
2821  picked in the interval `[lower, upper)`.
2822
2823  Usage Example:
2824
2825  >>> x = [[[1.0, 2.0, 3.0],
2826  ...       [4.0, 5.0, 6.0]],
2827  ...     [[7.0, 8.0, 9.0],
2828  ...       [10.0, 11.0, 12.0]]]
2829  >>> tf.image.random_saturation(x, 5, 10)
2830  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2831  array([[[ 0. ,  1.5,  3. ],
2832          [ 0. ,  3. ,  6. ]],
2833         [[ 0. ,  4.5,  9. ],
2834          [ 0. ,  6. , 12. ]]], dtype=float32)>
2835
2836  For producing deterministic results given a `seed` value, use
2837  `tf.image.stateless_random_saturation`. Unlike using the `seed` param
2838  with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops guarantee the
2839  same results given the same seed independent of how many times the function is
2840  called, and independent of global seed settings (e.g. tf.random.set_seed).
2841
2842  Args:
2843    image: RGB image or images. The size of the last dimension must be 3.
2844    lower: float.  Lower bound for the random saturation factor.
2845    upper: float.  Upper bound for the random saturation factor.
2846    seed: An operation-specific seed. It will be used in conjunction with the
2847      graph-level seed to determine the real seeds that will be used in this
2848      operation. Please see the documentation of set_random_seed for its
2849      interaction with the graph-level random seed.
2850
2851  Returns:
2852    Adjusted image(s), same shape and DType as `image`.
2853
2854  Raises:
2855    ValueError: if `upper <= lower` or if `lower < 0`.
2856  """
2857  if upper <= lower:
2858    raise ValueError('upper must be > lower.')
2859
2860  if lower < 0:
2861    raise ValueError('lower must be non-negative.')
2862
2863  saturation_factor = random_ops.random_uniform([], lower, upper, seed=seed)
2864  return adjust_saturation(image, saturation_factor)
2865
2866
2867@tf_export('image.stateless_random_saturation', v1=[])
2868@dispatch.add_dispatch_support
2869def stateless_random_saturation(image, lower, upper, seed=None):
2870  """Adjust the saturation of RGB images by a random factor deterministically.
2871
2872  Equivalent to `adjust_saturation()` but uses a `saturation_factor` randomly
2873  picked in the interval `[lower, upper)`.
2874
2875  Guarantees the same results given the same `seed` independent of how many
2876  times the function is called, and independent of global seed settings (e.g.
2877  `tf.random.set_seed`).
2878
2879  Usage Example:
2880
2881  >>> x = [[[1.0, 2.0, 3.0],
2882  ...       [4.0, 5.0, 6.0]],
2883  ...      [[7.0, 8.0, 9.0],
2884  ...       [10.0, 11.0, 12.0]]]
2885  >>> seed = (1, 2)
2886  >>> tf.image.stateless_random_saturation(x, 0.5, 1.0, seed)
2887  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2888  array([[[ 1.1559395,  2.0779698,  3.       ],
2889          [ 4.1559396,  5.07797  ,  6.       ]],
2890         [[ 7.1559396,  8.07797  ,  9.       ],
2891          [10.155939 , 11.07797  , 12.       ]]], dtype=float32)>
2892
2893  Args:
2894    image: RGB image or images. The size of the last dimension must be 3.
2895    lower: float.  Lower bound for the random saturation factor.
2896    upper: float.  Upper bound for the random saturation factor.
2897    seed: A shape [2] Tensor, the seed to the random number generator. Must have
2898      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
2899
2900  Returns:
2901    Adjusted image(s), same shape and DType as `image`.
2902
2903  Raises:
2904    ValueError: if `upper <= lower` or if `lower < 0`.
2905  """
2906  if upper <= lower:
2907    raise ValueError('upper must be > lower.')
2908
2909  if lower < 0:
2910    raise ValueError('lower must be non-negative.')
2911
2912  saturation_factor = stateless_random_ops.stateless_random_uniform(
2913      shape=[], minval=lower, maxval=upper, seed=seed)
2914  return adjust_saturation(image, saturation_factor)
2915
2916
2917@tf_export('image.adjust_saturation')
2918@dispatch.add_dispatch_support
2919def adjust_saturation(image, saturation_factor, name=None):
2920  """Adjust saturation of RGB images.
2921
2922  This is a convenience method that converts RGB images to float
2923  representation, converts them to HSV, adds an offset to the
2924  saturation channel, converts back to RGB and then back to the original
2925  data type. If several adjustments are chained it is advisable to minimize
2926  the number of redundant conversions.
2927
2928  `image` is an RGB image or images.  The image saturation is adjusted by
2929  converting the images to HSV and multiplying the saturation (S) channel by
2930  `saturation_factor` and clipping. The images are then converted back to RGB.
2931
2932  Usage Example:
2933
2934  >>> x = [[[1.0, 2.0, 3.0],
2935  ...       [4.0, 5.0, 6.0]],
2936  ...     [[7.0, 8.0, 9.0],
2937  ...       [10.0, 11.0, 12.0]]]
2938  >>> tf.image.adjust_saturation(x, 0.5)
2939  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
2940  array([[[ 2. ,  2.5,  3. ],
2941          [ 5. ,  5.5,  6. ]],
2942         [[ 8. ,  8.5,  9. ],
2943          [11. , 11.5, 12. ]]], dtype=float32)>
2944
2945  Args:
2946    image: RGB image or images. The size of the last dimension must be 3.
2947    saturation_factor: float. Factor to multiply the saturation by.
2948    name: A name for this operation (optional).
2949
2950  Returns:
2951    Adjusted image(s), same shape and DType as `image`.
2952
2953  Raises:
2954    InvalidArgumentError: input must have 3 channels
2955  """
2956  with ops.name_scope(name, 'adjust_saturation', [image]) as name:
2957    image = ops.convert_to_tensor(image, name='image')
2958    # Remember original dtype to so we can convert back if needed
2959    orig_dtype = image.dtype
2960    if orig_dtype in (dtypes.float16, dtypes.float32):
2961      flt_image = image
2962    else:
2963      flt_image = convert_image_dtype(image, dtypes.float32)
2964
2965    adjusted = gen_image_ops.adjust_saturation(flt_image, saturation_factor)
2966
2967    return convert_image_dtype(adjusted, orig_dtype)
2968
2969
2970@tf_export('io.is_jpeg', 'image.is_jpeg', v1=['io.is_jpeg', 'image.is_jpeg'])
2971def is_jpeg(contents, name=None):
2972  r"""Convenience function to check if the 'contents' encodes a JPEG image.
2973
2974  Args:
2975    contents: 0-D `string`. The encoded image bytes.
2976    name: A name for the operation (optional)
2977
2978  Returns:
2979     A scalar boolean tensor indicating if 'contents' may be a JPEG image.
2980     is_jpeg is susceptible to false positives.
2981  """
2982  # Normal JPEGs start with \xff\xd8\xff\xe0
2983  # JPEG with EXIF starts with \xff\xd8\xff\xe1
2984  # Use \xff\xd8\xff to cover both.
2985  with ops.name_scope(name, 'is_jpeg'):
2986    substr = string_ops.substr(contents, 0, 3)
2987    return math_ops.equal(substr, b'\xff\xd8\xff', name=name)
2988
2989
2990def _is_png(contents, name=None):
2991  r"""Convenience function to check if the 'contents' encodes a PNG image.
2992
2993  Args:
2994    contents: 0-D `string`. The encoded image bytes.
2995    name: A name for the operation (optional)
2996
2997  Returns:
2998     A scalar boolean tensor indicating if 'contents' may be a PNG image.
2999     is_png is susceptible to false positives.
3000  """
3001  with ops.name_scope(name, 'is_png'):
3002    substr = string_ops.substr(contents, 0, 3)
3003    return math_ops.equal(substr, b'\211PN', name=name)
3004
3005
3006tf_export(
3007    'io.decode_and_crop_jpeg',
3008    'image.decode_and_crop_jpeg',
3009    v1=['io.decode_and_crop_jpeg', 'image.decode_and_crop_jpeg'])(
3010        dispatch.add_dispatch_support(gen_image_ops.decode_and_crop_jpeg))
3011
3012tf_export(
3013    'io.decode_bmp',
3014    'image.decode_bmp',
3015    v1=['io.decode_bmp', 'image.decode_bmp'])(
3016        dispatch.add_dispatch_support(gen_image_ops.decode_bmp))
3017tf_export(
3018    'io.decode_gif',
3019    'image.decode_gif',
3020    v1=['io.decode_gif', 'image.decode_gif'])(
3021        dispatch.add_dispatch_support(gen_image_ops.decode_gif))
3022tf_export(
3023    'io.decode_jpeg',
3024    'image.decode_jpeg',
3025    v1=['io.decode_jpeg', 'image.decode_jpeg'])(
3026        dispatch.add_dispatch_support(gen_image_ops.decode_jpeg))
3027tf_export(
3028    'io.decode_png',
3029    'image.decode_png',
3030    v1=['io.decode_png', 'image.decode_png'])(
3031        dispatch.add_dispatch_support(gen_image_ops.decode_png))
3032
3033tf_export(
3034    'io.encode_jpeg',
3035    'image.encode_jpeg',
3036    v1=['io.encode_jpeg', 'image.encode_jpeg'])(
3037        dispatch.add_dispatch_support(gen_image_ops.encode_jpeg))
3038tf_export(
3039    'io.extract_jpeg_shape',
3040    'image.extract_jpeg_shape',
3041    v1=['io.extract_jpeg_shape', 'image.extract_jpeg_shape'])(
3042        dispatch.add_dispatch_support(gen_image_ops.extract_jpeg_shape))
3043
3044
3045@tf_export('io.encode_png', 'image.encode_png')
3046@dispatch.add_dispatch_support
3047def encode_png(image, compression=-1, name=None):
3048  r"""PNG-encode an image.
3049
3050  `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]`
3051  where `channels` is:
3052
3053  *   1: for grayscale.
3054  *   2: for grayscale + alpha.
3055  *   3: for RGB.
3056  *   4: for RGBA.
3057
3058  The ZLIB compression level, `compression`, can be -1 for the PNG-encoder
3059  default or a value from 0 to 9.  9 is the highest compression level,
3060  generating the smallest output, but is slower.
3061
3062  Args:
3063    image: A `Tensor`. Must be one of the following types: `uint8`, `uint16`.
3064      3-D with shape `[height, width, channels]`.
3065    compression: An optional `int`. Defaults to `-1`. Compression level.
3066    name: A name for the operation (optional).
3067
3068  Returns:
3069    A `Tensor` of type `string`.
3070  """
3071  return gen_image_ops.encode_png(
3072      ops.convert_to_tensor(image), compression, name)
3073
3074
3075@tf_export(
3076    'io.decode_image',
3077    'image.decode_image',
3078    v1=['io.decode_image', 'image.decode_image'])
3079@dispatch.add_dispatch_support
3080def decode_image(contents,
3081                 channels=None,
3082                 dtype=dtypes.uint8,
3083                 name=None,
3084                 expand_animations=True):
3085  """Function for `decode_bmp`, `decode_gif`, `decode_jpeg`, and `decode_png`.
3086
3087  Detects whether an image is a BMP, GIF, JPEG, or PNG, and performs the
3088  appropriate operation to convert the input bytes `string` into a `Tensor`
3089  of type `dtype`.
3090
3091  Note: `decode_gif` returns a 4-D array `[num_frames, height, width, 3]`, as
3092  opposed to `decode_bmp`, `decode_jpeg` and `decode_png`, which return 3-D
3093  arrays `[height, width, num_channels]`. Make sure to take this into account
3094  when constructing your graph if you are intermixing GIF files with BMP, JPEG,
3095  and/or PNG files. Alternately, set the `expand_animations` argument of this
3096  function to `False`, in which case the op will return 3-dimensional tensors
3097  and will truncate animated GIF files to the first frame.
3098
3099  NOTE: If the first frame of an animated GIF does not occupy the entire
3100  canvas (maximum frame width x maximum frame height), then it fills the
3101  unoccupied areas (in the first frame) with zeros (black). For frames after the
3102  first frame that does not occupy the entire canvas, it uses the previous
3103  frame to fill the unoccupied areas.
3104
3105  Args:
3106    contents: A `Tensor` of type `string`. 0-D. The encoded image bytes.
3107    channels: An optional `int`. Defaults to `0`. Number of color channels for
3108      the decoded image.
3109    dtype: The desired DType of the returned `Tensor`.
3110    name: A name for the operation (optional)
3111    expand_animations: An optional `bool`. Defaults to `True`. Controls the
3112      shape of the returned op's output. If `True`, the returned op will produce
3113      a 3-D tensor for PNG, JPEG, and BMP files; and a 4-D tensor for all GIFs,
3114      whether animated or not. If, `False`, the returned op will produce a 3-D
3115      tensor for all file types and will truncate animated GIFs to the first
3116      frame.
3117
3118  Returns:
3119    `Tensor` with type `dtype` and a 3- or 4-dimensional shape, depending on
3120    the file type and the value of the `expand_animations` parameter.
3121
3122  Raises:
3123    ValueError: On incorrect number of channels.
3124  """
3125  with ops.name_scope(name, 'decode_image'):
3126    if compat.forward_compatible(2020, 8, 14):
3127      channels = 0 if channels is None else channels
3128      if dtype not in [dtypes.float32, dtypes.uint8, dtypes.uint16]:
3129        dest_dtype = dtype
3130        dtype = dtypes.uint16
3131        return convert_image_dtype(gen_image_ops.decode_image(
3132            contents=contents,
3133            channels=channels,
3134            expand_animations=expand_animations,
3135            dtype=dtype), dest_dtype)
3136      else:
3137        return gen_image_ops.decode_image(
3138            contents=contents,
3139            channels=channels,
3140            expand_animations=expand_animations,
3141            dtype=dtype)
3142
3143    if channels not in (None, 0, 1, 3, 4):
3144      raise ValueError('channels must be in (None, 0, 1, 3, 4)')
3145    substr = string_ops.substr(contents, 0, 3)
3146
3147    def _bmp():
3148      """Decodes a BMP image."""
3149      signature = string_ops.substr(contents, 0, 2)
3150      # Create assert op to check that bytes are BMP decodable
3151      is_bmp = math_ops.equal(signature, 'BM', name='is_bmp')
3152      decode_msg = 'Unable to decode bytes as JPEG, PNG, GIF, or BMP'
3153      assert_decode = control_flow_ops.Assert(is_bmp, [decode_msg])
3154      bmp_channels = 0 if channels is None else channels
3155      good_channels = math_ops.not_equal(bmp_channels, 1, name='check_channels')
3156      channels_msg = ('Channels must be in (None, 0, 3, 4) when decoding BMP '
3157                      'images')
3158      assert_channels = control_flow_ops.Assert(good_channels, [channels_msg])
3159      with ops.control_dependencies([assert_decode, assert_channels]):
3160        return convert_image_dtype(
3161            gen_image_ops.decode_bmp(contents, channels=bmp_channels), dtype)
3162
3163    def _gif():
3164      """Decodes a GIF image."""
3165      # Create assert to make sure that channels is not set to 1
3166      # Already checked above that channels is in (None, 0, 1, 3)
3167      gif_channels = 0 if channels is None else channels
3168      good_channels = math_ops.logical_and(
3169          math_ops.not_equal(gif_channels, 1, name='check_gif_channels'),
3170          math_ops.not_equal(gif_channels, 4, name='check_gif_channels'))
3171      channels_msg = 'Channels must be in (None, 0, 3) when decoding GIF images'
3172      assert_channels = control_flow_ops.Assert(good_channels, [channels_msg])
3173      with ops.control_dependencies([assert_channels]):
3174        result = convert_image_dtype(gen_image_ops.decode_gif(contents), dtype)
3175        if not expand_animations:
3176          # For now we decode animated GIFs fully and toss out all but the
3177          # first frame when expand_animations is False
3178          result = array_ops.gather(result, 0)
3179        return result
3180
3181    def check_gif():
3182      # Create assert op to check that bytes are GIF decodable
3183      is_gif = math_ops.equal(substr, b'\x47\x49\x46', name='is_gif')
3184      return control_flow_ops.cond(is_gif, _gif, _bmp, name='cond_gif')
3185
3186    def _png():
3187      """Decodes a PNG image."""
3188      return convert_image_dtype(
3189          gen_image_ops.decode_png(
3190              contents,
3191              channels,
3192              dtype=dtypes.uint8 if dtype == dtypes.uint8 else dtypes.uint16),
3193          dtype)
3194
3195    def check_png():
3196      """Checks if an image is PNG."""
3197      return control_flow_ops.cond(
3198          _is_png(contents), _png, check_gif, name='cond_png')
3199
3200    def _jpeg():
3201      """Decodes a jpeg image."""
3202      jpeg_channels = 0 if channels is None else channels
3203      good_channels = math_ops.not_equal(
3204          jpeg_channels, 4, name='check_jpeg_channels')
3205      channels_msg = ('Channels must be in (None, 0, 1, 3) when decoding JPEG '
3206                      'images')
3207      assert_channels = control_flow_ops.Assert(good_channels, [channels_msg])
3208      with ops.control_dependencies([assert_channels]):
3209        return convert_image_dtype(
3210            gen_image_ops.decode_jpeg(contents, channels), dtype)
3211
3212    # Decode normal JPEG images (start with \xff\xd8\xff\xe0)
3213    # as well as JPEG images with EXIF data (start with \xff\xd8\xff\xe1).
3214    return control_flow_ops.cond(
3215        is_jpeg(contents), _jpeg, check_png, name='cond_jpeg')
3216
3217
3218@tf_export('image.total_variation')
3219@dispatch.add_dispatch_support
3220def total_variation(images, name=None):
3221  """Calculate and return the total variation for one or more images.
3222
3223  The total variation is the sum of the absolute differences for neighboring
3224  pixel-values in the input images. This measures how much noise is in the
3225  images.
3226
3227  This can be used as a loss-function during optimization so as to suppress
3228  noise in images. If you have a batch of images, then you should calculate
3229  the scalar loss-value as the sum:
3230  `loss = tf.reduce_sum(tf.image.total_variation(images))`
3231
3232  This implements the anisotropic 2-D version of the formula described here:
3233
3234  https://en.wikipedia.org/wiki/Total_variation_denoising
3235
3236  Args:
3237    images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
3238      of shape `[height, width, channels]`.
3239    name: A name for the operation (optional).
3240
3241  Raises:
3242    ValueError: if images.shape is not a 3-D or 4-D vector.
3243
3244  Returns:
3245    The total variation of `images`.
3246
3247    If `images` was 4-D, return a 1-D float Tensor of shape `[batch]` with the
3248    total variation for each image in the batch.
3249    If `images` was 3-D, return a scalar float with the total variation for
3250    that image.
3251  """
3252
3253  with ops.name_scope(name, 'total_variation'):
3254    ndims = images.get_shape().ndims
3255
3256    if ndims == 3:
3257      # The input is a single image with shape [height, width, channels].
3258
3259      # Calculate the difference of neighboring pixel-values.
3260      # The images are shifted one pixel along the height and width by slicing.
3261      pixel_dif1 = images[1:, :, :] - images[:-1, :, :]
3262      pixel_dif2 = images[:, 1:, :] - images[:, :-1, :]
3263
3264      # Sum for all axis. (None is an alias for all axis.)
3265      sum_axis = None
3266    elif ndims == 4:
3267      # The input is a batch of images with shape:
3268      # [batch, height, width, channels].
3269
3270      # Calculate the difference of neighboring pixel-values.
3271      # The images are shifted one pixel along the height and width by slicing.
3272      pixel_dif1 = images[:, 1:, :, :] - images[:, :-1, :, :]
3273      pixel_dif2 = images[:, :, 1:, :] - images[:, :, :-1, :]
3274
3275      # Only sum for the last 3 axis.
3276      # This results in a 1-D tensor with the total variation for each image.
3277      sum_axis = [1, 2, 3]
3278    else:
3279      raise ValueError('\'images\' must be either 3 or 4-dimensional.')
3280
3281    # Calculate the total variation by taking the absolute value of the
3282    # pixel-differences and summing over the appropriate axis.
3283    tot_var = (
3284        math_ops.reduce_sum(math_ops.abs(pixel_dif1), axis=sum_axis) +
3285        math_ops.reduce_sum(math_ops.abs(pixel_dif2), axis=sum_axis))
3286
3287  return tot_var
3288
3289
3290@tf_export('image.sample_distorted_bounding_box', v1=[])
3291@dispatch.add_dispatch_support
3292def sample_distorted_bounding_box_v2(image_size,
3293                                     bounding_boxes,
3294                                     seed=0,
3295                                     min_object_covered=0.1,
3296                                     aspect_ratio_range=None,
3297                                     area_range=None,
3298                                     max_attempts=None,
3299                                     use_image_if_no_bounding_boxes=None,
3300                                     name=None):
3301  """Generate a single randomly distorted bounding box for an image.
3302
3303  Bounding box annotations are often supplied in addition to ground-truth labels
3304  in image recognition or object localization tasks. A common technique for
3305  training such a system is to randomly distort an image while preserving
3306  its content, i.e. *data augmentation*. This Op outputs a randomly distorted
3307  localization of an object, i.e. bounding box, given an `image_size`,
3308  `bounding_boxes` and a series of constraints.
3309
3310  The output of this Op is a single bounding box that may be used to crop the
3311  original image. The output is returned as 3 tensors: `begin`, `size` and
3312  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
3313  image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
3314  visualize what the bounding box looks like.
3315
3316  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
3317  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
3318  and the height of the underlying image.
3319
3320  For example,
3321
3322  ```python
3323      # Generate a single distorted bounding box.
3324      begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
3325          tf.shape(image),
3326          bounding_boxes=bounding_boxes,
3327          min_object_covered=0.1)
3328
3329      # Draw the bounding box in an image summary.
3330      image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
3331                                                    bbox_for_draw)
3332      tf.compat.v1.summary.image('images_with_box', image_with_box)
3333
3334      # Employ the bounding box to distort the image.
3335      distorted_image = tf.slice(image, begin, size)
3336  ```
3337
3338  Note that if no bounding box information is available, setting
3339  `use_image_if_no_bounding_boxes = true` will assume there is a single implicit
3340  bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
3341  false and no bounding boxes are supplied, an error is raised.
3342
3343  For producing deterministic results given a `seed` value, use
3344  `tf.image.stateless_sample_distorted_bounding_box`. Unlike using the `seed`
3345  param with `tf.image.random_*` ops, `tf.image.stateless_random_*` ops
3346  guarantee the same results given the same seed independent of how many times
3347  the function is called, and independent of global seed settings
3348  (e.g. tf.random.set_seed).
3349
3350  Args:
3351    image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
3352      `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`.
3353    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]`
3354      describing the N bounding boxes associated with the image.
3355    seed: An optional `int`. Defaults to `0`. If `seed` is set to non-zero, the
3356      random number generator is seeded by the given `seed`.  Otherwise, it is
3357      seeded by a random seed.
3358    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
3359      cropped area of the image must contain at least this fraction of any
3360      bounding box supplied. The value of this parameter should be non-negative.
3361      In the case of 0, the cropped area does not need to overlap any of the
3362      bounding boxes supplied.
3363    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
3364      1.33]`. The cropped area of the image must have an aspect `ratio = width /
3365      height` within this range.
3366    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
3367      cropped area of the image must contain a fraction of the supplied image
3368      within this range.
3369    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
3370      generating a cropped region of the image of the specified constraints.
3371      After `max_attempts` failures, return the entire image.
3372    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
3373      Controls behavior if no bounding boxes supplied. If true, assume an
3374      implicit bounding box covering the whole input. If false, raise an error.
3375    name: A name for the operation (optional).
3376
3377  Returns:
3378    A tuple of `Tensor` objects (begin, size, bboxes).
3379
3380    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
3381    `[offset_height, offset_width, 0]`. Provide as input to
3382      `tf.slice`.
3383    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
3384    `[target_height, target_width, -1]`. Provide as input to
3385      `tf.slice`.
3386    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing
3387    the distorted bounding box.
3388    Provide as input to `tf.image.draw_bounding_boxes`.
3389  """
3390  seed1, seed2 = random_seed.get_seed(seed) if seed else (0, 0)
3391  with ops.name_scope(name, 'sample_distorted_bounding_box'):
3392    return gen_image_ops.sample_distorted_bounding_box_v2(
3393        image_size,
3394        bounding_boxes,
3395        seed=seed1,
3396        seed2=seed2,
3397        min_object_covered=min_object_covered,
3398        aspect_ratio_range=aspect_ratio_range,
3399        area_range=area_range,
3400        max_attempts=max_attempts,
3401        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
3402        name=name)
3403
3404
3405@tf_export('image.stateless_sample_distorted_bounding_box', v1=[])
3406@dispatch.add_dispatch_support
3407def stateless_sample_distorted_bounding_box(image_size,
3408                                            bounding_boxes,
3409                                            seed,
3410                                            min_object_covered=0.1,
3411                                            aspect_ratio_range=None,
3412                                            area_range=None,
3413                                            max_attempts=None,
3414                                            use_image_if_no_bounding_boxes=None,
3415                                            name=None):
3416  """Generate a randomly distorted bounding box for an image deterministically.
3417
3418  Bounding box annotations are often supplied in addition to ground-truth labels
3419  in image recognition or object localization tasks. A common technique for
3420  training such a system is to randomly distort an image while preserving
3421  its content, i.e. *data augmentation*. This Op, given the same `seed`,
3422  deterministically outputs a randomly distorted localization of an object, i.e.
3423  bounding box, given an `image_size`, `bounding_boxes` and a series of
3424  constraints.
3425
3426  The output of this Op is a single bounding box that may be used to crop the
3427  original image. The output is returned as 3 tensors: `begin`, `size` and
3428  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
3429  image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
3430  visualize what the bounding box looks like.
3431
3432  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
3433  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
3434  and the height of the underlying image.
3435
3436  The output of this Op is guaranteed to be the same given the same `seed` and
3437  is independent of how many times the function is called, and independent of
3438  global seed settings (e.g. `tf.random.set_seed`).
3439
3440  Example usage:
3441
3442  >>> image = np.array([[[1], [2], [3]], [[4], [5], [6]], [[7], [8], [9]]])
3443  >>> bbox = tf.constant(
3444  ...   [0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])
3445  >>> seed = (1, 2)
3446  >>> # Generate a single distorted bounding box.
3447  >>> bbox_begin, bbox_size, bbox_draw = (
3448  ...   tf.image.stateless_sample_distorted_bounding_box(
3449  ...     tf.shape(image), bounding_boxes=bbox, seed=seed))
3450  >>> # Employ the bounding box to distort the image.
3451  >>> tf.slice(image, bbox_begin, bbox_size)
3452  <tf.Tensor: shape=(2, 2, 1), dtype=int64, numpy=
3453  array([[[1],
3454          [2]],
3455         [[4],
3456          [5]]])>
3457  >>> # Draw the bounding box in an image summary.
3458  >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
3459  >>> tf.image.draw_bounding_boxes(
3460  ...   tf.expand_dims(tf.cast(image, tf.float32),0), bbox_draw, colors)
3461  <tf.Tensor: shape=(1, 3, 3, 1), dtype=float32, numpy=
3462  array([[[[1.],
3463           [1.],
3464           [3.]],
3465          [[1.],
3466           [1.],
3467           [6.]],
3468          [[7.],
3469           [8.],
3470           [9.]]]], dtype=float32)>
3471
3472  Note that if no bounding box information is available, setting
3473  `use_image_if_no_bounding_boxes = true` will assume there is a single implicit
3474  bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
3475  false and no bounding boxes are supplied, an error is raised.
3476
3477  Args:
3478    image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
3479      `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`.
3480    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]`
3481      describing the N bounding boxes associated with the image.
3482    seed: A shape [2] Tensor, the seed to the random number generator. Must have
3483      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
3484    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
3485      cropped area of the image must contain at least this fraction of any
3486      bounding box supplied. The value of this parameter should be non-negative.
3487      In the case of 0, the cropped area does not need to overlap any of the
3488      bounding boxes supplied.
3489    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
3490      1.33]`. The cropped area of the image must have an aspect `ratio = width /
3491      height` within this range.
3492    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
3493      cropped area of the image must contain a fraction of the supplied image
3494      within this range.
3495    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
3496      generating a cropped region of the image of the specified constraints.
3497      After `max_attempts` failures, return the entire image.
3498    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
3499      Controls behavior if no bounding boxes supplied. If true, assume an
3500      implicit bounding box covering the whole input. If false, raise an error.
3501    name: A name for the operation (optional).
3502
3503  Returns:
3504    A tuple of `Tensor` objects (begin, size, bboxes).
3505
3506    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
3507    `[offset_height, offset_width, 0]`. Provide as input to
3508      `tf.slice`.
3509    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
3510    `[target_height, target_width, -1]`. Provide as input to
3511      `tf.slice`.
3512    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing
3513    the distorted bounding box.
3514    Provide as input to `tf.image.draw_bounding_boxes`.
3515  """
3516  with ops.name_scope(name, 'stateless_sample_distorted_bounding_box'):
3517    return gen_image_ops.stateless_sample_distorted_bounding_box(
3518        image_size=image_size,
3519        bounding_boxes=bounding_boxes,
3520        seed=seed,
3521        min_object_covered=min_object_covered,
3522        aspect_ratio_range=aspect_ratio_range,
3523        area_range=area_range,
3524        max_attempts=max_attempts,
3525        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
3526        name=name)
3527
3528
3529@tf_export(v1=['image.sample_distorted_bounding_box'])
3530@dispatch.add_dispatch_support
3531@deprecation.deprecated(
3532    date=None,
3533    instructions='`seed2` arg is deprecated.'
3534    'Use sample_distorted_bounding_box_v2 instead.')
3535def sample_distorted_bounding_box(image_size,
3536                                  bounding_boxes,
3537                                  seed=None,
3538                                  seed2=None,
3539                                  min_object_covered=0.1,
3540                                  aspect_ratio_range=None,
3541                                  area_range=None,
3542                                  max_attempts=None,
3543                                  use_image_if_no_bounding_boxes=None,
3544                                  name=None):
3545  """Generate a single randomly distorted bounding box for an image.
3546
3547  Bounding box annotations are often supplied in addition to ground-truth labels
3548  in image recognition or object localization tasks. A common technique for
3549  training such a system is to randomly distort an image while preserving
3550  its content, i.e. *data augmentation*. This Op outputs a randomly distorted
3551  localization of an object, i.e. bounding box, given an `image_size`,
3552  `bounding_boxes` and a series of constraints.
3553
3554  The output of this Op is a single bounding box that may be used to crop the
3555  original image. The output is returned as 3 tensors: `begin`, `size` and
3556  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
3557  image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
3558  visualize what the bounding box looks like.
3559
3560  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
3561  The
3562  bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
3563  height of the underlying image.
3564
3565  For example,
3566
3567  ```python
3568      # Generate a single distorted bounding box.
3569      begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
3570          tf.shape(image),
3571          bounding_boxes=bounding_boxes,
3572          min_object_covered=0.1)
3573
3574      # Draw the bounding box in an image summary.
3575      image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
3576                                                    bbox_for_draw)
3577      tf.compat.v1.summary.image('images_with_box', image_with_box)
3578
3579      # Employ the bounding box to distort the image.
3580      distorted_image = tf.slice(image, begin, size)
3581  ```
3582
3583  Note that if no bounding box information is available, setting
3584  `use_image_if_no_bounding_boxes = True` will assume there is a single implicit
3585  bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
3586  false and no bounding boxes are supplied, an error is raised.
3587
3588  Args:
3589    image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
3590      `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`.
3591    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]`
3592      describing the N bounding boxes associated with the image.
3593    seed: An optional `int`. Defaults to `0`. If either `seed` or `seed2` are
3594      set to non-zero, the random number generator is seeded by the given
3595      `seed`.  Otherwise, it is seeded by a random seed.
3596    seed2: An optional `int`. Defaults to `0`. A second seed to avoid seed
3597      collision.
3598    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
3599      cropped area of the image must contain at least this fraction of any
3600      bounding box supplied. The value of this parameter should be non-negative.
3601      In the case of 0, the cropped area does not need to overlap any of the
3602      bounding boxes supplied.
3603    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
3604      1.33]`. The cropped area of the image must have an aspect ratio = width /
3605      height within this range.
3606    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
3607      cropped area of the image must contain a fraction of the supplied image
3608      within this range.
3609    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
3610      generating a cropped region of the image of the specified constraints.
3611      After `max_attempts` failures, return the entire image.
3612    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
3613      Controls behavior if no bounding boxes supplied. If true, assume an
3614      implicit bounding box covering the whole input. If false, raise an error.
3615    name: A name for the operation (optional).
3616
3617  Returns:
3618    A tuple of `Tensor` objects (begin, size, bboxes).
3619
3620    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
3621    `[offset_height, offset_width, 0]`. Provide as input to
3622      `tf.slice`.
3623    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
3624    `[target_height, target_width, -1]`. Provide as input to
3625      `tf.slice`.
3626    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing
3627    the distorted bounding box.
3628      Provide as input to `tf.image.draw_bounding_boxes`.
3629  """
3630  with ops.name_scope(name, 'sample_distorted_bounding_box'):
3631    return gen_image_ops.sample_distorted_bounding_box_v2(
3632        image_size,
3633        bounding_boxes,
3634        seed=seed,
3635        seed2=seed2,
3636        min_object_covered=min_object_covered,
3637        aspect_ratio_range=aspect_ratio_range,
3638        area_range=area_range,
3639        max_attempts=max_attempts,
3640        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
3641        name=name)
3642
3643
3644@tf_export('image.non_max_suppression')
3645@dispatch.add_dispatch_support
3646def non_max_suppression(boxes,
3647                        scores,
3648                        max_output_size,
3649                        iou_threshold=0.5,
3650                        score_threshold=float('-inf'),
3651                        name=None):
3652  """Greedily selects a subset of bounding boxes in descending order of score.
3653
3654  Prunes away boxes that have high intersection-over-union (IOU) overlap
3655  with previously selected boxes.  Bounding boxes are supplied as
3656  `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any
3657  diagonal pair of box corners and the coordinates can be provided as normalized
3658  (i.e., lying in the interval `[0, 1]`) or absolute.  Note that this algorithm
3659  is agnostic to where the origin is in the coordinate system.  Note that this
3660  algorithm is invariant to orthogonal transformations and translations
3661  of the coordinate system; thus translating or reflections of the coordinate
3662  system result in the same boxes being selected by the algorithm.
3663  The output of this operation is a set of integers indexing into the input
3664  collection of bounding boxes representing the selected boxes.  The bounding
3665  box coordinates corresponding to the selected indices can then be obtained
3666  using the `tf.gather` operation.  For example:
3667    ```python
3668    selected_indices = tf.image.non_max_suppression(
3669        boxes, scores, max_output_size, iou_threshold)
3670    selected_boxes = tf.gather(boxes, selected_indices)
3671    ```
3672
3673  Args:
3674    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
3675    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
3676      score corresponding to each box (each row of boxes).
3677    max_output_size: A scalar integer `Tensor` representing the maximum number
3678      of boxes to be selected by non-max suppression.
3679    iou_threshold: A 0-D float tensor representing the threshold for deciding
3680      whether boxes overlap too much with respect to IOU.
3681    score_threshold: A 0-D float tensor representing the threshold for deciding
3682      when to remove boxes based on score.
3683    name: A name for the operation (optional).
3684
3685  Returns:
3686    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
3687      selected indices from the boxes tensor, where `M <= max_output_size`.
3688  """
3689  with ops.name_scope(name, 'non_max_suppression'):
3690    iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
3691    score_threshold = ops.convert_to_tensor(
3692        score_threshold, name='score_threshold')
3693    return gen_image_ops.non_max_suppression_v3(boxes, scores, max_output_size,
3694                                                iou_threshold, score_threshold)
3695
3696
3697@tf_export('image.non_max_suppression_with_scores')
3698@dispatch.add_dispatch_support
3699def non_max_suppression_with_scores(boxes,
3700                                    scores,
3701                                    max_output_size,
3702                                    iou_threshold=0.5,
3703                                    score_threshold=float('-inf'),
3704                                    soft_nms_sigma=0.0,
3705                                    name=None):
3706  """Greedily selects a subset of bounding boxes in descending order of score.
3707
3708  Prunes away boxes that have high intersection-over-union (IOU) overlap
3709  with previously selected boxes.  Bounding boxes are supplied as
3710  `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any
3711  diagonal pair of box corners and the coordinates can be provided as normalized
3712  (i.e., lying in the interval `[0, 1]`) or absolute.  Note that this algorithm
3713  is agnostic to where the origin is in the coordinate system.  Note that this
3714  algorithm is invariant to orthogonal transformations and translations
3715  of the coordinate system; thus translating or reflections of the coordinate
3716  system result in the same boxes being selected by the algorithm.
3717  The output of this operation is a set of integers indexing into the input
3718  collection of bounding boxes representing the selected boxes.  The bounding
3719  box coordinates corresponding to the selected indices can then be obtained
3720  using the `tf.gather` operation.  For example:
3721    ```python
3722    selected_indices, selected_scores = tf.image.non_max_suppression_padded(
3723        boxes, scores, max_output_size, iou_threshold=1.0, score_threshold=0.1,
3724        soft_nms_sigma=0.5)
3725    selected_boxes = tf.gather(boxes, selected_indices)
3726    ```
3727
3728  This function generalizes the `tf.image.non_max_suppression` op by also
3729  supporting a Soft-NMS (with Gaussian weighting) mode (c.f.
3730  Bodla et al, https://arxiv.org/abs/1704.04503) where boxes reduce the score
3731  of other overlapping boxes instead of directly causing them to be pruned.
3732  Consequently, in contrast to `tf.image.non_max_suppression`,
3733  `tf.image.non_max_suppression_padded` returns the new scores of each input box
3734  in the second output, `selected_scores`.
3735
3736  To enable this Soft-NMS mode, set the `soft_nms_sigma` parameter to be
3737  larger than 0.  When `soft_nms_sigma` equals 0, the behavior of
3738  `tf.image.non_max_suppression_padded` is identical to that of
3739  `tf.image.non_max_suppression` (except for the extra output) both in function
3740  and in running time.
3741
3742  Args:
3743    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
3744    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
3745      score corresponding to each box (each row of boxes).
3746    max_output_size: A scalar integer `Tensor` representing the maximum number
3747      of boxes to be selected by non-max suppression.
3748    iou_threshold: A 0-D float tensor representing the threshold for deciding
3749      whether boxes overlap too much with respect to IOU.
3750    score_threshold: A 0-D float tensor representing the threshold for deciding
3751      when to remove boxes based on score.
3752    soft_nms_sigma: A 0-D float tensor representing the sigma parameter for Soft
3753      NMS; see Bodla et al (c.f. https://arxiv.org/abs/1704.04503).  When
3754      `soft_nms_sigma=0.0` (which is default), we fall back to standard (hard)
3755      NMS.
3756    name: A name for the operation (optional).
3757
3758  Returns:
3759    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
3760      selected indices from the boxes tensor, where `M <= max_output_size`.
3761    selected_scores: A 1-D float tensor of shape `[M]` representing the
3762      corresponding scores for each selected box, where `M <= max_output_size`.
3763      Scores only differ from corresponding input scores when using Soft NMS
3764      (i.e. when `soft_nms_sigma>0`)
3765  """
3766  with ops.name_scope(name, 'non_max_suppression_with_scores'):
3767    iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
3768    score_threshold = ops.convert_to_tensor(
3769        score_threshold, name='score_threshold')
3770    soft_nms_sigma = ops.convert_to_tensor(
3771        soft_nms_sigma, name='soft_nms_sigma')
3772    (selected_indices, selected_scores,
3773     _) = gen_image_ops.non_max_suppression_v5(
3774         boxes,
3775         scores,
3776         max_output_size,
3777         iou_threshold,
3778         score_threshold,
3779         soft_nms_sigma,
3780         pad_to_max_output_size=False)
3781    return selected_indices, selected_scores
3782
3783
3784@tf_export('image.non_max_suppression_overlaps')
3785@dispatch.add_dispatch_support
3786def non_max_suppression_with_overlaps(overlaps,
3787                                      scores,
3788                                      max_output_size,
3789                                      overlap_threshold=0.5,
3790                                      score_threshold=float('-inf'),
3791                                      name=None):
3792  """Greedily selects a subset of bounding boxes in descending order of score.
3793
3794  Prunes away boxes that have high overlap with previously selected boxes.
3795  N-by-n overlap values are supplied as square matrix.
3796  The output of this operation is a set of integers indexing into the input
3797  collection of bounding boxes representing the selected boxes.  The bounding
3798  box coordinates corresponding to the selected indices can then be obtained
3799  using the `tf.gather` operation.  For example:
3800    ```python
3801    selected_indices = tf.image.non_max_suppression_overlaps(
3802        overlaps, scores, max_output_size, iou_threshold)
3803    selected_boxes = tf.gather(boxes, selected_indices)
3804    ```
3805
3806  Args:
3807    overlaps: A 2-D float `Tensor` of shape `[num_boxes, num_boxes]`
3808      representing the n-by-n box overlap values.
3809    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
3810      score corresponding to each box (each row of boxes).
3811    max_output_size: A scalar integer `Tensor` representing the maximum number
3812      of boxes to be selected by non-max suppression.
3813    overlap_threshold: A 0-D float tensor representing the threshold for
3814      deciding whether boxes overlap too much with respect to the provided
3815      overlap values.
3816    score_threshold: A 0-D float tensor representing the threshold for deciding
3817      when to remove boxes based on score.
3818    name: A name for the operation (optional).
3819
3820  Returns:
3821    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
3822      selected indices from the overlaps tensor, where `M <= max_output_size`.
3823  """
3824  with ops.name_scope(name, 'non_max_suppression_overlaps'):
3825    overlap_threshold = ops.convert_to_tensor(
3826        overlap_threshold, name='overlap_threshold')
3827    # pylint: disable=protected-access
3828    return gen_image_ops.non_max_suppression_with_overlaps(
3829        overlaps, scores, max_output_size, overlap_threshold, score_threshold)
3830    # pylint: enable=protected-access
3831
3832
3833_rgb_to_yiq_kernel = [[0.299, 0.59590059, 0.2115],
3834                      [0.587, -0.27455667, -0.52273617],
3835                      [0.114, -0.32134392, 0.31119955]]
3836
3837
3838@tf_export('image.rgb_to_yiq')
3839@dispatch.add_dispatch_support
3840def rgb_to_yiq(images):
3841  """Converts one or more images from RGB to YIQ.
3842
3843  Outputs a tensor of the same shape as the `images` tensor, containing the YIQ
3844  value of the pixels.
3845  The output is only well defined if the value in images are in [0,1].
3846
3847  Usage Example:
3848
3849  >>> x = tf.constant([[[1.0, 2.0, 3.0]]])
3850  >>> tf.image.rgb_to_yiq(x)
3851  <tf.Tensor: shape=(1, 1, 3), dtype=float32,
3852  numpy=array([[[ 1.815     , -0.91724455,  0.09962624]]], dtype=float32)>
3853
3854  Args:
3855    images: 2-D or higher rank. Image data to convert. Last dimension must be
3856      size 3.
3857
3858  Returns:
3859    images: tensor with the same shape as `images`.
3860  """
3861  images = ops.convert_to_tensor(images, name='images')
3862  kernel = ops.convert_to_tensor(
3863      _rgb_to_yiq_kernel, dtype=images.dtype, name='kernel')
3864  ndims = images.get_shape().ndims
3865  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
3866
3867
3868_yiq_to_rgb_kernel = [[1, 1, 1], [0.95598634, -0.27201283, -1.10674021],
3869                      [0.6208248, -0.64720424, 1.70423049]]
3870
3871
3872@tf_export('image.yiq_to_rgb')
3873@dispatch.add_dispatch_support
3874def yiq_to_rgb(images):
3875  """Converts one or more images from YIQ to RGB.
3876
3877  Outputs a tensor of the same shape as the `images` tensor, containing the RGB
3878  value of the pixels.
3879  The output is only well defined if the Y value in images are in [0,1],
3880  I value are in [-0.5957,0.5957] and Q value are in [-0.5226,0.5226].
3881
3882  Args:
3883    images: 2-D or higher rank. Image data to convert. Last dimension must be
3884      size 3.
3885
3886  Returns:
3887    images: tensor with the same shape as `images`.
3888  """
3889  images = ops.convert_to_tensor(images, name='images')
3890  kernel = ops.convert_to_tensor(
3891      _yiq_to_rgb_kernel, dtype=images.dtype, name='kernel')
3892  ndims = images.get_shape().ndims
3893  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
3894
3895
3896_rgb_to_yuv_kernel = [[0.299, -0.14714119, 0.61497538],
3897                      [0.587, -0.28886916, -0.51496512],
3898                      [0.114, 0.43601035, -0.10001026]]
3899
3900
3901@tf_export('image.rgb_to_yuv')
3902@dispatch.add_dispatch_support
3903def rgb_to_yuv(images):
3904  """Converts one or more images from RGB to YUV.
3905
3906  Outputs a tensor of the same shape as the `images` tensor, containing the YUV
3907  value of the pixels.
3908  The output is only well defined if the value in images are in [0, 1].
3909  There are two ways of representing an image: [0, 255] pixel values range or
3910  [0, 1] (as float) pixel values range. Users need to convert the input image
3911  into a float [0, 1] range.
3912
3913  Args:
3914    images: 2-D or higher rank. Image data to convert. Last dimension must be
3915      size 3.
3916
3917  Returns:
3918    images: tensor with the same shape as `images`.
3919  """
3920  images = ops.convert_to_tensor(images, name='images')
3921  kernel = ops.convert_to_tensor(
3922      _rgb_to_yuv_kernel, dtype=images.dtype, name='kernel')
3923  ndims = images.get_shape().ndims
3924  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
3925
3926
3927_yuv_to_rgb_kernel = [[1, 1, 1], [0, -0.394642334, 2.03206185],
3928                      [1.13988303, -0.58062185, 0]]
3929
3930
3931@tf_export('image.yuv_to_rgb')
3932@dispatch.add_dispatch_support
3933def yuv_to_rgb(images):
3934  """Converts one or more images from YUV to RGB.
3935
3936  Outputs a tensor of the same shape as the `images` tensor, containing the RGB
3937  value of the pixels.
3938  The output is only well defined if the Y value in images are in [0,1],
3939  U and V value are in [-0.5,0.5].
3940
3941  As per the above description, you need to scale your YUV images if their
3942  pixel values are not in the required range. Below given example illustrates
3943  preprocessing of each channel of images before feeding them to `yuv_to_rgb`.
3944
3945  ```python
3946  yuv_images = tf.random.uniform(shape=[100, 64, 64, 3], maxval=255)
3947  last_dimension_axis = len(yuv_images.shape) - 1
3948  yuv_tensor_images = tf.truediv(
3949      tf.subtract(
3950          yuv_images,
3951          tf.reduce_min(yuv_images)
3952      ),
3953      tf.subtract(
3954          tf.reduce_max(yuv_images),
3955          tf.reduce_min(yuv_images)
3956       )
3957  )
3958  y, u, v = tf.split(yuv_tensor_images, 3, axis=last_dimension_axis)
3959  target_uv_min, target_uv_max = -0.5, 0.5
3960  u = u * (target_uv_max - target_uv_min) + target_uv_min
3961  v = v * (target_uv_max - target_uv_min) + target_uv_min
3962  preprocessed_yuv_images = tf.concat([y, u, v], axis=last_dimension_axis)
3963  rgb_tensor_images = tf.image.yuv_to_rgb(preprocessed_yuv_images)
3964  ```
3965
3966  Args:
3967    images: 2-D or higher rank. Image data to convert. Last dimension must be
3968      size 3.
3969
3970  Returns:
3971    images: tensor with the same shape as `images`.
3972  """
3973  images = ops.convert_to_tensor(images, name='images')
3974  kernel = ops.convert_to_tensor(
3975      _yuv_to_rgb_kernel, dtype=images.dtype, name='kernel')
3976  ndims = images.get_shape().ndims
3977  return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]])
3978
3979
3980def _verify_compatible_image_shapes(img1, img2):
3981  """Checks if two image tensors are compatible for applying SSIM or PSNR.
3982
3983  This function checks if two sets of images have ranks at least 3, and if the
3984  last three dimensions match.
3985
3986  Args:
3987    img1: Tensor containing the first image batch.
3988    img2: Tensor containing the second image batch.
3989
3990  Returns:
3991    A tuple containing: the first tensor shape, the second tensor shape, and a
3992    list of control_flow_ops.Assert() ops implementing the checks.
3993
3994  Raises:
3995    ValueError: When static shape check fails.
3996  """
3997  shape1 = img1.get_shape().with_rank_at_least(3)
3998  shape2 = img2.get_shape().with_rank_at_least(3)
3999  shape1[-3:].assert_is_compatible_with(shape2[-3:])
4000
4001  if shape1.ndims is not None and shape2.ndims is not None:
4002    for dim1, dim2 in zip(
4003        reversed(shape1.dims[:-3]), reversed(shape2.dims[:-3])):
4004      if not (dim1 == 1 or dim2 == 1 or dim1.is_compatible_with(dim2)):
4005        raise ValueError('Two images are not compatible: %s and %s' %
4006                         (shape1, shape2))
4007
4008  # Now assign shape tensors.
4009  shape1, shape2 = array_ops.shape_n([img1, img2])
4010
4011  # TODO(sjhwang): Check if shape1[:-3] and shape2[:-3] are broadcastable.
4012  checks = []
4013  checks.append(
4014      control_flow_ops.Assert(
4015          math_ops.greater_equal(array_ops.size(shape1), 3), [shape1, shape2],
4016          summarize=10))
4017  checks.append(
4018      control_flow_ops.Assert(
4019          math_ops.reduce_all(math_ops.equal(shape1[-3:], shape2[-3:])),
4020          [shape1, shape2],
4021          summarize=10))
4022  return shape1, shape2, checks
4023
4024
4025@tf_export('image.psnr')
4026@dispatch.add_dispatch_support
4027def psnr(a, b, max_val, name=None):
4028  """Returns the Peak Signal-to-Noise Ratio between a and b.
4029
4030  This is intended to be used on signals (or images). Produces a PSNR value for
4031  each image in batch.
4032
4033  The last three dimensions of input are expected to be [height, width, depth].
4034
4035  Example:
4036
4037  ```python
4038      # Read images from file.
4039      im1 = tf.decode_png('path/to/im1.png')
4040      im2 = tf.decode_png('path/to/im2.png')
4041      # Compute PSNR over tf.uint8 Tensors.
4042      psnr1 = tf.image.psnr(im1, im2, max_val=255)
4043
4044      # Compute PSNR over tf.float32 Tensors.
4045      im1 = tf.image.convert_image_dtype(im1, tf.float32)
4046      im2 = tf.image.convert_image_dtype(im2, tf.float32)
4047      psnr2 = tf.image.psnr(im1, im2, max_val=1.0)
4048      # psnr1 and psnr2 both have type tf.float32 and are almost equal.
4049  ```
4050
4051  Args:
4052    a: First set of images.
4053    b: Second set of images.
4054    max_val: The dynamic range of the images (i.e., the difference between the
4055      maximum the and minimum allowed values).
4056    name: Namespace to embed the computation in.
4057
4058  Returns:
4059    The scalar PSNR between a and b. The returned tensor has type `tf.float32`
4060    and shape [batch_size, 1].
4061  """
4062  with ops.name_scope(name, 'PSNR', [a, b]):
4063    # Need to convert the images to float32.  Scale max_val accordingly so that
4064    # PSNR is computed correctly.
4065    max_val = math_ops.cast(max_val, a.dtype)
4066    max_val = convert_image_dtype(max_val, dtypes.float32)
4067    a = convert_image_dtype(a, dtypes.float32)
4068    b = convert_image_dtype(b, dtypes.float32)
4069    mse = math_ops.reduce_mean(math_ops.squared_difference(a, b), [-3, -2, -1])
4070    psnr_val = math_ops.subtract(
4071        20 * math_ops.log(max_val) / math_ops.log(10.0),
4072        np.float32(10 / np.log(10)) * math_ops.log(mse),
4073        name='psnr')
4074
4075    _, _, checks = _verify_compatible_image_shapes(a, b)
4076    with ops.control_dependencies(checks):
4077      return array_ops.identity(psnr_val)
4078
4079
4080def _ssim_helper(x, y, reducer, max_val, compensation=1.0, k1=0.01, k2=0.03):
4081  r"""Helper function for computing SSIM.
4082
4083  SSIM estimates covariances with weighted sums.  The default parameters
4084  use a biased estimate of the covariance:
4085  Suppose `reducer` is a weighted sum, then the mean estimators are
4086    \mu_x = \sum_i w_i x_i,
4087    \mu_y = \sum_i w_i y_i,
4088  where w_i's are the weighted-sum weights, and covariance estimator is
4089    cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y)
4090  with assumption \sum_i w_i = 1. This covariance estimator is biased, since
4091    E[cov_{xy}] = (1 - \sum_i w_i ^ 2) Cov(X, Y).
4092  For SSIM measure with unbiased covariance estimators, pass as `compensation`
4093  argument (1 - \sum_i w_i ^ 2).
4094
4095  Args:
4096    x: First set of images.
4097    y: Second set of images.
4098    reducer: Function that computes 'local' averages from the set of images. For
4099      non-convolutional version, this is usually tf.reduce_mean(x, [1, 2]), and
4100      for convolutional version, this is usually tf.nn.avg_pool2d or
4101      tf.nn.conv2d with weighted-sum kernel.
4102    max_val: The dynamic range (i.e., the difference between the maximum
4103      possible allowed value and the minimum allowed value).
4104    compensation: Compensation factor. See above.
4105    k1: Default value 0.01
4106    k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so
4107      it would be better if we took the values in the range of 0 < K2 < 0.4).
4108
4109  Returns:
4110    A pair containing the luminance measure, and the contrast-structure measure.
4111  """
4112
4113  c1 = (k1 * max_val)**2
4114  c2 = (k2 * max_val)**2
4115
4116  # SSIM luminance measure is
4117  # (2 * mu_x * mu_y + c1) / (mu_x ** 2 + mu_y ** 2 + c1).
4118  mean0 = reducer(x)
4119  mean1 = reducer(y)
4120  num0 = mean0 * mean1 * 2.0
4121  den0 = math_ops.square(mean0) + math_ops.square(mean1)
4122  luminance = (num0 + c1) / (den0 + c1)
4123
4124  # SSIM contrast-structure measure is
4125  #   (2 * cov_{xy} + c2) / (cov_{xx} + cov_{yy} + c2).
4126  # Note that `reducer` is a weighted sum with weight w_k, \sum_i w_i = 1, then
4127  #   cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y)
4128  #          = \sum_i w_i x_i y_i - (\sum_i w_i x_i) (\sum_j w_j y_j).
4129  num1 = reducer(x * y) * 2.0
4130  den1 = reducer(math_ops.square(x) + math_ops.square(y))
4131  c2 *= compensation
4132  cs = (num1 - num0 + c2) / (den1 - den0 + c2)
4133
4134  # SSIM score is the product of the luminance and contrast-structure measures.
4135  return luminance, cs
4136
4137
4138def _fspecial_gauss(size, sigma):
4139  """Function to mimic the 'fspecial' gaussian MATLAB function."""
4140  size = ops.convert_to_tensor(size, dtypes.int32)
4141  sigma = ops.convert_to_tensor(sigma)
4142
4143  coords = math_ops.cast(math_ops.range(size), sigma.dtype)
4144  coords -= math_ops.cast(size - 1, sigma.dtype) / 2.0
4145
4146  g = math_ops.square(coords)
4147  g *= -0.5 / math_ops.square(sigma)
4148
4149  g = array_ops.reshape(g, shape=[1, -1]) + array_ops.reshape(g, shape=[-1, 1])
4150  g = array_ops.reshape(g, shape=[1, -1])  # For tf.nn.softmax().
4151  g = nn_ops.softmax(g)
4152  return array_ops.reshape(g, shape=[size, size, 1, 1])
4153
4154
4155def _ssim_per_channel(img1,
4156                      img2,
4157                      max_val=1.0,
4158                      filter_size=11,
4159                      filter_sigma=1.5,
4160                      k1=0.01,
4161                      k2=0.03):
4162  """Computes SSIM index between img1 and img2 per color channel.
4163
4164  This function matches the standard SSIM implementation from:
4165  Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image
4166  quality assessment: from error visibility to structural similarity. IEEE
4167  transactions on image processing.
4168
4169  Details:
4170    - 11x11 Gaussian filter of width 1.5 is used.
4171    - k1 = 0.01, k2 = 0.03 as in the original paper.
4172
4173  Args:
4174    img1: First image batch.
4175    img2: Second image batch.
4176    max_val: The dynamic range of the images (i.e., the difference between the
4177      maximum the and minimum allowed values).
4178    filter_size: Default value 11 (size of gaussian filter).
4179    filter_sigma: Default value 1.5 (width of gaussian filter).
4180    k1: Default value 0.01
4181    k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so
4182      it would be better if we took the values in the range of 0 < K2 < 0.4).
4183
4184  Returns:
4185    A pair of tensors containing and channel-wise SSIM and contrast-structure
4186    values. The shape is [..., channels].
4187  """
4188  filter_size = constant_op.constant(filter_size, dtype=dtypes.int32)
4189  filter_sigma = constant_op.constant(filter_sigma, dtype=img1.dtype)
4190
4191  shape1, shape2 = array_ops.shape_n([img1, img2])
4192  checks = [
4193      control_flow_ops.Assert(
4194          math_ops.reduce_all(
4195              math_ops.greater_equal(shape1[-3:-1], filter_size)),
4196          [shape1, filter_size],
4197          summarize=8),
4198      control_flow_ops.Assert(
4199          math_ops.reduce_all(
4200              math_ops.greater_equal(shape2[-3:-1], filter_size)),
4201          [shape2, filter_size],
4202          summarize=8)
4203  ]
4204
4205  # Enforce the check to run before computation.
4206  with ops.control_dependencies(checks):
4207    img1 = array_ops.identity(img1)
4208
4209  # TODO(sjhwang): Try to cache kernels and compensation factor.
4210  kernel = _fspecial_gauss(filter_size, filter_sigma)
4211  kernel = array_ops.tile(kernel, multiples=[1, 1, shape1[-1], 1])
4212
4213  # The correct compensation factor is `1.0 - tf.reduce_sum(tf.square(kernel))`,
4214  # but to match MATLAB implementation of MS-SSIM, we use 1.0 instead.
4215  compensation = 1.0
4216
4217  # TODO(sjhwang): Try FFT.
4218  # TODO(sjhwang): Gaussian kernel is separable in space. Consider applying
4219  #   1-by-n and n-by-1 Gaussian filters instead of an n-by-n filter.
4220  def reducer(x):
4221    shape = array_ops.shape(x)
4222    x = array_ops.reshape(x, shape=array_ops.concat([[-1], shape[-3:]], 0))
4223    y = nn.depthwise_conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID')
4224    return array_ops.reshape(
4225        y, array_ops.concat([shape[:-3], array_ops.shape(y)[1:]], 0))
4226
4227  luminance, cs = _ssim_helper(img1, img2, reducer, max_val, compensation, k1,
4228                               k2)
4229
4230  # Average over the second and the third from the last: height, width.
4231  axes = constant_op.constant([-3, -2], dtype=dtypes.int32)
4232  ssim_val = math_ops.reduce_mean(luminance * cs, axes)
4233  cs = math_ops.reduce_mean(cs, axes)
4234  return ssim_val, cs
4235
4236
4237@tf_export('image.ssim')
4238@dispatch.add_dispatch_support
4239def ssim(img1,
4240         img2,
4241         max_val,
4242         filter_size=11,
4243         filter_sigma=1.5,
4244         k1=0.01,
4245         k2=0.03):
4246  """Computes SSIM index between img1 and img2.
4247
4248  This function is based on the standard SSIM implementation from:
4249  Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image
4250  quality assessment: from error visibility to structural similarity. IEEE
4251  transactions on image processing.
4252
4253  Note: The true SSIM is only defined on grayscale.  This function does not
4254  perform any colorspace transform.  (If the input is already YUV, then it will
4255  compute YUV SSIM average.)
4256
4257  Details:
4258    - 11x11 Gaussian filter of width 1.5 is used.
4259    - k1 = 0.01, k2 = 0.03 as in the original paper.
4260
4261  The image sizes must be at least 11x11 because of the filter size.
4262
4263  Example:
4264
4265  ```python
4266      # Read images (of size 255 x 255) from file.
4267      im1 = tf.image.decode_image(tf.io.read_file('path/to/im1.png'))
4268      im2 = tf.image.decode_image(tf.io.read_file('path/to/im2.png'))
4269      tf.shape(im1)  # `img1.png` has 3 channels; shape is `(255, 255, 3)`
4270      tf.shape(im2)  # `img2.png` has 3 channels; shape is `(255, 255, 3)`
4271      # Add an outer batch for each image.
4272      im1 = tf.expand_dims(im1, axis=0)
4273      im2 = tf.expand_dims(im2, axis=0)
4274      # Compute SSIM over tf.uint8 Tensors.
4275      ssim1 = tf.image.ssim(im1, im2, max_val=255, filter_size=11,
4276                            filter_sigma=1.5, k1=0.01, k2=0.03)
4277
4278      # Compute SSIM over tf.float32 Tensors.
4279      im1 = tf.image.convert_image_dtype(im1, tf.float32)
4280      im2 = tf.image.convert_image_dtype(im2, tf.float32)
4281      ssim2 = tf.image.ssim(im1, im2, max_val=1.0, filter_size=11,
4282                            filter_sigma=1.5, k1=0.01, k2=0.03)
4283      # ssim1 and ssim2 both have type tf.float32 and are almost equal.
4284  ```
4285
4286  Args:
4287    img1: First image batch. 4-D Tensor of shape `[batch, height, width,
4288      channels]`.
4289    img2: Second image batch. 4-D Tensor of shape `[batch, height, width,
4290      channels]`.
4291    max_val: The dynamic range of the images (i.e., the difference between the
4292      maximum the and minimum allowed values).
4293    filter_size: Default value 11 (size of gaussian filter).
4294    filter_sigma: Default value 1.5 (width of gaussian filter).
4295    k1: Default value 0.01
4296    k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so
4297      it would be better if we took the values in the range of 0 < K2 < 0.4).
4298
4299  Returns:
4300    A tensor containing an SSIM value for each image in batch.  Returned SSIM
4301    values are in range (-1, 1], when pixel values are non-negative. Returns
4302    a tensor with shape: broadcast(img1.shape[:-3], img2.shape[:-3]).
4303  """
4304  with ops.name_scope(None, 'SSIM', [img1, img2]):
4305    # Convert to tensor if needed.
4306    img1 = ops.convert_to_tensor(img1, name='img1')
4307    img2 = ops.convert_to_tensor(img2, name='img2')
4308    # Shape checking.
4309    _, _, checks = _verify_compatible_image_shapes(img1, img2)
4310    with ops.control_dependencies(checks):
4311      img1 = array_ops.identity(img1)
4312
4313    # Need to convert the images to float32.  Scale max_val accordingly so that
4314    # SSIM is computed correctly.
4315    max_val = math_ops.cast(max_val, img1.dtype)
4316    max_val = convert_image_dtype(max_val, dtypes.float32)
4317    img1 = convert_image_dtype(img1, dtypes.float32)
4318    img2 = convert_image_dtype(img2, dtypes.float32)
4319    ssim_per_channel, _ = _ssim_per_channel(img1, img2, max_val, filter_size,
4320                                            filter_sigma, k1, k2)
4321    # Compute average over color channels.
4322    return math_ops.reduce_mean(ssim_per_channel, [-1])
4323
4324
4325# Default values obtained by Wang et al.
4326_MSSSIM_WEIGHTS = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333)
4327
4328
4329@tf_export('image.ssim_multiscale')
4330@dispatch.add_dispatch_support
4331def ssim_multiscale(img1,
4332                    img2,
4333                    max_val,
4334                    power_factors=_MSSSIM_WEIGHTS,
4335                    filter_size=11,
4336                    filter_sigma=1.5,
4337                    k1=0.01,
4338                    k2=0.03):
4339  """Computes the MS-SSIM between img1 and img2.
4340
4341  This function assumes that `img1` and `img2` are image batches, i.e. the last
4342  three dimensions are [height, width, channels].
4343
4344  Note: The true SSIM is only defined on grayscale.  This function does not
4345  perform any colorspace transform.  (If the input is already YUV, then it will
4346  compute YUV SSIM average.)
4347
4348  Original paper: Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. "Multiscale
4349  structural similarity for image quality assessment." Signals, Systems and
4350  Computers, 2004.
4351
4352  Args:
4353    img1: First image batch.
4354    img2: Second image batch. Must have the same rank as img1.
4355    max_val: The dynamic range of the images (i.e., the difference between the
4356      maximum the and minimum allowed values).
4357    power_factors: Iterable of weights for each of the scales. The number of
4358      scales used is the length of the list. Index 0 is the unscaled
4359      resolution's weight and each increasing scale corresponds to the image
4360      being downsampled by 2.  Defaults to (0.0448, 0.2856, 0.3001, 0.2363,
4361      0.1333), which are the values obtained in the original paper.
4362    filter_size: Default value 11 (size of gaussian filter).
4363    filter_sigma: Default value 1.5 (width of gaussian filter).
4364    k1: Default value 0.01
4365    k2: Default value 0.03 (SSIM is less sensitivity to K2 for lower values, so
4366      it would be better if we took the values in the range of 0 < K2 < 0.4).
4367
4368  Returns:
4369    A tensor containing an MS-SSIM value for each image in batch.  The values
4370    are in range [0, 1].  Returns a tensor with shape:
4371    broadcast(img1.shape[:-3], img2.shape[:-3]).
4372  """
4373  with ops.name_scope(None, 'MS-SSIM', [img1, img2]):
4374    # Convert to tensor if needed.
4375    img1 = ops.convert_to_tensor(img1, name='img1')
4376    img2 = ops.convert_to_tensor(img2, name='img2')
4377    # Shape checking.
4378    shape1, shape2, checks = _verify_compatible_image_shapes(img1, img2)
4379    with ops.control_dependencies(checks):
4380      img1 = array_ops.identity(img1)
4381
4382    # Need to convert the images to float32.  Scale max_val accordingly so that
4383    # SSIM is computed correctly.
4384    max_val = math_ops.cast(max_val, img1.dtype)
4385    max_val = convert_image_dtype(max_val, dtypes.float32)
4386    img1 = convert_image_dtype(img1, dtypes.float32)
4387    img2 = convert_image_dtype(img2, dtypes.float32)
4388
4389    imgs = [img1, img2]
4390    shapes = [shape1, shape2]
4391
4392    # img1 and img2 are assumed to be a (multi-dimensional) batch of
4393    # 3-dimensional images (height, width, channels). `heads` contain the batch
4394    # dimensions, and `tails` contain the image dimensions.
4395    heads = [s[:-3] for s in shapes]
4396    tails = [s[-3:] for s in shapes]
4397
4398    divisor = [1, 2, 2, 1]
4399    divisor_tensor = constant_op.constant(divisor[1:], dtype=dtypes.int32)
4400
4401    def do_pad(images, remainder):
4402      padding = array_ops.expand_dims(remainder, -1)
4403      padding = array_ops.pad(padding, [[1, 0], [1, 0]])
4404      return [array_ops.pad(x, padding, mode='SYMMETRIC') for x in images]
4405
4406    mcs = []
4407    for k in range(len(power_factors)):
4408      with ops.name_scope(None, 'Scale%d' % k, imgs):
4409        if k > 0:
4410          # Avg pool takes rank 4 tensors. Flatten leading dimensions.
4411          flat_imgs = [
4412              array_ops.reshape(x, array_ops.concat([[-1], t], 0))
4413              for x, t in zip(imgs, tails)
4414          ]
4415
4416          remainder = tails[0] % divisor_tensor
4417          need_padding = math_ops.reduce_any(math_ops.not_equal(remainder, 0))
4418          # pylint: disable=cell-var-from-loop
4419          padded = control_flow_ops.cond(need_padding,
4420                                         lambda: do_pad(flat_imgs, remainder),
4421                                         lambda: flat_imgs)
4422          # pylint: enable=cell-var-from-loop
4423
4424          downscaled = [
4425              nn_ops.avg_pool(
4426                  x, ksize=divisor, strides=divisor, padding='VALID')
4427              for x in padded
4428          ]
4429          tails = [x[1:] for x in array_ops.shape_n(downscaled)]
4430          imgs = [
4431              array_ops.reshape(x, array_ops.concat([h, t], 0))
4432              for x, h, t in zip(downscaled, heads, tails)
4433          ]
4434
4435        # Overwrite previous ssim value since we only need the last one.
4436        ssim_per_channel, cs = _ssim_per_channel(
4437            *imgs,
4438            max_val=max_val,
4439            filter_size=filter_size,
4440            filter_sigma=filter_sigma,
4441            k1=k1,
4442            k2=k2)
4443        mcs.append(nn_ops.relu(cs))
4444
4445    # Remove the cs score for the last scale. In the MS-SSIM calculation,
4446    # we use the l(p) at the highest scale. l(p) * cs(p) is ssim(p).
4447    mcs.pop()  # Remove the cs score for the last scale.
4448    mcs_and_ssim = array_ops.stack(
4449        mcs + [nn_ops.relu(ssim_per_channel)], axis=-1)
4450    # Take weighted geometric mean across the scale axis.
4451    ms_ssim = math_ops.reduce_prod(
4452        math_ops.pow(mcs_and_ssim, power_factors), [-1])
4453
4454    return math_ops.reduce_mean(ms_ssim, [-1])  # Avg over color channels.
4455
4456
4457@tf_export('image.image_gradients')
4458@dispatch.add_dispatch_support
4459def image_gradients(image):
4460  """Returns image gradients (dy, dx) for each color channel.
4461
4462  Both output tensors have the same shape as the input: [batch_size, h, w,
4463  d]. The gradient values are organized so that [I(x+1, y) - I(x, y)] is in
4464  location (x, y). That means that dy will always have zeros in the last row,
4465  and dx will always have zeros in the last column.
4466
4467  Usage Example:
4468    ```python
4469    BATCH_SIZE = 1
4470    IMAGE_HEIGHT = 5
4471    IMAGE_WIDTH = 5
4472    CHANNELS = 1
4473    image = tf.reshape(tf.range(IMAGE_HEIGHT * IMAGE_WIDTH * CHANNELS,
4474      delta=1, dtype=tf.float32),
4475      shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS))
4476    dy, dx = tf.image.image_gradients(image)
4477    print(image[0, :,:,0])
4478    tf.Tensor(
4479      [[ 0.  1.  2.  3.  4.]
4480      [ 5.  6.  7.  8.  9.]
4481      [10. 11. 12. 13. 14.]
4482      [15. 16. 17. 18. 19.]
4483      [20. 21. 22. 23. 24.]], shape=(5, 5), dtype=float32)
4484    print(dy[0, :,:,0])
4485    tf.Tensor(
4486      [[5. 5. 5. 5. 5.]
4487      [5. 5. 5. 5. 5.]
4488      [5. 5. 5. 5. 5.]
4489      [5. 5. 5. 5. 5.]
4490      [0. 0. 0. 0. 0.]], shape=(5, 5), dtype=float32)
4491    print(dx[0, :,:,0])
4492    tf.Tensor(
4493      [[1. 1. 1. 1. 0.]
4494      [1. 1. 1. 1. 0.]
4495      [1. 1. 1. 1. 0.]
4496      [1. 1. 1. 1. 0.]
4497      [1. 1. 1. 1. 0.]], shape=(5, 5), dtype=float32)
4498    ```
4499
4500  Args:
4501    image: Tensor with shape [batch_size, h, w, d].
4502
4503  Returns:
4504    Pair of tensors (dy, dx) holding the vertical and horizontal image
4505    gradients (1-step finite difference).
4506
4507  Raises:
4508    ValueError: If `image` is not a 4D tensor.
4509  """
4510  if image.get_shape().ndims != 4:
4511    raise ValueError('image_gradients expects a 4D tensor '
4512                     '[batch_size, h, w, d], not {}.'.format(image.get_shape()))
4513  image_shape = array_ops.shape(image)
4514  batch_size, height, width, depth = array_ops.unstack(image_shape)
4515  dy = image[:, 1:, :, :] - image[:, :-1, :, :]
4516  dx = image[:, :, 1:, :] - image[:, :, :-1, :]
4517
4518  # Return tensors with same size as original image by concatenating
4519  # zeros. Place the gradient [I(x+1,y) - I(x,y)] on the base pixel (x, y).
4520  shape = array_ops.stack([batch_size, 1, width, depth])
4521  dy = array_ops.concat([dy, array_ops.zeros(shape, image.dtype)], 1)
4522  dy = array_ops.reshape(dy, image_shape)
4523
4524  shape = array_ops.stack([batch_size, height, 1, depth])
4525  dx = array_ops.concat([dx, array_ops.zeros(shape, image.dtype)], 2)
4526  dx = array_ops.reshape(dx, image_shape)
4527
4528  return dy, dx
4529
4530
4531@tf_export('image.sobel_edges')
4532@dispatch.add_dispatch_support
4533def sobel_edges(image):
4534  """Returns a tensor holding Sobel edge maps.
4535
4536  Example usage:
4537
4538  For general usage, `image` would be loaded from a file as below:
4539
4540  ```python
4541  image_bytes = tf.io.read_file(path_to_image_file)
4542  image = tf.image.decode_image(image_bytes)
4543  image = tf.cast(image, tf.float32)
4544  image = tf.expand_dims(image, 0)
4545  ```
4546  But for demo purposes, we are using randomly generated values for `image`:
4547
4548  >>> image = tf.random.uniform(
4549  ...   maxval=255, shape=[1, 28, 28, 3], dtype=tf.float32)
4550  >>> sobel = tf.image.sobel_edges(image)
4551  >>> sobel_y = np.asarray(sobel[0, :, :, :, 0]) # sobel in y-direction
4552  >>> sobel_x = np.asarray(sobel[0, :, :, :, 1]) # sobel in x-direction
4553
4554  For displaying the sobel results, PIL's [Image Module](
4555  https://pillow.readthedocs.io/en/stable/reference/Image.html) can be used:
4556
4557  ```python
4558  # Display edge maps for the first channel (at index 0)
4559  Image.fromarray(sobel_y[..., 0] / 4 + 0.5).show()
4560  Image.fromarray(sobel_x[..., 0] / 4 + 0.5).show()
4561  ```
4562
4563  Args:
4564    image: Image tensor with shape [batch_size, h, w, d] and type float32 or
4565      float64.  The image(s) must be 2x2 or larger.
4566
4567  Returns:
4568    Tensor holding edge maps for each channel. Returns a tensor with shape
4569    [batch_size, h, w, d, 2] where the last two dimensions hold [[dy[0], dx[0]],
4570    [dy[1], dx[1]], ..., [dy[d-1], dx[d-1]]] calculated using the Sobel filter.
4571  """
4572  # Define vertical and horizontal Sobel filters.
4573  static_image_shape = image.get_shape()
4574  image_shape = array_ops.shape(image)
4575  kernels = [[[-1, -2, -1], [0, 0, 0], [1, 2, 1]],
4576             [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]]
4577  num_kernels = len(kernels)
4578  kernels = np.transpose(np.asarray(kernels), (1, 2, 0))
4579  kernels = np.expand_dims(kernels, -2)
4580  kernels_tf = constant_op.constant(kernels, dtype=image.dtype)
4581
4582  kernels_tf = array_ops.tile(
4583      kernels_tf, [1, 1, image_shape[-1], 1], name='sobel_filters')
4584
4585  # Use depth-wise convolution to calculate edge maps per channel.
4586  pad_sizes = [[0, 0], [1, 1], [1, 1], [0, 0]]
4587  padded = array_ops.pad(image, pad_sizes, mode='REFLECT')
4588
4589  # Output tensor has shape [batch_size, h, w, d * num_kernels].
4590  strides = [1, 1, 1, 1]
4591  output = nn.depthwise_conv2d(padded, kernels_tf, strides, 'VALID')
4592
4593  # Reshape to [batch_size, h, w, d, num_kernels].
4594  shape = array_ops.concat([image_shape, [num_kernels]], 0)
4595  output = array_ops.reshape(output, shape=shape)
4596  output.set_shape(static_image_shape.concatenate([num_kernels]))
4597  return output
4598
4599
4600def resize_bicubic(images,
4601                   size,
4602                   align_corners=False,
4603                   name=None,
4604                   half_pixel_centers=False):
4605  return gen_image_ops.resize_bicubic(
4606      images=images,
4607      size=size,
4608      align_corners=align_corners,
4609      half_pixel_centers=half_pixel_centers,
4610      name=name)
4611
4612
4613def resize_bilinear(images,
4614                    size,
4615                    align_corners=False,
4616                    name=None,
4617                    half_pixel_centers=False):
4618  return gen_image_ops.resize_bilinear(
4619      images=images,
4620      size=size,
4621      align_corners=align_corners,
4622      half_pixel_centers=half_pixel_centers,
4623      name=name)
4624
4625
4626def resize_nearest_neighbor(images,
4627                            size,
4628                            align_corners=False,
4629                            name=None,
4630                            half_pixel_centers=False):
4631  return gen_image_ops.resize_nearest_neighbor(
4632      images=images,
4633      size=size,
4634      align_corners=align_corners,
4635      half_pixel_centers=half_pixel_centers,
4636      name=name)
4637
4638
4639resize_area_deprecation = deprecation.deprecated(
4640    date=None,
4641    instructions=(
4642        'Use `tf.image.resize(...method=ResizeMethod.AREA...)` instead.'))
4643tf_export(v1=['image.resize_area'])(
4644    resize_area_deprecation(
4645        dispatch.add_dispatch_support(gen_image_ops.resize_area)))
4646
4647resize_bicubic_deprecation = deprecation.deprecated(
4648    date=None,
4649    instructions=(
4650        'Use `tf.image.resize(...method=ResizeMethod.BICUBIC...)` instead.'))
4651tf_export(v1=['image.resize_bicubic'])(
4652    dispatch.add_dispatch_support(resize_bicubic_deprecation(resize_bicubic)))
4653
4654resize_bilinear_deprecation = deprecation.deprecated(
4655    date=None,
4656    instructions=(
4657        'Use `tf.image.resize(...method=ResizeMethod.BILINEAR...)` instead.'))
4658tf_export(v1=['image.resize_bilinear'])(
4659    dispatch.add_dispatch_support(resize_bilinear_deprecation(resize_bilinear)))
4660
4661resize_nearest_neighbor_deprecation = deprecation.deprecated(
4662    date=None,
4663    instructions=(
4664        'Use `tf.image.resize(...method=ResizeMethod.NEAREST_NEIGHBOR...)` '
4665        'instead.'))
4666tf_export(v1=['image.resize_nearest_neighbor'])(
4667    dispatch.add_dispatch_support(
4668        resize_nearest_neighbor_deprecation(resize_nearest_neighbor)))
4669
4670
4671@tf_export('image.crop_and_resize', v1=[])
4672@dispatch.add_dispatch_support
4673def crop_and_resize_v2(image,
4674                       boxes,
4675                       box_indices,
4676                       crop_size,
4677                       method='bilinear',
4678                       extrapolation_value=0,
4679                       name=None):
4680  """Extracts crops from the input image tensor and resizes them.
4681
4682  Extracts crops from the input image tensor and resizes them using bilinear
4683  sampling or nearest neighbor sampling (possibly with aspect ratio change) to a
4684  common output size specified by `crop_size`. This is more general than the
4685  `crop_to_bounding_box` op which extracts a fixed size slice from the input
4686  image and does not allow resizing or aspect ratio change.
4687
4688  Returns a tensor with `crops` from the input `image` at positions defined at
4689  the bounding box locations in `boxes`. The cropped boxes are all resized (with
4690  bilinear or nearest neighbor interpolation) to a fixed
4691  `size = [crop_height, crop_width]`. The result is a 4-D tensor
4692  `[num_boxes, crop_height, crop_width, depth]`. The resizing is corner aligned.
4693  In particular, if `boxes = [[0, 0, 1, 1]]`, the method will give identical
4694  results to using `tf.compat.v1.image.resize_bilinear()` or
4695  `tf.compat.v1.image.resize_nearest_neighbor()`(depends on the `method`
4696  argument) with
4697  `align_corners=True`.
4698
4699  Args:
4700    image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
4701      Both `image_height` and `image_width` need to be positive.
4702    boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
4703      specifies the coordinates of a box in the `box_ind[i]` image and is
4704      specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
4705      coordinate value of `y` is mapped to the image coordinate at `y *
4706      (image_height - 1)`, so as the `[0, 1]` interval of normalized image
4707      height is mapped to `[0, image_height - 1]` in image height coordinates.
4708      We do allow `y1` > `y2`, in which case the sampled crop is an up-down
4709      flipped version of the original image. The width dimension is treated
4710      similarly. Normalized coordinates outside the `[0, 1]` range are allowed,
4711      in which case we use `extrapolation_value` to extrapolate the input image
4712      values.
4713    box_indices: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0,
4714      batch)`. The value of `box_ind[i]` specifies the image that the `i`-th box
4715      refers to.
4716    crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`.
4717      All cropped image patches are resized to this size. The aspect ratio of
4718      the image content is not preserved. Both `crop_height` and `crop_width`
4719      need to be positive.
4720    method: An optional string specifying the sampling method for resizing. It
4721      can be either `"bilinear"` or `"nearest"` and default to `"bilinear"`.
4722      Currently two sampling methods are supported: Bilinear and Nearest
4723        Neighbor.
4724    extrapolation_value: An optional `float`. Defaults to `0`. Value used for
4725      extrapolation, when applicable.
4726    name: A name for the operation (optional).
4727
4728  Returns:
4729    A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
4730
4731  Example:
4732
4733  ```python
4734  import tensorflow as tf
4735  BATCH_SIZE = 1
4736  NUM_BOXES = 5
4737  IMAGE_HEIGHT = 256
4738  IMAGE_WIDTH = 256
4739  CHANNELS = 3
4740  CROP_SIZE = (24, 24)
4741
4742  image = tf.random.normal(shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH,
4743  CHANNELS) )
4744  boxes = tf.random.uniform(shape=(NUM_BOXES, 4))
4745  box_indices = tf.random.uniform(shape=(NUM_BOXES,), minval=0,
4746  maxval=BATCH_SIZE, dtype=tf.int32)
4747  output = tf.image.crop_and_resize(image, boxes, box_indices, CROP_SIZE)
4748  output.shape  #=> (5, 24, 24, 3)
4749  ```
4750  """
4751  return gen_image_ops.crop_and_resize(image, boxes, box_indices, crop_size,
4752                                       method, extrapolation_value, name)
4753
4754
4755@tf_export(v1=['image.crop_and_resize'])
4756@dispatch.add_dispatch_support
4757@deprecation.deprecated_args(None,
4758                             'box_ind is deprecated, use box_indices instead',
4759                             'box_ind')
4760def crop_and_resize_v1(  # pylint: disable=missing-docstring
4761    image,
4762    boxes,
4763    box_ind=None,
4764    crop_size=None,
4765    method='bilinear',
4766    extrapolation_value=0,
4767    name=None,
4768    box_indices=None):
4769  box_ind = deprecation.deprecated_argument_lookup('box_indices', box_indices,
4770                                                   'box_ind', box_ind)
4771  return gen_image_ops.crop_and_resize(image, boxes, box_ind, crop_size, method,
4772                                       extrapolation_value, name)
4773
4774
4775crop_and_resize_v1.__doc__ = gen_image_ops.crop_and_resize.__doc__
4776
4777
4778@tf_export(v1=['image.extract_glimpse'])
4779@dispatch.add_dispatch_support
4780def extract_glimpse(
4781    input,  # pylint: disable=redefined-builtin
4782    size,
4783    offsets,
4784    centered=True,
4785    normalized=True,
4786    uniform_noise=True,
4787    name=None):
4788  """Extracts a glimpse from the input tensor.
4789
4790  Returns a set of windows called glimpses extracted at location
4791  `offsets` from the input tensor. If the windows only partially
4792  overlaps the inputs, the non-overlapping areas will be filled with
4793  random noise.
4794
4795  The result is a 4-D tensor of shape `[batch_size, glimpse_height,
4796  glimpse_width, channels]`. The channels and batch dimensions are the
4797  same as that of the input tensor. The height and width of the output
4798  windows are specified in the `size` parameter.
4799
4800  The argument `normalized` and `centered` controls how the windows are built:
4801
4802  * If the coordinates are normalized but not centered, 0.0 and 1.0
4803    correspond to the minimum and maximum of each height and width
4804    dimension.
4805  * If the coordinates are both normalized and centered, they range from
4806    -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper
4807    left corner, the lower right corner is located at (1.0, 1.0) and the
4808    center is at (0, 0).
4809  * If the coordinates are not normalized they are interpreted as
4810    numbers of pixels.
4811
4812  Usage Example:
4813
4814  >>> x = [[[[0.0],
4815  ...           [1.0],
4816  ...           [2.0]],
4817  ...          [[3.0],
4818  ...           [4.0],
4819  ...           [5.0]],
4820  ...          [[6.0],
4821  ...           [7.0],
4822  ...           [8.0]]]]
4823  >>> tf.compat.v1.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]],
4824  ...                                    centered=False, normalized=False)
4825  <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
4826  array([[[[0.],
4827           [1.]],
4828          [[3.],
4829           [4.]]]], dtype=float32)>
4830
4831  Args:
4832    input: A `Tensor` of type `float32`. A 4-D float tensor of shape
4833      `[batch_size, height, width, channels]`.
4834    size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the
4835      size of the glimpses to extract.  The glimpse height must be specified
4836      first, following by the glimpse width.
4837    offsets: A `Tensor` of type `float32`. A 2-D integer tensor of shape
4838      `[batch_size, 2]` containing the y, x locations of the center of each
4839      window.
4840    centered: An optional `bool`. Defaults to `True`. indicates if the offset
4841      coordinates are centered relative to the image, in which case the (0, 0)
4842      offset is relative to the center of the input images. If false, the (0,0)
4843      offset corresponds to the upper left corner of the input images.
4844    normalized: An optional `bool`. Defaults to `True`. indicates if the offset
4845      coordinates are normalized.
4846    uniform_noise: An optional `bool`. Defaults to `True`. indicates if the
4847      noise should be generated using a uniform distribution or a Gaussian
4848      distribution.
4849    name: A name for the operation (optional).
4850
4851  Returns:
4852    A `Tensor` of type `float32`.
4853  """
4854  return gen_image_ops.extract_glimpse(
4855      input=input,
4856      size=size,
4857      offsets=offsets,
4858      centered=centered,
4859      normalized=normalized,
4860      uniform_noise=uniform_noise,
4861      name=name)
4862
4863
4864@tf_export('image.extract_glimpse', v1=[])
4865@dispatch.add_dispatch_support
4866def extract_glimpse_v2(
4867    input,  # pylint: disable=redefined-builtin
4868    size,
4869    offsets,
4870    centered=True,
4871    normalized=True,
4872    noise='uniform',
4873    name=None):
4874  """Extracts a glimpse from the input tensor.
4875
4876  Returns a set of windows called glimpses extracted at location
4877  `offsets` from the input tensor. If the windows only partially
4878  overlaps the inputs, the non-overlapping areas will be filled with
4879  random noise.
4880
4881  The result is a 4-D tensor of shape `[batch_size, glimpse_height,
4882  glimpse_width, channels]`. The channels and batch dimensions are the
4883  same as that of the input tensor. The height and width of the output
4884  windows are specified in the `size` parameter.
4885
4886  The argument `normalized` and `centered` controls how the windows are built:
4887
4888  * If the coordinates are normalized but not centered, 0.0 and 1.0
4889    correspond to the minimum and maximum of each height and width
4890    dimension.
4891  * If the coordinates are both normalized and centered, they range from
4892    -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper
4893    left corner, the lower right corner is located at (1.0, 1.0) and the
4894    center is at (0, 0).
4895  * If the coordinates are not normalized they are interpreted as
4896    numbers of pixels.
4897
4898  Usage Example:
4899
4900  >>> x = [[[[0.0],
4901  ...           [1.0],
4902  ...           [2.0]],
4903  ...          [[3.0],
4904  ...           [4.0],
4905  ...           [5.0]],
4906  ...          [[6.0],
4907  ...           [7.0],
4908  ...           [8.0]]]]
4909  >>> tf.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]],
4910  ...                         centered=False, normalized=False)
4911  <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
4912  array([[[[4.],
4913           [5.]],
4914          [[7.],
4915           [8.]]]], dtype=float32)>
4916
4917  Args:
4918    input: A `Tensor` of type `float32`. A 4-D float tensor of shape
4919      `[batch_size, height, width, channels]`.
4920    size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the
4921      size of the glimpses to extract.  The glimpse height must be specified
4922      first, following by the glimpse width.
4923    offsets: A `Tensor` of type `float32`. A 2-D integer tensor of shape
4924      `[batch_size, 2]` containing the y, x locations of the center of each
4925      window.
4926    centered: An optional `bool`. Defaults to `True`. indicates if the offset
4927      coordinates are centered relative to the image, in which case the (0, 0)
4928      offset is relative to the center of the input images. If false, the (0,0)
4929      offset corresponds to the upper left corner of the input images.
4930    normalized: An optional `bool`. Defaults to `True`. indicates if the offset
4931      coordinates are normalized.
4932    noise: An optional `string`. Defaults to `uniform`. indicates if the noise
4933      should be `uniform` (uniform distribution), `gaussian` (gaussian
4934      distribution), or `zero` (zero padding).
4935    name: A name for the operation (optional).
4936
4937  Returns:
4938    A `Tensor` of type `float32`.
4939  """
4940  return gen_image_ops.extract_glimpse_v2(
4941      input=input,
4942      size=size,
4943      offsets=offsets,
4944      centered=centered,
4945      normalized=normalized,
4946      noise=noise,
4947      uniform_noise=False,
4948      name=name)
4949
4950
4951@tf_export('image.combined_non_max_suppression')
4952@dispatch.add_dispatch_support
4953def combined_non_max_suppression(boxes,
4954                                 scores,
4955                                 max_output_size_per_class,
4956                                 max_total_size,
4957                                 iou_threshold=0.5,
4958                                 score_threshold=float('-inf'),
4959                                 pad_per_class=False,
4960                                 clip_boxes=True,
4961                                 name=None):
4962  """Greedily selects a subset of bounding boxes in descending order of score.
4963
4964  This operation performs non_max_suppression on the inputs per batch, across
4965  all classes.
4966  Prunes away boxes that have high intersection-over-union (IOU) overlap
4967  with previously selected boxes.  Bounding boxes are supplied as
4968  [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
4969  diagonal pair of box corners and the coordinates can be provided as normalized
4970  (i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
4971  is agnostic to where the origin is in the coordinate system. Also note that
4972  this algorithm is invariant to orthogonal transformations and translations
4973  of the coordinate system; thus translating or reflections of the coordinate
4974  system result in the same boxes being selected by the algorithm.
4975  The output of this operation is the final boxes, scores and classes tensor
4976  returned after performing non_max_suppression.
4977
4978  Args:
4979    boxes: A 4-D float `Tensor` of shape `[batch_size, num_boxes, q, 4]`. If `q`
4980      is 1 then same boxes are used for all classes otherwise, if `q` is equal
4981      to number of classes, class-specific boxes are used.
4982    scores: A 3-D float `Tensor` of shape `[batch_size, num_boxes, num_classes]`
4983      representing a single score corresponding to each box (each row of boxes).
4984    max_output_size_per_class: A scalar integer `Tensor` representing the
4985      maximum number of boxes to be selected by non-max suppression per class
4986    max_total_size: A int32 scalar representing maximum number of boxes retained
4987      over all classes. Note that setting this value to a large number may
4988      result in OOM error depending on the system workload.
4989    iou_threshold: A float representing the threshold for deciding whether boxes
4990      overlap too much with respect to IOU.
4991    score_threshold: A float representing the threshold for deciding when to
4992      remove boxes based on score.
4993    pad_per_class: If false, the output nmsed boxes, scores and classes are
4994      padded/clipped to `max_total_size`. If true, the output nmsed boxes,
4995      scores and classes are padded to be of length
4996      `max_size_per_class`*`num_classes`, unless it exceeds `max_total_size` in
4997      which case it is clipped to `max_total_size`. Defaults to false.
4998    clip_boxes: If true, the coordinates of output nmsed boxes will be clipped
4999      to [0, 1]. If false, output the box coordinates as it is. Defaults to
5000      true.
5001    name: A name for the operation (optional).
5002
5003  Returns:
5004    'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
5005      containing the non-max suppressed boxes.
5006    'nmsed_scores': A [batch_size, max_detections] float32 tensor containing
5007      the scores for the boxes.
5008    'nmsed_classes': A [batch_size, max_detections] float32 tensor
5009      containing the class for boxes.
5010    'valid_detections': A [batch_size] int32 tensor indicating the number of
5011      valid detections per batch item. Only the top valid_detections[i] entries
5012      in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
5013      entries are zero paddings.
5014  """
5015  with ops.name_scope(name, 'combined_non_max_suppression'):
5016    iou_threshold = ops.convert_to_tensor(
5017        iou_threshold, dtype=dtypes.float32, name='iou_threshold')
5018    score_threshold = ops.convert_to_tensor(
5019        score_threshold, dtype=dtypes.float32, name='score_threshold')
5020
5021    # Convert `max_total_size` to tensor *without* setting the `dtype` param.
5022    # This allows us to catch `int32` overflow case with `max_total_size`
5023    # whose expected dtype is `int32` by the op registration. Any number within
5024    # `int32` will get converted to `int32` tensor. Anything larger will get
5025    # converted to `int64`. Passing in `int64` for `max_total_size` to the op
5026    # will throw dtype mismatch exception.
5027    # TODO(b/173251596): Once there is a more general solution to warn against
5028    # int overflow conversions, revisit this check.
5029    max_total_size = ops.convert_to_tensor(max_total_size)
5030
5031    return gen_image_ops.combined_non_max_suppression(
5032        boxes, scores, max_output_size_per_class, max_total_size, iou_threshold,
5033        score_threshold, pad_per_class, clip_boxes)
5034
5035
5036def _bbox_overlap(boxes_a, boxes_b):
5037  """Calculates the overlap (iou - intersection over union) between boxes_a and boxes_b.
5038
5039  Args:
5040    boxes_a: a tensor with a shape of [batch_size, N, 4]. N is the number of
5041      boxes per image. The last dimension is the pixel coordinates in
5042      [ymin, xmin, ymax, xmax] form.
5043    boxes_b: a tensor with a shape of [batch_size, M, 4]. M is the number of
5044      boxes. The last dimension is the pixel coordinates in
5045      [ymin, xmin, ymax, xmax] form.
5046  Returns:
5047    intersection_over_union: a tensor with as a shape of [batch_size, N, M],
5048    representing the ratio of intersection area over union area (IoU) between
5049    two boxes
5050  """
5051  with ops.name_scope('bbox_overlap'):
5052    a_y_min, a_x_min, a_y_max, a_x_max = array_ops.split(
5053        value=boxes_a, num_or_size_splits=4, axis=2)
5054    b_y_min, b_x_min, b_y_max, b_x_max = array_ops.split(
5055        value=boxes_b, num_or_size_splits=4, axis=2)
5056
5057    # Calculates the intersection area.
5058    i_xmin = math_ops.maximum(
5059        a_x_min, array_ops.transpose(b_x_min, [0, 2, 1]))
5060    i_xmax = math_ops.minimum(
5061        a_x_max, array_ops.transpose(b_x_max, [0, 2, 1]))
5062    i_ymin = math_ops.maximum(
5063        a_y_min, array_ops.transpose(b_y_min, [0, 2, 1]))
5064    i_ymax = math_ops.minimum(
5065        a_y_max, array_ops.transpose(b_y_max, [0, 2, 1]))
5066    i_area = math_ops.maximum(
5067        (i_xmax - i_xmin), 0) * math_ops.maximum((i_ymax - i_ymin), 0)
5068
5069    # Calculates the union area.
5070    a_area = (a_y_max - a_y_min) * (a_x_max - a_x_min)
5071    b_area = (b_y_max - b_y_min) * (b_x_max - b_x_min)
5072    EPSILON = 1e-8
5073    # Adds a small epsilon to avoid divide-by-zero.
5074    u_area = a_area + array_ops.transpose(b_area, [0, 2, 1]) - i_area + EPSILON
5075
5076    # Calculates IoU.
5077    intersection_over_union = i_area / u_area
5078
5079    return intersection_over_union
5080
5081
5082def _self_suppression(iou, _, iou_sum, iou_threshold):
5083  """Suppress boxes in the same tile.
5084
5085     Compute boxes that cannot be suppressed by others (i.e.,
5086     can_suppress_others), and then use them to suppress boxes in the same tile.
5087
5088  Args:
5089    iou: a tensor of shape [batch_size, num_boxes_with_padding] representing
5090    intersection over union.
5091    iou_sum: a scalar tensor.
5092    iou_threshold: a scalar tensor.
5093
5094  Returns:
5095    iou_suppressed: a tensor of shape [batch_size, num_boxes_with_padding].
5096    iou_diff: a scalar tensor representing whether any box is supressed in
5097      this step.
5098    iou_sum_new: a scalar tensor of shape [batch_size] that represents
5099      the iou sum after suppression.
5100    iou_threshold: a scalar tensor.
5101  """
5102  batch_size = array_ops.shape(iou)[0]
5103  can_suppress_others = math_ops.cast(
5104      array_ops.reshape(
5105          math_ops.reduce_max(iou, 1) < iou_threshold, [batch_size, -1, 1]),
5106      iou.dtype)
5107  iou_after_suppression = array_ops.reshape(
5108      math_ops.cast(
5109          math_ops.reduce_max(can_suppress_others * iou, 1) < iou_threshold,
5110          iou.dtype),
5111      [batch_size, -1, 1]) * iou
5112  iou_sum_new = math_ops.reduce_sum(iou_after_suppression, [1, 2])
5113  return [
5114      iou_after_suppression,
5115      math_ops.reduce_any(iou_sum - iou_sum_new > iou_threshold), iou_sum_new,
5116      iou_threshold
5117  ]
5118
5119
5120def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx, tile_size):
5121  """Suppress boxes between different tiles.
5122
5123  Args:
5124    boxes: a tensor of shape [batch_size, num_boxes_with_padding, 4]
5125    box_slice: a tensor of shape [batch_size, tile_size, 4]
5126    iou_threshold: a scalar tensor
5127    inner_idx: a scalar tensor representing the tile index of the tile
5128      that is used to supress box_slice
5129    tile_size: an integer representing the number of boxes in a tile
5130
5131  Returns:
5132    boxes: unchanged boxes as input
5133    box_slice_after_suppression: box_slice after suppression
5134    iou_threshold: unchanged
5135  """
5136  batch_size = array_ops.shape(boxes)[0]
5137  new_slice = array_ops.slice(
5138      boxes, [0, inner_idx * tile_size, 0],
5139      [batch_size, tile_size, 4])
5140  iou = _bbox_overlap(new_slice, box_slice)
5141  box_slice_after_suppression = array_ops.expand_dims(
5142      math_ops.cast(math_ops.reduce_all(iou < iou_threshold, [1]),
5143                    box_slice.dtype),
5144      2) * box_slice
5145  return boxes, box_slice_after_suppression, iou_threshold, inner_idx + 1
5146
5147
5148def _suppression_loop_body(boxes, iou_threshold, output_size, idx, tile_size):
5149  """Process boxes in the range [idx*tile_size, (idx+1)*tile_size).
5150
5151  Args:
5152    boxes: a tensor with a shape of [batch_size, anchors, 4].
5153    iou_threshold: a float representing the threshold for deciding whether boxes
5154      overlap too much with respect to IOU.
5155    output_size: an int32 tensor of size [batch_size]. Representing the number
5156      of selected boxes for each batch.
5157    idx: an integer scalar representing induction variable.
5158    tile_size: an integer representing the number of boxes in a tile
5159
5160  Returns:
5161    boxes: updated boxes.
5162    iou_threshold: pass down iou_threshold to the next iteration.
5163    output_size: the updated output_size.
5164    idx: the updated induction variable.
5165  """
5166  with ops.name_scope('suppression_loop_body'):
5167    num_tiles = array_ops.shape(boxes)[1] // tile_size
5168    batch_size = array_ops.shape(boxes)[0]
5169
5170    def cross_suppression_func(boxes, box_slice, iou_threshold, inner_idx):
5171      return _cross_suppression(boxes, box_slice, iou_threshold, inner_idx,
5172                                tile_size)
5173
5174    # Iterates over tiles that can possibly suppress the current tile.
5175    box_slice = array_ops.slice(boxes, [0, idx * tile_size, 0],
5176                                [batch_size, tile_size, 4])
5177    _, box_slice, _, _ = control_flow_ops.while_loop(
5178        lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx,
5179        cross_suppression_func,
5180        [boxes, box_slice, iou_threshold, constant_op.constant(0)])
5181
5182    # Iterates over the current tile to compute self-suppression.
5183    iou = _bbox_overlap(box_slice, box_slice)
5184    mask = array_ops.expand_dims(
5185        array_ops.reshape(
5186            math_ops.range(tile_size), [1, -1]) > array_ops.reshape(
5187                math_ops.range(tile_size), [-1, 1]), 0)
5188    iou *= math_ops.cast(
5189        math_ops.logical_and(mask, iou >= iou_threshold), iou.dtype)
5190    suppressed_iou, _, _, _ = control_flow_ops.while_loop(
5191        lambda _iou, loop_condition, _iou_sum, _: loop_condition,
5192        _self_suppression,
5193        [iou, constant_op.constant(True), math_ops.reduce_sum(iou, [1, 2]),
5194         iou_threshold])
5195    suppressed_box = math_ops.reduce_sum(suppressed_iou, 1) > 0
5196    box_slice *= array_ops.expand_dims(
5197        1.0 - math_ops.cast(suppressed_box, box_slice.dtype), 2)
5198
5199    # Uses box_slice to update the input boxes.
5200    mask = array_ops.reshape(
5201        math_ops.cast(
5202            math_ops.equal(math_ops.range(num_tiles), idx), boxes.dtype),
5203        [1, -1, 1, 1])
5204    boxes = array_ops.tile(array_ops.expand_dims(
5205        box_slice, [1]), [1, num_tiles, 1, 1]) * mask + array_ops.reshape(
5206            boxes, [batch_size, num_tiles, tile_size, 4]) * (1 - mask)
5207    boxes = array_ops.reshape(boxes, [batch_size, -1, 4])
5208
5209    # Updates output_size.
5210    output_size += math_ops.reduce_sum(
5211        math_ops.cast(
5212            math_ops.reduce_any(box_slice > 0, [2]), dtypes.int32), [1])
5213  return boxes, iou_threshold, output_size, idx + 1
5214
5215
5216@tf_export('image.non_max_suppression_padded')
5217@dispatch.add_dispatch_support
5218def non_max_suppression_padded(boxes,
5219                               scores,
5220                               max_output_size,
5221                               iou_threshold=0.5,
5222                               score_threshold=float('-inf'),
5223                               pad_to_max_output_size=False,
5224                               name=None,
5225                               sorted_input=False,
5226                               canonicalized_coordinates=False,
5227                               tile_size=512):
5228  """Greedily selects a subset of bounding boxes in descending order of score.
5229
5230  Performs algorithmically equivalent operation to tf.image.non_max_suppression,
5231  with the addition of an optional parameter which zero-pads the output to
5232  be of size `max_output_size`.
5233  The output of this operation is a tuple containing the set of integers
5234  indexing into the input collection of bounding boxes representing the selected
5235  boxes and the number of valid indices in the index set.  The bounding box
5236  coordinates corresponding to the selected indices can then be obtained using
5237  the `tf.slice` and `tf.gather` operations.  For example:
5238    ```python
5239    selected_indices_padded, num_valid = tf.image.non_max_suppression_padded(
5240        boxes, scores, max_output_size, iou_threshold,
5241        score_threshold, pad_to_max_output_size=True)
5242    selected_indices = tf.slice(
5243        selected_indices_padded, tf.constant([0]), num_valid)
5244    selected_boxes = tf.gather(boxes, selected_indices)
5245    ```
5246
5247  Args:
5248    boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4].
5249      Dimensions except the last two are batch dimensions.
5250    scores: a tensor of rank 1 or higher with a shape of [..., num_boxes].
5251    max_output_size: a scalar integer `Tensor` representing the maximum number
5252      of boxes to be selected by non max suppression. Note that setting this
5253      value to a large number may result in OOM error depending on the system
5254      workload.
5255    iou_threshold: a float representing the threshold for deciding whether boxes
5256      overlap too much with respect to IoU (intersection over union).
5257    score_threshold: a float representing the threshold for box scores. Boxes
5258      with a score that is not larger than this threshold will be suppressed.
5259    pad_to_max_output_size: whether to pad the output idx to max_output_size.
5260      Must be set to True when the input is a batch of images.
5261    name: name of operation.
5262    sorted_input: a boolean indicating whether the input boxes and scores
5263      are sorted in descending order by the score.
5264    canonicalized_coordinates: if box coordinates are given as
5265    `[y_min, x_min, y_max, x_max]`, setting to True eliminate redundant
5266     computation to canonicalize box coordinates.
5267    tile_size: an integer representing the number of boxes in a tile, i.e.,
5268      the maximum number of boxes per image that can be used to suppress other
5269      boxes in parallel; larger tile_size means larger parallelism and
5270      potentially more redundant work.
5271  Returns:
5272    idx: a tensor with a shape of [..., num_boxes] representing the
5273      indices selected by non-max suppression. The leading dimensions
5274      are the batch dimensions of the input boxes. All numbers are within
5275      [0, num_boxes). For each image (i.e., idx[i]), only the first num_valid[i]
5276      indices (i.e., idx[i][:num_valid[i]]) are valid.
5277    num_valid: a tensor of rank 0 or higher with a shape of [...]
5278      representing the number of valid indices in idx. Its dimensions are the
5279      batch dimensions of the input boxes.
5280   Raises:
5281    ValueError: When set pad_to_max_output_size to False for batched input.
5282  """
5283  # if no new arguments are used and no later than 2020/6/23, use the old
5284  # version to give us time to fix TFLite conversion after the TF 2.3 release.
5285  if (not sorted_input) and \
5286      (not canonicalized_coordinates) and \
5287      tile_size == 512 and not compat.forward_compatible(2020, 6, 23):
5288    return non_max_suppression_padded_v1(
5289        boxes, scores, max_output_size, iou_threshold, score_threshold,
5290        pad_to_max_output_size, name)
5291  else:
5292    with ops.name_scope(name, 'non_max_suppression_padded'):
5293      if not pad_to_max_output_size:
5294        # pad_to_max_output_size may be set to False only when the shape of
5295        # boxes is [num_boxes, 4], i.e., a single image. We make best effort to
5296        # detect violations at compile time. If `boxes` does not have a static
5297        # rank, the check allows computation to proceed.
5298        if boxes.get_shape().rank is not None and boxes.get_shape().rank > 2:
5299          raise ValueError(
5300              "'pad_to_max_output_size' (value {}) must be True for "
5301              'batched input'.format(pad_to_max_output_size))
5302      if name is None:
5303        name = ''
5304      idx, num_valid = non_max_suppression_padded_v2(
5305          boxes, scores, max_output_size, iou_threshold, score_threshold,
5306          sorted_input, canonicalized_coordinates, tile_size)
5307      # def_function.function seems to lose shape information, so set it here.
5308      if not pad_to_max_output_size:
5309        idx = idx[0, :num_valid]
5310      else:
5311        batch_dims = array_ops.concat([
5312            array_ops.shape(boxes)[:-2],
5313            array_ops.expand_dims(max_output_size, 0)
5314        ], 0)
5315        idx = array_ops.reshape(idx, batch_dims)
5316      return idx, num_valid
5317
5318
5319# TODO(b/158709815): Improve performance regression due to
5320# def_function.function.
5321@def_function.function(
5322    experimental_implements='non_max_suppression_padded_v2')
5323def non_max_suppression_padded_v2(boxes,
5324                                  scores,
5325                                  max_output_size,
5326                                  iou_threshold=0.5,
5327                                  score_threshold=float('-inf'),
5328                                  sorted_input=False,
5329                                  canonicalized_coordinates=False,
5330                                  tile_size=512):
5331  """Non-maximum suppression.
5332
5333  Prunes away boxes that have high intersection-over-union (IOU) overlap
5334  with previously selected boxes. Bounding boxes are supplied as
5335  `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any
5336  diagonal pair of box corners and the coordinates can be provided as normalized
5337  (i.e., lying in the interval `[0, 1]`) or absolute. The bounding box
5338  coordinates are cannonicalized to `[y_min, x_min, y_max, x_max]`,
5339  where `(y_min, x_min)` and `(y_max, x_mas)` are the coordinates of the lower
5340  left and upper right corner. User may indiciate the input box coordinates are
5341  already canonicalized to eliminate redundant work by setting
5342  canonicalized_coordinates to `True`. Note that this algorithm is agnostic to
5343  where the origin is in the coordinate system. Note that this algorithm is
5344  invariant to orthogonal transformations and translations of the coordinate
5345  system; thus translating or reflections of the coordinate system result in the
5346  same boxes being selected by the algorithm.
5347
5348  Similar to tf.image.non_max_suppression, non_max_suppression_padded
5349  implements hard NMS but can operate on a batch of images and improves
5350  performance by titling the bounding boxes. Non_max_suppression_padded should
5351  be preferred over tf.image_non_max_suppression when running on devices with
5352  abundant parallelsim for higher computation speed. For soft NMS, refer to
5353  tf.image.non_max_suppression_with_scores.
5354
5355  While a serial NMS algorithm iteratively uses the highest-scored unprocessed
5356  box to suppress boxes, this algorithm uses many boxes to suppress other boxes
5357  in parallel. The key idea is to partition boxes into tiles based on their
5358  score and suppresses boxes tile by tile, thus achieving parallelism within a
5359  tile. The tile size determines the degree of parallelism.
5360
5361  In cross suppression (using boxes of tile A to suppress boxes of tile B),
5362  all boxes in A can independently suppress boxes in B.
5363
5364  Self suppression (suppressing boxes of the same tile) needs to be iteratively
5365  applied until there's no more suppression. In each iteration, boxes that
5366  cannot be suppressed are used to suppress boxes in the same tile.
5367
5368  boxes = boxes.pad_to_multiply_of(tile_size)
5369  num_tiles = len(boxes) // tile_size
5370  output_boxes = []
5371  for i in range(num_tiles):
5372    box_tile = boxes[i*tile_size : (i+1)*tile_size]
5373    for j in range(i - 1):
5374      # in parallel suppress boxes in box_tile using boxes from suppressing_tile
5375      suppressing_tile = boxes[j*tile_size : (j+1)*tile_size]
5376      iou = _bbox_overlap(box_tile, suppressing_tile)
5377      # if the box is suppressed in iou, clear it to a dot
5378      box_tile *= _update_boxes(iou)
5379    # Iteratively handle the diagnal tile.
5380    iou = _box_overlap(box_tile, box_tile)
5381    iou_changed = True
5382    while iou_changed:
5383      # boxes that are not suppressed by anything else
5384      suppressing_boxes = _get_suppressing_boxes(iou)
5385      # boxes that are suppressed by suppressing_boxes
5386      suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes)
5387      # clear iou to 0 for boxes that are suppressed, as they cannot be used
5388      # to suppress other boxes any more
5389      new_iou = _clear_iou(iou, suppressed_boxes)
5390      iou_changed = (new_iou != iou)
5391      iou = new_iou
5392    # remaining boxes that can still suppress others, are selected boxes.
5393    output_boxes.append(_get_suppressing_boxes(iou))
5394    if len(output_boxes) >= max_output_size:
5395      break
5396
5397  Args:
5398    boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4].
5399      Dimensions except the last two are batch dimensions. The last dimension
5400      represents box coordinates, given as [y_1, x_1, y_2, x_2]. The coordinates
5401      on each dimension can be given in any order
5402      (see also `canonicalized_coordinates`) but must describe a box with
5403      a positive area.
5404    scores: a tensor of rank 1 or higher with a shape of [..., num_boxes].
5405    max_output_size: a scalar integer `Tensor` representing the maximum number
5406      of boxes to be selected by non max suppression.
5407    iou_threshold: a float representing the threshold for deciding whether boxes
5408      overlap too much with respect to IoU (intersection over union).
5409    score_threshold: a float representing the threshold for box scores. Boxes
5410      with a score that is not larger than this threshold will be suppressed.
5411    sorted_input: a boolean indicating whether the input boxes and scores
5412      are sorted in descending order by the score.
5413    canonicalized_coordinates: if box coordinates are given as
5414    `[y_min, x_min, y_max, x_max]`, setting to True eliminate redundant
5415     computation to canonicalize box coordinates.
5416    tile_size: an integer representing the number of boxes in a tile, i.e.,
5417      the maximum number of boxes per image that can be used to suppress other
5418      boxes in parallel; larger tile_size means larger parallelism and
5419      potentially more redundant work.
5420  Returns:
5421    idx: a tensor with a shape of [..., num_boxes] representing the
5422      indices selected by non-max suppression. The leading dimensions
5423      are the batch dimensions of the input boxes. All numbers are within
5424      [0, num_boxes). For each image (i.e., idx[i]), only the first num_valid[i]
5425      indices (i.e., idx[i][:num_valid[i]]) are valid.
5426    num_valid: a tensor of rank 0 or higher with a shape of [...]
5427      representing the number of valid indices in idx. Its dimensions are the
5428      batch dimensions of the input boxes.
5429   Raises:
5430    ValueError: When set pad_to_max_output_size to False for batched input.
5431  """
5432  def _sort_scores_and_boxes(scores, boxes):
5433    """Sort boxes based their score from highest to lowest.
5434
5435    Args:
5436      scores: a tensor with a shape of [batch_size, num_boxes] representing
5437        the scores of boxes.
5438      boxes: a tensor with a shape of [batch_size, num_boxes, 4] representing
5439        the boxes.
5440    Returns:
5441      sorted_scores: a tensor with a shape of [batch_size, num_boxes]
5442        representing the sorted scores.
5443      sorted_boxes: a tensor representing the sorted boxes.
5444      sorted_scores_indices: a tensor with a shape of [batch_size, num_boxes]
5445        representing the index of the scores in a sorted descending order.
5446    """
5447    with ops.name_scope('sort_scores_and_boxes'):
5448      batch_size = array_ops.shape(boxes)[0]
5449      num_boxes = array_ops.shape(boxes)[1]
5450      sorted_scores_indices = sort_ops.argsort(
5451          scores, axis=1, direction='DESCENDING')
5452      index_offsets = math_ops.range(batch_size) * num_boxes
5453      indices = array_ops.reshape(
5454          sorted_scores_indices + array_ops.expand_dims(index_offsets, 1), [-1])
5455      sorted_scores = array_ops.reshape(
5456          array_ops.gather(array_ops.reshape(scores, [-1]), indices),
5457          [batch_size, -1])
5458      sorted_boxes = array_ops.reshape(
5459          array_ops.gather(array_ops.reshape(boxes, [-1, 4]), indices),
5460          [batch_size, -1, 4])
5461    return sorted_scores, sorted_boxes, sorted_scores_indices
5462
5463  batch_dims = array_ops.shape(boxes)[:-2]
5464  num_boxes = array_ops.shape(boxes)[-2]
5465  boxes = array_ops.reshape(boxes, [-1, num_boxes, 4])
5466  scores = array_ops.reshape(scores, [-1, num_boxes])
5467  batch_size = array_ops.shape(boxes)[0]
5468  if score_threshold != float('-inf'):
5469    with ops.name_scope('filter_by_score'):
5470      score_mask = math_ops.cast(scores > score_threshold, scores.dtype)
5471      scores *= score_mask
5472      box_mask = array_ops.expand_dims(
5473          math_ops.cast(score_mask, boxes.dtype), 2)
5474      boxes *= box_mask
5475
5476  if not canonicalized_coordinates:
5477    with ops.name_scope('canonicalize_coordinates'):
5478      y_1, x_1, y_2, x_2 = array_ops.split(
5479          value=boxes, num_or_size_splits=4, axis=2)
5480      y_1_is_min = math_ops.reduce_all(
5481          math_ops.less_equal(y_1[0, 0, 0], y_2[0, 0, 0]))
5482      y_min, y_max = control_flow_ops.cond(
5483          y_1_is_min, lambda: (y_1, y_2), lambda: (y_2, y_1))
5484      x_1_is_min = math_ops.reduce_all(
5485          math_ops.less_equal(x_1[0, 0, 0], x_2[0, 0, 0]))
5486      x_min, x_max = control_flow_ops.cond(
5487          x_1_is_min, lambda: (x_1, x_2), lambda: (x_2, x_1))
5488      boxes = array_ops.concat([y_min, x_min, y_max, x_max], axis=2)
5489
5490  if not sorted_input:
5491    scores, boxes, sorted_indices = _sort_scores_and_boxes(scores, boxes)
5492  else:
5493    # Default value required for Autograph.
5494    sorted_indices = array_ops.zeros_like(scores, dtype=dtypes.int32)
5495
5496  pad = math_ops.cast(
5497      math_ops.ceil(
5498          math_ops.cast(
5499              math_ops.maximum(num_boxes, max_output_size), dtypes.float32) /
5500          math_ops.cast(tile_size, dtypes.float32)),
5501      dtypes.int32) * tile_size - num_boxes
5502  boxes = array_ops.pad(
5503      math_ops.cast(boxes, dtypes.float32), [[0, 0], [0, pad], [0, 0]])
5504  scores = array_ops.pad(
5505      math_ops.cast(scores, dtypes.float32), [[0, 0], [0, pad]])
5506  num_boxes_after_padding = num_boxes + pad
5507  num_iterations = num_boxes_after_padding // tile_size
5508  def _loop_cond(unused_boxes, unused_threshold, output_size, idx):
5509    return math_ops.logical_and(
5510        math_ops.reduce_min(output_size) < max_output_size,
5511        idx < num_iterations)
5512
5513  def suppression_loop_body(boxes, iou_threshold, output_size, idx):
5514    return _suppression_loop_body(
5515        boxes, iou_threshold, output_size, idx, tile_size)
5516
5517  selected_boxes, _, output_size, _ = control_flow_ops.while_loop(
5518      _loop_cond,
5519      suppression_loop_body,
5520      [
5521          boxes, iou_threshold,
5522          array_ops.zeros([batch_size], dtypes.int32),
5523          constant_op.constant(0)
5524      ],
5525      shape_invariants=[
5526          tensor_shape.TensorShape([None, None, 4]),
5527          tensor_shape.TensorShape([]),
5528          tensor_shape.TensorShape([None]),
5529          tensor_shape.TensorShape([]),
5530      ],
5531  )
5532  num_valid = math_ops.minimum(output_size, max_output_size)
5533  idx = num_boxes_after_padding - math_ops.cast(
5534      nn_ops.top_k(
5535          math_ops.cast(math_ops.reduce_any(
5536              selected_boxes > 0, [2]), dtypes.int32) *
5537          array_ops.expand_dims(
5538              math_ops.range(num_boxes_after_padding, 0, -1), 0),
5539          max_output_size)[0], dtypes.int32)
5540  idx = math_ops.minimum(idx, num_boxes - 1)
5541
5542  if not sorted_input:
5543    index_offsets = math_ops.range(batch_size) * num_boxes
5544    gather_idx = array_ops.reshape(
5545        idx + array_ops.expand_dims(index_offsets, 1), [-1])
5546    idx = array_ops.reshape(
5547        array_ops.gather(array_ops.reshape(sorted_indices, [-1]),
5548                         gather_idx),
5549        [batch_size, -1])
5550  invalid_index = array_ops.fill([batch_size, max_output_size], 0)
5551  idx_index = array_ops.expand_dims(math_ops.range(max_output_size), 0)
5552  num_valid_expanded = array_ops.expand_dims(num_valid, 1)
5553  idx = array_ops.where(idx_index < num_valid_expanded,
5554                        idx, invalid_index)
5555
5556  num_valid = array_ops.reshape(num_valid, batch_dims)
5557  return idx, num_valid
5558
5559
5560def non_max_suppression_padded_v1(boxes,
5561                                  scores,
5562                                  max_output_size,
5563                                  iou_threshold=0.5,
5564                                  score_threshold=float('-inf'),
5565                                  pad_to_max_output_size=False,
5566                                  name=None):
5567  """Greedily selects a subset of bounding boxes in descending order of score.
5568
5569  Performs algorithmically equivalent operation to tf.image.non_max_suppression,
5570  with the addition of an optional parameter which zero-pads the output to
5571  be of size `max_output_size`.
5572  The output of this operation is a tuple containing the set of integers
5573  indexing into the input collection of bounding boxes representing the selected
5574  boxes and the number of valid indices in the index set.  The bounding box
5575  coordinates corresponding to the selected indices can then be obtained using
5576  the `tf.slice` and `tf.gather` operations.  For example:
5577    ```python
5578    selected_indices_padded, num_valid = tf.image.non_max_suppression_padded(
5579        boxes, scores, max_output_size, iou_threshold,
5580        score_threshold, pad_to_max_output_size=True)
5581    selected_indices = tf.slice(
5582        selected_indices_padded, tf.constant([0]), num_valid)
5583    selected_boxes = tf.gather(boxes, selected_indices)
5584    ```
5585
5586  Args:
5587    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
5588    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
5589      score corresponding to each box (each row of boxes).
5590    max_output_size: A scalar integer `Tensor` representing the maximum number
5591      of boxes to be selected by non-max suppression.
5592    iou_threshold: A float representing the threshold for deciding whether boxes
5593      overlap too much with respect to IOU.
5594    score_threshold: A float representing the threshold for deciding when to
5595      remove boxes based on score.
5596    pad_to_max_output_size: bool.  If True, size of `selected_indices` output is
5597      padded to `max_output_size`.
5598    name: A name for the operation (optional).
5599
5600  Returns:
5601    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
5602      selected indices from the boxes tensor, where `M <= max_output_size`.
5603    valid_outputs: A scalar integer `Tensor` denoting how many elements in
5604    `selected_indices` are valid.  Valid elements occur first, then padding.
5605  """
5606  with ops.name_scope(name, 'non_max_suppression_padded'):
5607    iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
5608    score_threshold = ops.convert_to_tensor(
5609        score_threshold, name='score_threshold')
5610    return gen_image_ops.non_max_suppression_v4(boxes, scores, max_output_size,
5611                                                iou_threshold, score_threshold,
5612                                                pad_to_max_output_size)
5613
5614
5615@tf_export('image.draw_bounding_boxes', v1=[])
5616@dispatch.add_dispatch_support
5617def draw_bounding_boxes_v2(images, boxes, colors, name=None):
5618  """Draw bounding boxes on a batch of images.
5619
5620  Outputs a copy of `images` but draws on top of the pixels zero or more
5621  bounding boxes specified by the locations in `boxes`. The coordinates of the
5622  each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`.
5623  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
5624  and the height of the underlying image.
5625
5626  For example, if an image is 100 x 200 pixels (height x width) and the bounding
5627  box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
5628  the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates).
5629
5630  Parts of the bounding box may fall outside the image.
5631
5632  Args:
5633    images: A `Tensor`. Must be one of the following types: `float32`, `half`.
5634      4-D with shape `[batch, height, width, depth]`. A batch of images.
5635    boxes: A `Tensor` of type `float32`. 3-D with shape `[batch,
5636      num_bounding_boxes, 4]` containing bounding boxes.
5637    colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle
5638      through for the boxes.
5639    name: A name for the operation (optional).
5640
5641  Returns:
5642    A `Tensor`. Has the same type as `images`.
5643
5644  Usage Example:
5645
5646  >>> # create an empty image
5647  >>> img = tf.zeros([1, 3, 3, 3])
5648  >>> # draw a box around the image
5649  >>> box = np.array([0, 0, 1, 1])
5650  >>> boxes = box.reshape([1, 1, 4])
5651  >>> # alternate between red and blue
5652  >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
5653  >>> tf.image.draw_bounding_boxes(img, boxes, colors)
5654  <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy=
5655  array([[[[1., 0., 0.],
5656          [1., 0., 0.],
5657          [1., 0., 0.]],
5658          [[1., 0., 0.],
5659          [0., 0., 0.],
5660          [1., 0., 0.]],
5661          [[1., 0., 0.],
5662          [1., 0., 0.],
5663          [1., 0., 0.]]]], dtype=float32)>
5664  """
5665  if colors is None:
5666    return gen_image_ops.draw_bounding_boxes(images, boxes, name)
5667  return gen_image_ops.draw_bounding_boxes_v2(images, boxes, colors, name)
5668
5669
5670@tf_export(v1=['image.draw_bounding_boxes'])
5671@dispatch.add_dispatch_support
5672def draw_bounding_boxes(images, boxes, name=None, colors=None):
5673  """Draw bounding boxes on a batch of images.
5674
5675  Outputs a copy of `images` but draws on top of the pixels zero or more
5676  bounding boxes specified by the locations in `boxes`. The coordinates of the
5677  each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`.
5678  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
5679  and the height of the underlying image.
5680
5681  For example, if an image is 100 x 200 pixels (height x width) and the bounding
5682  box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
5683  the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates).
5684
5685  Parts of the bounding box may fall outside the image.
5686
5687  Args:
5688    images: A `Tensor`. Must be one of the following types: `float32`, `half`.
5689      4-D with shape `[batch, height, width, depth]`. A batch of images.
5690    boxes: A `Tensor` of type `float32`. 3-D with shape `[batch,
5691      num_bounding_boxes, 4]` containing bounding boxes.
5692    name: A name for the operation (optional).
5693    colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle
5694      through for the boxes.
5695
5696  Returns:
5697    A `Tensor`. Has the same type as `images`.
5698
5699  Usage Example:
5700
5701  >>> # create an empty image
5702  >>> img = tf.zeros([1, 3, 3, 3])
5703  >>> # draw a box around the image
5704  >>> box = np.array([0, 0, 1, 1])
5705  >>> boxes = box.reshape([1, 1, 4])
5706  >>> # alternate between red and blue
5707  >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
5708  >>> tf.image.draw_bounding_boxes(img, boxes, colors)
5709  <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy=
5710  array([[[[1., 0., 0.],
5711          [1., 0., 0.],
5712          [1., 0., 0.]],
5713          [[1., 0., 0.],
5714          [0., 0., 0.],
5715          [1., 0., 0.]],
5716          [[1., 0., 0.],
5717          [1., 0., 0.],
5718          [1., 0., 0.]]]], dtype=float32)>
5719  """
5720  return draw_bounding_boxes_v2(images, boxes, colors, name)
5721
5722
5723@tf_export('image.generate_bounding_box_proposals')
5724@dispatch.add_dispatch_support
5725def generate_bounding_box_proposals(scores,
5726                                    bbox_deltas,
5727                                    image_info,
5728                                    anchors,
5729                                    nms_threshold=0.7,
5730                                    pre_nms_topn=6000,
5731                                    min_size=16,
5732                                    post_nms_topn=300,
5733                                    name=None):
5734  """Generate bounding box proposals from encoded bounding boxes.
5735
5736  Args:
5737    scores: A 4-D float `Tensor` of shape
5738     `[num_images, height, width, num_achors]` containing scores of
5739      the boxes for given anchors, can be unsorted.
5740    bbox_deltas: A 4-D float `Tensor` of shape
5741     `[num_images, height, width, 4 x num_anchors]` encoding boxes
5742      with respect to each anchor. Coordinates are given
5743      in the form `[dy, dx, dh, dw]`.
5744    image_info: A 2-D float `Tensor` of shape `[num_images, 5]`
5745      containing image information Height, Width, Scale.
5746    anchors: A 2-D float `Tensor` of shape `[num_anchors, 4]`
5747      describing the anchor boxes.
5748      Boxes are formatted in the form `[y1, x1, y2, x2]`.
5749    nms_threshold: A scalar float `Tensor` for non-maximal-suppression
5750      threshold. Defaults to 0.7.
5751    pre_nms_topn: A scalar int `Tensor` for the number of
5752      top scoring boxes to be used as input. Defaults to 6000.
5753    min_size: A scalar float `Tensor`. Any box that has a smaller size
5754      than min_size will be discarded. Defaults to 16.
5755    post_nms_topn: An integer. Maximum number of rois in the output.
5756    name: A name for this operation (optional).
5757
5758  Returns:
5759    rois: Region of interest boxes sorted by their scores.
5760    roi_probabilities: scores of the ROI boxes in the ROIs' `Tensor`.
5761  """
5762  return gen_image_ops.generate_bounding_box_proposals(
5763      scores=scores,
5764      bbox_deltas=bbox_deltas,
5765      image_info=image_info,
5766      anchors=anchors,
5767      nms_threshold=nms_threshold,
5768      pre_nms_topn=pre_nms_topn,
5769      min_size=min_size,
5770      post_nms_topn=post_nms_topn,
5771      name=name)
5772