1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Pooling layers.
16"""
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import functools
22
23from tensorflow.python.framework import tensor_shape
24from tensorflow.python.keras import backend
25from tensorflow.python.keras.engine.base_layer import Layer
26from tensorflow.python.keras.engine.input_spec import InputSpec
27from tensorflow.python.keras.utils import conv_utils
28from tensorflow.python.ops import array_ops
29from tensorflow.python.ops import math_ops
30from tensorflow.python.ops import nn
31from tensorflow.python.util.tf_export import keras_export
32
33
class Pooling1D(Layer):
  """Shared implementation behind the 1D pooling layers.

  This base class is an internal code-sharing helper and is not meant to be
  exposed as a public API. It reuses a 2D pooling function on 3D inputs by
  temporarily inserting a dummy spatial axis of size 1.

  Args:
    pool_function: Callable performing the pooling, e.g. `tf.nn.max_pool2d`.
      It is invoked as `pool_function(inputs, pool_size, strides=...,
      padding=..., data_format=...)` on the expanded 4D inputs.
    pool_size: Integer, or tuple/list holding a single integer; the size of
      the pooling window.
    strides: Integer, or tuple/list holding a single integer; the strides of
      the pooling operation. `None` falls back to `pool_size`.
    padding: String, either 'valid' or 'same' (case-insensitive).
    data_format: String, `channels_last` (default) or `channels_first`.
      Describes the ordering of the input dimensions: `channels_last` means
      `(batch, steps, features)` and `channels_first` means
      `(batch, features, steps)`. `None` selects
      `backend.image_data_format()`.
    name: String, the name of the layer.
    **kwargs: Forwarded to the base `Layer` constructor.
  """

  def __init__(self, pool_function, pool_size, strides,
               padding='valid', data_format='channels_last',
               name=None, **kwargs):
    super(Pooling1D, self).__init__(name=name, **kwargs)
    # An explicit `None` defers to the globally configured data format.
    data_format = (
        backend.image_data_format() if data_format is None else data_format)
    # Without explicit strides, the window advances by its own size.
    strides = pool_size if strides is None else strides
    self.pool_function = pool_function
    self.pool_size = conv_utils.normalize_tuple(pool_size, 1, 'pool_size')
    self.strides = conv_utils.normalize_tuple(strides, 1, 'strides')
    self.padding = conv_utils.normalize_padding(padding)
    self.data_format = conv_utils.normalize_data_format(data_format)
    self.input_spec = InputSpec(ndim=3)

  def call(self, inputs):
    """Pools `inputs` along the steps axis and returns a 3D tensor."""
    # Position of the dummy axis that turns the 3D input into a 4D one:
    # (batch, steps, 1, features) or (batch, features, steps, 1).
    if self.data_format == 'channels_last':
      dummy_axis = 2
    else:
      dummy_axis = 3
    expanded = array_ops.expand_dims(inputs, dummy_axis)
    # The dummy axis gets a window and a stride of 1.
    pooled = self.pool_function(
        expanded,
        self.pool_size + (1,),
        strides=self.strides + (1,),
        padding=self.padding,
        data_format=self.data_format)
    return array_ops.squeeze(pooled, dummy_axis)

  def compute_output_shape(self, input_shape):
    """Returns the `TensorShape` produced for a given 3D `input_shape`."""
    dims = tensor_shape.TensorShape(input_shape).as_list()
    channels_first = self.data_format == 'channels_first'
    if channels_first:
      features, steps = dims[1], dims[2]
    else:
      steps, features = dims[1], dims[2]
    pooled_steps = conv_utils.conv_output_length(
        steps, self.pool_size[0], self.padding, self.strides[0])
    if channels_first:
      out_dims = [dims[0], features, pooled_steps]
    else:
      out_dims = [dims[0], pooled_steps, features]
    return tensor_shape.TensorShape(out_dims)

  def get_config(self):
    """Returns the base layer config extended with the pooling settings."""
    merged = dict(super(Pooling1D, self).get_config())
    merged.update({
        'strides': self.strides,
        'pool_size': self.pool_size,
        'padding': self.padding,
        'data_format': self.data_format,
    })
    return merged
109
110
@keras_export('keras.layers.MaxPool1D', 'keras.layers.MaxPooling1D')
class MaxPooling1D(Pooling1D):
  """Max pooling operation for 1D temporal data.

  Downsamples the input representation by keeping the maximum value found in
  each window of `pool_size` steps. Consecutive windows are `strides` steps
  apart.

  With the "valid" padding option the number of output steps is:
  `output_shape = math.floor((input_shape - pool_size) / strides) + 1`
  (when input_shape >= pool_size)

  With the "same" padding option the number of output steps is:
  `output_shape = math.floor((input_shape - 1) / strides) + 1`

  For example, for strides=1 and padding="valid":

  >>> x = tf.constant([1., 2., 3., 4., 5.])
  >>> x = tf.reshape(x, [1, 5, 1])
  >>> max_pool_1d = tf.keras.layers.MaxPooling1D(pool_size=2,
  ...    strides=1, padding='valid')
  >>> max_pool_1d(x)
  <tf.Tensor: shape=(1, 4, 1), dtype=float32, numpy=
  array([[[2.],
          [3.],
          [4.],
          [5.]]], dtype=float32)>

  For example, for strides=2 and padding="valid":

  >>> x = tf.constant([1., 2., 3., 4., 5.])
  >>> x = tf.reshape(x, [1, 5, 1])
  >>> max_pool_1d = tf.keras.layers.MaxPooling1D(pool_size=2,
  ...    strides=2, padding='valid')
  >>> max_pool_1d(x)
  <tf.Tensor: shape=(1, 2, 1), dtype=float32, numpy=
  array([[[2.],
          [4.]]], dtype=float32)>

  For example, for strides=1 and padding="same":

  >>> x = tf.constant([1., 2., 3., 4., 5.])
  >>> x = tf.reshape(x, [1, 5, 1])
  >>> max_pool_1d = tf.keras.layers.MaxPooling1D(pool_size=2,
  ...    strides=1, padding='same')
  >>> max_pool_1d(x)
  <tf.Tensor: shape=(1, 5, 1), dtype=float32, numpy=
  array([[[2.],
          [3.],
          [4.],
          [5.],
          [5.]]], dtype=float32)>

  Args:
    pool_size: Integer, size of the max pooling window.
    strides: Integer, or None. How far the pooling window moves between two
      consecutive pooling steps. `None` makes it equal to `pool_size`.
    padding: One of `"valid"` or `"same"` (case-insensitive).
      `"valid"` means no padding. `"same"` results in padding evenly to
      the left/right of the input such that, for a stride of 1, the output
      has as many steps as the input.
    data_format: A string,
      one of `channels_last` (default) or `channels_first`.
      The ordering of the dimensions in the inputs.
      `channels_last` corresponds to inputs with shape
      `(batch, steps, features)` while `channels_first`
      corresponds to inputs with shape
      `(batch, features, steps)`.

  Input shape:
    - If `data_format='channels_last'`:
      3D tensor with shape `(batch_size, steps, features)`.
    - If `data_format='channels_first'`:
      3D tensor with shape `(batch_size, features, steps)`.

  Output shape:
    - If `data_format='channels_last'`:
      3D tensor with shape `(batch_size, downsampled_steps, features)`.
    - If `data_format='channels_first'`:
      3D tensor with shape `(batch_size, features, downsampled_steps)`.
  """

  def __init__(self, pool_size=2, strides=None,
               padding='valid', data_format='channels_last', **kwargs):
    # The 1D base class drives a 2D pooling op; bind it to max mode here.
    max_pool = functools.partial(backend.pool2d, pool_mode='max')
    super(MaxPooling1D, self).__init__(
        max_pool,
        pool_size=pool_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        **kwargs)
201
202
@keras_export('keras.layers.AveragePooling1D', 'keras.layers.AvgPool1D')
class AveragePooling1D(Pooling1D):
  """Average pooling operation for 1D temporal data.

  Args:
    pool_size: Integer, size of the average pooling window.
    strides: Integer, or None. Factor by which to downscale, e.g. 2 halves
      the input. `None` makes it equal to `pool_size`.
    padding: One of `"valid"` or `"same"` (case-insensitive).
      `"valid"` means no padding. `"same"` results in padding evenly to
      the left/right of the input such that, for a stride of 1, the output
      has as many steps as the input.
    data_format: A string,
      one of `channels_last` (default) or `channels_first`.
      The ordering of the dimensions in the inputs.
      `channels_last` corresponds to inputs with shape
      `(batch, steps, features)` while `channels_first`
      corresponds to inputs with shape
      `(batch, features, steps)`.

  Input shape:
    - If `data_format='channels_last'`:
      3D tensor with shape `(batch_size, steps, features)`.
    - If `data_format='channels_first'`:
      3D tensor with shape `(batch_size, features, steps)`.

  Output shape:
    - If `data_format='channels_last'`:
      3D tensor with shape `(batch_size, downsampled_steps, features)`.
    - If `data_format='channels_first'`:
      3D tensor with shape `(batch_size, features, downsampled_steps)`.
  """

  def __init__(self, pool_size=2, strides=None,
               padding='valid', data_format='channels_last', **kwargs):
    # The 1D base class drives a 2D pooling op; bind it to average mode here.
    avg_pool = functools.partial(backend.pool2d, pool_mode='avg')
    super(AveragePooling1D, self).__init__(
        avg_pool,
        pool_size=pool_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        **kwargs)
246
247
class Pooling2D(Layer):
  """Shared implementation behind the 2D pooling layers (e.g. for images).

  This base class is an internal code-sharing helper and is not meant to be
  exposed as a public API.

  Args:
    pool_function: Callable performing the pooling, e.g. `tf.nn.max_pool2d`.
      It is invoked as `pool_function(inputs, ksize=..., strides=...,
      padding=..., data_format=...)`.
    pool_size: Integer, or tuple/list of 2 integers
      `(pool_height, pool_width)`; the size of the pooling window. A single
      integer applies the same value to all spatial dimensions.
    strides: Integer, or tuple/list of 2 integers; the strides of the pooling
      operation. A single integer applies the same value to all spatial
      dimensions. `None` falls back to `pool_size`.
    padding: String, either 'valid' or 'same' (case-insensitive).
    data_format: String, `channels_last` or `channels_first`.
      Describes the ordering of the input dimensions: `channels_last` means
      `(batch, height, width, channels)` and `channels_first` means
      `(batch, channels, height, width)`. `None` (the default) selects
      `backend.image_data_format()`.
    name: String, the name of the layer.
    **kwargs: Forwarded to the base `Layer` constructor.
  """

  def __init__(self, pool_function, pool_size, strides,
               padding='valid', data_format=None,
               name=None, **kwargs):
    super(Pooling2D, self).__init__(name=name, **kwargs)
    # An unspecified data format defers to the globally configured one.
    data_format = (
        backend.image_data_format() if data_format is None else data_format)
    # Without explicit strides, the window advances by its own size.
    strides = pool_size if strides is None else strides
    self.pool_function = pool_function
    self.pool_size = conv_utils.normalize_tuple(pool_size, 2, 'pool_size')
    self.strides = conv_utils.normalize_tuple(strides, 2, 'strides')
    self.padding = conv_utils.normalize_padding(padding)
    self.data_format = conv_utils.normalize_data_format(data_format)
    self.input_spec = InputSpec(ndim=4)

  def call(self, inputs):
    """Applies the pooling function to the 4D `inputs`."""
    # Batch and channel axes are never pooled: they get a window and stride
    # of 1. Only their position relative to the spatial axes changes.
    if self.data_format == 'channels_last':
      lead, tail = (1,), (1,)
    else:
      lead, tail = (1, 1), ()
    return self.pool_function(
        inputs,
        ksize=lead + self.pool_size + tail,
        strides=lead + self.strides + tail,
        padding=self.padding.upper(),
        data_format=conv_utils.convert_data_format(self.data_format, 4))

  def compute_output_shape(self, input_shape):
    """Returns the `TensorShape` produced for a given 4D `input_shape`."""
    dims = tensor_shape.TensorShape(input_shape).as_list()
    channels_first = self.data_format == 'channels_first'
    if channels_first:
      in_rows, in_cols = dims[2], dims[3]
    else:
      in_rows, in_cols = dims[1], dims[2]
    out_rows = conv_utils.conv_output_length(
        in_rows, self.pool_size[0], self.padding, self.strides[0])
    out_cols = conv_utils.conv_output_length(
        in_cols, self.pool_size[1], self.padding, self.strides[1])
    if channels_first:
      out_dims = [dims[0], dims[1], out_rows, out_cols]
    else:
      out_dims = [dims[0], out_rows, out_cols, dims[3]]
    return tensor_shape.TensorShape(out_dims)

  def get_config(self):
    """Returns the base layer config extended with the pooling settings."""
    merged = dict(super(Pooling2D, self).get_config())
    merged.update({
        'pool_size': self.pool_size,
        'padding': self.padding,
        'strides': self.strides,
        'data_format': self.data_format,
    })
    return merged
331
332
@keras_export('keras.layers.MaxPool2D', 'keras.layers.MaxPooling2D')
class MaxPooling2D(Pooling2D):
  """Max pooling operation for 2D spatial data.

  Downsamples the input representation by keeping, for every channel, the
  maximum value found in each window of size `pool_size`. Consecutive windows
  are `strides` apart along each spatial dimension.

  With the "valid" padding option, the number of output rows or columns is:
  `output_shape = math.floor((input_shape - pool_size) / strides) + 1`
  (when input_shape >= pool_size)

  With the "same" padding option, the number of output rows or columns is:
  `output_shape = math.floor((input_shape - 1) / strides) + 1`

  For example, for stride=(1,1) and padding="valid":

  >>> x = tf.constant([[1., 2., 3.],
  ...                  [4., 5., 6.],
  ...                  [7., 8., 9.]])
  >>> x = tf.reshape(x, [1, 3, 3, 1])
  >>> max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
  ...    strides=(1, 1), padding='valid')
  >>> max_pool_2d(x)
  <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
    array([[[[5.],
             [6.]],
            [[8.],
             [9.]]]], dtype=float32)>

  For example, for stride=(2,2) and padding="valid":

  >>> x = tf.constant([[1., 2., 3., 4.],
  ...                  [5., 6., 7., 8.],
  ...                  [9., 10., 11., 12.]])
  >>> x = tf.reshape(x, [1, 3, 4, 1])
  >>> max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
  ...    strides=(2, 2), padding='valid')
  >>> max_pool_2d(x)
  <tf.Tensor: shape=(1, 1, 2, 1), dtype=float32, numpy=
    array([[[[6.],
             [8.]]]], dtype=float32)>

  Usage Example:

  >>> input_image = tf.constant([[[[1.], [1.], [2.], [4.]],
  ...                            [[2.], [2.], [3.], [2.]],
  ...                            [[4.], [1.], [1.], [1.]],
  ...                            [[2.], [2.], [1.], [4.]]]])
  >>> output = tf.constant([[[[1], [0]],
  ...                       [[0], [1]]]])
  >>> model = tf.keras.models.Sequential()
  >>> model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
  ...    input_shape=(4,4,1)))
  >>> model.compile('adam', 'mean_squared_error')
  >>> model.predict(input_image, steps=1)
  array([[[[2.],
           [4.]],
          [[4.],
           [4.]]]], dtype=float32)

  For example, for stride=(1,1) and padding="same":

  >>> x = tf.constant([[1., 2., 3.],
  ...                  [4., 5., 6.],
  ...                  [7., 8., 9.]])
  >>> x = tf.reshape(x, [1, 3, 3, 1])
  >>> max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
  ...    strides=(1, 1), padding='same')
  >>> max_pool_2d(x)
  <tf.Tensor: shape=(1, 3, 3, 1), dtype=float32, numpy=
    array([[[[5.],
             [6.],
             [6.]],
            [[8.],
             [9.],
             [9.]],
            [[8.],
             [9.],
             [9.]]]], dtype=float32)>

  Args:
    pool_size: Integer or tuple of 2 integers; the window over which the
      maximum is taken. `(2, 2)` takes the max value over a 2x2 pooling
      window. A single integer uses the same window length for both
      dimensions.
    strides: Integer, tuple of 2 integers, or None. How far the pooling
      window moves between two consecutive pooling steps. `None` makes it
      equal to `pool_size`.
    padding: One of `"valid"` or `"same"` (case-insensitive).
      `"valid"` means no padding. `"same"` results in padding evenly to
      the left/right or up/down of the input such that, for a stride of 1,
      the output has the same height/width dimension as the input.
    data_format: A string,
      one of `channels_last` (default) or `channels_first`.
      The ordering of the dimensions in the inputs.
      `channels_last` corresponds to inputs with shape
      `(batch, height, width, channels)` while `channels_first`
      corresponds to inputs with shape
      `(batch, channels, height, width)`.
      It defaults to the `image_data_format` value found in your
      Keras config file at `~/.keras/keras.json`.
      If you never set it, then it will be "channels_last".

  Input shape:
    - If `data_format='channels_last'`:
      4D tensor with shape `(batch_size, rows, cols, channels)`.
    - If `data_format='channels_first'`:
      4D tensor with shape `(batch_size, channels, rows, cols)`.

  Output shape:
    - If `data_format='channels_last'`:
      4D tensor with shape `(batch_size, pooled_rows, pooled_cols, channels)`.
    - If `data_format='channels_first'`:
      4D tensor with shape `(batch_size, channels, pooled_rows, pooled_cols)`.

  Returns:
    A tensor of rank 4 representing the maximum pooled values.  See above for
    output shape.
  """

  def __init__(self,
               pool_size=(2, 2),
               strides=None,
               padding='valid',
               data_format=None,
               **kwargs):
    # Everything except the choice of pooling op lives in the 2D base class.
    super(MaxPooling2D, self).__init__(
        nn.max_pool,
        pool_size=pool_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        **kwargs)
464
465
@keras_export('keras.layers.AveragePooling2D', 'keras.layers.AvgPool2D')
class AveragePooling2D(Pooling2D):
  """Average pooling operation for 2D spatial data.

  Args:
    pool_size: Integer or tuple of 2 integers; factors by which to downscale
      (vertical, horizontal). `(2, 2)` halves the input in both spatial
      dimensions. A single integer uses the same window length for both
      dimensions.
    strides: Integer, tuple of 2 integers, or None. Strides values.
      `None` makes it equal to `pool_size`.
    padding: One of `"valid"` or `"same"` (case-insensitive).
      `"valid"` means no padding. `"same"` results in padding evenly to
      the left/right or up/down of the input such that, for a stride of 1,
      the output has the same height/width dimension as the input.
    data_format: A string,
      one of `channels_last` (default) or `channels_first`.
      The ordering of the dimensions in the inputs.
      `channels_last` corresponds to inputs with shape
      `(batch, height, width, channels)` while `channels_first`
      corresponds to inputs with shape
      `(batch, channels, height, width)`.
      It defaults to the `image_data_format` value found in your
      Keras config file at `~/.keras/keras.json`.
      If you never set it, then it will be "channels_last".

  Input shape:
    - If `data_format='channels_last'`:
      4D tensor with shape `(batch_size, rows, cols, channels)`.
    - If `data_format='channels_first'`:
      4D tensor with shape `(batch_size, channels, rows, cols)`.

  Output shape:
    - If `data_format='channels_last'`:
      4D tensor with shape `(batch_size, pooled_rows, pooled_cols, channels)`.
    - If `data_format='channels_first'`:
      4D tensor with shape `(batch_size, channels, pooled_rows, pooled_cols)`.
  """

  def __init__(self,
               pool_size=(2, 2),
               strides=None,
               padding='valid',
               data_format=None,
               **kwargs):
    # Everything except the choice of pooling op lives in the 2D base class.
    super(AveragePooling2D, self).__init__(
        nn.avg_pool,
        pool_size=pool_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        **kwargs)
517
518
class Pooling3D(Layer):
  """Shared implementation behind the 3D pooling layers.

  This base class is an internal code-sharing helper and is not meant to be
  exposed as a public API.

  Args:
    pool_function: Callable performing the pooling, e.g. `tf.nn.max_pool3d`.
      It is invoked as `pool_function(inputs, ksize=..., strides=...,
      padding=...)` and always receives channels-last data.
    pool_size: Integer, or tuple/list of 3 integers
      `(pool_depth, pool_height, pool_width)`; the size of the pooling
      window. A single integer applies the same value to all spatial
      dimensions.
    strides: Integer, or tuple/list of 3 integers; the strides of the pooling
      operation. A single integer applies the same value to all spatial
      dimensions. `None` falls back to `pool_size`.
    padding: String, either 'valid' or 'same' (case-insensitive).
    data_format: String, `channels_last` (default) or `channels_first`.
      Describes the ordering of the input dimensions: `channels_last` means
      `(batch, depth, height, width, channels)` and `channels_first` means
      `(batch, channels, depth, height, width)`. `None` selects
      `backend.image_data_format()`.
    name: String, the name of the layer.
    **kwargs: Forwarded to the base `Layer` constructor.
  """

  def __init__(self, pool_function, pool_size, strides,
               padding='valid', data_format='channels_last',
               name=None, **kwargs):
    super(Pooling3D, self).__init__(name=name, **kwargs)
    # An explicit `None` defers to the globally configured data format.
    data_format = (
        backend.image_data_format() if data_format is None else data_format)
    # Without explicit strides, the window advances by its own size.
    strides = pool_size if strides is None else strides
    self.pool_function = pool_function
    self.pool_size = conv_utils.normalize_tuple(pool_size, 3, 'pool_size')
    self.strides = conv_utils.normalize_tuple(strides, 3, 'strides')
    self.padding = conv_utils.normalize_padding(padding)
    self.data_format = conv_utils.normalize_data_format(data_format)
    self.input_spec = InputSpec(ndim=5)

  def call(self, inputs):
    """Applies the pooling function to the 5D `inputs`."""
    channels_first = self.data_format == 'channels_first'
    if channels_first:
      # TF does not support `channels_first` with 3D pooling operations,
      # so the data is moved to channels-last for the op and moved back
      # afterwards.
      # TODO(fchollet): remove this when TF pooling is feature-complete.
      inputs = array_ops.transpose(inputs, (0, 2, 3, 4, 1))

    # Batch and channel axes get a window and a stride of 1.
    pooled = self.pool_function(
        inputs,
        ksize=(1,) + self.pool_size + (1,),
        strides=(1,) + self.strides + (1,),
        padding=self.padding.upper())

    if channels_first:
      pooled = array_ops.transpose(pooled, (0, 4, 1, 2, 3))
    return pooled

  def compute_output_shape(self, input_shape):
    """Returns the `TensorShape` produced for a given 5D `input_shape`."""
    dims = tensor_shape.TensorShape(input_shape).as_list()
    channels_first = self.data_format == 'channels_first'
    # Index of the first of the three consecutive spatial axes.
    first_spatial = 2 if channels_first else 1
    pooled_dims = [
        conv_utils.conv_output_length(
            dims[first_spatial + i], self.pool_size[i], self.padding,
            self.strides[i])
        for i in range(3)
    ]
    if channels_first:
      return tensor_shape.TensorShape([dims[0], dims[1]] + pooled_dims)
    return tensor_shape.TensorShape([dims[0]] + pooled_dims + [dims[4]])

  def get_config(self):
    """Returns the base layer config extended with the pooling settings."""
    merged = dict(super(Pooling3D, self).get_config())
    merged.update({
        'pool_size': self.pool_size,
        'padding': self.padding,
        'strides': self.strides,
        'data_format': self.data_format,
    })
    return merged
613
614
@keras_export('keras.layers.MaxPool3D', 'keras.layers.MaxPooling3D')
class MaxPooling3D(Pooling3D):
  """Max pooling operation for 3D data (spatial or spatio-temporal).

  Args:
    pool_size: Tuple of 3 integers; factors by which to downscale
      (dim1, dim2, dim3). `(2, 2, 2)` halves the size of the 3D input in
      each dimension.
    strides: Tuple of 3 integers, or None. Strides values. `None` makes it
      equal to `pool_size`.
    padding: One of `"valid"` or `"same"` (case-insensitive).
      `"valid"` means no padding. `"same"` results in padding evenly on
      both sides of each spatial dimension such that, for a stride of 1,
      the output has the same spatial dimensions as the input.
    data_format: A string,
      one of `channels_last` (default) or `channels_first`.
      The ordering of the dimensions in the inputs.
      `channels_last` corresponds to inputs with shape
      `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
      while `channels_first` corresponds to inputs with shape
      `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
      It defaults to the `image_data_format` value found in your
      Keras config file at `~/.keras/keras.json`.
      If you never set it, then it will be "channels_last".

  Input shape:
    - If `data_format='channels_last'`:
      5D tensor with shape:
      `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
    - If `data_format='channels_first'`:
      5D tensor with shape:
      `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`

  Output shape:
    - If `data_format='channels_last'`:
      5D tensor with shape:
      `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)`
    - If `data_format='channels_first'`:
      5D tensor with shape:
      `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)`
  """

  def __init__(self,
               pool_size=(2, 2, 2),
               strides=None,
               padding='valid',
               data_format=None,
               **kwargs):
    # Everything except the choice of pooling op lives in the 3D base class.
    super(MaxPooling3D, self).__init__(
        nn.max_pool3d,
        pool_size=pool_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        **kwargs)
666
667
@keras_export('keras.layers.AveragePooling3D', 'keras.layers.AvgPool3D')
class AveragePooling3D(Pooling3D):
  """Average pooling operation for 3D data (spatial or spatio-temporal).

  Args:
    pool_size: Tuple of 3 integers; factors by which to downscale
      (dim1, dim2, dim3). `(2, 2, 2)` halves the size of the 3D input in
      each dimension.
    strides: Tuple of 3 integers, or None. Strides values. `None` makes it
      equal to `pool_size`.
    padding: One of `"valid"` or `"same"` (case-insensitive).
      `"valid"` means no padding. `"same"` results in padding evenly on
      both sides of each spatial dimension such that, for a stride of 1,
      the output has the same spatial dimensions as the input.
    data_format: A string,
      one of `channels_last` (default) or `channels_first`.
      The ordering of the dimensions in the inputs.
      `channels_last` corresponds to inputs with shape
      `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
      while `channels_first` corresponds to inputs with shape
      `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
      It defaults to the `image_data_format` value found in your
      Keras config file at `~/.keras/keras.json`.
      If you never set it, then it will be "channels_last".

  Input shape:
    - If `data_format='channels_last'`:
      5D tensor with shape:
      `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
    - If `data_format='channels_first'`:
      5D tensor with shape:
      `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`

  Output shape:
    - If `data_format='channels_last'`:
      5D tensor with shape:
      `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)`
    - If `data_format='channels_first'`:
      5D tensor with shape:
      `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)`
  """

  def __init__(self,
               pool_size=(2, 2, 2),
               strides=None,
               padding='valid',
               data_format=None,
               **kwargs):
    # Everything except the choice of pooling op lives in the 3D base class.
    super(AveragePooling3D, self).__init__(
        nn.avg_pool3d,
        pool_size=pool_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        **kwargs)
719
720
class GlobalPooling1D(Layer):
  """Common base for the global pooling layers operating on 3D inputs.

  Subclasses provide `call`; this class stores the data format, restricts
  inputs to rank 3 and derives the `(batch, features)` output shape.

  Args:
    data_format: String, `channels_last` (default) or `channels_first`.
      `channels_last` means inputs shaped `(batch, steps, features)` and
      `channels_first` means inputs shaped `(batch, features, steps)`.
    **kwargs: Forwarded to the base `Layer` constructor.
  """

  def __init__(self, data_format='channels_last', **kwargs):
    super(GlobalPooling1D, self).__init__(**kwargs)
    self.input_spec = InputSpec(ndim=3)
    self.data_format = conv_utils.normalize_data_format(data_format)

  def compute_output_shape(self, input_shape):
    """Returns the 2D `TensorShape` left after dropping the steps axis."""
    dims = tensor_shape.TensorShape(input_shape).as_list()
    features_axis = 1 if self.data_format == 'channels_first' else 2
    return tensor_shape.TensorShape([dims[0], dims[features_axis]])

  def call(self, inputs):
    """Must be overridden by subclasses with the actual reduction."""
    raise NotImplementedError

  def get_config(self):
    """Returns the base layer config extended with `data_format`."""
    merged = dict(super(GlobalPooling1D, self).get_config())
    merged.update({'data_format': self.data_format})
    return merged
743
744
@keras_export('keras.layers.GlobalAveragePooling1D',
              'keras.layers.GlobalAvgPool1D')
class GlobalAveragePooling1D(GlobalPooling1D):
  """Global average pooling operation for temporal data.

  Examples:

  >>> input_shape = (2, 3, 4)
  >>> x = tf.random.normal(input_shape)
  >>> y = tf.keras.layers.GlobalAveragePooling1D()(x)
  >>> print(y.shape)
  (2, 4)

  Args:
    data_format: A string,
      one of `channels_last` (default) or `channels_first`.
      The ordering of the dimensions in the inputs.
      `channels_last` corresponds to inputs with shape
      `(batch, steps, features)` while `channels_first`
      corresponds to inputs with shape
      `(batch, features, steps)`.

  Call arguments:
    inputs: A 3D tensor.
    mask: Binary tensor of shape `(batch_size, steps)` indicating whether
      a given step should be masked (excluded from the average). It is cast
      to the dtype of `inputs` before being applied.

  Input shape:
    - If `data_format='channels_last'`:
      3D tensor with shape:
      `(batch_size, steps, features)`
    - If `data_format='channels_first'`:
      3D tensor with shape:
      `(batch_size, features, steps)`

  Output shape:
    2D tensor with shape `(batch_size, features)`.
  """

  def __init__(self, data_format='channels_last', **kwargs):
    super(GlobalAveragePooling1D, self).__init__(data_format=data_format,
                                                 **kwargs)
    self.supports_masking = True

  def call(self, inputs, mask=None):
    """Averages `inputs` over the steps axis, skipping masked-out steps.

    Args:
      inputs: A 3D tensor laid out according to `self.data_format`.
      mask: Optional tensor of shape `(batch_size, steps)`; steps whose mask
        value is zero/False do not contribute to the average.

    Returns:
      A 2D tensor holding one average per sample and feature.
    """
    channels_last = self.data_format == 'channels_last'
    steps_axis = 1 if channels_last else 2
    if mask is None:
      return backend.mean(inputs, axis=steps_axis)

    # Cast to the dtype of `inputs` rather than the global floatx so that the
    # multiplication below does not mix dtypes when the layer receives
    # inputs that are not floatx (e.g. float16 or float64).
    mask = math_ops.cast(mask, inputs.dtype)
    # Add a size-1 features axis so the mask broadcasts across features.
    mask = array_ops.expand_dims(mask, 2 if channels_last else 1)
    masked_inputs = inputs * mask
    # Divide by the number of kept steps instead of the total step count.
    # NOTE: a sample whose steps are all masked out divides by zero here.
    kept_steps = math_ops.reduce_sum(mask, axis=steps_axis)
    return backend.sum(masked_inputs, axis=steps_axis) / kept_steps

  def compute_mask(self, inputs, mask=None):
    """Returns `None`: the steps axis is reduced away, so no mask remains."""
    return None
803
804
@keras_export('keras.layers.GlobalMaxPool1D', 'keras.layers.GlobalMaxPooling1D')
class GlobalMaxPooling1D(GlobalPooling1D):
  """Global max pooling operation for 1D temporal data.

  Downsamples the input representation by taking the maximum value over
  the time dimension.

  For example:

  >>> x = tf.constant([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]])
  >>> x = tf.reshape(x, [3, 3, 1])
  >>> x
  <tf.Tensor: shape=(3, 3, 1), dtype=float32, numpy=
  array([[[1.], [2.], [3.]],
         [[4.], [5.], [6.]],
         [[7.], [8.], [9.]]], dtype=float32)>
  >>> max_pool_1d = tf.keras.layers.GlobalMaxPooling1D()
  >>> max_pool_1d(x)
  <tf.Tensor: shape=(3, 1), dtype=float32, numpy=
  array([[3.],
         [6.],
         [9.]], dtype=float32)>

  Args:
    data_format: A string,
      one of `channels_last` (default) or `channels_first`.
      The ordering of the dimensions in the inputs.
      `channels_last` corresponds to inputs with shape
      `(batch, steps, features)` while `channels_first`
      corresponds to inputs with shape
      `(batch, features, steps)`.

  Input shape:
    - If `data_format='channels_last'`:
      3D tensor with shape:
      `(batch_size, steps, features)`
    - If `data_format='channels_first'`:
      3D tensor with shape:
      `(batch_size, features, steps)`

  Output shape:
    2D tensor with shape `(batch_size, features)`.
  """

  def call(self, inputs):
    """Returns the maximum of `inputs` taken along the steps axis."""
    # Steps are on axis 1 for `channels_last` and on axis 2 otherwise.
    steps_axis = 1 if self.data_format == 'channels_last' else 2
    return backend.max(inputs, axis=steps_axis)
852
853
class GlobalPooling2D(Layer):
  """Common base for the global pooling layers that take 2D (spatial) inputs.

  This class stores the data format, declares that inputs must have rank 4,
  and derives the `(batch, channels)` output shape. Subclasses must override
  `call` with the actual reduction.

  Args:
    data_format: A string, `channels_last` or `channels_first`, or None. It
      is passed through `conv_utils.normalize_data_format` before being
      stored.
    **kwargs: Forwarded unchanged to the `Layer` constructor.
  """

  def __init__(self, data_format=None, **kwargs):
    super(GlobalPooling2D, self).__init__(**kwargs)
    self.data_format = conv_utils.normalize_data_format(data_format)
    # Inputs are declared as rank 4.
    self.input_spec = InputSpec(ndim=4)

  def compute_output_shape(self, input_shape):
    """Returns the 2D shape built from the batch and channels dimensions."""
    dims = tensor_shape.TensorShape(input_shape).as_list()
    # Channels sit on the last axis for `channels_last`, on axis 1 otherwise.
    channels_axis = 3 if self.data_format == 'channels_last' else 1
    return tensor_shape.TensorShape([dims[0], dims[channels_axis]])

  def call(self, inputs):
    """Not implemented here; concrete pooling layers override this."""
    raise NotImplementedError

  def get_config(self):
    """Returns the parent config extended with the `data_format` entry."""
    merged = dict(super(GlobalPooling2D, self).get_config())
    merged['data_format'] = self.data_format
    return merged
877
878
@keras_export('keras.layers.GlobalAveragePooling2D',
              'keras.layers.GlobalAvgPool2D')
class GlobalAveragePooling2D(GlobalPooling2D):
  """Global average pooling operation for spatial data.

  Reduces each channel to its mean over the two spatial dimensions.

  Examples:

  >>> input_shape = (2, 4, 5, 3)
  >>> x = tf.random.normal(input_shape)
  >>> y = tf.keras.layers.GlobalAveragePooling2D()(x)
  >>> print(y.shape)
  (2, 3)

  Args:
    data_format: A string,
      one of `channels_last` (default) or `channels_first`.
      The ordering of the dimensions in the inputs.
      `channels_last` corresponds to inputs with shape
      `(batch, height, width, channels)` while `channels_first`
      corresponds to inputs with shape
      `(batch, channels, height, width)`.
      It defaults to the `image_data_format` value found in your
      Keras config file at `~/.keras/keras.json`.
      If you never set it, then it will be "channels_last".

  Input shape:
    - If `data_format='channels_last'`:
      4D tensor with shape `(batch_size, rows, cols, channels)`.
    - If `data_format='channels_first'`:
      4D tensor with shape `(batch_size, channels, rows, cols)`.

  Output shape:
    2D tensor with shape `(batch_size, channels)`.
  """

  def call(self, inputs):
    """Returns the mean of `inputs` taken over both spatial axes."""
    # Spatial axes are (1, 2) for `channels_last` and (2, 3) otherwise.
    spatial_axes = [1, 2] if self.data_format == 'channels_last' else [2, 3]
    return backend.mean(inputs, axis=spatial_axes)
919
920
@keras_export('keras.layers.GlobalMaxPool2D', 'keras.layers.GlobalMaxPooling2D')
class GlobalMaxPooling2D(GlobalPooling2D):
  """Global max pooling operation for spatial data.

  Reduces each channel to its maximum over the two spatial dimensions.

  Examples:

  >>> input_shape = (2, 4, 5, 3)
  >>> x = tf.random.normal(input_shape)
  >>> y = tf.keras.layers.GlobalMaxPool2D()(x)
  >>> print(y.shape)
  (2, 3)

  Args:
    data_format: A string,
      one of `channels_last` (default) or `channels_first`.
      The ordering of the dimensions in the inputs.
      `channels_last` corresponds to inputs with shape
      `(batch, height, width, channels)` while `channels_first`
      corresponds to inputs with shape
      `(batch, channels, height, width)`.
      It defaults to the `image_data_format` value found in your
      Keras config file at `~/.keras/keras.json`.
      If you never set it, then it will be "channels_last".

  Input shape:
    - If `data_format='channels_last'`:
      4D tensor with shape `(batch_size, rows, cols, channels)`.
    - If `data_format='channels_first'`:
      4D tensor with shape `(batch_size, channels, rows, cols)`.

  Output shape:
    2D tensor with shape `(batch_size, channels)`.
  """

  def call(self, inputs):
    """Returns the maximum of `inputs` taken over both spatial axes."""
    # Spatial axes are (1, 2) for `channels_last` and (2, 3) otherwise.
    spatial_axes = [1, 2] if self.data_format == 'channels_last' else [2, 3]
    return backend.max(inputs, axis=spatial_axes)
960
961
class GlobalPooling3D(Layer):
  """Common base for the global pooling layers that take 3D inputs.

  This class stores the data format, declares that inputs must have rank 5,
  and derives the `(batch, channels)` output shape. Subclasses must override
  `call` with the actual reduction.

  Args:
    data_format: A string, `channels_last` or `channels_first`, or None. It
      is passed through `conv_utils.normalize_data_format` before being
      stored.
    **kwargs: Forwarded unchanged to the `Layer` constructor.
  """

  def __init__(self, data_format=None, **kwargs):
    super(GlobalPooling3D, self).__init__(**kwargs)
    self.data_format = conv_utils.normalize_data_format(data_format)
    # Inputs are declared as rank 5.
    self.input_spec = InputSpec(ndim=5)

  def compute_output_shape(self, input_shape):
    """Returns the 2D shape built from the batch and channels dimensions."""
    dims = tensor_shape.TensorShape(input_shape).as_list()
    # Channels sit on the last axis for `channels_last`, on axis 1 otherwise.
    channels_axis = 4 if self.data_format == 'channels_last' else 1
    return tensor_shape.TensorShape([dims[0], dims[channels_axis]])

  def call(self, inputs):
    """Not implemented here; concrete pooling layers override this."""
    raise NotImplementedError

  def get_config(self):
    """Returns the parent config extended with the `data_format` entry."""
    merged = dict(super(GlobalPooling3D, self).get_config())
    merged['data_format'] = self.data_format
    return merged
984
985
@keras_export('keras.layers.GlobalAveragePooling3D',
              'keras.layers.GlobalAvgPool3D')
class GlobalAveragePooling3D(GlobalPooling3D):
  """Global Average pooling operation for 3D data.

  Reduces each channel to its mean over the three spatial dimensions.

  Args:
    data_format: A string,
      one of `channels_last` (default) or `channels_first`.
      The ordering of the dimensions in the inputs.
      `channels_last` corresponds to inputs with shape
      `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
      while `channels_first` corresponds to inputs with shape
      `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
      It defaults to the `image_data_format` value found in your
      Keras config file at `~/.keras/keras.json`.
      If you never set it, then it will be "channels_last".

  Input shape:
    - If `data_format='channels_last'`:
      5D tensor with shape:
      `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
    - If `data_format='channels_first'`:
      5D tensor with shape:
      `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`

  Output shape:
    2D tensor with shape `(batch_size, channels)`.
  """

  def call(self, inputs):
    """Returns the mean of `inputs` taken over the three spatial axes."""
    # Spatial axes are (1, 2, 3) for `channels_last`, (2, 3, 4) otherwise.
    if self.data_format == 'channels_last':
      spatial_axes = [1, 2, 3]
    else:
      spatial_axes = [2, 3, 4]
    return backend.mean(inputs, axis=spatial_axes)
1020
1021
@keras_export('keras.layers.GlobalMaxPool3D', 'keras.layers.GlobalMaxPooling3D')
class GlobalMaxPooling3D(GlobalPooling3D):
  """Global Max pooling operation for 3D data.

  Reduces each channel to its maximum over the three spatial dimensions.

  Args:
    data_format: A string,
      one of `channels_last` (default) or `channels_first`.
      The ordering of the dimensions in the inputs.
      `channels_last` corresponds to inputs with shape
      `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
      while `channels_first` corresponds to inputs with shape
      `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
      It defaults to the `image_data_format` value found in your
      Keras config file at `~/.keras/keras.json`.
      If you never set it, then it will be "channels_last".

  Input shape:
    - If `data_format='channels_last'`:
      5D tensor with shape:
      `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
    - If `data_format='channels_first'`:
      5D tensor with shape:
      `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`

  Output shape:
    2D tensor with shape `(batch_size, channels)`.
  """

  def call(self, inputs):
    """Returns the maximum of `inputs` taken over the three spatial axes."""
    # Spatial axes are (1, 2, 3) for `channels_last`, (2, 3, 4) otherwise.
    if self.data_format == 'channels_last':
      spatial_axes = [1, 2, 3]
    else:
      spatial_axes = [2, 3, 4]
    return backend.max(inputs, axis=spatial_axes)
1055
1056
# Aliases: short names bound to the same classes as the long-form names.

# Windowed pooling layers.
MaxPool1D = MaxPooling1D
MaxPool2D = MaxPooling2D
MaxPool3D = MaxPooling3D
AvgPool1D = AveragePooling1D
AvgPool2D = AveragePooling2D
AvgPool3D = AveragePooling3D

# Global pooling layers.
GlobalMaxPool1D = GlobalMaxPooling1D
GlobalMaxPool2D = GlobalMaxPooling2D
GlobalMaxPool3D = GlobalMaxPooling3D
GlobalAvgPool1D = GlobalAveragePooling1D
GlobalAvgPool2D = GlobalAveragePooling2D
GlobalAvgPool3D = GlobalAveragePooling3D
1071