1# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Pooling layers. 16""" 17from __future__ import absolute_import 18from __future__ import division 19from __future__ import print_function 20 21import functools 22 23from tensorflow.python.framework import tensor_shape 24from tensorflow.python.keras import backend 25from tensorflow.python.keras.engine.base_layer import Layer 26from tensorflow.python.keras.engine.input_spec import InputSpec 27from tensorflow.python.keras.utils import conv_utils 28from tensorflow.python.ops import array_ops 29from tensorflow.python.ops import math_ops 30from tensorflow.python.ops import nn 31from tensorflow.python.util.tf_export import keras_export 32 33 34class Pooling1D(Layer): 35 """Pooling layer for arbitrary pooling functions, for 1D inputs. 36 37 This class only exists for code reuse. It will never be an exposed API. 38 39 Args: 40 pool_function: The pooling function to apply, e.g. `tf.nn.max_pool2d`. 41 pool_size: An integer or tuple/list of a single integer, 42 representing the size of the pooling window. 43 strides: An integer or tuple/list of a single integer, specifying the 44 strides of the pooling operation. 45 padding: A string. The padding method, either 'valid' or 'same'. 46 Case-insensitive. 47 data_format: A string, 48 one of `channels_last` (default) or `channels_first`. 49 The ordering of the dimensions in the inputs. 50 `channels_last` corresponds to inputs with shape 51 `(batch, steps, features)` while `channels_first` 52 corresponds to inputs with shape 53 `(batch, features, steps)`. 54 name: A string, the name of the layer. 55 """ 56 57 def __init__(self, pool_function, pool_size, strides, 58 padding='valid', data_format='channels_last', 59 name=None, **kwargs): 60 super(Pooling1D, self).__init__(name=name, **kwargs) 61 if data_format is None: 62 data_format = backend.image_data_format() 63 if strides is None: 64 strides = pool_size 65 self.pool_function = pool_function 66 self.pool_size = conv_utils.normalize_tuple(pool_size, 1, 'pool_size') 67 self.strides = conv_utils.normalize_tuple(strides, 1, 'strides') 68 self.padding = conv_utils.normalize_padding(padding) 69 self.data_format = conv_utils.normalize_data_format(data_format) 70 self.input_spec = InputSpec(ndim=3) 71 72 def call(self, inputs): 73 pad_axis = 2 if self.data_format == 'channels_last' else 3 74 inputs = array_ops.expand_dims(inputs, pad_axis) 75 outputs = self.pool_function( 76 inputs, 77 self.pool_size + (1,), 78 strides=self.strides + (1,), 79 padding=self.padding, 80 data_format=self.data_format) 81 return array_ops.squeeze(outputs, pad_axis) 82 83 def compute_output_shape(self, input_shape): 84 input_shape = tensor_shape.TensorShape(input_shape).as_list() 85 if self.data_format == 'channels_first': 86 steps = input_shape[2] 87 features = input_shape[1] 88 else: 89 steps = input_shape[1] 90 features = input_shape[2] 91 length = conv_utils.conv_output_length(steps, 92 self.pool_size[0], 93 self.padding, 94 self.strides[0]) 95 if self.data_format == 'channels_first': 96 return tensor_shape.TensorShape([input_shape[0], features, length]) 97 else: 98 return tensor_shape.TensorShape([input_shape[0], length, features]) 99 100 def get_config(self): 101 config = { 102 'strides': self.strides, 103 'pool_size': self.pool_size, 104 'padding': self.padding, 105 'data_format': self.data_format, 106 } 107 base_config = super(Pooling1D, self).get_config() 108 return dict(list(base_config.items()) + list(config.items())) 109 110 111@keras_export('keras.layers.MaxPool1D', 'keras.layers.MaxPooling1D') 112class MaxPooling1D(Pooling1D): 113 """Max pooling operation for 1D temporal data. 114 115 Downsamples the input representation by taking the maximum value over the 116 window defined by `pool_size`. The window is shifted by `strides`. The 117 resulting output when using "valid" padding option has a shape of: 118 `output_shape = (input_shape - pool_size + 1) / strides)` 119 120 The resulting output shape when using the "same" padding option is: 121 `output_shape = input_shape / strides` 122 123 For example, for strides=1 and padding="valid": 124 125 >>> x = tf.constant([1., 2., 3., 4., 5.]) 126 >>> x = tf.reshape(x, [1, 5, 1]) 127 >>> max_pool_1d = tf.keras.layers.MaxPooling1D(pool_size=2, 128 ... strides=1, padding='valid') 129 >>> max_pool_1d(x) 130 <tf.Tensor: shape=(1, 4, 1), dtype=float32, numpy= 131 array([[[2.], 132 [3.], 133 [4.], 134 [5.]]], dtype=float32)> 135 136 For example, for strides=2 and padding="valid": 137 138 >>> x = tf.constant([1., 2., 3., 4., 5.]) 139 >>> x = tf.reshape(x, [1, 5, 1]) 140 >>> max_pool_1d = tf.keras.layers.MaxPooling1D(pool_size=2, 141 ... strides=2, padding='valid') 142 >>> max_pool_1d(x) 143 <tf.Tensor: shape=(1, 2, 1), dtype=float32, numpy= 144 array([[[2.], 145 [4.]]], dtype=float32)> 146 147 For example, for strides=1 and padding="same": 148 149 >>> x = tf.constant([1., 2., 3., 4., 5.]) 150 >>> x = tf.reshape(x, [1, 5, 1]) 151 >>> max_pool_1d = tf.keras.layers.MaxPooling1D(pool_size=2, 152 ... strides=1, padding='same') 153 >>> max_pool_1d(x) 154 <tf.Tensor: shape=(1, 5, 1), dtype=float32, numpy= 155 array([[[2.], 156 [3.], 157 [4.], 158 [5.], 159 [5.]]], dtype=float32)> 160 161 Args: 162 pool_size: Integer, size of the max pooling window. 163 strides: Integer, or None. Specifies how much the pooling window moves 164 for each pooling step. 165 If None, it will default to `pool_size`. 166 padding: One of `"valid"` or `"same"` (case-insensitive). 167 `"valid"` means no padding. `"same"` results in padding evenly to 168 the left/right or up/down of the input such that output has the same 169 height/width dimension as the input. 170 data_format: A string, 171 one of `channels_last` (default) or `channels_first`. 172 The ordering of the dimensions in the inputs. 173 `channels_last` corresponds to inputs with shape 174 `(batch, steps, features)` while `channels_first` 175 corresponds to inputs with shape 176 `(batch, features, steps)`. 177 178 Input shape: 179 - If `data_format='channels_last'`: 180 3D tensor with shape `(batch_size, steps, features)`. 181 - If `data_format='channels_first'`: 182 3D tensor with shape `(batch_size, features, steps)`. 183 184 Output shape: 185 - If `data_format='channels_last'`: 186 3D tensor with shape `(batch_size, downsampled_steps, features)`. 187 - If `data_format='channels_first'`: 188 3D tensor with shape `(batch_size, features, downsampled_steps)`. 189 """ 190 191 def __init__(self, pool_size=2, strides=None, 192 padding='valid', data_format='channels_last', **kwargs): 193 194 super(MaxPooling1D, self).__init__( 195 functools.partial(backend.pool2d, pool_mode='max'), 196 pool_size=pool_size, 197 strides=strides, 198 padding=padding, 199 data_format=data_format, 200 **kwargs) 201 202 203@keras_export('keras.layers.AveragePooling1D', 'keras.layers.AvgPool1D') 204class AveragePooling1D(Pooling1D): 205 """Average pooling for temporal data. 206 207 Args: 208 pool_size: Integer, size of the average pooling windows. 209 strides: Integer, or None. Factor by which to downscale. 210 E.g. 2 will halve the input. 211 If None, it will default to `pool_size`. 212 padding: One of `"valid"` or `"same"` (case-insensitive). 213 `"valid"` means no padding. `"same"` results in padding evenly to 214 the left/right or up/down of the input such that output has the same 215 height/width dimension as the input. 216 data_format: A string, 217 one of `channels_last` (default) or `channels_first`. 218 The ordering of the dimensions in the inputs. 219 `channels_last` corresponds to inputs with shape 220 `(batch, steps, features)` while `channels_first` 221 corresponds to inputs with shape 222 `(batch, features, steps)`. 223 224 Input shape: 225 - If `data_format='channels_last'`: 226 3D tensor with shape `(batch_size, steps, features)`. 227 - If `data_format='channels_first'`: 228 3D tensor with shape `(batch_size, features, steps)`. 229 230 Output shape: 231 - If `data_format='channels_last'`: 232 3D tensor with shape `(batch_size, downsampled_steps, features)`. 233 - If `data_format='channels_first'`: 234 3D tensor with shape `(batch_size, features, downsampled_steps)`. 235 """ 236 237 def __init__(self, pool_size=2, strides=None, 238 padding='valid', data_format='channels_last', **kwargs): 239 super(AveragePooling1D, self).__init__( 240 functools.partial(backend.pool2d, pool_mode='avg'), 241 pool_size=pool_size, 242 strides=strides, 243 padding=padding, 244 data_format=data_format, 245 **kwargs) 246 247 248class Pooling2D(Layer): 249 """Pooling layer for arbitrary pooling functions, for 2D inputs (e.g. images). 250 251 This class only exists for code reuse. It will never be an exposed API. 252 253 Args: 254 pool_function: The pooling function to apply, e.g. `tf.nn.max_pool2d`. 255 pool_size: An integer or tuple/list of 2 integers: (pool_height, pool_width) 256 specifying the size of the pooling window. 257 Can be a single integer to specify the same value for 258 all spatial dimensions. 259 strides: An integer or tuple/list of 2 integers, 260 specifying the strides of the pooling operation. 261 Can be a single integer to specify the same value for 262 all spatial dimensions. 263 padding: A string. The padding method, either 'valid' or 'same'. 264 Case-insensitive. 265 data_format: A string, one of `channels_last` (default) or `channels_first`. 266 The ordering of the dimensions in the inputs. 267 `channels_last` corresponds to inputs with shape 268 `(batch, height, width, channels)` while `channels_first` corresponds to 269 inputs with shape `(batch, channels, height, width)`. 270 name: A string, the name of the layer. 271 """ 272 273 def __init__(self, pool_function, pool_size, strides, 274 padding='valid', data_format=None, 275 name=None, **kwargs): 276 super(Pooling2D, self).__init__(name=name, **kwargs) 277 if data_format is None: 278 data_format = backend.image_data_format() 279 if strides is None: 280 strides = pool_size 281 self.pool_function = pool_function 282 self.pool_size = conv_utils.normalize_tuple(pool_size, 2, 'pool_size') 283 self.strides = conv_utils.normalize_tuple(strides, 2, 'strides') 284 self.padding = conv_utils.normalize_padding(padding) 285 self.data_format = conv_utils.normalize_data_format(data_format) 286 self.input_spec = InputSpec(ndim=4) 287 288 def call(self, inputs): 289 if self.data_format == 'channels_last': 290 pool_shape = (1,) + self.pool_size + (1,) 291 strides = (1,) + self.strides + (1,) 292 else: 293 pool_shape = (1, 1) + self.pool_size 294 strides = (1, 1) + self.strides 295 outputs = self.pool_function( 296 inputs, 297 ksize=pool_shape, 298 strides=strides, 299 padding=self.padding.upper(), 300 data_format=conv_utils.convert_data_format(self.data_format, 4)) 301 return outputs 302 303 def compute_output_shape(self, input_shape): 304 input_shape = tensor_shape.TensorShape(input_shape).as_list() 305 if self.data_format == 'channels_first': 306 rows = input_shape[2] 307 cols = input_shape[3] 308 else: 309 rows = input_shape[1] 310 cols = input_shape[2] 311 rows = conv_utils.conv_output_length(rows, self.pool_size[0], self.padding, 312 self.strides[0]) 313 cols = conv_utils.conv_output_length(cols, self.pool_size[1], self.padding, 314 self.strides[1]) 315 if self.data_format == 'channels_first': 316 return tensor_shape.TensorShape( 317 [input_shape[0], input_shape[1], rows, cols]) 318 else: 319 return tensor_shape.TensorShape( 320 [input_shape[0], rows, cols, input_shape[3]]) 321 322 def get_config(self): 323 config = { 324 'pool_size': self.pool_size, 325 'padding': self.padding, 326 'strides': self.strides, 327 'data_format': self.data_format 328 } 329 base_config = super(Pooling2D, self).get_config() 330 return dict(list(base_config.items()) + list(config.items())) 331 332 333@keras_export('keras.layers.MaxPool2D', 'keras.layers.MaxPooling2D') 334class MaxPooling2D(Pooling2D): 335 """Max pooling operation for 2D spatial data. 336 337 Downsamples the input representation by taking the maximum value over the 338 window defined by `pool_size` for each dimension along the features axis. 339 The window is shifted by `strides` in each dimension. The resulting output 340 when using "valid" padding option has a shape(number of rows or columns) of: 341 `output_shape = math.floor((input_shape - pool_size) / strides) + 1` 342 (when input_shape >= pool_size) 343 344 The resulting output shape when using the "same" padding option is: 345 `output_shape = math.floor((input_shape - 1) / strides) + 1` 346 347 For example, for stride=(1,1) and padding="valid": 348 349 >>> x = tf.constant([[1., 2., 3.], 350 ... [4., 5., 6.], 351 ... [7., 8., 9.]]) 352 >>> x = tf.reshape(x, [1, 3, 3, 1]) 353 >>> max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), 354 ... strides=(1, 1), padding='valid') 355 >>> max_pool_2d(x) 356 <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy= 357 array([[[[5.], 358 [6.]], 359 [[8.], 360 [9.]]]], dtype=float32)> 361 362 For example, for stride=(2,2) and padding="valid": 363 364 >>> x = tf.constant([[1., 2., 3., 4.], 365 ... [5., 6., 7., 8.], 366 ... [9., 10., 11., 12.]]) 367 >>> x = tf.reshape(x, [1, 3, 4, 1]) 368 >>> max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), 369 ... strides=(2, 2), padding='valid') 370 >>> max_pool_2d(x) 371 <tf.Tensor: shape=(1, 1, 2, 1), dtype=float32, numpy= 372 array([[[[6.], 373 [8.]]]], dtype=float32)> 374 375 Usage Example: 376 377 >>> input_image = tf.constant([[[[1.], [1.], [2.], [4.]], 378 ... [[2.], [2.], [3.], [2.]], 379 ... [[4.], [1.], [1.], [1.]], 380 ... [[2.], [2.], [1.], [4.]]]]) 381 >>> output = tf.constant([[[[1], [0]], 382 ... [[0], [1]]]]) 383 >>> model = tf.keras.models.Sequential() 384 >>> model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), 385 ... input_shape=(4,4,1))) 386 >>> model.compile('adam', 'mean_squared_error') 387 >>> model.predict(input_image, steps=1) 388 array([[[[2.], 389 [4.]], 390 [[4.], 391 [4.]]]], dtype=float32) 392 393 For example, for stride=(1,1) and padding="same": 394 395 >>> x = tf.constant([[1., 2., 3.], 396 ... [4., 5., 6.], 397 ... [7., 8., 9.]]) 398 >>> x = tf.reshape(x, [1, 3, 3, 1]) 399 >>> max_pool_2d = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), 400 ... strides=(1, 1), padding='same') 401 >>> max_pool_2d(x) 402 <tf.Tensor: shape=(1, 3, 3, 1), dtype=float32, numpy= 403 array([[[[5.], 404 [6.], 405 [6.]], 406 [[8.], 407 [9.], 408 [9.]], 409 [[8.], 410 [9.], 411 [9.]]]], dtype=float32)> 412 413 Args: 414 pool_size: integer or tuple of 2 integers, 415 window size over which to take the maximum. 416 `(2, 2)` will take the max value over a 2x2 pooling window. 417 If only one integer is specified, the same window length 418 will be used for both dimensions. 419 strides: Integer, tuple of 2 integers, or None. 420 Strides values. Specifies how far the pooling window moves 421 for each pooling step. If None, it will default to `pool_size`. 422 padding: One of `"valid"` or `"same"` (case-insensitive). 423 `"valid"` means no padding. `"same"` results in padding evenly to 424 the left/right or up/down of the input such that output has the same 425 height/width dimension as the input. 426 data_format: A string, 427 one of `channels_last` (default) or `channels_first`. 428 The ordering of the dimensions in the inputs. 429 `channels_last` corresponds to inputs with shape 430 `(batch, height, width, channels)` while `channels_first` 431 corresponds to inputs with shape 432 `(batch, channels, height, width)`. 433 It defaults to the `image_data_format` value found in your 434 Keras config file at `~/.keras/keras.json`. 435 If you never set it, then it will be "channels_last". 436 437 Input shape: 438 - If `data_format='channels_last'`: 439 4D tensor with shape `(batch_size, rows, cols, channels)`. 440 - If `data_format='channels_first'`: 441 4D tensor with shape `(batch_size, channels, rows, cols)`. 442 443 Output shape: 444 - If `data_format='channels_last'`: 445 4D tensor with shape `(batch_size, pooled_rows, pooled_cols, channels)`. 446 - If `data_format='channels_first'`: 447 4D tensor with shape `(batch_size, channels, pooled_rows, pooled_cols)`. 448 449 Returns: 450 A tensor of rank 4 representing the maximum pooled values. See above for 451 output shape. 452 """ 453 454 def __init__(self, 455 pool_size=(2, 2), 456 strides=None, 457 padding='valid', 458 data_format=None, 459 **kwargs): 460 super(MaxPooling2D, self).__init__( 461 nn.max_pool, 462 pool_size=pool_size, strides=strides, 463 padding=padding, data_format=data_format, **kwargs) 464 465 466@keras_export('keras.layers.AveragePooling2D', 'keras.layers.AvgPool2D') 467class AveragePooling2D(Pooling2D): 468 """Average pooling operation for spatial data. 469 470 Args: 471 pool_size: integer or tuple of 2 integers, 472 factors by which to downscale (vertical, horizontal). 473 `(2, 2)` will halve the input in both spatial dimension. 474 If only one integer is specified, the same window length 475 will be used for both dimensions. 476 strides: Integer, tuple of 2 integers, or None. 477 Strides values. 478 If None, it will default to `pool_size`. 479 padding: One of `"valid"` or `"same"` (case-insensitive). 480 `"valid"` means no padding. `"same"` results in padding evenly to 481 the left/right or up/down of the input such that output has the same 482 height/width dimension as the input. 483 data_format: A string, 484 one of `channels_last` (default) or `channels_first`. 485 The ordering of the dimensions in the inputs. 486 `channels_last` corresponds to inputs with shape 487 `(batch, height, width, channels)` while `channels_first` 488 corresponds to inputs with shape 489 `(batch, channels, height, width)`. 490 It defaults to the `image_data_format` value found in your 491 Keras config file at `~/.keras/keras.json`. 492 If you never set it, then it will be "channels_last". 493 494 Input shape: 495 - If `data_format='channels_last'`: 496 4D tensor with shape `(batch_size, rows, cols, channels)`. 497 - If `data_format='channels_first'`: 498 4D tensor with shape `(batch_size, channels, rows, cols)`. 499 500 Output shape: 501 - If `data_format='channels_last'`: 502 4D tensor with shape `(batch_size, pooled_rows, pooled_cols, channels)`. 503 - If `data_format='channels_first'`: 504 4D tensor with shape `(batch_size, channels, pooled_rows, pooled_cols)`. 505 """ 506 507 def __init__(self, 508 pool_size=(2, 2), 509 strides=None, 510 padding='valid', 511 data_format=None, 512 **kwargs): 513 super(AveragePooling2D, self).__init__( 514 nn.avg_pool, 515 pool_size=pool_size, strides=strides, 516 padding=padding, data_format=data_format, **kwargs) 517 518 519class Pooling3D(Layer): 520 """Pooling layer for arbitrary pooling functions, for 3D inputs. 521 522 This class only exists for code reuse. It will never be an exposed API. 523 524 Args: 525 pool_function: The pooling function to apply, e.g. `tf.nn.max_pool2d`. 526 pool_size: An integer or tuple/list of 3 integers: 527 (pool_depth, pool_height, pool_width) 528 specifying the size of the pooling window. 529 Can be a single integer to specify the same value for 530 all spatial dimensions. 531 strides: An integer or tuple/list of 3 integers, 532 specifying the strides of the pooling operation. 533 Can be a single integer to specify the same value for 534 all spatial dimensions. 535 padding: A string. The padding method, either 'valid' or 'same'. 536 Case-insensitive. 537 data_format: A string, one of `channels_last` (default) or `channels_first`. 538 The ordering of the dimensions in the inputs. 539 `channels_last` corresponds to inputs with shape 540 `(batch, depth, height, width, channels)` 541 while `channels_first` corresponds to 542 inputs with shape `(batch, channels, depth, height, width)`. 543 name: A string, the name of the layer. 544 """ 545 546 def __init__(self, pool_function, pool_size, strides, 547 padding='valid', data_format='channels_last', 548 name=None, **kwargs): 549 super(Pooling3D, self).__init__(name=name, **kwargs) 550 if data_format is None: 551 data_format = backend.image_data_format() 552 if strides is None: 553 strides = pool_size 554 self.pool_function = pool_function 555 self.pool_size = conv_utils.normalize_tuple(pool_size, 3, 'pool_size') 556 self.strides = conv_utils.normalize_tuple(strides, 3, 'strides') 557 self.padding = conv_utils.normalize_padding(padding) 558 self.data_format = conv_utils.normalize_data_format(data_format) 559 self.input_spec = InputSpec(ndim=5) 560 561 def call(self, inputs): 562 pool_shape = (1,) + self.pool_size + (1,) 563 strides = (1,) + self.strides + (1,) 564 565 if self.data_format == 'channels_first': 566 # TF does not support `channels_first` with 3D pooling operations, 567 # so we must handle this case manually. 568 # TODO(fchollet): remove this when TF pooling is feature-complete. 569 inputs = array_ops.transpose(inputs, (0, 2, 3, 4, 1)) 570 571 outputs = self.pool_function( 572 inputs, 573 ksize=pool_shape, 574 strides=strides, 575 padding=self.padding.upper()) 576 577 if self.data_format == 'channels_first': 578 outputs = array_ops.transpose(outputs, (0, 4, 1, 2, 3)) 579 return outputs 580 581 def compute_output_shape(self, input_shape): 582 input_shape = tensor_shape.TensorShape(input_shape).as_list() 583 if self.data_format == 'channels_first': 584 len_dim1 = input_shape[2] 585 len_dim2 = input_shape[3] 586 len_dim3 = input_shape[4] 587 else: 588 len_dim1 = input_shape[1] 589 len_dim2 = input_shape[2] 590 len_dim3 = input_shape[3] 591 len_dim1 = conv_utils.conv_output_length(len_dim1, self.pool_size[0], 592 self.padding, self.strides[0]) 593 len_dim2 = conv_utils.conv_output_length(len_dim2, self.pool_size[1], 594 self.padding, self.strides[1]) 595 len_dim3 = conv_utils.conv_output_length(len_dim3, self.pool_size[2], 596 self.padding, self.strides[2]) 597 if self.data_format == 'channels_first': 598 return tensor_shape.TensorShape( 599 [input_shape[0], input_shape[1], len_dim1, len_dim2, len_dim3]) 600 else: 601 return tensor_shape.TensorShape( 602 [input_shape[0], len_dim1, len_dim2, len_dim3, input_shape[4]]) 603 604 def get_config(self): 605 config = { 606 'pool_size': self.pool_size, 607 'padding': self.padding, 608 'strides': self.strides, 609 'data_format': self.data_format 610 } 611 base_config = super(Pooling3D, self).get_config() 612 return dict(list(base_config.items()) + list(config.items())) 613 614 615@keras_export('keras.layers.MaxPool3D', 'keras.layers.MaxPooling3D') 616class MaxPooling3D(Pooling3D): 617 """Max pooling operation for 3D data (spatial or spatio-temporal). 618 619 Args: 620 pool_size: Tuple of 3 integers, 621 factors by which to downscale (dim1, dim2, dim3). 622 `(2, 2, 2)` will halve the size of the 3D input in each dimension. 623 strides: tuple of 3 integers, or None. Strides values. 624 padding: One of `"valid"` or `"same"` (case-insensitive). 625 `"valid"` means no padding. `"same"` results in padding evenly to 626 the left/right or up/down of the input such that output has the same 627 height/width dimension as the input. 628 data_format: A string, 629 one of `channels_last` (default) or `channels_first`. 630 The ordering of the dimensions in the inputs. 631 `channels_last` corresponds to inputs with shape 632 `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` 633 while `channels_first` corresponds to inputs with shape 634 `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. 635 It defaults to the `image_data_format` value found in your 636 Keras config file at `~/.keras/keras.json`. 637 If you never set it, then it will be "channels_last". 638 639 Input shape: 640 - If `data_format='channels_last'`: 641 5D tensor with shape: 642 `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` 643 - If `data_format='channels_first'`: 644 5D tensor with shape: 645 `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` 646 647 Output shape: 648 - If `data_format='channels_last'`: 649 5D tensor with shape: 650 `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)` 651 - If `data_format='channels_first'`: 652 5D tensor with shape: 653 `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)` 654 """ 655 656 def __init__(self, 657 pool_size=(2, 2, 2), 658 strides=None, 659 padding='valid', 660 data_format=None, 661 **kwargs): 662 super(MaxPooling3D, self).__init__( 663 nn.max_pool3d, 664 pool_size=pool_size, strides=strides, 665 padding=padding, data_format=data_format, **kwargs) 666 667 668@keras_export('keras.layers.AveragePooling3D', 'keras.layers.AvgPool3D') 669class AveragePooling3D(Pooling3D): 670 """Average pooling operation for 3D data (spatial or spatio-temporal). 671 672 Args: 673 pool_size: tuple of 3 integers, 674 factors by which to downscale (dim1, dim2, dim3). 675 `(2, 2, 2)` will halve the size of the 3D input in each dimension. 676 strides: tuple of 3 integers, or None. Strides values. 677 padding: One of `"valid"` or `"same"` (case-insensitive). 678 `"valid"` means no padding. `"same"` results in padding evenly to 679 the left/right or up/down of the input such that output has the same 680 height/width dimension as the input. 681 data_format: A string, 682 one of `channels_last` (default) or `channels_first`. 683 The ordering of the dimensions in the inputs. 684 `channels_last` corresponds to inputs with shape 685 `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` 686 while `channels_first` corresponds to inputs with shape 687 `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. 688 It defaults to the `image_data_format` value found in your 689 Keras config file at `~/.keras/keras.json`. 690 If you never set it, then it will be "channels_last". 691 692 Input shape: 693 - If `data_format='channels_last'`: 694 5D tensor with shape: 695 `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` 696 - If `data_format='channels_first'`: 697 5D tensor with shape: 698 `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` 699 700 Output shape: 701 - If `data_format='channels_last'`: 702 5D tensor with shape: 703 `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)` 704 - If `data_format='channels_first'`: 705 5D tensor with shape: 706 `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)` 707 """ 708 709 def __init__(self, 710 pool_size=(2, 2, 2), 711 strides=None, 712 padding='valid', 713 data_format=None, 714 **kwargs): 715 super(AveragePooling3D, self).__init__( 716 nn.avg_pool3d, 717 pool_size=pool_size, strides=strides, 718 padding=padding, data_format=data_format, **kwargs) 719 720 721class GlobalPooling1D(Layer): 722 """Abstract class for different global pooling 1D layers.""" 723 724 def __init__(self, data_format='channels_last', **kwargs): 725 super(GlobalPooling1D, self).__init__(**kwargs) 726 self.input_spec = InputSpec(ndim=3) 727 self.data_format = conv_utils.normalize_data_format(data_format) 728 729 def compute_output_shape(self, input_shape): 730 input_shape = tensor_shape.TensorShape(input_shape).as_list() 731 if self.data_format == 'channels_first': 732 return tensor_shape.TensorShape([input_shape[0], input_shape[1]]) 733 else: 734 return tensor_shape.TensorShape([input_shape[0], input_shape[2]]) 735 736 def call(self, inputs): 737 raise NotImplementedError 738 739 def get_config(self): 740 config = {'data_format': self.data_format} 741 base_config = super(GlobalPooling1D, self).get_config() 742 return dict(list(base_config.items()) + list(config.items())) 743 744 745@keras_export('keras.layers.GlobalAveragePooling1D', 746 'keras.layers.GlobalAvgPool1D') 747class GlobalAveragePooling1D(GlobalPooling1D): 748 """Global average pooling operation for temporal data. 749 750 Examples: 751 752 >>> input_shape = (2, 3, 4) 753 >>> x = tf.random.normal(input_shape) 754 >>> y = tf.keras.layers.GlobalAveragePooling1D()(x) 755 >>> print(y.shape) 756 (2, 4) 757 758 Args: 759 data_format: A string, 760 one of `channels_last` (default) or `channels_first`. 761 The ordering of the dimensions in the inputs. 762 `channels_last` corresponds to inputs with shape 763 `(batch, steps, features)` while `channels_first` 764 corresponds to inputs with shape 765 `(batch, features, steps)`. 766 767 Call arguments: 768 inputs: A 3D tensor. 769 mask: Binary tensor of shape `(batch_size, steps)` indicating whether 770 a given step should be masked (excluded from the average). 771 772 Input shape: 773 - If `data_format='channels_last'`: 774 3D tensor with shape: 775 `(batch_size, steps, features)` 776 - If `data_format='channels_first'`: 777 3D tensor with shape: 778 `(batch_size, features, steps)` 779 780 Output shape: 781 2D tensor with shape `(batch_size, features)`. 782 """ 783 784 def __init__(self, data_format='channels_last', **kwargs): 785 super(GlobalAveragePooling1D, self).__init__(data_format=data_format, 786 **kwargs) 787 self.supports_masking = True 788 789 def call(self, inputs, mask=None): 790 steps_axis = 1 if self.data_format == 'channels_last' else 2 791 if mask is not None: 792 mask = math_ops.cast(mask, backend.floatx()) 793 mask = array_ops.expand_dims( 794 mask, 2 if self.data_format == 'channels_last' else 1) 795 inputs *= mask 796 return backend.sum(inputs, axis=steps_axis) / math_ops.reduce_sum( 797 mask, axis=steps_axis) 798 else: 799 return backend.mean(inputs, axis=steps_axis) 800 801 def compute_mask(self, inputs, mask=None): 802 return None 803 804 805@keras_export('keras.layers.GlobalMaxPool1D', 'keras.layers.GlobalMaxPooling1D') 806class GlobalMaxPooling1D(GlobalPooling1D): 807 """Global max pooling operation for 1D temporal data. 808 809 Downsamples the input representation by taking the maximum value over 810 the time dimension. 811 812 For example: 813 814 >>> x = tf.constant([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]]) 815 >>> x = tf.reshape(x, [3, 3, 1]) 816 >>> x 817 <tf.Tensor: shape=(3, 3, 1), dtype=float32, numpy= 818 array([[[1.], [2.], [3.]], 819 [[4.], [5.], [6.]], 820 [[7.], [8.], [9.]]], dtype=float32)> 821 >>> max_pool_1d = tf.keras.layers.GlobalMaxPooling1D() 822 >>> max_pool_1d(x) 823 <tf.Tensor: shape=(3, 1), dtype=float32, numpy= 824 array([[3.], 825 [6.], 826 [9.], dtype=float32)> 827 828 Args: 829 data_format: A string, 830 one of `channels_last` (default) or `channels_first`. 831 The ordering of the dimensions in the inputs. 832 `channels_last` corresponds to inputs with shape 833 `(batch, steps, features)` while `channels_first` 834 corresponds to inputs with shape 835 `(batch, features, steps)`. 836 837 Input shape: 838 - If `data_format='channels_last'`: 839 3D tensor with shape: 840 `(batch_size, steps, features)` 841 - If `data_format='channels_first'`: 842 3D tensor with shape: 843 `(batch_size, features, steps)` 844 845 Output shape: 846 2D tensor with shape `(batch_size, features)`. 847 """ 848 849 def call(self, inputs): 850 steps_axis = 1 if self.data_format == 'channels_last' else 2 851 return backend.max(inputs, axis=steps_axis) 852 853 854class GlobalPooling2D(Layer): 855 """Abstract class for different global pooling 2D layers. 856 """ 857 858 def __init__(self, data_format=None, **kwargs): 859 super(GlobalPooling2D, self).__init__(**kwargs) 860 self.data_format = conv_utils.normalize_data_format(data_format) 861 self.input_spec = InputSpec(ndim=4) 862 863 def compute_output_shape(self, input_shape): 864 input_shape = tensor_shape.TensorShape(input_shape).as_list() 865 if self.data_format == 'channels_last': 866 return tensor_shape.TensorShape([input_shape[0], input_shape[3]]) 867 else: 868 return tensor_shape.TensorShape([input_shape[0], input_shape[1]]) 869 870 def call(self, inputs): 871 raise NotImplementedError 872 873 def get_config(self): 874 config = {'data_format': self.data_format} 875 base_config = super(GlobalPooling2D, self).get_config() 876 return dict(list(base_config.items()) + list(config.items())) 877 878 879@keras_export('keras.layers.GlobalAveragePooling2D', 880 'keras.layers.GlobalAvgPool2D') 881class GlobalAveragePooling2D(GlobalPooling2D): 882 """Global average pooling operation for spatial data. 883 884 Examples: 885 886 >>> input_shape = (2, 4, 5, 3) 887 >>> x = tf.random.normal(input_shape) 888 >>> y = tf.keras.layers.GlobalAveragePooling2D()(x) 889 >>> print(y.shape) 890 (2, 3) 891 892 Args: 893 data_format: A string, 894 one of `channels_last` (default) or `channels_first`. 895 The ordering of the dimensions in the inputs. 896 `channels_last` corresponds to inputs with shape 897 `(batch, height, width, channels)` while `channels_first` 898 corresponds to inputs with shape 899 `(batch, channels, height, width)`. 900 It defaults to the `image_data_format` value found in your 901 Keras config file at `~/.keras/keras.json`. 902 If you never set it, then it will be "channels_last". 903 904 Input shape: 905 - If `data_format='channels_last'`: 906 4D tensor with shape `(batch_size, rows, cols, channels)`. 907 - If `data_format='channels_first'`: 908 4D tensor with shape `(batch_size, channels, rows, cols)`. 909 910 Output shape: 911 2D tensor with shape `(batch_size, channels)`. 912 """ 913 914 def call(self, inputs): 915 if self.data_format == 'channels_last': 916 return backend.mean(inputs, axis=[1, 2]) 917 else: 918 return backend.mean(inputs, axis=[2, 3]) 919 920 921@keras_export('keras.layers.GlobalMaxPool2D', 'keras.layers.GlobalMaxPooling2D') 922class GlobalMaxPooling2D(GlobalPooling2D): 923 """Global max pooling operation for spatial data. 924 925 Examples: 926 927 >>> input_shape = (2, 4, 5, 3) 928 >>> x = tf.random.normal(input_shape) 929 >>> y = tf.keras.layers.GlobalMaxPool2D()(x) 930 >>> print(y.shape) 931 (2, 3) 932 933 Args: 934 data_format: A string, 935 one of `channels_last` (default) or `channels_first`. 936 The ordering of the dimensions in the inputs. 937 `channels_last` corresponds to inputs with shape 938 `(batch, height, width, channels)` while `channels_first` 939 corresponds to inputs with shape 940 `(batch, channels, height, width)`. 941 It defaults to the `image_data_format` value found in your 942 Keras config file at `~/.keras/keras.json`. 943 If you never set it, then it will be "channels_last". 944 945 Input shape: 946 - If `data_format='channels_last'`: 947 4D tensor with shape `(batch_size, rows, cols, channels)`. 948 - If `data_format='channels_first'`: 949 4D tensor with shape `(batch_size, channels, rows, cols)`. 950 951 Output shape: 952 2D tensor with shape `(batch_size, channels)`. 953 """ 954 955 def call(self, inputs): 956 if self.data_format == 'channels_last': 957 return backend.max(inputs, axis=[1, 2]) 958 else: 959 return backend.max(inputs, axis=[2, 3]) 960 961 962class GlobalPooling3D(Layer): 963 """Abstract class for different global pooling 3D layers.""" 964 965 def __init__(self, data_format=None, **kwargs): 966 super(GlobalPooling3D, self).__init__(**kwargs) 967 self.data_format = conv_utils.normalize_data_format(data_format) 968 self.input_spec = InputSpec(ndim=5) 969 970 def compute_output_shape(self, input_shape): 971 input_shape = tensor_shape.TensorShape(input_shape).as_list() 972 if self.data_format == 'channels_last': 973 return tensor_shape.TensorShape([input_shape[0], input_shape[4]]) 974 else: 975 return tensor_shape.TensorShape([input_shape[0], input_shape[1]]) 976 977 def call(self, inputs): 978 raise NotImplementedError 979 980 def get_config(self): 981 config = {'data_format': self.data_format} 982 base_config = super(GlobalPooling3D, self).get_config() 983 return dict(list(base_config.items()) + list(config.items())) 984 985 986@keras_export('keras.layers.GlobalAveragePooling3D', 987 'keras.layers.GlobalAvgPool3D') 988class GlobalAveragePooling3D(GlobalPooling3D): 989 """Global Average pooling operation for 3D data. 990 991 Args: 992 data_format: A string, 993 one of `channels_last` (default) or `channels_first`. 994 The ordering of the dimensions in the inputs. 995 `channels_last` corresponds to inputs with shape 996 `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` 997 while `channels_first` corresponds to inputs with shape 998 `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. 999 It defaults to the `image_data_format` value found in your 1000 Keras config file at `~/.keras/keras.json`. 1001 If you never set it, then it will be "channels_last". 1002 1003 Input shape: 1004 - If `data_format='channels_last'`: 1005 5D tensor with shape: 1006 `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` 1007 - If `data_format='channels_first'`: 1008 5D tensor with shape: 1009 `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` 1010 1011 Output shape: 1012 2D tensor with shape `(batch_size, channels)`. 1013 """ 1014 1015 def call(self, inputs): 1016 if self.data_format == 'channels_last': 1017 return backend.mean(inputs, axis=[1, 2, 3]) 1018 else: 1019 return backend.mean(inputs, axis=[2, 3, 4]) 1020 1021 1022@keras_export('keras.layers.GlobalMaxPool3D', 'keras.layers.GlobalMaxPooling3D') 1023class GlobalMaxPooling3D(GlobalPooling3D): 1024 """Global Max pooling operation for 3D data. 1025 1026 Args: 1027 data_format: A string, 1028 one of `channels_last` (default) or `channels_first`. 1029 The ordering of the dimensions in the inputs. 1030 `channels_last` corresponds to inputs with shape 1031 `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` 1032 while `channels_first` corresponds to inputs with shape 1033 `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. 1034 It defaults to the `image_data_format` value found in your 1035 Keras config file at `~/.keras/keras.json`. 1036 If you never set it, then it will be "channels_last". 1037 1038 Input shape: 1039 - If `data_format='channels_last'`: 1040 5D tensor with shape: 1041 `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` 1042 - If `data_format='channels_first'`: 1043 5D tensor with shape: 1044 `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` 1045 1046 Output shape: 1047 2D tensor with shape `(batch_size, channels)`. 1048 """ 1049 1050 def call(self, inputs): 1051 if self.data_format == 'channels_last': 1052 return backend.max(inputs, axis=[1, 2, 3]) 1053 else: 1054 return backend.max(inputs, axis=[2, 3, 4]) 1055 1056 1057# Aliases 1058 1059AvgPool1D = AveragePooling1D 1060MaxPool1D = MaxPooling1D 1061AvgPool2D = AveragePooling2D 1062MaxPool2D = MaxPooling2D 1063AvgPool3D = AveragePooling3D 1064MaxPool3D = MaxPooling3D 1065GlobalMaxPool1D = GlobalMaxPooling1D 1066GlobalMaxPool2D = GlobalMaxPooling2D 1067GlobalMaxPool3D = GlobalMaxPooling3D 1068GlobalAvgPool1D = GlobalAveragePooling1D 1069GlobalAvgPool2D = GlobalAveragePooling2D 1070GlobalAvgPool3D = GlobalAveragePooling3D 1071