# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# pylint: disable=invalid-name
"""MobileNet v1 models for Keras.

MobileNet is a general architecture that can be used for multiple use cases.
Depending on the use case, it can use different input sizes and different
width factors. This allows different width models to reduce the number of
multiply-adds and thereby reduce inference cost on mobile devices.

MobileNets support any input size greater than 32 x 32, with larger image
sizes offering better performance. The number of parameters and number of
multiply-adds can be modified by using the `alpha` parameter, which
increases/decreases the number of filters in each layer. By altering the
image size and the `alpha` parameter, all 16 models from the paper can be
built, with ImageNet weights provided.

The paper demonstrates the performance of MobileNets using `alpha` values of
1.0 (also called 100% MobileNet), 0.75, 0.5 and 0.25. For each of these
`alpha` values, weights for 4 different input image sizes are provided
(224, 192, 160, 128).

The following table describes the size and accuracy of the 100% MobileNet
on size 224 x 224:
----------------------------------------------------------------------------
Width Multiplier (alpha) | ImageNet Acc | Multiply-Adds (M) | Params (M)
----------------------------------------------------------------------------
|   1.0 MobileNet-224    |    70.6 %    |        529        |     4.2     |
|   0.75 MobileNet-224   |    68.4 %    |        325        |     2.6     |
|   0.50 MobileNet-224   |    63.7 %    |        149        |     1.3     |
|   0.25 MobileNet-224   |    50.6 %    |         41        |     0.5     |
----------------------------------------------------------------------------

The following table describes the performance of
the 100% MobileNet on various input sizes:
------------------------------------------------------------------------
      Resolution      | ImageNet Acc | Multiply-Adds (M) | Params (M)
------------------------------------------------------------------------
|  1.0 MobileNet-224  |    70.6 %    |        529        |     4.2     |
|  1.0 MobileNet-192  |    69.1 %    |        529        |     4.2     |
|  1.0 MobileNet-160  |    67.2 %    |        529        |     4.2     |
|  1.0 MobileNet-128  |    64.4 %    |        529        |     4.2     |
------------------------------------------------------------------------

Reference:
  - [MobileNets: Efficient Convolutional Neural Networks
     for Mobile Vision Applications](
      https://arxiv.org/abs/1704.04861)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.keras import backend
from tensorflow.python.keras.applications import imagenet_utils
from tensorflow.python.keras.engine import training
from tensorflow.python.keras.layers import VersionAwareLayers
from tensorflow.python.keras.utils import data_utils
from tensorflow.python.keras.utils import layer_utils
from tensorflow.python.lib.io import file_io
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util.tf_export import keras_export

BASE_WEIGHT_PATH = ('https://storage.googleapis.com/tensorflow/'
                    'keras-applications/mobilenet/')
layers = None


@keras_export('keras.applications.mobilenet.MobileNet',
              'keras.applications.MobileNet')
def MobileNet(input_shape=None,
              alpha=1.0,
              depth_multiplier=1,
              dropout=1e-3,
              include_top=True,
              weights='imagenet',
              input_tensor=None,
              pooling=None,
              classes=1000,
              classifier_activation='softmax',
              **kwargs):
  """Instantiates the MobileNet architecture.

  Reference:
  - [MobileNets: Efficient Convolutional Neural Networks
     for Mobile Vision Applications](
      https://arxiv.org/abs/1704.04861)

  Optionally loads weights pre-trained on ImageNet.
  Note that the data format convention used by the model is
  the one specified in `tf.keras.backend.image_data_format()`.

  Note: each Keras Application expects a specific kind of input preprocessing.
  For MobileNet, call `tf.keras.applications.mobilenet.preprocess_input`
  on your inputs before passing them to the model.

  Args:
    input_shape: Optional shape tuple, only to be specified if `include_top`
      is `False` (otherwise the input shape has to be `(224, 224, 3)` (with
      `channels_last` data format) or `(3, 224, 224)` (with `channels_first`
      data format). It should have exactly 3 input channels, and width and
      height should be no smaller than 32. E.g. `(200, 200, 3)` would be one
      valid value. Defaults to `None`.
      `input_shape` will be ignored if `input_tensor` is provided.
    alpha: Controls the width of the network. This is known as the width
      multiplier in the MobileNet paper.
      - If `alpha` < 1.0, proportionally decreases the number of filters
        in each layer.
      - If `alpha` > 1.0, proportionally increases the number of filters
        in each layer.
      - If `alpha` = 1, the default number of filters from the paper is
        used at each layer.
      Defaults to 1.0.
    depth_multiplier: Depth multiplier for depthwise convolution, i.e. the
      number of depthwise convolution output channels for each input
      channel. Defaults to 1.
    dropout: Dropout rate. Defaults to 0.001.
    include_top: Boolean, whether to include the fully-connected layer at the
      top of the network. Defaults to `True`.
    weights: One of `None` (random initialization), `'imagenet'`
      (pre-training on ImageNet), or the path to the weights file to be
      loaded. Defaults to `'imagenet'`.
    input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to
      use as image input for the model. `input_tensor` is useful for sharing
      inputs between multiple different networks. Defaults to `None`.
    pooling: Optional pooling mode for feature extraction when `include_top`
      is `False`.
      - `None` (default) means that the output of the model will be
        the 4D tensor output of the last convolutional block.
      - `avg` means that global average pooling will be applied to the
        output of the last convolutional block, and thus the output of
        the model will be a 2D tensor.
      - `max` means that global max pooling will be applied.
    classes: Optional number of classes to classify images into, only to be
      specified if `include_top` is `True`, and if no `weights` argument is
      specified. Defaults to 1000.
    classifier_activation: A `str` or callable. The activation function to
      use on the "top" layer. Ignored unless `include_top=True`. Set
      `classifier_activation=None` to return the logits of the "top" layer.
    **kwargs: For backwards compatibility only.

  Returns:
    A `keras.Model` instance.

  Raises:
    ValueError: in case of invalid argument for `weights`,
      or invalid input shape.
    ValueError: if `classifier_activation` is not `softmax` or `None` when
      using a pretrained top layer.
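
  Usage example, as a minimal classification sketch. The file name
  `elephant.jpg` and the use of `tf.keras.preprocessing.image` are
  illustrative assumptions, not part of this module:

  ```python
  import numpy as np
  import tensorflow as tf

  # Build the 1.0 MobileNet-224 classifier with ImageNet weights.
  model = tf.keras.applications.MobileNet(weights='imagenet')

  # Load and batch an image; 'elephant.jpg' is a hypothetical local file.
  img = tf.keras.preprocessing.image.load_img(
      'elephant.jpg', target_size=(224, 224))
  x = tf.keras.preprocessing.image.img_to_array(img)
  x = np.expand_dims(x, axis=0)

  # Scale pixel values to the [-1, 1] range expected by MobileNet.
  x = tf.keras.applications.mobilenet.preprocess_input(x)

  preds = model.predict(x)
  print(tf.keras.applications.mobilenet.decode_predictions(preds, top=3))

  # For feature extraction instead, a headless variant can be built, e.g.:
  # base = tf.keras.applications.MobileNet(
  #     weights='imagenet', include_top=False, pooling='avg')
  ```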
  """
  global layers
  if 'layers' in kwargs:
    layers = kwargs.pop('layers')
  else:
    layers = VersionAwareLayers()
  if kwargs:
    raise ValueError('Unknown argument(s): %s' % (kwargs,))
  if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)):
    raise ValueError('The `weights` argument should be either '
                     '`None` (random initialization), `imagenet` '
                     '(pre-training on ImageNet), '
                     'or the path to the weights file to be loaded.')

  if weights == 'imagenet' and include_top and classes != 1000:
    raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
                     'as true, `classes` should be 1000')

  # Determine proper input shape and default size.
  if input_shape is None:
    default_size = 224
  else:
    if backend.image_data_format() == 'channels_first':
      rows = input_shape[1]
      cols = input_shape[2]
    else:
      rows = input_shape[0]
      cols = input_shape[1]

    if rows == cols and rows in [128, 160, 192, 224]:
      default_size = rows
    else:
      default_size = 224

  input_shape = imagenet_utils.obtain_input_shape(
      input_shape,
      default_size=default_size,
      min_size=32,
      data_format=backend.image_data_format(),
      require_flatten=include_top,
      weights=weights)

  if backend.image_data_format() == 'channels_last':
    row_axis, col_axis = (0, 1)
  else:
    row_axis, col_axis = (1, 2)
  rows = input_shape[row_axis]
  cols = input_shape[col_axis]

  if weights == 'imagenet':
    if depth_multiplier != 1:
      raise ValueError('If imagenet weights are being loaded, '
                       'depth multiplier must be 1')

    if alpha not in [0.25, 0.50, 0.75, 1.0]:
      raise ValueError('If imagenet weights are being loaded, '
                       'alpha can be one of '
                       '`0.25`, `0.50`, `0.75` or `1.0` only.')

    if rows != cols or rows not in [128, 160, 192, 224]:
      rows = 224
      logging.warning('`input_shape` is undefined or non-square, '
                      'or `rows` is not in [128, 160, 192, 224]. '
                      'Weights for input shape (224, 224) will be '
                      'loaded as the default.')

  if input_tensor is None:
    img_input = layers.Input(shape=input_shape)
  else:
    if not backend.is_keras_tensor(input_tensor):
      img_input = layers.Input(tensor=input_tensor, shape=input_shape)
    else:
      img_input = input_tensor

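  # Build the MobileNet body: a strided 3x3 convolution stem followed by 13
  # depthwise-separable blocks. Spatial resolution is halved at blocks 2, 4,
  # 6 and 12, while the (alpha-scaled) channel width grows from 64 to 1024.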
  x = _conv_block(img_input, 32, alpha, strides=(2, 2))
  x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)

  x = _depthwise_conv_block(
      x, 128, alpha, depth_multiplier, strides=(2, 2), block_id=2)
  x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)

  x = _depthwise_conv_block(
      x, 256, alpha, depth_multiplier, strides=(2, 2), block_id=4)
  x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)

  x = _depthwise_conv_block(
      x, 512, alpha, depth_multiplier, strides=(2, 2), block_id=6)
  x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7)
  x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8)
  x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9)
  x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10)
  x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11)

  x = _depthwise_conv_block(
      x, 1024, alpha, depth_multiplier, strides=(2, 2), block_id=12)
  x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13)

  if include_top:
    if backend.image_data_format() == 'channels_first':
      shape = (int(1024 * alpha), 1, 1)
    else:
      shape = (1, 1, int(1024 * alpha))

    # The classifier is a 1x1 convolution on the pooled features, which is
    # equivalent to a dense layer applied to the flattened feature vector.
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Reshape(shape, name='reshape_1')(x)
    x = layers.Dropout(dropout, name='dropout')(x)
    x = layers.Conv2D(classes, (1, 1), padding='same', name='conv_preds')(x)
    x = layers.Reshape((classes,), name='reshape_2')(x)
    imagenet_utils.validate_activation(classifier_activation, weights)
    x = layers.Activation(activation=classifier_activation,
                          name='predictions')(x)
  else:
    if pooling == 'avg':
      x = layers.GlobalAveragePooling2D()(x)
    elif pooling == 'max':
      x = layers.GlobalMaxPooling2D()(x)

  # Ensure that the model takes into account
  # any potential predecessors of `input_tensor`.
  if input_tensor is not None:
    inputs = layer_utils.get_source_inputs(input_tensor)
  else:
    inputs = img_input

  # Create model.
  model = training.Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows))

  # Load weights.
  if weights == 'imagenet':
    if alpha == 1.0:
      alpha_text = '1_0'
    elif alpha == 0.75:
      alpha_text = '7_5'
    elif alpha == 0.50:
      alpha_text = '5_0'
    else:
      alpha_text = '2_5'

    if include_top:
      model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows)
    else:
      model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows)
    weight_path = BASE_WEIGHT_PATH + model_name
    weights_path = data_utils.get_file(
        model_name, weight_path, cache_subdir='models')
    model.load_weights(weights_path)
  elif weights is not None:
    model.load_weights(weights)

  return model


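# The two private helpers below implement the building blocks described in
# the paper: a standard convolution block (conv + batch norm + ReLU6) and a
# depthwise-separable block (depthwise conv + BN + ReLU6, followed by a 1x1
# pointwise conv + BN + ReLU6).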
def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
  """Adds an initial convolution layer (with batch normalization and relu6).

  Args:
    inputs: Input tensor of shape `(rows, cols, 3)` (with `channels_last`
      data format) or `(3, rows, cols)` (with `channels_first` data format).
      It should have exactly 3 input channels, and width and height should
      be no smaller than 32. E.g. `(224, 224, 3)` would be one valid value.
    filters: Integer, the dimensionality of the output space (i.e. the
      number of output filters in the convolution).
    alpha: controls the width of the network.
      - If `alpha` < 1.0, proportionally decreases the number of filters
        in each layer.
      - If `alpha` > 1.0, proportionally increases the number of filters
        in each layer.
      - If `alpha` = 1, the default number of filters from the paper is
        used at each layer.
    kernel: An integer or tuple/list of 2 integers, specifying the width and
      height of the 2D convolution window. Can be a single integer to
      specify the same value for all spatial dimensions.
    strides: An integer or tuple/list of 2 integers, specifying the strides
      of the convolution along the width and height. Can be a single integer
      to specify the same value for all spatial dimensions. Specifying any
      stride value != 1 is incompatible with specifying any `dilation_rate`
      value != 1.

  Input shape:
    4D tensor with shape: `(samples, channels, rows, cols)` if
    data_format='channels_first'
    or 4D tensor with shape: `(samples, rows, cols, channels)` if
    data_format='channels_last'.

  Output shape:
    4D tensor with shape: `(samples, filters, new_rows, new_cols)` if
    data_format='channels_first'
    or 4D tensor with shape: `(samples, new_rows, new_cols, filters)` if
    data_format='channels_last'. `rows` and `cols` values might have
    changed due to stride.

  Returns:
    Output tensor of block.
  """
  channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1
  filters = int(filters * alpha)
  x = layers.Conv2D(
      filters,
      kernel,
      padding='same',
      use_bias=False,
      strides=strides,
      name='conv1')(inputs)
  x = layers.BatchNormalization(axis=channel_axis, name='conv1_bn')(x)
  return layers.ReLU(6., name='conv1_relu')(x)


def _depthwise_conv_block(inputs,
                          pointwise_conv_filters,
                          alpha,
                          depth_multiplier=1,
                          strides=(1, 1),
                          block_id=1):
  """Adds a depthwise convolution block.

  A depthwise convolution block consists of a depthwise conv,
  batch normalization, relu6, pointwise convolution,
  batch normalization and relu6 activation.

  Args:
    inputs: Input tensor of shape `(rows, cols, channels)` (with
      `channels_last` data format) or `(channels, rows, cols)` (with
      `channels_first` data format).
    pointwise_conv_filters: Integer, the dimensionality of the output space
      (i.e. the number of output filters in the pointwise convolution).
    alpha: controls the width of the network.
      - If `alpha` < 1.0, proportionally decreases the number of filters
        in each layer.
      - If `alpha` > 1.0, proportionally increases the number of filters
        in each layer.
      - If `alpha` = 1, the default number of filters from the paper is
        used at each layer.
    depth_multiplier: The number of depthwise convolution output channels
      for each input channel. The total number of depthwise convolution
      output channels will be equal to `filters_in * depth_multiplier`.
    strides: An integer or tuple/list of 2 integers, specifying the strides
      of the convolution along the width and height. Can be a single integer
      to specify the same value for all spatial dimensions. Specifying any
      stride value != 1 is incompatible with specifying any `dilation_rate`
      value != 1.
    block_id: Integer, a unique identifier designating the block number.

  Input shape:
    4D tensor with shape: `(batch, channels, rows, cols)` if
    data_format='channels_first'
    or 4D tensor with shape: `(batch, rows, cols, channels)` if
    data_format='channels_last'.

  Output shape:
    4D tensor with shape: `(batch, filters, new_rows, new_cols)` if
    data_format='channels_first'
    or 4D tensor with shape: `(batch, new_rows, new_cols, filters)` if
    data_format='channels_last'. `rows` and `cols` values might have
    changed due to stride.

  Returns:
    Output tensor of block.
  """
  channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1
  pointwise_conv_filters = int(pointwise_conv_filters * alpha)
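
  # For strided blocks, pad explicitly by one pixel on the bottom and right
  # and run the depthwise convolution with 'valid' padding, so downsampling
  # is deterministic; stride-1 blocks simply use 'same' padding.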
  if strides == (1, 1):
    x = inputs
  else:
    x = layers.ZeroPadding2D(((0, 1), (0, 1)),
                             name='conv_pad_%d' % block_id)(inputs)
  x = layers.DepthwiseConv2D((3, 3),
                             padding='same' if strides == (1, 1) else 'valid',
                             depth_multiplier=depth_multiplier,
                             strides=strides,
                             use_bias=False,
                             name='conv_dw_%d' % block_id)(x)
  x = layers.BatchNormalization(
      axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x)
  x = layers.ReLU(6., name='conv_dw_%d_relu' % block_id)(x)

  x = layers.Conv2D(
      pointwise_conv_filters, (1, 1),
      padding='same',
      use_bias=False,
      strides=(1, 1),
      name='conv_pw_%d' % block_id)(x)
  x = layers.BatchNormalization(
      axis=channel_axis, name='conv_pw_%d_bn' % block_id)(x)
  return layers.ReLU(6., name='conv_pw_%d_relu' % block_id)(x)


@keras_export('keras.applications.mobilenet.preprocess_input')
def preprocess_input(x, data_format=None):
  # Mode 'tf' scales pixel values from [0, 255] to the [-1, 1] range.
  return imagenet_utils.preprocess_input(x, data_format=data_format, mode='tf')


@keras_export('keras.applications.mobilenet.decode_predictions')
def decode_predictions(preds, top=5):
  return imagenet_utils.decode_predictions(preds, top=top)


preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format(
    mode='',
    ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TF,
    error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC)
decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__