# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# pylint: disable=invalid-name
16"""MobileNet v1 models for Keras.
17
18MobileNet is a general architecture and can be used for multiple use cases.
19Depending on the use case, it can use different input layer size and
20different width factors. This allows different width models to reduce
21the number of multiply-adds and thereby
22reduce inference cost on mobile devices.
23
24MobileNets support any input size greater than 32 x 32, with larger image sizes
25offering better performance.
26The number of parameters and number of multiply-adds
27can be modified by using the `alpha` parameter,
28which increases/decreases the number of filters in each layer.
29By altering the image size and `alpha` parameter,
30all 16 models from the paper can be built, with ImageNet weights provided.
31
32The paper demonstrates the performance of MobileNets using `alpha` values of
331.0 (also called 100 % MobileNet), 0.75, 0.5 and 0.25.
34For each of these `alpha` values, weights for 4 different input image sizes
35are provided (224, 192, 160, 128).
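
For example, the smallest of these pretrained variants can be built through
the public `tf.keras.applications` API (a minimal sketch):

```python
import tensorflow as tf

# 0.25-width MobileNet at 128 x 128 input, with ImageNet weights.
model = tf.keras.applications.MobileNet(
    input_shape=(128, 128, 3), alpha=0.25, weights='imagenet')
model.summary()
```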

The following table describes the size and accuracy of the 100% MobileNet
on size 224 x 224:
----------------------------------------------------------------------------
Width Multiplier (alpha) | ImageNet Acc |  Multiply-Adds (M) |  Params (M)
----------------------------------------------------------------------------
|   1.0 MobileNet-224    |    70.6 %    |        569        |     4.2     |
|   0.75 MobileNet-224   |    68.4 %    |        325        |     2.6     |
|   0.50 MobileNet-224   |    63.7 %    |        149        |     1.3     |
|   0.25 MobileNet-224   |    50.6 %    |        41         |     0.5     |
----------------------------------------------------------------------------

The following table describes the performance of
the 100% MobileNet on various input sizes:
------------------------------------------------------------------------
      Resolution      | ImageNet Acc | Multiply-Adds (M) | Params (M)
------------------------------------------------------------------------
|  1.0 MobileNet-224  |    70.6 %    |        569        |     4.2     |
|  1.0 MobileNet-192  |    69.1 %    |        418        |     4.2     |
|  1.0 MobileNet-160  |    67.2 %    |        290        |     4.2     |
|  1.0 MobileNet-128  |    64.4 %    |        186        |     4.2     |
------------------------------------------------------------------------

Reference:
  - [MobileNets: Efficient Convolutional Neural Networks
     for Mobile Vision Applications](
      https://arxiv.org/abs/1704.04861)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.keras import backend
from tensorflow.python.keras.applications import imagenet_utils
from tensorflow.python.keras.engine import training
from tensorflow.python.keras.layers import VersionAwareLayers
from tensorflow.python.keras.utils import data_utils
from tensorflow.python.keras.utils import layer_utils
from tensorflow.python.lib.io import file_io
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util.tf_export import keras_export

BASE_WEIGHT_PATH = ('https://storage.googleapis.com/tensorflow/'
                    'keras-applications/mobilenet/')
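# Resolved lazily inside `MobileNet()`: either a caller-supplied module passed
# through the `layers` kwarg, or a `VersionAwareLayers` instance.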
layers = None


@keras_export('keras.applications.mobilenet.MobileNet',
              'keras.applications.MobileNet')
def MobileNet(input_shape=None,
              alpha=1.0,
              depth_multiplier=1,
              dropout=1e-3,
              include_top=True,
              weights='imagenet',
              input_tensor=None,
              pooling=None,
              classes=1000,
              classifier_activation='softmax',
              **kwargs):
  """Instantiates the MobileNet architecture.

  Reference:
  - [MobileNets: Efficient Convolutional Neural Networks
     for Mobile Vision Applications](
      https://arxiv.org/abs/1704.04861)

  Optionally loads weights pre-trained on ImageNet.
  Note that the data format convention used by the model is
  the one specified in `tf.keras.backend.image_data_format()`.

  Note: each Keras Application expects a specific kind of input preprocessing.
  For MobileNet, call `tf.keras.applications.mobilenet.preprocess_input`
  on your inputs before passing them to the model.
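
  A minimal end-to-end sketch (`'elephant.jpg'` is a placeholder path):

  ```python
  import numpy as np
  import tensorflow as tf

  model = tf.keras.applications.MobileNet(weights='imagenet')

  # Load and resize an image to the default 224 x 224 input size.
  img = tf.keras.preprocessing.image.load_img('elephant.jpg',
                                              target_size=(224, 224))
  x = tf.keras.preprocessing.image.img_to_array(img)
  x = np.expand_dims(x, axis=0)
  # Scale pixels to the [-1, 1] range the pretrained weights expect.
  x = tf.keras.applications.mobilenet.preprocess_input(x)

  preds = model.predict(x)
  print(tf.keras.applications.mobilenet.decode_predictions(preds, top=3))
  ```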

  Args:
    input_shape: Optional shape tuple, only to be specified if `include_top`
      is False (otherwise the input shape has to be `(224, 224, 3)` with
      `channels_last` data format, or `(3, 224, 224)` with `channels_first`
      data format). It should have exactly 3 input channels, and width and
      height should be no smaller than 32. E.g. `(200, 200, 3)` would be one
      valid value. Defaults to `None`.
      `input_shape` will be ignored if the `input_tensor` is provided.
    alpha: Controls the width of the network. This is known as the width
      multiplier in the MobileNet paper.
      - If `alpha` < 1.0, proportionally decreases the number of filters
        in each layer.
      - If `alpha` > 1.0, proportionally increases the number of filters
        in each layer.
      - If `alpha` = 1, the default number of filters from the paper is
        used at each layer.
      Defaults to 1.0.
    depth_multiplier: Depth multiplier for depthwise convolution, i.e. the
      number of depthwise convolution output channels per input channel.
      (Note that this differs from the resolution multiplier of the
      MobileNet paper, which scales the input image size.) Defaults to 1.
    dropout: Dropout rate. Defaults to 0.001.
    include_top: Boolean, whether to include the fully-connected layer at
      the top of the network. Defaults to `True`.
    weights: One of `None` (random initialization), `'imagenet'`
      (pre-training on ImageNet), or the path to the weights file to be
      loaded. Defaults to `'imagenet'`.
    input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to
      use as image input for the model. `input_tensor` is useful for sharing
      inputs between multiple different networks. Defaults to `None`.
    pooling: Optional pooling mode for feature extraction when `include_top`
      is `False`.
      - `None` (default) means that the output of the model will be
          the 4D tensor output of the last convolutional block.
      - `avg` means that global average pooling
          will be applied to the output of the
          last convolutional block, and thus
          the output of the model will be a 2D tensor.
      - `max` means that global max pooling will be applied.
    classes: Optional number of classes to classify images into, only to be
      specified if `include_top` is True and no `weights` argument is
      specified. Defaults to 1000.
    classifier_activation: A `str` or callable. The activation function to
      use on the "top" layer. Ignored unless `include_top=True`. Set
      `classifier_activation=None` to return the logits of the "top" layer.
    **kwargs: For backwards compatibility only.

  Returns:
    A `keras.Model` instance.

  Raises:
    ValueError: in case of invalid argument for `weights`,
      or invalid input shape.
    ValueError: if `classifier_activation` is not `softmax` or `None` when
      using a pretrained top layer.
  """
  global layers
  if 'layers' in kwargs:
    layers = kwargs.pop('layers')
  else:
    layers = VersionAwareLayers()
  if kwargs:
    raise ValueError('Unknown argument(s): %s' % (kwargs,))
  if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)):
    raise ValueError('The `weights` argument should be either '
                     '`None` (random initialization), `imagenet` '
                     '(pre-training on ImageNet), '
                     'or the path to the weights file to be loaded.')

  if weights == 'imagenet' and include_top and classes != 1000:
    raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
                     'as true, `classes` should be 1000')

  # Determine proper input shape and default size.
  if input_shape is None:
    default_size = 224
  else:
    if backend.image_data_format() == 'channels_first':
      rows = input_shape[1]
      cols = input_shape[2]
    else:
      rows = input_shape[0]
      cols = input_shape[1]

    if rows == cols and rows in [128, 160, 192, 224]:
      default_size = rows
    else:
      default_size = 224

  input_shape = imagenet_utils.obtain_input_shape(
      input_shape,
      default_size=default_size,
      min_size=32,
      data_format=backend.image_data_format(),
      require_flatten=include_top,
      weights=weights)

  if backend.image_data_format() == 'channels_last':
    row_axis, col_axis = (0, 1)
  else:
    row_axis, col_axis = (1, 2)
  rows = input_shape[row_axis]
  cols = input_shape[col_axis]

  if weights == 'imagenet':
    if depth_multiplier != 1:
      raise ValueError('If imagenet weights are being loaded, '
                       'depth multiplier must be 1')

    if alpha not in [0.25, 0.50, 0.75, 1.0]:
      raise ValueError('If imagenet weights are being loaded, '
                       'alpha can be one of '
                       '`0.25`, `0.50`, `0.75` or `1.0` only.')

    if rows != cols or rows not in [128, 160, 192, 224]:
      rows = 224
      logging.warning('`input_shape` is undefined or non-square, '
                      'or `rows` is not in [128, 160, 192, 224]. '
                      'Weights for input shape (224, 224) will be'
                      ' loaded as the default.')

  if input_tensor is None:
    img_input = layers.Input(shape=input_shape)
  else:
    if not backend.is_keras_tensor(input_tensor):
      img_input = layers.Input(tensor=input_tensor, shape=input_shape)
    else:
      img_input = input_tensor

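  # MobileNet body: a standard 3x3 conv followed by 13 depthwise separable
  # blocks. The strided blocks halve the spatial resolution while the channel
  # count grows from 32 to 1024 (each scaled by `alpha`).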
  x = _conv_block(img_input, 32, alpha, strides=(2, 2))
  x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)

  x = _depthwise_conv_block(
      x, 128, alpha, depth_multiplier, strides=(2, 2), block_id=2)
  x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)

  x = _depthwise_conv_block(
      x, 256, alpha, depth_multiplier, strides=(2, 2), block_id=4)
  x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)

  x = _depthwise_conv_block(
      x, 512, alpha, depth_multiplier, strides=(2, 2), block_id=6)
  x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7)
  x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8)
  x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9)
  x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10)
  x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11)

  x = _depthwise_conv_block(
      x, 1024, alpha, depth_multiplier, strides=(2, 2), block_id=12)
  x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13)

  if include_top:
    if backend.image_data_format() == 'channels_first':
      shape = (int(1024 * alpha), 1, 1)
    else:
      shape = (1, 1, int(1024 * alpha))

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Reshape(shape, name='reshape_1')(x)
    x = layers.Dropout(dropout, name='dropout')(x)
    x = layers.Conv2D(classes, (1, 1), padding='same', name='conv_preds')(x)
    x = layers.Reshape((classes,), name='reshape_2')(x)
    imagenet_utils.validate_activation(classifier_activation, weights)
    x = layers.Activation(activation=classifier_activation,
                          name='predictions')(x)
  else:
    if pooling == 'avg':
      x = layers.GlobalAveragePooling2D()(x)
    elif pooling == 'max':
      x = layers.GlobalMaxPooling2D()(x)

  # Ensure that the model takes into account
  # any potential predecessors of `input_tensor`.
  if input_tensor is not None:
    inputs = layer_utils.get_source_inputs(input_tensor)
  else:
    inputs = img_input

  # Create model.
  model = training.Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows))

  # Load weights.
  if weights == 'imagenet':
    if alpha == 1.0:
      alpha_text = '1_0'
    elif alpha == 0.75:
      alpha_text = '7_5'
    elif alpha == 0.50:
      alpha_text = '5_0'
    else:
      alpha_text = '2_5'

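    # Checkpoint names encode the width multiplier and input resolution,
    # e.g. 'mobilenet_1_0_224_tf.h5'.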
    if include_top:
      model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows)
      weight_path = BASE_WEIGHT_PATH + model_name
      weights_path = data_utils.get_file(
          model_name, weight_path, cache_subdir='models')
    else:
      model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows)
      weight_path = BASE_WEIGHT_PATH + model_name
      weights_path = data_utils.get_file(
          model_name, weight_path, cache_subdir='models')
    model.load_weights(weights_path)
  elif weights is not None:
    model.load_weights(weights)

  return model


def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
  """Adds an initial convolution layer (with batch normalization and relu6).

  Args:
    inputs: Input tensor of shape `(rows, cols, 3)` (with `channels_last`
      data format) or `(3, rows, cols)` (with `channels_first` data format).
      It should have exactly 3 input channels, and width and height should
      be no smaller than 32. E.g. `(224, 224, 3)` would be one valid value.
    filters: Integer, the dimensionality of the output space (i.e. the
      number of output filters in the convolution).
    alpha: Controls the width of the network.
      - If `alpha` < 1.0, proportionally decreases the number of filters
        in each layer.
      - If `alpha` > 1.0, proportionally increases the number of filters
        in each layer.
      - If `alpha` = 1, the default number of filters from the paper is
        used at each layer.
    kernel: An integer or tuple/list of 2 integers, specifying the width and
      height of the 2D convolution window. Can be a single integer to
      specify the same value for all spatial dimensions.
    strides: An integer or tuple/list of 2 integers, specifying the strides
      of the convolution along the width and height. Can be a single integer
      to specify the same value for all spatial dimensions. Specifying any
      stride value != 1 is incompatible with specifying any `dilation_rate`
      value != 1.

  Input shape:
    4D tensor with shape: `(samples, channels, rows, cols)` if
      data_format='channels_first'
    or 4D tensor with shape: `(samples, rows, cols, channels)` if
      data_format='channels_last'.

  Output shape:
    4D tensor with shape: `(samples, filters, new_rows, new_cols)` if
      data_format='channels_first'
    or 4D tensor with shape: `(samples, new_rows, new_cols, filters)` if
      data_format='channels_last'. `rows` and `cols` values might have
      changed due to stride.

  Returns:
    Output tensor of block.
  """
  channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1
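  # Scale the number of output filters by the width multiplier `alpha`.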
  filters = int(filters * alpha)
  x = layers.Conv2D(
      filters,
      kernel,
      padding='same',
      use_bias=False,
      strides=strides,
      name='conv1')(inputs)
  x = layers.BatchNormalization(axis=channel_axis, name='conv1_bn')(x)
  return layers.ReLU(6., name='conv1_relu')(x)


363
364def _depthwise_conv_block(inputs,
365                          pointwise_conv_filters,
366                          alpha,
367                          depth_multiplier=1,
368                          strides=(1, 1),
369                          block_id=1):
370  """Adds a depthwise convolution block.
371
372  A depthwise convolution block consists of a depthwise conv,
373  batch normalization, relu6, pointwise convolution,
374  batch normalization and relu6 activation.
375
376  Args:
377    inputs: Input tensor of shape `(rows, cols, channels)` (with
378      `channels_last` data format) or (channels, rows, cols) (with
379      `channels_first` data format).
380    pointwise_conv_filters: Integer, the dimensionality of the output space
381      (i.e. the number of output filters in the pointwise convolution).
382    alpha: controls the width of the network. - If `alpha` < 1.0,
383      proportionally decreases the number of filters in each layer. - If
384      `alpha` > 1.0, proportionally increases the number of filters in each
385      layer. - If `alpha` = 1, default number of filters from the paper are
386      used at each layer.
387    depth_multiplier: The number of depthwise convolution output channels
388      for each input channel. The total number of depthwise convolution
389      output channels will be equal to `filters_in * depth_multiplier`.
390    strides: An integer or tuple/list of 2 integers, specifying the strides
391      of the convolution along the width and height. Can be a single integer
392      to specify the same value for all spatial dimensions. Specifying any
393      stride value != 1 is incompatible with specifying any `dilation_rate`
394      value != 1.
395    block_id: Integer, a unique identification designating the block number.
396      # Input shape
397    4D tensor with shape: `(batch, channels, rows, cols)` if
398      data_format='channels_first'
399    or 4D tensor with shape: `(batch, rows, cols, channels)` if
400      data_format='channels_last'. # Output shape
401    4D tensor with shape: `(batch, filters, new_rows, new_cols)` if
402      data_format='channels_first'
403    or 4D tensor with shape: `(batch, new_rows, new_cols, filters)` if
404      data_format='channels_last'. `rows` and `cols` values might have
405      changed due to stride.
406
407  Returns:
408    Output tensor of block.
409  """
  channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1
  pointwise_conv_filters = int(pointwise_conv_filters * alpha)

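  # For strided blocks, pad the bottom/right explicitly and run the depthwise
  # conv with 'valid' padding, matching the original TensorFlow behavior.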
  if strides == (1, 1):
    x = inputs
  else:
    x = layers.ZeroPadding2D(((0, 1), (0, 1)),
                             name='conv_pad_%d' % block_id)(inputs)
  x = layers.DepthwiseConv2D((3, 3),
                             padding='same' if strides == (1, 1) else 'valid',
                             depth_multiplier=depth_multiplier,
                             strides=strides,
                             use_bias=False,
                             name='conv_dw_%d' % block_id)(x)
  x = layers.BatchNormalization(
      axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x)
  x = layers.ReLU(6., name='conv_dw_%d_relu' % block_id)(x)

  x = layers.Conv2D(
      pointwise_conv_filters, (1, 1),
      padding='same',
      use_bias=False,
      strides=(1, 1),
      name='conv_pw_%d' % block_id)(x)
  x = layers.BatchNormalization(
      axis=channel_axis, name='conv_pw_%d_bn' % block_id)(x)
  return layers.ReLU(6., name='conv_pw_%d_relu' % block_id)(x)


@keras_export('keras.applications.mobilenet.preprocess_input')
def preprocess_input(x, data_format=None):
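  # 'tf' mode scales input pixels between -1 and 1, sample-wise.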
  return imagenet_utils.preprocess_input(x, data_format=data_format, mode='tf')


@keras_export('keras.applications.mobilenet.decode_predictions')
def decode_predictions(preds, top=5):
  return imagenet_utils.decode_predictions(preds, top=top)


preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format(
    mode='',
    ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TF,
    error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC)
decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__