1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15# pylint: disable=protected-access
16"""Home of the `Sequential` model.
17"""
18from __future__ import absolute_import
19from __future__ import division
20from __future__ import print_function
21
22import copy
23import warnings
24
25from tensorflow.python import tf2
26from tensorflow.python.framework import ops
27from tensorflow.python.framework import tensor_util
28from tensorflow.python.keras import layers as layer_module
29from tensorflow.python.keras.engine import base_layer
30from tensorflow.python.keras.engine import functional
31from tensorflow.python.keras.engine import input_layer
32from tensorflow.python.keras.engine import training_utils
33from tensorflow.python.keras.saving.saved_model import model_serialization
34from tensorflow.python.keras.utils import generic_utils
35from tensorflow.python.keras.utils import layer_utils
36from tensorflow.python.keras.utils import tf_inspect
37from tensorflow.python.keras.utils import tf_utils
38from tensorflow.python.module import module
39from tensorflow.python.ops.numpy_ops import np_arrays
40from tensorflow.python.platform import tf_logging as logging
41from tensorflow.python.training.tracking import base as trackable
42from tensorflow.python.util import nest
43from tensorflow.python.util.tf_export import keras_export
44
45
# Message of the `ValueError` raised in this module whenever a layer placed in
# a `Sequential` stack produces anything other than exactly one output tensor.
SINGLE_LAYER_OUTPUT_ERROR_MSG = (
    'All layers in a Sequential model should have a single output tensor. '
    'For multi-output layers, use the functional API.')
49
50
51@keras_export('keras.Sequential', 'keras.models.Sequential')
52class Sequential(functional.Functional):
53  """`Sequential` groups a linear stack of layers into a `tf.keras.Model`.
54
55  `Sequential` provides training and inference features on this model.
56
57  Examples:
58
59  >>> # Optionally, the first layer can receive an `input_shape` argument:
60  >>> model = tf.keras.Sequential()
61  >>> model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
62  >>> # Afterwards, we do automatic shape inference:
63  >>> model.add(tf.keras.layers.Dense(4))
64
65  >>> # This is identical to the following:
66  >>> model = tf.keras.Sequential()
67  >>> model.add(tf.keras.Input(shape=(16,)))
68  >>> model.add(tf.keras.layers.Dense(8))
69
70  >>> # Note that you can also omit the `input_shape` argument.
71  >>> # In that case the model doesn't have any weights until the first call
72  >>> # to a training/evaluation method (since it isn't yet built):
73  >>> model = tf.keras.Sequential()
74  >>> model.add(tf.keras.layers.Dense(8))
75  >>> model.add(tf.keras.layers.Dense(4))
76  >>> # model.weights not created yet
77
78  >>> # Whereas if you specify the input shape, the model gets built
79  >>> # continuously as you are adding layers:
80  >>> model = tf.keras.Sequential()
81  >>> model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
82  >>> model.add(tf.keras.layers.Dense(4))
83  >>> len(model.weights)
84  4
85
86  >>> # When using the delayed-build pattern (no input shape specified), you can
87  >>> # choose to manually build your model by calling
88  >>> # `build(batch_input_shape)`:
89  >>> model = tf.keras.Sequential()
90  >>> model.add(tf.keras.layers.Dense(8))
91  >>> model.add(tf.keras.layers.Dense(4))
92  >>> model.build((None, 16))
93  >>> len(model.weights)
94  4
95
96  ```python
  # Note that when using the delayed-build pattern (no input shape specified),
  # the model gets built the first time you call `fit`, `evaluate`, or
  # `predict`, or the first time you call the model on some input data.
100  model = tf.keras.Sequential()
101  model.add(tf.keras.layers.Dense(8))
102  model.add(tf.keras.layers.Dense(1))
103  model.compile(optimizer='sgd', loss='mse')
104  # This builds the model for the first time:
105  model.fit(x, y, batch_size=32, epochs=10)
106  ```
107  """
108
109  @trackable.no_automatic_dependency_tracking
110  def __init__(self, layers=None, name=None):
111    """Creates a `Sequential` model instance.
112
113    Args:
114      layers: Optional list of layers to add to the model.
115      name: Optional name for the model.
116    """
117    # Skip the init in FunctionalModel since model doesn't have input/output yet
118    super(functional.Functional, self).__init__(  # pylint: disable=bad-super-call
119        name=name, autocast=False)
120    base_layer.keras_api_gauge.get_cell('Sequential').set(True)
121    self.supports_masking = True
122    self._compute_output_and_mask_jointly = True
123    self._auto_track_sub_layers = False
124    self._inferred_input_shape = None
125    self._has_explicit_input_shape = False
126    self._input_dtype = None
127    self._layer_call_argspecs = {}
128    self._created_nodes = set()
129    # Flag that indicate whether the sequential network topology has been
130    # created. It is false when there isn't any layer, or the layers doesn't
131    # have input shape.
132    self._graph_initialized = False
133
134    # Unfortunately some Sequential models using custom layers or FeatureColumn
135    # layers have multiple inputs. This is fundamentally incompatible with
136    # most of the Sequential API, and we have to disable a number of features
137    # for such models.
138    self._use_legacy_deferred_behavior = False
139
140    # Add to the model any layers passed to the constructor.
141    if layers:
142      if not isinstance(layers, (list, tuple)):
143        layers = [layers]
144      for layer in layers:
145        self.add(layer)
146
147  @property
148  def layers(self):
149    # Historically, `sequential.layers` only returns layers that were added
150    # via `add`, and omits the auto-generated `InputLayer` that comes at the
151    # bottom of the stack.
152    # `Trackable` manages the `_layers` attributes and does filtering
153    # over it.
154    layers = super(Sequential, self).layers
155    if layers and isinstance(layers[0], input_layer.InputLayer):
156      return layers[1:]
157    return layers[:]
158
  @trackable.no_automatic_dependency_tracking
  def add(self, layer):
    """Adds a layer instance on top of the layer stack.

    Three situations are handled:

    * The model tracks nothing yet and `layer` is an `InputLayer`, or
      `training_utils.get_input_shape_and_dtype(layer)` reports a batch shape:
      the model's `inputs`/`outputs` are set up right away and the graph
      network is initialized.
    * The model already has `outputs`: `layer` is called on the current output
      tensor and becomes the new top of the stack.
    * Otherwise: the layer is only recorded; graph construction is deferred.

    Args:
        layer: layer instance. A tensor produced by `keras.Input` is also
            tolerated (its originating `InputLayer` is used and a warning is
            logged), and a `module.Module` that is not a Keras layer is wrapped
            in `functional.ModuleWrapper`.

    Raises:
        TypeError: If `layer` is not a `module.Module` instance (Keras layers
            pass this check).
        ValueError: If another layer with the same name is already part of
            the model.
        ValueError: In case the `layer` argument has
            multiple output tensors.
    """
    # If we are passed a Keras tensor created by keras.Input(), we can extract
    # the input layer from its keras history and use that without any loss of
    # generality.
    if hasattr(layer, '_keras_history'):
      origin_layer = layer._keras_history[0]
      if isinstance(origin_layer, input_layer.InputLayer):
        layer = origin_layer
        logging.warning(
            'Please add `keras.layers.InputLayer` instead of `keras.Input` to '
            'Sequential model. `keras.Input` is intended to be used by '
            'Functional model.')

    # Accept Keras layers as-is, wrap other `Module`s, reject everything else.
    if isinstance(layer, module.Module):
      if not isinstance(layer, base_layer.Layer):
        layer = functional.ModuleWrapper(layer)
    else:
      raise TypeError('The added layer must be '
                      'an instance of class Layer. '
                      'Found: ' + str(layer))

    tf_utils.assert_no_legacy_layers([layer])
    if not self._is_layer_name_unique(layer):
      raise ValueError('All layers added to a Sequential model '
                       'should have unique names. Name "%s" is already the name'
                       ' of a layer in this model. Update the `name` argument '
                       'to pass a unique name.' % (layer.name,))

    # Adding a layer invalidates the built state until proven otherwise below.
    self.built = False
    set_inputs = False
    self._maybe_create_attribute('_self_tracked_trackables', [])
    if not self._self_tracked_trackables:
      # Nothing is tracked yet: this is the first layer of the model.
      if isinstance(layer, input_layer.InputLayer):
        # Case where the user passes an Input or InputLayer layer via `add`.
        set_inputs = True
      else:
        batch_shape, dtype = training_utils.get_input_shape_and_dtype(layer)
        if batch_shape:
          # Instantiate an input layer.
          x = input_layer.Input(
              batch_shape=batch_shape, dtype=dtype, name=layer.name + '_input')
          # This will build the current layer
          # and create the node connecting the current layer
          # to the input layer we just created.
          layer(x)
          set_inputs = True

      if set_inputs:
        # The most recent inbound node holds the tensors produced for this
        # model; exactly one output tensor is allowed.
        outputs = nest.flatten(layer._inbound_nodes[-1].outputs)
        if len(outputs) != 1:
          raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG)
        self.outputs = outputs
        self.inputs = layer_utils.get_source_inputs(self.outputs[0])
        self.built = True
        self._has_explicit_input_shape = True

    elif self.outputs:
      # If the model is being built continuously on top of an input layer:
      # refresh its output.
      output_tensor = layer(self.outputs[0])
      if len(nest.flatten(output_tensor)) != 1:
        raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG)
      self.outputs = [output_tensor]
      self.built = True

    if set_inputs or self._graph_initialized:
      # A graph exists (or was just made possible): (re)initialize the network
      # from the current inputs/outputs.
      self._init_graph_network(self.inputs, self.outputs)
      self._graph_initialized = True
    else:
      # Deferred mode: only record the layer for now.
      self._self_tracked_trackables.append(layer)
      self._handle_deferred_layer_dependencies([layer])

    # Cache the signature of `layer.call`; `call` consults it to decide whether
    # to forward the `mask` / `training` arguments.
    self._layer_call_argspecs[layer] = tf_inspect.getfullargspec(layer.call)
246
247  @trackable.no_automatic_dependency_tracking
248  def pop(self):
249    """Removes the last layer in the model.
250
251    Raises:
252        TypeError: if there are no layers in the model.
253    """
254    if not self.layers:
255      raise TypeError('There are no layers in the model.')
256
257    layer = self._self_tracked_trackables.pop()
258    self._layer_call_argspecs.pop(layer)
259    if not self.layers:
260      self.outputs = None
261      self.inputs = None
262      self.built = False
263      self._inferred_input_shape = None
264      self._has_explicit_input_shape = False
265      self._graph_initialized = False
266    elif self._graph_initialized:
267      self.layers[-1]._outbound_nodes = []
268      self.outputs = [self.layers[-1].output]
269      self._init_graph_network(self.inputs, self.outputs)
270      self.built = True
271
272  @trackable.no_automatic_dependency_tracking
273  def _build_graph_network_for_inferred_shape(self,
274                                              input_shape,
275                                              input_dtype=None):
276    if input_shape is None or not self.layers:
277      return
278    if not tf2.enabled() or not ops.executing_eagerly_outside_functions():
279      # This behavior is disabled in V1 or when eager execution is disabled.
280      return
281    if (not self._has_explicit_input_shape and
282        not self._use_legacy_deferred_behavior):
283      # Determine whether the input shape is novel, i.e. whether the model
284      # should be rebuilt.
285      input_shape = tuple(input_shape)
286      if self._inferred_input_shape is None:
287        new_shape = input_shape
288      else:
289        new_shape = relax_input_shape(self._inferred_input_shape, input_shape)
290      if (new_shape is not None and new_shape != self._inferred_input_shape):
291        # A novel shape has been received: we need to rebuild the model.
292        # In case we are inside a graph function, we step out of it.
293        with ops.init_scope():
294          inputs = input_layer.Input(
295              batch_shape=new_shape,
296              dtype=input_dtype,
297              name=self.layers[0].name + '_input')
298          layer_input = inputs
299          created_nodes = set()
300          for layer in self.layers:
301            # Clear nodes previously created via this method. This prevents
302            # node accumulation and ensures that e.g. `layer.output` is
303            # always connected to `model.inputs`
304            # (this is important e.g. for the feature extraction use case).
305            # We don't just do `layer._inbound_nodes = []` in order
306            # not to break shared layers added to Sequential models (which is
307            # technically illegal as per the `add()` docstring,
308            # but wasn't previously disabled).
309            clear_previously_created_nodes(layer, self._created_nodes)
310            try:
311              # Create Functional API connection by calling the current layer
312              layer_output = layer(layer_input)
313            except:  # pylint:disable=bare-except
314              # Functional API calls may fail for a number of reasons:
315              # 1) The layer may be buggy. In this case it will be easier for
316              # the user to debug if we fail on the first call on concrete data,
317              # instead of our own call on a symbolic input.
318              # 2) The layer is dynamic (graph-incompatible) and hasn't
319              # overridden `compute_output_shape`. In this case, it is
320              # impossible to build a graph network.
321              # 3) The layer is otherwise incompatible with the Functional API
322              # (e.g. this is the case for some probabilistic layers that rely
323              # on hacks and that do not return tensors).
324              # In all these cases, we should avoid creating a graph network
325              # (or we simply can't).
326              self._use_legacy_deferred_behavior = True
327              return
328            if len(nest.flatten(layer_output)) != 1:
329              raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG)
330            # Keep track of nodes just created above
331            track_nodes_created_by_last_call(layer, created_nodes)
332            layer_input = layer_output
333            outputs = layer_output
334          self._created_nodes = created_nodes
335          try:
336            # Initialize a graph Network. This call will never fail for
337            # a stack of valid Keras layers.
338            # However some users have layers that are fundamentally incompatible
339            # with the Functional API, which do not return tensors. In this
340            # case, we fall back to the legacy deferred behavior.
341            # TODO(fchollet): consider raising here, as we should not be
342            # supporting such layers.
343            self._init_graph_network(inputs, outputs)
344            self._graph_initialized = True
345          except:  # pylint:disable=bare-except
346            self._use_legacy_deferred_behavior = True
347        self._inferred_input_shape = new_shape
348
349  @generic_utils.default
350  def build(self, input_shape=None):
351    if self._graph_initialized:
352      self._init_graph_network(self.inputs, self.outputs)
353    else:
354      if input_shape is None:
355        raise ValueError('You must provide an `input_shape` argument.')
356      self._build_graph_network_for_inferred_shape(input_shape)
357      if not self.built:
358        input_shape = tuple(input_shape)
359        self._build_input_shape = input_shape
360        super(Sequential, self).build(input_shape)
361    self.built = True
362
363  def call(self, inputs, training=None, mask=None):  # pylint: disable=redefined-outer-name
364    # If applicable, update the static input shape of the model.
365    if not self._has_explicit_input_shape:
366      if not tensor_util.is_tf_type(inputs) and not isinstance(
367          inputs, np_arrays.ndarray):
368        # This is a Sequential with mutiple inputs. This is technically an
369        # invalid use case of Sequential, but we tolerate it for backwards
370        # compatibility.
371        self._use_legacy_deferred_behavior = True
372        self._build_input_shape = nest.map_structure(_get_shape_tuple, inputs)
373        if tf2.enabled():
374          logging.warning('Layers in a Sequential model should only have a '
375                          'single input tensor, but we receive a %s input: %s'
376                          '\nConsider rewriting this model with the Functional '
377                          'API.' % (type(inputs), inputs))
378      else:
379        self._build_graph_network_for_inferred_shape(inputs.shape, inputs.dtype)
380
381    if self._graph_initialized:
382      if not self.built:
383        self._init_graph_network(self.inputs, self.outputs)
384      return super(Sequential, self).call(inputs, training=training, mask=mask)
385
386    outputs = inputs  # handle the corner case where self.layers is empty
387    for layer in self.layers:
388      # During each iteration, `inputs` are the inputs to `layer`, and `outputs`
389      # are the outputs of `layer` applied to `inputs`. At the end of each
390      # iteration `inputs` is set to `outputs` to prepare for the next layer.
391      kwargs = {}
392      argspec = self._layer_call_argspecs[layer].args
393      if 'mask' in argspec:
394        kwargs['mask'] = mask
395      if 'training' in argspec:
396        kwargs['training'] = training
397
398      outputs = layer(inputs, **kwargs)
399
400      if len(nest.flatten(outputs)) != 1:
401        raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG)
402      # `outputs` will be the inputs to the next layer.
403      inputs = outputs
404      mask = getattr(outputs, '_keras_mask', None)
405    return outputs
406
407  def compute_output_shape(self, input_shape):
408    shape = input_shape
409    for layer in self.layers:
410      shape = layer.compute_output_shape(shape)
411    return shape
412
413  def compute_mask(self, inputs, mask):
414    # TODO(omalleyt): b/123540974 This function is not really safe to call
415    # by itself because it will duplicate any updates and losses in graph
416    # mode by `call`ing the Layers again.
417    outputs = self.call(inputs, mask=mask)
418    return getattr(outputs, '_keras_mask', None)
419
420  def predict_proba(self, x, batch_size=32, verbose=0):
421    """Generates class probability predictions for the input samples.
422
423    The input samples are processed batch by batch.
424
425    Args:
426        x: input data, as a Numpy array or list of Numpy arrays
427            (if the model has multiple inputs).
428        batch_size: integer.
429        verbose: verbosity mode, 0 or 1.
430
431    Returns:
432        A Numpy array of probability predictions.
433    """
434    warnings.warn('`model.predict_proba()` is deprecated and '
435                  'will be removed after 2021-01-01. '
436                  'Please use `model.predict()` instead.')
437    preds = self.predict(x, batch_size, verbose)
438    if preds.min() < 0. or preds.max() > 1.:
439      logging.warning('Network returning invalid probability values. '
440                      'The last layer might not normalize predictions '
441                      'into probabilities '
442                      '(like softmax or sigmoid would).')
443    return preds
444
445  def predict_classes(self, x, batch_size=32, verbose=0):
446    """Generate class predictions for the input samples.
447
448    The input samples are processed batch by batch.
449
450    Args:
451        x: input data, as a Numpy array or list of Numpy arrays
452            (if the model has multiple inputs).
453        batch_size: integer.
454        verbose: verbosity mode, 0 or 1.
455
456    Returns:
457        A numpy array of class predictions.
458    """
459    warnings.warn('`model.predict_classes()` is deprecated and '
460                  'will be removed after 2021-01-01. '
461                  'Please use instead:'
462                  '* `np.argmax(model.predict(x), axis=-1)`, '
463                  '  if your model does multi-class classification '
464                  '  (e.g. if it uses a `softmax` last-layer activation).'
465                  '* `(model.predict(x) > 0.5).astype("int32")`, '
466                  '  if your model does binary classification '
467                  '  (e.g. if it uses a `sigmoid` last-layer activation).')
468    proba = self.predict(x, batch_size=batch_size, verbose=verbose)
469    if proba.shape[-1] > 1:
470      return proba.argmax(axis=-1)
471    else:
472      return (proba > 0.5).astype('int32')
473
474  def get_config(self):
475    layer_configs = []
476    for layer in super(Sequential, self).layers:
477      # `super().layers` include the InputLayer if available (it is filtered out
478      # of `self.layers`). Note that `self._self_tracked_trackables` is managed
479      # by the tracking infrastructure and should not be used.
480      layer_configs.append(generic_utils.serialize_keras_object(layer))
481    config = {
482        'name': self.name,
483        'layers': copy.deepcopy(layer_configs)
484    }
485    if not self._is_graph_network and self._build_input_shape is not None:
486      config['build_input_shape'] = self._build_input_shape
487    return config
488
489  @classmethod
490  def from_config(cls, config, custom_objects=None):
491    if 'name' in config:
492      name = config['name']
493      build_input_shape = config.get('build_input_shape')
494      layer_configs = config['layers']
495    else:
496      name = None
497      build_input_shape = None
498      layer_configs = config
499    model = cls(name=name)
500    for layer_config in layer_configs:
501      layer = layer_module.deserialize(layer_config,
502                                       custom_objects=custom_objects)
503      model.add(layer)
504    if (not model.inputs and build_input_shape and
505        isinstance(build_input_shape, (tuple, list))):
506      model.build(build_input_shape)
507    return model
508
509  @property
510  def input_spec(self):
511    if hasattr(self, '_manual_input_spec'):
512      return self._manual_input_spec
513    if self.layers and hasattr(self.layers[0], 'input_spec'):
514      return self.layers[0].input_spec
515    return None
516
517  @input_spec.setter
518  def input_spec(self, value):
519    self._manual_input_spec = value
520
521  @property
522  def _trackable_saved_model_saver(self):
523    return model_serialization.SequentialSavedModelSaver(self)
524
525  def _is_layer_name_unique(self, layer):
526    for ref_layer in self.layers:
527      if layer.name == ref_layer.name and ref_layer is not layer:
528        return False
529    return True
530
531  def _assert_weights_created(self):
532    if self._graph_initialized:
533      return
534    # When the graph has not been initialized, use the Model's implementation to
535    # to check if the weights has been created.
536    super(functional.Functional, self)._assert_weights_created()  # pylint: disable=bad-super-call
537
538
def _get_shape_tuple(t):
  """Returns the shape of `t` as a tuple, or None if it cannot be determined.

  Objects without a `shape` attribute yield None. A `shape` that already is a
  tuple is returned as is. Any other shape object is expected to expose `rank`
  and `as_list()`; it is converted to a tuple unless its rank is None.
  """
  if not hasattr(t, 'shape'):
    return None
  dims = t.shape
  if isinstance(dims, tuple):
    return dims
  return tuple(dims.as_list()) if dims.rank is not None else None
548
549
def relax_input_shape(shape_1, shape_2):
  """Merges two shapes, replacing every differing dimension with None.

  Args:
    shape_1: First shape (a sized sequence of dimensions) or None.
    shape_2: Second shape (a sized sequence of dimensions) or None.

  Returns:
    None if either shape is None or if their lengths differ; otherwise a tuple
    with one entry per dimension, keeping the dimension where both shapes
    agree and using None elsewhere.
  """
  if shape_1 is None or shape_2 is None or len(shape_1) != len(shape_2):
    return None
  relaxed = []
  for dim_1, dim_2 in zip(shape_1, shape_2):
    relaxed.append(None if dim_1 != dim_2 else dim_1)
  return tuple(relaxed)
556
557
def clear_previously_created_nodes(layer, created_nodes):
  """Remove nodes from `created_nodes` from the layer's inbound_nodes.

  The same nodes are also removed from the `_outbound_nodes` of every layer
  that feeds one of `layer`'s inbound nodes.
  """

  def _without_created(nodes):
    return [node for node in nodes if node not in created_nodes]

  for inbound_node in layer._inbound_nodes:
    for upstream_layer in nest.flatten(inbound_node.inbound_layers):
      upstream_layer._outbound_nodes = _without_created(
          upstream_layer._outbound_nodes)
  layer._inbound_nodes = _without_created(layer._inbound_nodes)
568
569
def track_nodes_created_by_last_call(layer, created_nodes):
  """Adds to `created_nodes` the nodes created by the last call to `layer`.

  That is the last inbound node of `layer`, plus the last outbound node of
  each layer feeding that node. `created_nodes` is updated in place.
  """
  inbound_nodes = layer._inbound_nodes
  if inbound_nodes:
    latest_node = inbound_nodes[-1]
    created_nodes.add(latest_node)
    for upstream_layer in nest.flatten(latest_node.inbound_layers):
      upstream_outbound = upstream_layer._outbound_nodes
      if upstream_outbound:
        created_nodes.add(upstream_outbound[-1])
579