1# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Part of the Keras training engine related to distributed training.
16"""
17# pylint: disable=protected-access
18from __future__ import absolute_import
19from __future__ import division
20from __future__ import print_function
21
22import numpy as np
23
24from tensorflow.python.data.experimental.ops import batching
25from tensorflow.python.distribute import input_lib
26from tensorflow.python.distribute import reduce_util as ds_reduce_util
27from tensorflow.python.framework import constant_op
28from tensorflow.python.framework import errors
29from tensorflow.python.framework import tensor_shape
30from tensorflow.python.keras import backend as K
31from tensorflow.python.keras import callbacks as cbks
32from tensorflow.python.keras.engine import distributed_training_utils
33from tensorflow.python.keras.engine import partial_batch_padding_handler as padding_util
34from tensorflow.python.keras.engine import training_arrays
35from tensorflow.python.keras.engine import training_utils
36from tensorflow.python.keras.utils.generic_utils import Progbar
37from tensorflow.python.keras.utils.mode_keys import ModeKeys
38from tensorflow.python.ops import array_ops
39from tensorflow.python.platform import tf_logging as logging
40from tensorflow.python.util import nest
41
42
def fit_distributed(model,
                    x=None,
                    y=None,
                    batch_size=None,
                    epochs=1,
                    verbose=1,
                    callbacks=None,
                    validation_split=0.,
                    validation_data=None,
                    shuffle=True,
                    class_weight=None,
                    sample_weight=None,
                    initial_epoch=0,
                    steps_per_epoch=None,
                    validation_steps=None,
                    validation_freq=1):
  """Fit loop for Distribution Strategies.

  Validates the callbacks and inputs, converts the training data (and the
  optional validation data) into datasets through the model, then hands off
  to `experimental_tpu_fit_loop` for TPU strategies or to
  `training_arrays.fit_loop` for every other strategy.

  Returns:
    Whatever the selected fit loop returns.

  Raises:
    ValueError: if `validation_split` is set while `validation_data` is not.
  """
  distributed_training_utils.validate_callbacks(callbacks, model.optimizer)
  distributed_training_utils.validate_inputs(
      x, y, model._distribution_strategy)

  leading_input = nest.flatten(x)[0]
  if isinstance(leading_input, np.ndarray):
    # Partial-batch support is not yet implemented across all functions and
    # distribution strategies, so `mode` is passed to selectively relax the
    # constraint that all the training samples must be consumed.
    steps_per_epoch, batch_size = distributed_training_utils.get_input_params(
        model._distribution_strategy,
        leading_input,
        steps_per_epoch,
        batch_size,
        mode=ModeKeys.TRAIN)
  batch_size = model._validate_or_infer_batch_size(
      batch_size, steps_per_epoch, x)
  train_dataset = model._distribution_standardize_user_data(
      x,
      y,
      sample_weight=sample_weight,
      class_weight=class_weight,
      batch_size=batch_size,
      validation_split=validation_split,
      shuffle=shuffle,
      repeat=True)

  if not validation_data:
    if validation_split:
      raise ValueError('validation_split argument is not supported with '
                       'distribution strategies.')
    eval_dataset = None
  else:
    val_x, val_y, val_sample_weights = model._unpack_validation_data(
        validation_data)
    distributed_training_utils.validate_inputs(
        val_x, val_y, model._distribution_strategy)
    leading_val_input = nest.flatten(val_x)[0]
    if isinstance(leading_val_input, np.ndarray):
      # Only the step count is taken from the result; the training batch size
      # is reused for validation.
      validation_steps, _ = distributed_training_utils.get_input_params(
          model._distribution_strategy,
          leading_val_input,
          validation_steps,
          batch_size)
    eval_dataset = model._distribution_standardize_user_data(
        val_x,
        val_y,
        sample_weight=val_sample_weights,
        class_weight=None,
        batch_size=batch_size,
        validation_split=validation_split,
        shuffle=shuffle)

  # Keyword arguments accepted by both the TPU and the non-TPU fit loops.
  shared_loop_kwargs = dict(
      epochs=epochs,
      verbose=verbose,
      callbacks=callbacks,
      initial_epoch=initial_epoch,
      steps_per_epoch=steps_per_epoch,
      validation_steps=validation_steps,
      validation_freq=validation_freq)

  if distributed_training_utils.is_tpu_strategy(model._distribution_strategy):
    return experimental_tpu_fit_loop(
        model,
        train_dataset,
        val_dataset=eval_dataset,
        **shared_loop_kwargs)

  return training_arrays.fit_loop(
      model,
      train_dataset,
      batch_size=batch_size,
      val_inputs=eval_dataset,
      shuffle=shuffle,
      steps_name='steps_per_epoch',
      **shared_loop_kwargs)
133
134
def evaluate_distributed(model,
                         x=None,
                         y=None,
                         batch_size=None,
                         verbose=1,
                         sample_weight=None,
                         steps=None,
                         callbacks=None):
  """Evaluate loop for Distribution Strategies.

  Validates the inputs, converts them into a dataset through the model, then
  hands off to `experimental_tpu_test_loop` for TPU strategies or to
  `training_arrays.test_loop` for every other strategy.

  Returns:
    Whatever the selected test loop returns.
  """
  distributed_training_utils.validate_inputs(x, y, model._distribution_strategy)

  leading_input = nest.flatten(x)[0]
  if isinstance(leading_input, np.ndarray):
    steps, batch_size = distributed_training_utils.get_input_params(
        model._distribution_strategy, leading_input, steps, batch_size)
  batch_size = model._validate_or_infer_batch_size(batch_size, steps, x)

  eval_dataset = model._distribution_standardize_user_data(
      x,
      y,
      sample_weight=sample_weight,
      batch_size=batch_size)

  on_tpu = distributed_training_utils.is_tpu_strategy(
      model._distribution_strategy)
  if not on_tpu:
    return training_arrays.test_loop(
        model,
        inputs=eval_dataset,
        batch_size=batch_size,
        verbose=verbose,
        steps=steps,
        callbacks=callbacks)

  return experimental_tpu_test_loop(
      model,
      eval_dataset,
      verbose=verbose,
      steps=steps,
      callbacks=callbacks)
166
167
def predict_distributed(model,
                        x=None,
                        batch_size=None,
                        verbose=0,
                        steps=None,
                        callbacks=None):
  """Predict loop for Distribution Strategies.

  Validates the inputs (partial batches allowed), converts them into a
  dataset through the model, then hands off to `experimental_tpu_predict_loop`
  for TPU strategies or to `training_arrays.predict_loop` for every other
  strategy.

  Returns:
    Whatever the selected predict loop returns.
  """
  distributed_training_utils.validate_inputs(
      x, None, model._distribution_strategy, allow_partial_batch=True)

  leading_input = nest.flatten(x)[0]
  if isinstance(leading_input, np.ndarray):
    steps, batch_size = distributed_training_utils.get_input_params(
        model._distribution_strategy,
        leading_input,
        steps,
        batch_size,
        mode=ModeKeys.PREDICT)
  batch_size = model._validate_or_infer_batch_size(batch_size, steps, x)

  predict_dataset = model._distribution_standardize_user_data(
      x, batch_size=batch_size, allow_partial_batch=True)

  on_tpu = distributed_training_utils.is_tpu_strategy(
      model._distribution_strategy)
  if not on_tpu:
    return training_arrays.predict_loop(
        model,
        predict_dataset,
        batch_size=batch_size,
        verbose=verbose,
        steps=steps,
        callbacks=callbacks)

  return experimental_tpu_predict_loop(
      model,
      predict_dataset,
      verbose=verbose,
      steps=steps,
      callbacks=callbacks)
198
199
def _make_step_fn(model, mode, strategy, output_labels):
  """Build the per-step function handed to the distribution strategy.

  Arguments:
    model: a Keras Model instance.
    mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT.
    strategy: a `tf.distribute.Strategy` instance.
    output_labels: the output labels for the step function.

  Returns:
    A step function to run by `tf.distribute.Strategy`.
  """

  def _replica_execution_parts(replica_model):
    """Returns the pieces of the execution function built for one replica."""
    replica_fn = replica_model._make_execution_function(mode)
    return (replica_fn.inputs, replica_fn.outputs, replica_fn.updates_op,
            replica_fn.session_kwargs)

  def step_fn(ctx, inputs):
    """A step fn that returns update ops."""
    is_predict = mode == ModeKeys.PREDICT

    # Only train/test inputs carry targets alongside the features.
    targets = None
    if not is_predict:
      inputs, targets = inputs

    if not model._compile_distribution:
      distributed_training_utils._build_distributed_network(
          model, strategy, mode, inputs, targets)
    else:
      distributed_training_utils.clone_model_on_replicas(
          model, strategy, mode, inputs=inputs, targets=targets)

    distributed_model = distributed_training_utils.get_distributed_model(
        model, mode)
    per_replica_parts = strategy.extended.call_for_each_replica(
        _replica_execution_parts, args=(distributed_model,))
    (grouped_inputs, grouped_outputs, grouped_updates,
     grouped_session_args) = per_replica_parts

    flat_parts = distributed_training_utils.unwrap_values(
        strategy, grouped_inputs, grouped_outputs, grouped_updates,
        grouped_session_args)
    all_inputs, all_outputs, all_updates, all_session_args = flat_parts

    combined_fn = K.function(
        all_inputs,
        all_outputs,
        updates=all_updates,
        name='distributed_{}_function'.format(str(mode)),
        **all_session_args)

    for label, output in zip(output_labels, combined_fn.outputs):
      if is_predict:
        # Predictions are recorded without a reduce op.
        ctx.set_last_step_output(label, output)
        continue
      # The loss is summed; all other metrics are reduced using mean for now.
      # This is a temporary workaround until new metrics are in place.
      reduce_op = (
          ds_reduce_util.ReduceOp.SUM
          if label == 'loss' else ds_reduce_util.ReduceOp.MEAN)
      ctx.set_last_step_output(label, output, reduce_op)

    # TODO(priyag, sourabhbajaj): Ignoring these things from the combined_fn:
    # feed_dict, session kwargs, run options, run_metadata for now. These should
    # be handled appropriately
    return combined_fn.updates_op

  return step_fn
265
266
def experimental_tpu_fit_loop(model,
                              dataset,
                              epochs=100,
                              verbose=1,
                              callbacks=None,
                              initial_epoch=0,
                              steps_per_epoch=None,
                              val_dataset=None,
                              validation_steps=None,
                              validation_freq=1):
  """Fit loop for training with TPU DistributionStrategy.

  Arguments:
      model: Keras Model instance.
      dataset: Dataset that returns inputs and targets
      epochs: Number of times to iterate over the data
      verbose: Integer, Verbosity mode, 0, 1 or 2
      callbacks: List of callbacks to be called during training
      initial_epoch: Epoch at which to start training
          (useful for resuming a previous training run)
      steps_per_epoch: Total number of steps (batches of samples)
          before declaring one epoch finished and starting the
          next epoch. Ignored with the default value of `None`.
      val_dataset: Dataset for validation data.
      validation_steps: Number of steps to run validation for
          (only if doing validation from data tensors).
          Ignored with the default value of `None`. Validation only runs
          when this value is truthy.
      validation_freq: Only relevant if validation data is provided. Integer or
          `collections.Container` instance (e.g. list, tuple, etc.). If an
          integer, specifies how many training epochs to run before a new
          validation run is performed, e.g. `validation_freq=2` runs
          validation every 2 epochs. If a Container, specifies the epochs on
          which to run validation, e.g. `validation_freq=[1, 2, 10]` runs
          validation at the end of the 1st, 2nd, and 10th epochs.

  Returns:
      The `model.history` attribute of `model`.

  Raises:
      ValueError: if `steps_per_epoch` is still `None` after inference while
          the strategy's `steps_per_run` is not 1.
  """
  mode = ModeKeys.TRAIN
  # TODO(fchollet): add support for `steps_per_epoch=None` in TPU loops.
  current_strategy = model._distribution_strategy
  iterator = distributed_training_utils.get_iterator(dataset, current_strategy)
  steps_per_epoch = training_utils.infer_steps_for_dataset(
      dataset, steps_per_epoch, epochs, steps_name='steps_per_epoch')
  if (current_strategy.extended.steps_per_run != 1 and
      steps_per_epoch is None):
    raise ValueError('`steps_per_epoch` should be specified when calling '
                     '`fit` on the model with TPUStrategy when '
                     '`steps_per_run` != 1 .')

  scope = distributed_training_utils.distributed_scope(
      strategy=current_strategy, learning_phase=1)
  # Entering the scope through `with` guarantees it is exited even when a
  # training step or a callback raises.
  with scope:
    out_labels = model.metrics_names or []

    step_fn = _make_step_fn(
        model, ModeKeys.TRAIN, current_strategy, out_labels)

    # Add initial dummy values for loss and other metric tensors.
    initial_loop_values = {}
    initial_loop_values['loss'] = constant_op.constant(1e7)
    for name in model.metrics_names[1:]:
      tensor = model._all_metrics_tensors[name]
      initial_loop_values[name] = array_ops.zeros(tensor.shape, tensor.dtype)

    use_steps = steps_per_epoch is not None
    if use_steps:
      iteration_value = min(steps_per_epoch,
                            current_strategy.extended.steps_per_run)
    else:
      iteration_value = current_strategy.extended.steps_per_run

    # The number of iterations per run is held in a variable so it can be
    # reloaded with a different value between runs (see the loop below).
    steps_per_run = K.variable(
        value=iteration_value,
        dtype='int32',
        name='steps_per_run')
    ctx = current_strategy.extended.experimental_run_steps_on_iterator(
        step_fn, iterator, iterations=steps_per_run,
        initial_loop_values=initial_loop_values)
    train_op = ctx.run_op
    output_tensors = ctx.last_step_outputs

    do_validation = bool(validation_steps)

    if model._compile_distribution:
      distributed_training_utils._copy_weights_to_distributed_model(
          model, mode)

    callbacks = cbks.configure_callbacks(
        callbacks,
        model,
        do_validation=do_validation,
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        verbose=verbose,
        count_mode='steps',
        mode=mode)

    # Calculate the steps each time on the device.
    if use_steps:
      steps_to_run = ([current_strategy.extended.steps_per_run] *
                      (steps_per_epoch //
                       current_strategy.extended.steps_per_run))
      if steps_per_epoch % current_strategy.extended.steps_per_run:
        # A final, shorter run covers the steps left over in the epoch.
        steps_to_run.append(
            steps_per_epoch % current_strategy.extended.steps_per_run)
      target_steps = len(steps_to_run)
    else:
      target_steps = np.inf

    callbacks._call_begin_hook(mode)
    for epoch in range(initial_epoch, epochs):
      distributed_training_utils._reset_metrics(model)
      callbacks.on_epoch_begin(epoch)
      epoch_logs = {}
      step_index = 0
      prev_step_count = None
      current_step = 0
      while current_step < target_steps:
        step_count = steps_to_run[current_step] if use_steps else 1
        batch_logs = {'batch': step_index, 'size': 1, 'num_steps': step_count}
        callbacks._call_batch_hook(mode, 'begin', step_index, batch_logs)
        # Only reload the variable when the number of steps actually changes.
        if prev_step_count is None or step_count != prev_step_count:
          steps_per_run.load(step_count, K.get_session())
          prev_step_count = step_count
        try:
          _, outputs = K.batch_get_value([train_op, output_tensors])
        except errors.OutOfRangeError:
          if use_steps:
            # Pass the count as a logging argument: `'...' % a * b` would
            # format with `a` and then repeat the whole message `b` times.
            logging.warning(
                'Your dataset iterator ran out of data; '
                'interrupting training. Make sure that your dataset '
                'can generate at least `steps_per_epoch * epochs` '
                'batches (in this case, %d batches).',
                steps_per_epoch * epochs)
          else:
            target_steps = current_step
            logging.info('Dataset iterator ran out of data. Inferring the '
                         'value of `steps_per_epoch` as %s  .' % target_steps)
            distributed_training_utils.initialize_iterator(iterator,
                                                           current_strategy)
          break

        batch_logs.update(outputs)
        callbacks._call_batch_hook(mode, 'end', step_index, batch_logs)
        step_index = step_index + step_count
        current_step += 1

        if callbacks.model.stop_training:
          break

      if (do_validation and
          training_utils.should_run_validation(validation_freq, epoch)):
        logging.info('Running validation at fit epoch: %s', epoch)

        if model._compile_distribution:
          # Since we create a new clone from the original model we need to copy
          # the weights back to the original model before we can run validation.
          distributed_training_utils._copy_weights_to_original_model(
              model, ModeKeys.TRAIN)

        val_outs = experimental_tpu_test_loop(  # pylint: disable=undefined-variable
            model,
            val_dataset,
            steps=validation_steps,
            verbose=verbose,
            callbacks=callbacks)
        if not isinstance(val_outs, list):
          val_outs = [val_outs]
        # Same labels assumed.
        for label, val_out in zip(out_labels, val_outs):
          epoch_logs['val_' + label] = val_out

      callbacks.on_epoch_end(epoch, epoch_logs)
      if callbacks.model.stop_training:
        break
    callbacks._call_end_hook(mode)

    if model._compile_distribution:
      # Copy the weights back from the replicated model to the original model.
      distributed_training_utils._copy_weights_to_original_model(
          model, ModeKeys.TRAIN)

  return model.history
452
453
def experimental_tpu_test_loop(model,
                               dataset,
                               verbose=0,
                               steps=None,
                               callbacks=None):
  """Test loop for evaluating with TPU DistributionStrategy.

  Arguments:
      model: Keras Model instance.
      dataset: Dataset for input data.
      verbose: Integer, Verbosity mode 0 or 1. A progress bar is shown only
          when this is exactly 1.
      steps: Total number of steps (batches of samples)
          before declaring predictions finished.
          Ignored with the default value of `None`.
      callbacks: List of callbacks to be called during evaluation.

  Returns:
      Scalar loss (if the model has a single output and no metrics)
      or list of scalars (if the model has multiple outputs
      and/or metrics). The attribute `model.metrics_names` will give you
      the display labels for the outputs. The loss is averaged over the
      number of steps that actually ran; if no step ran it is left at 0.
  """
  mode = ModeKeys.TEST
  current_strategy = model._distribution_strategy
  iterator = distributed_training_utils.get_iterator(dataset,
                                                     current_strategy)
  steps = training_utils.infer_steps_for_dataset(dataset, steps,
                                                 steps_name='steps')

  scope = distributed_training_utils.distributed_scope(
      strategy=current_strategy, learning_phase=0)
  # Entering the scope through `with` guarantees it is exited even when an
  # evaluation step or a callback raises.
  with scope:
    out_labels = model.metrics_names
    step_fn = _make_step_fn(model, ModeKeys.TEST, current_strategy, out_labels)

    # Add initial dummy values for loss and other metric tensors.
    initial_loop_values = {}
    initial_loop_values['loss'] = constant_op.constant(1e7)
    for name in model.metrics_names[1:]:
      tensor = model._all_metrics_tensors[name]
      initial_loop_values[name] = array_ops.zeros(tensor.shape, tensor.dtype)

    # TODO(priyag): Use steps_per_run when we use new metrics as they will
    # allow handling metric computation at each step using variables.
    ctx = current_strategy.extended.experimental_run_steps_on_iterator(
        step_fn, iterator, iterations=1,
        initial_loop_values=initial_loop_values)

    test_op = ctx.run_op
    output_tensors = ctx.last_step_outputs

    # Created and updated under the same condition, so other verbosity values
    # (e.g. 2, as forwarded by the fit loop) never touch an undefined name.
    progbar = Progbar(target=steps) if verbose == 1 else None

    if model._compile_distribution:
      distributed_training_utils._copy_weights_to_distributed_model(
          model, mode)

    distributed_training_utils._reset_metrics(model)

    callbacks = cbks.configure_callbacks(
        callbacks,
        model,
        do_validation=False,
        epochs=1,
        steps_per_epoch=steps,
        verbose=verbose,
        count_mode='steps',
        mode=ModeKeys.TEST)
    callbacks._call_begin_hook(mode)

    outs = [0.] * len(model.metrics_names)
    if steps is not None:
      target_steps = steps
    else:
      target_steps = np.inf

    current_step = 0
    while current_step < target_steps:
      batch_logs = {'batch': current_step, 'size': 1}
      callbacks._call_batch_hook(mode, 'begin', current_step, batch_logs)
      try:
        _, batch_outs = K.batch_get_value([test_op, output_tensors])
      except errors.OutOfRangeError:
        if steps is not None:
          # Parenthesized so both literals form one message; as two separate
          # statements the second half (with the step count) was discarded.
          warning_msg = ('Make sure that your dataset can generate at least '
                         '`steps` batches (in this case, {} batches).'.format(
                             steps))
        else:
          warning_msg = 'Number of steps ran: {} steps'.format(current_step)

        logging.warning('Your dataset iterator ran out of data; '
                        'interrupting evaluation. ' + warning_msg)
        # Remember how many steps really ran so the loss average is correct.
        target_steps = current_step
        break
      for i, label in enumerate(model.metrics_names):
        if i == 0:
          # Loss is stateless metrics.
          outs[i] += batch_outs[label]
        else:
          # For all stateful metrics, the aggregation is handled by mirrored
          # vars.
          outs[i] = batch_outs[label]

      batch_logs = cbks.make_logs(model, batch_logs, outs, mode)
      callbacks._call_batch_hook(mode, 'end', current_step, batch_logs)
      if progbar is not None:
        progbar.update(current_step + 1)
      current_step += 1

    callbacks._call_end_hook(mode)

  # Average the accumulated loss over the steps that ran. Skip the division
  # when nothing ran (or there are no outputs) to avoid dividing by zero.
  if outs and target_steps:
    outs[0] /= target_steps

  if len(outs) == 1:
    return outs[0]
  return outs
571
572
def experimental_tpu_predict_loop(model,
                                  dataset,
                                  verbose=0,
                                  steps=None,
                                  callbacks=None):
  """Predict loop for predicting with TPU DistributionStrategy.

  Arguments:
      model: Keras Model instance.
      dataset: Dataset for input data.
      verbose: Integer, Verbosity mode 0 or 1. A progress bar is shown only
          when this is exactly 1.
      steps: Total number of steps (batches of samples)
          before declaring `_predict_loop` finished.
          Ignored with the default value of `None`.
      callbacks: List of callbacks to be called during prediction.

  Returns:
      Array of predictions (if the model has a single output)
      or list of arrays of predictions
      (if the model has multiple outputs).
  """
  mode = ModeKeys.PREDICT
  steps = training_utils.infer_steps_for_dataset(dataset, steps,
                                                 steps_name='steps')
  dataset_fully_shaped = (distributed_training_utils.
                          is_dataset_shape_fully_defined(dataset))
  padding_handler = None
  if not dataset_fully_shaped:
    # TODO(hongjunchoi): Investigate whether operations from
    # PartialBatchPaddingHandler are unnecessarily pruned out
    # during graph optimization.
    padding_handler = padding_util.PartialBatchPaddingHandler(
        model._feed_output_shapes)
    batch_size, _, prefetch_buffer = input_lib._get_dataset_attributes(dataset)
    padding_handler.padded_batch_size = batch_size
    padding_handler.padding_mask = dataset.reduce(padding_handler.padding_mask,
                                                  padding_handler.update_mask)

    dataset = dataset.map(padding_handler.pad_batch)
    dataset = dataset.apply(batching.unbatch())
    # Upon this point, it is guaranteed that the dataset does not
    # have partial batches. Thus, we set `drop_remainder=True` to
    # get static shape information about the elements in the dataset.
    dataset = dataset.batch(batch_size, drop_remainder=True)

    if prefetch_buffer is not None:
      dataset = dataset.prefetch(prefetch_buffer)

  current_strategy = model._distribution_strategy
  iterator = distributed_training_utils.get_iterator(dataset, current_strategy)

  scope = distributed_training_utils.distributed_scope(
      strategy=current_strategy, learning_phase=0)
  # Entering the scope through `with` guarantees it is exited even when a
  # prediction step or a callback raises.
  with scope:
    out_labels = model.output_names
    step_fn = _make_step_fn(
        model, ModeKeys.PREDICT, current_strategy, out_labels)

    # Add initial dummy values for outputs.
    initial_loop_values = {}
    batch_dimension = distributed_training_utils.get_batch_dimension(iterator)
    for name, tensor in zip(model.output_names, model.outputs):
      # TODO(priyag): This is a workaround as we do not know the batch dimension
      # of the model's output at this point.
      shape = tensor_shape.TensorShape(tensor.shape.dims)
      shape.dims = [batch_dimension] + shape.dims[1:]
      initial_loop_values[name] = array_ops.zeros(shape, tensor.dtype)

    # TODO(priyag, sourabhbajaj): Support steps_per_run if/when we add outfeed.
    ctx = current_strategy.extended.experimental_run_steps_on_iterator(
        step_fn, iterator, iterations=1,
        initial_loop_values=initial_loop_values)

    predict_op = ctx.run_op
    output_tensors = ctx.last_step_outputs

    # Created and updated under the same condition, so other verbosity values
    # never touch an undefined name.
    progbar = Progbar(target=steps) if verbose == 1 else None

    if model._compile_distribution:
      distributed_training_utils._copy_weights_to_distributed_model(
          model, mode)

    distributed_training_utils._reset_metrics(model)

    callbacks = cbks.configure_callbacks(
        callbacks,
        model,
        do_validation=False,
        epochs=1,
        steps_per_epoch=steps,
        verbose=verbose,
        count_mode='steps',
        mode=mode)
    callbacks._call_begin_hook(mode)

    # Since we do not know how many samples we will see, we cannot pre-allocate
    # the returned Numpy arrays. Instead, we store one array per batch seen
    # and concatenate them upon returning.
    unconcatenated_outs = [[] for _ in model.outputs]
    if steps is not None:
      target_steps = steps
    else:
      target_steps = np.inf

    current_step = 0
    while current_step < target_steps:
      batch_logs = {'batch': current_step, 'size': 1}
      callbacks._call_batch_hook(mode, 'begin', current_step, batch_logs)
      try:
        _, batch_outs = K.batch_get_value([predict_op, output_tensors])
      except errors.OutOfRangeError:
        if steps is not None:
          # Parenthesized so both literals form one message; as two separate
          # statements the second half (with the step count) was discarded.
          warning_msg = ('Make sure that your dataset can generate at least '
                         '`steps` batches (in this case, {} batches).'.format(
                             steps))
        else:
          warning_msg = 'Number of steps ran: {} steps'.format(current_step)

        logging.warning('Your dataset iterator ran out of data; '
                        'interrupting prediction. ' + warning_msg)
        break

      # TODO(priyag): maybe need to unwrap the outputs first for
      # MirroredStrategy.
      for i, label in enumerate(model.output_names):
        unconcatenated_outs[i].extend(batch_outs[label])
      batch_logs = cbks.make_logs(model, batch_logs, batch_outs, mode)
      callbacks._call_batch_hook(mode, 'end', current_step, batch_logs)
      if progbar is not None:
        progbar.update(current_step + 1)
      current_step += 1

    callbacks._call_end_hook(mode)

  if len(unconcatenated_outs) == 1:
    prediction_result = np.concatenate(unconcatenated_outs[0], axis=0)
  else:
    prediction_result = [
        np.concatenate(per_output_batches, axis=0)
        for per_output_batches in unconcatenated_outs
    ]

  if padding_handler:
    # Strip the rows that were added to pad partial batches.
    prediction_result = padding_handler.apply_mask(prediction_result)

  return prediction_result
719