1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Built-in loss functions.
16"""
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import abc
22
23import six
24
25from tensorflow.python.framework import ops
26from tensorflow.python.framework import smart_cond
27from tensorflow.python.keras import backend as K
28from tensorflow.python.keras.utils import losses_utils
29from tensorflow.python.keras.utils.generic_utils import deserialize_keras_object
30from tensorflow.python.keras.utils.generic_utils import serialize_keras_object
31from tensorflow.python.keras.utils.tf_utils import is_tensor_or_variable
32from tensorflow.python.ops import array_ops
33from tensorflow.python.ops import math_ops
34from tensorflow.python.ops import nn
35from tensorflow.python.ops.losses import losses_impl
36from tensorflow.python.util.tf_export import keras_export
37from tensorflow.tools.docs import doc_controls
38
39
@keras_export('keras.losses.Loss')
class Loss(object):
  """Loss base class.

  To be implemented by subclasses:
  * `call()`: Contains the logic for loss calculation using `y_true`, `y_pred`.

  Example subclass implementation:
  ```
  class MeanSquaredError(Loss):
    def call(self, y_true, y_pred):
      y_pred = ops.convert_to_tensor(y_pred)
      y_true = math_ops.cast(y_true, y_pred.dtype)
      return K.mean(math_ops.square(y_pred - y_true), axis=-1)
  ```

  Args:
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op.
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               name=None):
    self.reduction = reduction
    self.name = name

  def __call__(self, y_true, y_pred, sample_weight=None):
    """Invokes the `Loss` instance.

    Runs `call()` inside a name scope and then weights/reduces the result with
    `losses_utils.compute_weighted_loss` using `self.reduction`.

    Args:
      y_true: Ground truth values.
      y_pred: The predicted values.
      sample_weight: Optional `Tensor` whose rank is either 0, or the same rank
        as `y_true`, or is broadcastable to `y_true`. `sample_weight` acts as a
        coefficient for the loss. If a scalar is provided, then the loss is
        simply scaled by the given value. If `sample_weight` is a tensor of size
        `[batch_size]`, then the total loss for each sample of the batch is
        rescaled by the corresponding element in the `sample_weight` vector. If
        the shape of `sample_weight` matches the shape of `y_pred`, then the
        loss of each measurable element of `y_pred` is scaled by the
        corresponding value of `sample_weight`.

    Returns:
      Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
        shape as `y_true`; otherwise, it is scalar.

    Raises:
      ValueError: If the shape of `sample_weight` is invalid.
    """
    # If we are wrapping a lambda function strip '<>' from the name as it is not
    # accepted in scope name.
    scope_name = 'lambda' if self.name == '<lambda>' else self.name
    # The class name is the fallback scope name used when `self.name` is None.
    with ops.name_scope(scope_name, self.__class__.__name__,
                        (y_pred, y_true, sample_weight)):
      losses = self.call(y_true, y_pred)
      return losses_utils.compute_weighted_loss(
          losses, sample_weight, reduction=self.reduction)

  @classmethod
  def from_config(cls, config):
    """Instantiates a `Loss` from its config (output of `get_config()`).

    Args:
        config: Output of `get_config()`.

    Returns:
        A `Loss` instance.
    """
    return cls(**config)

  def get_config(self):
    """Returns the constructor arguments (`reduction`, `name`) as a dict."""
    return {'reduction': self.reduction, 'name': self.name}

  @abc.abstractmethod
  @doc_controls.for_subclass_implementers
  def call(self, y_true, y_pred):
    """Invokes the `Loss` instance.

    Args:
      y_true: Ground truth values, with the same shape as 'y_pred'.
      y_pred: The predicted values.

    Raises:
      NotImplementedError: Always, unless overridden by a subclass.
    """
    # The exception must actually be raised; merely constructing it would let
    # an unimplemented subclass silently return `None` to `__call__`.
    raise NotImplementedError('Must be implemented in subclasses.')
125
126
class LossFunctionWrapper(Loss):
  """Adapts a plain loss function to the `Loss` class interface.

  Args:
    fn: The loss function to wrap, with signature `fn(y_true, y_pred,
      **kwargs)`.
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: (Optional) name for the loss.
    **kwargs: Extra keyword arguments forwarded to `fn` on every call.
  """

  def __init__(self,
               fn,
               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               name=None,
               **kwargs):
    super(LossFunctionWrapper, self).__init__(reduction=reduction, name=name)
    self.fn = fn
    self._fn_kwargs = kwargs

  def call(self, y_true, y_pred):
    """Evaluates the wrapped function on the given labels and predictions.

    Args:
      y_true: Ground truth values.
      y_pred: The predicted values.

    Returns:
      Whatever the wrapped `fn` returns (loss values per sample).
    """
    loss_fn = self.fn
    return loss_fn(y_true, y_pred, **self._fn_kwargs)

  def get_config(self):
    """Returns the base config extended with the stored `fn` keyword arguments.

    Tensor or variable values are evaluated with `K.eval` so the config holds
    concrete values. The wrapped `fn` itself is not part of the config.
    """
    fn_config = {
        key: K.eval(value) if is_tensor_or_variable(value) else value
        for key, value in six.iteritems(self._fn_kwargs)
    }
    merged = dict(super(LossFunctionWrapper, self).get_config())
    merged.update(fn_config)
    return merged
166
167
@keras_export('keras.losses.MeanSquaredError')
class MeanSquaredError(LossFunctionWrapper):
  """Mean of the squared differences between labels and predictions.

  Wraps `mean_squared_error`. With `y_true = [0., 0., 1., 1.]` and
  `y_pred = [1., 1., 1., 0.]` the squared errors are `[1., 1., 0., 1.]`, so
  the loss is 3/4 (0.75).

  Usage:

  ```python
  mse = tf.keras.losses.MeanSquaredError()
  loss = mse([0., 0., 1., 1.], [1., 1., 1., 0.])
  print('Loss: ', loss.numpy())  # Loss: 0.75
  ```

  Usage with tf.keras API:

  ```python
  model = tf.keras.Model(inputs, outputs)
  model.compile('sgd', loss=tf.keras.losses.MeanSquaredError())
  ```

  Args:
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op. Defaults to `'mean_squared_error'`.
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               name='mean_squared_error'):
    super(MeanSquaredError, self).__init__(
        fn=mean_squared_error, reduction=reduction, name=name)
196
197
@keras_export('keras.losses.MeanAbsoluteError')
class MeanAbsoluteError(LossFunctionWrapper):
  """Mean of the absolute differences between labels and predictions.

  Wraps `mean_absolute_error`. With `y_true = [0., 0., 1., 1.]` and
  `y_pred = [1., 1., 1., 0.]` the absolute errors are `[1., 1., 0., 1.]`, so
  the loss is 3/4 (0.75).

  Usage:

  ```python
  mae = tf.keras.losses.MeanAbsoluteError()
  loss = mae([0., 0., 1., 1.], [1., 1., 1., 0.])
  print('Loss: ', loss.numpy())  # Loss: 0.75
  ```

  Usage with tf.keras API:

  ```python
  model = tf.keras.Model(inputs, outputs)
  model.compile('sgd', loss=tf.keras.losses.MeanAbsoluteError())
  ```

  Args:
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op. Defaults to `'mean_absolute_error'`.
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               name='mean_absolute_error'):
    super(MeanAbsoluteError, self).__init__(
        fn=mean_absolute_error, reduction=reduction, name=name)
226
227
@keras_export('keras.losses.MeanAbsolutePercentageError')
class MeanAbsolutePercentageError(LossFunctionWrapper):
  """Mean absolute percentage error between `y_true` and `y_pred`.

  Wraps `mean_absolute_percentage_error`, which divides the absolute error by
  `|y_true|` (floored at the backend epsilon) and scales the mean by 100.
  Because of that floor, zero labels produce very large values: for
  `y_true = [0., 0., 1., 1.]` and `y_pred = [1., 1., 1., 0.]` the loss is
  5e+08.

  Usage:

  ```python
  mape = tf.keras.losses.MeanAbsolutePercentageError()
  loss = mape([0., 0., 1., 1.], [1., 1., 1., 0.])
  print('Loss: ', loss.numpy())  # Loss: 5e+08
  ```

  Usage with tf.keras API:

  ```python
  model = tf.keras.Model(inputs, outputs)
  model.compile('sgd', loss=tf.keras.losses.MeanAbsolutePercentageError())
  ```

  Args:
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op. Defaults to
      `'mean_absolute_percentage_error'`.
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               name='mean_absolute_percentage_error'):
    super(MeanAbsolutePercentageError, self).__init__(
        fn=mean_absolute_percentage_error, reduction=reduction, name=name)
256
257
@keras_export('keras.losses.MeanSquaredLogarithmicError')
class MeanSquaredLogarithmicError(LossFunctionWrapper):
  """Mean squared logarithmic error between `y_true` and `y_pred`.

  Wraps `mean_squared_logarithmic_error`. For `y_true = [0., 0., 1., 1.]` and
  `y_pred = [1., 1., 1., 0.]` the loss is 0.36034.

  Usage:

  ```python
  msle = tf.keras.losses.MeanSquaredLogarithmicError()
  loss = msle([0., 0., 1., 1.], [1., 1., 1., 0.])
  print('Loss: ', loss.numpy())  # Loss: 0.36034
  ```

  Usage with tf.keras API:

  ```python
  model = tf.keras.Model(inputs, outputs)
  model.compile('sgd', loss=tf.keras.losses.MeanSquaredLogarithmicError())
  ```

  Args:
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op. Defaults to
      `'mean_squared_logarithmic_error'`.
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               name='mean_squared_logarithmic_error'):
    super(MeanSquaredLogarithmicError, self).__init__(
        fn=mean_squared_logarithmic_error, reduction=reduction, name=name)
286
287
@keras_export('keras.losses.BinaryCrossentropy')
class BinaryCrossentropy(LossFunctionWrapper):
  """Crossentropy loss between binary labels and predictions.

  Use this loss when there are only two label classes (assumed to be 0 and
  1). There should be a single floating point value per feature.

  In the snippet below there is a single floating point value per example,
  and both `y_pred` and `y_true` have shape `[batch_size]`.

  Usage:

  ```python
  bce = tf.keras.losses.BinaryCrossentropy()
  loss = bce([0., 0., 1., 1.], [1., 1., 1., 0.])
  print('Loss: ', loss.numpy())
  ```

  Usage with tf.keras API:

  ```python
  model = tf.keras.Model(inputs, outputs)
  model.compile('sgd', loss=tf.keras.losses.BinaryCrossentropy())
  ```

  Args:
    from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
      we assume that `y_pred` encodes a probability distribution.
    label_smoothing: Float in [0, 1]. If > `0` then smooth the labels.
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op. Defaults to `'binary_crossentropy'`.
  """

  def __init__(self,
               from_logits=False,
               label_smoothing=0,
               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               name='binary_crossentropy'):
    super(BinaryCrossentropy, self).__init__(
        fn=binary_crossentropy,
        reduction=reduction,
        name=name,
        from_logits=from_logits,
        label_smoothing=label_smoothing)
    # Also exposed as a plain attribute on the instance.
    self.from_logits = from_logits
335
336
@keras_export('keras.losses.CategoricalCrossentropy')
class CategoricalCrossentropy(LossFunctionWrapper):
  """Crossentropy loss between one-hot labels and predictions.

  Use this loss when there are two or more label classes. Labels are expected
  in a `one_hot` representation; to provide labels as integers, use the
  `SparseCategoricalCrossentropy` loss instead. There should be `# classes`
  floating point values per feature.

  In the snippet below there are `# classes` floating point values per
  example. Both `y_pred` and `y_true` have shape `[batch_size, num_classes]`.

  Usage:

  ```python
  cce = tf.keras.losses.CategoricalCrossentropy()
  loss = cce(
    [[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]],
    [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]])
  print('Loss: ', loss.numpy())  # Loss: 0.3239
  ```

  Usage with tf.keras API:

  ```python
  model = tf.keras.Model(inputs, outputs)
  model.compile('sgd', loss=tf.keras.losses.CategoricalCrossentropy())
  ```

  Args:
    from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
      we assume that `y_pred` encodes a probability distribution.
    label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,
      meaning the confidence on label values is relaxed. E.g.
      `label_smoothing=0.2` means that we will use a value of `0.1` for label
      `0` and `0.9` for label `1`.
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op. Defaults to `'categorical_crossentropy'`.
  """

  def __init__(self,
               from_logits=False,
               label_smoothing=0,
               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               name='categorical_crossentropy'):
    super(CategoricalCrossentropy, self).__init__(
        fn=categorical_crossentropy,
        reduction=reduction,
        name=name,
        from_logits=from_logits,
        label_smoothing=label_smoothing)
390
391
@keras_export('keras.losses.SparseCategoricalCrossentropy')
class SparseCategoricalCrossentropy(LossFunctionWrapper):
  """Crossentropy loss between integer labels and predictions.

  Use this loss when there are two or more label classes and the labels are
  provided as integers. To provide labels in a `one-hot` representation, use
  the `CategoricalCrossentropy` loss instead. There should be `# classes`
  floating point values per feature for `y_pred` and a single value per
  feature for `y_true`.

  In the snippet below there is a single value per example for `y_true` and
  `# classes` floating point values per example for `y_pred`. The shape of
  `y_true` is `[batch_size]` and the shape of `y_pred` is
  `[batch_size, num_classes]`.

  Usage:

  ```python
  cce = tf.keras.losses.SparseCategoricalCrossentropy()
  loss = cce(
    [0, 1, 2],
    [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]])
  print('Loss: ', loss.numpy())  # Loss: 0.3239
  ```

  Usage with tf.keras API:

  ```python
  model = tf.keras.Model(inputs, outputs)
  model.compile('sgd', loss=tf.keras.losses.SparseCategoricalCrossentropy())
  ```

  Args:
    from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
      we assume that `y_pred` encodes a probability distribution.
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op.
  """

  def __init__(self,
               from_logits=False,
               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               name=None):
    super(SparseCategoricalCrossentropy, self).__init__(
        fn=sparse_categorical_crossentropy,
        reduction=reduction,
        name=name,
        from_logits=from_logits)
441
442
@keras_export('keras.losses.Hinge')
class Hinge(LossFunctionWrapper):
  """Hinge loss between `y_true` and `y_pred`.

  Wraps `hinge`. `y_true` values are expected to be -1 or 1; if binary
  (0 or 1) labels are provided they are converted to -1 or 1.

  Usage:

  ```python
  h = tf.keras.losses.Hinge()
  loss = h([-1., 1., 1.], [0.6, -0.7, -0.5])

  # loss = mean(max(0, 1 - y_true * y_pred)) = [1.6 + 1.7 + 1.5] / 3

  print('Loss: ', loss.numpy())  # Loss: 1.6
  ```

  Usage with tf.keras API:

  ```python
  model = tf.keras.Model(inputs, outputs)
  model.compile('sgd', loss=tf.keras.losses.Hinge())
  ```

  Args:
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op.
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               name=None):
    super(Hinge, self).__init__(fn=hinge, reduction=reduction, name=name)
473
474
@keras_export('keras.losses.SquaredHinge')
class SquaredHinge(LossFunctionWrapper):
  """Squared hinge loss between `y_true` and `y_pred`.

  Wraps `squared_hinge`. `y_true` values are expected to be -1 or 1; if binary
  (0 or 1) labels are provided they are converted to -1 or 1.

  Usage:

  ```python
  sh = tf.keras.losses.SquaredHinge()
  loss = sh([-1., 1., 1.], [0.6, -0.7, -0.5])

  # loss = mean(max(0, 1 - y_true * y_pred)^2) = [1.6^2 + 1.7^2 + 1.5^2] / 3

  print('Loss: ', loss.numpy())  # Loss: 2.566666
  ```

  Usage with tf.keras API:

  ```python
  model = tf.keras.Model(inputs, outputs)
  model.compile('sgd', loss=tf.keras.losses.SquaredHinge())
  ```

  Args:
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op. Defaults to `'squared_hinge'`.
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               name='squared_hinge'):
    super(SquaredHinge, self).__init__(
        fn=squared_hinge, reduction=reduction, name=name)
506
507
@keras_export('keras.losses.CategoricalHinge')
class CategoricalHinge(LossFunctionWrapper):
  """Categorical hinge loss between `y_true` and `y_pred`.

  Wraps `categorical_hinge`, i.e. `max(0, neg - pos + 1)` where
  `pos = sum(y_true * y_pred)` and `neg = max((1 - y_true) * y_pred)` over the
  last axis.

  Usage:

  ```python
  ch = tf.keras.losses.CategoricalHinge()
  loss = ch([0., 1., 1.], [1., 0., 1.])
  print('Loss: ', loss.numpy())  # Loss: 1.0
  ```

  Usage with tf.keras API:

  ```python
  model = tf.keras.Model(inputs, outputs)
  model.compile('sgd', loss=tf.keras.losses.CategoricalHinge())
  ```

  Args:
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op. Defaults to `'categorical_hinge'`.
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               name='categorical_hinge'):
    super(CategoricalHinge, self).__init__(
        fn=categorical_hinge, reduction=reduction, name=name)
533
534
@keras_export('keras.losses.Poisson')
class Poisson(LossFunctionWrapper):
  """Poisson loss between `y_true` and `y_pred`.

  Wraps `poisson`:

  `loss = mean(y_pred - y_true * log(y_pred + epsilon))`

  Usage:

  ```python
  p = tf.keras.losses.Poisson()
  loss = p([1., 9., 2.], [4., 8., 12.])

  # loss = [(4 - 1 * log(4)) + (8 - 9 * log(8)) + (12 - 2 * log(12))] / 3

  print('Loss: ', loss.numpy())  # Loss: approximately -0.357
  ```

  Usage with tf.keras API:

  ```python
  model = tf.keras.Model(inputs, outputs)
  model.compile('sgd', loss=tf.keras.losses.Poisson())
  ```

  Args:
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op. Defaults to `'poisson'`.
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               name='poisson'):
    super(Poisson, self).__init__(fn=poisson, reduction=reduction, name=name)
561
562
@keras_export('keras.losses.LogCosh')
class LogCosh(LossFunctionWrapper):
  """Logarithm of the hyperbolic cosine of the prediction error.

  Wraps `logcosh`:

  `logcosh = log((exp(x) + exp(-x))/2)`, where x is the error (y_pred - y_true)

  Usage:

  ```python
  l = tf.keras.losses.LogCosh()
  loss = l([0., 1., 1.], [1., 0., 1.])
  print('Loss: ', loss.numpy())  # Loss: 0.289
  ```

  Usage with tf.keras API:

  ```python
  model = tf.keras.Model(inputs, outputs)
  model.compile('sgd', loss=tf.keras.losses.LogCosh())
  ```

  Args:
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op. Defaults to `'logcosh'`.
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               name='logcosh'):
    super(LogCosh, self).__init__(fn=logcosh, reduction=reduction, name=name)
589
590
@keras_export('keras.losses.KLDivergence')
class KLDivergence(LossFunctionWrapper):
  """Kullback Leibler divergence loss between `y_true` and `y_pred`.

  Wraps `kullback_leibler_divergence`:

  `loss = sum(y_true * log(y_true / y_pred))`

  where both inputs are first clipped to `[epsilon, 1]`.

  Usage:

  ```python
  k = tf.keras.losses.KLDivergence()
  loss = k([.4, .9, .2], [.5, .8, .12])

  # loss = .4 * log(.4 / .5) + .9 * log(.9 / .8) + .2 * log(.2 / .12)

  print('Loss: ', loss.numpy())  # Loss: approximately 0.119
  ```

  Usage with tf.keras API:

  ```python
  model = tf.keras.Model(inputs, outputs)
  model.compile('sgd', loss=tf.keras.losses.KLDivergence())
  ```

  Args:
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op. Defaults to
      `'kullback_leibler_divergence'`.
  """

  def __init__(self,
               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               name='kullback_leibler_divergence'):
    super(KLDivergence, self).__init__(
        fn=kullback_leibler_divergence, reduction=reduction, name=name)
618
619
@keras_export('keras.losses.Huber')
class Huber(LossFunctionWrapper):
  """Huber loss between `y_true` and `y_pred`.

  Wraps `huber_loss`. For each value x in `error=y_true-y_pred`, the following
  is calculated:

  ```
  0.5 * x^2                  if |x| <= d
  0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```
  where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

  Usage:

  ```python
  l = tf.keras.losses.Huber()
  loss = l([0., 1., 1.], [1., 0., 1.])
  print('Loss: ', loss.numpy())  # Loss: 0.333
  ```

  Usage with tf.keras API:

  ```python
  model = tf.keras.Model(inputs, outputs)
  model.compile('sgd', loss=tf.keras.losses.Huber())
  ```

  Args:
    delta: A float, the point where the Huber loss function changes from a
      quadratic to linear.
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op. Defaults to `'huber_loss'`.
  """

  def __init__(self,
               delta=1.0,
               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               name='huber_loss'):
    super(Huber, self).__init__(
        fn=huber_loss, reduction=reduction, name=name, delta=delta)
661
662
@keras_export('keras.metrics.mean_squared_error',
              'keras.metrics.mse',
              'keras.metrics.MSE',
              'keras.losses.mean_squared_error',
              'keras.losses.mse',
              'keras.losses.MSE')
def mean_squared_error(y_true, y_pred):
  """Computes the mean squared error over the last axis.

  Args:
    y_true: The ground truth values; cast to the dtype of `y_pred`.
    y_pred: The predicted values.

  Returns:
    Mean of `(y_pred - y_true)^2` taken over the last axis.
  """
  predictions = ops.convert_to_tensor(y_pred)
  targets = math_ops.cast(y_true, predictions.dtype)
  squared_errors = math_ops.squared_difference(predictions, targets)
  return K.mean(squared_errors, axis=-1)
673
674
@keras_export('keras.metrics.mean_absolute_error',
              'keras.metrics.mae',
              'keras.metrics.MAE',
              'keras.losses.mean_absolute_error',
              'keras.losses.mae',
              'keras.losses.MAE')
def mean_absolute_error(y_true, y_pred):
  """Computes the mean absolute error over the last axis.

  Args:
    y_true: The ground truth values; cast to the dtype of `y_pred`.
    y_pred: The predicted values.

  Returns:
    Mean of `|y_pred - y_true|` taken over the last axis.
  """
  predictions = ops.convert_to_tensor(y_pred)
  targets = math_ops.cast(y_true, predictions.dtype)
  absolute_errors = math_ops.abs(predictions - targets)
  return K.mean(absolute_errors, axis=-1)
685
686
@keras_export('keras.metrics.mean_absolute_percentage_error',
              'keras.metrics.mape',
              'keras.metrics.MAPE',
              'keras.losses.mean_absolute_percentage_error',
              'keras.losses.mape',
              'keras.losses.MAPE')
def mean_absolute_percentage_error(y_true, y_pred):
  """Computes the mean absolute percentage error over the last axis.

  Args:
    y_true: The ground truth values; cast to the dtype of `y_pred`.
    y_pred: The predicted values.

  Returns:
    `100 * mean(|y_true - y_pred| / max(|y_true|, epsilon))` over the last
    axis.
  """
  predictions = ops.convert_to_tensor(y_pred)
  targets = math_ops.cast(y_true, predictions.dtype)
  error = targets - predictions
  # Floor the denominator at the backend epsilon so zero labels do not divide
  # by zero.
  denominator = K.clip(math_ops.abs(targets), K.epsilon(), None)
  relative_error = math_ops.abs(error / denominator)
  return 100. * K.mean(relative_error, axis=-1)
699
700
@keras_export('keras.metrics.mean_squared_logarithmic_error',
              'keras.metrics.msle',
              'keras.metrics.MSLE',
              'keras.losses.mean_squared_logarithmic_error',
              'keras.losses.msle',
              'keras.losses.MSLE')
def mean_squared_logarithmic_error(y_true, y_pred):
  """Computes the mean squared logarithmic error over the last axis.

  Both inputs are floored at the backend epsilon before `log(x + 1)` is taken.

  Args:
    y_true: The ground truth values; cast to the dtype of `y_pred`.
    y_pred: The predicted values.

  Returns:
    Mean of `(log(y_pred + 1) - log(y_true + 1))^2` over the last axis.
  """
  predictions = ops.convert_to_tensor(y_pred)
  targets = math_ops.cast(y_true, predictions.dtype)
  log_predictions = math_ops.log(K.clip(predictions, K.epsilon(), None) + 1.)
  log_targets = math_ops.log(K.clip(targets, K.epsilon(), None) + 1.)
  squared_log_errors = math_ops.squared_difference(log_predictions,
                                                   log_targets)
  return K.mean(squared_log_errors, axis=-1)
713
714
def _maybe_convert_labels(y_true):
  """Maps 0/1 labels to -1/1; leaves any other labels untouched.

  The conversion is applied only when every element of `y_true` equals 0 or 1.

  Args:
    y_true: Tensor of labels.

  Returns:
    `2 * y_true - 1` if all labels are binary, otherwise `y_true` unchanged.
  """
  zeros_mask = math_ops.equal(y_true, 0)
  ones_mask = math_ops.equal(y_true, 1)
  all_binary = math_ops.reduce_all(math_ops.logical_or(zeros_mask, ones_mask))
  return smart_cond.smart_cond(
      all_binary,
      lambda: 2. * y_true - 1.,  # Rescale {0, 1} to {-1, 1}.
      lambda: y_true)
728
729
@keras_export('keras.metrics.squared_hinge', 'keras.losses.squared_hinge')
def squared_hinge(y_true, y_pred):
  """Computes the squared hinge loss between `y_true` and `y_pred`.

  Args:
    y_true: The ground truth values. `y_true` values are expected to be -1 or 1.
      If binary (0 or 1) labels are provided we will convert them to -1 or 1.
    y_pred: The predicted values.

  Returns:
    Mean of `max(1 - y_true * y_pred, 0)^2` over the last axis.
  """
  predictions = ops.convert_to_tensor(y_pred)
  labels = math_ops.cast(y_true, predictions.dtype)
  labels = _maybe_convert_labels(labels)
  margin_violation = math_ops.maximum(1. - labels * predictions, 0.)
  return K.mean(math_ops.square(margin_violation), axis=-1)
747
748
@keras_export('keras.metrics.hinge', 'keras.losses.hinge')
def hinge(y_true, y_pred):
  """Computes the hinge loss between `y_true` and `y_pred`.

  Args:
    y_true: The ground truth values. `y_true` values are expected to be -1 or 1.
      If binary (0 or 1) labels are provided we will convert them to -1 or 1.
    y_pred: The predicted values.

  Returns:
    Mean of `max(1 - y_true * y_pred, 0)` over the last axis.
  """
  predictions = ops.convert_to_tensor(y_pred)
  labels = math_ops.cast(y_true, predictions.dtype)
  labels = _maybe_convert_labels(labels)
  margin_violation = math_ops.maximum(1. - labels * predictions, 0.)
  return K.mean(margin_violation, axis=-1)
765
766
@keras_export('keras.losses.categorical_hinge')
def categorical_hinge(y_true, y_pred):
  """Computes the categorical hinge loss between `y_true` and `y_pred`.

  Args:
    y_true: The ground truth values; cast to the dtype of `y_pred`.
    y_pred: The predicted values.

  Returns:
    `max(0, neg - pos + 1)`, where `pos = sum(y_true * y_pred)` and
    `neg = max((1 - y_true) * y_pred)`, both reduced over the last axis.
  """
  predictions = ops.convert_to_tensor(y_pred)
  labels = math_ops.cast(y_true, predictions.dtype)
  positive_score = math_ops.reduce_sum(labels * predictions, axis=-1)
  negative_score = math_ops.reduce_max((1. - labels) * predictions, axis=-1)
  return math_ops.maximum(0., negative_score - positive_score + 1.)
774
775
def huber_loss(y_true, y_pred, delta=1.0):
  """Computes the element-wise Huber loss value.

  For each value x in `error=y_true-y_pred`, the following is calculated:

  ```
  0.5 * x^2                  if |x| <= d
  0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```
  where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

  Args:
    y_true: tensor of true targets; cast to `K.floatx()`.
    y_pred: tensor of predicted targets; cast to `K.floatx()`.
    delta: A float, the point where the Huber loss function changes from a
      quadratic to linear.

  Returns:
    Tensor of Huber loss values, one per element of the error tensor (no
    reduction over the last axis is performed here).
  """
  predictions = math_ops.cast(y_pred, dtype=K.floatx())
  targets = math_ops.cast(y_true, dtype=K.floatx())
  abs_error = math_ops.abs(math_ops.subtract(predictions, targets))
  # Split |error| into the part below `delta` (penalised quadratically) and
  # the excess above it (penalised linearly).
  quadratic = math_ops.minimum(abs_error, delta)
  linear = math_ops.subtract(abs_error, quadratic)
  half = ops.convert_to_tensor(0.5, dtype=quadratic.dtype)
  squared = math_ops.multiply(quadratic, quadratic)
  quadratic_term = math_ops.multiply(half, squared)
  linear_term = math_ops.multiply(delta, linear)
  return math_ops.add(quadratic_term, linear_term)
807
808
@keras_export('keras.losses.logcosh')
def logcosh(y_true, y_pred):
  """Logarithm of the hyperbolic cosine of the prediction error.

  `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and
  to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works mostly
  like the mean squared error, but will not be so strongly affected by the
  occasional wildly incorrect prediction.

  Arguments:
      y_true: tensor of true targets; cast to the dtype of `y_pred`.
      y_pred: tensor of predicted targets.

  Returns:
      Tensor with one scalar loss entry per sample (mean over the last axis).
  """
  y_pred = ops.convert_to_tensor(y_pred)
  y_true = math_ops.cast(y_true, y_pred.dtype)

  def _logcosh(x):
    # log(cosh(x)) rewritten as x + softplus(-2x) - log(2). The log(2)
    # constant is created as float32, so cast it to the dtype of `x` to avoid
    # a dtype mismatch when predictions are not float32.
    return x + nn.softplus(-2. * x) - math_ops.cast(math_ops.log(2.), x.dtype)

  return K.mean(_logcosh(y_pred - y_true), axis=-1)
832
833
@keras_export('keras.metrics.categorical_crossentropy',
              'keras.losses.categorical_crossentropy')
def categorical_crossentropy(y_true,
                             y_pred,
                             from_logits=False,
                             label_smoothing=0):
  """Computes the categorical crossentropy loss.

  Args:
    y_true: tensor of true targets; cast to the dtype of `y_pred`.
    y_pred: tensor of predicted targets.
    from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
      we assume that `y_pred` encodes a probability distribution.
    label_smoothing: Float in [0, 1]. If > `0` then smooth the labels.

  Returns:
    Categorical crossentropy loss value.
  """
  y_pred = ops.convert_to_tensor(y_pred)
  y_true = math_ops.cast(y_true, y_pred.dtype)
  label_smoothing = ops.convert_to_tensor(label_smoothing, dtype=K.floatx())

  def _smooth_labels():
    # Labels live in the dtype of `y_pred`, which may differ from
    # `K.floatx()`; cast the smoothing factor so the arithmetic below does not
    # mix dtypes.
    smoothing = math_ops.cast(label_smoothing, y_pred.dtype)
    # The classes are on the last axis, so read the class count from there;
    # this also covers inputs with rank greater than 2.
    num_classes = math_ops.cast(array_ops.shape(y_true)[-1], y_pred.dtype)
    return y_true * (1.0 - smoothing) + (smoothing / num_classes)

  # Only smooth when `label_smoothing` is non-zero.
  y_true = smart_cond.smart_cond(label_smoothing,
                                 _smooth_labels, lambda: y_true)
  return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
863
864
@keras_export('keras.metrics.sparse_categorical_crossentropy',
              'keras.losses.sparse_categorical_crossentropy')
def sparse_categorical_crossentropy(y_true, y_pred, from_logits=False, axis=-1):
  """Computes the sparse categorical crossentropy loss.

  Thin wrapper that forwards all arguments to
  `K.sparse_categorical_crossentropy`.

  Args:
    y_true: Ground truth values (integer labels).
    y_pred: The predicted values.
    from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
      we assume that `y_pred` encodes a probability distribution.
    axis: Axis forwarded to the backend function. Defaults to -1.

  Returns:
    The result of the backend sparse categorical crossentropy.
  """
  backend_loss_fn = K.sparse_categorical_crossentropy
  return backend_loss_fn(y_true, y_pred, from_logits=from_logits, axis=axis)
870
871
@keras_export('keras.metrics.binary_crossentropy',
              'keras.losses.binary_crossentropy')
def binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0):
  """Computes the binary crossentropy loss.

  Args:
    y_true: tensor of true targets; cast to the dtype of `y_pred`.
    y_pred: tensor of predicted targets.
    from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
      we assume that `y_pred` encodes a probability distribution.
    label_smoothing: Float in [0, 1]. If > `0` then smooth the labels towards
      0.5.

  Returns:
    Mean of the element-wise binary crossentropy over the last axis.
  """
  y_pred = ops.convert_to_tensor(y_pred)
  y_true = math_ops.cast(y_true, y_pred.dtype)
  label_smoothing = ops.convert_to_tensor(label_smoothing, dtype=K.floatx())

  def _smooth_labels():
    # Labels live in the dtype of `y_pred`, which may differ from
    # `K.floatx()`; cast the smoothing factor so the arithmetic below does not
    # mix dtypes.
    smoothing = math_ops.cast(label_smoothing, y_pred.dtype)
    return y_true * (1.0 - smoothing) + 0.5 * smoothing

  # Only smooth when `label_smoothing` is non-zero.
  y_true = smart_cond.smart_cond(label_smoothing,
                                 _smooth_labels, lambda: y_true)
  return K.mean(
      K.binary_crossentropy(y_true, y_pred, from_logits=from_logits), axis=-1)
886
887
@keras_export('keras.metrics.kullback_leibler_divergence',
              'keras.metrics.kld',
              'keras.metrics.KLD',
              'keras.losses.kullback_leibler_divergence',
              'keras.losses.kld',
              'keras.losses.KLD')
def kullback_leibler_divergence(y_true, y_pred):
  """Computes the Kullback-Leibler divergence over the last axis.

  Both inputs are clipped to `[epsilon, 1]` before the ratio and logarithm
  are taken.

  Args:
    y_true: tensor of true targets; cast to the dtype of `y_pred`.
    y_pred: tensor of predicted targets.

  Returns:
    Sum of `y_true * log(y_true / y_pred)` over the last axis.
  """
  predictions = ops.convert_to_tensor(y_pred)
  targets = math_ops.cast(y_true, predictions.dtype)
  targets = K.clip(targets, K.epsilon(), 1)
  predictions = K.clip(predictions, K.epsilon(), 1)
  pointwise_divergence = targets * math_ops.log(targets / predictions)
  return math_ops.reduce_sum(pointwise_divergence, axis=-1)
900
901
@keras_export('keras.metrics.poisson', 'keras.losses.poisson')
def poisson(y_true, y_pred):
  """Computes the Poisson loss between `y_true` and `y_pred`.

  Computed as `mean(y_pred - y_true * log(y_pred + K.epsilon()), axis=-1)`.

  Args:
    y_true: Ground truth values. Cast to the dtype of `y_pred`.
    y_pred: The predicted values.

  Returns:
    The loss averaged over the last axis.
  """
  predictions = ops.convert_to_tensor(y_pred)
  targets = math_ops.cast(y_true, predictions.dtype)
  # Epsilon is added before the log so a prediction of exactly zero is not
  # passed to it.
  log_predictions = math_ops.log(predictions + K.epsilon())
  elementwise_loss = predictions - targets * log_predictions
  return K.mean(elementwise_loss, axis=-1)
907
908
909# Retaining the legacy namespaces: 'cosine_proximity' and 'cosine'.
910# TODO(psv): Change name of this function to `cosine_similarity` after fixing
911# estimator test.
@keras_export(
    'keras.losses.cosine_similarity',
    v1=[
        'keras.metrics.cosine_proximity',
        'keras.metrics.cosine',
        'keras.losses.cosine_proximity',
        'keras.losses.cosine',
        'keras.losses.cosine_similarity',
    ])
def cosine_proximity(y_true, y_pred, axis=-1):
  """Computes the cosine similarity between labels and predictions.

  Both inputs are L2-normalized along `axis`; the result is the sum of their
  elementwise product along that same axis.

  Args:
    y_true: Ground truth values.
    y_pred: The predicted values.
    axis: The dimension along which the normalization and the sum are
      performed. Defaults to -1.

  Returns:
    The cosine similarity, reduced along `axis`.
  """
  unit_true = nn.l2_normalize(y_true, axis=axis)
  unit_pred = nn.l2_normalize(y_pred, axis=axis)
  elementwise_product = unit_true * unit_pred
  return math_ops.reduce_sum(elementwise_product, axis=axis)
926
927
@keras_export('keras.losses.CosineSimilarity')
class CosineSimilarity(LossFunctionWrapper):
  """Computes the cosine similarity between `y_true` and `y_pred`.

  Usage:

  ```python
  cosine_loss = tf.keras.losses.CosineSimilarity(axis=1)
  loss = cosine_loss([[0., 1.], [1., 1.]], [[1., 0.], [1., 1.]])
  # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]]
  # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]]
  # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]]
  # loss = mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1))
  #      = ((0. + 0.) + (0.5 + 0.5)) / 2

  print('Loss: ', loss.numpy())  # Loss: 0.5
  ```

  Usage with tf.keras API:

  ```python
  model = tf.keras.Model(inputs, outputs)
  model.compile('sgd', loss=tf.keras.losses.CosineSimilarity(axis=1))
  ```

  Args:
    axis: (Optional) Defaults to -1. The dimension along which the cosine
      similarity is computed.
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op.
  """

  def __init__(self,
               axis=-1,
               reduction=losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE,
               name='cosine_similarity'):
    # `cosine_similarity` is the module-level alias of `cosine_proximity`;
    # `axis` is handed to the wrapper as an extra keyword argument.
    super(CosineSimilarity, self).__init__(
        cosine_similarity,
        name=name,
        reduction=reduction,
        axis=axis)
967
968
# Aliases.
# Each short name (lower- and upper-case) is bound to the same function
# object as the corresponding full-named loss.

MSE = mean_squared_error
mse = MSE
MAE = mean_absolute_error
mae = MAE
MAPE = mean_absolute_percentage_error
mape = MAPE
MSLE = mean_squared_logarithmic_error
msle = MSLE
KLD = kullback_leibler_divergence
kld = KLD
# `CosineSimilarity.__init__` refers to the function by this name.
cosine_similarity = cosine_proximity
977
978
def is_categorical_crossentropy(loss):
  """Returns whether `loss` denotes the categorical crossentropy loss.

  Args:
    loss: The object to inspect.

  Returns:
    A truthy value when `loss` is any of:
      * a `CategoricalCrossentropy` instance,
      * a `LossFunctionWrapper` whose `fn` equals `categorical_crossentropy`,
      * an object whose `__name__` is `'categorical_crossentropy'`,
      * something that compares equal to the string
        `'categorical_crossentropy'`;
    otherwise the (falsy) result of that last comparison.
  """
  if isinstance(loss, CategoricalCrossentropy):
    return True

  if isinstance(loss, LossFunctionWrapper):
    wraps_cce = loss.fn == categorical_crossentropy
    if wraps_cce:
      return wraps_cce

  if hasattr(loss, '__name__'):
    name_matches = loss.__name__ == 'categorical_crossentropy'
    if name_matches:
      return name_matches

  return loss == 'categorical_crossentropy'
987
988
@keras_export('keras.losses.serialize')
def serialize(loss):
  """Serializes a loss by delegating to `serialize_keras_object`.

  Args:
    loss: The loss to serialize.

  Returns:
    Whatever `serialize_keras_object(loss)` returns.
  """
  serialized = serialize_keras_object(loss)
  return serialized
992
993
@keras_export('keras.losses.deserialize')
def deserialize(name, custom_objects=None):
  """Deserializes a loss by delegating to `deserialize_keras_object`.

  Args:
    name: The identifier to deserialize. `get` in this module passes either
      a string or a dict.
    custom_objects: Optional value forwarded as `custom_objects`. Defaults
      to `None`.

  Returns:
    Whatever `deserialize_keras_object` returns.
  """
  # This module's own globals are the namespace searched for built-in losses.
  builtin_losses = globals()
  loss = deserialize_keras_object(
      name,
      module_objects=builtin_losses,
      custom_objects=custom_objects,
      printable_module_name='loss function')
  return loss
1001
1002
@keras_export('keras.losses.get')
def get(identifier):
  """Retrieves a loss from an identifier.

  Args:
    identifier: `None`, a string, a dict, or a callable.

  Returns:
    * `None` when `identifier` is `None`.
    * The result of `deserialize` when `identifier` is a string (converted
      with `str` first) or a dict.
    * `identifier` itself when it is callable.

  Raises:
    ValueError: If `identifier` is none of the supported kinds.
  """
  if identifier is None:
    return None
  if isinstance(identifier, six.string_types):
    # Convert any `six` string type to the native `str` before the lookup.
    return deserialize(str(identifier))
  if isinstance(identifier, dict):
    return deserialize(identifier)
  if callable(identifier):
    return identifier
  # Build one message string; passing the identifier as a second positional
  # argument makes the error render as a tuple.
  raise ValueError(
      'Could not interpret loss function identifier: {}'.format(identifier))
1017
1018
# Label dtype name (as a string) registered for specific loss functions.
# NOTE(review): presumably the dtype that labels are expected in for these
# sparse losses -- confirm against the code that consumes this table.
LABEL_DTYPES_FOR_LOSSES = {
    losses_impl.sparse_softmax_cross_entropy: 'int32',
    sparse_categorical_crossentropy: 'int32',
}
1023