# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for manipulating the loss collections."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.eager import context
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import confusion_matrix
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.util import tf_contextlib
from tensorflow.python.util.tf_export import tf_export


def squeeze_or_expand_dimensions(y_pred, y_true=None, sample_weight=None):
  """Squeezes or expands the last dimension if needed.

  1. Squeezes the last dim of `y_pred` or `y_true` if their ranks differ by 1
     (using `confusion_matrix.remove_squeezable_dimensions`).
  2. Squeezes or expands the last dim of `sample_weight` if its rank differs
     by 1 from the new rank of `y_pred`. If `sample_weight` is scalar, it is
     kept scalar.

  This will use static shapes if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    y_pred: Predicted values, a `Tensor` of arbitrary dimensions.
    y_true: Optional label `Tensor` whose dimensions match `y_pred`.
    sample_weight: Optional weight scalar or `Tensor` whose dimensions match
      `y_pred`.

  Returns:
    Tuple of `y_pred`, `y_true` and `sample_weight`. Each of them possibly has
    the last dimension squeezed; `sample_weight` could be extended by one
    dimension. If `sample_weight` is None, `(y_pred, y_true)` is returned.
  """
  y_pred_shape = y_pred.shape
  y_pred_rank = y_pred_shape.ndims
  if y_true is not None:

    # If sparse labels are provided as `y_true`, the last dimension of
    # `y_pred` may be > 1. Eg: y_true = [0, 1, 2] (shape=(3,)),
    # y_pred = [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]] (shape=(3, 3))
    # In this case, we should not try to remove the squeezable dimension.
    y_true_shape = y_true.shape
    y_true_rank = y_true_shape.ndims
    if (y_true_rank is not None) and (y_pred_rank is not None):
      # Use static ranks for `y_true` and `y_pred`.
      if (y_pred_rank - y_true_rank != 1) or y_pred_shape[-1] == 1:
        y_true, y_pred = confusion_matrix.remove_squeezable_dimensions(
            y_true, y_pred)
    else:
      # Use dynamic ranks.
      rank_diff = array_ops.rank(y_pred) - array_ops.rank(y_true)
      squeeze_dims = lambda: confusion_matrix.remove_squeezable_dimensions(  # pylint: disable=g-long-lambda
          y_true, y_pred)
      is_last_dim_1 = math_ops.equal(1, array_ops.shape(y_pred)[-1])
      maybe_squeeze_dims = lambda: control_flow_ops.cond(  # pylint: disable=g-long-lambda
          is_last_dim_1, squeeze_dims, lambda: (y_true, y_pred))
      y_true, y_pred = control_flow_ops.cond(
          math_ops.equal(1, rank_diff), maybe_squeeze_dims, squeeze_dims)

  if sample_weight is None:
    return y_pred, y_true

  weights_shape = sample_weight.shape
  weights_rank = weights_shape.ndims
  if weights_rank == 0:  # If weights is scalar, do nothing.
    return y_pred, y_true, sample_weight

  if (y_pred_rank is not None) and (weights_rank is not None):
    # Use static ranks.
    if weights_rank - y_pred_rank == 1:
      sample_weight = array_ops.squeeze(sample_weight, [-1])
    elif y_pred_rank - weights_rank == 1:
      sample_weight = array_ops.expand_dims(sample_weight, [-1])
    return y_pred, y_true, sample_weight

  # Use dynamic ranks.
  weights_rank_tensor = array_ops.rank(sample_weight)
  rank_diff = weights_rank_tensor - array_ops.rank(y_pred)
  maybe_squeeze_weights = lambda: array_ops.squeeze(sample_weight, [-1])

  def _maybe_expand_weights():
    expand_weights = lambda: array_ops.expand_dims(sample_weight, [-1])
    return control_flow_ops.cond(
        math_ops.equal(rank_diff, -1), expand_weights, lambda: sample_weight)

  def _maybe_adjust_weights():
    return control_flow_ops.cond(
        math_ops.equal(rank_diff, 1), maybe_squeeze_weights,
        _maybe_expand_weights)

  # Squeeze or expand the last dim of `sample_weight` if its rank differs
  # by 1 from the new rank of `y_pred`.
  sample_weight = control_flow_ops.cond(
      math_ops.equal(weights_rank_tensor, 0), lambda: sample_weight,
      _maybe_adjust_weights)
  return y_pred, y_true, sample_weight
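

# The sketch below is illustrative only, added for exposition; the helper
# name `_example_squeeze_or_expand_dimensions` is not part of this module's
# original API.
def _example_squeeze_or_expand_dimensions():
  """Minimal usage sketch of the static-rank path above.

  `y_pred` and `y_true` already have matching ranks, so only the lower-rank
  `sample_weight` is adjusted.
  """
  y_pred = constant_op.constant([[.9], [.2], [.8]])  # shape (3, 1)
  y_true = constant_op.constant([[1.], [0.], [1.]])  # shape (3, 1)
  weights = constant_op.constant([1., 2., 3.])  # shape (3,)
  y_pred, y_true, weights = squeeze_or_expand_dimensions(
      y_pred, y_true, weights)
  # `y_pred` and `y_true` keep shape (3, 1); `weights` is expanded to
  # shape (3, 1) so it broadcasts against the predictions.
  return y_pred, y_true, weights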


def scale_losses_by_sample_weight(losses, sample_weight):
  """Scales loss values by the given sample weights.

  `sample_weight` dimensions are updated to match the dimensions of `losses`
  where possible, using squeeze/expand/broadcast.

  Args:
    losses: Loss tensor.
    sample_weight: Sample weights tensor.

  Returns:
    `losses` scaled by `sample_weight` with dtype float32.
  """
  # TODO(psv): Handle the casting here in a better way, e.g. if losses is
  # float64 we do not want to lose precision.
  losses = math_ops.cast(losses, dtypes.float32)
  sample_weight = math_ops.cast(sample_weight, dtypes.float32)

  # Update dimensions of `sample_weight` to match with `losses` if possible.
  losses, _, sample_weight = squeeze_or_expand_dimensions(
      losses, None, sample_weight)
  return math_ops.multiply(losses, sample_weight)


@tf_contextlib.contextmanager
def check_per_example_loss_rank(per_example_loss):
  """Context manager that checks that the rank of per_example_loss is at least 1.

  Args:
    per_example_loss: Per example loss tensor.

  Yields:
    A context manager.
  """
  loss_rank = per_example_loss.shape.rank
  if loss_rank is not None:
    # Handle static rank.
    if loss_rank == 0:
      raise ValueError(
          "Invalid value passed for `per_example_loss`. Expected a tensor "
          "with at least rank 1, received: {}".format(per_example_loss))
    yield
  else:
    # Handle dynamic rank.
    with ops.control_dependencies([
        check_ops.assert_greater_equal(
            array_ops.rank(per_example_loss),
            math_ops.cast(1, dtype=dtypes.int32),
            message="Invalid value passed for `per_example_loss`. Expected a "
            "tensor with at least rank 1.")
    ]):
      yield
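

# The sketch below is illustrative only, added for exposition; the helper
# name `_example_scale_losses_by_sample_weight` is not part of this module's
# original API.
def _example_scale_losses_by_sample_weight():
  """Minimal usage sketch: per-example losses scaled by per-example weights.

  The result is float32 regardless of the input dtypes, and the weights are
  squeezed from shape (3, 1) to (3,) to match `losses`.
  """
  losses = constant_op.constant([1., 2., 4.], dtype=dtypes.float64)
  weights = constant_op.constant([[1.], [0.], [.5]])  # shape (3, 1)
  # Evaluates to [1., 0., 2.].
  return scale_losses_by_sample_weight(losses, weights)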


@tf_export(v1=["losses.add_loss"])
def add_loss(loss, loss_collection=ops.GraphKeys.LOSSES):
  """Adds an externally defined loss to the collection of losses.

  Args:
    loss: A loss `Tensor`.
    loss_collection: Optional collection to add the loss to.
  """
  # Since we have no way of figuring out when a training iteration starts or
  # ends, holding on to a loss when executing eagerly is indistinguishable
  # from leaking memory. We instead leave the collection empty.
  if loss_collection and not context.executing_eagerly():
    ops.add_to_collection(loss_collection, loss)


@tf_export(v1=["losses.get_losses"])
def get_losses(scope=None, loss_collection=ops.GraphKeys.LOSSES):
  """Gets the list of losses from the loss_collection.

  Args:
    scope: An optional scope name for filtering the losses to return.
    loss_collection: Optional losses collection.

  Returns:
    A list of loss tensors.
  """
  return ops.get_collection(loss_collection, scope)


@tf_export(v1=["losses.get_regularization_losses"])
def get_regularization_losses(scope=None):
  """Gets the list of regularization losses.

  Args:
    scope: An optional scope name for filtering the losses to return.

  Returns:
    A list of regularization losses as Tensors.
  """
  return ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES, scope)


@tf_export(v1=["losses.get_regularization_loss"])
def get_regularization_loss(scope=None, name="total_regularization_loss"):
  """Gets the total regularization loss.

  Args:
    scope: An optional scope name for filtering the losses to return.
    name: The name of the returned tensor.

  Returns:
    A scalar regularization loss.
  """
  losses = get_regularization_losses(scope)
  if losses:
    return math_ops.add_n(losses, name=name)
  else:
    return constant_op.constant(0.0)


@tf_export(v1=["losses.get_total_loss"])
def get_total_loss(add_regularization_losses=True,
                   name="total_loss",
                   scope=None):
  """Returns a tensor whose value represents the total loss.

  In particular, this adds any losses you have added with `tf.add_loss()` to
  any regularization losses that have been added by regularization parameters
  in layer constructors, e.g. `tf.layers`. Be sure to use this if you are
  constructing a loss op manually; otherwise regularization arguments on
  `tf.layers` methods will not function.

  Args:
    add_regularization_losses: A boolean indicating whether or not to include
      the regularization losses in the sum.
    name: The name of the returned tensor.
    scope: An optional scope name for filtering the losses to return. Note
      that this filters the losses added with `tf.add_loss()` as well as the
      regularization losses to that scope.

  Returns:
    A `Tensor` whose value represents the total loss.

  Raises:
    ValueError: if `losses` is not iterable.
  """
  losses = get_losses(scope=scope)
  if add_regularization_losses:
    losses += get_regularization_losses(scope=scope)
  return math_ops.add_n(losses, name=name)
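

# The sketch below is illustrative only, added for exposition; the helper
# name `_example_loss_collection_workflow` is not part of this module's
# original API.
def _example_loss_collection_workflow():
  """Minimal usage sketch of the graph-mode loss collections.

  Losses registered via `add_loss` and any regularization losses are summed
  by `get_total_loss`.
  """
  with ops.Graph().as_default():
    add_loss(constant_op.constant(1.5))
    add_loss(constant_op.constant(0.5))
    ops.add_to_collection(ops.GraphKeys.REGULARIZATION_LOSSES,
                          constant_op.constant(0.25))
    # Evaluates to 1.5 + 0.5 + 0.25 = 2.25 in a session.
    return get_total_loss()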