1# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Weight broadcasting operations.
16
17In `tf.losses` and `tf.metrics`, we support limited weight broadcasting. This
18file includes operations for those broadcasting rules.
19"""
20
21from __future__ import absolute_import
22from __future__ import division
23from __future__ import print_function
24
25from tensorflow.python.framework import ops
26from tensorflow.python.framework import tensor_util
27from tensorflow.python.ops import array_ops
28from tensorflow.python.ops import control_flow_ops
29from tensorflow.python.ops import math_ops
30from tensorflow.python.ops import sets
31from tensorflow.python.util.tf_export import tf_export
32
33
def _has_valid_dims(weights_shape, values_shape):
  """Checks every weights dimension against the matching values dimension.

  For each position, the allowed sizes are the values dimension at that
  position and 1. Each weights dimension is compared with its allowed sizes
  via a row-wise set difference; the shape is valid when that difference is
  empty.

  Args:
    weights_shape: Shape tensor of the weights.
    values_shape: Shape tensor of the values. Presumably the same length as
      `weights_shape`; the only caller in this file invokes this function
      after the ranks compared equal.

  Returns:
    Scalar bool `Tensor`, true when no weights dimension falls outside its
    allowed sizes.
  """
  with ops.name_scope(
      None, "has_invalid_dims", (weights_shape, values_shape)) as scope:
    # One row per dimension: [values_dim, 1] are the sizes weights may take.
    values_column = array_ops.expand_dims(values_shape, -1)
    ones_column = array_ops.ones_like(values_column)
    allowed_per_dim = array_ops.concat((values_column, ones_column), axis=1)
    # One row per dimension holding the single actual weights size.
    weights_column = array_ops.expand_dims(weights_shape, -1)
    # Whatever survives the difference is a weights size that is not allowed.
    disallowed = sets.set_difference(weights_column, allowed_per_dim)
    disallowed_count = array_ops.size(
        disallowed.values, name="num_invalid_dims")
    return math_ops.equal(0, disallowed_count, name=scope)
45
46
def _has_valid_nonscalar_shape(
    weights_rank, weights_shape, values_rank, values_shape):
  """Checks a non-scalar weights shape against the values shape.

  The ranks must be equal; only when they are is the per-dimension check of
  `_has_valid_dims` evaluated.

  Args:
    weights_rank: Rank tensor of the weights.
    weights_shape: Shape tensor of the weights.
    values_rank: Rank tensor of the values.
    values_shape: Shape tensor of the values.

  Returns:
    Bool `Tensor` produced by a `cond`: the result of `_has_valid_dims` when
    the ranks match, otherwise the (false) rank-equality tensor itself.
  """
  scope_inputs = (weights_rank, weights_shape, values_rank, values_shape)
  with ops.name_scope(
      None, "has_valid_nonscalar_shape", scope_inputs) as scope:
    ranks_match = math_ops.equal(
        values_rank, weights_rank, name="is_same_rank")

    def _check_each_dim():
      # Ranks agree, so a dimension-by-dimension comparison is meaningful.
      return _has_valid_dims(weights_shape, values_shape)

    def _rank_mismatch():
      # In this branch `ranks_match` is false, which is the answer we want.
      return ranks_match

    return control_flow_ops.cond(
        ranks_match, _check_each_dim, _rank_mismatch, name=scope)
59
60
# Common prefix of every error message emitted by `assert_broadcastable`:
# both the statically raised `ValueError`s and the data attached to the
# runtime `Assert` op start with this text.
_ASSERT_BROADCASTABLE_ERROR_PREFIX = "weights can not be broadcast to values."
62
63
def assert_broadcastable(weights, values):
  """Builds a check that `weights` can be broadcast to `values`.

  `tf.losses` and `tf.metrics` only support a restricted form of weight
  broadcasting: `weights` must be a scalar, or have the same rank as `values`
  with every dimension equal to 1 or to the corresponding dimension of
  `values`.

  The check is resolved statically when the ranks (and, for non-scalar
  weights, the shapes) are known at graph-construction time; otherwise an
  `Assert` op performing the same check at run time is returned.

  Args:
    weights: `Tensor` of weights.
    values: `Tensor` of values to which weights are applied.

  Returns:
    `Operation` raising `InvalidArgumentError` if `weights` has incorrect shape.
    `no_op` if static checks determine `weights` has correct shape.

  Raises:
    ValueError:  If static checks determine `weights` has incorrect shape.
  """
  with ops.name_scope(None, "assert_broadcastable", (weights, values)) as scope:
    with ops.name_scope(None, "weights", (weights,)) as weights_scope:
      weights = ops.convert_to_tensor(weights, name=weights_scope)
      w_shape = array_ops.shape(weights, name="shape")
      w_rank = array_ops.rank(weights, name="rank")
    w_rank_const = tensor_util.constant_value(w_rank)

    with ops.name_scope(None, "values", (values,)) as values_scope:
      values = ops.convert_to_tensor(values, name=values_scope)
      v_shape = array_ops.shape(values, name="shape")
      v_rank = array_ops.rank(values, name="rank")
    v_rank_const = tensor_util.constant_value(v_rank)

    # Static path: only possible when both ranks are known up front.
    ranks_known = not (w_rank_const is None or v_rank_const is None)
    if ranks_known:
      # Scalar weights broadcast to anything.
      if w_rank_const == 0:
        return control_flow_ops.no_op(name="static_scalar_check_success")
      if w_rank_const != v_rank_const:
        rank_message = (
            "%s values.rank=%s. weights.rank=%s."
            " values.shape=%s. weights.shape=%s." % (
                _ASSERT_BROADCASTABLE_ERROR_PREFIX, v_rank_const,
                w_rank_const, values.shape, weights.shape))
        raise ValueError(rank_message)

      w_shape_const = tensor_util.constant_value(w_shape)
      v_shape_const = tensor_util.constant_value(v_shape)
      shapes_known = not (w_shape_const is None or v_shape_const is None)
      if shapes_known:
        # Sanity check, this should always be true since we checked rank above.
        ndims = len(v_shape_const)
        assert ndims == len(w_shape_const)

        for dim in range(ndims):
          w_dim = w_shape_const[dim]
          if w_dim == 1 or w_dim == v_shape_const[dim]:
            continue
          raise ValueError(
              "%s Mismatch at dim %s. values.shape=%s weights.shape=%s." % (
                  _ASSERT_BROADCASTABLE_ERROR_PREFIX, dim, v_shape_const,
                  w_shape_const))
        return control_flow_ops.no_op(name="static_dims_check_success")

    # Dynamic path: something was unknown statically, so defer to run time.
    weights_is_scalar = math_ops.equal(0, w_rank, name="is_scalar")
    # Payload reported by the `Assert` op when the check fails.
    assert_data = (
        _ASSERT_BROADCASTABLE_ERROR_PREFIX,
        "weights.shape=", weights.name, w_shape,
        "values.shape=", values.name, v_shape,
        "is_scalar=", weights_is_scalar,
    )

    def _nonscalar_check():
      return _has_valid_nonscalar_shape(w_rank, w_shape, v_rank, v_shape)

    shape_ok = control_flow_ops.cond(
        weights_is_scalar,
        lambda: weights_is_scalar,
        _nonscalar_check,
        name="is_valid_shape")
    return control_flow_ops.Assert(shape_ok, assert_data, name=scope)
135
136
@tf_export("__internal__.ops.broadcast_weights", v1=[])
def broadcast_weights(weights, values):
  """Broadcast `weights` to the same shape as `values`.

  The result follows the broadcast rules of `mul(weights, values)`, restricted
  to the weights shapes accepted by `assert_broadcastable`. Use it when
  computing a weighted average so that the weights are expanded before being
  summed; e.g.,
  `reduce_sum(w * v) / reduce_sum(broadcast_weights(w, v))`.

  Args:
    weights: `Tensor` whose shape is broadcastable to `values` according to the
      rules of `assert_broadcastable`. It is converted to the base dtype of
      `values`.
    values: `Tensor` of any shape.

  Returns:
    `weights` broadcast to `values` shape according to the rules of
      `assert_broadcastable`. When both static shapes are fully defined and
      already match, the converted `weights` tensor is returned as is.
  """
  with ops.name_scope(None, "broadcast_weights", (weights, values)) as scope:
    values = ops.convert_to_tensor(values, name="values")
    weights = ops.convert_to_tensor(
        weights, dtype=values.dtype.base_dtype, name="weights")

    # Fast path: statically identical shapes need no broadcasting at all.
    static_w = weights.get_shape()
    static_v = values.get_shape()
    shapes_known = static_w.is_fully_defined() and static_v.is_fully_defined()
    if shapes_known and static_w.is_compatible_with(static_v):
      return weights

    def _expand_to_values():
      # Multiplying by ones of `values`' shape performs the broadcast.
      return math_ops.multiply(
          weights, array_ops.ones_like(values), name=scope)

    # Inside an XLA context (TPU/GPU) the assert_broadcastable check is
    # skipped: asserts are not supported there, so it would only add
    # unnecessary ops, and it relies on a DenseToDenseSetOperation op that is
    # incompatible with the TPU/GPU when the shape(s) are dynamic.
    if control_flow_ops.get_enclosing_xla_context() is not None:
      return _expand_to_values()

    broadcast_check = assert_broadcastable(weights, values)
    with ops.control_dependencies((broadcast_check,)):
      return _expand_to_values()
179