1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Operations for linear algebra."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import numpy as np
22
23from tensorflow.python.framework import dtypes
24from tensorflow.python.framework import ops
25from tensorflow.python.ops import array_ops
26from tensorflow.python.ops import control_flow_ops
27from tensorflow.python.ops import gen_linalg_ops
28from tensorflow.python.ops import linalg_ops_impl
29from tensorflow.python.ops import map_fn
30from tensorflow.python.ops import math_ops
31# pylint: disable=wildcard-import
32from tensorflow.python.ops.gen_linalg_ops import *
33# pylint: enable=wildcard-import
34from tensorflow.python.util import deprecation
35from tensorflow.python.util.tf_export import tf_export
36
37# Names below are lower_case.
38# pylint: disable=invalid-name
39
40
41def _RegularizedGramianCholesky(matrix, l2_regularizer, first_kind):
42  r"""Computes Cholesky factorization of regularized gramian matrix.
43
44  Below we will use the following notation for each pair of matrix and
45  right-hand sides in the batch:
46
47  `matrix`=\\(A \in \Re^{m \times n}\\),
48  `output`=\\(C  \in \Re^{\min(m, n) \times \min(m,n)}\\),
49  `l2_regularizer`=\\(\lambda\\).
50
51  If `first_kind` is True, returns the Cholesky factorization \\(L\\) such that
52  \\(L L^H =  A^H A + \lambda I\\).
53  If `first_kind` is False, returns the Cholesky factorization \\(L\\) such that
54  \\(L L^H =  A A^H + \lambda I\\).
55
56  Args:
57    matrix: `Tensor` of shape `[..., M, N]`.
58    l2_regularizer: 0-D `double` `Tensor`. Ignored if `fast=False`.
59    first_kind: bool. Controls what gramian matrix to factor.
60  Returns:
61    output: `Tensor` of shape `[..., min(M,N), min(M,N)]` whose inner-most 2
62      dimensions contain the Cholesky factors \\(L\\) described above.
63  """
64
65  gramian = math_ops.matmul(
66      matrix, matrix, adjoint_a=first_kind, adjoint_b=not first_kind)
67  if isinstance(l2_regularizer, ops.Tensor) or l2_regularizer != 0:
68    matrix_shape = array_ops.shape(matrix)
69    batch_shape = matrix_shape[:-2]
70    if first_kind:
71      small_dim = matrix_shape[-1]
72    else:
73      small_dim = matrix_shape[-2]
74    identity = eye(small_dim, batch_shape=batch_shape, dtype=matrix.dtype)
75    small_dim_static = matrix.shape[-1 if first_kind else -2]
76    identity.set_shape(
77        matrix.shape[:-2].concatenate([small_dim_static, small_dim_static]))
78    gramian += l2_regularizer * identity
79  return gen_linalg_ops.cholesky(gramian)
80
81
82@tf_export(
83    'linalg.cholesky_solve', v1=['linalg.cholesky_solve', 'cholesky_solve'])
84@deprecation.deprecated_endpoints('cholesky_solve')
85def cholesky_solve(chol, rhs, name=None):
86  """Solves systems of linear eqns `A X = RHS`, given Cholesky factorizations.
87
88  ```python
89  # Solve 10 separate 2x2 linear systems:
90  A = ... # shape 10 x 2 x 2
91  RHS = ... # shape 10 x 2 x 1
92  chol = tf.cholesky(A)  # shape 10 x 2 x 2
93  X = tf.cholesky_solve(chol, RHS)  # shape 10 x 2 x 1
94  # tf.matmul(A, X) ~ RHS
95  X[3, :, 0]  # Solution to the linear system A[3, :, :] x = RHS[3, :, 0]
96
97  # Solve five linear systems (K = 5) for every member of the length 10 batch.
98  A = ... # shape 10 x 2 x 2
99  RHS = ... # shape 10 x 2 x 5
100  ...
101  X[3, :, 2]  # Solution to the linear system A[3, :, :] x = RHS[3, :, 2]
102  ```
103
104  Args:
105    chol:  A `Tensor`.  Must be `float32` or `float64`, shape is `[..., M, M]`.
106      Cholesky factorization of `A`, e.g. `chol = tf.cholesky(A)`.
107      For that reason, only the lower triangular parts (including the diagonal)
108      of the last two dimensions of `chol` are used.  The strictly upper part is
109      assumed to be zero and not accessed.
110    rhs:  A `Tensor`, same type as `chol`, shape is `[..., M, K]`.
111    name:  A name to give this `Op`.  Defaults to `cholesky_solve`.
112
113  Returns:
114    Solution to `A x = rhs`, shape `[..., M, K]`.
115  """
116  # To solve C C^* x = rhs, we
117  # 1. Solve C y = rhs for y, thus y = C^* x
118  # 2. Solve C^* x = y for x
119  with ops.name_scope(name, 'cholesky_solve', [chol, rhs]):
120    y = gen_linalg_ops.matrix_triangular_solve(
121        chol, rhs, adjoint=False, lower=True)
122    x = gen_linalg_ops.matrix_triangular_solve(
123        chol, y, adjoint=True, lower=True)
124    return x
125
126
127@tf_export('eye', 'linalg.eye')
128def eye(num_rows,
129        num_columns=None,
130        batch_shape=None,
131        dtype=dtypes.float32,
132        name=None):
133  """Construct an identity matrix, or a batch of matrices.
134
135  ```python
136  # Construct one identity matrix.
137  tf.eye(2)
138  ==> [[1., 0.],
139       [0., 1.]]
140
141  # Construct a batch of 3 identity matricies, each 2 x 2.
142  # batch_identity[i, :, :] is a 2 x 2 identity matrix, i = 0, 1, 2.
143  batch_identity = tf.eye(2, batch_shape=[3])
144
145  # Construct one 2 x 3 "identity" matrix
146  tf.eye(2, num_columns=3)
147  ==> [[ 1.,  0.,  0.],
148       [ 0.,  1.,  0.]]
149  ```
150
151  Args:
152    num_rows: Non-negative `int32` scalar `Tensor` giving the number of rows
153      in each batch matrix.
154    num_columns: Optional non-negative `int32` scalar `Tensor` giving the number
155      of columns in each batch matrix.  Defaults to `num_rows`.
156    batch_shape:  A list or tuple of Python integers or a 1-D `int32` `Tensor`.
157      If provided, the returned `Tensor` will have leading batch dimensions of
158      this shape.
159    dtype:  The type of an element in the resulting `Tensor`
160    name:  A name for this `Op`.  Defaults to "eye".
161
162  Returns:
163    A `Tensor` of shape `batch_shape + [num_rows, num_columns]`
164  """
165  return linalg_ops_impl.eye(num_rows,
166                             num_columns=num_columns,
167                             batch_shape=batch_shape,
168                             dtype=dtype,
169                             name=name)
170
171
172@tf_export('linalg.lstsq', v1=['linalg.lstsq', 'matrix_solve_ls'])
173@deprecation.deprecated_endpoints('matrix_solve_ls')
174def matrix_solve_ls(matrix, rhs, l2_regularizer=0.0, fast=True, name=None):
175  r"""Solves one or more linear least-squares problems.
176
177  `matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions
178  form `M`-by-`N` matrices. Rhs is a tensor of shape `[..., M, K]` whose
179  inner-most 2 dimensions form `M`-by-`K` matrices.  The computed output is a
180  `Tensor` of shape `[..., N, K]` whose inner-most 2 dimensions form `M`-by-`K`
181  matrices that solve the equations
182  `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]` in the least squares
183  sense.
184
185  Below we will use the following notation for each pair of matrix and
186  right-hand sides in the batch:
187
188  `matrix`=\\(A \in \Re^{m \times n}\\),
189  `rhs`=\\(B  \in \Re^{m \times k}\\),
190  `output`=\\(X  \in \Re^{n \times k}\\),
191  `l2_regularizer`=\\(\lambda\\).
192
193  If `fast` is `True`, then the solution is computed by solving the normal
194  equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then
195  \\(X = (A^T A + \lambda I)^{-1} A^T B\\), which solves the least-squares
196  problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k}} ||A Z - B||_F^2 +
197  \lambda ||Z||_F^2\\). If \\(m \lt n\\) then `output` is computed as
198  \\(X = A^T (A A^T + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is
199  the minimum-norm solution to the under-determined linear system, i.e.
200  \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k}} ||Z||_F^2 \\), subject to
201  \\(A Z = B\\). Notice that the fast path is only numerically stable when
202  \\(A\\) is numerically full rank and has a condition number
203  \\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach}}}\\) or\\(\lambda\\)
204  is sufficiently large.
205
206  If `fast` is `False` an algorithm based on the numerically robust complete
207  orthogonal decomposition is used. This computes the minimum-norm
208  least-squares solution, even when \\(A\\) is rank deficient. This path is
209  typically 6-7 times slower than the fast path. If `fast` is `False` then
210  `l2_regularizer` is ignored.
211
212  Args:
213    matrix: `Tensor` of shape `[..., M, N]`.
214    rhs: `Tensor` of shape `[..., M, K]`.
215    l2_regularizer: 0-D `double` `Tensor`. Ignored if `fast=False`.
216    fast: bool. Defaults to `True`.
217    name: string, optional name of the operation.
218
219  Returns:
220    output: `Tensor` of shape `[..., N, K]` whose inner-most 2 dimensions form
221      `M`-by-`K` matrices that solve the equations
222      `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]` in the least
223      squares sense.
224
225  Raises:
226    NotImplementedError: linalg.lstsq is currently disabled for complex128
227    and l2_regularizer != 0 due to poor accuracy.
228  """
229
230  # pylint: disable=long-lambda
231  def _use_composite_impl(fast, tensor_shape):
232    """Determines whether to use the composite or specialized CPU kernel.
233
234    When the total size of the tensor is larger than the cache size and the
235    batch size is large compared to the smallest matrix dimension, then the
236    composite implementation is inefficient since it has to read the entire
237    tensor from memory multiple times. In this case we fall back to the
238    original CPU kernel, which does all the computational steps on each
239    matrix separately.
240
241    Only fast mode is supported by the composite impl, so `False` is returned
242    if `fast` is `False`.
243
244    Args:
245      fast: bool indicating if fast mode in the solver was requested.
246      tensor_shape: The shape of the tensor.
247
248    Returns:
249      True if the composite impl should be used. False otherwise.
250    """
251    if fast is False:
252      return False
253    batch_shape = tensor_shape[:-2]
254    matrix_shape = tensor_shape[-2:]
255    if not tensor_shape.is_fully_defined():
256      return True
257    tensor_size = tensor_shape.num_elements() * matrix.dtype.size
258    is_io_bound = batch_shape.num_elements() > np.min(matrix_shape)
259    L2_CACHE_SIZE_GUESSTIMATE = 256000
260    if tensor_size > L2_CACHE_SIZE_GUESSTIMATE and is_io_bound:
261      return False
262    else:
263      return True
264
265  def _overdetermined(matrix, rhs, l2_regularizer):
266    """Computes (A^H*A + l2_regularizer)^{-1} * A^H * rhs."""
267    chol = _RegularizedGramianCholesky(
268        matrix, l2_regularizer=l2_regularizer, first_kind=True)
269    return cholesky_solve(chol, math_ops.matmul(matrix, rhs, adjoint_a=True))
270
271  def _underdetermined(matrix, rhs, l2_regularizer):
272    """Computes A^H * (A*A^H + l2_regularizer)^{-1} * rhs."""
273    chol = _RegularizedGramianCholesky(
274        matrix, l2_regularizer=l2_regularizer, first_kind=False)
275    return math_ops.matmul(matrix, cholesky_solve(chol, rhs), adjoint_a=True)
276
277  def _composite_impl(matrix, rhs, l2_regularizer):
278    """Composite implementation of matrix_solve_ls that supports GPU."""
279    with ops.name_scope(name, 'matrix_solve_ls', [matrix, rhs, l2_regularizer]):
280      matrix_shape = matrix.get_shape()[-2:]
281      if matrix_shape.is_fully_defined():
282        if matrix_shape[-2] >= matrix_shape[-1]:
283          return _overdetermined(matrix, rhs, l2_regularizer)
284        else:
285          return _underdetermined(matrix, rhs, l2_regularizer)
286      else:
287        # We have to defer determining the shape to runtime and use
288        # conditional execution of the appropriate graph.
289        matrix_shape = array_ops.shape(matrix)[-2:]
290        return control_flow_ops.cond(
291            matrix_shape[-2] >= matrix_shape[-1],
292            lambda: _overdetermined(matrix, rhs, l2_regularizer),
293            lambda: _underdetermined(matrix, rhs, l2_regularizer))
294
295  matrix = ops.convert_to_tensor(matrix, name='matrix')
296  if matrix.dtype == dtypes.complex128 and l2_regularizer != 0:
297    # TODO(rmlarsen): Investigate and fix accuracy bug.
298    raise NotImplementedError('matrix_solve_ls is currently disabled for '
299                              'complex128 and l2_regularizer != 0 due to '
300                              'poor accuracy.')
301  tensor_shape = matrix.get_shape()
302  if _use_composite_impl(fast, tensor_shape):
303    return _composite_impl(matrix, rhs, l2_regularizer)
304  else:
305    return gen_linalg_ops.matrix_solve_ls(
306        matrix, rhs, l2_regularizer, fast=fast, name=name)
307
308
309@tf_export('linalg.eigh', v1=['linalg.eigh', 'self_adjoint_eig'])
310@deprecation.deprecated_endpoints('self_adjoint_eig')
311def self_adjoint_eig(tensor, name=None):
312  """Computes the eigen decomposition of a batch of self-adjoint matrices.
313
314  Computes the eigenvalues and eigenvectors of the innermost N-by-N matrices
315  in `tensor` such that
316  `tensor[...,:,:] * v[..., :,i] = e[..., i] * v[...,:,i]`, for i=0...N-1.
317
318  Args:
319    tensor: `Tensor` of shape `[..., N, N]`. Only the lower triangular part of
320      each inner inner matrix is referenced.
321    name: string, optional name of the operation.
322
323  Returns:
324    e: Eigenvalues. Shape is `[..., N]`. Sorted in non-decreasing order.
325    v: Eigenvectors. Shape is `[..., N, N]`. The columns of the inner most
326      matrices contain eigenvectors of the corresponding matrices in `tensor`
327  """
328  e, v = gen_linalg_ops.self_adjoint_eig_v2(tensor, compute_v=True, name=name)
329  return e, v
330
331
332@tf_export('linalg.eigvalsh', v1=['linalg.eigvalsh', 'self_adjoint_eigvals'])
333@deprecation.deprecated_endpoints('self_adjoint_eigvals')
334def self_adjoint_eigvals(tensor, name=None):
335  """Computes the eigenvalues of one or more self-adjoint matrices.
336
337  Note: If your program backpropagates through this function, you should replace
338  it with a call to tf.linalg.eigh (possibly ignoring the second output) to
339  avoid computing the eigen decomposition twice. This is because the
340  eigenvectors are used to compute the gradient w.r.t. the eigenvalues. See
341  _SelfAdjointEigV2Grad in linalg_grad.py.
342
343  Args:
344    tensor: `Tensor` of shape `[..., N, N]`.
345    name: string, optional name of the operation.
346
347  Returns:
348    e: Eigenvalues. Shape is `[..., N]`. The vector `e[..., :]` contains the `N`
349      eigenvalues of `tensor[..., :, :]`.
350  """
351  e, _ = gen_linalg_ops.self_adjoint_eig_v2(tensor, compute_v=False, name=name)
352  return e
353
354
355@tf_export('linalg.svd', v1=['linalg.svd', 'svd'])
356@deprecation.deprecated_endpoints('svd')
357def svd(tensor, full_matrices=False, compute_uv=True, name=None):
358  r"""Computes the singular value decompositions of one or more matrices.
359
360  Computes the SVD of each inner matrix in `tensor` such that
361  `tensor[..., :, :] = u[..., :, :] * diag(s[..., :, :]) *
362   transpose(conj(v[..., :, :]))`
363
364  ```python
365  # a is a tensor.
366  # s is a tensor of singular values.
367  # u is a tensor of left singular vectors.
368  # v is a tensor of right singular vectors.
369  s, u, v = svd(a)
370  s = svd(a, compute_uv=False)
371  ```
372
373  Args:
374    tensor: `Tensor` of shape `[..., M, N]`. Let `P` be the minimum of `M` and
375      `N`.
376    full_matrices: If true, compute full-sized `u` and `v`. If false
377      (the default), compute only the leading `P` singular vectors.
378      Ignored if `compute_uv` is `False`.
379    compute_uv: If `True` then left and right singular vectors will be
380      computed and returned in `u` and `v`, respectively. Otherwise, only the
381      singular values will be computed, which can be significantly faster.
382    name: string, optional name of the operation.
383
384  Returns:
385    s: Singular values. Shape is `[..., P]`. The values are sorted in reverse
386      order of magnitude, so s[..., 0] is the largest value, s[..., 1] is the
387      second largest, etc.
388    u: Left singular vectors. If `full_matrices` is `False` (default) then
389      shape is `[..., M, P]`; if `full_matrices` is `True` then shape is
390      `[..., M, M]`. Not returned if `compute_uv` is `False`.
391    v: Right singular vectors. If `full_matrices` is `False` (default) then
392      shape is `[..., N, P]`. If `full_matrices` is `True` then shape is
393      `[..., N, N]`. Not returned if `compute_uv` is `False`.
394
395  @compatibility(numpy)
396  Mostly equivalent to numpy.linalg.svd, except that
397    * The order of output  arguments here is `s`, `u`, `v` when `compute_uv` is
398      `True`, as opposed to `u`, `s`, `v` for numpy.linalg.svd.
399    * full_matrices is `False` by default as opposed to `True` for
400       numpy.linalg.svd.
401    * tf.linalg.svd uses the standard definition of the SVD
402      \\(A = U \Sigma V^H\\), such that the left singular vectors of `a` are
403      the columns of `u`, while the right singular vectors of `a` are the
404      columns of `v`. On the other hand, numpy.linalg.svd returns the adjoint
405      \\(V^H\\) as the third output argument.
406  ```python
407  import tensorflow as tf
408  import numpy as np
409  s, u, v = tf.linalg.svd(a)
410  tf_a_approx = tf.matmul(u, tf.matmul(tf.linalg.diag(s), v, adjoint_b=True))
411  u, s, v_adj = np.linalg.svd(a, full_matrices=False)
412  np_a_approx = np.dot(u, np.dot(np.diag(s), v_adj))
413  # tf_a_approx and np_a_approx should be numerically close.
414  ```
415  @end_compatibility
416  """
417  s, u, v = gen_linalg_ops.svd(
418      tensor, compute_uv=compute_uv, full_matrices=full_matrices, name=name)
419  if compute_uv:
420    return math_ops.real(s), u, v
421  else:
422    return math_ops.real(s)
423
424
425# pylint: disable=redefined-builtin
426@tf_export('norm', 'linalg.norm', v1=[])
427def norm_v2(tensor,
428            ord='euclidean',
429            axis=None,
430            keepdims=None,
431            name=None):
432  r"""Computes the norm of vectors, matrices, and tensors.
433
434  This function can compute several different vector norms (the 1-norm, the
435  Euclidean or 2-norm, the inf-norm, and in general the p-norm for p > 0) and
436  matrix norms (Frobenius, 1-norm, 2-norm and inf-norm).
437
438  Args:
439    tensor: `Tensor` of types `float32`, `float64`, `complex64`, `complex128`
440    ord: Order of the norm. Supported values are `'fro'`, `'euclidean'`,
441      `1`, `2`, `np.inf` and any positive real number yielding the corresponding
442      p-norm. Default is `'euclidean'` which is equivalent to Frobenius norm if
443      `tensor` is a matrix and equivalent to 2-norm for vectors.
444      Some restrictions apply:
445        a) The Frobenius norm `'fro'` is not defined for vectors,
446        b) If axis is a 2-tuple (matrix norm), only `'euclidean'`, '`fro'`, `1`,
447           `2`, `np.inf` are supported.
448      See the description of `axis` on how to compute norms for a batch of
449      vectors or matrices stored in a tensor.
450    axis: If `axis` is `None` (the default), the input is considered a vector
451      and a single vector norm is computed over the entire set of values in the
452      tensor, i.e. `norm(tensor, ord=ord)` is equivalent to
453      `norm(reshape(tensor, [-1]), ord=ord)`.
454      If `axis` is a Python integer, the input is considered a batch of vectors,
455      and `axis` determines the axis in `tensor` over which to compute vector
456      norms.
457      If `axis` is a 2-tuple of Python integers it is considered a batch of
458      matrices and `axis` determines the axes in `tensor` over which to compute
459      a matrix norm.
460      Negative indices are supported. Example: If you are passing a tensor that
461      can be either a matrix or a batch of matrices at runtime, pass
462      `axis=[-2,-1]` instead of `axis=None` to make sure that matrix norms are
463      computed.
464    keepdims: If True, the axis indicated in `axis` are kept with size 1.
465      Otherwise, the dimensions in `axis` are removed from the output shape.
466    name: The name of the op.
467
468  Returns:
469    output: A `Tensor` of the same type as tensor, containing the vector or
470      matrix norms. If `keepdims` is True then the rank of output is equal to
471      the rank of `tensor`. Otherwise, if `axis` is none the output is a scalar,
472      if `axis` is an integer, the rank of `output` is one less than the rank
473      of `tensor`, if `axis` is a 2-tuple the rank of `output` is two less
474      than the rank of `tensor`.
475
476  Raises:
477    ValueError: If `ord` or `axis` is invalid.
478
479  @compatibility(numpy)
480  Mostly equivalent to numpy.linalg.norm.
481  Not supported: ord <= 0, 2-norm for matrices, nuclear norm.
482  Other differences:
483    a) If axis is `None`, treats the flattened `tensor` as a vector
484     regardless of rank.
485    b) Explicitly supports 'euclidean' norm as the default, including for
486     higher order tensors.
487  @end_compatibility
488  """
489  return norm(tensor=tensor,
490              ord=ord,
491              axis=axis,
492              keepdims=keepdims,
493              name=name)
494
495
496# pylint: disable=redefined-builtin
497@tf_export(v1=['norm', 'linalg.norm'])
498@deprecation.deprecated_args(
499    None, 'keep_dims is deprecated, use keepdims instead', 'keep_dims')
500def norm(tensor,
501         ord='euclidean',
502         axis=None,
503         keepdims=None,
504         name=None,
505         keep_dims=None):
506  r"""Computes the norm of vectors, matrices, and tensors.
507
508  This function can compute several different vector norms (the 1-norm, the
509  Euclidean or 2-norm, the inf-norm, and in general the p-norm for p > 0) and
510  matrix norms (Frobenius, 1-norm, 2-norm and inf-norm).
511
512  Args:
513    tensor: `Tensor` of types `float32`, `float64`, `complex64`, `complex128`
514    ord: Order of the norm. Supported values are 'fro', 'euclidean',
515      `1`, `2`, `np.inf` and any positive real number yielding the corresponding
516      p-norm. Default is 'euclidean' which is equivalent to Frobenius norm if
517      `tensor` is a matrix and equivalent to 2-norm for vectors.
518      Some restrictions apply:
519        a) The Frobenius norm `fro` is not defined for vectors,
520        b) If axis is a 2-tuple (matrix norm), only 'euclidean', 'fro', `1`,
521           `2`, `np.inf` are supported.
522      See the description of `axis` on how to compute norms for a batch of
523      vectors or matrices stored in a tensor.
524    axis: If `axis` is `None` (the default), the input is considered a vector
525      and a single vector norm is computed over the entire set of values in the
526      tensor, i.e. `norm(tensor, ord=ord)` is equivalent to
527      `norm(reshape(tensor, [-1]), ord=ord)`.
528      If `axis` is a Python integer, the input is considered a batch of vectors,
529      and `axis` determines the axis in `tensor` over which to compute vector
530      norms.
531      If `axis` is a 2-tuple of Python integers it is considered a batch of
532      matrices and `axis` determines the axes in `tensor` over which to compute
533      a matrix norm.
534      Negative indices are supported. Example: If you are passing a tensor that
535      can be either a matrix or a batch of matrices at runtime, pass
536      `axis=[-2,-1]` instead of `axis=None` to make sure that matrix norms are
537      computed.
538    keepdims: If True, the axis indicated in `axis` are kept with size 1.
539      Otherwise, the dimensions in `axis` are removed from the output shape.
540    name: The name of the op.
541    keep_dims: Deprecated alias for `keepdims`.
542
543  Returns:
544    output: A `Tensor` of the same type as tensor, containing the vector or
545      matrix norms. If `keepdims` is True then the rank of output is equal to
546      the rank of `tensor`. Otherwise, if `axis` is none the output is a scalar,
547      if `axis` is an integer, the rank of `output` is one less than the rank
548      of `tensor`, if `axis` is a 2-tuple the rank of `output` is two less
549      than the rank of `tensor`.
550
551  Raises:
552    ValueError: If `ord` or `axis` is invalid.
553
554  @compatibility(numpy)
555  Mostly equivalent to numpy.linalg.norm.
556  Not supported: ord <= 0, 2-norm for matrices, nuclear norm.
557  Other differences:
558    a) If axis is `None`, treats the flattened `tensor` as a vector
559     regardless of rank.
560    b) Explicitly supports 'euclidean' norm as the default, including for
561     higher order tensors.
562  @end_compatibility
563  """
564  keepdims = deprecation.deprecated_argument_lookup('keepdims', keepdims,
565                                                    'keep_dims', keep_dims)
566  if keepdims is None:
567    keepdims = False
568
569  is_matrix_norm = ((isinstance(axis, tuple) or isinstance(axis, list)) and
570                    len(axis) == 2)
571  if is_matrix_norm:
572    axis = tuple(axis)
573    if (not isinstance(axis[0], int) or not isinstance(axis[1], int) or
574        axis[0] == axis[1]):
575      raise ValueError(
576          "'axis' must be None, an integer, or a tuple of 2 unique integers")
577    supported_matrix_norms = ['euclidean', 'fro', 1, 2, np.inf]
578    if ord not in supported_matrix_norms:
579      raise ValueError("'ord' must be a supported matrix norm in %s, got %s" %
580                       (supported_matrix_norms, ord))
581  else:
582    if not (isinstance(axis, int) or axis is None):
583      raise ValueError(
584          "'axis' must be None, an integer, or a tuple of 2 unique integers")
585
586    supported_vector_norms = ['euclidean', 1, 2, np.inf]
587    if (not np.isreal(ord) or ord <= 0) and ord not in supported_vector_norms:
588      raise ValueError("'ord' must be a supported vector norm, got %s" % ord)
589    if axis is not None:
590      axis = (axis,)
591
592  with ops.name_scope(name, 'norm', [tensor]):
593    tensor = ops.convert_to_tensor(tensor)
594
595    if ord in ['fro', 'euclidean', 2, 2.0]:
596      if is_matrix_norm and ord in [2, 2.0]:
597        rank = array_ops.rank(tensor)
598        positive_axis = map_fn.map_fn(
599            lambda i: control_flow_ops.cond(i >= 0, lambda: i, lambda: i + rank),
600            ops.convert_to_tensor(axis))
601        axes = math_ops.range(rank)
602        perm_before = array_ops.concat(
603            [array_ops.setdiff1d(axes, positive_axis)[0], positive_axis],
604            axis=0)
605        perm_after = map_fn.map_fn(
606            lambda i: math_ops.cast(
607                array_ops.squeeze(
608                    array_ops.where(math_ops.equal(perm_before, i))),
609                dtype=dtypes.int32), axes)
610        permed = array_ops.transpose(tensor, perm=perm_before)
611        matrix_2_norm = array_ops.expand_dims(
612            math_ops.reduce_max(
613                math_ops.abs(gen_linalg_ops.svd(permed, compute_uv=False)[0]),
614                axis=-1,
615                keepdims=True),
616            axis=-1)
617        result = array_ops.transpose(matrix_2_norm, perm=perm_after)
618      else:
619        result = math_ops.sqrt(
620            math_ops.reduce_sum(
621                tensor * math_ops.conj(tensor), axis, keepdims=True))
622        # TODO(rmlarsen): Replace with the following, once gradients are defined
623        # result = math_ops.reduce_euclidean_norm(tensor, axis, keepdims=True)
624    else:
625      result = math_ops.abs(tensor)
626      if ord == 1:
627        sum_axis = None if axis is None else axis[0]
628        result = math_ops.reduce_sum(result, sum_axis, keepdims=True)
629        if is_matrix_norm:
630          result = math_ops.reduce_max(result, axis[-1], keepdims=True)
631      elif ord == np.inf:
632        if is_matrix_norm:
633          result = math_ops.reduce_sum(result, axis[1], keepdims=True)
634        max_axis = None if axis is None else axis[0]
635        result = math_ops.reduce_max(result, max_axis, keepdims=True)
636      else:
637        # General p-norms (positive p only)
638        result = math_ops.pow(
639            math_ops.reduce_sum(math_ops.pow(result, ord), axis, keepdims=True),
640            1.0 / ord)
641    if not keepdims:
642      result = array_ops.squeeze(result, axis)
643    return result
644
645
646# pylint: enable=invalid-name,redefined-builtin
647