/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <cstddef>

#include "absl/algorithm/container.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/lib/core/errors.h"

namespace tensorflow {
namespace {

// Helper shape function for operators that return an output with the same rank
// as their first input.
Status UnchangedRank(shape_inference::InferenceContext* c) {
  if (c->RankKnown(c->input(0))) {
    c->set_output(0, c->UnknownShapeOfRank(c->Rank(c->input(0))));
  } else {
    c->set_output(0, c->input(0));
  }
  return Status::OK();
}

REGISTER_OP("XlaBroadcastHelper")
    .Input("lhs: T")
    .Input("rhs: T")
    .Input("broadcast_dims: Tindices")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Output("lhs_output: T")
    .Output("rhs_output: T")
    .SetShapeFn(shape_inference::UnknownShape)
    .Doc(R"doc(
Helper operator for performing XLA-style broadcasts.

Broadcasts `lhs` and `rhs` to the same rank, by adding size 1 dimensions to
whichever of `lhs` and `rhs` has the lower rank, using XLA's broadcasting rules
for binary operators.
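
For example, if `lhs` has shape [2, 3] and `rhs` has shape [3], then
`broadcast_dims = [1]` (an illustrative value) reshapes `rhs` to shape [1, 3]
so that it can broadcast against `lhs`.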

lhs: the LHS input tensor
rhs: the RHS input tensor
broadcast_dims: an XLA-style broadcast dimension specification
lhs_output: the broadcasted LHS tensor
rhs_output: the broadcasted RHS tensor
)doc");

REGISTER_OP("XlaSelfAdjointEig")
    .Input("a: T")
    .Attr("lower: bool")
    .Attr("max_iter: int")
    .Attr("epsilon: float")
    .Output("w: T")
    .Output("v: T")
    .SetShapeFn(shape_inference::UnknownShape)
    .Attr("T: numbertype")
    .Doc(R"doc(
Computes the eigen decomposition of a batch of self-adjoint matrices
(Note: Only real inputs are supported).

Computes the eigenvalues and eigenvectors of the innermost N-by-N matrices in
tensor such that tensor[...,:,:] * v[..., :,i] = w[..., i] * v[...,:,i], for
i=0...N-1.

a: the input tensor.

lower: a boolean that specifies whether the calculation is done with the lower
  triangular part or the upper triangular part.

max_iter: maximum number of sweep updates, i.e., passes over the whole lower or
  upper triangular part, as selected by the `lower` parameter. Heuristically,
  approximately log(N) sweeps are needed in practice (Ref: Golub & van Loan,
  "Matrix Computations").

epsilon: the tolerance ratio.

w: The eigenvalues in ascending order, each repeated according to its
  multiplicity.
v: The column v[..., :, i] is the normalized eigenvector corresponding to the
  eigenvalue w[..., i].
)doc");
99 
100 REGISTER_OP("XlaSvd")
101     .Input("a: T")
102     .Attr("max_iter: int")
103     .Attr("epsilon: float")
104     .Attr("precision_config: string")
105     .Output("s: T")
106     .Output("u: T")
107     .Output("v: T")
108     .SetShapeFn(shape_inference::UnknownShape)
109     .Attr("T: numbertype")
110     .Doc(R"doc(
111 Computes the eigen decomposition of a batch of self-adjoint matrices
112 (Note: Only real inputs are supported).
113 
114 Computes the eigenvalues and eigenvectors of the innermost M-by-N matrices in
115 tensor such that tensor[...,:,:] = u[..., :, :] * Diag(s[..., :]) * Transpose(v[...,:,:]).
116 
117 a: the input tensor.
118 
119 max_iter: maximum number of sweep update, i.e., the whole lower triangular
120   part or upper triangular part based on parameter lower. Heuristically, it has
121   been argued that approximately log(min (M, N)) sweeps are needed in practice
122   (Ref: Golub & van Loan "Matrix Computation").
123 
124 epsilon: the tolerance ratio.
125 
126 precision_config: a serialized xla::PrecisionConfig proto.
127 
128 s: Singular values. The values are sorted in reverse order of magnitude, so
129   s[..., 0] is the largest value, s[..., 1] is the second largest, etc.
130 u: Left singular vectors.
131 v: Right singular vectors.
132 )doc");
133 
REGISTER_OP("XlaConv")
    .Input("lhs: T")
    .Input("rhs: T")
    .Input("window_strides: Tindices")
    .Input("padding: Tindices")
    .Input("lhs_dilation: Tindices")
    .Input("rhs_dilation: Tindices")
    .Input("feature_group_count: Tindices")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("dimension_numbers: string")
    .Attr("precision_config: string")
    .Output("output: T")
    .SetShapeFn(UnchangedRank)
    .Doc(R"doc(
Wraps the XLA ConvGeneralDilated operator, documented at
 https://www.tensorflow.org/performance/xla/operation_semantics#conv_convolution
.

lhs: the input tensor
rhs: the kernel tensor
window_strides: the inter-window strides
padding: the padding to apply at the start and end of each input dimension
lhs_dilation: dilation to apply between input elements
rhs_dilation: dilation to apply between kernel elements
feature_group_count: number of feature groups for grouped convolution.
dimension_numbers: a serialized xla::ConvolutionDimensionNumbers proto.
precision_config: a serialized xla::PrecisionConfig proto.
)doc");

REGISTER_OP("XlaDot")
    .Input("lhs: T")
    .Input("rhs: T")
    .Attr("T: numbertype")
    .Attr("dimension_numbers: string")
    .Attr("precision_config: string")
    .Output("output: T")
    .SetShapeFn([](shape_inference::InferenceContext* c) {
      shape_inference::ShapeHandle lhs_shape_handle = c->input(0);
      shape_inference::ShapeHandle rhs_shape_handle = c->input(1);
      if (!c->FullyDefined(lhs_shape_handle) ||
          !c->FullyDefined(rhs_shape_handle)) {
        return shape_inference::UnknownShape(c);
      }

      string dimension_numbers_string;
      TF_RETURN_IF_ERROR(
          c->GetAttr("dimension_numbers", &dimension_numbers_string));

      xla::DotDimensionNumbers dimension_numbers;
      dimension_numbers.ParseFromString(dimension_numbers_string);

      // Check that the number of contracting dimensions matches.
      if (dimension_numbers.lhs_contracting_dimensions_size() !=
          dimension_numbers.rhs_contracting_dimensions_size())
        return errors::InvalidArgument(
            "Must specify the same number of contracting dimensions for lhs "
            "and rhs. Got: ",
            dimension_numbers.lhs_contracting_dimensions_size(), " and ",
            dimension_numbers.rhs_contracting_dimensions_size());

      // Check that contracting dimension sizes match.
      for (int64 i = 0; i < dimension_numbers.lhs_contracting_dimensions_size();
           ++i) {
        const int64 lhs_contracting_dimension =
            dimension_numbers.lhs_contracting_dimensions(i);
        const int64 rhs_contracting_dimension =
            dimension_numbers.rhs_contracting_dimensions(i);
        shape_inference::DimensionOrConstant
            lhs_contracting_dimension_or_constant(
                c->DimKnownRank(lhs_shape_handle, lhs_contracting_dimension));
        shape_inference::DimensionOrConstant
            rhs_contracting_dimension_or_constant(
                c->DimKnownRank(rhs_shape_handle, rhs_contracting_dimension));
        const int64 lhs_contracting_dimension_size =
            c->Value(lhs_contracting_dimension_or_constant);
        const int64 rhs_contracting_dimension_size =
            c->Value(rhs_contracting_dimension_or_constant);
        if (lhs_contracting_dimension_size != rhs_contracting_dimension_size) {
          return errors::InvalidArgument(
              "Contracting dimension sizes do not match. Got: ",
              lhs_contracting_dimension_size, " and ",
              rhs_contracting_dimension_size);
        }
      }

      // Check that the number of batch dimensions matches.
      if (dimension_numbers.lhs_batch_dimensions_size() !=
          dimension_numbers.rhs_batch_dimensions_size())
        return errors::InvalidArgument(
            "Must specify the same number of batch dimensions for lhs "
            "and rhs. Got: ",
            dimension_numbers.lhs_batch_dimensions_size(), " and ",
            dimension_numbers.rhs_batch_dimensions_size());

      // Check that batch dimension sizes match.
      for (int64 i = 0; i < dimension_numbers.lhs_batch_dimensions_size();
           ++i) {
        const int64 lhs_batch_dimension =
            dimension_numbers.lhs_batch_dimensions(i);
        const int64 rhs_batch_dimension =
            dimension_numbers.rhs_batch_dimensions(i);
        shape_inference::DimensionOrConstant lhs_batch_dimension_or_constant(
            c->DimKnownRank(lhs_shape_handle, lhs_batch_dimension));
        shape_inference::DimensionOrConstant rhs_batch_dimension_or_constant(
            c->DimKnownRank(rhs_shape_handle, rhs_batch_dimension));
        const int64 lhs_batch_dimension_size =
            c->Value(lhs_batch_dimension_or_constant);
        const int64 rhs_batch_dimension_size =
            c->Value(rhs_batch_dimension_or_constant);
        if (lhs_batch_dimension_size != rhs_batch_dimension_size) {
          return errors::InvalidArgument(
              "Batch dimension sizes do not match. Got: ",
              lhs_batch_dimension_size, " and ", rhs_batch_dimension_size);
        }
      }

      // Each contracting or batch dimension is removed from the operand on
      // which it appears; a scalar input contributes 0 dimensions to the
      // result. Generate the result dimensions in order: the batch dimensions,
      // then the remaining lhs dimensions, then the remaining rhs dimensions.
      std::vector<shape_inference::DimensionHandle> output_dims;
      for (int64 lhs_dim : dimension_numbers.lhs_batch_dimensions()) {
        output_dims.emplace_back(c->Dim(lhs_shape_handle, lhs_dim));
      }
      const int32 lhs_rank = c->Rank(lhs_shape_handle);
      for (int64 i = 0; i < lhs_rank; ++i) {
        if (absl::c_linear_search(
                dimension_numbers.lhs_contracting_dimensions(), i) ||
            absl::c_linear_search(dimension_numbers.lhs_batch_dimensions(),
                                  i)) {
          continue;
        }
        output_dims.emplace_back(c->Dim(lhs_shape_handle, i));
      }

      const int32 rhs_rank = c->Rank(rhs_shape_handle);
      for (int64 i = 0; i < rhs_rank; ++i) {
        if (absl::c_linear_search(
                dimension_numbers.rhs_contracting_dimensions(), i) ||
            absl::c_linear_search(dimension_numbers.rhs_batch_dimensions(),
                                  i)) {
          continue;
        }
        output_dims.emplace_back(c->Dim(rhs_shape_handle, i));
      }

      c->set_output(0, c->MakeShape(output_dims));
      return Status::OK();
    })
    .Doc(R"doc(
Wraps the XLA DotGeneral operator, documented at
 https://www.tensorflow.org/performance/xla/operation_semantics#dotgeneral
.

lhs: the LHS tensor
rhs: the RHS tensor
dimension_numbers: a serialized xla::DotDimensionNumbers proto.
precision_config: a serialized xla::PrecisionConfig proto.
)doc");

REGISTER_OP("XlaSetBound")
    .Input("input: int32")
    .Input("bound: int32")
    .Output("output: int32")
    .SetShapeFn(shape_inference::UnknownShape)
    .Doc(
        R"doc(Set a bound for the given input value as a hint to the XLA
        compiler; returns the same value.
)doc");

REGISTER_OP("XlaSetDynamicDimensionSize")
    .Input("input: T")
    .Input("dim_index: int32")
    .Input("size: int32")
    .Output("output: T")
    .Attr("T: type")
    // Use unknown shape to prevent constant folding.
    .SetShapeFn(shape_inference::UnknownShape)
    .Doc(
        R"doc(Make a static dimension into an XLA bounded dynamic dimension.
        The current static dimension size will become the bound, and the second
        operand becomes the dynamic size of the dimension.)doc");

REGISTER_OP("XlaDynamicSlice")
    .Input("input: T")
    .Input("start_indices: Tindices")
    .Input("size_indices: Tindices")
    .Output("output: T")
    .Attr("T: type")
    .Attr("Tindices: {int32, int64}")
    .SetShapeFn(shape_inference::UnknownShape)
    .Doc(R"doc(
Wraps the XLA DynamicSlice operator, documented at
 https://www.tensorflow.org/performance/xla/operation_semantics#dynamicslice
.

DynamicSlice extracts a sub-array from the input array at dynamic
start_indices. The size of the slice in each dimension is passed in
size_indices; the slice in each dimension covers the half-open interval
[start, start + size). start_indices must have rank 1, with dimension size
equal to the rank of operand.
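
For example, if `input` is [[1, 2, 3], [4, 5, 6]], `start_indices` is [0, 1]
and `size_indices` is [2, 2], the output is [[2, 3], [5, 6]].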

input: A `Tensor` of type T.

start_indices: Rank 1 tensor of N integers containing the starting indices of
  the slice for each dimension. Each value must be greater than or equal to
  zero.

size_indices: List of N integers containing the slice size for each
  dimension. Each value must be strictly greater than zero, and start + size
  must be less than or equal to the size of the dimension to avoid
  implementation-defined behavior.
)doc");

REGISTER_OP("XlaDynamicUpdateSlice")
    .Input("input: T")
    .Input("update: T")
    .Input("indices: Tindices")
    .Output("output: T")
    .Attr("T: type")
    .Attr("Tindices: {int32, int64}")
    .SetShapeFn(shape_inference::UnchangedShape)
    .Doc(R"doc(
Wraps the XLA DynamicUpdateSlice operator, documented at
 https://www.tensorflow.org/performance/xla/operation_semantics#dynamicupdateslice
.

XlaDynamicUpdateSlice generates a result which is the value of the `input`
operand, with a slice update overwritten at `indices`. The shape of `update`
determines the shape of the sub-array of the result which is updated. `indices`
must be a rank-1 tensor with dimension size equal to the rank of `input`.

Handling of out-of-bounds slice indices is implementation-defined.
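
For example, if `input` is [[1, 2, 3], [4, 5, 6]], `update` is [[9]] and
`indices` is [1, 2], the output is [[1, 2, 3], [4, 5, 9]].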

input: A `Tensor` of type T.
indices: A vector of indices into `input`. Must have length equal to the rank of
  `input`.
update: A `Tensor` of type T. Same rank as `input`.
output: A `Tensor` of type T.
)doc");

// TODO(b/37549631) setting the If Op to always be stateful is too
// conservative.
REGISTER_OP("XlaIf")
    .Input("cond: Tcond")
    .Input("inputs: Tin")
    .Output("output: Tout")
    .Attr("Tcond: type")
    .Attr("then_branch: func")
    .Attr("else_branch: func")
    .Attr("Tin: list(type) >= 0")
    .Attr("Tout: list(type) >= 0")
    .SetIsStateful()
    .SetShapeFn(shape_inference::UnknownShape)
    .Doc(R"doc(
output = cond ? then_branch(inputs) : else_branch(inputs).

cond: A boolean scalar.
inputs: A list of input tensors.
output: A list of tensors returned by either then_branch(inputs) or
        else_branch(inputs). The input shapes of the then_branch and
        else_branch must match.
then_branch: A function that takes 'inputs' and returns a list of tensors,
             whose types are the same as what else_branch returns.
else_branch: A function that takes 'inputs' and returns a list of tensors,
             whose types are the same as what then_branch returns.
)doc");

REGISTER_OP("XlaPad")
    .Input("input: T")
    .Input("padding_value: T")
    .Input("padding_low: Tindices")
    .Input("padding_high: Tindices")
    .Input("padding_interior: Tindices")
    .Output("output: T")
    .Attr("T: type")
    .Attr("Tindices: {int32, int64}")
    .SetShapeFn([](shape_inference::InferenceContext* c) {
      shape_inference::ShapeHandle input_shape_handle = c->input(0);
      if (!c->FullyDefined(input_shape_handle)) {
        return UnchangedRank(c);
      }
      const int32 op_rank = c->Rank(input_shape_handle);

      shape_inference::ShapeHandle padding_shape_handle = c->input(1);
      if (!c->RankKnown(padding_shape_handle) ||
          c->Rank(padding_shape_handle) != 0) {
        return errors::InvalidArgument(
            "padding_value input must be scalar, found rank ",
            c->Rank(padding_shape_handle));
      }
      const Tensor* padding_low_tensor = c->input_tensor(2);
      const Tensor* padding_high_tensor = c->input_tensor(3);
      const Tensor* padding_interior_tensor = c->input_tensor(4);
      if (padding_low_tensor == nullptr || padding_high_tensor == nullptr ||
          padding_interior_tensor == nullptr) {
        return UnchangedRank(c);
      }

      if (padding_low_tensor->shape().dims() != 1 ||
          padding_low_tensor->shape().dim_size(0) != op_rank) {
        return errors::InvalidArgument(
            "padding_low must be a 1D tensor of size ", op_rank);
      }
      if (padding_high_tensor->shape().dims() != 1 ||
          padding_high_tensor->shape().dim_size(0) != op_rank) {
        return errors::InvalidArgument(
            "padding_high must be a 1D tensor of size ", op_rank);
      }
      if (padding_interior_tensor->shape().dims() != 1 ||
          padding_interior_tensor->shape().dim_size(0) != op_rank) {
        return errors::InvalidArgument(
            "padding_interior must be a 1D tensor of size ", op_rank);
      }
      std::vector<shape_inference::DimensionHandle> output_dims;
      output_dims.reserve(op_rank);
      for (int64 i = 0; i < op_rank; ++i) {
        int64 low, high, interior;
        TF_RETURN_IF_ERROR(c->GetScalarFromTensor(padding_low_tensor, i, &low));
        TF_RETURN_IF_ERROR(
            c->GetScalarFromTensor(padding_high_tensor, i, &high));
        TF_RETURN_IF_ERROR(
            c->GetScalarFromTensor(padding_interior_tensor, i, &interior));
        if (interior < 0) {
          return errors::InvalidArgument(
              "padding_interior must contain only non-negative values, found ",
              interior);
        }

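        // The padded size of dimension i is the original size plus `low` and
        // `high` edge padding, plus `interior` padding elements between each
        // pair of adjacent original elements.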
        shape_inference::DimensionHandle orig_size_handle =
            c->Dim(input_shape_handle, i);
        if (c->ValueKnown(orig_size_handle)) {
          auto orig_dim = c->Value(orig_size_handle);
          int64 new_dim = orig_dim + low + high;
          if (orig_dim > 0) {
            new_dim += interior * (orig_dim - 1);
          }
          if (new_dim < 0) {
            return errors::InvalidArgument(
                "resulting padded dimension has negative size ", new_dim);
          }
          output_dims.emplace_back(c->MakeDim(new_dim));
        } else {
          output_dims.emplace_back(c->UnknownDim());
        }
      }

      c->set_output(0, c->MakeShape(output_dims));
      return Status::OK();
    })
    .Doc(R"doc(
Wraps the XLA Pad operator, documented at
 https://www.tensorflow.org/performance/xla/operation_semantics#pad
.

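For example, with `input = [1, 2, 3]`, `padding_value = 0`, `padding_low = [1]`,
`padding_high = [2]` and `padding_interior = [1]`, the output is
[0, 1, 0, 2, 0, 3, 0, 0].
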
input: A `Tensor` of type T.
padding_value: A scalar `Tensor` of type T.
padding_low: the padding to apply at the start of each input dimension. Must
  be a compile-time constant 1D tensor of length equal to rank of input.
padding_high: the padding to apply at the end of each input dimension. Must
  be a compile-time constant 1D tensor of length equal to rank of input.
padding_interior: the padding to apply between each input element. Must
  be a compile-time constant 1D tensor of length equal to rank of input,
  containing only non-negative values.
output: A `Tensor` of type T.
)doc");

REGISTER_OP("XlaRecv")
    .Output("tensor: dtype")
    .Attr("dtype: type")
    .Attr("tensor_name: string")
    .Attr("shape: shape")
    .SetIsStateful()
    .SetShapeFn([](shape_inference::InferenceContext* c) {
      TensorShape shape_attr;
      TF_RETURN_IF_ERROR(c->GetAttr("shape", &shape_attr));
      shape_inference::ShapeHandle s;
      TF_RETURN_IF_ERROR(c->MakeShapeFromTensorShape(shape_attr, &s));
      c->set_output(0, s);
      return Status::OK();
    })
    .Doc(R"doc(
Receives the named tensor from another XLA computation. Wraps the XLA Recv
operator documented at
 https://www.tensorflow.org/performance/xla/operation_semantics#recv .

tensor: The tensor to receive.
dtype: The type of the tensor.
tensor_name: A string key that identifies the channel.
shape: The shape of the tensor.
)doc");

REGISTER_OP("XlaReduce")
    .Input("input: T")
    .Input("init_value: T")
    .Attr("T: numbertype")
    .Attr("dimensions_to_reduce: list(int)")
    .Attr("reducer: func")
    .Output("output: T")
    .SetShapeFn([](shape_inference::InferenceContext* c) {
      if (c->RankKnown(c->input(0))) {
        int rank = c->Rank(c->input(0));
        std::vector<int64> dimensions_to_reduce;
        TF_RETURN_IF_ERROR(
            c->GetAttr("dimensions_to_reduce", &dimensions_to_reduce));
        std::set<int64> dims_set(dimensions_to_reduce.begin(),
                                 dimensions_to_reduce.end());
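        // The reduction dimensions must be unique and each must lie in
        // [0, rank); each reduced dimension removes one dimension from the
        // output shape.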
        auto dim_in_range = [rank](int64 dim) {
          return dim >= 0 && dim < rank;
        };
        const int dimensions_to_reduce_size = dimensions_to_reduce.size();
        if (rank < dimensions_to_reduce_size ||
            dims_set.size() != dimensions_to_reduce.size() ||
            !absl::c_all_of(dimensions_to_reduce, dim_in_range)) {
          return errors::InvalidArgument(
              "Invalid dimensions_to_reduce argument to XlaReduce");
        }
        c->set_output(
            0, c->UnknownShapeOfRank(rank - dimensions_to_reduce.size()));
      } else {
        c->set_output(0, c->input(0));
      }
      return Status::OK();
    })
    .Doc(R"doc(
Wraps the XLA Reduce operator, documented at
 https://www.tensorflow.org/performance/xla/operation_semantics#reduce .

input: the input tensor
init_value: a scalar representing the initial value for the reduction
reducer: a reducer function to apply
dimensions_to_reduce: dimension numbers over which to reduce
)doc");

REGISTER_OP("XlaVariadicReduce")
    .Input("input: N * T")
    .Input("init_value: N * T")
    .Attr("N: int >= 1")
    .Attr("T: numbertype")
    .Attr("dimensions_to_reduce: list(int)")
    .Attr("reducer: func")
    .Output("output: N * T")
    .SetShapeFn([](shape_inference::InferenceContext* c) {
      int n;
      TF_RETURN_IF_ERROR(c->GetAttr("N", &n));
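      // All N inputs must have compatible shapes, so merge every input shape
      // with every other; the merged shape is then used for the outputs.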
      for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
          c->MergeInput(i, c->input(j));
        }
      }
      if (c->RankKnown(c->input(0))) {
        int rank = c->Rank(c->input(0));
        std::vector<int64> dimensions_to_reduce;
        TF_RETURN_IF_ERROR(
            c->GetAttr("dimensions_to_reduce", &dimensions_to_reduce));
        std::set<int64> dims_set(dimensions_to_reduce.begin(),
                                 dimensions_to_reduce.end());
        auto dim_in_range = [rank](int64 dim) {
          return dim >= 0 && dim < rank;
        };
        const int dimensions_to_reduce_size = dimensions_to_reduce.size();
        if (rank < dimensions_to_reduce_size ||
            dims_set.size() != dimensions_to_reduce.size() ||
            !absl::c_all_of(dimensions_to_reduce, dim_in_range)) {
          return errors::InvalidArgument(
              "Invalid dimensions_to_reduce argument to XlaVariadicReduce");
        }
        for (int i = 0; i < n; i++) {
          c->set_output(
              i, c->UnknownShapeOfRank(rank - dimensions_to_reduce.size()));
        }
      } else {
        for (int i = 0; i < n; i++) {
          c->set_output(i, c->input(i));
        }
      }
      return Status::OK();
    })
    .Doc(R"doc(
Wraps the variadic XLA Reduce operator.

Semantics are documented at
 https://www.tensorflow.org/performance/xla/operation_semantics#variadic_reduce.

input: the input tensor(s)
init_value: scalar initial value(s) for the reduction
reducer: a reducer function to apply
dimensions_to_reduce: dimension numbers over which to reduce
)doc");

REGISTER_OP("XlaReduceWindow")
    .Input("input: T")
    .Input("init_value: T")
    .Input("window_dimensions: Tindices")
    .Input("window_strides: Tindices")
    .Input("base_dilations: Tindices")
    .Input("window_dilations: Tindices")
    .Input("padding: Tindices")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("computation: func")
    .Output("output: T")
    .SetShapeFn(UnchangedRank)
    .Doc(R"doc(
Wraps the XLA ReduceWindow operator, documented at
 https://www.tensorflow.org/performance/xla/operation_semantics#reducewindow .

input: the input tensor
init_value: a scalar representing the initial value for the reduction
computation: a reducer function to apply
window_dimensions: the shape of the window
window_strides: the inter-window strides
padding: the padding to apply at the start and end of each input dimension
)doc");

REGISTER_OP("XlaSelectAndScatter")
    .Input("operand: T")
    .Input("window_dimensions: Tindices")
    .Input("window_strides: Tindices")
    .Input("padding: Tindices")
    .Input("source: T")
    .Input("init_value: T")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("select: func")
    .Attr("scatter: func")
    .Output("output: T")
    .SetShapeFn(UnchangedRank)
    .Doc(R"doc(
Wraps the XLA SelectAndScatter operator, documented at
 https://www.tensorflow.org/performance/xla/operation_semantics#selectandscatter
.

operand: the input tensor
window_dimensions: the shape of the window
window_strides: the inter-window strides
padding: the padding to apply at the start and end of each input dimension
source: a tensor of values to scatter
init_value: a scalar representing the initial value for the output tensor
select: a selection function to apply
scatter: a scatter function to apply
)doc");

REGISTER_OP("XlaSend")
    .Input("tensor: T")
    .Attr("T: type")
    .Attr("tensor_name: string")
    .SetIsStateful()
    .SetShapeFn(shape_inference::UnknownShape)
    .Doc(R"doc(
Sends the named tensor to another XLA computation. Wraps the XLA Send operator
documented at
 https://www.tensorflow.org/performance/xla/operation_semantics#send .

tensor: The tensor to send.
tensor_name: A string key that identifies the channel.
)doc");

REGISTER_OP("XlaSort")
    .Input("input: T")
    .Output("output: T")
    .Attr("T: type")
    .SetShapeFn(shape_inference::UnchangedShape)
    .Doc(R"doc(
Wraps the XLA Sort operator, documented at
 https://www.tensorflow.org/performance/xla/operation_semantics#sort
.

Sorts a tensor. Currently only sorts in ascending order are supported.

input: A `Tensor` of type T.
output: A `Tensor` of type T.
)doc");

REGISTER_OP("XlaKeyValueSort")
    .Input("keys: K")
    .Input("values: V")
    .Output("sorted_keys: K")
    .Output("sorted_values: V")
    .Attr("K: realnumbertype")
    .Attr("V: type")
    .SetShapeFn([](shape_inference::InferenceContext* c) {
      c->set_output(0, c->input(0));
      c->set_output(1, c->input(1));
      return Status::OK();
    })
    .Doc(R"doc(
Wraps the XLA Sort operator, documented at
 https://www.tensorflow.org/performance/xla/operation_semantics#sort
.

Sorts a tensor. Currently only sorts in ascending order are supported.

keys: A `Tensor` of type K.
values: A `Tensor` of type V.
sorted_keys: A `Tensor` of type K.
sorted_values: A `Tensor` of type V.
)doc");

REGISTER_OP("XlaVariadicSort")
    .Input("inputs: T")
    .Input("dimension: int32")
    .Output("outputs: T")
    .Attr("T: list(type) >= 1")
    .Attr("comparator: func")
    .Attr("is_stable: bool")
    .SetShapeFn([](shape_inference::InferenceContext* c) {
      std::vector<shape_inference::ShapeHandle> input_shapes;
      TF_RETURN_IF_ERROR(c->input("inputs", &input_shapes));
      TF_RETURN_IF_ERROR(c->set_output("outputs", input_shapes));
      return Status::OK();
    })
    .Doc(R"doc(
Wraps the XLA Sort operator, documented at
 https://www.tensorflow.org/performance/xla/operation_semantics#sort
.

Sorts one or more tensors, with support for custom comparator, dimension, and
is_stable attributes.

inputs: A list of `Tensor` of identical shape but possibly different types.
dimension: The dimension along which to sort. Must be a compile-time constant.
is_stable: Whether to use stable sort.
comparator: A comparator function that takes 2*N scalars and returns a
  boolean. N is the number of sort inputs. If you want to sort in ascending
  order then the comparator should perform a less-than comparison.
outputs: A list of `Tensor` of the same shapes and types as `inputs`.
)doc");

// TODO(b/37549631) setting the While Op to always be stateful is too
// conservative.
REGISTER_OP("XlaWhile")
    .Input("input: T")
    .Output("output: T")
    .Attr("T: list(type) >= 0")
    .Attr("cond: func")
    .Attr("body: func")
    .SetIsStateful()
    .SetShapeFn(shape_inference::UnknownShape)
    .Doc(R"doc(
output = input; While (Cond(output)) { output = Body(output) }

input: A list of input tensors whose types are T.
output: A list of output tensors whose types are T.
cond: A function that takes 'input' and returns a tensor.  If the tensor is
      a non-boolean scalar, the scalar is converted to a boolean
      according to the following rule: if the scalar is a numerical
      value, non-zero means True and zero means False; if the scalar is
      a string, non-empty means True and empty means False. If the
      tensor is not a scalar, non-emptiness means True and emptiness
      means False.
body: A function that takes a list of tensors and returns another
      list of tensors. Both lists have the same types as specified by T.
)doc");

REGISTER_OP("XlaDequantize")
    .Input("input: uint32")
    .Output("output: bfloat16")
    .Attr("min_range: float")
    .Attr("max_range: float")
    .Attr("mode: string")
    .Attr("transpose_output: bool")
    .SetIsStateful()
    .SetShapeFn(shape_inference::UnknownShape)
    .Doc(R"doc(
Takes the packed uint32 input and unpacks the input into uint8 values to
perform dequantization on device.

input: Input tensor whose type is uint32, with shape [d0, ..., dn].
output: Output tensor whose type is bfloat16. If transpose_output is true,
     output shape is [dn * 4, dn-1, ..., d1, d0]. If transpose_output
     is false, output shape is [d0, ..., dn * 4].
min_range: The minimum scalar value possibly produced for the input.
max_range: The maximum scalar value possibly produced for the input.
mode: String to determine the dequantize mode in {"MIN_COMBINED", "MIN_FIRST", "SCALED"}.
transpose_output: Boolean to determine if output is transposed. transpose_output
     is faster when input is large and the rank of the input is higher than 1.
)doc");

REGISTER_OP("XlaEinsum")
    .Input("a: T")
    .Input("b: T")
    .Output("product: T")
    .Attr("equation: string")
    .Attr("T: {complex64, bfloat16, float}")
    .SetShapeFn([](shape_inference::InferenceContext* context) {
      string equation;
      TF_RETURN_IF_ERROR(context->GetAttr("equation", &equation));
      // XlaEinsum supports only two-input einsum equations.
      if (!absl::StrContains(equation, ",")) {
        return errors::InvalidArgument("Expected one \",\" in equation. Got: ",
                                       equation);
      }
      // Use EinsumShape for the rest of the inference now that we know we must
      // have a two-input einsum.
      return shape_inference::EinsumShape(context);
    })
    .Doc(R"doc(
An op which supports a basic einsum operation with 2 inputs and 1 output.

This op has better TPU performance since it doesn't have the explicit reshape
and transpose operations that tf.einsum does.
)doc");

REGISTER_OP("XlaSpmdFullToShardShape")
    .Input("input: T")
    .Output("output: T")
    .Attr("T: type")
    .Attr("manual_sharding: string")
    .SetShapeFn([](shape_inference::InferenceContext* c) {
      auto input_handle = c->input(0);
      if (!c->RankKnown(input_handle)) {
        return shape_inference::UnknownShape(c);
      }
      string sharding_attr;
      TF_RETURN_IF_ERROR(c->GetAttr("manual_sharding", &sharding_attr));
      xla::OpSharding sharding;
      sharding.ParseFromString(sharding_attr);
      if (sharding.type() != xla::OpSharding::OTHER) {
        return shape_inference::UnchangedShape(c);
      }
      std::vector<shape_inference::DimensionHandle> dims;
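      // For each dimension, the shard size is the full size divided by the
      // number of partitions along that dimension, rounded up (the last shard
      // may be padded).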
      for (int64 i = 0; i < c->Rank(input_handle); ++i) {
        auto dim = c->Value(c->Dim(input_handle, i));
        int64 partitions_i = sharding.tile_assignment_dimensions(i);
        if (dim != shape_inference::InferenceContext::kUnknownDim &&
            partitions_i != 1) {
          dim = (dim + partitions_i - 1) / partitions_i;
        }
        dims.push_back(c->MakeDim(dim));
      }
      c->set_output(0, c->MakeShape(dims));
      return Status::OK();
    })
    .Doc(R"doc(
An op used by XLA SPMD partitioner to switch from automatic partitioning to
manual partitioning. It annotates the input (full-shape, to be automatically
partitioned) with the same sharding used by manual partitioning, and outputs a
shard-shaped tensor to be consumed by later manually-partitioned ops. If the
shape is not evenly partitionable, the padding region will be masked with 0s.
)doc");

REGISTER_OP("XlaSpmdShardToFullShape")
    .Input("input: T")
    .Output("output: T")
    .Attr("T: type")
    .Attr("manual_sharding: string")
    .Attr("full_shape: shape")
    .SetShapeFn([](shape_inference::InferenceContext* c) {
      TensorShape shape_attr;
      TF_RETURN_IF_ERROR(c->GetAttr("full_shape", &shape_attr));
      shape_inference::ShapeHandle s;
      TF_RETURN_IF_ERROR(c->MakeShapeFromTensorShape(shape_attr, &s));
      c->set_output(0, s);
      return Status::OK();
    })
    .Doc(R"doc(
An op used by XLA SPMD partitioner to switch from manual partitioning to
automatic partitioning. It converts the shard-shaped, manually partitioned input
into a full-shaped tensor to be partitioned automatically with the same sharding
used by manual partitioning.
)doc");

REGISTER_OP("XlaSharding")
    .Input("input: T")
    .Output("output: T")
    .Attr("T: type")
    .Attr("sharding: string = ''")
    .SetShapeFn(shape_inference::UnchangedShape)
    .Doc(R"doc(
An op which shards the input based on the given sharding attribute.
)doc");

REGISTER_OP("XlaReplicaId")
    .Output("id: int32")
    .SetShapeFn([](shape_inference::InferenceContext* context) {
      context->set_output(0, context->MakeShape({}));
      return Status::OK();
    })
    .Doc("Replica ID.");

REGISTER_OP("XlaGather")
    .Input("operand: T")
    .Input("start_indices: Tindices")
    .Input("slice_sizes: Tindices")
    .Attr("dimension_numbers: string")
    .Attr("indices_are_sorted: bool")
    .Attr("T: {numbertype, bool}")
    .Attr("Tindices: {int32, int64}")
    .Output("output: T")
    .SetShapeFn(shape_inference::UnknownShape)
    .Doc(R"doc(
Wraps the XLA Gather operator documented at
  https://www.tensorflow.org/xla/operation_semantics#gather

operand: The array we're gathering from.
start_indices: Array containing the starting indices of the slices we gather.
dimension_numbers: A serialized xla::GatherDimensionNumbers proto.
slice_sizes: slice_sizes[i] is the bounds for the slice on dimension i.
indices_are_sorted: Boolean indicating if the indices are sorted.
)doc");

REGISTER_OP("XlaScatter")
    .Input("operand: T")
    .Input("scatter_indices: Tindices")
    .Input("updates: T")
    .Attr("update_computation: func")
    .Attr("dimension_numbers: string")
    .Attr("indices_are_sorted: bool")
    .Attr("T: {numbertype, bool}")
    .Attr("Tindices: {int32, int64}")
    .Output("output: T")
    .SetShapeFn(shape_inference::UnchangedShape)
    .Doc(R"doc(
Wraps the XLA Scatter operator documented at
  https://www.tensorflow.org/xla/operation_semantics#scatter.

operand: Array to be scattered into.
scatter_indices: Array containing the starting indices of the slices that must
  be scattered to.
updates: Array containing the values that must be used for scattering.
update_computation: Computation to be used for combining the existing values in
  the input array and the updates during scatter.
dimension_numbers: A serialized xla::ScatterDimensionNumbers proto.
indices_are_sorted: Boolean indicating if the indices are sorted.
)doc");

}  // namespace
}  // namespace tensorflow