1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 // This file implements logic for legalizing HLO to TensorFlow.
17
18 #include <cstdint>
19 #include <functional>
20 #include <memory>
21 #include <numeric>
22 #include <vector>
23
24 #include "llvm/ADT/APInt.h"
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/StringRef.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project
32 #include "mlir/IR/Attributes.h" // from @llvm-project
33 #include "mlir/IR/BuiltinAttributes.h" // from @llvm-project
34 #include "mlir/IR/BuiltinTypes.h" // from @llvm-project
35 #include "mlir/IR/Location.h" // from @llvm-project
36 #include "mlir/IR/MLIRContext.h" // from @llvm-project
37 #include "mlir/IR/Matchers.h" // from @llvm-project
38 #include "mlir/IR/Operation.h" // from @llvm-project
39 #include "mlir/IR/PatternMatch.h" // from @llvm-project
40 #include "mlir/IR/Value.h" // from @llvm-project
41 #include "mlir/Pass/Pass.h" // from @llvm-project
42 #include "mlir/Support/LLVM.h" // from @llvm-project
43 #include "mlir/Support/LogicalResult.h" // from @llvm-project
44 #include "mlir/Transforms/DialectConversion.h" // from @llvm-project
45 #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h"
46 #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
47 #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base_structs.h"
48 #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h"
49 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
50 #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h"
51 #include "tensorflow/core/framework/kernel_shape_util.h"
52 #include "tensorflow/core/lib/math/math_util.h"
53
54 namespace mlir {
55 namespace TF {
56 namespace {
57
58 using mhlo::DotDimensionNumbers;
59
60 class ConvertConvOp : public OpConversionPattern<mhlo::ConvOp> {
61 public:
62 using OpConversionPattern::OpConversionPattern;
63
matchAndRewrite(mhlo::ConvOp conv_op,ArrayRef<Value> args,ConversionPatternRewriter & rewriter) const64 LogicalResult matchAndRewrite(
65 mhlo::ConvOp conv_op, ArrayRef<Value> args,
66 ConversionPatternRewriter &rewriter) const final {
67 if (!IsSupportedConvOp(conv_op)) {
68 return failure();
69 }
70
71 // Constructs strides array.
72 // For example, [2, 3] -> [1, 2, 3, 1].
73 SmallVector<int64_t, 4> strides({1});
74 for (const auto v :
75 conv_op.window_strides().getValue().getValues<int64_t>()) {
76 strides.emplace_back(v);
77 }
78 strides.emplace_back(1);
79
80 // Constructs dilation array.
81 SmallVector<int64_t, 4> dilation;
82 if (auto rhs_dilation = conv_op.rhs_dilation()) {
83 // For example, [2, 3] -> [1, 2, 3, 1].
84 dilation.emplace_back(1);
85 dilation.append(rhs_dilation.getValue().getValues<int64_t>().begin(),
86 rhs_dilation.getValue().getValues<int64_t>().end());
87 dilation.emplace_back(1);
88 } else {
89 // Default value
90 dilation = {1, 1, 1, 1};
91 }
92
93 const int input_feature_dimension =
94 conv_op.dimension_numbers().input_feature_dimension().getInt();
95 const int input_channels =
96 conv_op.lhs().getType().cast<ShapedType>().getDimSize(
97 input_feature_dimension);
98 int feature_group_count = conv_op.feature_group_count();
99
100 if (feature_group_count != 1 && feature_group_count != input_channels) {
101 // Group convolution is not supported yet.
102 return failure();
103 }
104
105 const bool is_depthwise_conv = input_channels == feature_group_count;
106 std::string padding;
107
108 if (!conv_op.padding().hasValue() ||
109 (conv_op.padding().getValue().isSplat() &&
110 conv_op.padding()->getSplatValue<int64_t>() == 0)) {
111 padding = "VALID";
112 } else {
113 // Check if padding is "SAME".
114 // TODO(chhe): To support "EXPLICIT" padding.
115 SmallVector<int64_t, 8> padding_array;
116 for (const auto v : conv_op.padding().getValue().getValues<int64_t>()) {
117 padding_array.emplace_back(v);
118 }
119
120 const int num_spatial_dims = conv_op.dimension_numbers()
121 .input_spatial_dimensions()
122 .getNumElements();
123 if (!IsSamePadding(conv_op, num_spatial_dims, strides, dilation,
124 padding_array))
125 return failure();
126
127 padding = "SAME";
128 }
129
130 CreateConvOp(conv_op, strides, padding, dilation, is_depthwise_conv,
131 input_channels, rewriter);
132 return success();
133 };
134
135 private:
IsSamePadding(mhlo::ConvOp conv_op,int num_spatial_dims,ArrayRef<int64_t> strides,ArrayRef<int64_t> dilation,ArrayRef<int64_t> padding_array) const136 bool IsSamePadding(mhlo::ConvOp conv_op, int num_spatial_dims,
137 ArrayRef<int64_t> strides, ArrayRef<int64_t> dilation,
138 ArrayRef<int64_t> padding_array) const {
139 for (auto i : llvm::seq<int>(0, num_spatial_dims)) {
140 int dim = i + 1;
141 tensorflow::int64 output_size;
142 tensorflow::int64 pad_low_int64;
143 tensorflow::int64 pad_high_int64;
144 tensorflow::Status status = tensorflow::GetWindowedOutputSizeVerboseV2(
145 conv_op.lhs().getType().cast<ShapedType>().getDimSize(dim),
146 conv_op.rhs().getType().cast<ShapedType>().getDimSize(i),
147 dilation[dim], strides[dim], tensorflow::Padding::SAME, &output_size,
148 &pad_low_int64, &pad_high_int64);
149 if (!status.ok()) return false;
150 if (padding_array[2 * i] != pad_low_int64 ||
151 padding_array[2 * i + 1] != pad_high_int64)
152 return false;
153 }
154
155 return true;
156 }
157
CreateConvOp(mhlo::ConvOp conv_op,ArrayRef<int64_t> strides,StringRef padding,ArrayRef<int64_t> dilation,bool is_depthwise_conv,int input_channels,ConversionPatternRewriter & rewriter) const158 void CreateConvOp(mhlo::ConvOp conv_op, ArrayRef<int64_t> strides,
159 StringRef padding, ArrayRef<int64_t> dilation,
160 bool is_depthwise_conv, int input_channels,
161 ConversionPatternRewriter &rewriter) const {
162 // TODO(chhe): To support more data formats other than "NHWC".
163 if (is_depthwise_conv) {
164 // Reshapes filter format to [filter_height, filter_width, in_channels,
165 // channel_multiplier] from HLO's [filter_height, filter_width, 1,
166 // in_channels * channel_multiplier] format.
167 auto filter_type = conv_op.rhs().getType().cast<ShapedType>();
168 llvm::ArrayRef<int64_t> hlo_filter_shape = filter_type.getShape();
169 llvm::SmallVector<int64_t, 4> tf_filter_shape(hlo_filter_shape.begin(),
170 hlo_filter_shape.end());
171 tf_filter_shape[2] = input_channels;
172 tf_filter_shape[3] = hlo_filter_shape.back() / input_channels;
173 auto reshaped_filter = rewriter.create<mhlo::ReshapeOp>(
174 conv_op.rhs().getLoc(),
175 RankedTensorType::get(tf_filter_shape, filter_type.getElementType()),
176 conv_op.rhs());
177
178 rewriter.replaceOpWithNewOp<DepthwiseConv2dNativeOp>(
179 conv_op, conv_op.getType(), conv_op.lhs(), reshaped_filter,
180 rewriter.getI64ArrayAttr(strides),
181 /*padding=*/rewriter.getStringAttr(padding),
182 /*explicit_paddings=*/rewriter.getI64ArrayAttr({}),
183 /*data_format=*/rewriter.getStringAttr("NHWC"),
184 /*dilations=*/rewriter.getI64ArrayAttr(dilation));
185 } else {
186 rewriter.replaceOpWithNewOp<Conv2DOp>(
187 conv_op, conv_op.getType(), conv_op.lhs(), conv_op.rhs(),
188 rewriter.getI64ArrayAttr(strides),
189 /*use_cudnn_on_gpu=*/rewriter.getBoolAttr(true),
190 /*padding=*/rewriter.getStringAttr(padding),
191 /*explicit_paddings=*/rewriter.getI64ArrayAttr({}),
192 /*data_format=*/rewriter.getStringAttr("NHWC"),
193 /*dilations=*/rewriter.getI64ArrayAttr(dilation));
194 }
195 }
196
IsSupportedConvOp(mhlo::ConvOp conv_op) const197 bool IsSupportedConvOp(mhlo::ConvOp conv_op) const {
198 if (!conv_op.lhs().getType().cast<ShapedType>().hasStaticShape() ||
199 !conv_op.rhs().getType().cast<ShapedType>().hasStaticShape() ||
200 !conv_op.getType().cast<ShapedType>().hasStaticShape())
201 return false;
202
203 // All ones in "lhs_dilation" means this "mhlo.conv" op should be
204 // converted to "tf.Conv2D" or "tf.DepthwiseConv2dNativeOp".
205 if (conv_op.lhs_dilation().hasValue()) {
206 auto lhs_dilation = conv_op.lhs_dilation().getValue();
207 if (!lhs_dilation.isSplat() || lhs_dilation.getSplatValue<int64_t>() != 1)
208 return false;
209 }
210
211 if (!conv_op.window_strides().hasValue() || conv_op.window_strides()
212 .getValue()
213 .getType()
214 .cast<ShapedType>()
215 .getRank() != 1)
216 return false;
217
218 int num_spatial_dims =
219 conv_op.dimension_numbers().input_spatial_dimensions().getNumElements();
220 // TODO(b/158636600): Currently we don't support 3D Convolution.
221 if (num_spatial_dims != 2) return false;
222
223 // TODO(chhe): To support more data formats other than "NHWC".
224 // Checks input dimensions.
225 if (conv_op.dimension_numbers().input_batch_dimension().getInt() != 0 ||
226 conv_op.dimension_numbers().input_feature_dimension().getInt() !=
227 num_spatial_dims + 1)
228 return false;
229 DenseIntElementsAttr input_spatial_dimensions =
230 conv_op.dimension_numbers().input_spatial_dimensions();
231 for (auto p :
232 llvm::enumerate(input_spatial_dimensions.getValues<int64_t>())) {
233 if (p.value() != p.index() + 1) return false;
234 }
235
236 // Checks output dimensions.
237 if (conv_op.dimension_numbers().output_batch_dimension().getInt() != 0 ||
238 conv_op.dimension_numbers().output_feature_dimension().getInt() !=
239 num_spatial_dims + 1)
240 return false;
241 DenseIntElementsAttr output_spatial_dimensions =
242 conv_op.dimension_numbers().output_spatial_dimensions();
243 for (auto p :
244 llvm::enumerate(output_spatial_dimensions.getValues<int64_t>())) {
245 if (p.value() != p.index() + 1) return false;
246 }
247
248 // Checks kernel dimensions.
249 if (conv_op.dimension_numbers().kernel_input_feature_dimension().getInt() !=
250 num_spatial_dims ||
251 conv_op.dimension_numbers()
252 .kernel_output_feature_dimension()
253 .getInt() != num_spatial_dims + 1)
254 return false;
255 DenseIntElementsAttr kernal_spatial_dimensions =
256 conv_op.dimension_numbers().kernel_spatial_dimensions();
257 for (auto p :
258 llvm::enumerate(kernal_spatial_dimensions.getValues<int64_t>())) {
259 if (p.value() != p.index()) return false;
260 }
261
262 return true;
263 }
264 };
265
266 class ConvertSliceOp : public OpConversionPattern<mhlo::SliceOp> {
267 public:
268 using OpConversionPattern::OpConversionPattern;
269
matchAndRewrite(mhlo::SliceOp slice_op,ArrayRef<Value> args,ConversionPatternRewriter & rewriter) const270 LogicalResult matchAndRewrite(
271 mhlo::SliceOp slice_op, ArrayRef<Value> args,
272 ConversionPatternRewriter &rewriter) const final {
273 DenseIntElementsAttr strides = slice_op.strides();
274 // Strides must be 1 otherwise we cannot legalize this `mhlo.slice` op.
275 if (!strides.isSplat() ||
276 strides.getSplatValue().cast<IntegerAttr>().getInt() != 1)
277 return failure();
278
279 rewriter.setInsertionPointAfter(slice_op.getOperation());
280 auto start_indices = slice_op.start_indices();
281 auto limit_indices = slice_op.limit_indices();
282 std::vector<int64_t> size_values;
283 for (auto pair : llvm::zip(start_indices.getValues<APInt>(),
284 limit_indices.getValues<APInt>())) {
285 size_values.emplace_back(std::get<1>(pair).getSExtValue() -
286 std::get<0>(pair).getSExtValue());
287 }
288
289 RankedTensorType ty =
290 RankedTensorType::get({static_cast<int64_t>(size_values.size())},
291 rewriter.getIntegerType(64));
292 auto start = rewriter.create<ConstOp>(slice_op.getLoc(), start_indices);
293 auto size = rewriter.create<ConstOp>(
294 slice_op.getLoc(), DenseIntElementsAttr::get(ty, size_values));
295 rewriter.replaceOpWithNewOp<SliceOp>(slice_op, slice_op.getType(),
296 slice_op.operand(), start, size);
297 return success();
298 };
299 };
300
301 class ConvertDynamicSliceOp : public OpConversionPattern<mhlo::DynamicSliceOp> {
302 public:
303 using OpConversionPattern::OpConversionPattern;
304
matchAndRewrite(mhlo::DynamicSliceOp op,ArrayRef<Value> args,ConversionPatternRewriter & rewriter) const305 LogicalResult matchAndRewrite(
306 mhlo::DynamicSliceOp op, ArrayRef<Value> args,
307 ConversionPatternRewriter &rewriter) const final {
308 ShapedType input_type = op.operand().getType().cast<ShapedType>();
309 if (!input_type.hasStaticShape()) return failure();
310 Type start_indices_element_type = op.start_indices()
311 .front()
312 .getType()
313 .cast<ShapedType>()
314 .getElementType();
315
316 // Clamp indices to [0, input_size - output_size]
317 llvm::SmallVector<Value, 4> start_indices_vector;
318 start_indices_vector.reserve(op.start_indices().size());
319 Value clamp_min = rewriter.create<ConstOp>(
320 op.getLoc(), rewriter.getIntegerAttr(start_indices_element_type, 0));
321 for (uint64_t i = 0, e = op.start_indices().size(); i < e; ++i) {
322 Value clamp_max = rewriter.create<ConstOp>(
323 op.getLoc(),
324 rewriter.getIntegerAttr(start_indices_element_type,
325 input_type.getShape()[i] -
326 op.slice_sizes().getValue<int64_t>({i})));
327 Value clamped_index = rewriter.create<mhlo::ClampOp>(
328 op.getLoc(), op.start_indices()[i].getType(), op.start_indices()[i],
329 clamp_min, clamp_max);
330 start_indices_vector.push_back(clamped_index);
331 }
332
333 // Pack individual start indices to start indices tensor.
334 Type start_indices_type = RankedTensorType::get(
335 {static_cast<int64_t>(start_indices_vector.size())},
336 start_indices_element_type);
337 Value start_indices_op = rewriter.create<PackOp>(
338 op.getLoc(), start_indices_type, ValueRange(start_indices_vector));
339
340 Value slice_sices_op =
341 rewriter.create<ConstOp>(op.getLoc(), op.slice_sizes());
342 rewriter.replaceOpWithNewOp<SliceOp>(op, op.getType(), op.operand(),
343 start_indices_op, slice_sices_op);
344 return success();
345 };
346 };
347
348 // Appends all elements in `range` to `values`.
349 template <typename ValueT, typename Range>
Append(llvm::SmallVectorImpl<ValueT> & values,Range && range)350 void Append(llvm::SmallVectorImpl<ValueT> &values, Range &&range) {
351 values.insert(values.end(), range.begin(), range.end());
352 }
353
354 // Appends all elements in `range` to `values`.
355 template <typename ValueT, typename Range, typename... RangeTs>
Append(llvm::SmallVectorImpl<ValueT> & values,Range && range,RangeTs &&...ranges)356 void Append(llvm::SmallVectorImpl<ValueT> &values, Range &&range,
357 RangeTs &&...ranges) {
358 values.insert(values.end(), range.begin(), range.end());
359 Append(values, ranges...);
360 }
361
// Returns `range.size()`.
template <typename Range>
size_t Size(Range &&range) {
  const size_t count = range.size();
  return count;
}

// Returns the sum of `size()` over `range` and every one of `ranges`.
template <typename Range, typename... RangeTs>
size_t Size(Range &&range, RangeTs &&...ranges) {
  const size_t head_count = range.size();
  const size_t tail_count = Size(std::forward<RangeTs>(ranges)...);
  return head_count + tail_count;
}
373
374 // Concats all elements in `ranges` and returns a small vector as a result.
375 template <typename ValueT, typename... RangeTs>
Concat(RangeTs &&...ranges)376 llvm::SmallVector<ValueT, 4> Concat(RangeTs &&...ranges) {
377 llvm::SmallVector<int64_t, 4> results;
378 results.reserve(Size(std::forward<RangeTs>(ranges)...));
379 Append(results, std::forward<RangeTs>(ranges)...);
380 return results;
381 }
382
383 // A struct to hold axes and sizes for a set of dimensions.
384 struct DimensionVector {
AxesArraymlir::TF::__anonce99d8c50111::DimensionVector385 llvm::ArrayRef<int64_t> AxesArray() const { return axes; }
SizesArraymlir::TF::__anonce99d8c50111::DimensionVector386 llvm::ArrayRef<int64_t> SizesArray() const { return sizes; }
387
388 llvm::SmallVector<int64_t, 4> axes;
389 llvm::SmallVector<int64_t, 4> sizes;
390 };
391
392 // A struct to hold information about dimensions of dot_general operands.
393 class DotDimensionsInfo {
394 public:
DotDimensionsInfo(ShapedType type,DenseIntElementsAttr batch_dimensions,DenseIntElementsAttr contracting_dimensions)395 DotDimensionsInfo(ShapedType type, DenseIntElementsAttr batch_dimensions,
396 DenseIntElementsAttr contracting_dimensions) {
397 const int rank = type.getRank();
398 for (const int dim : batch_dimensions.getValues<int64_t>()) {
399 batch_dimensions_.axes.push_back(dim);
400 batch_dimensions_.sizes.push_back(type.getDimSize(dim));
401 }
402
403 for (const int dim : contracting_dimensions.getValues<int64_t>()) {
404 contracting_dimensions_.axes.push_back(dim);
405 contracting_dimensions_.sizes.push_back(type.getDimSize(dim));
406 }
407
408 for (int dim = 0; dim < rank; ++dim) {
409 if (llvm::count(contracting_dimensions_.axes, dim) > 0 ||
410 llvm::count(batch_dimensions_.axes, dim) > 0) {
411 continue;
412 }
413 out_dimensions_.axes.push_back(dim);
414 out_dimensions_.sizes.push_back(type.getDimSize(dim));
415 }
416 }
417
batch_dimensions() const418 const DimensionVector &batch_dimensions() const { return batch_dimensions_; }
contracting_dimensions() const419 const DimensionVector &contracting_dimensions() const {
420 return contracting_dimensions_;
421 }
422 // Out dimensions are any dimensions that are neither batch nor contracting
423 // dimensions, hence will be propagated to output shape.
out_dimensions() const424 const DimensionVector &out_dimensions() const { return out_dimensions_; }
425
426 // Returns the total dimension size after flattening all contracting
427 // dimensions.
FlattenedContractingDimensionSize() const428 int FlattenedContractingDimensionSize() const {
429 return std::accumulate(contracting_dimensions_.sizes.begin(),
430 contracting_dimensions_.sizes.end(), 1,
431 std::multiplies<int64_t>());
432 }
433
434 // Returns the total dimension size after flattening all out dimensions.
FlattenedOutDimensionSize() const435 int FlattenedOutDimensionSize() const {
436 return std::accumulate(out_dimensions_.sizes.begin(),
437 out_dimensions_.sizes.end(), 1,
438 std::multiplies<int64_t>());
439 }
440
441 private:
442 DimensionVector batch_dimensions_;
443 DimensionVector contracting_dimensions_;
444 // Out dimensions are any dimensions that are neither batch nor contracting
445 // dimensions, hence will be propagated to output shape.
446 DimensionVector out_dimensions_;
447 };
448
ConvertDot(PatternRewriter & rewriter,Value lhs,Value rhs,DotDimensionNumbers dot_dimension_numbers,ShapedType result_type,mlir::Location loc)449 Value ConvertDot(PatternRewriter &rewriter, Value lhs, Value rhs,
450 DotDimensionNumbers dot_dimension_numbers,
451 ShapedType result_type, mlir::Location loc) {
452 auto lhs_type = lhs.getType().cast<ShapedType>();
453 auto rhs_type = rhs.getType().cast<ShapedType>();
454 const int lhs_rank = lhs_type.getRank();
455 const int rhs_rank = rhs_type.getRank();
456
457 // Collects lhs and rhs dimensions information.
458 DotDimensionsInfo lhs_dot_dimensions_info(
459 lhs_type, dot_dimension_numbers.lhs_batching_dimensions(),
460 dot_dimension_numbers.lhs_contracting_dimensions());
461 DotDimensionsInfo rhs_dot_dimensions_info(
462 rhs_type, dot_dimension_numbers.rhs_batching_dimensions(),
463 dot_dimension_numbers.rhs_contracting_dimensions());
464
465 // Transposes lhs shape to be in the order of {batch_dimensions,
466 // out_dimensions, contracting dimensions}.
467 llvm::SmallVector<int64_t, 4> lhs_permutation = Concat<int64_t>(
468 lhs_dot_dimensions_info.batch_dimensions().AxesArray(),
469 lhs_dot_dimensions_info.out_dimensions().AxesArray(),
470 lhs_dot_dimensions_info.contracting_dimensions().AxesArray());
471 llvm::SmallVector<int64_t, 4> lhs_transposed_shape = Concat<int64_t>(
472 lhs_dot_dimensions_info.batch_dimensions().SizesArray(),
473 lhs_dot_dimensions_info.out_dimensions().SizesArray(),
474 lhs_dot_dimensions_info.contracting_dimensions().SizesArray());
475 auto lhs_transposed = rewriter.create<mhlo::TransposeOp>(
476 loc,
477 RankedTensorType::get(lhs_transposed_shape, lhs_type.getElementType()),
478 lhs,
479 DenseIntElementsAttr::get(
480 RankedTensorType::get({lhs_rank}, rewriter.getI64Type()),
481 lhs_permutation));
482
483 // Transposes rhs shape to be in the order of {batch_dimensions, contracting
484 // dimensions, out_dimensions}.
485 llvm::SmallVector<int64_t, 4> rhs_permutation = Concat<int64_t>(
486 rhs_dot_dimensions_info.batch_dimensions().AxesArray(),
487 rhs_dot_dimensions_info.contracting_dimensions().AxesArray(),
488 rhs_dot_dimensions_info.out_dimensions().AxesArray());
489 llvm::SmallVector<int64_t, 4> rhs_transposed_shape = Concat<int64_t>(
490 rhs_dot_dimensions_info.batch_dimensions().SizesArray(),
491 rhs_dot_dimensions_info.contracting_dimensions().SizesArray(),
492 rhs_dot_dimensions_info.out_dimensions().SizesArray());
493 auto rhs_transposed = rewriter.create<mhlo::TransposeOp>(
494 loc,
495 RankedTensorType::get(rhs_transposed_shape, rhs_type.getElementType()),
496 rhs,
497 DenseIntElementsAttr::get(
498 RankedTensorType::get({rhs_rank}, rewriter.getI64Type()),
499 rhs_permutation));
500
501 // Reshapes lhs to flatten out_dimensions and contracting_dimensions.
502 llvm::SmallVector<int64_t, 4> lhs_flattened_shape = Concat<int64_t>(
503 lhs_dot_dimensions_info.batch_dimensions().SizesArray(),
504 llvm::ArrayRef<int64_t>{
505 lhs_dot_dimensions_info.FlattenedOutDimensionSize()},
506 llvm::ArrayRef<int64_t>{
507 lhs_dot_dimensions_info.FlattenedContractingDimensionSize()});
508 auto lhs_flattend = rewriter.create<mhlo::ReshapeOp>(
509 loc,
510 RankedTensorType::get(lhs_flattened_shape, lhs_type.getElementType()),
511 lhs_transposed.getResult());
512
513 // Reshapes rhs to flatten out_dimensions and contracting_dimensions.
514 llvm::SmallVector<int64_t, 4> rhs_flattened_shape = Concat<int64_t>(
515 rhs_dot_dimensions_info.batch_dimensions().SizesArray(),
516 llvm::ArrayRef<int64_t>{
517 rhs_dot_dimensions_info.FlattenedContractingDimensionSize()},
518 llvm::ArrayRef<int64_t>{
519 rhs_dot_dimensions_info.FlattenedOutDimensionSize()});
520 auto rhs_flattend = rewriter.create<mhlo::ReshapeOp>(
521 loc,
522 RankedTensorType::get(rhs_flattened_shape, rhs_type.getElementType()),
523 rhs_transposed.getResult());
524
525 // Creates matmul op of `lhs_flattend` and `rhs_flattend`.
526 llvm::SmallVector<int64_t, 4> matmul_shape =
527 Concat<int64_t>(lhs_dot_dimensions_info.batch_dimensions().SizesArray(),
528 llvm::ArrayRef<int64_t>{
529 lhs_dot_dimensions_info.FlattenedOutDimensionSize()},
530 llvm::ArrayRef<int64_t>{
531 rhs_dot_dimensions_info.FlattenedOutDimensionSize()});
532 auto matmul = rewriter.create<TF::BatchMatMulV2Op>(
533 loc, RankedTensorType::get(matmul_shape, result_type.getElementType()),
534 lhs_flattend.getResult(), rhs_flattend.getResult());
535 auto reshaped =
536 rewriter.create<mhlo::ReshapeOp>(loc, result_type, matmul.getResult());
537 return reshaped.getResult();
538 }
539
540 // Converts mhlo.dot to tf.MatMul. Reshape ops will be inserted when
541 // necessary.
ConvertDotOp(PatternRewriter & rewriter,Operation * old_op)542 Value ConvertDotOp(PatternRewriter &rewriter, Operation *old_op) {
543 auto dot_op = cast<mhlo::DotOp>(old_op);
544 auto lhs_rank = dot_op.lhs().getType().cast<ShapedType>().getRank();
545 auto dot_dimension_numbers = DotDimensionNumbers::get(
546 /*lhs_batching_dimensions=*/rewriter.getI64TensorAttr({}),
547 /*rhs_batching_dimensions=*/rewriter.getI64TensorAttr({}),
548 /*lhs_contracting_dimensions=*/
549 rewriter.getI64TensorAttr({lhs_rank == 1 ? 0 : 1}),
550 /*rhs_contracting_dimensions=*/rewriter.getI64TensorAttr({0}),
551 rewriter.getContext());
552 return ConvertDot(rewriter, dot_op.lhs(), dot_op.rhs(), dot_dimension_numbers,
553 dot_op.getResult().getType().cast<ShapedType>(),
554 dot_op.getLoc());
555 }
556
557 // Converts mhlo.dot to tf.BatchMatMul. Reshape or Transpose ops will also be
558 // inserted to convert to well-formed matrix multiply.
ConvertDotGeneralOp(PatternRewriter & rewriter,Operation * old_op)559 Value ConvertDotGeneralOp(PatternRewriter &rewriter, Operation *old_op) {
560 auto dot_general_op = cast<mhlo::DotGeneralOp>(old_op);
561 return ConvertDot(rewriter, dot_general_op.lhs(), dot_general_op.rhs(),
562 dot_general_op.dot_dimension_numbers(),
563 dot_general_op.getResult().getType().cast<ShapedType>(),
564 dot_general_op.getLoc());
565 }
566
567 // Checks if the specified region is a binary reduction function what takes 2
568 // inputs, passes it to an instance of the specifiied reduction op and then
569 // returns the result.
570 template <typename ReductionOp>
MatchBinaryReduceFunction(mlir::Region & function)571 LogicalResult MatchBinaryReduceFunction(mlir::Region &function) {
572 Block &body = function.front();
573 if (body.getNumArguments() != 2) return failure();
574
575 mhlo::ReturnOp return_op = dyn_cast<mhlo::ReturnOp>(body.back());
576 if (!return_op) return failure();
577 if (return_op.getNumOperands() != 1) return failure();
578
579 ReductionOp reduce_op = dyn_cast_or_null<ReductionOp>(
580 return_op.getOperands().front().getDefiningOp());
581 if (!reduce_op) return failure();
582 if (reduce_op.lhs() != body.getArgument(0) ||
583 reduce_op.rhs() != body.getArgument(1))
584 return failure();
585
586 return success();
587 }
588
589 // Check if the specified region is a binary reduction function what takes 2
590 // inputs and returns the second input. Functions like this are used by update
591 // scatter like ops.
592 template <>
MatchBinaryReduceFunction(mlir::Region & function)593 LogicalResult MatchBinaryReduceFunction<void>(mlir::Region &function) {
594 Block &body = function.front();
595 if (body.getNumArguments() != 2) return failure();
596
597 mhlo::ReturnOp return_op = dyn_cast<mhlo::ReturnOp>(body.back());
598 if (!return_op) return failure();
599 if (return_op.getNumOperands() != 1) return failure();
600 if (return_op.getOperands().front() != body.getArgument(1)) return failure();
601 return success();
602 }
603
604 // Converts an mhlo.reduce op with the specified BinaryOp as the reduction
605 // operation into the specified TfOp.
606 template <typename BinaryOp, typename TfOp>
607 class ConvertReduceOpToTfOp : public OpConversionPattern<mhlo::ReduceOp> {
608 public:
609 using OpConversionPattern::OpConversionPattern;
610
matchAndRewrite(mhlo::ReduceOp reduce_op,ArrayRef<Value> args,ConversionPatternRewriter & rewriter) const611 LogicalResult matchAndRewrite(
612 mhlo::ReduceOp reduce_op, ArrayRef<Value> args,
613 ConversionPatternRewriter &rewriter) const final {
614 if (failed(MatchReduceOpInput(reduce_op))) return failure();
615
616 if (failed(MatchBinaryReduceFunction<BinaryOp>(reduce_op.body())))
617 return failure();
618
619 // In `MatchReduceOpInput` function, we already match that the
620 // "mhlo::ReduceOp" only has one input, one init_value and one result.
621 if (failed(MatchInitValue(reduce_op.init_values()[0]))) return failure();
622
623 auto input = reduce_op.operands()[0];
624
625 // Get reduction dimension.
626 DenseIntElementsAttr dimension = reduce_op.dimensions();
627 SmallVector<int64_t, 4> reduce_dims;
628 for (const int64_t &dim : dimension.getValues<int64_t>()) {
629 reduce_dims.emplace_back(dim);
630 }
631 auto dim_type = RankedTensorType::get(
632 {static_cast<int64_t>(reduce_dims.size())}, rewriter.getI64Type());
633 auto reduction_indices = rewriter.create<ConstOp>(
634 reduce_op.getLoc(), dim_type, rewriter.getI64TensorAttr(reduce_dims));
635
636 rewriter.replaceOpWithNewOp<TfOp>(reduce_op, reduce_op.getType(0), input,
637 reduction_indices,
638 /*keep_dim=*/rewriter.getBoolAttr(false));
639 return success();
640 }
641
642 private:
643 // Checks that the init value matches with the init value expected for the
644 // target TfOp.
645 virtual LogicalResult MatchInitValue(Value init_value) const = 0;
646
647 // This function tries to match that the "mhlo::ReduceOp" only has one
648 // input, one init_value and one result.
MatchReduceOpInput(mhlo::ReduceOp reduce_op) const649 LogicalResult MatchReduceOpInput(mhlo::ReduceOp reduce_op) const {
650 if (reduce_op.operands().size() != 1 ||
651 reduce_op.init_values().size() != 1 ||
652 reduce_op.getResults().size() != 1)
653 return failure();
654
655 if (!reduce_op.operands()[0].getType().isa<RankedTensorType>())
656 return failure();
657 if (!reduce_op.getType(0).isa<RankedTensorType>()) return failure();
658 return success();
659 }
660 };
661
662 class ConvertReduceOpToTfSum
663 : public ConvertReduceOpToTfOp<mhlo::AddOp, TF::SumOp> {
664 public:
665 using ConvertReduceOpToTfOp::ConvertReduceOpToTfOp;
666
MatchInitValue(Value init_value) const667 LogicalResult MatchInitValue(Value init_value) const override {
668 DenseFPElementsAttr init_attr;
669 if (!matchPattern(init_value, m_Constant(&init_attr)) ||
670 !init_attr.isSplat() || !init_attr.getSplatValue<APFloat>().isZero())
671 return failure();
672 return success();
673 }
674 };
675
676 class ConvertReduceOpToTfMax
677 : public ConvertReduceOpToTfOp<mhlo::MaxOp, TF::MaxOp> {
678 public:
679 using ConvertReduceOpToTfOp::ConvertReduceOpToTfOp;
680
MatchInitValue(Value init_value) const681 LogicalResult MatchInitValue(Value init_value) const override {
682 DenseFPElementsAttr init_attr;
683 if (!matchPattern(init_value, m_Constant(&init_attr)) ||
684 !init_attr.isSplat() ||
685 !init_attr.getSplatValue<APFloat>().isInfinity() ||
686 !init_attr.getSplatValue<APFloat>().isNegative())
687 return failure();
688 return success();
689 }
690 };
691
692 class ConvertReduceOpToTfMin
693 : public ConvertReduceOpToTfOp<mhlo::MinOp, TF::MinOp> {
694 public:
695 using ConvertReduceOpToTfOp::ConvertReduceOpToTfOp;
696
MatchInitValue(Value init_value) const697 LogicalResult MatchInitValue(Value init_value) const override {
698 DenseFPElementsAttr init_attr;
699 if (!matchPattern(init_value, m_Constant(&init_attr)) ||
700 !init_attr.isSplat() ||
701 !init_attr.getSplatValue<APFloat>().isInfinity() ||
702 init_attr.getSplatValue<APFloat>().isNegative())
703 return failure();
704 return success();
705 }
706 };
707
708 class ConvertIotaOpToTfRange : public OpConversionPattern<mhlo::IotaOp> {
709 public:
710 using OpConversionPattern::OpConversionPattern;
711
matchAndRewrite(mhlo::IotaOp iota_op,ArrayRef<Value> args,ConversionPatternRewriter & rewriter) const712 LogicalResult matchAndRewrite(
713 mhlo::IotaOp iota_op, ArrayRef<Value> args,
714 ConversionPatternRewriter &rewriter) const final {
715 RankedTensorType type =
716 iota_op.getType().dyn_cast_or_null<RankedTensorType>();
717 if (!type) return failure();
718
719 const uint64_t dimension = iota_op.iota_dimension();
720 Type element_type = type.getElementType();
721 Attribute start, limit, delta;
722 if (element_type.isa<FloatType>()) {
723 start = rewriter.getFloatAttr(element_type, 0.0);
724 limit = rewriter.getFloatAttr(element_type, type.getShape()[dimension]);
725 delta = rewriter.getFloatAttr(element_type, 1.0);
726 } else if (element_type.isa<IntegerType>()) {
727 start = rewriter.getIntegerAttr(element_type, 0);
728 limit = rewriter.getIntegerAttr(element_type, type.getShape()[dimension]);
729 delta = rewriter.getIntegerAttr(element_type, 1);
730 } else {
731 return failure();
732 }
733
734 auto range_type =
735 RankedTensorType::get({type.getShape()[dimension]}, element_type);
736 Value start_op = rewriter.create<TF::ConstOp>(iota_op.getLoc(), start);
737 Value limit_op = rewriter.create<TF::ConstOp>(iota_op.getLoc(), limit);
738 Value delta_op = rewriter.create<TF::ConstOp>(iota_op.getLoc(), delta);
739 Value result = rewriter.create<TF::RangeOp>(iota_op.getLoc(), range_type,
740 start_op, limit_op, delta_op);
741
742 if (type.getRank() > 1) {
743 std::vector<int64_t> reshape_shape(type.getRank(), 1);
744 reshape_shape[iota_op.iota_dimension()] = type.getShape()[dimension];
745 auto reshape_type = RankedTensorType::get(reshape_shape, element_type);
746 Value reshape_shape_op = rewriter.create<TF::ConstOp>(
747 iota_op.getLoc(), rewriter.getI64TensorAttr(reshape_shape));
748 result = rewriter.create<TF::ReshapeOp>(iota_op.getLoc(), reshape_type,
749 result, reshape_shape_op);
750
751 Value broadcast_shape_op = rewriter.create<TF::ConstOp>(
752 iota_op.getLoc(), rewriter.getI64TensorAttr(type.getShape()));
753 result = rewriter.create<TF::BroadcastToOp>(iota_op.getLoc(), type,
754 result, broadcast_shape_op);
755 }
756
757 rewriter.replaceOp(iota_op, result);
758 return success();
759 }
760 };
761
762 // Maps the following represenattions of AvgPool in MHLO into a tf.AvgPool{3D}
763 // operation when they cleanly map to 2D or 3D average pool with VALID or SAME
764 // padding:
765 // * div(reduce_sum_window(x), constant(sizeof(window)))
766 // * div(reduce_sum_window(x), reduce_sum_window(constant(1)))
767 class ConvertAvgPoolOp : public OpConversionPattern<mhlo::DivOp> {
768 public:
769 using OpConversionPattern::OpConversionPattern;
770
matchAndRewrite(mhlo::DivOp div_op,ArrayRef<Value> args,ConversionPatternRewriter & rewriter) const771 LogicalResult matchAndRewrite(
772 mhlo::DivOp div_op, ArrayRef<Value> args,
773 ConversionPatternRewriter &rewriter) const final {
774 auto rw =
775 dyn_cast_or_null<mhlo::ReduceWindowOp>(div_op.lhs().getDefiningOp());
776 if (!rw) return failure();
777
778 // Check that the reduce-window is a sum-reduce-window.
779 if (failed(MatchBinaryReduceFunction<mhlo::AddOp>(rw.body())))
780 return failure();
781
782 // Check that this is a floating point reduce window with a rank of 4 or 5.
783 RankedTensorType rw_type = rw.getType().dyn_cast<RankedTensorType>();
784 if (!rw_type || !rw_type.getElementType().isa<FloatType>() ||
785 rw_type.getRank() <= 3 || rw_type.getRank() > 5)
786 return failure();
787
788 // Check that the Div op doesn't do broadcasting on the output of the reduce
789 // window.
790 if (div_op.getType() != rw.getType()) return failure();
791
792 // tf.avg_pool need at least 3 dimensions (batch, spatial, channel)
793 const uint64_t rank = rw.window_dimensions().size();
794 if (rank <= 2) return failure();
795
796 // If the init value isn't zero then it can't be an average pool.
797 if (!isFloatZero(rw.init_value())) return failure();
798
799 llvm::SmallVector<int64_t, 5> window_strides;
800 if (rw.window_strides().hasValue()) {
801 window_strides.insert(window_strides.end(),
802 rw.window_strides()->getValues<int64_t>().begin(),
803 rw.window_strides()->getValues<int64_t>().end());
804 } else {
805 window_strides.resize(rank, 1);
806 }
807
808 llvm::SmallVector<int64_t, 10> padding;
809 if (rw.padding().hasValue()) {
810 padding.insert(padding.begin(),
811 rw.padding()->getValues<int64_t>().begin(),
812 rw.padding()->getValues<int64_t>().end());
813 } else {
814 padding.resize(2 * rank, 0);
815 }
816
817 // Check that we don't do any reduction along the batch (first) and channel
818 // (last) dimensions.
819 const uint64_t batch_dim = 0;
820 const uint64_t channel_dim = rank - 1;
821 if (rw.window_dimensions().getValue<int64_t>({batch_dim}) != 1 ||
822 rw.window_dimensions().getValue<int64_t>({channel_dim}) != 1 ||
823 window_strides[batch_dim] != 1 || window_strides[channel_dim] != 1 ||
824 padding[2 * batch_dim] != 0 || padding[2 * batch_dim + 1] != 0 ||
825 padding[2 * channel_dim] != 0 || padding[2 * channel_dim + 1] != 0)
826 return failure();
827
828 if (rw.window_dilations().hasValue() &&
829 !(rw.window_dilations()->isSplat() &&
830 rw.window_dilations()->getSplatValue<APInt>() == 1))
831 return failure();
832
833 if (rw.base_dilations().hasValue() &&
834 !(rw.base_dilations()->isSplat() &&
835 rw.base_dilations()->getSplatValue<APInt>() == 1))
836 return failure();
837
838 DenseFPElementsAttr divisor;
839 if (matchPattern(div_op.rhs(), m_Constant(&divisor))) {
840 // If the divisor is a constant then check that it matches with the number
841 // of elements inside the window what is required for a VALID AvgPool.
842 if (!divisor.isSplat()) return failure();
843 int64_t window_size = 1;
844 for (int64_t w : rw.window_dimensions().getValues<int64_t>()) {
845 window_size *= w;
846 }
847 if (!divisor.getSplatValue<APFloat>().isExactlyValue(window_size))
848 return failure();
849
850 // Check that we have no padding.
851 if (!llvm::all_of(padding, [](int64_t i) { return i == 0; }))
852 return failure();
853
854 return replaceWithAvgPool(
855 div_op, rw.operand(),
856 llvm::to_vector<4>(rw.window_dimensions().getValues<int64_t>()),
857 window_strides, "VALID", rewriter);
858 }
859
860 auto rw_rhs =
861 dyn_cast_or_null<mhlo::ReduceWindowOp>(div_op.rhs().getDefiningOp());
862 if (rw_rhs) {
863 // Check that RHS is a sum-reduce-window.
864 if (failed(MatchBinaryReduceFunction<mhlo::AddOp>(rw_rhs.body())))
865 return failure();
866
867 // Check that the RHS is a reduce_window over a constant 1 input with 0 as
868 // the init value.
869 DenseFPElementsAttr rhs_input;
870 if (!isFloatZero(rw_rhs.init_value()) ||
871 !matchPattern(rw_rhs.operand(), m_Constant(&rhs_input)) ||
872 !rhs_input.isSplat() ||
873 !rhs_input.getSplatValue<APFloat>().isExactlyValue(1.0))
874 return failure();
875
876 // Check that the two reduce window have the same window configuration.
877 if (rw.window_dimensions() != rw_rhs.window_dimensions() ||
878 rw.window_strides() != rw_rhs.window_strides() ||
879 rw.window_dilations() != rw_rhs.window_dilations() ||
880 rw.base_dilations() != rw_rhs.base_dilations() ||
881 rw.padding() != rw_rhs.padding())
882 return failure();
883
884 if (llvm::all_of(padding, [](int64_t i) { return i == 0; }))
885 return replaceWithAvgPool(
886 div_op, rw.operand(),
887 llvm::to_vector<4>(rw.window_dimensions().getValues<int64_t>()),
888 window_strides, "VALID", rewriter);
889
890 RankedTensorType input_type =
891 rw.operand().getType().dyn_cast<RankedTensorType>();
892 RankedTensorType output_type = rw.getType().dyn_cast<RankedTensorType>();
893 if (!input_type || !output_type) return failure();
894
895 // Check that the individual padding values are corresponding to SAME
896 // padding from TensorFlow.
897 for (uint64_t i = 1; i < rank - 1; ++i) {
898 int64_t padding_size =
899 (output_type.getShape()[i] - 1) * window_strides[i] +
900 rw.window_dimensions().getValue<int64_t>({i}) -
901 input_type.getShape()[i];
902 if (padding[2 * i] !=
903 tensorflow::MathUtil::FloorOfRatio(padding_size, int64_t(2)) ||
904 padding[2 * i + 1] !=
905 tensorflow::MathUtil::CeilOfRatio(padding_size, int64_t(2)))
906 return failure();
907 }
908 return replaceWithAvgPool(
909 div_op, rw.operand(),
910 llvm::to_vector<4>(rw.window_dimensions().getValues<int64_t>()),
911 window_strides, "SAME", rewriter);
912 }
913 return failure();
914 }
915
916 private:
isFloatZero(Value value) const917 bool isFloatZero(Value value) const {
918 DenseFPElementsAttr initial_value;
919 return matchPattern(value, m_Constant(&initial_value)) &&
920 initial_value.getNumElements() == 1 &&
921 initial_value.getValue<APFloat>({}).isZero();
922 }
923
replaceWithAvgPool(mhlo::DivOp op,Value input,llvm::ArrayRef<int64_t> ksizes,llvm::ArrayRef<int64_t> kstrides,llvm::StringRef padding,ConversionPatternRewriter & rewriter) const924 LogicalResult replaceWithAvgPool(mhlo::DivOp op, Value input,
925 llvm::ArrayRef<int64_t> ksizes,
926 llvm::ArrayRef<int64_t> kstrides,
927 llvm::StringRef padding,
928 ConversionPatternRewriter &rewriter) const {
929 if (ksizes.size() == 4) {
930 rewriter.replaceOpWithNewOp<AvgPoolOp>(
931 op, op.getType(), input, rewriter.getI64ArrayAttr(ksizes),
932 rewriter.getI64ArrayAttr(kstrides), rewriter.getStringAttr(padding),
933 rewriter.getStringAttr("NHWC"));
934 return success();
935 } else if (ksizes.size() == 5) {
936 rewriter.replaceOpWithNewOp<AvgPool3DOp>(
937 op, op.getType(), input, rewriter.getI64ArrayAttr(ksizes),
938 rewriter.getI64ArrayAttr(kstrides), rewriter.getStringAttr(padding),
939 rewriter.getStringAttr("NDHWC"));
940 return success();
941 }
942 return failure();
943 }
944 };
945
946 class LegalizeHloToTf : public PassWrapper<LegalizeHloToTf, FunctionPass> {
getDependentDialects(DialectRegistry & registry) const947 void getDependentDialects(DialectRegistry ®istry) const override {
948 registry.insert<TF::TensorFlowDialect>();
949 }
950
951 public:
952 LegalizeHloToTf() = default;
LegalizeHloToTf(const LegalizeHloToTf &)953 LegalizeHloToTf(const LegalizeHloToTf &) {}
954
955 /// Performs the legalization to the TF dialect.
956 void runOnFunction() override;
957 };
958
959 // Returns the shape of the given value in a Constant Op.
ShapeToConst(PatternRewriter & rewriter,Value value)960 ConstantOp ShapeToConst(PatternRewriter &rewriter, Value value) {
961 ArrayRef<int64_t> shape = value.getType().cast<ShapedType>().getShape();
962 auto attr_type = RankedTensorType::get({static_cast<int64_t>(shape.size())},
963 rewriter.getIntegerType(64));
964 auto attr = DenseElementsAttr::get(attr_type, shape);
965 return rewriter.create<ConstantOp>(value.getLoc(), attr_type, attr);
966 }
967
968 // If index_vector_dim == indices.rank() then insert the implicit extra
969 // dimension into indices to normalize everything to index_vector_dim ==
970 // indices.rank() - 1.
NormalizeIndexVector(Operation * parent_op,Value & indices,ShapedType & indices_type,int64_t index_vector_dim,ConversionPatternRewriter & rewriter)971 LogicalResult NormalizeIndexVector(Operation *parent_op, Value &indices,
972 ShapedType &indices_type,
973 int64_t index_vector_dim,
974 ConversionPatternRewriter &rewriter) {
975 if (index_vector_dim == indices_type.getRank()) {
976 llvm::SmallVector<int64_t, 4> new_start_indices_shape(
977 indices_type.getShape().begin(), indices_type.getShape().end());
978 new_start_indices_shape.push_back(1);
979 indices_type = RankedTensorType::get(new_start_indices_shape,
980 indices_type.getElementType());
981 indices = rewriter.create<mhlo::ReshapeOp>(parent_op->getLoc(),
982 indices_type, indices);
983 } else if (index_vector_dim != indices_type.getRank() - 1) {
984 // If index_vector_dim isn't the last dimension in indices then it isn't
985 // supported yet.
986 // TODO(tberghammer): Transpose indices to support this usecase.
987 return rewriter.notifyMatchFailure(
988 parent_op,
989 "index vector dim isn't the last dimension in start indices");
990 }
991 return success();
992 }
993
994 // Check that `attr` is an R1 iota with integer element type starting from `0`
995 // with `size` number of values.
IsIotaAttr(const DenseIntElementsAttr & attr,int64_t size)996 bool IsIotaAttr(const DenseIntElementsAttr &attr, int64_t size) {
997 if (!attr.getType().getElementType().isa<IntegerType>()) return false;
998 if (attr.getType().getRank() != 1) return false;
999 if (attr.getNumElements() != size) return false;
1000 int64_t iota = 0;
1001 for (auto s : attr.getIntValues()) {
1002 if (s != iota) return false;
1003 ++iota;
1004 }
1005 return true;
1006 }
1007
1008 class ConvertGatherOp : public OpConversionPattern<mhlo::GatherOp> {
1009 public:
1010 using OpConversionPattern::OpConversionPattern;
1011
matchAndRewrite(mhlo::GatherOp gather_op,ArrayRef<Value> args,ConversionPatternRewriter & rewriter) const1012 LogicalResult matchAndRewrite(
1013 mhlo::GatherOp gather_op, ArrayRef<Value> args,
1014 ConversionPatternRewriter &rewriter) const final {
1015 Value operand = gather_op.operand();
1016 Value start_indices = gather_op.start_indices();
1017
1018 // Can only convert with static shaped gather.
1019 ShapedType operand_type = operand.getType().cast<ShapedType>();
1020 ShapedType start_indices_type = start_indices.getType().cast<ShapedType>();
1021 ShapedType result_type = gather_op.getResult().getType().cast<ShapedType>();
1022 if (!operand_type.hasStaticShape() ||
1023 !start_indices_type.hasStaticShape() || !result_type.hasStaticShape()) {
1024 return failure();
1025 }
1026
1027 // Normalize start_indices so index_vector_dim == start_indices.rank() - 1.
1028 int64_t index_vector_dim =
1029 gather_op.dimension_numbers().index_vector_dim().getInt();
1030 if (failed(NormalizeIndexVector(gather_op, start_indices,
1031 start_indices_type, index_vector_dim,
1032 rewriter))) {
1033 return failure();
1034 }
1035
1036 // Verify that start_index_map and collapsed_slice_dims are both an iota
1037 // with the same number of elements as the last dimension of start_indices.
1038 auto start_index_map = gather_op.dimension_numbers().start_index_map();
1039 auto collapsed_slice_dims =
1040 gather_op.dimension_numbers().collapsed_slice_dims();
1041 if (!IsIotaAttr(start_index_map, start_indices_type.getShape().back()) ||
1042 !IsIotaAttr(collapsed_slice_dims,
1043 start_indices_type.getShape().back())) {
1044 // TODO(tberghammer): Transform start_indices to support non-standard
1045 // start_index_maps.
1046 return rewriter.notifyMatchFailure(
1047 gather_op, "unsupported start index map and/or collapsed slice dims");
1048 }
1049
1050 // Verify that slice_sizes is 1 for the indexed dimensions and the full
1051 // shape for the rest of the dimensions.
1052 auto slice_sizes = gather_op.slice_sizes();
1053 int64_t index = 0;
1054 for (int64_t s : slice_sizes.getValues<int64_t>()) {
1055 if (index < start_indices_type.getShape().back()) {
1056 if (s != 1) {
1057 return rewriter.notifyMatchFailure(gather_op,
1058 "unsupported slice sizes");
1059 }
1060 } else {
1061 if (s != operand_type.getShape()[index]) {
1062 return rewriter.notifyMatchFailure(gather_op,
1063 "unsupported slice sizes");
1064 }
1065 }
1066 ++index;
1067 }
1068
1069 // Verify that offset_dims are the tailing dimensions in the output tensor.
1070 auto offset_dims = gather_op.dimension_numbers().offset_dims();
1071 int64_t offset = start_indices_type.getRank() - 1;
1072 for (int64_t o : offset_dims.getValues<int64_t>()) {
1073 if (o != offset) {
1074 return rewriter.notifyMatchFailure(gather_op,
1075 "unsupported offset dims");
1076 }
1077 ++offset;
1078 }
1079
1080 rewriter.replaceOpWithNewOp<TF::GatherNdOp>(gather_op, result_type, operand,
1081 start_indices);
1082 return success();
1083 }
1084 };
1085
1086 template <typename BinaryOp, typename TfOp>
1087 class ConvertScatterOp : public OpConversionPattern<mhlo::ScatterOp> {
1088 public:
1089 using OpConversionPattern::OpConversionPattern;
1090
matchAndRewrite(mhlo::ScatterOp scatter_op,ArrayRef<Value> args,ConversionPatternRewriter & rewriter) const1091 LogicalResult matchAndRewrite(
1092 mhlo::ScatterOp scatter_op, ArrayRef<Value> args,
1093 ConversionPatternRewriter &rewriter) const final {
1094 Value operand = scatter_op.operand();
1095 Value indices = scatter_op.scatter_indices();
1096 Value updates = scatter_op.updates();
1097 ShapedType operand_type = operand.getType().cast<ShapedType>();
1098 ShapedType indices_type = indices.getType().cast<ShapedType>();
1099 ShapedType updates_type = updates.getType().cast<ShapedType>();
1100
1101 // Can only convert with static shaped scatter.
1102 if (!operand_type.hasStaticShape() || !indices_type.hasStaticShape() ||
1103 !updates_type.hasStaticShape()) {
1104 return failure();
1105 }
1106
1107 // Normalize start_indices so index_vector_dim == start_indices.rank() - 1.
1108 int64_t index_vector_dim =
1109 scatter_op.scatter_dimension_numbers().index_vector_dim().getInt();
1110 if (failed(NormalizeIndexVector(scatter_op, indices, indices_type,
1111 index_vector_dim, rewriter))) {
1112 return failure();
1113 }
1114
1115 // Verify that inserted_window_dims and scatter_dims_to_operand_dims are
1116 // both an iota with the same number of elements as the last dimension of
1117 // start_indices.
1118 auto inserted_window_dims =
1119 scatter_op.scatter_dimension_numbers().inserted_window_dims();
1120 auto scatter_dims_to_operand_dims =
1121 scatter_op.scatter_dimension_numbers().scatter_dims_to_operand_dims();
1122 if (!IsIotaAttr(inserted_window_dims, indices_type.getShape().back()) ||
1123 !IsIotaAttr(scatter_dims_to_operand_dims,
1124 indices_type.getShape().back())) {
1125 // TODO(tberghammer): Transform indices to support non-standard
1126 // scatter_dims_to_operand_dims.
1127 return rewriter.notifyMatchFailure(
1128 scatter_op,
1129 "unsupported inserted window dims and/or scatter dims to operand "
1130 "dims");
1131 }
1132
1133 // Verify that update window dims are the tailing dimensions in the update
1134 // tensor.
1135 auto update_window_dims =
1136 scatter_op.scatter_dimension_numbers().update_window_dims();
1137 int64_t offset = indices_type.getRank() - 1;
1138 for (int64_t o : update_window_dims.getValues<int64_t>()) {
1139 if (o != offset) {
1140 return rewriter.notifyMatchFailure(scatter_op,
1141 "unsupported update window dims");
1142 }
1143 ++offset;
1144 }
1145
1146 // Match the scatter computation against computations supported by TF.
1147 if (failed(MatchBinaryReduceFunction<BinaryOp>(
1148 scatter_op.update_computation()))) {
1149 return failure();
1150 }
1151
1152 rewriter.replaceOpWithNewOp<TfOp>(scatter_op,
1153 scatter_op.getResult().getType(), operand,
1154 indices, updates);
1155 return success();
1156 }
1157 };
1158 using ConvertScatterAddOp =
1159 ConvertScatterOp<mhlo::AddOp, TF::TensorScatterAddOp>;
1160 using ConvertScatterMaxOp =
1161 ConvertScatterOp<mhlo::MaxOp, TF::TensorScatterMaxOp>;
1162 using ConvertScatterMinOp =
1163 ConvertScatterOp<mhlo::MinOp, TF::TensorScatterMinOp>;
1164 using ConvertScatterSubOp =
1165 ConvertScatterOp<mhlo::SubOp, TF::TensorScatterSubOp>;
1166 using ConvertScatterUpdateOp =
1167 ConvertScatterOp<void, TF::TensorScatterUpdateOp>;
1168
1169 // Converts mhlo.pad to tf.PadV2
ConvertPadOp(PatternRewriter & rewriter,Operation * old_op)1170 Value ConvertPadOp(PatternRewriter &rewriter, Operation *old_op) {
1171 auto pad_op = cast<mhlo::PadOp>(old_op);
1172 mlir::Location loc = pad_op.getLoc();
1173
1174 llvm::SmallVector<APInt, 8> padding;
1175 for (auto p : llvm::zip(pad_op.edge_padding_low().getValues<APInt>(),
1176 pad_op.edge_padding_high().getValues<APInt>())) {
1177 padding.push_back(std::get<0>(p));
1178 padding.push_back(std::get<1>(p));
1179 }
1180 auto attr_type = RankedTensorType::get({pad_op.edge_padding_low().size(), 2},
1181 rewriter.getI64Type());
1182 auto padding_attr = DenseIntElementsAttr::get(attr_type, padding);
1183 auto padding_op = rewriter.create<ConstantOp>(loc, attr_type, padding_attr);
1184 return rewriter.create<PadV2Op>(loc, pad_op.getType(), pad_op.operand(),
1185 padding_op, pad_op.padding_value());
1186 }
1187
1188 // Returns true if broadcast_dimensions obey Tensorflow convention, as in new
1189 // dimensions are added as prefix.
IsTFStyleBroadcast(DenseIntElementsAttr broadcast_dimensions,Value output)1190 bool IsTFStyleBroadcast(DenseIntElementsAttr broadcast_dimensions,
1191 Value output) {
1192 // broadcast_dimensions is an increasing list by definition, thus it suffices
1193 // to check the first element.
1194 int64_t input_rank = broadcast_dimensions.getNumElements();
1195 int64_t output_rank = output.getType().cast<ShapedType>().getRank();
1196 return input_rank == 0 ||
1197 (broadcast_dimensions.getValue({0}).cast<IntegerAttr>().getInt() ==
1198 output_rank - input_rank);
1199 }
1200
1201 // Returns the intermediate shape that input tensor should be reshaped to during
1202 // legalization of BroadcastInDimOp.
ExpandedShape(PatternRewriter & rewriter,Value input,DenseIntElementsAttr broadcast_dimensions,Value output)1203 ConstantOp ExpandedShape(PatternRewriter &rewriter, Value input,
1204 DenseIntElementsAttr broadcast_dimensions,
1205 Value output) {
1206 // Initialize expanded shape with output rank and dimensions of 1.
1207 SmallVector<Attribute, 4> expanded_shape(
1208 output.getType().cast<ShapedType>().getRank(),
1209 /*Value=*/rewriter.getI64IntegerAttr(1));
1210
1211 // Set dimension sizes specified by broadcast_dimensions.
1212 ArrayRef<int64_t> input_shape = input.getType().cast<ShapedType>().getShape();
1213 for (auto x : llvm::enumerate(broadcast_dimensions)) {
1214 expanded_shape[x.value().getSExtValue()] =
1215 rewriter.getI64IntegerAttr(input_shape[x.index()]);
1216 }
1217
1218 // Create the expanded type wrapped in a ConstantOp.
1219 auto attr_type =
1220 RankedTensorType::get({static_cast<int64_t>(expanded_shape.size())},
1221 rewriter.getIntegerType(64));
1222 auto attr = DenseElementsAttr::get(attr_type, expanded_shape);
1223 return rewriter.create<ConstantOp>(output.getLoc(), attr_type, attr);
1224 }
1225
1226 #include "tensorflow/compiler/mlir/tensorflow/transforms/generated_legalize_hlo.inc"
1227
1228 /// Performs the lowering to XLA dialect.
runOnFunction()1229 void LegalizeHloToTf::runOnFunction() {
1230 MLIRContext &context = getContext();
1231
1232 // Add legalization patterns to the list.
1233 OwningRewritePatternList patterns;
1234 PopulateLegalizeHloToTfPatterns(&patterns, &context);
1235
1236 ConversionTarget target(context);
1237 target.addLegalDialect<TensorFlowDialect>();
1238 target.addLegalOp<CallOp, ConstantOp>();
1239 if (failed(
1240 applyPartialConversion(getFunction(), target, std::move(patterns)))) {
1241 getFunction().emitError("mhlo to TF legalization failed.");
1242 signalPassFailure();
1243 }
1244 }
1245
1246 static PassRegistration<LegalizeHloToTf> pass(
1247 "tf-legalize-hlo", "Legalize from HLO to the TF dialect");
1248
1249 } // end namespace
1250
PopulateLegalizeHloToTfPatterns(OwningRewritePatternList * patterns,MLIRContext * context)1251 void PopulateLegalizeHloToTfPatterns(OwningRewritePatternList *patterns,
1252 MLIRContext *context) {
1253 patterns
1254 ->insert<ConvertAvgPoolOp, ConvertConvOp, ConvertDynamicSliceOp,
1255 ConvertGatherOp, ConvertScatterAddOp, ConvertScatterMaxOp,
1256 ConvertScatterMinOp, ConvertScatterSubOp, ConvertScatterUpdateOp,
1257 ConvertSliceOp, ConvertReduceOpToTfMax, ConvertReduceOpToTfMin,
1258 ConvertReduceOpToTfSum, ConvertIotaOpToTfRange>(context);
1259 populateWithGenerated(context, *patterns);
1260 }
1261
CreateLegalizeHloToTfPass()1262 std::unique_ptr<OperationPass<FuncOp>> CreateLegalizeHloToTfPass() {
1263 return std::make_unique<LegalizeHloToTf>();
1264 }
1265
1266 } // end namespace TF
1267 } // end namespace mlir
1268