1//===- VectorOps.td - Vector op definitions ---------------*- tablegen -*-====//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Defines MLIR vector operations.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef VECTOR_OPS
14#define VECTOR_OPS
15
16include "mlir/Interfaces/SideEffectInterfaces.td"
17include "mlir/Interfaces/VectorInterfaces.td"
18include "mlir/Interfaces/ViewLikeInterface.td"
19
20def Vector_Dialect : Dialect {
21  let name = "vector";
22  let cppNamespace = "::mlir::vector";
23  let hasConstantMaterializer = 1;
24}
25
26// Base class for Vector dialect ops.
27class Vector_Op<string mnemonic, list<OpTrait> traits = []> :
28    Op<Vector_Dialect, mnemonic, traits> {
29  // For every vector op, there needs to be a:
30  //   * void print(OpAsmPrinter &p, ${C++ class of Op} op)
31  //   * LogicalResult verify(${C++ class of Op} op)
32  //   * ParseResult parse${C++ class of Op}(OpAsmParser &parser,
33  //                                         OperationState &result)
34  // functions.
35  let printer = [{ return ::print(p, *this); }];
36  let verifier = [{ return ::verify(*this); }];
37  let parser = [{ return ::parse$cppClass(parser, result); }];
38}
39
40// TODO: Add an attribute to specify a different algebra with operators other
41// than the current set: {*, +}.
42def Vector_ContractionOp :
43  Vector_Op<"contract", [
44      NoSideEffect,
45      PredOpTrait<"lhs and rhs have same element type", TCopVTEtIsSameAs<0, 1>>,
46      PredOpTrait<"third operand acc and result have same element type",
47                  TCresVTEtIsSameAsOpBase<0, 2>>,
48      DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>
49    ]>,
50    Arguments<(ins AnyVector:$lhs, AnyVector:$rhs, AnyType:$acc,
51               Variadic<VectorOf<[I1]>>:$masks,
52               AffineMapArrayAttr:$indexing_maps, ArrayAttr:$iterator_types)>,
53    Results<(outs AnyType)> {
54  let summary = "vector contraction operation";
55  let description = [{
56    Computes the sum of products of vector elements along contracting
57    dimension pairs from 2 vectors of rank M and N respectively, adds this
58    intermediate result to the accumulator argument of rank K, and returns a
59    vector result of rank K (where K = num_lhs_free_dims + num_rhs_free_dims +
60    num_batch_dims (see dimension type descriptions below)). For K = 0 (no
61    free or batch dimensions), the accumulator and output are a scalar.
62
63    Optional vector mask arguments (produced by CreateMaskOp or ConstantMaskOp)
64    specify the dynamic dimension sizes of valid data within the lhs/rhs vector
65    arguments.
66
67    An iterator type attribute list must be specified, where each element of
68    the list represents an iterator with one of the following types:
69
70    *) "reduction": reduction dimensions are present in the lhs and rhs
71                    arguments but not in the output (and accumulator
72                    argument). These are the dimensions along which the vector
73                    contraction op computes the sum of products, and
74                    contracting dimension pair dimension sizes must match
75                    between lhs/rhs.
76    *) "parallel": Batch dimensions are iterator type "parallel", and
77                   are non-contracting dimensions present in the lhs, rhs and
78                   output. The lhs/rhs co-iterate along the batch dimensions,
79                   which should be expressed in their indexing maps.
80
81                   Free dimensions are iterator type "parallel", and are
82                   non-contraction, non-batch dimensions accessed by either the
83                   lhs or rhs (but not both). The lhs and rhs free dimensions
84                   are unrelated to each other and do not co-iterate, which
85                   should be expressed in their indexing maps.
86
87    An indexing map attribute list must be specified with an entry for lhs, rhs
88    and acc arguments. An indexing map attribute specifies a mapping from each
89    iterator in the iterator type list, to each dimension of an N-D vector.
90
91    Example:
92
93    ```mlir
94    // Simple DOT product (K = 0).
95    #contraction_accesses = [
96     affine_map<(i) -> (i)>,
97     affine_map<(i) -> (i)>,
98     affine_map<(i) -> ()>
99    ]
100    #contraction_trait = {
101      indexing_maps = #contraction_accesses,
102      iterator_types = ["reduction"]
103    }
104    %3 = vector.contract #contraction_trait %0, %1, %2
105      : vector<10xf32>, vector<10xf32> into f32
106
107    // 2D vector contraction with one contracting dimension (matmul, K = 2).
108    #contraction_accesses = [
109      affine_map<(i, j, k) -> (i, k)>,
110      affine_map<(i, j, k) -> (k, j)>,
111      affine_map<(i, j, k) -> (i, j)>
112    ]
113    #contraction_trait = {
114      indexing_maps = #contraction_accesses,
115      iterator_types = ["parallel", "parallel", "reduction"]
116    }
117
118    %3 = vector.contract #contraction_trait %0, %1, %2
119      : vector<4x3xf32>, vector<3x7xf32> into vector<4x7xf32>
120
121    // 4D to 3D vector contraction with two contracting dimensions and
122    // one batch dimension (K = 3).
123    #contraction_accesses = [
124      affine_map<(b0, f0, f1, c0, c1) -> (c0, b0, c1, f0)>,
125      affine_map<(b0, f0, f1, c0, c1) -> (b0, c1, c0, f1)>,
126      affine_map<(b0, f0, f1, c0, c1) -> (b0, f0, f1)>
127    ]
128    #contraction_trait = {
129      indexing_maps = #contraction_accesses,
130      iterator_types = ["parallel", "parallel", "parallel",
131                        "reduction", "reduction"]
132    }
133
134    %4 = vector.contract #contraction_trait %0, %1, %2
135        : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x5xf32>
136
137    // 4D vector contraction with two contracting dimensions and optional
138    // vector mask arguments.
139    %lhs_mask = vector.constant_mask [7, 8, 16, 15] : vector<7x8x16x15xi1>
140    %rhs_mask = vector.constant_mask [8, 16, 7, 5] : vector<8x16x7x5xi1>
141
142    %5 = vector.contract #contraction_trait %0, %1, %2, %lhs_mask, %rhs_mask
143       : vector<7x8x16x15xf32>, vector<8x16x7x5xf32> into vector<8x15x8x5xf32>
144
145    // Vector contraction with mixed typed. lhs/rhs have different element
146    // types than accumulator/result.
147    %6 = vector.contract #contraction_trait %0, %1, %2
148      : vector<10xf16>, vector<10xf16> into f32
149    ```
150  }];
151  let builders = [
152    OpBuilderDAG<(ins "Value":$lhs, "Value":$rhs, "Value":$acc,
153      "ArrayAttr":$indexingMaps, "ArrayAttr":$iteratorTypes)>,
154    OpBuilderDAG<(ins "Value":$lhs, "Value":$rhs, "Value":$acc,
155      "ArrayRef<ArrayRef<AffineExpr>>":$indexingExprs,
156      "ArrayRef<StringRef>":$iteratorTypes)>
157  ];
158  let extraClassDeclaration = [{
159    VectorType getLhsType() {
160      return lhs().getType().cast<VectorType>();
161    }
162    VectorType getRhsType() {
163      return rhs().getType().cast<VectorType>();
164    }
165    Type getAccType() { return acc().getType(); }
166    VectorType getLHSVectorMaskType() {
167      if (llvm::size(masks()) != 2) return VectorType();
168      return getOperand(3).getType().cast<VectorType>();
169    }
170    VectorType getRHSVectorMaskType() {
171      if (llvm::size(masks()) != 2) return VectorType();
172      return getOperand(4).getType().cast<VectorType>();
173    }
174    Type getResultType() { return getResult().getType(); }
175    ArrayRef<StringRef> getTraitAttrNames();
176    SmallVector<AffineMap, 4> getIndexingMaps();
177    static unsigned getAccOperandIndex() { return 2; }
178
179    // Returns the bounds of each dimension in the iteration space spanned
180    // by the iterator types of this operation.
181    void getIterationBounds(SmallVectorImpl<int64_t> &iterationBounds);
182
183    // Returns a list of index maps, where there is a list entry for each
184    // op indexing map attribute (i.e. one for each input and output, with
185    // the output listed last). Each index map, maps from this operations
186    // iteration space, to vector dimensions of the maps input/output.
187    void getIterationIndexMap(
188      std::vector<DenseMap<int64_t, int64_t>> &iterationIndexMap);
189
190    std::vector<std::pair<int64_t, int64_t>> getContractingDimMap();
191    std::vector<std::pair<int64_t, int64_t>> getBatchDimMap();
192  }];
193}
194
195def Vector_ReductionOp :
196  Vector_Op<"reduction", [NoSideEffect,
197     PredOpTrait<"source operand and result have same element type",
198                 TCresVTEtIsSameAsOpBase<0, 0>>]>,
199    Arguments<(ins StrAttr:$kind, AnyVector:$vector, Variadic<AnyType>:$acc)>,
200    Results<(outs AnyType:$dest)> {
201  let summary = "reduction operation";
202  let description = [{
203    Reduces an 1-D vector "horizontally" into a scalar using the given
204    operation (add/mul/min/max for int/fp and and/or/xor for int only).
205    Some reductions (add/mul for fp) also allow an optional fused
206    accumulator.
207
208    Note that these operations are restricted to 1-D vectors to remain
209    close to the corresponding LLVM intrinsics:
210
211    http://llvm.org/docs/LangRef.html#vector-reduction-intrinsics
212
213    Example:
214
215    ```mlir
216    %1 = vector.reduction "add", %0 : vector<16xf32> into f32
217
218    %3 = vector.reduction "xor", %2 : vector<4xi32> into i32
219
220    %4 = vector.reduction "mul", %0, %1 : vector<16xf32> into f32
221    ```
222  }];
223  let extraClassDeclaration = [{
224    VectorType getVectorType() {
225      return vector().getType().cast<VectorType>();
226    }
227  }];
228}
229
230def Vector_BroadcastOp :
231  Vector_Op<"broadcast", [NoSideEffect,
232     PredOpTrait<"source operand and result have same element type",
233                 TCresVTEtIsSameAsOpBase<0, 0>>]>,
234    Arguments<(ins AnyType:$source)>,
235    Results<(outs AnyVector:$vector)> {
236  let summary = "broadcast operation";
237  let description = [{
238    Broadcasts the scalar or k-D vector value in the source operand
239    to a n-D result vector such that the broadcast makes sense, i.e.,
240    the source operand is duplicated to match the given rank and sizes
241    in the result vector. The legality rules are:
242    * the source operand must have the same element type as the result type
243    * a k-D vector <s_1 x .. x s_k x type> can be broadcast to
244      a n-D vector <t_1 x .. x t_n x type> if
245       * k <= n, and
246       * the sizes in the trailing dimensions n-k < i <= n with j=i+k-n
247          match exactly as s_j = t_i or s_j = 1:
248       ```
249           t_1 x   ..  t_n-k x t_n-k+1 x .. x t_i x .. x t_n
250                               s_1     x .. x s_j x .. x s_k
251               <duplication>         <potential stretch>
252       ```
253    The source operand is duplicated over all the missing leading dimensions
254    and stretched over the trailing dimensions where the source has a non-equal
255    dimension of 1. These rules imply that any scalar broadcast (k=0) to any
256    shaped vector with the same element type is always legal.
257
258    Example:
259
260    ```mlir
261    %0 = constant 0.0 : f32
262    %1 = vector.broadcast %0 : f32 to vector<16xf32>
263    %2 = vector.broadcast %1 : vector<16xf32> to vector<4x16xf32>
264    ```
265  }];
266  let extraClassDeclaration = [{
267    Type getSourceType() { return source().getType(); }
268    VectorType getVectorType() {
269      return vector().getType().cast<VectorType>();
270    }
271  }];
272  let assemblyFormat = "$source attr-dict `:` type($source) `to` type($vector)";
273  let hasFolder = 1;
274}
275
276def Vector_ShuffleOp :
277  Vector_Op<"shuffle", [NoSideEffect,
278     PredOpTrait<"first operand v1 and result have same element type",
279                 TCresVTEtIsSameAsOpBase<0, 0>>,
280     PredOpTrait<"second operand v2 and result have same element type",
281                 TCresVTEtIsSameAsOpBase<0, 1>>]>,
282     Arguments<(ins AnyVector:$v1, AnyVector:$v2, I64ArrayAttr:$mask)>,
283     Results<(outs AnyVector:$vector)> {
284  let summary = "shuffle operation";
285  let description = [{
286    The shuffle operation constructs a permutation (or duplication) of elements
287    from two input vectors, returning a vector with the same element type as
288    the input and a length that is the same as the shuffle mask. The two input
289    vectors must have the same element type, rank, and trailing dimension sizes
290    and shuffles their values in the leading dimension (which may differ in size)
291    according to the given mask. The legality rules are:
292    * the two operands must have the same element type as the result
293    * the two operands and the result must have the same rank and trailing
294      dimension sizes, viz. given two k-D operands
295              v1 : <s_1 x s_2 x .. x s_k x type> and
296              v2 : <t_1 x t_2 x .. x t_k x type>
297      we have s_i = t_i for all 1 < i <= k
298    * the mask length equals the leading dimension size of the result
299    * numbering the input vector indices left to right across the operands, all
300      mask values must be within range, viz. given two k-D operands v1 and v2
301      above, all mask values are in the range [0,s_1+t_1)
302
303    Example:
304
305    ```mlir
306    %0 = vector.shuffle %a, %b[0, 3]
307               : vector<2xf32>, vector<2xf32>       ; yields vector<2xf32>
308    %1 = vector.shuffle %c, %b[0, 1, 2]
309               : vector<2x16xf32>, vector<1x16xf32> ; yields vector<3x16xf32>
310    %2 = vector.shuffle %a, %b[3, 2, 1, 0]
311               : vector<2xf32>, vector<2xf32>       ; yields vector<4xf32>
312    ```
313  }];
314  let builders = [
315    OpBuilderDAG<(ins "Value":$v1, "Value":$v2, "ArrayRef<int64_t>")>
316  ];
317  let extraClassDeclaration = [{
318    static StringRef getMaskAttrName() { return "mask"; }
319    VectorType getV1VectorType() {
320      return v1().getType().cast<VectorType>();
321    }
322    VectorType getV2VectorType() {
323      return v2().getType().cast<VectorType>();
324    }
325    VectorType getVectorType() {
326      return vector().getType().cast<VectorType>();
327    }
328  }];
329}
330
331def Vector_ExtractElementOp :
332  Vector_Op<"extractelement", [NoSideEffect,
333     TypesMatchWith<"result type matches element type of vector operand",
334                    "vector", "result",
335                    "$_self.cast<ShapedType>().getElementType()">]>,
336    Arguments<(ins AnyVector:$vector, AnySignlessInteger:$position)>,
337    Results<(outs AnyType:$result)> {
338  let summary = "extractelement operation";
339  let description = [{
340    Takes an 1-D vector and a dynamic index position and extracts the
341    scalar at that position. Note that this instruction resembles
342    vector.extract, but is restricted to 1-D vectors and relaxed
343    to dynamic indices. It is meant to be closer to LLVM's version:
344    https://llvm.org/docs/LangRef.html#extractelement-instruction
345
346    Example:
347
348    ```mlir
349    %c = constant 15 : i32
350    %1 = vector.extractelement %0[%c : i32]: vector<16xf32>
351    ```
352  }];
353  let assemblyFormat = [{
354    $vector `[` $position `:` type($position) `]` attr-dict `:` type($vector)
355  }];
356
357  let builders = [
358    OpBuilderDAG<(ins "Value":$source, "int64_t":$position)>,
359    OpBuilderDAG<(ins "Value":$source, "Value":$position)>
360  ];
361  let extraClassDeclaration = [{
362    VectorType getVectorType() {
363      return vector().getType().cast<VectorType>();
364    }
365  }];
366}
367
368def Vector_ExtractOp :
369  Vector_Op<"extract", [NoSideEffect,
370     PredOpTrait<"operand and result have same element type",
371                 TCresVTEtIsSameAsOpBase<0, 0>>]>,
372    Arguments<(ins AnyVector:$vector, I64ArrayAttr:$position)>,
373    Results<(outs AnyType)> {
374  let summary = "extract operation";
375  let description = [{
376    Takes an n-D vector and a k-D position and extracts the (n-k)-D vector at
377    the proper position. Degenerates to an element type in the 0-D case.
378
379    Example:
380
381    ```mlir
382    %1 = vector.extract %0[3]: vector<4x8x16xf32>
383    %2 = vector.extract %0[3, 3, 3]: vector<4x8x16xf32>
384    ```
385  }];
386  let builders = [
387    OpBuilderDAG<(ins "Value":$source, "ArrayRef<int64_t>":$position)>,
388    // Convenience builder which assumes the values in `position` are defined by
389    // ConstantIndexOp.
390    OpBuilderDAG<(ins "Value":$source, "ValueRange":$position)>
391  ];
392  let extraClassDeclaration = [{
393    static StringRef getPositionAttrName() { return "position"; }
394    VectorType getVectorType() {
395      return vector().getType().cast<VectorType>();
396    }
397  }];
398  let hasFolder = 1;
399}
400
401def Vector_ExtractSlicesOp :
402  Vector_Op<"extract_slices", [NoSideEffect]>,
403    Arguments<(ins AnyVector:$vector, I64ArrayAttr:$sizes,
404                   I64ArrayAttr:$strides)>,
405    Results<(outs TupleOf<[AnyVector]>)> {
406  let summary = "vector extract slices operation";
407  let description = [{
408    Takes an N-d vector and returns a tuple of vector slices of 'vector',
409    based on 'sizes' and 'strides' parameters.
410
411    The arguments 'sizes' and 'strides' represent a specification for
412    generating the unrolling of 'vector' shape, which has all slices of shape
413    'sizes' except for slices at dimension boundaries when 'vector' dimension
414    sizes are not a multiple of 'sizes'.
415
416    Each slice is returned at the tuple element index corresponding to the
417    linear index of the slice w.r.t the unrolling scheme represented by 'sizes'.
418    Currently, only unit strides are supported.
419
420    Example:
421
422    ```mlir
423    %0 = vector.transfer_read ...: vector<4x2xf32>
424
425    %1 = vector.extract_slices %0, [2, 2], [1, 1]
426      : vector<4x2xf32> into tuple<vector<2x2xf32>, vector<2x2xf32>>
427
428    // Example with partial slices at dimension boundaries.
429    %2 = vector.transfer_read ...: vector<4x3xf32>
430
431    %3 = vector.extract_slices %2, [2, 2], [1, 1]
432      : vector<4x3xf32> into tuple<vector<2x2xf32>, vector<2x1xf32>,
433                                   vector<2x2xf32>, vector<2x1xf32>>
434    ```
435  }];
436  let builders = [
437    OpBuilderDAG<(ins "TupleType":$tupleType, "Value":$vector,
438      "ArrayRef<int64_t>":$sizes, "ArrayRef<int64_t>":$strides)>
439  ];
440  let extraClassDeclaration = [{
441    VectorType getSourceVectorType() {
442      return vector().getType().cast<VectorType>();
443    }
444    TupleType getResultTupleType() {
445      return getResult().getType().cast<TupleType>();
446    }
447    void getSizes(SmallVectorImpl<int64_t> &results);
448    void getStrides(SmallVectorImpl<int64_t> &results);
449    static StringRef getSizesAttrName() { return "sizes"; }
450    static StringRef getStridesAttrName() { return "strides"; }
451  }];
452  let assemblyFormat = [{
453    $vector `,` $sizes `,` $strides attr-dict `:` type($vector) `into`
454    type(results)
455  }];
456}
457
458def Vector_ExtractMapOp :
459  Vector_Op<"extract_map", [NoSideEffect]>,
460    Arguments<(ins AnyVector:$vector, Variadic<Index>:$ids)>,
461    Results<(outs AnyVector)> {
462  let summary = "vector extract map operation";
463  let description = [{
464    Takes an N-D vector and extracts a sub-part of the vector starting at id
465    along each dimension.
466
467    The dimension associated to each element of `ids` used to extract are
468    implicitly deduced from the the destination type. For each dimension the
469    multiplicity is the destination dimension size divided by the source
470    dimension size, each dimension with a multiplicity greater than 1 is
471    associated to the next id, following ids order.
472    For example if the source type is `vector<64x4x32xf32>` and the destination
473    type is `vector<4x4x2xf32>`, the first id maps to dimension 0 and the second
474    id to dimension 2.
475
476    Similarly to vector.tuple_get, this operation is used for progressive
477    lowering and should be folded away before converting to LLVM.
478
479    It is different than `vector.extract_slice` and
480    `vector.extract_strided_slice` as it takes a Value as index instead of an
481    attribute. Also in the future it is meant to support extracting along any
482    dimensions and not only the most major ones.
483
484    For instance:
485    ```
486    // dynamic computation producing the value 0 of index type
487    %idx0 = ... : index
488    // dynamic computation producing the value 1 of index type
489    %idx1 = ... : index
490    %0 = constant dense<0, 1, 2, 3>: vector<4xi32>
491    // extracts values [0, 1]
492    %1 = vector.extract_map %0[%idx0] : vector<4xi32> to vector<2xi32>
493    // extracts values [1, 2]
494    %2 = vector.extract_map %0[%idx1] : vector<4xi32> to vector<2xi32>
495    ```
496
497    Example:
498
499    ```mlir
500    %ev = vector.extract_map %v[%id] : vector<32xf32> to vector<1xf32>
501    %ev1 = vector.extract_map %v1[%id1, %id2] : vector<64x4x32xf32>
502      to vector<4x4x2xf32>
503    ```
504  }];
505  let builders = [
506    OpBuilderDAG<(ins "Value":$vector, "ValueRange":$ids,
507                  "ArrayRef<int64_t>":$multiplicity,
508                  "AffineMap":$map)>];
509  let extraClassDeclaration = [{
510    VectorType getSourceVectorType() {
511      return vector().getType().cast<VectorType>();
512    }
513    VectorType getResultType() {
514      return getResult().getType().cast<VectorType>();
515    }
516    void getMultiplicity(SmallVectorImpl<int64_t> &multiplicity);
517    AffineMap map();
518  }];
519  let assemblyFormat = [{
520    $vector `[` $ids `]` attr-dict `:` type($vector) `to` type(results)
521  }];
522
523  let hasFolder = 1;
524}
525
526def Vector_FMAOp :
527  Op<Vector_Dialect, "fma", [NoSideEffect,
528                             AllTypesMatch<["lhs", "rhs", "acc", "result"]>]>,
529    Arguments<(ins AnyVector:$lhs, AnyVector:$rhs, AnyVector:$acc)>,
530    Results<(outs AnyVector:$result)> {
531  let summary = "vector fused multiply-add";
532  let description = [{
533    Multiply-add expressions operate on n-D vectors and compute a fused
534    pointwise multiply-and-accumulate: `$result = `$lhs * $rhs + $acc`.
535    All operands and result have the same vector type. The semantics
536    of the operation correspond to those of the `llvm.fma`
537    [intrinsic](https://llvm.org/docs/LangRef.html#int-fma). In the
538    particular case of lowering to LLVM, this is guaranteed to lower
539    to the `llvm.fma.*` intrinsic.
540
541    Example:
542
543    ```mlir
544    %3 = vector.fma %0, %1, %2: vector<8x16xf32>
545    ```
546  }];
547  // Fully specified by traits.
548  let verifier = ?;
549  let assemblyFormat = "$lhs `,` $rhs `,` $acc attr-dict `:` type($lhs)";
550  let builders = [
551    OpBuilderDAG<(ins "Value":$lhs, "Value":$rhs, "Value":$acc),
552    [{build($_builder, $_state, lhs.getType(), lhs, rhs, acc);}]>
553  ];
554  let extraClassDeclaration = [{
555    VectorType getVectorType() { return lhs().getType().cast<VectorType>(); }
556  }];
557}
558
559def Vector_InsertElementOp :
560  Vector_Op<"insertelement", [NoSideEffect,
561     TypesMatchWith<"source operand type matches element type of result",
562                    "result", "source",
563                    "$_self.cast<ShapedType>().getElementType()">,
564     AllTypesMatch<["dest", "result"]>]>,
565     Arguments<(ins AnyType:$source, AnyVector:$dest,
566                    AnySignlessInteger:$position)>,
567     Results<(outs AnyVector:$result)> {
568  let summary = "insertelement operation";
569  let description = [{
570    Takes a scalar source, an 1-D destination vector and a dynamic index
571    position and inserts the source into the destination at the proper
572    position.  Note that this instruction resembles vector.insert, but
573    is restricted to 1-D vectors and relaxed to dynamic indices. It is
574    meant to be closer to LLVM's version:
575    https://llvm.org/docs/LangRef.html#insertelement-instruction
576
577    Example:
578
579    ```mlir
580    %c = constant 15 : i32
581    %f = constant 0.0f : f32
582    %1 = vector.insertelement %f, %0[%c : i32]: vector<16xf32>
583    ```
584  }];
585  let assemblyFormat = [{
586    $source `,` $dest `[` $position `:` type($position) `]` attr-dict `:`
587    type($result)
588  }];
589
590  let builders = [
591    OpBuilderDAG<(ins "Value":$source, "Value":$dest, "int64_t":$position)>,
592    OpBuilderDAG<(ins "Value":$source, "Value":$dest, "Value":$position)>
593  ];
594  let extraClassDeclaration = [{
595    Type getSourceType() { return source().getType(); }
596    VectorType getDestVectorType() {
597      return dest().getType().cast<VectorType>();
598    }
599  }];
600
601}
602
603def Vector_InsertOp :
604  Vector_Op<"insert", [NoSideEffect,
605     PredOpTrait<"source operand and result have same element type",
606                 TCresVTEtIsSameAsOpBase<0, 0>>,
607     AllTypesMatch<["dest", "res"]>]>,
608     Arguments<(ins AnyType:$source, AnyVector:$dest, I64ArrayAttr:$position)>,
609     Results<(outs AnyVector:$res)> {
610  let summary = "insert operation";
611  let description = [{
612    Takes an n-D source vector, an (n+k)-D destination vector and a k-D position
613    and inserts the n-D source into the (n+k)-D destination at the proper
614    position. Degenerates to a scalar source type when n = 0.
615
616    Example:
617
618    ```mlir
619    %2 = vector.insert %0, %1[3] : vector<8x16xf32> into vector<4x8x16xf32>
620    %5 = vector.insert %3, %4[3, 3, 3] : f32 into vector<4x8x16xf32>
621    ```
622  }];
623  let assemblyFormat = [{
624    $source `,` $dest $position attr-dict `:` type($source) `into` type($dest)
625  }];
626
627  let builders = [
628    OpBuilderDAG<(ins "Value":$source, "Value":$dest,
629      "ArrayRef<int64_t>":$position)>,
630    // Convenience builder which assumes all values are constant indices.
631    OpBuilderDAG<(ins "Value":$source, "Value":$dest, "ValueRange":$position)>
632  ];
633  let extraClassDeclaration = [{
634    static StringRef getPositionAttrName() { return "position"; }
635    Type getSourceType() { return source().getType(); }
636    VectorType getDestVectorType() {
637      return dest().getType().cast<VectorType>();
638    }
639  }];
640}
641
642def Vector_InsertSlicesOp :
643  Vector_Op<"insert_slices", [NoSideEffect]>,
644    Arguments<(ins TupleOf<[AnyVector]>:$vectors, I64ArrayAttr:$sizes,
645                   I64ArrayAttr:$strides)>,
646    Results<(outs AnyVector)> {
647  let summary = "vector insert slices operation";
648  let description = [{
649    Takes a tuple of vector slices and inserts them into the vector result
650    according to the 'sizes' and 'strides' parameters.
651
652    The arguments 'sizes' and 'strides' represent a specification for
653    generating the unrolling of 'vector' shape, which has all slices of shape
654    'sizes' except for slices at dimension boundaries when 'vector' dimension
655    sizes are not a multiple of 'sizes'.
656
657    Each slice in 'vectors' is at the tuple element index corresponding to the
658    linear index of the slice w.r.t the unrolling scheme represented by 'sizes'.
659    Currently, only unit strides are supported.
660
661    Example:
662
663    ```mlir
664    %0 = vector.extract_slices %0, [2, 2], [1, 1]
665      : vector<4x2xf32> into tuple<vector<2x2xf32>, vector<2x2xf32>>
666
667    %1 = vector.insert_slices %0, [2, 2], [1, 1]
668      : tuple<vector<2x2xf32>, vector<2x2xf32>> into vector<4x2xf32>
669
670    // Example with partial slices at dimension boundaries.
671    %3 = vector.extract_slices %2, [2, 2], [1, 1]
672      : vector<4x3xf32> into tuple<vector<2x2xf32>, vector<2x1xf32>,
673                                   vector<2x2xf32>, vector<2x1xf32>>
674
675    %4 = vector.insert_slices %3, [2, 2], [1, 1]
676      : tuple<vector<2x2xf32>, vector<2x1xf32>,
677              vector<2x2xf32>, vector<2x1xf32>> into vector<4x3xf32>
678    ```
679  }];
680
681  let extraClassDeclaration = [{
682    TupleType getSourceTupleType() {
683      return vectors().getType().cast<TupleType>();
684    }
685    VectorType getResultVectorType() {
686      return getResult().getType().cast<VectorType>();
687    }
688    void getSizes(SmallVectorImpl<int64_t> &results);
689    void getStrides(SmallVectorImpl<int64_t> &results);
690    static StringRef getSizesAttrName() { return "sizes"; }
691    static StringRef getStridesAttrName() { return "strides"; }
692  }];
693  let assemblyFormat = [{
694    $vectors `,` $sizes `,` $strides attr-dict `:` type($vectors) `into`
695    type(results)
696  }];
697}
698
699def Vector_InsertMapOp :
700  Vector_Op<"insert_map", [NoSideEffect, AllTypesMatch<["dest", "result"]>]>,
701    Arguments<(ins AnyVector:$vector, AnyVector:$dest, Variadic<Index>:$ids)>,
702    Results<(outs AnyVector:$result)> {
703  let summary = "vector insert map operation";
704  let description = [{
705    Inserts a N-D vector and within a larger vector starting at id. The new
706    vector created will have the same size as the destination operand vector.
707
708    The dimension associated to each element of `ids` used to insert is
709    implicitly deduced from the source type (see `ExtractMapOp` for details).
710    For example if source type is `vector<4x4x2xf32>` and the destination type
711    is `vector<64x4x32xf32>`, the first id maps to dimension 0 and the second id
712    to dimension 2.
713
714    Similarly to vector.tuple_get, this operation is used for progressive
715    lowering and should be folded away before converting to LLVM.
716
717    It is different than `vector.insert` and `vector.insert_strided_slice` as it
718    takes a Value as index instead of an attribute. Also in the future it is
719    meant to support inserting along any dimensions and not only the most major
720    ones.
721
722    This operations is meant to be used in combination with vector.extract_map.
723
724    For instance:
725    ```
726    // dynamic computation producing the value 0 of index type
727    %idx0 = ... : index
728    // dynamic computation producing the value 1 of index type
729    %idx1 = ... : index /
730    %0 = constant dense<0, 1, 2, 3>: vector<4xi32>
731    // extracts values [0, 1]
732    %1 = vector.extract_map %0[%idx0] : vector<4xi32> to vector<2xi32>
733    // extracts values [1, 2]
734    %2 = vector.extract_map %0[%idx1] : vector<4xi32> to vector<2xi32>
735    // insert [0, 1] into [x, x, x, x] and produce [0, 1, x, x]
736    %3 = vector.insert_map %1, %0[%idx0] : vector<2xi32> into vector<4xi32>
737    // insert [1, 2] into [x, x, x, x] and produce [x, 1, 2, x]
738    %4 = vector.insert_map %2, %0[%idx1] : vector<2xi32> into vector<4xi32>
739    ```
740    Example:
741
742    ```mlir
743    %v = vector.insert_map %ev %v[%id] : vector<1xf32> into vector<32xf32>
744    %v1 = vector.insert_map %ev1, %v1[%arg0, %arg1] : vector<2x4x1xf32>
745      into vector<64x4x32xf32>
746    ```
747  }];
748  let builders = [OpBuilderDAG<(ins "Value":$vector, "Value":$dest,
749                                "ValueRange":$ids)>];
750  let extraClassDeclaration = [{
751    VectorType getSourceVectorType() {
752      return vector().getType().cast<VectorType>();
753    }
754    VectorType getResultType() {
755      return getResult().getType().cast<VectorType>();
756    }
757    // Return a map indicating the dimension mapping to the given ids.
758    AffineMap map();
759  }];
760  let assemblyFormat = [{
761    $vector `,` $dest `[` $ids `]` attr-dict
762      `:` type($vector) `into` type($result)
763  }];
764}
765
766def Vector_InsertStridedSliceOp :
767  Vector_Op<"insert_strided_slice", [NoSideEffect,
768    PredOpTrait<"operand #0 and result have same element type",
769                 TCresVTEtIsSameAsOpBase<0, 0>>,
770    AllTypesMatch<["dest", "res"]>]>,
771    Arguments<(ins AnyVector:$source, AnyVector:$dest, I64ArrayAttr:$offsets,
772               I64ArrayAttr:$strides)>,
773    Results<(outs AnyVector:$res)> {
774  let summary = "strided_slice operation";
775  let description = [{
776    Takes a k-D source vector, an n-D destination vector (n >= k), n-sized
777    `offsets` integer array attribute, a k-sized `strides` integer array attribute
778    and inserts the k-D source vector as a strided subvector at the proper offset
779    into the n-D destination vector.
780
781    At the moment strides must contain only 1s.
782
783    Returns an n-D vector that is a copy of the n-D destination vector in which
784    the last k-D dimensions contain the k-D source vector elements strided at
785    the proper location as specified by the offsets.
786
787    Example:
788
789    ```mlir
790    %2 = vector.insert_strided_slice %0, %1
791        {offsets = [0, 0, 2], strides = [1, 1]}:
792      vector<2x4xf32> into vector<16x4x8xf32>
793    ```
794  }];
795
796  let assemblyFormat = [{
797    $source `,` $dest attr-dict `:` type($source) `into` type($dest)
798  }];
799
800  let builders = [
801    OpBuilderDAG<(ins "Value":$source, "Value":$dest,
802      "ArrayRef<int64_t>":$offsets, "ArrayRef<int64_t>":$strides)>
803  ];
804  let extraClassDeclaration = [{
805    static StringRef getOffsetsAttrName() { return "offsets"; }
806    static StringRef getStridesAttrName() { return "strides"; }
807    VectorType getSourceVectorType() {
808      return source().getType().cast<VectorType>();
809    }
810    VectorType getDestVectorType() {
811      return dest().getType().cast<VectorType>();
812    }
813  }];
814}
815
816def Vector_OuterProductOp :
817  Vector_Op<"outerproduct", [NoSideEffect,
818    PredOpTrait<"lhs operand and result have same element type",
819                TCresVTEtIsSameAsOpBase<0, 0>>,
820    PredOpTrait<"rhs operand and result have same element type",
821                TCresVTEtIsSameAsOpBase<0, 1>>]>,
822    Arguments<(ins AnyVector:$lhs, AnyType:$rhs, Variadic<AnyVector>:$acc)>,
823    Results<(outs AnyVector)> {
824  let summary = "vector outerproduct with optional fused add";
825  let description = [{
826    Takes 2 1-D vectors and returns the 2-D vector containing the outer-product,
827    as illustrated below:
828    ```
829     outer |   [c, d]
830     ------+------------
831       [a, | [ [a*c, a*d],
832        b] |   [b*c, b*d] ]
833    ```
834    This operation also accepts a 1-D vector lhs and a scalar rhs. In this
835    case a simple AXPY operation is performed, which returns a 1-D vector.
836    ```
837        [a, b] * c = [a*c, b*c]
838    ```
839
840    An optional extra vector argument with the same shape as the output
841    vector may be specified in which case the operation returns the sum of
842    the outer-product and the extra vector. In this multiply-accumulate
843    scenario for floating-point arguments, the rounding mode is enforced
844    by guaranteeing that a fused-multiply add operation is emitted. When
845    lowered to the LLVMIR dialect, this form emits `llvm.intr.fma`, which
846    is guaranteed to lower to actual `fma` instructions on x86.
847
848    Example:
849
850    ```
851    %2 = vector.outerproduct %0, %1: vector<4xf32>, vector<8xf32>
852    return %2: vector<4x8xf32>
853
854    %3 = vector.outerproduct %0, %1, %2:
855      vector<4xf32>, vector<8xf32>, vector<4x8xf32>
856    return %3: vector<4x8xf32>
857
858    %6 = vector.outerproduct %4, %5: vector<10xf32>, f32
859    return %6: vector<10xf32>
860
861    ```
862  }];
863  let builders = [
864    // Build an op without mask, use the type of `acc` as the return type.
865    OpBuilderDAG<(ins "Value":$lhs, "Value":$rhs, "Value":$acc)>
866  ];
867  let extraClassDeclaration = [{
868    VectorType getOperandVectorTypeLHS() {
869      return lhs().getType().cast<VectorType>();
870    }
871    Type getOperandTypeRHS() {
872      return rhs().getType();
873    }
874    VectorType getOperandVectorTypeACC() {
875      return (llvm::size(acc()) == 0)
876        ? VectorType()
877        : (*acc().begin()).getType().cast<VectorType>();
878    }
879    VectorType getVectorType() {
880      return getResult().getType().cast<VectorType>();
881    }
882  }];
883}
884
885// TODO: Add transformation which decomposes ReshapeOp into an optimized
886// sequence of vector rotate/shuffle/select operations.
887def Vector_ReshapeOp :
888  Vector_Op<"reshape", [AttrSizedOperandSegments, NoSideEffect]>,
889    Arguments<(ins AnyVector:$vector, Variadic<Index>:$input_shape,
890               Variadic<Index>:$output_shape,
891               I64ArrayAttr:$fixed_vector_sizes)>,
892    Results<(outs AnyVector:$result)> {
893  let summary = "vector reshape operation";
894  let description = [{
895    Reshapes its vector operand from 'input_shape' to 'output_shape' maintaining
896    fixed vector dimension 'fixed_vector_sizes' on the innermost vector
897    dimensions.
898
899    The parameters 'input_shape' and 'output_shape' represent valid data shapes
900    across fixed vector shapes. For example, if a vector has a valid data
901    shape [6] with fixed vector size [8], then the valid data elements are
902    assumed to be stored at the beginning of the vector with the remaining
903    vector elements undefined.
904
905    In the examples below, valid data elements are represented by an alphabetic
906    character, and undefined data elements are represented by '-'.
907
908    Example
909
910      vector<1x8xf32> with valid data shape [6], fixed vector sizes [8]
911
912                input: [a, b, c, d, e, f]
913
914           layout map: (d0) -> (d0 floordiv 8, d0 mod 8)
915
916        vector layout: [a, b, c, d, e, f, -, -]
917
918    Example
919
920      vector<2x8xf32> with valid data shape [10], fixed vector sizes [8]
921
922                input: [a, b, c, d, e, f, g, h, i, j]
923
924           layout map: (d0) -> (d0 floordiv 8, d0 mod 8)
925
926        vector layout: [[a, b, c, d, e, f, g, h],
927                        [i, j, -, -, -, -, -, -]]
928
929    Example
930
931      vector<2x2x2x3xf32> with valid data shape [3, 5], fixed vector sizes
932      [2, 3]
933
934                input: [[a, b, c, d, e],
935                        [f, g, h, i, j],
936                        [k, l, m, n, o]]
937
938           layout map: (d0, d1) -> (d0 floordiv 3, d1 floordiv 5,
939                                    d0 mod 3, d1 mod 5)
940
941        vector layout: [[[[a, b, c],
942                          [f, g, h]]
943                         [[d, e, -],
944                          [i, j, -]]],
945                        [[[k, l, m],
946                          [-, -, -]]
947                         [[n, o, -],
948                          [-, -, -]]]]
949
950    Example
951
952      %1 = vector.reshape %0, [%c3, %c6], [%c2, %c9], [4]
953        : vector<3x2x4xf32> to vector<2x3x4xf32>
954
955             input: [[a, b, c, d, e, f],
956                     [g, h, i, j, k, l],
957                     [m, n, o, p, q, r]]
958
959        layout map: (d0, d1) -> (d0, d1 floordiv 4, d1 mod 4)
960
961
962      Input vector:  [[[a, b, c, d],
963                       [e, f, -, -]],
964                      [[g, h, i, j],
965                       [k, l, -, -]],
966                      [[m, n, o, p],
967                       [q, r, -, -]]]
968
969      Output vector:  [[[a, b, c, d],
970                        [e, f, g, h],
971                        [i, -, -, -]],
972                       [[j, k, l, m],
973                        [n, o, p, q],
974                        [r, -, -, -]]]
975  }];
976
977  let extraClassDeclaration = [{
978    VectorType getInputVectorType() {
979      return vector().getType().cast<VectorType>();
980    }
981    VectorType getOutputVectorType() {
982      return getResult().getType().cast<VectorType>();
983    }
984
985    /// Returns as integer value the number of input shape operands.
986    int64_t getNumInputShapeSizes() { return input_shape().size(); }
987
988    /// Returns as integer value the number of output shape operands.
989    int64_t getNumOutputShapeSizes() { return output_shape().size(); }
990
991    void getFixedVectorSizes(SmallVectorImpl<int64_t> &results);
992
993    static StringRef getFixedVectorSizesAttrName() {
994      return "fixed_vector_sizes";
995    }
996    static StringRef getInputShapeAttrName() { return "input_shape"; }
997    static StringRef getOutputShapeAttrName() { return "output_shape"; }
998  }];
999
1000  let assemblyFormat = [{
1001    $vector `,` `[` $input_shape `]` `,` `[` $output_shape `]` `,`
1002    $fixed_vector_sizes attr-dict `:` type($vector) `to` type($result)
1003  }];
1004}
1005
1006def Vector_ExtractStridedSliceOp :
1007  Vector_Op<"extract_strided_slice", [NoSideEffect,
1008    PredOpTrait<"operand and result have same element type",
1009                 TCresVTEtIsSameAsOpBase<0, 0>>]>,
1010    Arguments<(ins AnyVector:$vector, I64ArrayAttr:$offsets,
1011               I64ArrayAttr:$sizes, I64ArrayAttr:$strides)>,
1012    Results<(outs AnyVector)> {
1013  let summary = "extract_strided_slice operation";
1014  let description = [{
1015    Takes an n-D vector, k-D `offsets` integer array attribute, a k-sized
1016    `sizes` integer array attribute, a k-sized `strides` integer array
1017    attribute and extracts the n-D subvector at the proper offset.
1018
1019    At the moment strides must contain only 1s.
1020    // TODO: support non-1 strides.
1021
1022    Returns an n-D vector where the first k-D dimensions match the `sizes`
1023    attribute. The returned subvector contains the elements starting at offset
1024    `offsets` and ending at `offsets + sizes`.
1025
1026    Example:
1027
1028    ```mlir
1029    %1 = vector.extract_strided_slice %0
1030        {offsets = [0, 2], sizes = [2, 4], strides = [1, 1]}:
1031      vector<4x8x16xf32> to vector<2x4x16xf32>
1032
1033    // TODO: Evolve to a range form syntax similar to:
1034    %1 = vector.extract_strided_slice %0[0:2:1][2:4:1]
1035      vector<4x8x16xf32> to vector<2x4x16xf32>
1036    ```
1037  }];
1038  let builders = [
1039    OpBuilderDAG<(ins "Value":$source, "ArrayRef<int64_t>":$offsets,
1040      "ArrayRef<int64_t>":$sizes, "ArrayRef<int64_t>":$strides)>
1041  ];
1042  let extraClassDeclaration = [{
1043    static StringRef getOffsetsAttrName() { return "offsets"; }
1044    static StringRef getSizesAttrName() { return "sizes"; }
1045    static StringRef getStridesAttrName() { return "strides"; }
1046    VectorType getVectorType(){ return vector().getType().cast<VectorType>(); }
1047    void getOffsets(SmallVectorImpl<int64_t> &results);
1048  }];
1049  let hasCanonicalizer = 1;
1050  let hasFolder = 1;
1051  let assemblyFormat = "$vector attr-dict `:` type($vector) `to` type(results)";
1052}
1053
1054def Vector_TransferReadOp :
1055  Vector_Op<"transfer_read", [
1056      DeclareOpInterfaceMethods<VectorTransferOpInterface>,
1057      DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>
1058    ]>,
1059    Arguments<(ins AnyMemRef:$memref, Variadic<Index>:$indices,
1060               AffineMapAttr:$permutation_map, AnyType:$padding,
1061               OptionalAttr<BoolArrayAttr>:$masked)>,
1062    Results<(outs AnyVector:$vector)> {
1063
1064  let summary = "Reads a supervector from memory into an SSA vector value.";
1065
1066  let description = [{
1067    The `vector.transfer_read` op performs a read from a slice within a
1068    [MemRef](../LangRef.md#memref-type) supplied as its first operand
1069    into a [vector](../LangRef.md#vector-type) of the same base elemental type.
1070
1071    A memref operand with vector element type, must have its vector element
1072    type match a suffix (shape and element type) of the vector (e.g.
1073    memref<3x2x6x4x3xf32>, vector<1x1x4x3xf32>).
1074
1075    The slice is further defined by a full-rank index within the MemRef,
1076    supplied as the operands `2 .. 1 + rank(memref)`.
1077
1078    The permutation_map [attribute](../LangRef.md#attributes) is an
1079    [affine-map](Affine.md#affine-maps) which specifies the transposition on the
1080    slice to match the vector shape. The permutation map may be implicit and
1081    omitted from parsing and printing if it is the canonical minor identity map
1082    (i.e. if it does not permute or broadcast any dimension).
1083
1084    The size of the slice is specified by the size of the vector, given as the
1085    return type.
1086
1087    An `ssa-value` of the same elemental type as the MemRef is provided as the
1088    last operand to specify padding in the case of out-of-bounds accesses.
1089
1090    An optional boolean array attribute is provided to specify which dimensions
1091    of the transfer need masking. When a dimension is specified as not requiring
1092    masking, the `vector.transfer_read` may be lowered to simple loads. The
1093    absence of this `masked` attribute signifies that all dimensions of the
1094    transfer need to be masked.
1095
1096    This operation is called 'read' by opposition to 'load' because the
1097    super-vector granularity is generally not representable with a single
1098    hardware register. A `vector.transfer_read` is thus a mid-level abstraction
1099    that supports super-vectorization with non-effecting padding for full-tile
1100    only operations.
1101
1102    More precisely, let's dive deeper into the permutation_map for the following
1103    MLIR:
1104
1105    ```mlir
1106    vector.transfer_read %A[%expr1, %expr2, %expr3, %expr4]
1107      { permutation_map : (d0,d1,d2,d3) -> (d2,0,d0) } :
1108      memref<?x?x?x?xf32>, vector<3x4x5xf32>
1109    ```
1110
1111    This operation always reads a slice starting at `%A[%expr1, %expr2, %expr3,
1112    %expr4]`. The size of the slice is 3 along d2 and 5 along d0, so the slice
1113    is: `%A[%expr1 : %expr1 + 5, %expr2, %expr3:%expr3 + 3, %expr4]`
1114
1115    That slice needs to be read into a `vector<3x4x5xf32>`. Since the
1116    permutation map is not full rank, there must be a broadcast along vector
1117    dimension `1`.
1118
1119    A notional lowering of vector.transfer_read could generate code resembling:
1120
1121    ```mlir
1122    // %expr1, %expr2, %expr3, %expr4 defined before this point
1123    %tmp = alloc() : vector<3x4x5xf32>
1124    %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
1125    for %i = 0 to 3 {
1126      affine.for %j = 0 to 4 {
1127        affine.for %k = 0 to 5 {
1128          %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] :
1129            memref<?x?x?x?xf32>
1130          store %tmp[%i, %j, %k] : vector<3x4x5xf32>
1131    }}}
1132    %c0 = constant 0 : index
1133    %vec = load %view_in_tmp[%c0] : vector<3x4x5xf32>
1134    ```
1135
1136    On a GPU one could then map `i`, `j`, `k` to blocks and threads. Notice that
1137    the temporary storage footprint is `3 * 5` values but `3 * 4 * 5` values are
1138    actually transferred between `%A` and `%tmp`.
1139
1140    Alternatively, if a notional vector broadcast operation were available, the
1141    lowered code would resemble:
1142
1143    ```mlir
1144    // %expr1, %expr2, %expr3, %expr4 defined before this point
1145    %tmp = alloc() : vector<3x4x5xf32>
1146    %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
1147    for %i = 0 to 3 {
1148      affine.for %k = 0 to 5 {
1149        %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] :
1150          memref<?x?x?x?xf32>
1151        store %tmp[%i, 0, %k] : vector<3x4x5xf32>
1152    }}
1153    %c0 = constant 0 : index
1154    %tmpvec = load %view_in_tmp[%c0] : vector<3x4x5xf32>
1155    %vec = broadcast %tmpvec, 1 : vector<3x4x5xf32>
1156    ```
1157
1158    where `broadcast` broadcasts from element 0 to all others along the
1159    specified dimension. This time, the temporary storage footprint is `3 * 5`
1160    values which is the same amount of data as the `3 * 5` values transferred.
1161    An additional `1` broadcast is required. On a GPU this broadcast could be
1162    implemented using a warp-shuffle if loop `j` were mapped to `threadIdx.x`.
1163
1164    Syntax
1165    ```
1166    operation ::= ssa-id `=` `vector.transfer_read` ssa-use-list
1167      `{` attribute-entry `} :` memref-type `,` vector-type
1168    ```
1169
1170    Example:
1171
1172    ```mlir
1173    // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into vector<32x256xf32>
1174    // and pad with %f0 to handle the boundary case:
1175    %f0 = constant 0.0f : f32
1176    for %i0 = 0 to %0 {
1177      affine.for %i1 = 0 to %1 step 256 {
1178        affine.for %i2 = 0 to %2 step 32 {
1179          %v = vector.transfer_read %A[%i0, %i1, %i2], (%f0)
1180               {permutation_map: (d0, d1, d2) -> (d2, d1)} :
1181               memref<?x?x?xf32>, vector<32x256xf32>
1182    }}}
1183
1184    // Read the slice `%A[%i0, %i1]` (i.e. the element `%A[%i0, %i1]`) into
1185    // vector<128xf32>. The underlying implementation will require a 1-D vector
1186    // broadcast:
1187    for %i0 = 0 to %0 {
1188      affine.for %i1 = 0 to %1 {
1189        %3 = vector.transfer_read %A[%i0, %i1]
1190             {permutation_map: (d0, d1) -> (0)} :
1191             memref<?x?xf32>, vector<128xf32>
1192      }
1193    }
1194
1195    // Read from a memref with vector element type.
1196    %4 = vector.transfer_read %arg1[%c3, %c3], %vf0
1197      {permutation_map = (d0, d1)->(d0, d1)}
1198        : memref<?x?xvector<4x3xf32>>, vector<1x1x4x3xf32>
1199    ```
1200  }];
1201
1202  let builders = [
1203    // Builder that sets padding to zero.
1204    OpBuilderDAG<(ins "VectorType":$vector, "Value":$memref,
1205      "ValueRange":$indices, "AffineMap":$permutationMap,
1206      CArg<"ArrayRef<bool>", "{}">:$maybeMasked)>,
1207    // Builder that sets permutation map (resp. padding) to
1208    // 'getMinorIdentityMap' (resp. zero).
1209    OpBuilderDAG<(ins "VectorType":$vector, "Value":$memref,
1210      "ValueRange":$indices, CArg<"ArrayRef<bool>", "{}">:$maybeMasked)>
1211  ];
1212
1213  let hasFolder = 1;
1214}
1215
1216def Vector_TransferWriteOp :
1217  Vector_Op<"transfer_write", [
1218      DeclareOpInterfaceMethods<VectorTransferOpInterface>,
1219      DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>
1220    ]>,
1221    Arguments<(ins AnyVector:$vector, AnyMemRef:$memref,
1222               Variadic<Index>:$indices,
1223               AffineMapAttr:$permutation_map,
1224               OptionalAttr<BoolArrayAttr>:$masked)> {
1225
1226  let summary = "The vector.transfer_write op writes a supervector to memory.";
1227
1228  let description = [{
1229    The `vector.transfer_write` op performs a write from a
1230    [vector](../LangRef.md#vector-type), supplied as its first operand, into a
1231    slice within a [MemRef](../LangRef.md#memref-type) of the same base
1232    elemental type, supplied as its second operand.
1233
1234    A vector memref operand must have its vector element type match a suffix
1235    (shape and element type) of the vector (e.g. memref<3x2x6x4x3xf32>,
1236    vector<1x1x4x3xf32>).
1237
1238    The slice is further defined by a full-rank index within the MemRef,
1239    supplied as the operands `3 .. 2 + rank(memref)`.
1240
1241    The permutation_map [attribute](../LangRef.md#attributes) is an
1242    [affine-map](Affine.md#affine-maps) which specifies the transposition on the
1243    slice to match the vector shape. The permutation map may be implicit and
1244    omitted from parsing and printing if it is the canonical minor identity map
1245    (i.e. if it does not permute or broadcast any dimension).
1246
1247    The size of the slice is specified by the size of the vector.
1248
1249    An optional boolean array attribute is provided to specify which dimensions
1250    of the transfer need masking. When a dimension is specified as not requiring
1251    masking, the `vector.transfer_write` may be lowered to simple stores. The
1252    absence of this `mask` attribute signifies that all dimensions of the
1253    transfer need to be masked.
1254
1255    This operation is called 'write' by opposition to 'store' because the
1256    super-vector granularity is generally not representable with a single
1257    hardware register. A `vector.transfer_write` is thus a
1258    mid-level abstraction that supports super-vectorization with non-effecting
1259    padding for full-tile-only code. It is the responsibility of
1260    `vector.transfer_write`'s implementation to ensure the memory writes are
1261    valid. Different lowerings may be pertinent depending on the hardware
1262    support.
1263
1264    Example:
1265
1266    ```mlir
1267    // write vector<16x32x64xf32> into the slice
1268    //   `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`:
1269    for %i0 = 0 to %0 {
1270      affine.for %i1 = 0 to %1 step 32 {
1271        affine.for %i2 = 0 to %2 step 64 {
1272          affine.for %i3 = 0 to %3 step 16 {
1273            %val = `ssa-value` : vector<16x32x64xf32>
1274            vector.transfer_write %val, %A[%i0, %i1, %i2, %i3]
1275              {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} :
1276              vector<16x32x64xf32>, memref<?x?x?x?xf32>
1277    }}}}
1278
1279    // write to a memref with vector element type.
1280    vector.transfer_write %4, %arg1[%c3, %c3]
1281      {permutation_map = (d0, d1)->(d0, d1)}
1282        : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
1283    ```
1284  }];
1285
1286  let builders = [
1287    // Builder that sets permutation map to 'getMinorIdentityMap'.
1288    OpBuilderDAG<(ins "Value":$vector, "Value":$memref, "ValueRange":$indices,
1289      CArg<"ArrayRef<bool>", "{}">:$maybeMasked)>,
1290    OpBuilderDAG<(ins "Value":$vector, "Value":$memref, "ValueRange":$indices,
1291      "AffineMap":$permutationMap)>,
1292  ];
1293
1294  let hasFolder = 1;
1295}
1296
1297def Vector_MaskedLoadOp :
1298  Vector_Op<"maskedload">,
1299    Arguments<(ins AnyMemRef:$base,
1300               VectorOfRankAndType<[1], [I1]>:$mask,
1301               VectorOfRank<[1]>:$pass_thru)>,
1302    Results<(outs VectorOfRank<[1]>:$result)> {
1303
1304  let summary = "loads elements from memory into a vector as defined by a mask vector";
1305
1306  let description = [{
1307    The masked load reads elements from memory into a 1-D vector as defined
1308    by a base and a 1-D mask vector. When the mask is set, the element is read
1309    from memory. Otherwise, the corresponding element is taken from a 1-D
1310    pass-through vector. Informally the semantics are:
1311    ```
1312    result[0] := mask[0] ? MEM[base+0] : pass_thru[0]
1313    result[1] := mask[1] ? MEM[base+1] : pass_thru[1]
1314    etc.
1315    ```
1316    The masked load can be used directly where applicable, or can be used
1317    during progressively lowering to bring other memory operations closer to
1318    hardware ISA support for a masked load. The semantics of the operation
1319    closely correspond to those of the `llvm.masked.load`
1320    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-load-intrinsics).
1321
1322    Example:
1323
1324    ```mlir
1325    %0 = vector.maskedload %base, %mask, %pass_thru
1326       : memref<?xf32>, vector<8xi1>, vector<8xf32> into vector<8xf32>
1327    ```
1328  }];
1329  let extraClassDeclaration = [{
1330    MemRefType getMemRefType() {
1331      return base().getType().cast<MemRefType>();
1332    }
1333    VectorType getMaskVectorType() {
1334      return mask().getType().cast<VectorType>();
1335    }
1336    VectorType getPassThruVectorType() {
1337      return pass_thru().getType().cast<VectorType>();
1338    }
1339    VectorType getResultVectorType() {
1340      return result().getType().cast<VectorType>();
1341    }
1342  }];
1343  let assemblyFormat = "$base `,` $mask `,` $pass_thru attr-dict `:` "
1344    "type($base) `,` type($mask) `,` type($pass_thru) `into` type($result)";
1345  let hasCanonicalizer = 1;
1346}
1347
1348def Vector_MaskedStoreOp :
1349  Vector_Op<"maskedstore">,
1350    Arguments<(ins AnyMemRef:$base,
1351               VectorOfRankAndType<[1], [I1]>:$mask,
1352               VectorOfRank<[1]>:$value)> {
1353
1354  let summary = "stores elements from a vector into memory as defined by a mask vector";
1355
1356  let description = [{
1357    The masked store operation writes elements from a 1-D vector into memory
1358    as defined by a base and a 1-D mask vector. When the mask is set, the
1359    corresponding element from the vector is written to memory. Otherwise,
1360    no action is taken for the element. Informally the semantics are:
1361    ```
1362    if (mask[0]) MEM[base+0] = value[0]
1363    if (mask[1]) MEM[base+1] = value[1]
1364    etc.
1365    ```
1366    The masked store can be used directly where applicable, or can be used
1367    during progressively lowering to bring other memory operations closer to
1368    hardware ISA support for a masked store. The semantics of the operation
1369    closely correspond to those of the `llvm.masked.store`
1370    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-store-intrinsics).
1371
1372    Example:
1373
1374    ```mlir
1375    vector.maskedstore %base, %mask, %value
1376      : memref<?xf32>, vector<8xi1>, vector<8xf32>
1377    ```
1378  }];
1379  let extraClassDeclaration = [{
1380    MemRefType getMemRefType() {
1381      return base().getType().cast<MemRefType>();
1382    }
1383    VectorType getMaskVectorType() {
1384      return mask().getType().cast<VectorType>();
1385    }
1386    VectorType getValueVectorType() {
1387      return value().getType().cast<VectorType>();
1388    }
1389  }];
1390  let assemblyFormat = "$base `,` $mask `,` $value attr-dict `:` "
1391    "type($mask) `,` type($value) `into` type($base)";
1392  let hasCanonicalizer = 1;
1393}
1394
1395def Vector_GatherOp :
1396  Vector_Op<"gather">,
1397    Arguments<(ins AnyMemRef:$base,
1398               VectorOfRankAndType<[1], [AnyInteger]>:$indices,
1399               VectorOfRankAndType<[1], [I1]>:$mask,
1400               Variadic<VectorOfRank<[1]>>:$pass_thru)>,
1401    Results<(outs VectorOfRank<[1]>:$result)> {
1402
1403  let summary = "gathers elements from memory into a vector as defined by an index vector and mask";
1404
1405  let description = [{
1406    The gather operation gathers elements from memory into a 1-D vector as
1407    defined by a base and a 1-D index vector, but only if the corresponding
1408    bit is set in a 1-D mask vector. Otherwise, the element is taken from a
1409    1-D pass-through vector, if provided, or left undefined. Informally the
1410    semantics are:
1411    ```
1412    if (!defined(pass_thru)) pass_thru = [undef, .., undef]
1413    result[0] := mask[0] ? MEM[base + index[0]] : pass_thru[0]
1414    result[1] := mask[1] ? MEM[base + index[1]] : pass_thru[1]
1415    etc.
1416    ```
1417    The vector dialect leaves out-of-bounds behavior undefined.
1418
1419    The gather operation can be used directly where applicable, or can be used
1420    during progressively lowering to bring other memory operations closer to
1421    hardware ISA support for a gather. The semantics of the operation closely
1422    correspond to those of the `llvm.masked.gather`
1423    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-gather-intrinsics).
1424
1425    Example:
1426
1427    ```mlir
1428    %g = vector.gather %base, %indices, %mask, %pass_thru
1429        : (memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>) -> vector<16xf32>
1430    ```
1431  }];
1432  let extraClassDeclaration = [{
1433    MemRefType getMemRefType() {
1434      return base().getType().cast<MemRefType>();
1435    }
1436    VectorType getIndicesVectorType() {
1437      return indices().getType().cast<VectorType>();
1438    }
1439    VectorType getMaskVectorType() {
1440      return mask().getType().cast<VectorType>();
1441    }
1442    VectorType getPassThruVectorType() {
1443      return (llvm::size(pass_thru()) == 0)
1444        ? VectorType()
1445        : (*pass_thru().begin()).getType().cast<VectorType>();
1446    }
1447    VectorType getResultVectorType() {
1448      return result().getType().cast<VectorType>();
1449    }
1450  }];
1451  let assemblyFormat = "operands attr-dict `:` functional-type(operands, results)";
1452  let hasCanonicalizer = 1;
1453}
1454
1455def Vector_ScatterOp :
1456  Vector_Op<"scatter">,
1457    Arguments<(ins AnyMemRef:$base,
1458               VectorOfRankAndType<[1], [AnyInteger]>:$indices,
1459               VectorOfRankAndType<[1], [I1]>:$mask,
1460               VectorOfRank<[1]>:$value)> {
1461
1462  let summary = "scatters elements from a vector into memory as defined by an index vector and mask";
1463
1464  let description = [{
1465    The scatter operation scatters elements from a 1-D vector into memory as
1466    defined by a base and a 1-D index vector, but only if the corresponding
1467    bit in a 1-D mask vector is set. Otherwise, no action is taken for that
1468    element. Informally the semantics are:
1469    ```
1470    if (mask[0]) MEM[base + index[0]] = value[0]
1471    if (mask[1]) MEM[base + index[1]] = value[1]
1472    etc.
1473    ```
1474    The vector dialect leaves out-of-bounds and repeated index behavior
1475    undefined. Underlying implementations may enforce strict sequential
1476    semantics for the latter, though.
1477    TODO: enforce the latter always?
1478
1479    The scatter operation can be used directly where applicable, or can be used
1480    during progressively lowering to bring other memory operations closer to
1481    hardware ISA support for a scatter. The semantics of the operation closely
1482    correspond to those of the `llvm.masked.scatter`
1483    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-scatter-intrinsics).
1484
1485    Example:
1486
1487    ```mlir
1488    vector.scatter %base, %indices, %mask, %value
1489        : vector<16xi32>, vector<16xi1>, vector<16xf32> into memref<?xf32>
1490    ```
1491  }];
1492  let extraClassDeclaration = [{
1493    MemRefType getMemRefType() {
1494      return base().getType().cast<MemRefType>();
1495    }
1496    VectorType getIndicesVectorType() {
1497      return indices().getType().cast<VectorType>();
1498    }
1499    VectorType getMaskVectorType() {
1500      return mask().getType().cast<VectorType>();
1501    }
1502    VectorType getValueVectorType() {
1503      return value().getType().cast<VectorType>();
1504    }
1505  }];
1506  let assemblyFormat = "$base `,` $indices `,` $mask `,` $value attr-dict `:` "
1507    "type($indices) `,` type($mask) `,` type($value) `into` type($base)";
1508  let hasCanonicalizer = 1;
1509}
1510
1511def Vector_ExpandLoadOp :
1512  Vector_Op<"expandload">,
1513    Arguments<(ins AnyMemRef:$base,
1514               VectorOfRankAndType<[1], [I1]>:$mask,
1515               VectorOfRank<[1]>:$pass_thru)>,
1516    Results<(outs VectorOfRank<[1]>:$result)> {
1517
1518  let summary = "reads elements from memory and spreads them into a vector as defined by a mask";
1519
1520  let description = [{
1521    The expand load reads elements from memory into a 1-D vector as defined
1522    by a base and a 1-D mask vector. When the mask is set, the next element
1523    is read from memory. Otherwise, the corresponding element is taken from
1524    a 1-D pass-through vector. Informally the semantics are:
1525    ```
1526    index = base
1527    result[0] := mask[0] ? MEM[index++] : pass_thru[0]
1528    result[1] := mask[1] ? MEM[index++] : pass_thru[1]
1529    etc.
1530    ```
1531    Note that the index increment is done conditionally.
1532
1533    The expand load can be used directly where applicable, or can be used
1534    during progressively lowering to bring other memory operations closer to
1535    hardware ISA support for an expand. The semantics of the operation closely
1536    correspond to those of the `llvm.masked.expandload`
1537    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-expandload-intrinsics).
1538
1539    Example:
1540
1541    ```mlir
1542    %0 = vector.expandload %base, %mask, %pass_thru
1543       : memref<?xf32>, vector<8xi1>, vector<8xf32> into vector<8xf32>
1544    ```
1545  }];
1546  let extraClassDeclaration = [{
1547    MemRefType getMemRefType() {
1548      return base().getType().cast<MemRefType>();
1549    }
1550    VectorType getMaskVectorType() {
1551      return mask().getType().cast<VectorType>();
1552    }
1553    VectorType getPassThruVectorType() {
1554      return pass_thru().getType().cast<VectorType>();
1555    }
1556    VectorType getResultVectorType() {
1557      return result().getType().cast<VectorType>();
1558    }
1559  }];
1560  let assemblyFormat = "$base `,` $mask `,` $pass_thru attr-dict `:` "
1561    "type($base) `,` type($mask) `,` type($pass_thru) `into` type($result)";
1562  let hasCanonicalizer = 1;
1563}
1564
1565def Vector_CompressStoreOp :
1566  Vector_Op<"compressstore">,
1567    Arguments<(ins AnyMemRef:$base,
1568               VectorOfRankAndType<[1], [I1]>:$mask,
1569               VectorOfRank<[1]>:$value)> {
1570
1571  let summary = "writes elements selectively from a vector as defined by a mask";
1572
1573  let description = [{
1574    The compress store operation writes elements from a 1-D vector into memory
1575    as defined by a base and a 1-D mask vector. When the mask is set, the
1576    corresponding element from the vector is written next to memory. Otherwise,
1577    no action is taken for the element. Informally the semantics are:
1578    ```
1579    index = base
1580    if (mask[0]) MEM[index++] = value[0]
1581    if (mask[1]) MEM[index++] = value[1]
1582    etc.
1583    ```
1584    Note that the index increment is done conditionally.
1585
1586    The compress store can be used directly where applicable, or can be used
1587    during progressively lowering to bring other memory operations closer to
1588    hardware ISA support for a compress. The semantics of the operation closely
1589    correspond to those of the `llvm.masked.compressstore`
1590    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-compressstore-intrinsics).
1591
1592    Example:
1593
1594    ```mlir
1595    vector.compressstore %base, %mask, %value
1596      : memref<?xf32>, vector<8xi1>, vector<8xf32>
1597    ```
1598  }];
1599  let extraClassDeclaration = [{
1600    MemRefType getMemRefType() {
1601      return base().getType().cast<MemRefType>();
1602    }
1603    VectorType getMaskVectorType() {
1604      return mask().getType().cast<VectorType>();
1605    }
1606    VectorType getValueVectorType() {
1607      return value().getType().cast<VectorType>();
1608    }
1609  }];
1610  let assemblyFormat = "$base `,` $mask `,` $value attr-dict `:` "
1611    "type($base) `,` type($mask) `,` type($value)";
1612  let hasCanonicalizer = 1;
1613}
1614
1615def Vector_ShapeCastOp :
1616  Vector_Op<"shape_cast", [NoSideEffect]>,
1617    Arguments<(ins AnyTypeOf<[AnyVector, TupleOf<[AnyVector]>]>:$source)>,
1618    Results<(outs AnyTypeOf<[AnyVector, TupleOf<[AnyVector]>]>:$result)> {
1619  let summary = "shape_cast casts between vector shapes";
1620  let description = [{
1621    The shape_cast operation casts between an n-D source vector shape and
1622    a k-D result vector shape (the element type remains the same).
1623
1624    If reducing rank (n > k), result dimension sizes must be a product
1625    of contiguous source dimension sizes.
1626    If expanding rank (n < k), source dimensions must factor into a
1627    contiguous sequence of destination dimension sizes.
1628    Each source dim is expanded (or contiguous sequence of source dims combined)
1629    in source dimension list order (i.e. 0 <= i < n), to produce a contiguous
1630    sequence of result dims (or a single result dim), in result dimension list
1631    order (i.e. 0 <= j < k). The product of all source dimension sizes and all
1632    result dimension sizes must match.
1633
1634    If the source/result types are a tuple of vectors, the casting operation
1635    described above is applied to each source/result tuple element pair.
1636
1637    It is currently assumed that this operation does not require moving data,
1638    and that it will be folded away before lowering vector operations.
1639
1640    There is an exception to the folding expectation when targeting
1641    llvm.intr.matrix operations. We need a type conversion back and forth from a
1642    2-D MLIR vector to a 1-D flattened LLVM vector.shape_cast lowering to LLVM
1643    is supported in that particular case, for now.
1644
1645    Example:
1646
1647    ```mlir
1648    // Example casting to a lower vector rank.
1649    %1 = vector.shape_cast %0 : vector<5x1x4x3xf32> to vector<20x3xf32>
1650
1651    // Example casting to a higher vector rank.
1652    %3 = vector.shape_cast %2 : vector<10x12x8xf32> to vector<5x2x3x4x8xf32>
1653
1654    // Example casting a tuple of vectors of same rank, where tuple elements
1655    // may have different shapes.
1656    %5 = vector.shape_cast %4 : tuple<vector<3x4x2xf32>, vector<3x3x2xf32>> to
1657                                tuple<vector<12x2xf32>, vector<9x2xf32>>
1658    ```
1659  }];
1660  let extraClassDeclaration = [{
1661    VectorType getSourceVectorType() {
1662      return source().getType().cast<VectorType>();
1663    }
1664    VectorType getResultVectorType() {
1665      return getResult().getType().cast<VectorType>();
1666    }
1667  }];
1668  let assemblyFormat = "$source attr-dict `:` type($source) `to` type($result)";
1669  let hasFolder = 1;
1670  let hasCanonicalizer = 1;
1671}
1672
1673def Vector_BitCastOp :
1674  Vector_Op<"bitcast", [NoSideEffect, AllRanksMatch<["source", "result"]>]>,
1675    Arguments<(ins AnyVector:$source)>,
1676    Results<(outs AnyVector:$result)>{
1677  let summary = "bitcast casts between vectors";
1678  let description = [{
1679    The bitcast operation casts between vectors of the same rank, the minor 1-D
1680    vector size is casted to a vector with a different element type but same
1681    bitwidth.
1682
1683    Example:
1684
1685    ```mlir
1686    // Example casting to a smaller element type.
1687    %1 = vector.bitcast %0 : vector<5x1x4x3xf32> to vector<5x1x4x6xi16>
1688
1689    // Example casting to a bigger element type.
1690    %3 = vector.bitcast %2 : vector<10x12x8xi8> to vector<10x12x2xi32>
1691
1692    // Example casting to an element type of the same size.
1693    %5 = vector.bitcast %4 : vector<5x1x4x3xf32> to vector<5x1x4x3xi32>
1694    ```
1695  }];
1696  let extraClassDeclaration = [{
1697    VectorType getSourceVectorType() {
1698      return source().getType().cast<VectorType>();
1699    }
1700    VectorType getResultVectorType() {
1701      return getResult().getType().cast<VectorType>();
1702    }
1703  }];
1704  let assemblyFormat = "$source attr-dict `:` type($source) `to` type($result)";
1705  let hasFolder = 1;
1706}
1707
1708def Vector_TypeCastOp :
1709  Vector_Op<"type_cast", [NoSideEffect, ViewLikeOpInterface]>,
1710    Arguments<(ins StaticShapeMemRefOf<[AnyType]>:$memref)>,
1711    Results<(outs AnyMemRef:$result)> {
1712  let summary = "type_cast op converts a scalar memref to a vector memref";
1713  let description = [{
1714    Performs a conversion from a memref with scalar element to a memref with a
1715    *single* vector element, copying the shape of the memref to the vector. This
1716    is the minimal viable operation that is required to makeke
1717    super-vectorization operational. It can be seen as a special case of the
1718    `view` operation but scoped in the super-vectorization context.
1719
1720    Syntax:
1721
1722    ```
1723    operation ::= `vector.type_cast` ssa-use : memref-type to memref-type
1724    ```
1725
1726    Example:
1727
1728    ```mlir
1729    %A  = alloc() : memref<5x4x3xf32>
1730    %VA = vector.type_cast %A : memref<5x4x3xf32> to memref<vector<5x4x3xf32>>
1731    ```
1732  }];
1733
1734  /// Build the canonical memRefType with a single vector.
1735  /// E.g. memref<4 x 5 x vector<6 x f32>> -> memref<vector<4 x 5 x 6 x f32>>.
1736  let builders = [OpBuilderDAG<(ins "Value":$source)>];
1737
1738  let extraClassDeclaration = [{
1739    MemRefType getMemRefType() {
1740      return memref().getType().cast<MemRefType>();
1741    }
1742    MemRefType getResultMemRefType() {
1743      return getResult().getType().cast<MemRefType>();
1744    }
1745    // Implement ViewLikeOpInterface.
1746    Value getViewSource() { return memref(); }
1747  }];
1748
1749  let assemblyFormat = [{
1750    $memref attr-dict `:` type($memref) `to` type($result)
1751  }];
1752}
1753
1754def Vector_ConstantMaskOp :
1755  Vector_Op<"constant_mask", [NoSideEffect]>,
1756    Arguments<(ins I64ArrayAttr:$mask_dim_sizes)>,
1757    Results<(outs VectorOf<[I1]>)> {
1758  let summary = "creates a constant vector mask";
1759  let description = [{
1760    Creates and returns a vector mask where elements of the result vector
1761    are set to '0' or '1', based on whether the element indices are contained
1762    within a hyper-rectangular region specified by the 'mask_dim_sizes'
1763    array attribute argument. Each element of the 'mask_dim_sizes' array,
1764    specifies an exclusive upper bound [0, mask-dim-size-element-value)
1765    for a unique dimension in the vector result. The conjunction of the ranges
1766    define a hyper-rectangular region within which elements values are set to 1
1767    (otherwise element values are set to 0).
1768
1769    Example:
1770
1771    ```mlir
1772    // create a constant vector mask of size 4x3xi1 with elements in range
1773    // 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0).
1774    %1 = vector.constant_mask [3, 2] : vector<4x3xi1>
1775
1776    print %1
1777                  columns
1778                0    1    2
1779              |------------
1780            0 | 1    1    0
1781      rows  1 | 1    1    0
1782            2 | 1    1    0
1783            3 | 0    0    0
1784    ```
1785  }];
1786
1787  let extraClassDeclaration = [{
1788    static StringRef getMaskDimSizesAttrName() { return "mask_dim_sizes"; }
1789  }];
1790  let assemblyFormat = "$mask_dim_sizes attr-dict `:` type(results)";
1791}
1792
1793def Vector_CreateMaskOp :
1794  Vector_Op<"create_mask", [NoSideEffect]>,
1795    Arguments<(ins Variadic<Index>:$operands)>, Results<(outs VectorOf<[I1]>)> {
1796  let summary = "creates a vector mask";
1797  let description = [{
1798    Creates and returns a vector mask where elements of the result vector
1799    are set to '0' or '1', based on whether the element indices are contained
1800    within a hyper-rectangular region specified by the operands. Specifically,
1801    each operand specifies a range [0, operand-value) for a unique dimension in
1802    the vector result. The conjunction of the operand ranges define a
1803    hyper-rectangular region within which elements values are set to 1
1804    (otherwise element values are set to 0).
1805
1806    Example:
1807
1808    ```mlir
1809    // create a vector mask of size 4x3xi1 where elements in range
1810    // 0 <= row <= 2 and 0 <= col <= 1 are set to 1 (others to 0).
1811    %1 = vector.create_mask %c3, %c2 : vector<4x3xi1>
1812
1813    print %1
1814                  columns
1815                0    1    2
1816              |------------
1817            0 | 1    1    0
1818      rows  1 | 1    1    0
1819            2 | 1    1    0
1820            3 | 0    0    0
1821    ```
1822  }];
1823
1824  let hasCanonicalizer = 1;
1825  let assemblyFormat = "$operands attr-dict `:` type(results)";
1826}
1827
1828def Vector_TupleOp :
1829  Vector_Op<"tuple", [NoSideEffect]>,
1830    Arguments<(ins Variadic<AnyVector>:$vectors)>,
1831    Results<(outs TupleOf<[AnyVector]>)> {
1832  let summary = "make tuple of vectors operation";
1833  let description = [{
1834    Returns a tuple of its operands 'vectors'.
1835
1836    Note that this operation is used during the vector op unrolling
1837    transformation and should be removed before lowering to lower-level
1838    dialects.
1839
1840
1841    Example:
1842
1843    ```mlir
1844    %0 = vector.transfer_read ... : vector<2x2xf32>
1845    %1 = vector.transfer_read ... : vector<2x1xf32>
1846    %2 = vector.transfer_read ... : vector<2x2xf32>
1847    %3 = vector.transfer_read ... : vector<2x1xf32>
1848
1849    %4 = vector.tuple %0, %1, %2, %3
1850      : vector<2x2xf32>, vector<2x1xf32>, vector<2x2xf32>, vector<2x1xf32>
1851    ```
1852  }];
1853
1854  let extraClassDeclaration = [{
1855    TupleType getResultTupleType() {
1856      return getResult().getType().cast<TupleType>();
1857    }
1858  }];
1859}
1860
1861def Vector_TransposeOp :
1862  Vector_Op<"transpose", [NoSideEffect,
1863    PredOpTrait<"operand and result have same element type",
1864                 TCresVTEtIsSameAsOpBase<0, 0>>]>,
1865    Arguments<(ins AnyVector:$vector, I64ArrayAttr:$transp)>,
1866    Results<(outs AnyVector:$result)> {
1867  let summary = "vector transpose operation";
1868  let description = [{
1869    Takes a n-D vector and returns the transposed n-D vector defined by
1870    the permutation of ranks in the n-sized integer array attribute.
1871    In the operation
1872
1873    ```mlir
1874    %1 = vector.transpose %0, [i_1, .., i_n]
1875      : vector<d_1 x .. x d_n x f32>
1876      to vector<d_trans[0] x .. x d_trans[n-1] x f32>
1877    ```
1878
1879    the transp array [i_1, .., i_n] must be a permutation of [0, .., n-1].
1880
1881    Example:
1882
1883    ```mlir
1884    %1 = vector.transpose %0, [1, 0] : vector<2x3xf32> to vector<3x2xf32>
1885
1886     [ [a, b, c],       [ [a, d],
1887       [d, e, f] ]  ->    [b, e],
1888                          [c, f] ]
1889    ```
1890  }];
1891  let builders = [
1892    OpBuilderDAG<(ins "Value":$vector, "ArrayRef<int64_t>":$transp)>
1893  ];
1894  let extraClassDeclaration = [{
1895    VectorType getVectorType() {
1896      return vector().getType().cast<VectorType>();
1897    }
1898    VectorType getResultType() {
1899      return result().getType().cast<VectorType>();
1900    }
1901    void getTransp(SmallVectorImpl<int64_t> &results);
1902    static StringRef getTranspAttrName() { return "transp"; }
1903  }];
1904  let assemblyFormat = [{
1905    $vector `,` $transp attr-dict `:` type($vector) `to` type($result)
1906  }];
1907  let hasCanonicalizer = 1;
1908  let hasFolder = 1;
1909}
1910
1911def Vector_TupleGetOp :
1912  Vector_Op<"tuple_get", [NoSideEffect]>,
1913    Arguments<(ins TupleOf<[AnyVector]>:$vectors, APIntAttr:$index)>,
1914    Results<(outs AnyVector)> {
1915  let summary = "vector tuple get operation";
1916  let description = [{
1917    Returns the tuple element of 'vectors' at 'index'.
1918
1919    Note that this operation is used during the vector op unrolling
1920    transformation and should be removed before lowering to lower-level
1921    dialects.
1922
1923    Example:
1924
1925    ```mlir
1926    %4 = vector.tuple %0, %1, %2, %3
1927      : vector<2x2xf32>, vector<2x1xf32>, vector<2x2xf32>, vector<2x1xf32>>
1928
1929    %5 = vector.tuple_get %4, 1
1930      : tuple<vector<2x2xf32>, vector<2x1xf32>,
1931              vector<2x2xf32>, vector<2x1xf32>>
1932    ```
1933  }];
1934
1935  let extraClassDeclaration = [{
1936    VectorType getResultVectorType() {
1937      return getResult().getType().cast<VectorType>();
1938    }
1939    int64_t getIndex() {
1940      auto index = (*this)->getAttrOfType<IntegerAttr>("index");
1941      return index.getValue().getSExtValue();
1942    }
1943    static StringRef getIndexAttrName() { return "index"; }
1944  }];
1945  let hasFolder = 1;
1946}
1947
1948def Vector_PrintOp :
1949  Vector_Op<"print", []>, Arguments<(ins AnyType:$source)> {
1950  let summary = "print operation (for testing and debugging)";
1951  let description = [{
1952    Prints the source vector (or scalar) to stdout in human readable
1953    format (for testing and debugging). No return value.
1954
1955    Example:
1956
1957    ```mlir
1958    %0 = constant 0.0 : f32
1959    %1 = vector.broadcast %0 : f32 to vector<4xf32>
1960    vector.print %1 : vector<4xf32>
1961
1962    when lowered to LLVM, the vector print is unrolled into
1963    elementary printing method calls that at runtime will yield
1964
1965    ( 0.0, 0.0, 0.0, 0.0 )
1966
1967    on stdout when linked with a small runtime support library,
1968    which only needs to provide a few printing methods (single
1969    value for all data types, opening/closing bracket, comma,
1970    newline).
1971    ```
1972  }];
1973  let verifier = ?;
1974  let extraClassDeclaration = [{
1975    Type getPrintType() {
1976      return source().getType();
1977    }
1978  }];
1979  let assemblyFormat = "$source attr-dict `:` type($source)";
1980}
1981
1982//===----------------------------------------------------------------------===//
1983// Ops used for supporting progressive lowering and conversion type changes.
1984// The Ops are typically not used directly by higher level dialects, but are
1985// used by intra-dialect rewriting rules to bring vector operations closer
1986// to the hardware ISA.
1987//===----------------------------------------------------------------------===//
1988
1989/// Vector dialect matrix multiplication op that operates on flattened 1-D
1990/// MLIR vectors. This is the counterpart of llvm.matrix.multiply in MLIR.
1991/// This may seem redundant with vector.contract but it serves the purposes of
1992/// more progressive lowering and localized type conversion on the path:
1993///   `vector<...x...xf32> -> vector<...xf32> -> !llvm<... x float>`.
1994def Vector_MatmulOp : Vector_Op<"matrix_multiply", [NoSideEffect,
1995        PredOpTrait<"lhs operand and result have same element type",
1996                    TCresVTEtIsSameAsOpBase<0, 0>>,
1997        PredOpTrait<"rhs operand and result have same element type",
1998                    TCresVTEtIsSameAsOpBase<0, 1>>]>,
1999      Arguments<(
2000        // TODO: tighten vector element types that make sense.
2001        ins VectorOfRankAndType<[1],
2002              [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$lhs,
2003            VectorOfRankAndType<[1],
2004              [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$rhs,
2005            I32Attr:$lhs_rows, I32Attr:$lhs_columns, I32Attr:$rhs_columns)>,
2006      Results<(
2007        outs VectorOfRankAndType<[1],
2008               [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$res)>
2009{
2010  let summary = "Vector matrix multiplication op that operates on flattened 1-D"
2011    " MLIR vectors";
2012  let description = [{
2013    This is the counterpart of llvm.matrix.multiply in MLIR. It serves the
2014    purposes of more progressive lowering and localized type conversion.
2015    Higher levels typically lower matrix multiplications into 'vector.contract'
2016    operations. Subsequent rewriting rule progressively lower these operations
2017    into 'vector.matrix_multiply' operations to bring the operations closer
2018    to the hardware ISA.
2019
2020    The ‘vector.matrix_multiply’ op treats `lhs` as matrix with <lhs_rows> rows
2021    and <lhs_columns> columns, `rhs` as matrix with <lhs_columns> rows and
2022    <rhs_columns> and multiplies them. The result matrix is returned embedded in
2023    the result vector.
2024
2025    Also see:
2026
2027    http://llvm.org/docs/LangRef.html#llvm-matrix-multiply-intrinsic
2028
2029    Example:
2030
2031    ```mlir
2032    %C = vector.matrix_multiply %A, %B
2033      { lhs_rows = 4: i32, lhs_columns = 16: i32 , rhs_columns = 3: i32 } :
2034      (vector<64xf64>, vector<48xf64>) -> vector<12xf64>
2035    ```
2036  }];
2037  let builders = [
2038   OpBuilderDAG<(ins "Value":$lhs, "Value":$rhs, "unsigned":$lhsRows,
2039     "unsigned":$lhsColumns, "unsigned":$rhsColumns),
2040   [{
2041     $_state.addOperands({lhs, rhs});
2042     $_state.addAttribute("lhs_rows",$_builder.getI32IntegerAttr(lhsRows));
2043     $_state.addAttribute("lhs_columns",$_builder.getI32IntegerAttr(lhsColumns));
2044     $_state.addAttribute("rhs_columns",$_builder.getI32IntegerAttr(rhsColumns));
2045     $_state.addTypes(VectorType::get(lhsRows * rhsColumns,
2046       lhs.getType().cast<VectorType>().getElementType()));
2047   }]>,
2048  ];
2049  let verifier = ?;
2050  let assemblyFormat = "$lhs `,` $rhs attr-dict "
2051    "`:` `(` type($lhs) `,` type($rhs) `)` `->` type($res)";
2052}
2053
2054/// Vector dialect matrix tranposition op that operates on flattened 1-D
2055/// MLIR vectors. This is the counterpart of llvm.matrix.transpose in MLIR.
2056/// This may seem redundant with vector.transpose but it serves the purposes of
2057/// more progressive lowering and localized type conversion on the path:
2058///   `vector<...x...xf32> -> vector<...xf32> -> !llvm<... x float>`.
2059def Vector_FlatTransposeOp : Vector_Op<"flat_transpose", [NoSideEffect,
2060  PredOpTrait<"source operand and result have same element type",
2061                 TCresVTEtIsSameAsOpBase<0, 0>>]>,
2062    Arguments<(
2063      // TODO: tighten vector element types that make sense.
2064      ins VectorOfRankAndType<[1],
2065            [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$matrix,
2066          I32Attr:$rows, I32Attr:$columns)>,
2067    Results<(
2068      outs VectorOfRankAndType<[1],
2069             [AnySignlessInteger, AnySignedInteger, AnyFloat]>:$res)> {
2070  let summary = "Vector matrix transposition on flattened 1-D MLIR vectors";
2071  let description = [{
2072    This is the counterpart of llvm.matrix.transpose in MLIR. It serves
2073    the purposes of more progressive lowering and localized type conversion.
2074    Higher levels typically lower matrix tranpositions into 'vector.transpose'
2075    operations. Subsequent rewriting rule progressively lower these operations
2076    into 'vector.flat_transpose' operations to bring the operations closer
2077    to the hardware ISA.
2078
2079    The ‘vector.flat_transpose’ op treats the 1-D input `matrix` as
2080    a 2-D matrix with <rows> rows and <columns> columns, and returns the
2081    transposed matrix in flattened form in 'res'.
2082
2083    Also see:
2084
2085    http://llvm.org/docs/LangRef.html#llvm-matrix-transpose-intrinsic
2086
2087    Example:
2088
2089    ```mlir
2090    %1 = vector.flat_transpose %0 { rows = 4: i32, columns = 4: i32 }
2091       : (vector<16xf32>) -> vector<16xf32>
2092    ```
2093  }];
2094  let verifier = ?;
2095  let assemblyFormat = "$matrix attr-dict `:` type($matrix) `->` type($res)";
2096}
2097
2098#endif // VECTOR_OPS
2099