1//===- LinalgStructuredOps.td - Linalg dialect library ops -*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is the operation definition file for structured operations on buffers
10// that correspond to underlying library calls (e.g. BLAS).
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LINALG_STRUCTURED_OPS
15#define LINALG_STRUCTURED_OPS
16
17include "mlir/Dialect/Linalg/IR/LinalgBase.td"
18include "mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td"
19include "mlir/Interfaces/CopyOpInterface.td"
20include "mlir/Interfaces/SideEffectInterfaces.td"
21
// `NInputs<n>` attaches the native C++ trait `linalg::NInputs<n>::Impl` to an
// op, exposing the API for ops with a statically known number of inputs, all
// of which are passed as operands.
// See Linalg/LinalgTraits.h for implementation details and usage.
class NInputs<int n>
  : NativeOpTrait<!strconcat("linalg::NInputs<", !cast<string>(n), ">::Impl")>;
27
// `ZeroInitTensors` attaches the native C++ trait `linalg::ZeroInitTensors`,
// exposing the API for ops that are known to not have init tensor operands.
// See Linalg/LinalgTraits.h for implementation details and usage.
def ZeroInitTensors : NativeOpTrait<"linalg::ZeroInitTensors">;
32
// `NOutputs<n>` attaches the native C++ trait `linalg::NOutputs<n>::Impl` to
// an op, exposing the API for ops with a statically known number of outputs,
// all of which are passed as operands.
// See Linalg/LinalgTraits.h for implementation details and usage.
class NOutputs<int n>
  : NativeOpTrait<!strconcat("linalg::NOutputs<", !cast<string>(n), ">::Impl")>;
38
// Native C++ trait `linalg::StructuredOpTraits`; appended to the trait list of
// every op deriving from `LinalgStructured_Op` in this file.
def StructuredOpTraits : NativeOpTrait<"linalg::StructuredOpTraits">;
// Native C++ trait `linalg::NamedStructuredOpTrait`; attached to the ops
// deriving from `GenericOpBase` in this file.
def NamedStructuredOpTrait : NativeOpTrait<"linalg::NamedStructuredOpTrait">;
41
// Root TableGen class for Linalg structured ops: registers the op in the
// Linalg dialect and appends `LinalgStructuredInterface` to the caller-provided
// traits.
// Linalg ops that correspond to library calls operate on ShapedType as their
// first operands. These may be optionally followed by non-view operands
// depending on the specific Linalg op.
class LinalgStructuredBase_Op<string mnemonic, list<OpTrait> props>
  : Op<Linalg_Dialect,
       mnemonic,
       !listconcat(props, [LinalgStructuredInterface])>;
49
// Base class for the manually defined structured ops in this file. On top of
// `LinalgStructuredBase_Op` it appends `StructuredOpTraits` and declares the
// `MemoryEffectsOpInterface` methods.
class LinalgStructured_Op<string mnemonic, list<OpTrait> props>
  : LinalgStructuredBase_Op<mnemonic,
       !listconcat(props, [
         StructuredOpTraits,
         DeclareOpInterfaceMethods<MemoryEffectsOpInterface>])> {
  // C++ snippet that derived ops prepend to their `extraClassDeclaration`. It
  // defines `getLibraryCallName()` by forwarding the op to
  // `generateLibraryCallName`.
  code libraryCallName = [{
    std::string getLibraryCallName() {
      return generateLibraryCallName(getOperation());
    }
  }];
  // Custom syntax: parenthesized operands, the attribute dictionary, then the
  // operand types after a colon.
  let assemblyFormat = "`(` operands `)` attr-dict `:` type(operands)";
}
62
63//===----------------------------------------------------------------------===//
64// Named Linalg ops, implemented as special configurations of generic ops.
65//===----------------------------------------------------------------------===//
66// At the moment these are not declarative and require a bunch of C++ code.
67// In the future, these should be migrated to a declarative specification.
// `linalg.copy`: one input view, one output view, no init tensors. Optional
// permutation maps reorder the dimensions on either side.
def CopyOp : LinalgStructured_Op<"copy", [
    CopyOpInterface,
    NInputs<1>,
    ZeroInitTensors,
    NOutputs<1>
  ]> {
  let description = [{
    Copies the data in the input view into the output view.

    Usage:

    ```mlir
    linalg.copy(%arg0, %arg1) : memref<?xf32, stride_specification>,
                                memref<?xf32, stride_specification>
    ```

    One possible lowering to loop form is:

    ```mlir
    %0 = linalg.dim %arg0, 0 : index
    scf.for %i0 = %c0 to %0 step %c1 {
      %1 = load %arg0[%i0] : memref<?xf32, stride_specification>
      store %1, %arg1[%i0] : memref<?xf32, stride_specification>
    }
    ```

    Optionally, can take `inputPermutation` and `outputPermutation` attributes
    to reorder the dimensions of the input and output views.

    Usage:

    ```mlir
    linalg.copy(%arg0, %arg1) {
        inputPermutation = affine_map<(i, j, k) -> (i, k, j)>,
        outputPermutation = affine_map<(i, j, k) -> (k, j, i)>} :
      memref<?x?x?xf32, stride_specification>,
      memref<?x?x?xf32, stride_specification>
    ```

    One possible lowering to loop form is:

    ```mlir
    %0 = linalg.dim %arg0, 0
    %1 = linalg.dim %arg0, 1
    %2 = linalg.dim %arg0, 2
    scf.for %i0 = %c0 to %0 step %c1 {
      scf.for %i1 = %c0 to %2 step %c1 {
        scf.for %i2 = %c0 to %1 step %c1 {
          %3 = load %arg0[%i0, %i2, %i1] :
                  memref<?x?x?xf32, stride_specification>
          store %3, %arg1[%i2, %i1, %i0] :
                  memref<?x?x?xf32, stride_specification>
        }
      }
    }
    ```

    The views are expected to be compatible for correctness but this is not
    enforced at the moment.
  }];

  let arguments = (ins
    AnyStridedMemRef:$input,
    AnyStridedMemRef:$output,
    OptionalAttr<AffineMapAttr>:$inputPermutation,
    OptionalAttr<AffineMapAttr>:$outputPermutation);

  // Convenience builder that leaves both permutation attributes unset.
  // TODO: this should go away once the usage of OptionalAttr triggers emission
  // of builders with default arguments left unspecified.
  let builders = [OpBuilderDAG<(ins "Value":$input, "Value":$output),
    [{
      return build(
        $_builder, $_state, input, output, AffineMapAttr(), AffineMapAttr());
    }]>];

  let extraClassDeclaration = libraryCallName # [{
    // Rank-polymorphic.
    //   I(ivs) -> O(ivs) with parallel iterators.
    ArrayAttr iterator_types() {
      unsigned nPar = getInputShapedType(0).getRank();
      return Builder(getContext()).getStrArrayAttr(
        SmallVector<StringRef, 8>(nPar, getParallelIteratorTypeName()));
    }

    // I(input_perm(ivs)) -> O(output_perm(ivs))
    // A missing permutation attribute is treated as the identity map.
    ArrayAttr indexing_maps() {
      MLIRContext *context = getContext();
      auto maybeInputMap = inputPermutation();
      auto maybeOutputMap = outputPermutation();
      unsigned inputRank = getInputShapedType(0).getRank();
      unsigned outputRank = getOutputShapedType(0).getRank();
      return Builder(context).getAffineMapArrayAttr({
          extractOrIdentityMap(maybeInputMap, inputRank, context),
          extractOrIdentityMap(maybeOutputMap, outputRank, context)});
    }

    // CopyOpInterface accessors.
    Value getSource() { return input(); }
    Value getTarget() { return output(); }

    // No region builder: this op carries no region.
    static std::function<void(Block &)> getRegionBuilder() {
      return nullptr;
    }
  }];
  let verifier = [{ return ::verify(*this); }];

  let hasFolder = 1;
  let hasCanonicalizer = 1;
}
172
// `linalg.fill`: no inputs, one output view, no init tensors. The fill value
// is passed as a trailing non-view operand.
def FillOp : LinalgStructured_Op<"fill", [
    NInputs<0>,
    ZeroInitTensors,
    NOutputs<1>]> {
  let description = [{
    Fills the output view with the given value. The value may be a float, a
    signless integer or a vector.

    Usage:

    ```mlir
    linalg.fill(%output, %value) : memref<?xf32, stride_specification>, f32
    ```
  }];

  let arguments = (ins AnyStridedMemRef:$output,
                   AnyTypeOf<[AnyFloat, AnySignlessInteger, AnyVector]>:$value);
  let extraClassDeclaration = libraryCallName # [{
    // Rank-polymorphic.
    //   filling_value -> O(ivs) with parallel iterators.
    ArrayAttr iterator_types() {
      unsigned nPar = getOutputShapedType(0).getRank();
      return Builder(getContext()).getStrArrayAttr(
        SmallVector<StringRef, 8>(nPar, getParallelIteratorTypeName()));
    }

    // Only the output view gets an indexing map; the filling value has none.
    ArrayAttr indexing_maps() {
      MLIRContext *context = getContext();
      // filling_value -> O(ivs)
      return Builder(context).getAffineMapArrayAttr({
          extractOrIdentityMap(llvm::None, getNumParallelLoops(), context)});
    }

    // No region builder: this op carries no region.
    static std::function<void(Block &)> getRegionBuilder() {
      return nullptr;
    }
  }];

  let verifier = [{ return ::verify(*this); }];

  let hasFolder = 1;
  let hasCanonicalizer = 1;
}
206
/// A base class for pooling operations such as conv. Derived ops must declare
/// the optional attributes `strides`, `dilations` and `padding` with the
/// following types:
///   OptionalAttr<I64ArrayAttr>:$strides
///   OptionalAttr<I64ArrayAttr>:$dilations
///   OptionalAttr<I64ElementsAttr>:$padding
/// `strides` denotes the step of each window along the dimension.
class PoolingBase_Op<string mnemonic, list<OpTrait> props>
  : LinalgStructured_Op<mnemonic, props> {
  let description = [{
    Performs an N-D pooling operation similarly to the description in the TF
    documentation:
    https://www.tensorflow.org/api_docs/python/tf/nn/pool

    Different from the description, this operation doesn't perform on batch and
    channel. It only takes tensors of rank `N`.

    ```
      output[x[0], ..., x[N-1]] =
        REDUCE_{z[0], ..., z[N-1]}
          input[
                x[0] * strides[0] - pad_before[0] + dilation_rate[0]*z[0],
                ...
                x[N-1]*strides[N-1] - pad_before[N-1] + dilation_rate[N-1]*z[N-1]
                ]
    ```

    The optional attributes are:
      - strides: an i64 array specifying the stride (i.e. step) for window
        loops.
      - dilations: an i64 array specifying the filter upsampling/input
        downsampling rate
      - padding: an i64 array of pairs (low, high) specifying the number of
        elements to pad along a dimension.

    If strides or dilations attributes are missing then the default value is
    one for each of the input dimensions. Similarly, padding values are zero
    for both low and high in each of the dimensions, if not specified.
  }];

  // C++ accessors shared by all pooling-style ops. Each one takes a window
  // loop index `i` and falls back to the default (1 for stride/dilation, 0 for
  // padding) when the corresponding attribute is absent.
  code commonUtils = libraryCallName # [{
    int64_t getStride(unsigned i) {
      assert(i < getNumWindowLoops());
      if (!strides().hasValue()) return 1;
      return strides()->getValue()[i]
        .cast<IntegerAttr>().getValue().getSExtValue();
    }

    int64_t getDilation(unsigned i) {
      assert(i < getNumWindowLoops());
      if (!dilations().hasValue()) return 1;
      return dilations()->getValue()[i]
        .cast<IntegerAttr>().getValue().getSExtValue();
    }

    // Element (i, 0) of the padding attribute.
    int64_t getLowPad(unsigned i) {
      assert(i < getNumWindowLoops());
      if (!padding().hasValue()) return 0;
      return padding().getValue().getValue<int64_t>({i, 0});
    }

    // Element (i, 1) of the padding attribute.
    int64_t getHighPad(unsigned i) {
      assert(i < getNumWindowLoops());
      if (!padding().hasValue()) return 0;
      return padding().getValue().getValue<int64_t>({i, 1});
    }

    static std::function<void(Block &)> getRegionBuilder() {
      return nullptr;
    }
  }];
}
278
// `linalg.conv`: two inputs (filter, input) and one output, all strided
// memrefs.
def ConvOp : PoolingBase_Op<"conv", [
    NInputs<2>,
    // Despite having reductions, this manually defined ConvOp may only take
    // memref operands and can never have init tensors.
    ZeroInitTensors,
    NOutputs<1>]> {

  let description = [{
    Generic n-D convolution as described in the TF documentation:
    https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/nn/convolution

    ```
      output[b, x[0], ..., x[N-1], k] =
      sum_{z[0], ..., z[N-1], q}
          filter[z[0], ..., z[N-1], q, k] *
          padded_input[b,
                       x[0] * strides[0] + dilation_rate[0] * z[0],
                       ...,
                       x[N-1] * strides[N-1] + dilation_rate[N-1] * z[N-1],
                       q]
    ```
  }];

  // Following the TF source of truth above, strides, dilations and padding are
  // integer attributes of the same rank as the number of window dimensions.
  // The padding attribute specifies the amount of zero padding to be applied to
  // the base area, which is a n-d array of (low, high) padding. Each pair has
  // the low padding as the first element and the high padding as the second
  // element. Using padding is equivalent to inserting those same zero values
  // into the input before doing the convolution.
  let arguments = (ins AnyStridedMemRef:$filter, AnyStridedMemRef:$input,
                   AnyStridedMemRef:$output,
                   OptionalAttr<I64ArrayAttr>:$strides,
                   OptionalAttr<I64ArrayAttr>:$dilations,
                   OptionalAttr<I64ElementsAttr>:$padding);

  let extraClassDeclaration = commonUtils # [{
    // TODO: extend to support more than 1 dimensions and potentially grouping
    // too.
    unsigned getNumBatchDimensions() { return 1; }

    unsigned getNumInputFeatureDimensions() { return 1; }

    unsigned getNumOutputFeatureDimensions() { return 1; }

    // Output rank minus the batch and output feature dimensions.
    unsigned getNumSpatialDimensions() {
      return getOutputShapedType(0).getRank() - getNumBatchDimensions() -
             getNumOutputFeatureDimensions();
    }

    ArrayAttr iterator_types() {
      // Outer parallel loops are always the number of output dimensions; i.e.
      // [b, xs, k] in the TF notation above.
      unsigned nPar = getOutputShapedType(0).getRank();
      unsigned nRed = getNumInputFeatureDimensions();
      // Window loops are a special kind of reduction that is never tiled or
      // parallelized across; i.e. [zs] in the TF notation above whose number
      // match `xs` (i.e. 1 window loop per "image" dimension).
      // `nPar` counts [b, xs, k], so the number of `xs` is the output rank
      // minus the batch and *output* feature dimensions.
      // This may evolve in the future.
      unsigned nWin = getNumSpatialDimensions();
      SmallVector<StringRef, 8> iters(nPar, getParallelIteratorTypeName());
      iters.reserve(nPar + nRed + nWin);
      iters.append(nRed, getReductionIteratorTypeName());
      iters.append(nWin, getWindowIteratorTypeName());
      return Builder(getContext()).getStrArrayAttr(iters);
    }

    //   F(z0, ..., zN-1, q, k) *
    //     I(b, x0 * s0 + d0 * z0 - pad_low_0, ...,
    //       xN-1 * sN-1 + dN-1 * zN-1 - pad_low_N-1, q)
    //   ->  O(b, x0, ..., xN-1, k)
    // for N equal to `nWindow`. If there is no padding attribute, it will be
    // ignored.
    ArrayAttr indexing_maps() {
      MLIRContext *context = getContext();
      auto nWin = getNumWindowLoops();
      assert(nWin > 0 && "expected at least one window dimension");
      unsigned idx = 0;
      // In the following, AffineDimExprs are indexed in loop order:
      //   [ b, xs, k,           q,                     zs]
      //    parallels     non-window reductions     windows
      //
      // Parallel dims are exactly the dimensions indexing `output`:
      //     output[b, x[0], ..., x[N-1], k]; i.e.
      //  * batch dimensions (bs with #bs = 1 for now)
      //  * "image" dimensions (xs with #xs = #zs = output_rank - #bs - #ks)
      //  * output filter dimensions (ks with #ks = 1 for now)
      auto bs = makeAffineDimExprs(getNumBatchDimensions(), idx, context);
      auto xs = makeAffineDimExprs(nWin, idx, context);
      auto ks = makeAffineDimExprs(
        getNumOutputFeatureDimensions(), idx, context);
      // Non-window reduction dim: the `q` in sum_{z[0], ..., z[N-1], q}
      auto qs = makeAffineDimExprs(
        getNumInputFeatureDimensions(), idx, context);
      // Window reduction dims: the `z`s in sum_{z[0], ..., z[N-1], q}
      auto zs = makeAffineDimExprs(nWin, idx, context);
      // Construct the weightedSum expression.
      auto ws = weightedPoolingInputIndex(*this, xs, zs);
      return Builder(context).getAffineMapArrayAttr({
        // filter[z[0], ..., z[N-1], q, k]
        AffineMap::get(idx, 0, concat(concat(zs, qs), ks), context),
        // input[b,
        //       x[0]*s[0] + d[0]*z[0] - pad_low[0],
        //       ...
        //       x[N-1]*s[N-1] + d[N-1]*z[N-1] - pad_low[N-1],
        //       q]
        AffineMap::get(idx, 0, concat(concat(bs, ws), qs), context),
        // output[b, x[0], ..., x[N-1], k]
        AffineMap::get(idx, 0, concat(concat(bs, xs), ks), context)});
    }
  }];

  let verifier = [{ return ::verify(*this); }];

  let hasFolder = 1;
  let hasCanonicalizer = 1;
}
396
// Shared definition of the pooling ops that reduce a single data input over a
// window. The window shape is carried by a second memref operand
// (`windowDims`), which is why the op is declared with `NInputs<2>`.
class SingleInputPoolingBase_Op<string mnemonic>
  : PoolingBase_Op<mnemonic, [
    NInputs<2>,
    // Despite having reductions, these manually defined pooling ops may only
    // take memref operands and can never have init tensors.
    ZeroInitTensors,
    NOutputs<1>]> {
  let description = [{
    A base class for single input pooling function.

    TODO: Figure out a better way to handle window dimensions, i.e., eliminate
    the fake memref.
    The window dimensions are specified by argument `windowDims`. The i-th
    dimension in the shape of `windowDims` denotes the size of the window along
    dimension i. For example, if the window size is 2x3, then a memref<2x3>
    should be passed to the operation as `windowDims`.
  }];

  // Operand order matters: the indexing maps built below are listed as
  // input, windowDims, output.
  let arguments = (ins AnyStridedMemRef:$input,
                   AnyStridedMemRef:$windowDims,
                   AnyStridedMemRef:$output,
                   OptionalAttr<I64ArrayAttr>:$strides,
                   OptionalAttr<I64ArrayAttr>:$dilations,
                   OptionalAttr<I64ElementsAttr>:$padding);

  // `iterator_types` emits one parallel and one window iterator per output
  // dimension. `indexing_maps` derives the input index from the output and
  // window dims via `weightedPoolingInputIndex`.
  let extraClassDeclaration = commonUtils# [{
    ArrayAttr iterator_types() {
      // Outer parallel loops are always the number of output dimensions.
      unsigned nPar = getOutputShapedType(0).getRank();
      // The window loops has the same number loops with output dimensions.
      unsigned nWin = nPar;
      SmallVector<StringRef, 8> iters(nPar, getParallelIteratorTypeName());
      iters.reserve(nPar + nWin);
      iters.append(nWin, getWindowIteratorTypeName());
      return Builder(getContext()).getStrArrayAttr(iters);
    }

    ArrayAttr indexing_maps() {
      MLIRContext *context = getContext();
      auto nPar = getNumParallelLoops();
      auto nWin = getNumWindowLoops();
      assert(nWin > 0 && "expected at least one window dimension");
      unsigned idx = 0;
      auto outputDims = makeAffineDimExprs(nPar, idx, context);
      auto windowDims = makeAffineDimExprs(nWin, idx, context);
      // Construct the weighedSum expression.
      auto inputDims =
          weightedPoolingInputIndex(*this, outputDims, windowDims);
      return Builder(getContext()).getAffineMapArrayAttr({
        // input
        AffineMap::get(idx, 0, inputDims, context),
        // windowDims
        AffineMap::get(idx, 0, windowDims, context),
        // output
        AffineMap::get(idx, 0, outputDims, context)});
    }
  }];

  let verifier = [{ return ::verify(*this); }];

  let hasFolder = 1;
  let hasCanonicalizer = 1;
}
460
// `linalg.pooling_max`: overrides only the description of
// `SingleInputPoolingBase_Op`.
def PoolingMaxOp: SingleInputPoolingBase_Op<"pooling_max"> {
  let description = [{
    Takes max op as pooling operation, i.e., it samples the maximum value in the
    window.
  }];
}
467
// `linalg.pooling_min`: overrides only the description of
// `SingleInputPoolingBase_Op`.
def PoolingMinOp: SingleInputPoolingBase_Op<"pooling_min"> {
  let description = [{
    Takes min op as pooling operation, i.e., it samples the minimum value in the
    window.
  }];
}
474
// `linalg.pooling_sum`: overrides only the description of
// `SingleInputPoolingBase_Op`.
def PoolingSumOp: SingleInputPoolingBase_Op<"pooling_sum"> {
  let description = [{
    Takes add op as pooling operation, i.e., it accumulates the values in the
    window.
  }];
}
481
482//===----------------------------------------------------------------------===//
483// Generic Linalg ops.
484//===----------------------------------------------------------------------===//
// Type constraint accepting either a ranked tensor or a strided memref.
def LinalgOperand: AnyTypeOf<[AnyRankedTensor, AnyStridedMemRef]>;
486
// Type constraint: a `LinalgOperand` whose shaped type has exactly `rank`
// dimensions.
class LinalgOperandOfRank<int rank>
  : Type<And<[LinalgOperand.predicate,
              CPred<"$_self.cast<ShapedType>().getRank() == "
                    # !cast<string>(rank)>]>>;
492
// Common base of `linalg.generic` and `linalg.indexed_generic`: declares the
// shared operands, attributes, results, region, extra C++ helpers and the
// custom printer/parser hooks.
class GenericOpBase<string mnemonic> : LinalgStructuredBase_Op<mnemonic, [
    AttrSizedOperandSegments,
    DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
    NamedStructuredOpTrait,
    SingleBlockImplicitTerminator<"YieldOp">]> {
  // Three variadic operand groups (hence AttrSizedOperandSegments), followed
  // by the attributes.
  let arguments = (ins Variadic<AnyShaped>:$inputs,
                       Variadic<AnyMemRef>:$output_buffers,
                       Variadic<AnyRankedTensor>:$init_tensors,
                       AffineMapArrayAttr:$indexing_maps,
                       ArrayAttr:$iterator_types,
                       OptionalAttr<StrAttr>:$doc,
                       OptionalAttr<StrAttr>:$library_call,
                       // ArrayAttr of StrArrayAttr:
                       OptionalAttr<ArrayAttr>:$sparse);
  let results = (outs Variadic<AnyRankedTensor>:$result_tensors);
  let regions = (region AnyRegion:$region);
  // NOTE(review): `linalgTraitAttrNames()` lists the doc, indexing_maps,
  // library_call and iterator_types attribute names but not `sparse`, although
  // `sparse` is declared above -- confirm whether that omission is intended.
  // `getLibraryCallName()` returns the `library_call` attribute when present
  // and a fixed placeholder string otherwise.
  let extraClassDeclaration = [{
    SmallVector<StringRef, 8> linalgTraitAttrNames() {
      return SmallVector<StringRef, 8>{
        getDocAttrName(),
        getIndexingMapsAttrName(), getLibraryCallAttrName(),
        getIteratorTypesAttrName(),
      };
    }
    std::string getLibraryCallName() {
      return library_call().hasValue() ?
        library_call()->str() : "op_has_no_registered_library_name";
    }

    static std::function<void(Block &)> getRegionBuilder() {
      return nullptr;
    }
  }];
  let printer = [{ return ::print(p, *this); }];
  let parser = [{ return ::parseGenericOp(parser, result); }];
}
529
/// Index-free GenericOp: the region receives only the element values of the
/// operands, not the enclosing loop induction variables.
def GenericOp : GenericOpBase<"generic"> {
  let description = [{
    Generic Linalg op form where the key properties of the computation are
    specified as attributes. In pretty form, a `linalg.generic` op is written
    as:

      ```mlir
      linalg.generic #trait_attribute
          ins(%A, %B : memref<?x?xf32, stride_specification>,
                       memref<?x?xf32, stride_specification>)
          outs(%C : memref<?x?xf32, stride_specification>)
          attrs = {other-optional-attributes}
          {region}
      ```

    Where #trait_attribute is an alias of a dictionary attribute containing:
      - doc [optional]: a documentation string
      - indexing_maps: a list of AffineMapAttr, one AffineMapAttr per each input
        and output view. Such AffineMapAttr specifies the mapping between the
        loops and the indexing within each view.
      - library_call [optional]: a StringAttr containing the name of an
        external library function that the linalg.generic operation maps to.
        The external library is assumed to be dynamically linked and no strong
        compile-time guarantees are provided. In the absence of such a library
        call, linalg.generic will always lower to loops.
      - iterator_types: an ArrayAttr specifying the type of the enclosing loops.
        Each element of the list represents an iterator of one of the following
        types:
          parallel, reduction, window
      - sparse: an optional list with per-dimension sparsity annotations (either
        "D" for dense or "S" for sparse) for each input and output view.

    Example:
    Defining a #matmul_trait attribute in MLIR can be done as follows:
      ```mlir
      #matmul_accesses = [
        (m, n, k) -> (m, k),
        (m, n, k) -> (k, n),
        (m, n, k) -> (m, n)
      ]
      #matmul_trait = {
        doc = "C(m, n) += A(m, k) * B(k, n)",
        indexing_maps = #matmul_accesses,
        library_call = "linalg_matmul",
        iterator_types = ["parallel", "parallel", "reduction"]
      }
      ```

    And can be reused in multiple places as:
      ```mlir
      linalg.generic #matmul_trait
        ins(%A, %B : memref<?x?xf32, stride_specification>,
                     memref<?x?xf32, stride_specification>)
        outs(%C : memref<?x?xf32, stride_specification>)
        {other-optional-attributes} {
        ^bb0(%a: f32, %b: f32, %c: f32) :
          %d = mulf %a, %b: f32
          %e = addf %c, %d: f32
          linalg.yield %e : f32
      }
      ```

    This may lower to either:
      ```mlir
      call @linalg_matmul(%A, %B, %C) :
        (memref<?x?xf32, stride_specification>,
         memref<?x?xf32, stride_specification>,
         memref<?x?xf32, stride_specification>)
        -> ()
      ```

    or IR resembling:
    ```mlir
    scf.for %m = %c0 to %M step %c1 {
      scf.for %n = %c0 to %N step %c1 {
        scf.for %k = %c0 to %K step %c1 {
          %a = load %A[%m, %k] : memref<?x?xf32, stride_specification>
          %b = load %B[%k, %n] : memref<?x?xf32, stride_specification>
          %c = load %C[%m, %n] : memref<?x?xf32, stride_specification>
          %d = mulf %a, %b: f32
          %e = addf %c, %d: f32
          store %e, %C[%m, %n] : memref<?x?xf32, stride_specification>
        }
      }
    }
    ```

    To allow progressive lowering from the value world (a.k.a tensor values) to
    the buffer world (a.k.a memref values), a `linalg.generic` op allows mixing
    tensors and buffers operands and tensor results.

    ```mlir
    %C = linalg.generic #trait_attribute
      ins(%A, %B : tensor<?x?xf32>, memref<?x?xf32, stride_specification>)
      init(%C : tensor<?x?xf32>)
      {other-optional-attributes}
      {region}
      -> (tensor<?x?xf32>)
    ```

    The `init` operand and the conventions around mixing tensors and buffers are
    described in more detail in the "Tensors and Buffers: Conventions and
    Limitations" section in the [Linalg Document](../docs/Linalg.md)

    Tensor values must be legalized by a buffer allocation pass before most
    transformations can be applied. Such legalizations move tensor return values
    into output buffer operands and update the region arguments accordingly.
  }];

  // Four builder flavors: with or without result tensor types / init tensors,
  // and with or without the `doc` and `libraryCall` strings. The trailing
  // callback defaults to nullptr and takes (OpBuilder &, Location, ValueRange).
  let builders = [
    OpBuilderDAG<(ins "TypeRange":$resultTensorTypes, "ValueRange":$inputs,
      "ValueRange":$outputBuffers, "ValueRange":$initTensors,
      "ArrayRef<AffineMap>":$indexingMaps, "ArrayRef<StringRef>":$iteratorTypes,
      "StringRef":$doc, "StringRef":$libraryCall,
      CArg<"function_ref<void(OpBuilder &, Location, ValueRange)>", "nullptr">)>,
    OpBuilderDAG<(ins "ValueRange":$inputs, "ValueRange":$outputBuffers,
      "ArrayRef<AffineMap>":$indexingMaps, "ArrayRef<StringRef>":$iteratorTypes,
      "StringRef":$doc, "StringRef":$libraryCall,
      CArg<"function_ref<void(OpBuilder &, Location, ValueRange)>", "nullptr">)>,
    OpBuilderDAG<(ins "TypeRange":$resultTensorTypes, "ValueRange":$inputs,
      "ValueRange":$outputBuffers, "ValueRange":$initTensors,
      "ArrayRef<AffineMap>":$indexingMaps, "ArrayRef<StringRef>":$iteratorTypes,
      CArg<"function_ref<void(OpBuilder &, Location, ValueRange)>", "nullptr">)>,
    OpBuilderDAG<(ins "ValueRange":$inputs, "ValueRange":$outputBuffers,
      "ArrayRef<AffineMap>":$indexingMaps, "ArrayRef<StringRef>":$iteratorTypes,
      CArg<"function_ref<void(OpBuilder &, Location, ValueRange)>", "nullptr">)>
  ];
  let verifier = [{ return ::verify(*this); }];

  let hasFolder = 1;
  let hasCanonicalizer = 1;
}
663
/// GenericOp with Indexing (i.e. multi-for style in which the region is passed
/// the enclosing loop induction variables)
def IndexedGenericOp : GenericOpBase<"indexed_generic"> {
  let description = [{
    Indexed Generic Linalg op form where the key properties of the computation
    are specified as attributes. In pretty form, a `linalg.indexed_generic` op
    is written as:

      ```mlir
      linalg.indexed_generic #trait_attribute
          ins(%A, %B : memref<?x?xf32, stride_specification>,
                       memref<?x?xf32, stride_specification>)
          outs(%C : memref<?x?xf32, stride_specification>)
          attrs = {other-optional-attributes}
          {region}
      ```

    Where #trait_attribute is an alias of a dictionary attribute containing:
      - doc [optional]: a documentation string
      - indexing_maps: a list of AffineMapAttr, one AffineMapAttr per each input
        and output view. Such AffineMapAttr specifies the mapping between the
        loops and the indexing within each view.
      - library_call [optional]: a StringAttr containing the name of an
        external library function that the linalg.indexed_generic operation
        maps to.  The external library is assumed to be dynamically linked and
        no strong compile-time guarantees are provided. In the absence of such
        a library call, linalg.indexed_generic will always lower to loops.
      - iterator_types: an ArrayAttr specifying the type of the enclosing loops.
        Each element of the list represents an iterator of one of the following
        types:
          parallel, reduction, window

    Example:
    Defining a #matmul_trait attribute in MLIR can be done as follows:

    ```mlir
    #matmul_accesses = [
      (m, n, k) -> (m, k),
      (m, n, k) -> (k, n),
      (m, n, k) -> (m, n)
    ]
    #matmul_trait = {
      doc = "C(m, n) += A(m, k) * B(k, n)",
      indexing_maps = #matmul_accesses,
      library_call = "linalg_matmul",
      iterator_types = ["parallel", "parallel", "reduction"]
    }
    ```

    And can be reused in multiple places as:

    ```mlir
      linalg.indexed_generic #matmul_trait
        ins(%A, %B : memref<?x?xf32, stride_specification>,
                     memref<?x?xf32, stride_specification>)
        outs(%C : memref<?x?xf32, stride_specification>) {
      ^bb0(%offset_m: index, %offset_n: index, %offset_k: index,
           %a: f32, %b: f32, %c: f32) :
        "some_optional_computation"(%offset_m, %offset_n, %offset_k)
        %d = mulf %a, %b: f32
        %e = addf %c, %d: f32
        linalg.yield %e : f32
    }
    ```

    This may lower to either:

    ```mlir
    call @linalg_matmul(%offset_m, %offset_n, %offset_k, %A, %B, %C) :
      (index, index, index,
       memref<?x?xf32, stride_specification>,
       memref<?x?xf32, stride_specification>,
       memref<?x?xf32, stride_specification>)
      -> ()
    ```

    or IR resembling:

    ```mlir
    scf.for %m = %c0 to %M step %c1 {
      scf.for %n = %c0 to %N step %c1 {
        scf.for %k = %c0 to %K step %c1 {
          %a = load %A[%m, %k] : memref<?x?xf32, stride_specification>
          %b = load %B[%k, %n] : memref<?x?xf32, stride_specification>
          %c = load %C[%m, %n] : memref<?x?xf32, stride_specification>
          "some_optional_computation"(%m, %n, %k)
          %d = mulf %a, %b: f32
          %e = addf %c, %d: f32
          store %e, %C[%m, %n] : memref<?x?xf32, stride_specification>
        }
      }
    }
    ```

    To allow progressive lowering from the value world (a.k.a tensor values) to
    the buffer world (a.k.a memref values), a `linalg.indexed_generic` op
    allows mixing tensors and buffers operands and tensor results.

    ```mlir
    %C = linalg.indexed_generic #trait_attribute
      ins(%A, %B : tensor<?x?xf32>, memref<?x?xf32, stride_specification>)
      init(%C : tensor<?x?xf32>)
      {other-optional-attributes}
      {region_with_index_arguments}
      -> (tensor<?x?xf32>)
    ```

    The `init` operand and the conventions around mixing tensors and buffers are
    described in more detail in the "Tensors and Buffers: Conventions and
    Limitations" section in the [Linalg Document](../docs/Linalg.md)

    Tensor values must be legalized by a buffer allocation pass before most
    transformations can be applied. Such legalizations move tensor return values
    into output buffer operands and update the region arguments accordingly.
  }];

  // Same four builder flavors as `linalg.generic`, except that the trailing
  // callback takes two ValueRange arguments instead of one.
  let builders = [
    OpBuilderDAG<(ins "TypeRange":$resultTensorTypes, "ValueRange":$inputs,
      "ValueRange":$outputBuffers, "ValueRange":$initTensors,
      "ArrayRef<AffineMap>":$indexingMaps, "ArrayRef<StringRef>":$iteratorTypes,
      "StringRef":$doc, "StringRef":$libraryCall,
      CArg<"function_ref<void(OpBuilder &, Location, ValueRange, ValueRange)>",
           "nullptr">)>,
    OpBuilderDAG<(ins "ValueRange":$inputs, "ValueRange":$outputBuffers,
      "ArrayRef<AffineMap>":$indexingMaps, "ArrayRef<StringRef>":$iteratorTypes,
      "StringRef":$doc, "StringRef":$libraryCall,
      CArg<"function_ref<void(OpBuilder &, Location, ValueRange, ValueRange)>",
           "nullptr">)>,
    OpBuilderDAG<(ins "TypeRange":$resultTensorTypes, "ValueRange":$inputs,
      "ValueRange":$outputBuffers, "ValueRange":$initTensors,
      "ArrayRef<AffineMap>":$indexingMaps, "ArrayRef<StringRef>":$iteratorTypes,
      CArg<"function_ref<void(OpBuilder &, Location, ValueRange, ValueRange)>",
           "nullptr">)>,
    OpBuilderDAG<(ins "ValueRange":$inputs, "ValueRange":$outputBuffers,
      "ArrayRef<AffineMap>":$indexingMaps, "ArrayRef<StringRef>":$iteratorTypes,
      CArg<"function_ref<void(OpBuilder &, Location, ValueRange, ValueRange)>",
           "nullptr">)>
  ];
  let verifier = [{ return ::verify(*this); }];

  let hasFolder = 1;
  let hasCanonicalizer = 1;
}
807
808//===----------------------------------------------------------------------===//
// Named Linalg ops, implemented as declarative configurations of generic ops.
810//===----------------------------------------------------------------------===//
811
812// This file is auto-generated from a TC def specification.
813include "mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.td"
814
815#endif // LINALG_STRUCTURED_OPS
816