1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/compiler/mlir/tosa/transforms/legalize_utils.h"
17 
18 #include "mlir/Dialect/Tosa/IR/TosaOps.h"  // from @llvm-project
19 #include "mlir/Dialect/Tosa/Utils/QuantUtils.h"  // from @llvm-project
20 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
21 #include "tensorflow/compiler/mlir/tosa/transforms/legalize_common.h"
22 
23 // Implements legalization and post-legalization optimization helper functions
24 
25 namespace mlir {
26 namespace tosa {
27 
28 // Create a TOSA rescale op from TFLite scaling, zero points and rounding mode
buildRescale(PatternRewriter & rewriter,Operation * op,RankedTensorType output_type,Value input_val,double scale,int64_t input_zp,int64_t output_zp,bool double_round)29 Value buildRescale(PatternRewriter& rewriter, Operation* op,
30                    RankedTensorType output_type, Value input_val, double scale,
31                    int64_t input_zp, int64_t output_zp, bool double_round) {
32   int32_t multiplier;
33   int32_t shift;
34 
35   // We currently only support 32-bit quantized multiplier.
36   computeMultiplierAndShift(scale, multiplier, shift, 32);
37 
38   auto rescale_op = rewriter.create<tosa::RescaleOp>(
39       op->getLoc(), output_type, input_val,
40       rewriter.getI32IntegerAttr(static_cast<int32_t>(input_zp)),
41       rewriter.getI32IntegerAttr(static_cast<int32_t>(output_zp)),
42       rewriter.getI32ArrayAttr({multiplier}), rewriter.getI32ArrayAttr({shift}),
43       rewriter.getBoolAttr(true), rewriter.getBoolAttr(double_round),
44       rewriter.getBoolAttr(false));
45 
46   return rescale_op.getResult();
47 }
48 
49 // Creates TOSA rescale op with int32 output
buildRescaleToInt32(PatternRewriter & rewriter,Operation * op,Value input_val,double input_scale,int64_t input_zp)50 Value buildRescaleToInt32(PatternRewriter& rewriter, Operation* op,
51                           Value input_val, double input_scale,
52                           int64_t input_zp) {
53   // Output is always int32 type
54   auto input_type = input_val.getType().dyn_cast<mlir::RankedTensorType>();
55   assert(input_type);
56   auto output_type =
57       RankedTensorType::get(input_type.getShape(), rewriter.getI32Type());
58 
59   return buildRescale(rewriter, op, output_type, input_val, input_scale,
60                       input_zp, 0, false);
61 }
62 
63 // Creates TOSA rescale op with int32 input
buildRescaleFromInt32(PatternRewriter & rewriter,Operation * op,RankedTensorType output_type,Value input_val,double output_scale,int64_t output_zp)64 Value buildRescaleFromInt32(PatternRewriter& rewriter, Operation* op,
65                             RankedTensorType output_type, Value input_val,
66                             double output_scale, int64_t output_zp) {
67   // Input should be int32 type
68   auto input_type = input_val.getType().dyn_cast<mlir::RankedTensorType>();
69   (void)input_type;
70   assert(input_type && input_type.getElementType().isInteger(32) &&
71          "expected rescale input element type to be i32");
72 
73   // Potentially check input_shape == output_shape here
74   return buildRescale(rewriter, op, output_type, input_val, output_scale, 0,
75                       output_zp, true);
76 }
77 
78 // Creates a TOSA rescale op based on conv2d parameters.
buildRescaleOpConvOutput(PatternRewriter & rewriter,Operation * op,Value conv_val,RankedTensorType input_type,RankedTensorType weight_type,RankedTensorType output_type)79 Value buildRescaleOpConvOutput(PatternRewriter& rewriter, Operation* op,
80                                Value conv_val, RankedTensorType input_type,
81                                RankedTensorType weight_type,
82                                RankedTensorType output_type) {
83   auto input_qtype =
84       input_type.getElementType().dyn_cast<mlir::quant::UniformQuantizedType>();
85   auto output_qtype = output_type.getElementType()
86                           .dyn_cast<mlir::quant::UniformQuantizedType>();
87 
88   double input_scale = input_qtype.getScale();
89 
90   int64_t output_zp = output_qtype.getZeroPoint();
91   double output_scale = output_qtype.getScale();
92 
93   if (auto weight_per_tensor_qtype =
94           weight_type.getElementType()
95               .dyn_cast<mlir::quant::UniformQuantizedType>()) {
96     // Per-tensor quantization
97     double weight_scale = weight_per_tensor_qtype.getScale();
98 
99     int32_t multiplier;
100     int32_t shift;
101 
102     double op_tensor_scale = (input_scale * weight_scale) / output_scale;
103 
104     // We currently only support 32-bit quantized multiplier.
105     computeMultiplierAndShift(op_tensor_scale, multiplier, shift, 32);
106 
107     auto rescale_op = rewriter.create<tosa::RescaleOp>(
108         op->getLoc(), output_type, conv_val, rewriter.getI32IntegerAttr(0),
109         rewriter.getI32IntegerAttr(output_zp),
110         rewriter.getI32ArrayAttr({multiplier}),
111         rewriter.getI32ArrayAttr({shift}), rewriter.getBoolAttr(true),
112         rewriter.getBoolAttr(true), rewriter.getBoolAttr(false));
113 
114     return rescale_op.getResult();
115 
116   } else if (auto weight_per_channel_qtype =
117                  weight_type.getElementType()
118                      .dyn_cast<mlir::quant::UniformQuantizedPerAxisType>()) {
119     // Per-channel quantization
120     auto output_last_axis = output_type.getShape().size() - 1;
121     uint32_t output_channels = output_type.getShape()[output_last_axis];
122 
123     llvm::SmallVector<int32_t, 4> multiplier_arr;
124     llvm::SmallVector<int32_t, 4> shift_arr;
125 
126     llvm::SmallVector<double, 4> weight_scale_arr(
127         weight_per_channel_qtype.getScales().begin(),
128         weight_per_channel_qtype.getScales().end());
129 
130     int64_t output_zp = output_qtype.getZeroPoint();
131     double output_scale = output_qtype.getScale();
132 
133     for (uint32_t oc = 0; oc < output_channels; oc++) {
134       double weight_scale = weight_scale_arr[oc];
135 
136       int32_t multiplier;
137       int32_t shift;
138 
139       double op_channel_scale = (input_scale * weight_scale) / output_scale;
140 
141       // We currently only support 32-bit quantized multiplier.
142       computeMultiplierAndShift(op_channel_scale, multiplier, shift, 32);
143 
144       multiplier_arr.push_back(multiplier);
145       shift_arr.push_back(shift);
146     }
147 
148     auto rescale_op = rewriter.create<tosa::RescaleOp>(
149         op->getLoc(), output_type, conv_val, rewriter.getI32IntegerAttr(0),
150         rewriter.getI32IntegerAttr(output_zp),
151         rewriter.getI32ArrayAttr(multiplier_arr),
152         rewriter.getI32ArrayAttr(shift_arr), rewriter.getBoolAttr(true),
153         rewriter.getBoolAttr(true), rewriter.getBoolAttr(true));
154 
155     return rescale_op.getResult();
156 
157   } else {
158     op->emitOpError("buildConvRescaleOp: unknown weight quantized type");
159     return nullptr;
160   }
161 }
162 
163 // Create a 513 entry TOSA constant tensor suitable for the Table operator based
164 // on the values from an int32_t func(int32_t) lambda function.
getTosa1DConstTensorTable(PatternRewriter & rewriter,Operation * op,std::function<int32_t (int32_t)> func)165 Value getTosa1DConstTensorTable(PatternRewriter& rewriter, Operation* op,
166                                 std::function<int32_t(int32_t)> func) {
167   llvm::SmallVector<int16_t, 4> table_vec;
168 
169   for (int32_t i = -256; i <= 256; i++) {
170     int32_t value = func(i);
171     // Table entry is int16_t; clamp to expressible range.
172     table_vec.push_back(
173         static_cast<int16_t>(std::min(std::max(value, -32768), 32767)));
174   }
175 
176   auto element_qtype =
177       UniformQuantizedType::get(true, rewriter.getIntegerType(16),
178                                 rewriter.getF32Type(), 1.0f, 0, -32768, 32767);
179   auto const_type = RankedTensorType::get({513}, element_qtype);
180   auto storage_type =
181       RankedTensorType::get({513}, element_qtype.getStorageType());
182   auto const_attr = DenseElementsAttr::get(
183       storage_type, llvm::makeArrayRef<int16_t>(table_vec));
184 
185   auto const_op =
186       rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, const_attr);
187   return const_op.getResult();
188 }
189 
190 // Create a 32-bit float constant operator from a float
getTosaConstTensorSingleF32(PatternRewriter & rewriter,Operation * op,float val)191 Value getTosaConstTensorSingleF32(PatternRewriter& rewriter, Operation* op,
192                                   float val) {
193   auto const_type = RankedTensorType::get({}, rewriter.getF32Type());
194   auto const_attr = DenseElementsAttr::get(const_type, val);
195 
196   auto const_op =
197       rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, const_attr);
198   return const_op.getResult();
199 }
200 
201 // Create a 32-bit integer constant operator from an int
getTosaConstTensorSingleI32(PatternRewriter & rewriter,Operation * op,int32_t val)202 Value getTosaConstTensorSingleI32(PatternRewriter& rewriter, Operation* op,
203                                   int32_t val) {
204   auto const_type = RankedTensorType::get({}, rewriter.getIntegerType(32));
205   auto const_attr = DenseElementsAttr::get(const_type, val);
206 
207   auto const_op =
208       rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, const_attr);
209   return const_op.getResult();
210 }
211 
212 // Create a vector from a 32-bit value tensor.  Returns the size of
213 // the new vector or -1 on error.
getVectorFromValue32(Value val,llvm::SmallVector<int32_t,4> & vec)214 int getVectorFromValue32(Value val, llvm::SmallVector<int32_t, 4>& vec) {
215   int i = 0;
216 
217   ElementsAttr elems;
218 
219   if (!matchPattern(val, m_Constant(&elems))) return -1;
220 
221   for (auto idx : elems.getValues<IntegerAttr>()) {
222     vec.push_back(idx.getInt());
223     i++;
224   }
225 
226   return i;
227 }
228 
229 // Calculates the TOSA padding values based on TF operators padded with
230 // SAME/VALID.
231 //
232 // This could pass tensorflow::FilterTensorFormat and do
233 // GetFilterTensorSpatialDimIndex but the current TF core libs do not support
234 // FORMAT_OHWI parsing by that function in core/util/tensor_format.h
getPaddingValuesFromPadType(tensorflow::Padding tf_pad,tensorflow::TensorFormat data_format_tf,uint32_t first_filter_spatial_dim,RankedTensorType input_type,RankedTensorType filter_type,ArrayAttr strides,ArrayAttr dilations,PatternRewriter & rewriter,ArrayAttr & explicit_padding)235 bool getPaddingValuesFromPadType(
236     tensorflow::Padding tf_pad, tensorflow::TensorFormat data_format_tf,
237     uint32_t first_filter_spatial_dim, RankedTensorType input_type,
238     RankedTensorType filter_type, ArrayAttr strides, ArrayAttr dilations,
239     PatternRewriter& rewriter, ArrayAttr& explicit_padding) {
240   assert(tf_pad != tensorflow::Padding::EXPLICIT);
241 
242   // Storing the numeric padding values is useful for TOSA codegen, as opposed
243   // to holding the padding regime mnemonic, i.e. SAME, VALID, FULL, ...
244   SmallVector<int64_t, 4> computed_paddings;
245 
246   int64_t pad_before, pad_after;
247   for (int i = 0; i < 2; i++) {  // Two spatial dimensions X&Y
248     int64_t ifm_dim = GetTensorSpatialDimIndex(
249         4, data_format_tf, i);  // 4D tensor, NHWC/NCHW format
250     int64_t filter_dim = first_filter_spatial_dim + i;
251 
252     int64_t dim_dilation = dilations[i].template cast<IntegerAttr>().getInt();
253     int64_t dim_stride = strides[i].template cast<IntegerAttr>().getInt();
254 
255     tensorflow::int64 op_size, pad_before_tf,
256         pad_after_tf;  // Complains if using int64_T
257     tensorflow::Status status = tensorflow::GetWindowedOutputSizeVerboseV2(
258         input_type.getDimSize(ifm_dim), filter_type.getDimSize(filter_dim),
259         dim_dilation, dim_stride, tf_pad, &op_size, &pad_before_tf,
260         &pad_after_tf);
261     if (!status.ok()) return false;
262 
263     pad_before = pad_before_tf;
264     pad_after = pad_after_tf;
265     computed_paddings.push_back(pad_before);
266     computed_paddings.push_back(pad_after);
267   }
268 
269   explicit_padding = rewriter.getI64ArrayAttr(computed_paddings);
270   return true;
271 }
272 
273 // Calculates the TOSA padding values for explicit-padded TF operators.
274 //
275 // This function only handles the TF padding array explicit_padding, which is
276 // only present in certain TF ops. All others encode padding using the string
277 // SAME/VALID, which is interpreted using the getPaddingValuesFromPadString
278 // function below.
279 
280 // The explicit padding array in TF holds 2 pad values for every
281 // dimension, even those that are not the 2 spatial ones. Just extract the
282 // 2x pad values for the XY dims.
getPaddingValuesFromExplicitPadAttr(ArrayAttr explicit_pad,tensorflow::TensorFormat data_format_tf,PatternRewriter & rewriter)283 ArrayAttr getPaddingValuesFromExplicitPadAttr(
284     ArrayAttr explicit_pad, tensorflow::TensorFormat data_format_tf,
285     PatternRewriter& rewriter) {
286   SmallVector<int64_t, 4> computed_paddings;
287 
288   int64_t pad_before, pad_after;
289   for (int i = 0; i < 2; i++) {  // Two spatial dimensions X&Y
290     int64_t dim = GetTensorSpatialDimIndex(4, data_format_tf,
291                                            i);  // 4D tensor, NHWC/NCHW format
292 
293     pad_before = explicit_pad[dim * 2].template cast<IntegerAttr>().getInt();
294     pad_after = explicit_pad[dim * 2 + 1].template cast<IntegerAttr>().getInt();
295     computed_paddings.push_back(pad_before);
296     computed_paddings.push_back(pad_after);
297   }
298 
299   return rewriter.getI64ArrayAttr(computed_paddings);
300 }
301 
302 // Calculates the TOSA padding values for transposeConv2d
getTransposeConv2dPaddingValues(tensorflow::Padding tf_pad,tensorflow::TensorFormat data_format_tf,uint32_t first_filter_spatial_dim,RankedTensorType input_type,RankedTensorType filter_type,RankedTensorType output_type,ArrayAttr strides,ArrayAttr dilations,PatternRewriter & rewriter,ArrayAttr & explicit_padding)303 bool getTransposeConv2dPaddingValues(
304     tensorflow::Padding tf_pad, tensorflow::TensorFormat data_format_tf,
305     uint32_t first_filter_spatial_dim, RankedTensorType input_type,
306     RankedTensorType filter_type, RankedTensorType output_type,
307     ArrayAttr strides, ArrayAttr dilations, PatternRewriter& rewriter,
308     ArrayAttr& explicit_padding) {
309   assert(tf_pad != tensorflow::Padding::EXPLICIT);
310 
311   // Storing the numeric padding values is useful for TOSA codegen, as opposed
312   // to holding the padding regime mnemonic, i.e. SAME, VALID, FULL, ...
313 
314   SmallVector<int64_t, 2> computed_paddings;
315 
316   int64_t pad_before, pad_after;
317   for (int i = 0; i < 2; i++) {  // Two spatial dimensions X&Y
318     int64_t ifm_dim = GetTensorSpatialDimIndex(
319         4, data_format_tf, i);  // 4D tensor, NHWC/NCHW format
320     int64_t ofm_dim = GetTensorSpatialDimIndex(
321         4, data_format_tf, i);  // 4D tensor, NHWC/NCHW format
322     int64_t filter_dim = first_filter_spatial_dim + i;
323 
324     int64_t ifm_size = input_type.getDimSize(ifm_dim);
325     int64_t filter_size = filter_type.getDimSize(filter_dim);
326     int64_t ofm_size = output_type.getDimSize(ofm_dim);
327     int64_t dim_dilation = dilations[i].template cast<IntegerAttr>().getInt();
328     int64_t dim_stride = strides[i].template cast<IntegerAttr>().getInt();
329 
330     int effective_filter_size = (filter_size - 1) * dim_dilation + 1;
331     int total_padding =
332         ((ifm_size - 1) * dim_stride + effective_filter_size - ofm_size);
333     total_padding = total_padding > 0 ? total_padding : 0;
334 
335     pad_before = total_padding / 2;
336     pad_after = total_padding - pad_before;
337 
338     computed_paddings.push_back(pad_before);
339   }
340 
341   explicit_padding = rewriter.getI64ArrayAttr(computed_paddings);
342   return true;
343 }
344 
345 // Templated function to create a constant op in a given dialect and with a
346 // given type.  Specializations below.
347 
348 // T0: target dialect constant op
349 // T1: native c++ integer type
350 template <typename T0, typename T1>
get1DConstTensor(PatternRewriter & rewriter,Operation * op,SmallVector<T1,8> arr)351 Value get1DConstTensor(PatternRewriter& rewriter, Operation* op,
352                        SmallVector<T1, 8> arr) {
353   auto const_type =
354       RankedTensorType::get({static_cast<int32_t>(arr.size())},
355                             rewriter.getIntegerType(sizeof(T1) * 8));
356   auto const_attr =
357       DenseElementsAttr::get(const_type, llvm::makeArrayRef<T1>(arr));
358 
359   auto const_op = rewriter.create<T0>(op->getLoc(), const_type, const_attr);
360   return const_op.getResult();
361 }
362 
363 // Specialization for Const ops
364 template <>
get1DConstTensor(PatternRewriter & rewriter,Operation * op,SmallVector<float,8> arr)365 Value get1DConstTensor<tosa::ConstOp, float>(PatternRewriter& rewriter,
366                                              Operation* op,
367                                              SmallVector<float, 8> arr) {
368   auto const_type = RankedTensorType::get({static_cast<int32_t>(arr.size())},
369                                           rewriter.getF32Type());
370   auto const_attr =
371       DenseElementsAttr::get(const_type, llvm::makeArrayRef<float>(arr));
372 
373   auto const_op =
374       rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, const_attr);
375   return const_op.getResult();
376 }
377 
// Explicit instantiations of get1DConstTensor for the constant op / integer
// type combinations listed here: tosa::ConstOp and TFL::ConstOp, each with
// int32_t and int64_t data.
template Value get1DConstTensor<tosa::ConstOp, int32_t>(
    PatternRewriter&, Operation*, SmallVector<int32_t, 8> arr);
template Value get1DConstTensor<tosa::ConstOp, int64_t>(
    PatternRewriter&, Operation*, SmallVector<int64_t, 8> arr);
template Value get1DConstTensor<TFL::ConstOp, int32_t>(
    PatternRewriter&, Operation*, SmallVector<int32_t, 8> arr);
template Value get1DConstTensor<TFL::ConstOp, int64_t>(
    PatternRewriter&, Operation*, SmallVector<int64_t, 8> arr);
386 
387 // Same as get1DConstTensor, but int48 is not native c++ type, needs additional
388 // interface
get1DConstTensorInt48(PatternRewriter & rewriter,Operation * op,SmallVector<int64_t,8> arr)389 Value get1DConstTensorInt48(PatternRewriter& rewriter, Operation* op,
390                             SmallVector<int64_t, 8> arr) {
391   auto const_type = RankedTensorType::get({static_cast<int32_t>(arr.size())},
392                                           rewriter.getIntegerType(48));
393   auto const_attr =
394       DenseElementsAttr::get(const_type, llvm::makeArrayRef<int64_t>(arr));
395 
396   auto const_op =
397       rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, const_attr);
398   return const_op.getResult();
399 }
400 
401 // Strip off quantization information for bias tensor and return a unquantized
402 // bias
getUnquantizedBias(PatternRewriter & rewriter,Operation * op,Value input)403 Value getUnquantizedBias(PatternRewriter& rewriter, Operation* op,
404                          Value input) {
405   auto input_type = input.getType().dyn_cast<mlir::RankedTensorType>();
406   assert(input_type);
407   auto input_element_type = input_type.getElementType();
408   auto input_element_qtype =
409       input_element_type.dyn_cast<mlir::quant::QuantizedType>();
410 
411   if (input_element_qtype) {
412     auto output_type = RankedTensorType::get(
413         input_type.getShape(),
414         rewriter.getIntegerType(
415             input_element_qtype.getStorageTypeIntegralWidth()));
416 
417     auto input_defining_op = dyn_cast<TFL::QConstOp>(input.getDefiningOp());
418     auto dense_attr = input_defining_op.value().dyn_cast<DenseElementsAttr>();
419 
420     if (dense_attr) {
421       auto const_op =
422           rewriter.create<tosa::ConstOp>(op->getLoc(), output_type, dense_attr);
423       return const_op.getResult();
424     } else {
425       return input;
426     }
427 
428   } else {
429     return input;
430   }
431 }
432 
433 }  // namespace tosa
434 }  // namespace mlir
435