1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h"
17 
18 #include <algorithm>
19 #include <cstdint>
20 #include <iterator>
21 #include <limits>
22 #include <numeric>
23 
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/Support/Casting.h"
27 #include "mlir/Dialect/Quant/FakeQuantSupport.h"  // from @llvm-project
28 #include "mlir/Dialect/Quant/QuantOps.h"  // from @llvm-project
29 #include "mlir/Dialect/Quant/QuantTypes.h"  // from @llvm-project
30 #include "mlir/Dialect/Quant/QuantizeUtils.h"  // from @llvm-project
31 #include "mlir/Dialect/Quant/UniformSupport.h"  // from @llvm-project
32 #include "mlir/IR/Attributes.h"  // from @llvm-project
33 #include "mlir/IR/BuiltinAttributes.h"  // from @llvm-project
34 #include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
35 #include "mlir/IR/Diagnostics.h"  // from @llvm-project
36 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
37 #include "mlir/Support/LLVM.h"  // from @llvm-project
38 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
39 #include "tensorflow/compiler/mlir/lite/quantization/quantization_traits.h"
40 #include "tensorflow/lite/kernels/internal/tensor_utils.h"
41 #include "tensorflow/lite/tools/optimize/quantization_utils.h"
42 
43 namespace mlir {
44 
45 // This includes the interface class definition. It couldn't be in a namespace
46 // because the table gen doesn't emit the namespace when it is used.
47 #include "tensorflow/compiler/mlir/lite/quantization/quantization_interface.cc.inc"
48 
49 namespace quant {
50 
51 constexpr double kNearZeroTolerance = 1.0e-6;
52 constexpr double kSmallestHalfRange = kNearZeroTolerance / 2;
53 
54 // This method expands the range to be larger than or equal to 1.0e-6, if it is
55 // very small (< 1.0e-6). This is to prevent very large quantized value by this
56 // range.
ExpandVerySmallRange(ArrayRef<double> mins,ArrayRef<double> maxs,SmallVectorImpl<double> * effective_mins,SmallVectorImpl<double> * effective_maxs)57 static void ExpandVerySmallRange(ArrayRef<double> mins, ArrayRef<double> maxs,
58                                  SmallVectorImpl<double>* effective_mins,
59                                  SmallVectorImpl<double>* effective_maxs) {
60   for (auto arg : llvm::zip(mins, maxs)) {
61     double min = std::get<0>(arg);
62     double max = std::get<1>(arg);
63     // The range is wide, then use the same min/max.
64     if ((max - min) > kNearZeroTolerance) {
65       effective_mins->push_back(min);
66       effective_maxs->push_back(max);
67       continue;
68     }
69 
70     // The range is small. Expands the range to stride 0.0 and also at least
71     // 1.0e-6.
72     effective_mins->push_back(std::min(min, -kSmallestHalfRange));
73     effective_maxs->push_back(std::max(max, kSmallestHalfRange));
74   }
75 }
76 
77 // Returns the quantized type for the
78 // input_type/min/max/storag_type_width/narrow_range.
79 // This is entry point to the Quant dialect and used for both quantizing
80 // activations and weights.
GetQuantizedType(Builder builder,Type input_type,ArrayRef<double> min,ArrayRef<double> max,int quant_dim,int storage_type_width,bool narrow_range,bool is_signed,bool legacy_float_scale)81 Type GetQuantizedType(Builder builder, Type input_type, ArrayRef<double> min,
82                       ArrayRef<double> max, int quant_dim,
83                       int storage_type_width, bool narrow_range, bool is_signed,
84                       bool legacy_float_scale) {
85   auto converter =
86       quant::ExpressedToQuantizedConverter::forInputType(input_type);
87 
88   // Expand the range to prevent extremely small scales and large quantized
89   // integers which can cause overflow. This leads to scale
90   // 7.843137254901961e-9 with 8 bits.
91   SmallVector<double, 4> effective_mins, effective_maxs;
92   ExpandVerySmallRange(min, max, &effective_mins, &effective_maxs);
93 
94   quant::QuantizedType quantizedEleType;
95   if (min.size() == 1 && max.size() == 1 && quant_dim == -1) {
96     quantizedEleType = quant::fakeQuantAttrsToType(
97         builder.getUnknownLoc(), storage_type_width, effective_mins[0],
98         effective_maxs[0], narrow_range, converter.expressedType, is_signed);
99     if (legacy_float_scale) {
100       quantizedEleType =
101           DownCastScale(quantizedEleType, effective_mins[0], effective_maxs[0],
102                         builder.getUnknownLoc());
103     }
104   } else if (min.size() == max.size()) {
105     auto shape = input_type.dyn_cast<ShapedType>();
106     if (!shape || shape.getRank() <= quant_dim ||
107         static_cast<int64_t>(min.size()) != shape.getDimSize(quant_dim)) {
108       return {};
109     }
110     // TODO(b/141508873): the quantization dim is set to the last dimension.
111     quantizedEleType = quant::fakeQuantAttrsToType(
112         builder.getUnknownLoc(), storage_type_width, quant_dim, effective_mins,
113         effective_maxs, narrow_range, converter.expressedType, is_signed);
114     if (legacy_float_scale) {
115       quantizedEleType = DownCastScale(quantizedEleType, effective_mins,
116                                        effective_maxs, builder.getUnknownLoc());
117     }
118   }
119   if (!quantizedEleType) return {};
120   return converter.convert(quantizedEleType);
121 }
122 
123 // TODO(fengliuai): promote this utility method to mlir QuantOps.
RescaleQuantizedType(Type input,Attribute factor)124 TypeAttr RescaleQuantizedType(Type input, Attribute factor) {
125   auto factor_values = factor.dyn_cast_or_null<DenseFPElementsAttr>();
126   if (!factor_values) return {};
127   auto ele_type = quant::QuantizedType::getQuantizedElementType(input);
128   if (!ele_type) return {};
129   if (auto qtype = ele_type.dyn_cast<quant::UniformQuantizedPerAxisType>()) {
130     ArrayRef<double> scales = qtype.getScales();
131     // Broadcasting hasn't been implemented yet.
132     if (static_cast<int64_t>(scales.size()) != factor_values.getNumElements())
133       return {};
134     SmallVector<double, 4> new_scales;
135     new_scales.reserve(scales.size());
136     auto scales_iter = scales.begin();
137     for (const auto& f : factor_values) {
138       new_scales.push_back(*(scales_iter++) *
139                            std::fabs(FloatAttr::getValueAsDouble(f)));
140     }
141     // We are assuming symmetric quantization.
142     auto new_ele_type = quant::UniformQuantizedPerAxisType::get(
143         qtype.getFlags(), qtype.getStorageType(), qtype.getExpressedType(),
144         new_scales, qtype.getZeroPoints(), qtype.getQuantizedDimension(),
145         qtype.getStorageTypeMin(), qtype.getStorageTypeMax());
146     if (auto new_type = new_ele_type.castFromExpressedType(
147             quant::QuantizedType::castToExpressedType(input))) {
148       return TypeAttr::get(new_type);
149     }
150   }
151   // Currently, we only support per-axis quantized type.
152   return {};
153 }
154 
GetQuantizedTypeAttr(Builder builder,Type input_type,Attribute min,Attribute max,int quant_dim,IntegerAttr num_bits,BoolAttr narrow_range,bool is_signed,bool legacy_float_scale)155 TypeAttr GetQuantizedTypeAttr(Builder builder, Type input_type, Attribute min,
156                               Attribute max, int quant_dim,
157                               IntegerAttr num_bits, BoolAttr narrow_range,
158                               bool is_signed, bool legacy_float_scale) {
159   SmallVector<double, 4> min_value, max_value;
160   auto mins = min.dyn_cast<DenseFPElementsAttr>();
161   auto maxs = max.dyn_cast<DenseFPElementsAttr>();
162   if (mins && maxs) {
163     min_value.reserve(mins.getNumElements());
164     max_value.reserve(maxs.getNumElements());
165     for (auto it = mins.begin(), e = mins.end(); it != e; ++it) {
166       min_value.push_back(FloatAttr::getValueAsDouble(*it));
167     }
168     for (auto it = maxs.begin(), e = maxs.end(); it != e; ++it) {
169       max_value.push_back(FloatAttr::getValueAsDouble(*it));
170     }
171   } else {
172     auto fmin = min.dyn_cast<FloatAttr>();
173     auto fmax = max.dyn_cast<FloatAttr>();
174     if (fmin && fmax) {
175       min_value.push_back(fmin.getValueAsDouble());
176       max_value.push_back(fmax.getValueAsDouble());
177     } else {
178       return {};
179     }
180   }
181   Type final_type = GetQuantizedType(
182       builder, input_type, min_value, max_value, quant_dim, num_bits.getInt(),
183       narrow_range.getValue(), is_signed, legacy_float_scale);
184   if (!final_type) return {};
185   return TypeAttr::get(final_type);
186 }
187 
188 // Repeats the content of `data` multiple times to resize to `target_size`.
189 // Note that this only broadcast across one dimension.
190 template <typename T>
BroadcastVector(int target_size,SmallVectorImpl<T> & data)191 static bool BroadcastVector(int target_size, SmallVectorImpl<T>& data) {
192   int size = data.size();
193   if (size != target_size) {
194     if (target_size % size != 0) return true;
195     data.reserve(target_size);
196     for (int i = 1, e = target_size / size; i != e; ++i) {
197       data.insert(data.end(), data.begin(), data.begin() + size);
198     }
199   }
200   return false;
201 }
202 
203 // Changes the axis of the input per-channel quantized type to match the
204 // dimension of the target type. Returns nullptr if it fails.
ResetAxisAndBroadcast(ArrayRef<int64_t> shape,quant::UniformQuantizedPerAxisType qtype,Type target,int quant_dim)205 static quant::UniformQuantizedPerAxisType ResetAxisAndBroadcast(
206     ArrayRef<int64_t> shape, quant::UniformQuantizedPerAxisType qtype,
207     Type target, int quant_dim) {
208   auto shaped = target.dyn_cast<RankedTensorType>();
209   if (!shaped) return {};
210   ArrayRef<int64_t> new_shape = shaped.getShape();
211 
212   SmallVector<double, 4> scales(qtype.getScales().begin(),
213                                 qtype.getScales().end());
214   SmallVector<int64_t, 4> zero_points(qtype.getZeroPoints().begin(),
215                                       qtype.getZeroPoints().end());
216 
217   if (new_shape.size() == shape.size()) {  // same rank
218     // Broadcast the scales and zero points to match the target size, which is
219     // usually the axis-th dimension of the target type. Currently, it covers
220     // two cases:
221     // - for Transpose, the data layout is changed so the `dim[axis]` still
222     // equals to the `scales_size`. The broadcast skips;
223     // - for Reshape, the data layout isn't changed but the innermost dimension
224     // is expand to cover the last two original dimensions. Thus we just need to
225     // be repeated the `scales` dim[2] times to covers the new dim length.
226     //
227     // TODO(b/141709944): after the fix, the `scales` can be for dim[2], thus we
228     // have to repeat each elements in the `scales` locally dim[3] times.
229     if (BroadcastVector<double>(shaped.getDimSize(quant_dim), scales) ||
230         BroadcastVector<int64_t>(shaped.getDimSize(quant_dim), zero_points)) {
231       return {};
232     }
233   } else if ((new_shape.size() == shape.size() + 1) && new_shape.back() == 1) {
234     // This is a trivial shift left, then we shift the quant_dim as well.
235     if (std::equal(shape.begin(), shape.end(), new_shape.begin()) &&
236         quant_dim == -1) {
237       quant_dim = shape.size() + quant_dim;
238     } else {
239       return {};
240     }
241   } else {
242     return {};
243   }
244 
245   return quant::UniformQuantizedPerAxisType::get(
246       qtype.getFlags(), qtype.getStorageType(), qtype.getExpressedType(),
247       scales, zero_points, quant_dim, qtype.getStorageTypeMin(),
248       qtype.getStorageTypeMax());
249 }
250 
CastQuantizedTypeAttrFromExpressedType(Builder builder,TypeAttr source,Type target,int axis)251 TypeAttr CastQuantizedTypeAttrFromExpressedType(Builder builder,
252                                                 TypeAttr source, Type target,
253                                                 int axis) {
254   auto source_type = source.getValue().dyn_cast_or_null<ShapedType>();
255   if (!source_type) return {};
256   auto src_ele_type = source_type.getElementType();
257   auto qtype = src_ele_type.dyn_cast<quant::QuantizedType>();
258 
259   // Reset the quantization dimensions if it is per-axis.
260   if (auto per_axis =
261           qtype.dyn_cast_or_null<quant::UniformQuantizedPerAxisType>()) {
262     qtype =
263         ResetAxisAndBroadcast(source_type.getShape(), per_axis, target, axis);
264   }
265   if (!qtype) return {};
266   Type final_type = qtype.castFromExpressedType(target);
267   if (!final_type) return {};
268   return TypeAttr::get(final_type);
269 }
270 
ExtractMinMaxFromAttr(DenseFPElementsAttr values,int dim_size,int slice_size,bool symmetric,SmallVectorImpl<double> & mins,SmallVectorImpl<double> & maxs)271 void ExtractMinMaxFromAttr(DenseFPElementsAttr values, int dim_size,
272                            int slice_size, bool symmetric,
273                            SmallVectorImpl<double>& mins,
274                            SmallVectorImpl<double>& maxs) {
275   // If all the element values are same we don't need to scan the content.
276   if (values.isSplat()) {
277     double single_value =
278         FloatAttr::getValueAsDouble(values.getSplatValue<llvm::APFloat>());
279 
280     // When the single value isn't 0.0, we expand it to a range to include
281     // this single value and 0.0. This will give us a scale and zero point
282     // works for both this value and 0.0.
283     if (single_value < 0.0) {
284       mins[0] = single_value;
285       maxs[0] = symmetric ? -single_value : 0.0;
286     } else if (single_value > 0.0) {
287       mins[0] = symmetric ? -single_value : 0.0;
288       maxs[0] = single_value;
289     } else {
290       mins[0] = maxs[0] = single_value;
291     }
292     for (int i = 1; i < dim_size; ++i) {
293       mins[i] = mins[0];
294       maxs[i] = maxs[0];
295     }
296   } else {
297     int64_t flatten_index = 0;
298     for (auto it = values.begin(), e = values.end(); it != e;
299          ++it, ++flatten_index) {
300       double ele_value = FloatAttr::getValueAsDouble(*it);
301       int slice_index = flatten_index / slice_size;
302       int channel_index = slice_index % dim_size;
303       mins[channel_index] = std::min(mins[channel_index], ele_value);
304       maxs[channel_index] = std::max(maxs[channel_index], ele_value);
305     }
306     // Expand range to include 0.
307     for (int i = 0; i < dim_size; ++i) {
308       maxs[i] = std::max(maxs[i], 0.0);
309       mins[i] = std::min(mins[i], 0.0);
310     }
311     if (symmetric) {
312       for (int i = 0; i < dim_size; ++i) {
313         maxs[i] = std::max(std::abs(mins[i]), std::abs(maxs[i]));
314         mins[i] = -maxs[i];
315       }
316     }
317   }
318 }
319 
GetUniformQuantizedTypeForWeight(ElementsAttr attr,bool symmetric,unsigned num_bits,bool is_signed,bool narrow_range,bool legacy_float_scale)320 Type GetUniformQuantizedTypeForWeight(ElementsAttr attr, bool symmetric,
321                                       unsigned num_bits, bool is_signed,
322                                       bool narrow_range,
323                                       bool legacy_float_scale) {
324   Builder builder(attr.getContext());
325   // `symmetric` can only be used when it is `signed` and `narrow_range`.
326   if (symmetric && (!is_signed || !narrow_range)) return {};
327 
328   SmallVector<double, 4> mins(1, std::numeric_limits<double>::max());
329   SmallVector<double, 4> maxs(1, std::numeric_limits<double>::min());
330   auto fp = attr.dyn_cast<DenseFPElementsAttr>();
331   if (!fp) return {};
332 
333   // Computes the effective min/max values of the attribute values.
334   ExtractMinMaxFromAttr(fp, /*dim_size=*/1, /*slice_size=*/1, symmetric, mins,
335                         maxs);
336 
337   auto type = GetQuantizedType(builder, attr.getType(), mins[0], maxs[0],
338                                /*quant_dim=*/-1, num_bits, narrow_range,
339                                is_signed, legacy_float_scale);
340   if (auto ele_type = type.dyn_cast_or_null<TensorType>())
341     return ele_type.getElementType();
342 
343   return {};
344 }
345 
GetUniformQuantizedPerAxisTypeForWeight(ElementsAttr attr,int quant_dim,bool symmetric,unsigned num_bits,bool is_signed,bool narrow_range,bool legacy_float_scale)346 Type GetUniformQuantizedPerAxisTypeForWeight(ElementsAttr attr, int quant_dim,
347                                              bool symmetric, unsigned num_bits,
348                                              bool is_signed, bool narrow_range,
349                                              bool legacy_float_scale) {
350   Builder builder(attr.getContext());
351   auto shape = attr.getType().cast<ShapedType>().getShape();
352   if (static_cast<int>(shape.size()) <= quant_dim) return {};
353   // `symmetric` can only be used when it is `signed` and `narrow_range`.
354   if (symmetric && (!is_signed || !narrow_range)) return {};
355 
356   int dim_size = shape[quant_dim];
357   int slice_size = std::accumulate(std::next(shape.begin(), quant_dim + 1),
358                                    shape.end(), 1, std::multiplies<int64_t>());
359   SmallVector<double, 4> mins(dim_size, std::numeric_limits<double>::max());
360   SmallVector<double, 4> maxs(dim_size, std::numeric_limits<double>::min());
361   auto fp = attr.dyn_cast<DenseFPElementsAttr>();
362   if (!fp) return {};
363 
364   // Computes the effective min/max values of the attribute values.
365   ExtractMinMaxFromAttr(fp, dim_size, slice_size, symmetric, mins, maxs);
366 
367   auto type =
368       GetQuantizedType(builder, attr.getType(), mins, maxs, quant_dim, num_bits,
369                        narrow_range, is_signed, legacy_float_scale);
370   if (auto ele_type = type.dyn_cast_or_null<TensorType>())
371     return ele_type.getElementType();
372 
373   return {};
374 }
375 
GetUniformQuantizedTypeForBias(const std::vector<quant::QuantizedType> & op_types,bool legacy_float_scale)376 quant::QuantizedType GetUniformQuantizedTypeForBias(
377     const std::vector<quant::QuantizedType>& op_types,
378     bool legacy_float_scale) {
379   if (op_types.empty()) return {};
380 
381   size_t axis_size = 1;
382   int32_t quant_dim = -1;
383   Type expressed_type;
384   // Requires all the op types are valid UniformQuantizedTypes or
385   // UniformQuantizedPerAxisTypes and also have same expressed type. For all
386   // the UniformQuantizedPerAxisTypes, the quantization dimension index and
387   // dimension sizes are same.
388   for (auto op_type : op_types) {
389     if (!op_type) return {};
390     if (expressed_type && expressed_type != op_type.getExpressedType()) {
391       return {};
392     }
393     expressed_type = op_type.getExpressedType();
394 
395     if (auto type = op_type.dyn_cast<quant::UniformQuantizedPerAxisType>()) {
396       if ((axis_size != 1 && axis_size != type.getScales().size())) return {};
397       if (quant_dim != -1 && quant_dim != type.getQuantizedDimension())
398         return {};
399       axis_size = type.getScales().size();
400       quant_dim = type.getQuantizedDimension();
401     } else if (!op_type.isa<quant::UniformQuantizedType>()) {
402       return {};
403     }
404   }
405 
406   // The scale from the UniformQuantizedTypes is broadcasted if there are
407   // UniformQuantizedPerAxisTypes.
408   llvm::SmallVector<double, 4> scales(axis_size, 1.0);
409   for (auto op_type : op_types) {
410     if (auto type = op_type.dyn_cast<quant::UniformQuantizedPerAxisType>()) {
411       for (auto index_scale : llvm::enumerate(type.getScales())) {
412         scales[index_scale.index()] *= index_scale.value();
413       }
414     } else if (auto type = op_type.dyn_cast<quant::UniformQuantizedType>()) {
415       for (int index = 0, e = axis_size; index != e; ++index) {
416         scales[index] *= type.getScale();
417       }
418     }
419   }
420   if (legacy_float_scale) {
421     for (int i = 0; i < scales.size(); ++i) {
422       scales[i] = static_cast<float>(scales[i]);
423     }
424   }
425 
426   // Builds the result quantized type, which has signed 32 bits storage type.
427   Builder builder(expressed_type.getContext());
428   IntegerType storage_type = builder.getIntegerType(32);
429   int64_t storage_type_min =
430       quant::QuantizedType::getDefaultMinimumForInteger(/*isSigned=*/true, 32);
431   int64_t storage_type_max =
432       quant::QuantizedType::getDefaultMaximumForInteger(/*isSigned=*/true, 32);
433   if (axis_size == 1) {
434     return quant::UniformQuantizedType::getChecked(
435         /*flags=*/true, storage_type, expressed_type, scales[0],
436         /*zeroPoint=*/0, storage_type_min, storage_type_max,
437         builder.getUnknownLoc());
438   } else {
439     llvm::SmallVector<int64_t, 4> zero_points(axis_size, 0);
440     // TODO(b/141508873): Assume the bias is a 1-D tensor, and set the
441     // quantization dim to the last dimension, which is 0. If the bias rank is
442     // larger than 1, this returned quantized type couldn't be used to
443     // quantize the bias.
444     return quant::UniformQuantizedPerAxisType::getChecked(
445         /*flags=*/true, storage_type, expressed_type, scales, zero_points,
446         /*quantizedDimension=*/0, storage_type_min, storage_type_max,
447         builder.getUnknownLoc());
448   }
449 }
450 
QuantizeLegacy(Attribute real_value,Type tensor_type)451 ElementsAttr QuantizeLegacy(Attribute real_value, Type tensor_type) {
452   if (!real_value.isa<DenseFPElementsAttr>() ||
453       !quant::QuantizedType::getQuantizedElementType(tensor_type)) {
454     return {};
455   }
456   auto real_values_attr = real_value.cast<DenseFPElementsAttr>();
457   auto q_type = quant::QuantizedType::getQuantizedElementType(tensor_type);
458   std::vector<float> real_values;
459   llvm::SmallVector<APInt, 8> quantized_attr;
460   real_values.reserve(real_values_attr.getNumElements());
461   quantized_attr.reserve(real_values_attr.getNumElements());
462   std::transform(real_values_attr.begin(), real_values_attr.end(),
463                  std::back_inserter(real_values), [&](APFloat value) -> float {
464                    return value.convertToFloat();
465                  });
466   ShapedType new_dense_type =
467       q_type.castExpressedToStorageType(real_values_attr.getType())
468           .dyn_cast_or_null<ShapedType>();
469   int width = q_type.getStorageType().dyn_cast<mlir::IntegerType>().getWidth();
470 
471   if (width == 8 && q_type.getStorageTypeMax() == 127 &&
472       q_type.getStorageTypeMin() == -127) {
473     std::vector<int8_t> quantized_values(real_values_attr.getNumElements());
474     if (q_type.isa<UniformQuantizedType>()) {
475       float min, max, scale;
476       tflite::tensor_utils::SymmetricQuantizeFloats(
477           real_values.data(), real_values.size(), quantized_values.data(), &min,
478           &max, &scale);
479     } else if (auto uniform_type =
480                    q_type.dyn_cast<UniformQuantizedPerAxisType>()) {
481       std::vector<float> scales_inv;
482       std::vector<int32_t> dimension;
483       dimension.insert(dimension.end(), new_dense_type.getShape().begin(),
484                        new_dense_type.getShape().end());
485       std::transform(uniform_type.getScales().begin(),
486                      uniform_type.getScales().end(),
487                      std::back_inserter(scales_inv),
488                      [](float scale) { return 1.0 / scale; });
489 
490       tflite::optimize::utils::SymmetricPerChannelQuantizeValues(
491           real_values.data(), scales_inv, dimension,
492           uniform_type.getQuantizedDimension(), &quantized_values);
493     } else {
494       return {};
495     }
496     std::transform(quantized_values.begin(), quantized_values.end(),
497                    std::back_inserter(quantized_attr),
498                    [&](int8_t value) -> APInt {
499                      return APInt(8, value, /*isSigned=*/true);
500                    });
501     return DenseElementsAttr::get(new_dense_type, quantized_attr);
502   } else if (width == 16) {
503     if (auto uniform_type = q_type.dyn_cast<UniformQuantizedType>()) {
504       auto quantized_values =
505           tflite::optimize::utils::SymmetricQuantizeFloatsToInt16(
506               real_values.data(), real_values.size(), uniform_type.getScale());
507       std::transform(quantized_values.begin(), quantized_values.end(),
508                      std::back_inserter(quantized_attr),
509                      [&](int16_t value) -> APInt {
510                        return APInt(16, value, /*isSigned=*/true);
511                      });
512       return DenseElementsAttr::get(new_dense_type, quantized_attr);
513     }
514   } else if (width == 32) {
515     std::vector<float> scales;
516     if (auto uniform_type = q_type.dyn_cast<UniformQuantizedType>()) {
517       scales.push_back(uniform_type.getScale());
518     } else if (auto uniform_type =
519                    q_type.dyn_cast<UniformQuantizedPerAxisType>()) {
520       scales.insert(scales.end(), uniform_type.getScales().begin(),
521                     uniform_type.getScales().end());
522     } else {
523       return {};
524     }
525     auto quantized_bias =
526         tflite::optimize::utils::SymmetricBiasQuantize<std::int32_t>(
527             real_values.data(), real_values.size(), scales);
528     std::transform(quantized_bias.begin(), quantized_bias.end(),
529                    std::back_inserter(quantized_attr),
530                    [&](int32_t value) -> APInt {
531                      return APInt(32, value, /*isSigned=*/true);
532                    });
533     return DenseElementsAttr::get(new_dense_type, quantized_attr);
534   }
535   return {};
536 }
537 
Quantize(Attribute real_value,Type tensor_type)538 ElementsAttr Quantize(Attribute real_value, Type tensor_type) {
539   if (auto q_type =
540           quant::QuantizedType::getQuantizedElementType(tensor_type)) {
541     Type converted_type;
542     return quant::quantizeAttr(real_value, q_type, converted_type)
543         .dyn_cast<ElementsAttr>();
544   }
545   return {};
546 }
547 
DownCastScale(QuantizedType type,double min,double max,Location loc)548 QuantizedType DownCastScale(QuantizedType type, double min, double max,
549                             Location loc) {
550   SmallVector<double, 1> mins = {min};
551   SmallVector<double, 1> maxs = {max};
552   return DownCastScale(type, mins, maxs, loc);
553 }
554 
DownCastScale(QuantizedType type,const SmallVectorImpl<double> & mins,const SmallVectorImpl<double> & maxs,Location loc)555 QuantizedType DownCastScale(QuantizedType type,
556                             const SmallVectorImpl<double>& mins,
557                             const SmallVectorImpl<double>& maxs, Location loc) {
558   SmallVector<double, 4> scales(mins.size());
559   for (int i = 0; i < mins.size(); ++i) {
560     scales[i] = (static_cast<float>(maxs[i]) - static_cast<float>(mins[i])) /
561                 (type.getStorageTypeMax() - type.getStorageTypeMin());
562     if (scales[i] < kNearZeroTolerance &&
563         type.getStorageTypeIntegralWidth() == 8) {
564       emitWarning(loc) << "The scale " << scales[i] << " is too small, and "
565                        << "might cause overflow for bias. Forcing to use scale "
566                        << kNearZeroTolerance;
567       scales[i] = kNearZeroTolerance;
568     }
569   }
570   if (auto q_type = type.dyn_cast<UniformQuantizedType>()) {
571     return UniformQuantizedType::get(
572         q_type.getFlags(), q_type.getStorageType(), q_type.getExpressedType(),
573         scales[0], q_type.getZeroPoint(), q_type.getStorageTypeMin(),
574         q_type.getStorageTypeMax());
575   } else if (auto q_type = type.dyn_cast<UniformQuantizedPerAxisType>()) {
576     return UniformQuantizedPerAxisType::get(
577         q_type.getFlags(), q_type.getStorageType(), q_type.getExpressedType(),
578         scales, q_type.getZeroPoints(), q_type.getQuantizedDimension(),
579         q_type.getStorageTypeMin(), q_type.getStorageTypeMax());
580   }
581   return type;
582 }
583 
584 // A heuristic to determine whether the scales needs to be from operands or
585 // from results for the ops with the `SameOperandsAndResultsScale` property.
586 // The current implementation is based on the number of operands.
PreferResultScale(Operation * op)587 static bool PreferResultScale(Operation* op) {
588   int float_operands = 0;
589   for (auto operand : op->getOperands()) {
590     if (auto operand_type = operand.getType().dyn_cast<ShapedType>()) {
591       if (operand_type.getElementType().isa<FloatType>()) {
592         if (++float_operands > 1) return true;
593       }
594     }
595   }
596   return false;
597 }
598 
599 // The stats op of some of the ops can be redundant. The current implementation
600 // only considers the ops with restricted output params.
IsStatsRedundant(Operation * op,OpQuantSpecGetter op_quant_spec_getter)601 static bool IsStatsRedundant(Operation* op,
602                              OpQuantSpecGetter op_quant_spec_getter) {
603   return llvm::isa<FixedOutputRangeInterface>(op);
604 }
605 
RemoveRedundantStatsOps(mlir::FuncOp func,OpQuantSpecGetter op_quant_spec_getter)606 bool RemoveRedundantStatsOps(mlir::FuncOp func,
607                              OpQuantSpecGetter op_quant_spec_getter) {
608   llvm::SmallVector<quant::StatisticsOp, 16> all_stats_ops;
609   llvm::DenseSet<Operation*> redundant_stats_ops;
610 
611   // Step 0: remove the quant::StatisticsOp which are used by the tfl.quantize
612   // op in case it overrides the information from training FakeQuant ops.
613   func.walk([&](quant::QuantizeCastOp q) {
614     auto input_op = q.arg().getDefiningOp();
615     if (auto stats = llvm::dyn_cast_or_null<quant::StatisticsOp>(input_op)) {
616       q.setOperand(stats.arg());
617       if (stats.use_empty()) stats.erase();
618     }
619   });
620 
621   // Step 1: forward pass: propagate any value scales which are not produces
622   // by `SameOperandsAndResultsScale`. Additionally, remove the value scales
623   // which are produced by the ops with the `FixedOutputRangeInterface`.
624   // Note that we don't propagate across the multiple-operands
625   // `SameOperandsAndResultsScale` ops like `concatenation`.
626   func.walk(
627       [&](quant::StatisticsOp stats_op) { all_stats_ops.push_back(stats_op); });
628 
629   while (!all_stats_ops.empty()) {
630     quant::StatisticsOp stats_op = all_stats_ops.back();
631     all_stats_ops.pop_back();
632 
633     if (auto def = stats_op.arg().getDefiningOp()) {
634       if (IsStatsRedundant(def, op_quant_spec_getter)) {
635         redundant_stats_ops.insert(stats_op);
636       }
637     }
638 
639     for (auto user : stats_op.getResult().getUsers()) {
640       // We don't propagate this parameter down if it has multiple operands.
641       // We want to use the result parameter scales instead.
642 
643       if (llvm::dyn_cast<SameScalesOpInterface>(user) &&
644           !PreferResultScale(user)) {
645         for (Value res : user->getResults()) {
646           if (res.hasOneUse()) {
647             if (auto next_stats = llvm::dyn_cast<quant::StatisticsOp>(
648                     *res.getUsers().begin())) {
649               // quantization parameters can be propagated to next_stats
650               redundant_stats_ops.insert(next_stats);
651               // add next_stats to the work list so propagation can
652               // continue.
653               all_stats_ops.push_back(next_stats);
654             }
655           }
656         }
657       }
658     }
659   }
660 
661   // Step 2: backward pass: For the ops skiped in the forward pass, propagate
662   // its results scale backwards as far as possible.
663   func.walk([&](quant::StatisticsOp stats_op) {
664     if (redundant_stats_ops.find(stats_op) == redundant_stats_ops.end()) {
665       all_stats_ops.push_back(stats_op);
666     }
667   });
668 
669   while (!all_stats_ops.empty()) {
670     quant::StatisticsOp stats_op = all_stats_ops.back();
671     all_stats_ops.pop_back();
672 
673     if (auto def = stats_op.arg().getDefiningOp()) {
674       if (llvm::dyn_cast<SameScalesOpInterface>(def)) {
675         for (auto input : def->getOperands()) {
676           if (auto next_stats = llvm::dyn_cast_or_null<quant::StatisticsOp>(
677                   input.getDefiningOp())) {
678             redundant_stats_ops.insert(next_stats);
679             all_stats_ops.push_back(next_stats);
680           }
681         }
682       }
683     }
684   }
685 
686   // Step3: Remove all the redundant stats ops
687   for (auto it : redundant_stats_ops) {
688     if (!llvm::isa<quant::StatisticsOp>(it)) return true;
689     auto stats_op = llvm::cast<quant::StatisticsOp>(it);
690     stats_op.getResult().replaceAllUsesWith(stats_op.arg());
691     stats_op.erase();
692   }
693 
694   // Returns false if the steps finish without errors.
695   return false;
696 }
697 
VerifySameScales(Operation * op)698 LogicalResult VerifySameScales(Operation* op) {
699   auto same_scale_op = llvm::cast<SameScalesOpInterface>(op);
700 
701   llvm::SmallVector<QuantizedType, 4> collected_quant_params;
702   for (auto input : op->getOperands()) {
703     auto quant_params =
704         UniformQuantizedType::getQuantizedElementType(input.getType());
705     // Skip non-quantizable operands.
706     if (quant_params) {
707       collected_quant_params.push_back(quant_params);
708     }
709   }
710 
711   for (auto output : op->getResults()) {
712     auto quant_params =
713         UniformQuantizedType::getQuantizedElementType(output.getType());
714     // Skip non-quantizable results.
715     if (quant_params) {
716       collected_quant_params.push_back(quant_params);
717     }
718   }
719 
720   if (collected_quant_params.size() <= 1) return success();
721   for (int i = 1; i < collected_quant_params.size(); i++) {
722     auto expected_params = collected_quant_params[0];
723     auto compared_paras = collected_quant_params[i];
724     // Same quantization parameters are always ok.
725     if (expected_params == compared_paras) continue;
726     // If the quantization parameters are not the same, as long as it has the
727     // same storage type and the op interface doesn't require same scale
728     // constraint for this storage type, it is still ok.
729     if ((expected_params.isSigned() == compared_paras.isSigned() &&
730          expected_params.getStorageTypeIntegralWidth() ==
731              compared_paras.getStorageTypeIntegralWidth()) &&
732         !same_scale_op.RequiredSameOperandsAndResultsScale(
733             expected_params.isSigned(),
734             expected_params.getStorageTypeIntegralWidth()))
735       continue;
736 
737     std::string err_msg =
738         "quantization parameters violate the same scale constraint: ";
739     llvm::raw_string_ostream os(err_msg);
740     collected_quant_params[0].print(os);
741     os << " vs. ";
742     collected_quant_params[i].print(os);
743     os.flush();
744     return op->emitOpError(err_msg);
745   }
746   return success();
747 }
748 
GetFixedOutputRange(bool is_signed,int bit_width,Type tensor_type,double scale,int64_t zero_point,int64_t storage_min,int64_t storage_max)749 quant::UniformQuantizedType GetFixedOutputRange(bool is_signed, int bit_width,
750                                                 Type tensor_type, double scale,
751                                                 int64_t zero_point,
752                                                 int64_t storage_min,
753                                                 int64_t storage_max) {
754   auto result_type = tensor_type.cast<ShapedType>();
755   if (!result_type.getElementType().isa<FloatType>()) return {};
756   Builder builder(result_type.getContext());
757 
758   // Only support 8-bits
759   if (bit_width != 8) return {};
760   IntegerType storage_type = builder.getIntegerType(bit_width);
761   if (!is_signed) {
762     zero_point += 128;
763     storage_min += 128;
764     storage_max += 128;
765   }
766   return quant::UniformQuantizedType::getChecked(
767       is_signed, storage_type, result_type.getElementType(), scale, zero_point,
768       storage_min, storage_max, builder.getUnknownLoc());
769 }
770 }  // namespace quant
771 }  // namespace mlir
772