1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
// This transformation pass converts dense tensors to sparse format.
17 
#include <memory>

#include "absl/memory/memory.h"
#include "third_party/eigen3/Eigen/Core"
#include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
#include "mlir/IR/Attributes.h"  // from @llvm-project
#include "mlir/IR/Builders.h"  // from @llvm-project
#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
#include "mlir/Pass/Pass.h"  // from @llvm-project
#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
#include "tensorflow/lite/tools/optimize/sparsity/format_converter.h"
27 
28 //===----------------------------------------------------------------------===//
29 // The DenseToSparse Pass.
30 //
31 namespace mlir {
32 namespace TFL {
33 
34 namespace {
// If sparsity level is below this threshold, keep the tensor in dense format.
constexpr float kMinSparsityLevel = 0.3;
// Heuristic to check if a block configuration is correct.
constexpr float kBlockOverRandomSparsityRatio = 0.9;
39 
APFloatToEigenHalf(const APFloat & val)40 Eigen::half APFloatToEigenHalf(const APFloat& val) {
41   uint16_t raw_data = val.bitcastToAPInt().getZExtValue();
42   return Eigen::half_impl::raw_uint16_to_half(raw_data);
43 }
44 
EigenHalfToAPFloat(const Eigen::half & val)45 APFloat EigenHalfToAPFloat(const Eigen::half& val) {
46   uint16_t raw_data = val.x;
47   return APFloat(APFloat::IEEEhalf(), APInt(16, raw_data));
48 }
49 
PopulateEncodingParams(const std::vector<int> & block_size,std::vector<int> * traversal_order,std::vector<TfLiteDimensionType> * format,std::vector<int> * b_map,std::vector<int> * b_size)50 void PopulateEncodingParams(const std::vector<int>& block_size,
51                             std::vector<int>* traversal_order,
52                             std::vector<TfLiteDimensionType>* format,
53                             std::vector<int>* b_map, std::vector<int>* b_size) {
54   const int dims_count = block_size.size();
55   traversal_order->resize(dims_count);
56   format->resize(dims_count);
57   for (int i = 0; i < dims_count; i++) {
58     (*traversal_order)[i] = i;
59   }
60   for (int i = 0; i < dims_count - 1; i++) {
61     (*format)[i] = kTfLiteDimDense;
62   }
63   (*format)[dims_count - 1] = kTfLiteDimSparseCSR;
64   *b_map = {};
65   *b_size = {};
66   int block_rank = 0;
67   for (int i = 0; i < dims_count; i++) {
68     if (block_size[i] != 1) {
69       traversal_order->push_back(block_rank + dims_count);
70       format->push_back(kTfLiteDimDense);
71       block_rank++;
72       b_map->push_back(i);
73       b_size->push_back(block_size[i]);
74     }
75   }
76 }
77 
// Returns the fraction of elements that are zero, in [0, 1].
// An empty tensor (num_elements == 0) is treated as fully dense (0.0)
// instead of dividing by zero.
inline float GetSparsity(const int num_zeros, const int num_elements) {
  if (num_elements <= 0) return 0.0f;
  return (1.0 * num_zeros / num_elements);
}
81 
CalculateRandomSparsity(const ElementsAttr & attr,const ShapedType & type)82 float CalculateRandomSparsity(const ElementsAttr& attr,
83                               const ShapedType& type) {
84   int num_elements = type.getNumElements();
85   int num_zeros = 0;
86 
87   if (type.getElementType().isa<FloatType>()) {
88     for (const auto val : attr.getValues<APFloat>()) {
89       if (val.isZero()) {
90         num_zeros++;
91       }
92     }
93   } else if (type.getElementType().isa<quant::QuantizedType>()) {
94     for (const auto val : attr.getValues<int8_t>()) {
95       if (val == 0) {
96         num_zeros++;
97       }
98     }
99   }
100 
101   return GetSparsity(num_zeros, num_elements);
102 }
103 
CalculateBlockSparsity(const ElementsAttr & attr,const ShapedType & type,const std::vector<int> & block_size)104 float CalculateBlockSparsity(const ElementsAttr& attr, const ShapedType& type,
105                              const std::vector<int>& block_size) {
106   float sparsity = 0;
107   std::vector<int> shape(2);
108   shape[0] = type.getDimSize(0);
109   shape[1] = type.getDimSize(1);
110 
111   std::vector<int> traversal_order = {};
112   std::vector<TfLiteDimensionType> format = {};
113   std::vector<int> b_size = {};
114   std::vector<int> b_map = {};
115   PopulateEncodingParams(block_size, &traversal_order, &format, &b_map,
116                          &b_size);
117 
118   if (type.getElementType().isF32()) {
119     tflite::optimize::sparsity::FormatConverter<float> format_converter(
120         shape, traversal_order, format, b_size, b_map);
121     std::vector<float> data;
122     data.reserve(type.getNumElements());
123     for (const auto val : attr.getValues<float>()) data.push_back(val);
124     format_converter.DenseToSparse(data.data());
125     sparsity =
126         GetSparsity(type.getNumElements() - format_converter.GetData().size(),
127                     type.getNumElements());
128   } else if (type.getElementType().isF16()) {
129     tflite::optimize::sparsity::FormatConverter<Eigen::half> format_converter(
130         shape, traversal_order, format, b_size, b_map);
131     std::vector<Eigen::half> data;
132     data.reserve(type.getNumElements());
133     for (const auto& val : attr.getValues<APFloat>())
134       data.push_back(APFloatToEigenHalf(val));
135     format_converter.DenseToSparse(data.data());
136     sparsity =
137         GetSparsity(type.getNumElements() - format_converter.GetData().size(),
138                     type.getNumElements());
139   } else if (type.getElementType().isa<quant::QuantizedType>()) {
140     tflite::optimize::sparsity::FormatConverter<int8_t> format_converter(
141         shape, traversal_order, format, b_size, b_map);
142     std::vector<int8_t> data;
143     data.reserve(type.getNumElements());
144     for (const auto val : attr.getValues<int8_t>()) data.push_back(val);
145     format_converter.DenseToSparse(data.data());
146     sparsity =
147         GetSparsity(type.getNumElements() - format_converter.GetData().size(),
148                     type.getNumElements());
149   }
150 
151   return sparsity;
152 }
153 
// Result of inspecting a weight tensor for sparse encoding.
// Default state: not compressible, no densify needed, no block config.
struct InspectResult {
  // Whether the weight tensor is sparse enough to be compressed.
  bool can_compress = false;
  // If the weight tensor cannot be encoded in a block configuration that the op
  // supports, a Densify() op will be inserted afterwards to fall back to dense
  // execution.
  bool needs_densify = false;
  // Among the supported block configs of an op, which got selected to encode
  // the sparse weight.
  std::vector<int> selected_block_size;
};
165 
InspectWeight(Operation * inst,const std::vector<std::vector<int>> & supported_block_size)166 InspectResult InspectWeight(
167     Operation* inst,
168     const std::vector<std::vector<int>>& supported_block_size) {
169   ElementsAttr attr;
170   ShapedType type;
171   InspectResult result = {};
172   if (auto cst = dyn_cast<ConstOp>(inst)) {
173     attr = cst.value();
174     type = cst.getType().cast<ShapedType>();
175   } else if (auto cst = dyn_cast<QConstOp>(inst)) {
176     attr = cst.value();
177     type = cst.getType().cast<ShapedType>();
178   } else {
179     result.can_compress = false;
180     return result;
181   }
182 
183   // Currently we only support compressing weights of ops:
184   //   Conv, DepthwiseConv, TransposeConv, whose filter has rank 4, and
185   //   FullyConnected, whose filter has rank 2.
186   if (type.getRank() != 2 && type.getRank() != 4) {
187     result.can_compress = false;
188     return result;
189   }
190 
191   float random_sparsity = CalculateRandomSparsity(attr, type);
192   if (random_sparsity < kMinSparsityLevel) {
193     result.can_compress = false;
194     return result;
195   }
196 
197   result.can_compress = true;
198 
199   float curr_sparsity = 0;
200   std::vector<int> selected_block_size;
201   result.needs_densify = true;
202   for (const auto& block_size : supported_block_size) {
203     curr_sparsity = CalculateBlockSparsity(attr, type, block_size);
204     if (curr_sparsity / random_sparsity > kBlockOverRandomSparsityRatio) {
205       selected_block_size = block_size;
206       result.can_compress = true;
207       result.needs_densify = false;
208       result.selected_block_size = selected_block_size;
209       break;
210     }
211   }
212 
213   return result;
214 }
215 
// Encodes the weight held by `inst` (a ConstOp or QConstOp) in TFLite sparse
// format using `block_size`, writes the resulting sparsity parameters into
// *s_param, and returns the compressed value buffer.
// `dense_buffer` must hold the weight's dense values in row-major order.
template <typename T>
std::vector<T> BuildSparsityParameterAttribute(
    const std::vector<int>& block_size, const T* dense_buffer, Operation* inst,
    OpBuilder* builder, SparsityParameterAttr* s_param) {
  ElementsAttr attr;
  ShapedType type;
  if (auto cst = dyn_cast<ConstOp>(inst)) {
    attr = cst.value();
    type = cst.getType().cast<ShapedType>();
  } else if (auto cst = dyn_cast<QConstOp>(inst)) {
    attr = cst.value();
    type = cst.getType().cast<ShapedType>();
  } else {
    // Callers guarantee a constant-like op; anything else is a programmer
    // error (debug-build check only).
    assert(false && "Expected a constant-like op");
  }
  const int dims_count = type.getRank();
  std::vector<int> shape(dims_count);
  for (int i = 0; i < dims_count; i++) {
    shape[i] = type.getDimSize(i);
  }

  std::vector<int> traversal_order = {};
  std::vector<TfLiteDimensionType> format = {};
  std::vector<int> b_size = {};
  std::vector<int> b_map = {};
  PopulateEncodingParams(block_size, &traversal_order, &format, &b_map,
                         &b_size);

  tflite::optimize::sparsity::FormatConverter<T> format_converter(
      shape, traversal_order, format, b_size, b_map);
  format_converter.DenseToSparse(dense_buffer);
  // The converter interleaves two metadata arrays per dimension
  // (segments/indices for CSR dims; {size} for dense dims), hence the
  // 2*i / 2*i+1 indexing below.
  const auto& metadata = format_converter.GetDimMetadata();
  const auto& compressed_data = format_converter.GetData();
  const int dim_size = metadata.size() / 2;
  std::vector<Attribute> dim_metadata(traversal_order.size());
  for (int i = 0; i < dim_size; i++) {
    if (format[i] == kTfLiteDimDense) {
      // Dense dims carry only their size; segment/index arrays stay empty.
      dim_metadata[i] = DimensionMetadataAttr::get(
          builder->getStringAttr("DENSE"),
          builder->getI32IntegerAttr(metadata[2 * i][0]),
          builder->getArrayAttr({}), builder->getArrayAttr({}),
          builder->getContext());
    } else {
      dim_metadata[i] = DimensionMetadataAttr::get(
          builder->getStringAttr("SPARSE_CSR"), builder->getI32IntegerAttr(0),
          builder->getI32ArrayAttr(metadata[2 * i]),
          builder->getI32ArrayAttr(metadata[2 * i + 1]), builder->getContext());
    }
  }
  *s_param = SparsityParameterAttr::get(
      builder->getI32ArrayAttr(traversal_order),
      builder->getI32ArrayAttr(b_map), builder->getArrayAttr(dim_metadata),
      builder->getContext());

  return compressed_data;
}
272 
// This pass encodes sparse weights in the model in the proper format, and adds
// Densify() op if necessary. The general algorithm is:
//   1. Get list of operands (weights) of an op that can be sparse.
//   2. Get list of supported block configurations of the op.
//   3. Calculate random sparsity of the weight.
//     3.1. If sparsity level is below the encoding threshold, keep in dense.
//     3.2. If sparsity level is above the encoding threshold, go to 4.
//   4. Try to encode the weight with supported block configurations. If the
//      weight was pruned with the same block config, the blocked sparsity level
//      should match the random sparsity.
//     4.1. Return the matching block config if found.
//     4.2. If no matching block config is found, encode the weight with random
//          sparsity, and add Densify() op to fall back to dense execution.
// Standard MLIR function-pass boilerplate; all logic lives in runOnFunction().
struct DenseToSparse : public PassWrapper<DenseToSparse, FunctionPass> {
  void runOnFunction() override;
};
289 
// Walks every op implementing SparseOpInterface in the function, inspects
// each sparse-capable weight operand, and rewrites compressible constants
// into SparseConstOp / SparseQConstOp (inserting a DensifyOp when the op
// cannot execute the chosen encoding directly).
void DenseToSparse::runOnFunction() {
  FuncOp func = getFunction();
  OpBuilder builder(func);

  func.walk([&](SparseOpInterface sparse_op) {
    // Indices of the op's operands that may be stored sparse.
    const auto& sparse_operands = sparse_op.GetSparseOperands();
    std::vector<std::vector<int>> supported_block_size;
    for (int operand : sparse_operands) {
      auto* op = sparse_op.getOperation();
      auto value = op->getOperand(operand);

      auto* inst = value.getDefiningOp();
      if (!inst) {
        continue;
      }

      // There could be a Dequantize op after the weight tensor in cases like
      // fp16 post-training quantization. We need to get the weight from the
      // input of the Dequantize op.
      if (isa<DequantizeOp>(inst)) {
        // From here on, `op` is the Dequantize op and `inst` its (constant)
        // input; `operand` is reset to Dequantize's single input index.
        op = inst;
        value = inst->getOperand(0);
        inst = value.getDefiningOp();
        if (!inst) {
          continue;
        }
        operand = 0;
      }

      // Block configs differ between float and quantized weights.
      ShapedType type;
      if (isa<ConstOp>(inst)) {
        supported_block_size = sparse_op.GetFloatBlockSize();
        type = dyn_cast<ConstOp>(inst).getType().cast<ShapedType>();
      } else if (isa<QConstOp>(inst)) {
        supported_block_size = sparse_op.GetQuantizedBlockSize();
        type = dyn_cast<QConstOp>(inst).getType().cast<ShapedType>();
      } else {
        continue;
      }

      InspectResult result = InspectWeight(inst, supported_block_size);
      if (!result.can_compress) {
        continue;
      }

      // The weight is not block sparse. Encode with random sparsity.
      // (All-ones block size means element-granularity encoding.)
      if (result.selected_block_size.empty()) {
        result.selected_block_size = std::vector<int>(type.getRank(), 1);
      }

      builder.setInsertionPoint(op);
      SparsityParameterAttr s_param;
      if (auto cst = dyn_cast<ConstOp>(inst)) {
        auto attr = cst.value();
        auto type = cst.getType().cast<ShapedType>();
        if (type.getElementType().isF32()) {
          // f32 path: materialize the dense buffer, compress, and replace
          // the ConstOp with a SparseConstOp that keeps the original dense
          // value attribute alongside the compressed data.
          std::vector<float> dense_data;
          dense_data.reserve(type.getNumElements());
          for (const auto val : attr.getValues<float>())
            dense_data.push_back(val);
          std::vector<float> compressed_data =
              BuildSparsityParameterAttribute<float>(result.selected_block_size,
                                                     dense_data.data(), inst,
                                                     &builder, &s_param);
          auto compressed_data_type = RankedTensorType::get(
              {static_cast<int64_t>(compressed_data.size())},
              builder.getF32Type());
          auto new_value = DenseElementsAttr::get<float>(compressed_data_type,
                                                         compressed_data);
          auto s_const = builder.create<SparseConstOp>(
              op->getLoc(), cst.value(), s_param, new_value);
          value.replaceAllUsesWith(s_const.getResult());
          cst.erase();
        } else if (type.getElementType().isF16()) {
          // f16 path: round-trip through Eigen::half for compression, then
          // back to APFloat to build the result attribute.
          std::vector<Eigen::half> dense_data;
          dense_data.reserve(type.getNumElements());
          for (const auto& val : attr.getValues<APFloat>())
            dense_data.push_back(APFloatToEigenHalf(val));
          std::vector<Eigen::half> compressed_data =
              BuildSparsityParameterAttribute<Eigen::half>(
                  result.selected_block_size, dense_data.data(), inst, &builder,
                  &s_param);
          std::vector<APFloat> apfloat_data;
          apfloat_data.reserve(type.getNumElements());
          for (const auto& val : compressed_data)
            apfloat_data.push_back(EigenHalfToAPFloat(val));
          auto compressed_data_type = RankedTensorType::get(
              {static_cast<int64_t>(compressed_data.size())},
              type.getElementType());
          auto new_value =
              DenseElementsAttr::get(compressed_data_type, apfloat_data);
          auto s_const = builder.create<SparseConstOp>(
              op->getLoc(), cst.value(), s_param, new_value);
          value.replaceAllUsesWith(s_const.getResult());
          cst.erase();
        }
      } else if (auto cst = dyn_cast<QConstOp>(inst)) {
        // Quantized path: int8 buffer, SparseQConstOp preserves the qtype.
        auto attr = cst.value();
        auto type = cst.getType().cast<ShapedType>();
        std::vector<int8_t> dense_data;
        dense_data.reserve(type.getNumElements());
        for (const auto& val : attr.getValues<int8_t>())
          dense_data.push_back(val);
        std::vector<int8_t> compressed_data =
            BuildSparsityParameterAttribute<int8_t>(result.selected_block_size,
                                                    dense_data.data(), inst,
                                                    &builder, &s_param);
        auto compressed_data_type = RankedTensorType::get(
            {static_cast<int64_t>(compressed_data.size())},
            builder.getIntegerType(8, true));
        auto new_value = DenseElementsAttr::get<int8_t>(compressed_data_type,
                                                        compressed_data);
        auto s_qconst = builder.create<SparseQConstOp>(
            op->getLoc(), cst.qtypeAttr(), cst.value(), s_param, new_value);
        value.replaceAllUsesWith(s_qconst.getResult());
        cst.erase();
      }

      if (result.needs_densify) {
        // Insert Densify between the (now sparse) constant and the consumer,
        // then re-point Densify's own operand back at the constant so the
        // blanket replaceAllUsesWith doesn't create a self-cycle.
        const auto value = op->getOperand(operand);
        auto densify =
            builder.create<DensifyOp>(op->getLoc(), value.getType(), value);
        value.replaceAllUsesWith(densify);
        densify.setOperand(value);
      }
    }
  });
}
418 
419 }  // namespace
420 
421 // Creates an instance of the TensorFlow Lite dialect DenseToSparse pass.
CreateDenseToSparsePass()422 std::unique_ptr<OperationPass<FuncOp>> CreateDenseToSparsePass() {
423   return absl::make_unique<DenseToSparse>();
424 }
425 
// Registers the pass with the global registry under the given CLI flag so it
// can be invoked from mlir-opt-style drivers.
static PassRegistration<DenseToSparse> pass(
    "tfl-dense-to-sparse", "Convert dense tensor to sparse format.");
428 
429 }  // namespace TFL
430 }  // namespace mlir
431