1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
// This transformation pass converts dense tensors to sparse format.
17
18 #include "absl/memory/memory.h"
19 #include "third_party/eigen3/Eigen/Core"
20 #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project
21 #include "mlir/IR/Attributes.h" // from @llvm-project
22 #include "mlir/IR/Builders.h" // from @llvm-project
23 #include "mlir/IR/BuiltinTypes.h" // from @llvm-project
24 #include "mlir/Pass/Pass.h" // from @llvm-project
25 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
26 #include "tensorflow/lite/tools/optimize/sparsity/format_converter.h"
27
28 //===----------------------------------------------------------------------===//
29 // The DenseToSparse Pass.
30 //
31 namespace mlir {
32 namespace TFL {
33
34 namespace {
// If the sparsity level of a weight is below this threshold, keep the tensor
// in dense format.
constexpr float kMinSparsityLevel = 0.3;
// Heuristic to check if a block configuration is correct: a candidate block
// config is accepted only when the blocked sparsity reaches at least this
// fraction of the tensor's random (element-wise) sparsity.
constexpr float kBlockOverRandomSparsityRatio = 0.9;
39
APFloatToEigenHalf(const APFloat & val)40 Eigen::half APFloatToEigenHalf(const APFloat& val) {
41 uint16_t raw_data = val.bitcastToAPInt().getZExtValue();
42 return Eigen::half_impl::raw_uint16_to_half(raw_data);
43 }
44
EigenHalfToAPFloat(const Eigen::half & val)45 APFloat EigenHalfToAPFloat(const Eigen::half& val) {
46 uint16_t raw_data = val.x;
47 return APFloat(APFloat::IEEEhalf(), APInt(16, raw_data));
48 }
49
PopulateEncodingParams(const std::vector<int> & block_size,std::vector<int> * traversal_order,std::vector<TfLiteDimensionType> * format,std::vector<int> * b_map,std::vector<int> * b_size)50 void PopulateEncodingParams(const std::vector<int>& block_size,
51 std::vector<int>* traversal_order,
52 std::vector<TfLiteDimensionType>* format,
53 std::vector<int>* b_map, std::vector<int>* b_size) {
54 const int dims_count = block_size.size();
55 traversal_order->resize(dims_count);
56 format->resize(dims_count);
57 for (int i = 0; i < dims_count; i++) {
58 (*traversal_order)[i] = i;
59 }
60 for (int i = 0; i < dims_count - 1; i++) {
61 (*format)[i] = kTfLiteDimDense;
62 }
63 (*format)[dims_count - 1] = kTfLiteDimSparseCSR;
64 *b_map = {};
65 *b_size = {};
66 int block_rank = 0;
67 for (int i = 0; i < dims_count; i++) {
68 if (block_size[i] != 1) {
69 traversal_order->push_back(block_rank + dims_count);
70 format->push_back(kTfLiteDimDense);
71 block_rank++;
72 b_map->push_back(i);
73 b_size->push_back(block_size[i]);
74 }
75 }
76 }
77
// Returns the fraction of zero-valued elements, in [0, 1].
// Guards against division by zero (which previously produced inf/NaN) by
// treating an empty tensor as fully dense.
inline float GetSparsity(const int num_zeros, const int num_elements) {
  if (num_elements <= 0) return 0.0f;
  return (1.0 * num_zeros / num_elements);
}
81
CalculateRandomSparsity(const ElementsAttr & attr,const ShapedType & type)82 float CalculateRandomSparsity(const ElementsAttr& attr,
83 const ShapedType& type) {
84 int num_elements = type.getNumElements();
85 int num_zeros = 0;
86
87 if (type.getElementType().isa<FloatType>()) {
88 for (const auto val : attr.getValues<APFloat>()) {
89 if (val.isZero()) {
90 num_zeros++;
91 }
92 }
93 } else if (type.getElementType().isa<quant::QuantizedType>()) {
94 for (const auto val : attr.getValues<int8_t>()) {
95 if (val == 0) {
96 num_zeros++;
97 }
98 }
99 }
100
101 return GetSparsity(num_zeros, num_elements);
102 }
103
CalculateBlockSparsity(const ElementsAttr & attr,const ShapedType & type,const std::vector<int> & block_size)104 float CalculateBlockSparsity(const ElementsAttr& attr, const ShapedType& type,
105 const std::vector<int>& block_size) {
106 float sparsity = 0;
107 std::vector<int> shape(2);
108 shape[0] = type.getDimSize(0);
109 shape[1] = type.getDimSize(1);
110
111 std::vector<int> traversal_order = {};
112 std::vector<TfLiteDimensionType> format = {};
113 std::vector<int> b_size = {};
114 std::vector<int> b_map = {};
115 PopulateEncodingParams(block_size, &traversal_order, &format, &b_map,
116 &b_size);
117
118 if (type.getElementType().isF32()) {
119 tflite::optimize::sparsity::FormatConverter<float> format_converter(
120 shape, traversal_order, format, b_size, b_map);
121 std::vector<float> data;
122 data.reserve(type.getNumElements());
123 for (const auto val : attr.getValues<float>()) data.push_back(val);
124 format_converter.DenseToSparse(data.data());
125 sparsity =
126 GetSparsity(type.getNumElements() - format_converter.GetData().size(),
127 type.getNumElements());
128 } else if (type.getElementType().isF16()) {
129 tflite::optimize::sparsity::FormatConverter<Eigen::half> format_converter(
130 shape, traversal_order, format, b_size, b_map);
131 std::vector<Eigen::half> data;
132 data.reserve(type.getNumElements());
133 for (const auto& val : attr.getValues<APFloat>())
134 data.push_back(APFloatToEigenHalf(val));
135 format_converter.DenseToSparse(data.data());
136 sparsity =
137 GetSparsity(type.getNumElements() - format_converter.GetData().size(),
138 type.getNumElements());
139 } else if (type.getElementType().isa<quant::QuantizedType>()) {
140 tflite::optimize::sparsity::FormatConverter<int8_t> format_converter(
141 shape, traversal_order, format, b_size, b_map);
142 std::vector<int8_t> data;
143 data.reserve(type.getNumElements());
144 for (const auto val : attr.getValues<int8_t>()) data.push_back(val);
145 format_converter.DenseToSparse(data.data());
146 sparsity =
147 GetSparsity(type.getNumElements() - format_converter.GetData().size(),
148 type.getNumElements());
149 }
150
151 return sparsity;
152 }
153
// Result of inspecting a weight tensor for sparsification.
// Plain C++ struct (the C-style `typedef struct` is unnecessary here), with
// default member initializers so a default-constructed result is well-defined.
struct InspectResult {
  // Whether the weight tensor is sparse enough to be compressed.
  bool can_compress = false;
  // If the weight tensor cannot be encoded in a block configuration that the
  // op supports, a Densify() op will be inserted afterwards to fall back to
  // dense execution.
  bool needs_densify = false;
  // Among the supported block configs of an op, which got selected to encode
  // the sparse weight. Empty means no block config matched.
  std::vector<int> selected_block_size;
};
165
// Inspects the weight-holding constant `inst` and decides whether and how to
// sparsify it:
// - Non-constant ops, or constants of rank other than 2/4, are rejected.
// - If the element-wise sparsity is below kMinSparsityLevel, keep dense.
// - Otherwise, try each supported block config; the first one whose blocked
//   sparsity is close enough to the random sparsity is selected. If none
//   matches, `needs_densify` stays true and `selected_block_size` stays empty
//   (the caller encodes with random sparsity and inserts a Densify op).
InspectResult InspectWeight(
    Operation* inst,
    const std::vector<std::vector<int>>& supported_block_size) {
  ElementsAttr attr;
  ShapedType type;
  InspectResult result = {};
  // Accept both float (ConstOp) and quantized (QConstOp) constants.
  if (auto cst = dyn_cast<ConstOp>(inst)) {
    attr = cst.value();
    type = cst.getType().cast<ShapedType>();
  } else if (auto cst = dyn_cast<QConstOp>(inst)) {
    attr = cst.value();
    type = cst.getType().cast<ShapedType>();
  } else {
    result.can_compress = false;
    return result;
  }

  // Currently we only support compressing weights of ops:
  // Conv, DepthwiseConv, TransposeConv, whose filter has rank 4, and
  // FullyConnected, whose filter has rank 2.
  if (type.getRank() != 2 && type.getRank() != 4) {
    result.can_compress = false;
    return result;
  }

  float random_sparsity = CalculateRandomSparsity(attr, type);
  if (random_sparsity < kMinSparsityLevel) {
    result.can_compress = false;
    return result;
  }

  result.can_compress = true;

  // A weight pruned with a block config should have its blocked sparsity
  // roughly match its random sparsity; use that ratio to detect the config.
  float curr_sparsity = 0;
  std::vector<int> selected_block_size;
  result.needs_densify = true;
  for (const auto& block_size : supported_block_size) {
    curr_sparsity = CalculateBlockSparsity(attr, type, block_size);
    if (curr_sparsity / random_sparsity > kBlockOverRandomSparsityRatio) {
      selected_block_size = block_size;
      // can_compress is already true at this point; re-set for clarity only.
      result.can_compress = true;
      result.needs_densify = false;
      result.selected_block_size = selected_block_size;
      break;
    }
  }

  return result;
}
215
216 template <typename T>
BuildSparsityParameterAttribute(const std::vector<int> & block_size,const T * dense_buffer,Operation * inst,OpBuilder * builder,SparsityParameterAttr * s_param)217 std::vector<T> BuildSparsityParameterAttribute(
218 const std::vector<int>& block_size, const T* dense_buffer, Operation* inst,
219 OpBuilder* builder, SparsityParameterAttr* s_param) {
220 ElementsAttr attr;
221 ShapedType type;
222 if (auto cst = dyn_cast<ConstOp>(inst)) {
223 attr = cst.value();
224 type = cst.getType().cast<ShapedType>();
225 } else if (auto cst = dyn_cast<QConstOp>(inst)) {
226 attr = cst.value();
227 type = cst.getType().cast<ShapedType>();
228 } else {
229 assert(false && "Expected a constant-like op");
230 }
231 const int dims_count = type.getRank();
232 std::vector<int> shape(dims_count);
233 for (int i = 0; i < dims_count; i++) {
234 shape[i] = type.getDimSize(i);
235 }
236
237 std::vector<int> traversal_order = {};
238 std::vector<TfLiteDimensionType> format = {};
239 std::vector<int> b_size = {};
240 std::vector<int> b_map = {};
241 PopulateEncodingParams(block_size, &traversal_order, &format, &b_map,
242 &b_size);
243
244 tflite::optimize::sparsity::FormatConverter<T> format_converter(
245 shape, traversal_order, format, b_size, b_map);
246 format_converter.DenseToSparse(dense_buffer);
247 const auto& metadata = format_converter.GetDimMetadata();
248 const auto& compressed_data = format_converter.GetData();
249 const int dim_size = metadata.size() / 2;
250 std::vector<Attribute> dim_metadata(traversal_order.size());
251 for (int i = 0; i < dim_size; i++) {
252 if (format[i] == kTfLiteDimDense) {
253 dim_metadata[i] = DimensionMetadataAttr::get(
254 builder->getStringAttr("DENSE"),
255 builder->getI32IntegerAttr(metadata[2 * i][0]),
256 builder->getArrayAttr({}), builder->getArrayAttr({}),
257 builder->getContext());
258 } else {
259 dim_metadata[i] = DimensionMetadataAttr::get(
260 builder->getStringAttr("SPARSE_CSR"), builder->getI32IntegerAttr(0),
261 builder->getI32ArrayAttr(metadata[2 * i]),
262 builder->getI32ArrayAttr(metadata[2 * i + 1]), builder->getContext());
263 }
264 }
265 *s_param = SparsityParameterAttr::get(
266 builder->getI32ArrayAttr(traversal_order),
267 builder->getI32ArrayAttr(b_map), builder->getArrayAttr(dim_metadata),
268 builder->getContext());
269
270 return compressed_data;
271 }
272
273 // This pass encodes sparse weights in the model in the proper format, and adds
274 // Densify() op if necessary. The general algorithm is:
275 // 1. Get list of operands (weights) of an op that can be sparse.
276 // 2. Get list of supported block configurations of the op.
277 // 3. Calculate random sparsity of the weight.
278 // 3.1. If sparsity level is below the encoding threshold, keep in dense.
279 // 3.2. If sparsity level is above the encoding threshold, go to 4.
280 // 4. Try to encode the weight with supported block configurations. If the
281 // weight was pruned with the same block config, the blocked sparsity level
282 // should match the random sparsity.
283 // 4.1. Return the matching block config if found.
284 // 4.2. If no matching block config is found, encode the weight with random
285 // sparsity, and add Densify() op to fall back to dense execution.
// Function-level pass shell; the actual rewriting happens in runOnFunction.
struct DenseToSparse : public PassWrapper<DenseToSparse, FunctionPass> {
  void runOnFunction() override;
};
289
// Walks every op implementing SparseOpInterface, and for each sparsifiable
// weight operand: inspects it, replaces the dense constant with a
// SparseConst/SparseQConst op carrying the encoded data, and inserts a
// Densify op when the weight could not be block-encoded.
void DenseToSparse::runOnFunction() {
  FuncOp func = getFunction();
  OpBuilder builder(func);

  func.walk([&](SparseOpInterface sparse_op) {
    const auto& sparse_operands = sparse_op.GetSparseOperands();
    std::vector<std::vector<int>> supported_block_size;
    for (int operand : sparse_operands) {
      auto* op = sparse_op.getOperation();
      auto value = op->getOperand(operand);

      auto* inst = value.getDefiningOp();
      if (!inst) {
        continue;
      }

      // There could be a Dequantize op after the weight tensor in cases like
      // fp16 post-training quantization. We need to get the weight from the
      // input of the Dequantize op.
      if (isa<DequantizeOp>(inst)) {
        op = inst;
        value = inst->getOperand(0);
        inst = value.getDefiningOp();
        if (!inst) {
          continue;
        }
        operand = 0;
      }

      // Pick the block configs the op supports for this weight's kind
      // (float vs quantized); skip anything that is not a known constant.
      ShapedType type;
      if (isa<ConstOp>(inst)) {
        supported_block_size = sparse_op.GetFloatBlockSize();
        type = dyn_cast<ConstOp>(inst).getType().cast<ShapedType>();
      } else if (isa<QConstOp>(inst)) {
        supported_block_size = sparse_op.GetQuantizedBlockSize();
        type = dyn_cast<QConstOp>(inst).getType().cast<ShapedType>();
      } else {
        continue;
      }

      InspectResult result = InspectWeight(inst, supported_block_size);
      if (!result.can_compress) {
        continue;
      }

      // The weight is not block sparse. Encode with random sparsity.
      if (result.selected_block_size.empty()) {
        result.selected_block_size = std::vector<int>(type.getRank(), 1);
      }

      // Insert the sparse constant right before the consuming op (or the
      // Dequantize op, if one was traversed above).
      builder.setInsertionPoint(op);
      SparsityParameterAttr s_param;
      if (auto cst = dyn_cast<ConstOp>(inst)) {
        auto attr = cst.value();
        auto type = cst.getType().cast<ShapedType>();
        if (type.getElementType().isF32()) {
          // f32 path: encode directly from the float values.
          std::vector<float> dense_data;
          dense_data.reserve(type.getNumElements());
          for (const auto val : attr.getValues<float>())
            dense_data.push_back(val);
          std::vector<float> compressed_data =
              BuildSparsityParameterAttribute<float>(result.selected_block_size,
                                                     dense_data.data(), inst,
                                                     &builder, &s_param);
          auto compressed_data_type = RankedTensorType::get(
              {static_cast<int64_t>(compressed_data.size())},
              builder.getF32Type());
          auto new_value = DenseElementsAttr::get<float>(compressed_data_type,
                                                         compressed_data);
          auto s_const = builder.create<SparseConstOp>(
              op->getLoc(), cst.value(), s_param, new_value);
          value.replaceAllUsesWith(s_const.getResult());
          cst.erase();
        } else if (type.getElementType().isF16()) {
          // f16 path: round-trip through Eigen::half for encoding, then back
          // to APFloat to build the result attribute.
          std::vector<Eigen::half> dense_data;
          dense_data.reserve(type.getNumElements());
          for (const auto& val : attr.getValues<APFloat>())
            dense_data.push_back(APFloatToEigenHalf(val));
          std::vector<Eigen::half> compressed_data =
              BuildSparsityParameterAttribute<Eigen::half>(
                  result.selected_block_size, dense_data.data(), inst, &builder,
                  &s_param);
          std::vector<APFloat> apfloat_data;
          apfloat_data.reserve(type.getNumElements());
          for (const auto& val : compressed_data)
            apfloat_data.push_back(EigenHalfToAPFloat(val));
          auto compressed_data_type = RankedTensorType::get(
              {static_cast<int64_t>(compressed_data.size())},
              type.getElementType());
          auto new_value =
              DenseElementsAttr::get(compressed_data_type, apfloat_data);
          auto s_const = builder.create<SparseConstOp>(
              op->getLoc(), cst.value(), s_param, new_value);
          value.replaceAllUsesWith(s_const.getResult());
          cst.erase();
        }
      } else if (auto cst = dyn_cast<QConstOp>(inst)) {
        // Quantized path: encode the int8 storage values and keep the
        // original quantized type attribute on the new op.
        auto attr = cst.value();
        auto type = cst.getType().cast<ShapedType>();
        std::vector<int8_t> dense_data;
        dense_data.reserve(type.getNumElements());
        for (const auto& val : attr.getValues<int8_t>())
          dense_data.push_back(val);
        std::vector<int8_t> compressed_data =
            BuildSparsityParameterAttribute<int8_t>(result.selected_block_size,
                                                    dense_data.data(), inst,
                                                    &builder, &s_param);
        auto compressed_data_type = RankedTensorType::get(
            {static_cast<int64_t>(compressed_data.size())},
            builder.getIntegerType(8, true));
        auto new_value = DenseElementsAttr::get<int8_t>(compressed_data_type,
                                                        compressed_data);
        auto s_qconst = builder.create<SparseQConstOp>(
            op->getLoc(), cst.qtypeAttr(), cst.value(), s_param, new_value);
        value.replaceAllUsesWith(s_qconst.getResult());
        cst.erase();
      }

      // No supported block config matched: fall back to dense execution by
      // inserting a Densify op between the (sparse) weight and its consumer.
      if (result.needs_densify) {
        const auto value = op->getOperand(operand);
        auto densify =
            builder.create<DensifyOp>(op->getLoc(), value.getType(), value);
        value.replaceAllUsesWith(densify);
        // replaceAllUsesWith also rewired densify's own input; point it back
        // at the original weight value.
        densify.setOperand(value);
      }
    }
  });
}
418
419 } // namespace
420
421 // Creates an instance of the TensorFlow Lite dialect DenseToSparse pass.
CreateDenseToSparsePass()422 std::unique_ptr<OperationPass<FuncOp>> CreateDenseToSparsePass() {
423 return absl::make_unique<DenseToSparse>();
424 }
425
// Registers the pass under the "tfl-dense-to-sparse" command-line name.
static PassRegistration<DenseToSparse> pass(
    "tfl-dense-to-sparse", "Convert dense tensor to sparse format.");
428
429 } // namespace TFL
430 } // namespace mlir
431