1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "puffin/src/huffman_table.h"
6 
7 #include <algorithm>
8 #include <vector>
9 
10 #include "puffin/src/logging.h"
11 
12 using std::string;
13 using std::vector;
14 
15 namespace puffin {
16 
17 // Permutations of input Huffman code lengths (used only to read code lengths
18 // necessary for reading Huffman table.)
19 const uint8_t kPermutations[19] = {16, 17, 18, 0, 8,  7, 9,  6, 10, 5,
20                                    11, 4,  12, 3, 13, 2, 14, 1, 15};
21 
22 // The bases of each alphabet which is added to the integer value of extra
23 // bits that comes after the Huffman code in the input to create the given
24 // length value. The last element is a guard.
25 const uint16_t kLengthBases[30] = {
26     3,  4,  5,  6,  7,  8,  9,  10, 11,  13,  15,  17,  19,  23,  27,
27     31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0xFFFF};
28 
29 // Number of extra bits that comes after the associating Huffman code.
30 const uint8_t kLengthExtraBits[29] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
31                                       1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
32                                       4, 4, 4, 4, 5, 5, 5, 5, 0};
33 
34 // Same as |kLengthBases| but for the distances instead of lengths. The last
35 // element is a guard.
36 const uint16_t kDistanceBases[31] = {
37     1,    2,    3,    4,    5,    7,     9,     13,    17,    25,   33,
38     49,   65,   97,   129,  193,  257,   385,   513,   769,   1025, 1537,
39     2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0xFFFF};
40 
41 // Same as |kLengthExtraBits| but for distances instead of lengths.
42 const uint8_t kDistanceExtraBits[30] = {0, 0, 0,  0,  1,  1,  2,  2,  3,  3,
43                                         4, 4, 5,  5,  6,  6,  7,  7,  8,  8,
44                                         9, 9, 10, 10, 11, 11, 12, 12, 13, 13};
45 
46 // 288 is the maximum number of needed huffman codes for an alphabet. Fixed
47 // huffman table needs 288 and dynamic huffman table needs maximum 286.
48 // 286 = 256 (coding a byte) +
49 //         1 (coding the end of block symbole) +
50 //        29 (coding the lengths)
HuffmanTable()51 HuffmanTable::HuffmanTable() : codeindexpairs_(288), initialized_(false) {}
52 
InitHuffmanCodes(const Buffer & lens,size_t * max_bits)53 bool HuffmanTable::InitHuffmanCodes(const Buffer& lens, size_t* max_bits) {
54   // Temporary buffers used in |InitHuffmanCodes|.
55   uint16_t len_count_[kMaxHuffmanBits + 1] = {0};
56   uint16_t next_code_[kMaxHuffmanBits + 1] = {0};
57 
58   // 1. Count the number of codes for each length;
59   for (auto len : lens) {
60     len_count_[len]++;
61   }
62 
63   for (*max_bits = kMaxHuffmanBits; *max_bits >= 1; (*max_bits)--) {
64     if (len_count_[*max_bits] != 0) {
65       break;
66     }
67   }
68 
69   // Check for oversubscribed code lengths. (A code with length 'L' cannot have
70   // more than 2^L items.
71   for (size_t idx = 1; idx <= *max_bits; idx++) {
72     if (len_count_[idx] > (1 << idx)) {
73       LOG(ERROR) << "Oversubscribed code lengths error!";
74       return false;
75     }
76   }
77 
78   // 2. Compute the coding of the first element for each length.
79   uint16_t code = 0;
80   len_count_[0] = 0;
81   for (size_t bits = 1; bits <= kMaxHuffmanBits; bits++) {
82     code = (code + len_count_[bits - 1]) << 1;
83     next_code_[bits] = code;
84   }
85 
86   codeindexpairs_.clear();
87   // 3. Calculate all the code values.
88   for (size_t idx = 0; idx < lens.size(); idx++) {
89     auto len = lens[idx];
90     if (len == 0) {
91       continue;
92     }
93 
94     // Reverse the code
95     CodeIndexPair cip;
96     cip.code = 0;
97     auto tmp_code = next_code_[len];
98     for (size_t r = 0; r < len; r++) {
99       cip.code <<= 1;
100       cip.code |= tmp_code & 1U;
101       tmp_code >>= 1;
102     }
103     cip.index = idx;
104     codeindexpairs_.push_back(cip);
105     next_code_[len]++;
106   }
107   return true;
108 }
109 
BuildHuffmanCodes(const Buffer & lens,vector<uint16_t> * hcodes,size_t * max_bits)110 bool HuffmanTable::BuildHuffmanCodes(const Buffer& lens,
111                                      vector<uint16_t>* hcodes,
112                                      size_t* max_bits) {
113   TEST_AND_RETURN_FALSE(InitHuffmanCodes(lens, max_bits));
114   // Sort descending based on the bit-length of the code.
115   std::sort(codeindexpairs_.begin(), codeindexpairs_.end(),
116             [&lens](const CodeIndexPair& a, const CodeIndexPair& b) {
117               return lens[a.index] > lens[b.index];
118             });
119 
120   // Only zero out the part of hcodes which is valuable.
121   memset(hcodes->data(), 0, (1 << *max_bits) * sizeof(uint16_t));
122   for (const auto& cip : codeindexpairs_) {
123     // The MSB bit of the code in hcodes is set if it is a valid code and its
124     // code exists in the input Huffman table.
125     (*hcodes)[cip.code] = cip.index | 0x8000;
126     auto fill_bits = *max_bits - lens[cip.index];
127     for (auto idx = 1; idx < (1 << fill_bits); idx++) {
128       auto location = (idx << lens[cip.index]) | cip.code;
129       if (!((*hcodes)[location] & 0x8000)) {
130         (*hcodes)[location] = cip.index | 0x8000;
131       }
132     }
133   }
134   return true;
135 }
136 
BuildHuffmanReverseCodes(const Buffer & lens,vector<uint16_t> * rcodes,size_t * max_bits)137 bool HuffmanTable::BuildHuffmanReverseCodes(const Buffer& lens,
138                                             vector<uint16_t>* rcodes,
139                                             size_t* max_bits) {
140   TEST_AND_RETURN_FALSE(InitHuffmanCodes(lens, max_bits));
141   // Sort ascending based on the index.
142   std::sort(codeindexpairs_.begin(), codeindexpairs_.end(),
143             [](const CodeIndexPair& a, const CodeIndexPair& b) -> bool {
144               return a.index < b.index;
145             });
146 
147   size_t index = 0;
148   for (size_t idx = 0; idx < rcodes->size(); idx++) {
149     if (index < codeindexpairs_.size() && idx == codeindexpairs_[index].index) {
150       (*rcodes)[idx] = codeindexpairs_[index].code;
151       index++;
152     } else {
153       (*rcodes)[idx] = 0;
154     }
155   }
156   return true;
157 }
158 
BuildFixedHuffmanTable()159 bool HuffmanTable::BuildFixedHuffmanTable() {
160   if (!initialized_) {
161     // For all the vectors used in this class, we set the size in the
162     // constructor and we do not change the size later. This is for optimization
163     // purposes. The total size of data in this class is approximately
164     // 2KB. Because it is a constructor return values cannot be checked.
165     lit_len_lens_.resize(288);
166     lit_len_rcodes_.resize(288);
167     lit_len_hcodes_.resize(1 << 9);
168 
169     distance_lens_.resize(30);
170     distance_rcodes_.resize(30);
171     distance_hcodes_.resize(1 << 5);
172 
173     size_t i = 0;
174     while (i < 144) {
175       lit_len_lens_[i++] = 8;
176     }
177     while (i < 256) {
178       lit_len_lens_[i++] = 9;
179     }
180     while (i < 280) {
181       lit_len_lens_[i++] = 7;
182     }
183     while (i < 288) {
184       lit_len_lens_[i++] = 8;
185     }
186 
187     i = 0;
188     while (i < 30) {
189       distance_lens_[i++] = 5;
190     }
191 
192     TEST_AND_RETURN_FALSE(
193         BuildHuffmanCodes(lit_len_lens_, &lit_len_hcodes_, &lit_len_max_bits_));
194 
195     TEST_AND_RETURN_FALSE(BuildHuffmanCodes(distance_lens_, &distance_hcodes_,
196                                             &distance_max_bits_));
197 
198     TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
199         lit_len_lens_, &lit_len_rcodes_, &lit_len_max_bits_));
200 
201     TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
202         distance_lens_, &distance_rcodes_, &distance_max_bits_));
203 
204     initialized_ = true;
205   }
206   return true;
207 }
208 
BuildDynamicHuffmanTable(BitReaderInterface * br,uint8_t * buffer,size_t * length)209 bool HuffmanTable::BuildDynamicHuffmanTable(BitReaderInterface* br,
210                                             uint8_t* buffer,
211                                             size_t* length) {
212   // Initilize only once and reuse.
213   if (!initialized_) {
214     // Only resizing the arrays needed.
215     code_lens_.resize(19);
216     code_hcodes_.resize(1 << 7);
217 
218     lit_len_lens_.resize(286);
219     lit_len_hcodes_.resize(1 << 15);
220 
221     distance_lens_.resize(30);
222     distance_hcodes_.resize(1 << 15);
223 
224     // 286: Maximum number of literal/lengths symbols.
225     // 30: Maximum number of distance symbols.
226     // The reason we reserve this to the sum of both maximum sizes is that we
227     // need to calculate both huffman codes contiguously. See b/72815313.
228     tmp_lens_.resize(286 + 30);
229     initialized_ = true;
230   }
231 
232   // Read the header. Reads the first portion of the Huffman data from input and
233   // writes it into the puff |buffer|. The first portion includes the size
234   // (|num_lit_len|) of the literals/lengths Huffman code length array
235   // (|dynamic_lit_len_lens_|), the size (|num_distance|) of distance Huffman
236   // code length array (|dynamic_distance_lens_|), and the size (|num_codes|) of
237   // Huffman code length array (dynamic_code_lens_) for reading
238   // |dynamic_lit_len_lens_| and |dynamic_distance_lens_|. Then it follows by
239   // reading |dynamic_code_lens_|.
240 
241   TEST_AND_RETURN_FALSE(*length >= 3);
242   size_t index = 0;
243   TEST_AND_RETURN_FALSE(br->CacheBits(14));
244   buffer[index++] = br->ReadBits(5);  // HLIST
245   auto num_lit_len = br->ReadBits(5) + 257;
246   br->DropBits(5);
247 
248   buffer[index++] = br->ReadBits(5);  // HDIST
249   auto num_distance = br->ReadBits(5) + 1;
250   br->DropBits(5);
251 
252   buffer[index++] = br->ReadBits(4);  // HCLEN
253   auto num_codes = br->ReadBits(4) + 4;
254   br->DropBits(4);
255 
256   TEST_AND_RETURN_FALSE(
257       CheckHuffmanArrayLengths(num_lit_len, num_distance, num_codes));
258 
259   bool checked = false;
260   size_t idx = 0;
261   TEST_AND_RETURN_FALSE(*length - index >= (num_codes + 1) / 2);
262   // Two codes per byte
263   for (; idx < num_codes; idx++) {
264     TEST_AND_RETURN_FALSE(br->CacheBits(3));
265     code_lens_[kPermutations[idx]] = br->ReadBits(3);
266     if (checked) {
267       buffer[index++] |= br->ReadBits(3);
268     } else {
269       buffer[index] = br->ReadBits(3) << 4;
270     }
271     checked = !checked;
272     br->DropBits(3);
273   }
274   // Pad the last byte if odd number of codes.
275   if (checked) {
276     index++;
277   }
278   for (; idx < 19; idx++) {
279     code_lens_[kPermutations[idx]] = 0;
280   }
281 
282   TEST_AND_RETURN_FALSE(
283       BuildHuffmanCodes(code_lens_, &code_hcodes_, &code_max_bits_));
284 
285   // Build literals/lengths and distance Huffman code length arrays.
286   auto bytes_available = (*length - index);
287   tmp_lens_.clear();
288   TEST_AND_RETURN_FALSE(BuildHuffmanCodeLengths(
289       br, buffer + index, &bytes_available, code_max_bits_,
290       num_lit_len + num_distance, &tmp_lens_));
291   index += bytes_available;
292 
293   // TODO(ahassani): Optimize this so the memcpy is not needed anymore.
294   lit_len_lens_.clear();
295   lit_len_lens_.insert(lit_len_lens_.begin(), tmp_lens_.begin(),
296                        tmp_lens_.begin() + num_lit_len);
297 
298   distance_lens_.clear();
299   distance_lens_.insert(distance_lens_.begin(), tmp_lens_.begin() + num_lit_len,
300                         tmp_lens_.end());
301 
302   TEST_AND_RETURN_FALSE(
303       BuildHuffmanCodes(lit_len_lens_, &lit_len_hcodes_, &lit_len_max_bits_));
304 
305   // Build distance Huffman codes.
306   TEST_AND_RETURN_FALSE(BuildHuffmanCodes(distance_lens_, &distance_hcodes_,
307                                           &distance_max_bits_));
308 
309   *length = index;
310   return true;
311 }
312 
BuildHuffmanCodeLengths(BitReaderInterface * br,uint8_t * buffer,size_t * length,size_t max_bits,size_t num_codes,Buffer * lens)313 bool HuffmanTable::BuildHuffmanCodeLengths(BitReaderInterface* br,
314                                            uint8_t* buffer,
315                                            size_t* length,
316                                            size_t max_bits,
317                                            size_t num_codes,
318                                            Buffer* lens) {
319   size_t index = 0;
320   lens->clear();
321   for (size_t idx = 0; idx < num_codes;) {
322     TEST_AND_RETURN_FALSE(br->CacheBits(max_bits));
323     auto bits = br->ReadBits(max_bits);
324     uint16_t code;
325     size_t nbits;
326     TEST_AND_RETURN_FALSE(CodeAlphabet(bits, &code, &nbits));
327     TEST_AND_RETURN_FALSE(index < *length);
328     br->DropBits(nbits);
329     if (code < 16) {
330       buffer[index++] = code;
331       lens->push_back(code);
332       idx++;
333     } else {
334       TEST_AND_RETURN_FALSE(code < 19);
335       size_t copy_num = 0;
336       uint8_t copy_val;
337       switch (code) {
338         case 16:
339           TEST_AND_RETURN_FALSE(idx != 0);
340           TEST_AND_RETURN_FALSE(br->CacheBits(2));
341           copy_num = 3 + br->ReadBits(2);
342           buffer[index++] = 16 + br->ReadBits(2);  // 3 - 6 times
343           copy_val = (*lens)[idx - 1];
344           br->DropBits(2);
345           break;
346 
347         case 17:
348           TEST_AND_RETURN_FALSE(br->CacheBits(3));
349           copy_num = 3 + br->ReadBits(3);
350           buffer[index++] = 20 + br->ReadBits(3);  // 3 - 10 times
351           copy_val = 0;
352           br->DropBits(3);
353           break;
354 
355         case 18:
356           TEST_AND_RETURN_FALSE(br->CacheBits(7));
357           copy_num = 11 + br->ReadBits(7);
358           buffer[index++] = 28 + br->ReadBits(7);  // 11 - 138 times
359           copy_val = 0;
360           br->DropBits(7);
361           break;
362 
363         default:
364           LOG(ERROR) << "Invalid code!";
365           return false;
366       }
367       idx += copy_num;
368       while (copy_num--) {
369         lens->push_back(copy_val);
370       }
371     }
372   }
373   TEST_AND_RETURN_FALSE(lens->size() == num_codes);
374   *length = index;
375   return true;
376 }
377 
BuildDynamicHuffmanTable(const uint8_t * buffer,size_t length,BitWriterInterface * bw)378 bool HuffmanTable::BuildDynamicHuffmanTable(const uint8_t* buffer,
379                                             size_t length,
380                                             BitWriterInterface* bw) {
381   if (!initialized_) {
382     // Only resizing the arrays needed.
383     code_lens_.resize(19);
384     code_rcodes_.resize(19);
385 
386     lit_len_lens_.resize(286);
387     lit_len_rcodes_.resize(286);
388 
389     distance_lens_.resize(30);
390     distance_rcodes_.resize(30);
391 
392     tmp_lens_.resize(286 + 30);
393 
394     initialized_ = true;
395   }
396 
397   TEST_AND_RETURN_FALSE(length >= 3);
398   size_t index = 0;
399   // Write the header.
400   size_t num_lit_len = buffer[index] + 257;
401   TEST_AND_RETURN_FALSE(bw->WriteBits(5, buffer[index++]));
402 
403   size_t num_distance = buffer[index] + 1;
404   TEST_AND_RETURN_FALSE(bw->WriteBits(5, buffer[index++]));
405 
406   size_t num_codes = buffer[index] + 4;
407   TEST_AND_RETURN_FALSE(bw->WriteBits(4, buffer[index++]));
408 
409   TEST_AND_RETURN_FALSE(
410       CheckHuffmanArrayLengths(num_lit_len, num_distance, num_codes));
411 
412   TEST_AND_RETURN_FALSE(length - index >= (num_codes + 1) / 2);
413   bool checked = false;
414   size_t idx = 0;
415   for (; idx < num_codes; idx++) {
416     uint8_t len;
417     if (checked) {
418       len = buffer[index++] & 0x0F;
419     } else {
420       len = buffer[index] >> 4;
421     }
422     checked = !checked;
423     code_lens_[kPermutations[idx]] = len;
424     TEST_AND_RETURN_FALSE(bw->WriteBits(3, len));
425   }
426   if (checked) {
427     index++;
428   }
429   for (; idx < 19; idx++) {
430     code_lens_[kPermutations[idx]] = 0;
431   }
432 
433   TEST_AND_RETURN_FALSE(
434       BuildHuffmanReverseCodes(code_lens_, &code_rcodes_, &code_max_bits_));
435 
436   // Build literal/lengths and distance Huffman code length arrays.
437   auto bytes_available = length - index;
438   TEST_AND_RETURN_FALSE(
439       BuildHuffmanCodeLengths(buffer + index, &bytes_available, bw,
440                               num_lit_len + num_distance, &tmp_lens_));
441   index += bytes_available;
442 
443   lit_len_lens_.clear();
444   lit_len_lens_.insert(lit_len_lens_.begin(), tmp_lens_.begin(),
445                        tmp_lens_.begin() + num_lit_len);
446 
447   distance_lens_.clear();
448   distance_lens_.insert(distance_lens_.begin(), tmp_lens_.begin() + num_lit_len,
449                         tmp_lens_.end());
450 
451   // Build literal/lengths Huffman reverse codes.
452   TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
453       lit_len_lens_, &lit_len_rcodes_, &lit_len_max_bits_));
454 
455   // Build distance Huffman reverse codes.
456   TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes(
457       distance_lens_, &distance_rcodes_, &distance_max_bits_));
458 
459   TEST_AND_RETURN_FALSE(length == index);
460 
461   return true;
462 }
463 
BuildHuffmanCodeLengths(const uint8_t * buffer,size_t * length,BitWriterInterface * bw,size_t num_codes,Buffer * lens)464 bool HuffmanTable::BuildHuffmanCodeLengths(const uint8_t* buffer,
465                                            size_t* length,
466                                            BitWriterInterface* bw,
467                                            size_t num_codes,
468                                            Buffer* lens) {
469   lens->clear();
470   uint16_t hcode;
471   size_t nbits;
472   size_t index = 0;
473   for (size_t idx = 0; idx < num_codes;) {
474     TEST_AND_RETURN_FALSE(index < *length);
475     auto pcode = buffer[index++];
476     TEST_AND_RETURN_FALSE(pcode <= 155);
477 
478     auto code = pcode < 16 ? pcode : pcode < 20 ? 16 : pcode < 28 ? 17 : 18;
479     TEST_AND_RETURN_FALSE(CodeHuffman(code, &hcode, &nbits));
480     TEST_AND_RETURN_FALSE(bw->WriteBits(nbits, hcode));
481     if (code < 16) {
482       lens->push_back(code);
483       idx++;
484     } else {
485       size_t copy_num = 0;
486       uint8_t copy_val;
487       switch (code) {
488         case 16:
489           // Cannot repeat a non-existent last code if idx == 0.
490           TEST_AND_RETURN_FALSE(idx != 0);
491           TEST_AND_RETURN_FALSE(bw->WriteBits(2, pcode - 16));
492           copy_num = 3 + pcode - 16;
493           copy_val = (*lens)[idx - 1];
494           break;
495 
496         case 17:
497           TEST_AND_RETURN_FALSE(bw->WriteBits(3, pcode - 20));
498           copy_num = 3 + pcode - 20;
499           copy_val = 0;
500           break;
501 
502         case 18:
503           TEST_AND_RETURN_FALSE(bw->WriteBits(7, pcode - 28));
504           copy_num = 11 + pcode - 28;
505           copy_val = 0;
506           break;
507 
508         default:
509           break;
510       }
511       idx += copy_num;
512       while (copy_num--) {
513         lens->push_back(copy_val);
514       }
515     }
516   }
517   TEST_AND_RETURN_FALSE(lens->size() == num_codes);
518   *length = index;
519   return true;
520 }
521 
BlockTypeToString(BlockType type)522 string BlockTypeToString(BlockType type) {
523   switch (type) {
524     case BlockType::kUncompressed:
525       return "Uncompressed";
526 
527     case BlockType::kFixed:
528       return "Fixed";
529 
530     case BlockType::kDynamic:
531       return "Dynamic";
532 
533     default:
534       return "Unknown";
535   }
536 }
537 
538 }  // namespace puffin
539