1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef SRC_HUFFMAN_TABLE_H_
6 #define SRC_HUFFMAN_TABLE_H_
7 
8 #include <cstddef>
9 #include <cstdint>
10 #include <string>
11 #include <vector>
12 
13 #include "puffin/src/bit_reader.h"
14 #include "puffin/src/bit_writer.h"
15 #include "puffin/src/include/puffin/common.h"
16 #include "puffin/src/logging.h"
17 
18 namespace puffin {
19 
20 // Maximum Huffman code length based on RFC1951.
21 constexpr size_t kMaxHuffmanBits = 15;
22 
23 // Permutations of input Huffman code lengths (used only to read
24 // |dynamic_code_lens_|).
25 extern const uint8_t kPermutations[];
26 
27 // The bases of each alphabet which is added to the integer value of extra
28 // bits that comes after the Huffman code in the input to create the given
29 // length value. The last element is a guard.
30 extern const uint16_t kLengthBases[];
31 
32 // Number of extra bits that comes after the associating Huffman code.
33 extern const uint8_t kLengthExtraBits[];
34 
35 // same as |kLengthBases| except for the the distances instead of lengths.
36 // The last element is a guard.
37 extern const uint16_t kDistanceBases[];
38 
39 // Same as |kLengthExtraBits| except for distances instead of lengths.
40 extern const uint8_t kDistanceExtraBits[];
41 
42 class HuffmanTable {
43  public:
44   HuffmanTable();
45   virtual ~HuffmanTable() = default;
46 
47   // Checks the lengths of Huffman length arrays for correctness
48   //
49   // |num_lit_len|  IN  The number of literal/lengths code lengths
50   // |num_distance| IN  The number of distance code lengths
51   // |num_codes|    IN  The number of code lengths for reading Huffman table.
CheckHuffmanArrayLengths(size_t num_lit_len,size_t num_distance,size_t num_codes)52   inline bool CheckHuffmanArrayLengths(size_t num_lit_len,
53                                        size_t num_distance,
54                                        size_t num_codes) {
55     if (num_lit_len > 286 || num_distance > 30 || num_codes > 19) {
56       LOG(ERROR) << "The lengths of the dynamic Huffman table are invalid: "
57                  << "num_lit_len(" << num_lit_len << ") "
58                  << "num_distance(" << num_distance << ") "
59                  << "num_codes(" << num_codes << ")";
60       return false;
61     }
62     return true;
63   }
64 
65   // Returns the maximum number of bits used in the current literal/length
66   // Huffman codes.
LitLenMaxBits()67   inline size_t LitLenMaxBits() { return lit_len_max_bits_; }
68 
69   // Returns the maximum number of bits used in the current distance Huffman
70   // codes.
DistanceMaxBits()71   inline size_t DistanceMaxBits() { return distance_max_bits_; }
72 
73   // Returns the alphabet associated with the set of input bits for the code
74   // length array.
75   //
76   // |bits|     IN   The input Huffman bits read from the deflate stream.
77   // |alphabet| OUT  The alphabet associated with the given |bits|.
78   // |nbits|    OUT  The number of bits in the Huffman code of alphabet.
79   // Returns true if there is an alphabet associated with |bits|.
CodeAlphabet(uint32_t bits,uint16_t * alphabet,size_t * nbits)80   inline bool CodeAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) {
81     auto hc = code_hcodes_[bits];
82     TEST_AND_RETURN_FALSE(hc & 0x8000);
83     *alphabet = hc & 0x7FFF;
84     *nbits = code_lens_[*alphabet];
85     return true;
86   }
87 
88   // Returns the alphabet associated with the set of input bits for the
89   // literal/length code length array.
90   //
91   // |bits|     IN   The input Huffman bits read from the deflate stream.
92   // |alphabet| OUT  The alphabet associated with the given |bits|.
93   // |nbits|    OUT  The number of bits in the Huffman code of the |alphabet|.
94   // Returns true if there is an alphabet associated with |bits|.
LitLenAlphabet(uint32_t bits,uint16_t * alphabet,size_t * nbits)95   inline bool LitLenAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) {
96     auto hc = lit_len_hcodes_[bits];
97     TEST_AND_RETURN_FALSE(hc & 0x8000);
98     *alphabet = hc & 0x7FFF;
99     *nbits = lit_len_lens_[*alphabet];
100     return true;
101   }
102 
103   // Returns the alphabet associated with the set of input bits for the
104   // distance code length array.
105   //
106   // |bits|     IN   The input Huffman bits read from the deflate stream.
107   // |alphabet| OUT  The alphabet associated with the given |bits|.
108   // |nbits|    OUT  The number of bits in the Huffman code of the |alphabet|.
109   // Returns true if there is an alphabet associated with |bits|.
DistanceAlphabet(uint32_t bits,uint16_t * alphabet,size_t * nbits)110   inline bool DistanceAlphabet(uint32_t bits,
111                                uint16_t* alphabet,
112                                size_t* nbits) {
113     auto hc = distance_hcodes_[bits];
114     TEST_AND_RETURN_FALSE(hc & 0x8000);
115     *alphabet = hc & 0x7FFF;
116     *nbits = distance_lens_[*alphabet];
117     return true;
118   }
119 
120   // Returns the Huffman code of a give alphabet for Huffman table codes.
121   //
122   // |alphabet| IN   The alphabet.
123   // |huffman|  OUT  The Huffman code for |alphabet|.
124   // |nbits|    OUT  The maximum number of bits in the Huffman code of the
125   //                 |alphabet|.
CodeHuffman(uint16_t alphabet,uint16_t * huffman,size_t * nbits)126   inline bool CodeHuffman(uint16_t alphabet, uint16_t* huffman, size_t* nbits) {
127     TEST_AND_RETURN_FALSE(alphabet < code_lens_.size());
128     *huffman = code_rcodes_[alphabet];
129     *nbits = code_lens_[alphabet];
130     return true;
131   }
132 
133   // Returns the Huffman code of a give alphabet for literal/length codes.
134   //
135   // |alphabet| IN   The alphabet.
136   // |huffman|  OUT  The Huffman code for |alphabet|.
137   // |nbits|    OUT  The maximum number of bits in the Huffman code of the
138   //                 |alphabet|.
LitLenHuffman(uint16_t alphabet,uint16_t * huffman,size_t * nbits)139   inline bool LitLenHuffman(uint16_t alphabet,
140                             uint16_t* huffman,
141                             size_t* nbits) {
142     TEST_AND_RETURN_FALSE(alphabet < lit_len_lens_.size());
143     *huffman = lit_len_rcodes_[alphabet];
144     *nbits = lit_len_lens_[alphabet];
145     return true;
146   }
147 
EndOfBlockBitLength(size_t * nbits)148   inline bool EndOfBlockBitLength(size_t* nbits) {
149     TEST_AND_RETURN_FALSE(256 < lit_len_lens_.size());
150     *nbits = lit_len_lens_[256];
151     return true;
152   }
153 
154   // Returns the Huffman code of a give alphabet for distance codes.
155   //
156   // |alphabet| IN   The alphabet.
157   // |huffman|  OUT  The Huffman code for |alphabet|.
158   // |nbits|    OUT  The maximum number of bits in the Huffman code of the
159   //                 |alphabet|.
DistanceHuffman(uint16_t alphabet,uint16_t * huffman,size_t * nbits)160   inline bool DistanceHuffman(uint16_t alphabet,
161                               uint16_t* huffman,
162                               size_t* nbits) {
163     TEST_AND_RETURN_FALSE(alphabet < distance_lens_.size());
164     *huffman = distance_rcodes_[alphabet];
165     *nbits = distance_lens_[alphabet];
166     return true;
167   }
168 
169   // This populates the object with fixed huffman table parameters.
170   // TODO(ahassani): Revamp the use of this function to be initiliazed once in
171   // the lifetime of the program and only one instance needed.
172   bool BuildFixedHuffmanTable();
173 
174   // This functions first reads the Huffman code length arrays from the input
175   // deflate stream, then builds both literal/length and distance Huffman
176   // code arrays. It also writes the Huffman table into the puffed stream.
177   //
178   // |br|      IN      The |BitReaderInterface| reading the deflate stream.
179   // |buffer|  OUT     The object to write the Huffman table.
180   // |length|  IN/OUT  The length available in the |buffer| and in return it
181   //                   will be the length of Huffman table data written into
182   //                   the |buffer|.
183   bool BuildDynamicHuffmanTable(BitReaderInterface* br,
184                                 uint8_t* buffer,
185                                 size_t* length);
186 
187   // This functions first reads the Huffman code length arrays from the input
188   // puffed |buffer|, then builds both literal/length and distance Huffman code
189   // arrays. It also writes the coded Huffman table arrays into the deflate
190   // stream.
191   //
192   // |buffer| IN      The array to read the Huffman table from.
193   // |length| IN      The length available in the |buffer|.
194   // |bw|     IN/OUT  The |BitWriterInterface| for writing into the deflate
195   //                  stream.
196   bool BuildDynamicHuffmanTable(const uint8_t* buffer,
197                                 size_t length,
198                                 BitWriterInterface* bw);
199 
200  protected:
201   // Initializes the Huffman codes from an array of lengths.
202   //
203   // |lens|     IN   The input array of code lengths.
204   // |max_bits| OUT  The maximum number of bits used for the Huffman codes.
205   bool InitHuffmanCodes(const Buffer& lens, size_t* max_bits);
206 
207   // Creates the Huffman code to alphabet array.
208   // |lens|     IN   The input array of code lengths.
209   // |hcodes|   OUT  The Huffman to alphabet array.
210   // |max_bits| OUT  The maximum number of bits used for the Huffman codes.
211   bool BuildHuffmanCodes(const Buffer& lens,
212                          std::vector<uint16_t>* hcodes,
213                          size_t* max_bits);
214 
215   // Creates the alphabet to Huffman code array.
216   // |lens|     IN   The input array of code lengths.
217   // |rcodes|   OUT  The Huffman to Huffman array.
218   // |max_bits| OUT  The maximum number of bits used for the Huffman codes.
219   bool BuildHuffmanReverseCodes(const Buffer& lens,
220                                 std::vector<uint16_t>* rcodes,
221                                 size_t* max_bits);
222 
223   // Reads a specific Huffman code length array from input. At the same time
224   // writes the array into the puffed stream. The Huffman code length array is
225   // either the literal/lengths or distance codes.
226   //
227   // |br|        IN      The |BitReaderInterface| for reading the deflate
228   //                      stream.
229   // |buffer|    OUT     The array to write the Huffman table.
230   // |length|    IN/OUT  The length available in the |buffer| and in return it
231   //                     will be the length of data written into the |buffer|.
232   // |max_bits|  IN      The maximum number of bits in the Huffman codes.
233   // |num_codes| IN      The size of the Huffman code length array in the input.
234   // |lens|      OUT     The resulting Huffman code length array.
235   bool BuildHuffmanCodeLengths(BitReaderInterface* br,
236                                uint8_t* buffer,
237                                size_t* length,
238                                size_t max_bits,
239                                size_t num_codes,
240                                Buffer* lens);
241 
242   // Similar to |BuildHuffmanCodeLengths| but for reading from puffed buffer and
243   // writing into deflate stream.
244   //
245   // |buffer|    IN      The array to read the Huffman table from.
246   // |length|    IN      The length available in the |buffer|.
247   // |bw|        IN/OUT  The |BitWriterInterface| for writing into the deflate
248   //                     stream.
249   // |num_codes| IN      Number of Huffman code lengths to read from the
250   //                     |buffer|.
251   // |lens|      OUT     The Huffman code lengths array.
252   bool BuildHuffmanCodeLengths(const uint8_t* buffer,
253                                size_t* length,
254                                BitWriterInterface* bw,
255                                size_t num_codes,
256                                Buffer* lens);
257 
258  private:
259   // A utility struct used to create Huffman codes.
260   struct CodeIndexPair {
261     uint16_t code;   // The Huffman code
262     uint16_t index;  // The alphabet
263   };
264   // A vector with maximum size of 286 that is only uses as temporary space for
265   // building Huffman codes.
266   std::vector<CodeIndexPair> codeindexpairs_;
267 
268   // Used in building Huffman codes for literals/lengths and distances.
269   std::vector<uint8_t> lit_len_lens_;
270   std::vector<uint16_t> lit_len_hcodes_;
271   std::vector<uint16_t> lit_len_rcodes_;
272   size_t lit_len_max_bits_;
273   std::vector<uint8_t> distance_lens_;
274   std::vector<uint16_t> distance_hcodes_;
275   std::vector<uint16_t> distance_rcodes_;
276   size_t distance_max_bits_;
277 
278   // The reason for keeping a temporary buffer here is to avoid reallocing each
279   // time.
280   std::vector<uint8_t> tmp_lens_;
281 
282   // Used in building Huffman codes for reading and decoding literal/length and
283   // distance Huffman code length arrays.
284   std::vector<uint8_t> code_lens_;
285   std::vector<uint16_t> code_hcodes_;
286   std::vector<uint16_t> code_rcodes_;
287   size_t code_max_bits_;
288 
289   bool initialized_;
290 
291   DISALLOW_COPY_AND_ASSIGN(HuffmanTable);
292 };
293 
294 // The type of a block in a deflate stream.
295 enum class BlockType : uint8_t {
296   kUncompressed = 0x00,
297   kFixed = 0x01,
298   kDynamic = 0x02,
299 };
300 
301 std::string BlockTypeToString(BlockType type);
302 
303 }  // namespace puffin
304 
305 #endif  // SRC_HUFFMAN_TABLE_H_
306