1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "puffin/src/include/puffin/puffer.h"
6 
7 #include <algorithm>
8 #include <memory>
9 #include <string>
10 #include <utility>
11 #include <vector>
12 
13 #include "puffin/src/bit_reader.h"
14 #include "puffin/src/huffman_table.h"
15 #include "puffin/src/include/puffin/common.h"
16 #include "puffin/src/include/puffin/stream.h"
17 #include "puffin/src/logging.h"
18 #include "puffin/src/puff_data.h"
19 #include "puffin/src/puff_writer.h"
20 
21 using std::string;
22 using std::vector;
23 
24 namespace puffin {
25 
Puffer(bool exclude_bad_distance_caches)26 Puffer::Puffer(bool exclude_bad_distance_caches)
27     : dyn_ht_(new HuffmanTable()),
28       fix_ht_(new HuffmanTable()),
29       exclude_bad_distance_caches_(exclude_bad_distance_caches) {}
30 
Puffer()31 Puffer::Puffer() : Puffer(false) {}
32 
~Puffer()33 Puffer::~Puffer() {}
34 
PuffDeflate(BitReaderInterface * br,PuffWriterInterface * pw,vector<BitExtent> * deflates) const35 bool Puffer::PuffDeflate(BitReaderInterface* br,
36                          PuffWriterInterface* pw,
37                          vector<BitExtent>* deflates) const {
38   PuffData pd;
39   HuffmanTable* cur_ht;
40   bool end_loop = false;
41   // No bits left to read, return. We try to cache at least eight bits because
42   // the minimum length of a deflate bit stream is 8: (fixed huffman table) 3
43   // bits header + 5 bits just one len/dist symbol.
44   while (!end_loop && br->CacheBits(8)) {
45     auto start_bit_offset = br->OffsetInBits();
46 
47     TEST_AND_RETURN_FALSE(br->CacheBits(3));
48     uint8_t final_bit = br->ReadBits(1);  // BFINAL
49     br->DropBits(1);
50     uint8_t type = br->ReadBits(2);  // BTYPE
51     br->DropBits(2);
52     DVLOG(2) << "Read block type: "
53              << BlockTypeToString(static_cast<BlockType>(type));
54 
55     // If it is the final block and we are just looking for deflate locations,
56     // we consider this the end of the search.
57     if (deflates != nullptr && final_bit) {
58       end_loop = true;
59     }
60 
61     // Header structure
62     // +-+-+-+-+-+-+-+-+
63     // |F| TP|   SKIP  |
64     // +-+-+-+-+-+-+-+-+
65     // F -> final_bit
66     // TP -> type
67     // SKIP -> skipped_bits (only in kUncompressed type)
68     auto block_header = (final_bit << 7) | (type << 5);
69     switch (static_cast<BlockType>(type)) {
70       case BlockType::kUncompressed: {
71         auto skipped_bits = br->ReadBoundaryBits();
72         br->SkipBoundaryBits();
73         TEST_AND_RETURN_FALSE(br->CacheBits(32));
74         auto len = br->ReadBits(16);  // LEN
75         br->DropBits(16);
76         auto nlen = br->ReadBits(16);  // NLEN
77         br->DropBits(16);
78 
79         if ((len ^ nlen) != 0xFFFF) {
80           LOG(ERROR) << "Length of uncompressed data is invalid;"
81                      << " LEN(" << len << ") NLEN(" << nlen << ")";
82           return false;
83         }
84 
85         // Put skipped bits into header.
86         block_header |= skipped_bits;
87 
88         // Insert block header.
89         pd.type = PuffData::Type::kBlockMetadata;
90         pd.block_metadata[0] = block_header;
91         pd.length = 1;
92         TEST_AND_RETURN_FALSE(pw->Insert(pd));
93 
94         // Insert all the raw literals.
95         pd.type = PuffData::Type::kLiterals;
96         pd.length = len;
97         TEST_AND_RETURN_FALSE(br->GetByteReaderFn(pd.length, &pd.read_fn));
98         TEST_AND_RETURN_FALSE(pw->Insert(pd));
99 
100         pd.type = PuffData::Type::kEndOfBlock;
101         TEST_AND_RETURN_FALSE(pw->Insert(pd));
102 
103         // There is no need to insert the location of uncompressed deflates
104         // because we do not want the uncompressed blocks when trying to find
105         // the bit-addressed location of deflates. They better be ignored.
106 
107         // continue the loop. Do not read any literal/length/distance.
108         continue;
109       }
110 
111       case BlockType::kFixed:
112         fix_ht_->BuildFixedHuffmanTable();
113         cur_ht = fix_ht_.get();
114         pd.type = PuffData::Type::kBlockMetadata;
115         pd.block_metadata[0] = block_header;
116         pd.length = 1;
117         TEST_AND_RETURN_FALSE(pw->Insert(pd));
118         break;
119 
120       case BlockType::kDynamic:
121         pd.type = PuffData::Type::kBlockMetadata;
122         pd.block_metadata[0] = block_header;
123         pd.length = sizeof(pd.block_metadata) - 1;
124         TEST_AND_RETURN_FALSE(dyn_ht_->BuildDynamicHuffmanTable(
125             br, &pd.block_metadata[1], &pd.length));
126         pd.length += 1;  // For the header.
127         TEST_AND_RETURN_FALSE(pw->Insert(pd));
128         cur_ht = dyn_ht_.get();
129         break;
130 
131       default:
132         LOG(ERROR) << "Invalid block compression type: "
133                    << static_cast<int>(type);
134         return false;
135     }
136 
137     // If true and the list of output |deflates| is non-null, the current
138     // deflate location will be added to that list.
139     bool include_deflate = true;
140 
141     while (true) {  // Breaks when the end of block is reached.
142       auto max_bits = cur_ht->LitLenMaxBits();
143       if (!br->CacheBits(max_bits)) {
144         // It could be the end of buffer and the bit length of the end_of_block
145         // symbol has less than maximum bit length of current Huffman table. So
146         // only asking for the size of end of block symbol (256).
147         TEST_AND_RETURN_FALSE(cur_ht->EndOfBlockBitLength(&max_bits));
148       }
149       TEST_AND_RETURN_FALSE(br->CacheBits(max_bits));
150       auto bits = br->ReadBits(max_bits);
151       uint16_t lit_len_alphabet;
152       size_t nbits;
153       TEST_AND_RETURN_FALSE(
154           cur_ht->LitLenAlphabet(bits, &lit_len_alphabet, &nbits));
155       br->DropBits(nbits);
156       if (lit_len_alphabet < 256) {
157         pd.type = PuffData::Type::kLiteral;
158         pd.byte = lit_len_alphabet;
159         TEST_AND_RETURN_FALSE(pw->Insert(pd));
160 
161       } else if (256 == lit_len_alphabet) {
162         pd.type = PuffData::Type::kEndOfBlock;
163         TEST_AND_RETURN_FALSE(pw->Insert(pd));
164         if (deflates != nullptr && include_deflate) {
165           deflates->emplace_back(start_bit_offset,
166                                  br->OffsetInBits() - start_bit_offset);
167         }
168         break;  // Breaks the loop.
169       } else {
170         TEST_AND_RETURN_FALSE(lit_len_alphabet <= 285);
171         // Reading length.
172         auto len_code_start = lit_len_alphabet - 257;
173         auto extra_bits_len = kLengthExtraBits[len_code_start];
174         uint16_t extra_bits_value = 0;
175         if (extra_bits_len) {
176           TEST_AND_RETURN_FALSE(br->CacheBits(extra_bits_len));
177           extra_bits_value = br->ReadBits(extra_bits_len);
178           br->DropBits(extra_bits_len);
179         }
180         auto length = kLengthBases[len_code_start] + extra_bits_value;
181 
182         auto bits_to_cache = cur_ht->DistanceMaxBits();
183         if (!br->CacheBits(bits_to_cache)) {
184           // This is a corner case that is present in the older versions of the
185           // puffin. So we need to catch it and correctly discard this kind of
186           // deflate when we encounter it. See crbug.com/915559 for more info.
187           bits_to_cache = br->BitsRemaining();
188           TEST_AND_RETURN_FALSE(br->CacheBits(bits_to_cache));
189           if (exclude_bad_distance_caches_) {
190             include_deflate = false;
191           }
192           LOG(WARNING) << "A rare condition that older puffin clients fail to"
193                        << " recognize happened. Nothing to worry about."
194                        << " See crbug.com/915559";
195         }
196         auto bits = br->ReadBits(bits_to_cache);
197         uint16_t distance_alphabet;
198         size_t nbits;
199         TEST_AND_RETURN_FALSE(
200             cur_ht->DistanceAlphabet(bits, &distance_alphabet, &nbits));
201         br->DropBits(nbits);
202 
203         // Reading distance.
204         extra_bits_len = kDistanceExtraBits[distance_alphabet];
205         extra_bits_value = 0;
206         if (extra_bits_len) {
207           TEST_AND_RETURN_FALSE(br->CacheBits(extra_bits_len));
208           extra_bits_value = br->ReadBits(extra_bits_len);
209           br->DropBits(extra_bits_len);
210         }
211 
212         pd.type = PuffData::Type::kLenDist;
213         pd.length = length;
214         pd.distance = kDistanceBases[distance_alphabet] + extra_bits_value;
215         TEST_AND_RETURN_FALSE(pw->Insert(pd));
216       }
217     }
218   }
219   TEST_AND_RETURN_FALSE(pw->Flush());
220   return true;
221 }
222 
223 }  // namespace puffin
224