1 /* Copyright 2015 Google Inc. All Rights Reserved.
2 
3    Distributed under MIT license.
4    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6 
7 /* Function for fast encoding of an input fragment, independently from the input
8    history. This function uses one-pass processing: when we find a backward
9    match, we immediately emit the corresponding command and literal codes to
10    the bit stream.
11 
12    Adapted from the CompressFragment() function in
13    https://github.com/google/snappy/blob/master/snappy.cc */
14 
15 #include "./compress_fragment.h"
16 
17 #include <string.h>  /* memcmp, memcpy, memset */
18 
19 #include "../common/constants.h"
20 #include "../common/platform.h"
21 #include <brotli/types.h>
22 #include "./brotli_bit_stream.h"
23 #include "./entropy_encode.h"
24 #include "./fast_log.h"
25 #include "./find_match_length.h"
26 #include "./memory.h"
27 #include "./write_bits.h"
28 
29 #if defined(__cplusplus) || defined(c_plusplus)
30 extern "C" {
31 #endif
32 
33 #define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
34 
35 /* kHashMul32 multiplier has these properties:
36    * The multiplier must be odd. Otherwise we may lose the highest bit.
37    * No long streaks of ones or zeros.
38    * There is no effort to ensure that it is a prime, the oddity is enough
39      for this use.
40    * The number has been tuned heuristically against compression benchmarks. */
41 static const uint32_t kHashMul32 = 0x1E35A7BD;
42 
Hash(const uint8_t * p,size_t shift)43 static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
44   const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32;
45   return (uint32_t)(h >> shift);
46 }
47 
HashBytesAtOffset(uint64_t v,int offset,size_t shift)48 static BROTLI_INLINE uint32_t HashBytesAtOffset(
49     uint64_t v, int offset, size_t shift) {
50   BROTLI_DCHECK(offset >= 0);
51   BROTLI_DCHECK(offset <= 3);
52   {
53     const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
54     return (uint32_t)(h >> shift);
55   }
56 }
57 
IsMatch(const uint8_t * p1,const uint8_t * p2)58 static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
59   return TO_BROTLI_BOOL(
60       BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2) &&
61       p1[4] == p2[4]);
62 }
63 
64 /* Builds a literal prefix code into "depths" and "bits" based on the statistics
65    of the "input" string and stores it into the bit stream.
66    Note that the prefix code here is built from the pre-LZ77 input, therefore
67    we can only approximate the statistics of the actual literal stream.
68    Moreover, for long inputs we build a histogram from a sample of the input
69    and thus have to assign a non-zero depth for each literal.
70    Returns estimated compression ratio millibytes/char for encoding given input
71    with generated code. */
static size_t BuildAndStoreLiteralPrefixCode(MemoryManager* m,
                                             const uint8_t* input,
                                             const size_t input_size,
                                             uint8_t depths[256],
                                             uint16_t bits[256],
                                             size_t* storage_ix,
                                             uint8_t* storage) {
  uint32_t histogram[256] = { 0 };
  size_t histogram_total;
  size_t i;
  if (input_size < (1 << 15)) {
    /* Short input: count every byte exactly. */
    for (i = 0; i < input_size; ++i) {
      ++histogram[input[i]];
    }
    histogram_total = input_size;
    for (i = 0; i < 256; ++i) {
      /* We weigh the first 11 samples with weight 3 to account for the
         balancing effect of the LZ77 phase on the histogram. */
      const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
      histogram[i] += adjust;
      histogram_total += adjust;
    }
  } else {
    /* Long input: sample every 29th byte to bound the histogram cost. */
    static const size_t kSampleRate = 29;
    for (i = 0; i < input_size; i += kSampleRate) {
      ++histogram[input[i]];
    }
    /* Number of samples actually taken (rounded up). */
    histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
    for (i = 0; i < 256; ++i) {
      /* We add 1 to each population count to avoid 0 bit depths (since this is
         only a sample and we don't know if the symbol appears or not), and we
         weigh the first 11 samples with weight 3 to account for the balancing
         effect of the LZ77 phase on the histogram (more frequent symbols are
         more likely to be in backward references instead as literals). */
      const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
      histogram[i] += adjust;
      histogram_total += adjust;
    }
  }
  /* Build a depth-limited (8 bit) prefix code and emit it to the stream. */
  BrotliBuildAndStoreHuffmanTreeFast(m, histogram, histogram_total,
                                     /* max_bits = */ 8,
                                     depths, bits, storage_ix, storage);
  if (BROTLI_IS_OOM(m)) return 0;
  {
    /* Sum of count*depth = estimated compressed size in bits of the
       (possibly sampled) literal stream. */
    size_t literal_ratio = 0;
    for (i = 0; i < 256; ++i) {
      if (histogram[i]) literal_ratio += histogram[i] * depths[i];
    }
    /* Estimated encoding ratio, millibytes per symbol. */
    return (literal_ratio * 125) / histogram_total;
  }
}
124 
125 /* Builds a command and distance prefix code (each 64 symbols) into "depth" and
126    "bits" based on "histogram" and stores it into the bit stream. */
static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
    uint8_t depth[128], uint16_t bits[128], size_t* storage_ix,
    uint8_t* storage) {
  /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
  HuffmanTree tree[129];
  uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
  uint16_t cmd_bits[64];

  /* Two independent 64-symbol codes: commands (depth limit 15) and
     distances (depth limit 14). */
  BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
  BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
  /* We have to jump through a few hoops here in order to compute
     the command bits because the symbols are in a different order than in
     the full alphabet. This looks complicated, but having the symbols
     in this order in the command bits saves a few branches in the Emit*
     functions. */
  memcpy(cmd_depth, depth, 24);
  memcpy(cmd_depth + 24, depth + 40, 8);
  memcpy(cmd_depth + 32, depth + 24, 8);
  memcpy(cmd_depth + 40, depth + 48, 8);
  memcpy(cmd_depth + 48, depth + 32, 8);
  memcpy(cmd_depth + 56, depth + 56, 8);
  BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
  /* Undo the permutation for the bit codes (cmd_bits entries are 16-bit,
     hence the doubled byte counts). */
  memcpy(bits, cmd_bits, 48);
  memcpy(bits + 24, cmd_bits + 32, 16);
  memcpy(bits + 32, cmd_bits + 48, 16);
  memcpy(bits + 40, cmd_bits + 24, 16);
  memcpy(bits + 48, cmd_bits + 40, 16);
  memcpy(bits + 56, cmd_bits + 56, 16);
  BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
  {
    /* Create the bit length array for the full command alphabet. */
    size_t i;
    memset(cmd_depth, 0, 64);  /* only 64 first values were used */
    /* Scatter the 64 used depths into their positions in the full
       BROTLI_NUM_COMMAND_SYMBOLS alphabet so the stored tree decodes to the
       same code. NOTE(review): the exact offsets mirror the symbol layout
       assumed by the Emit* helpers — do not change independently. */
    memcpy(cmd_depth, depth, 8);
    memcpy(cmd_depth + 64, depth + 8, 8);
    memcpy(cmd_depth + 128, depth + 16, 8);
    memcpy(cmd_depth + 192, depth + 24, 8);
    memcpy(cmd_depth + 384, depth + 32, 8);
    for (i = 0; i < 8; ++i) {
      cmd_depth[128 + 8 * i] = depth[40 + i];
      cmd_depth[256 + 8 * i] = depth[48 + i];
      cmd_depth[448 + 8 * i] = depth[56 + i];
    }
    BrotliStoreHuffmanTree(
        cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
  }
  /* Store the 64-symbol distance code as-is. */
  BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}
175 
176 /* REQUIRES: insertlen < 6210 */
EmitInsertLen(size_t insertlen,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)177 static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
178                                         const uint8_t depth[128],
179                                         const uint16_t bits[128],
180                                         uint32_t histo[128],
181                                         size_t* storage_ix,
182                                         uint8_t* storage) {
183   if (insertlen < 6) {
184     const size_t code = insertlen + 40;
185     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
186     ++histo[code];
187   } else if (insertlen < 130) {
188     const size_t tail = insertlen - 2;
189     const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
190     const size_t prefix = tail >> nbits;
191     const size_t inscode = (nbits << 1) + prefix + 42;
192     BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
193     BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
194     ++histo[inscode];
195   } else if (insertlen < 2114) {
196     const size_t tail = insertlen - 66;
197     const uint32_t nbits = Log2FloorNonZero(tail);
198     const size_t code = nbits + 50;
199     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
200     BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
201     ++histo[code];
202   } else {
203     BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
204     BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
205     ++histo[61];
206   }
207 }
208 
EmitLongInsertLen(size_t insertlen,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)209 static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
210                                             const uint8_t depth[128],
211                                             const uint16_t bits[128],
212                                             uint32_t histo[128],
213                                             size_t* storage_ix,
214                                             uint8_t* storage) {
215   if (insertlen < 22594) {
216     BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
217     BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
218     ++histo[62];
219   } else {
220     BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
221     BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
222     ++histo[63];
223   }
224 }
225 
EmitCopyLen(size_t copylen,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)226 static BROTLI_INLINE void EmitCopyLen(size_t copylen,
227                                       const uint8_t depth[128],
228                                       const uint16_t bits[128],
229                                       uint32_t histo[128],
230                                       size_t* storage_ix,
231                                       uint8_t* storage) {
232   if (copylen < 10) {
233     BrotliWriteBits(
234         depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
235     ++histo[copylen + 14];
236   } else if (copylen < 134) {
237     const size_t tail = copylen - 6;
238     const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
239     const size_t prefix = tail >> nbits;
240     const size_t code = (nbits << 1) + prefix + 20;
241     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
242     BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
243     ++histo[code];
244   } else if (copylen < 2118) {
245     const size_t tail = copylen - 70;
246     const uint32_t nbits = Log2FloorNonZero(tail);
247     const size_t code = nbits + 28;
248     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
249     BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
250     ++histo[code];
251   } else {
252     BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
253     BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
254     ++histo[39];
255   }
256 }
257 
EmitCopyLenLastDistance(size_t copylen,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)258 static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
259                                                   const uint8_t depth[128],
260                                                   const uint16_t bits[128],
261                                                   uint32_t histo[128],
262                                                   size_t* storage_ix,
263                                                   uint8_t* storage) {
264   if (copylen < 12) {
265     BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
266     ++histo[copylen - 4];
267   } else if (copylen < 72) {
268     const size_t tail = copylen - 8;
269     const uint32_t nbits = Log2FloorNonZero(tail) - 1;
270     const size_t prefix = tail >> nbits;
271     const size_t code = (nbits << 1) + prefix + 4;
272     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
273     BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
274     ++histo[code];
275   } else if (copylen < 136) {
276     const size_t tail = copylen - 8;
277     const size_t code = (tail >> 5) + 30;
278     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
279     BrotliWriteBits(5, tail & 31, storage_ix, storage);
280     BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
281     ++histo[code];
282     ++histo[64];
283   } else if (copylen < 2120) {
284     const size_t tail = copylen - 72;
285     const uint32_t nbits = Log2FloorNonZero(tail);
286     const size_t code = nbits + 28;
287     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
288     BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
289     BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
290     ++histo[code];
291     ++histo[64];
292   } else {
293     BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
294     BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
295     BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
296     ++histo[39];
297     ++histo[64];
298   }
299 }
300 
EmitDistance(size_t distance,const uint8_t depth[128],const uint16_t bits[128],uint32_t histo[128],size_t * storage_ix,uint8_t * storage)301 static BROTLI_INLINE void EmitDistance(size_t distance,
302                                        const uint8_t depth[128],
303                                        const uint16_t bits[128],
304                                        uint32_t histo[128],
305                                        size_t* storage_ix, uint8_t* storage) {
306   const size_t d = distance + 3;
307   const uint32_t nbits = Log2FloorNonZero(d) - 1u;
308   const size_t prefix = (d >> nbits) & 1;
309   const size_t offset = (2 + prefix) << nbits;
310   const size_t distcode = 2 * (nbits - 1) + prefix + 80;
311   BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
312   BrotliWriteBits(nbits, d - offset, storage_ix, storage);
313   ++histo[distcode];
314 }
315 
EmitLiterals(const uint8_t * input,const size_t len,const uint8_t depth[256],const uint16_t bits[256],size_t * storage_ix,uint8_t * storage)316 static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
317                                        const uint8_t depth[256],
318                                        const uint16_t bits[256],
319                                        size_t* storage_ix, uint8_t* storage) {
320   size_t j;
321   for (j = 0; j < len; j++) {
322     const uint8_t lit = input[j];
323     BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
324   }
325 }
326 
327 /* REQUIRES: len <= 1 << 24. */
static void BrotliStoreMetaBlockHeader(
    size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
    uint8_t* storage) {
  /* Writes a non-last meta-block header: ISLAST=0, MNIBBLES, MLEN-1,
     then the ISUNCOMPRESSED flag. Caller guarantees len <= 1 << 24. */
  size_t nibbles;
  /* ISLAST */
  BrotliWriteBits(1, 0, storage_ix, storage);
  if (len <= (1U << 16)) {
    nibbles = 4;
  } else if (len <= (1U << 20)) {
    nibbles = 5;
  } else {
    nibbles = 6;
  }
  BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
  BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
  /* ISUNCOMPRESSED */
  BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}
344 
static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
    uint8_t* array) {
  /* Overwrites "n_bits" bits of "array" starting at bit position "pos"
     (LSB-first within each byte), one byte at a time; the surrounding bits
     of each touched byte are preserved. */
  while (n_bits > 0) {
    const size_t byte_pos = pos >> 3;
    const size_t bit_in_byte = pos & 7;
    size_t chunk = 8 - bit_in_byte;  /* bits available in this byte */
    uint32_t keep_mask;
    if (chunk > n_bits) chunk = n_bits;
    /* Mask selecting the bits of the byte that must survive. */
    keep_mask = (~((1u << (bit_in_byte + chunk)) - 1u)) |
                ((1u << bit_in_byte) - 1u);
    array[byte_pos] = (uint8_t)(
        (array[byte_pos] & keep_mask) |
        ((bits & ((1u << chunk) - 1u)) << bit_in_byte));
    bits >>= chunk;
    n_bits -= chunk;
    pos += chunk;
  }
}
363 
static void RewindBitPosition(const size_t new_storage_ix,
                              size_t* storage_ix, uint8_t* storage) {
  /* Truncates the bit stream back to "new_storage_ix": clears the already
     written high bits of the partially filled last byte and resets the
     write position. */
  const size_t bits_in_last_byte = new_storage_ix & 7;
  storage[new_storage_ix >> 3] &=
      (uint8_t)((1u << bits_in_last_byte) - 1);
  *storage_ix = new_storage_ix;
}
371 
ShouldMergeBlock(const uint8_t * data,size_t len,const uint8_t * depths)372 static BROTLI_BOOL ShouldMergeBlock(
373     const uint8_t* data, size_t len, const uint8_t* depths) {
374   size_t histo[256] = { 0 };
375   static const size_t kSampleRate = 43;
376   size_t i;
377   for (i = 0; i < len; i += kSampleRate) {
378     ++histo[data[i]];
379   }
380   {
381     const size_t total = (len + kSampleRate - 1) / kSampleRate;
382     double r = (FastLog2(total) + 0.5) * (double)total + 200;
383     for (i = 0; i < 256; ++i) {
384       r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
385     }
386     return TO_BROTLI_BOOL(r >= 0.0);
387   }
388 }
389 
390 /* Acceptable loss for uncompressible speedup is 2% */
391 #define MIN_RATIO 980
392 
ShouldUseUncompressedMode(const uint8_t * metablock_start,const uint8_t * next_emit,const size_t insertlen,const size_t literal_ratio)393 static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
394     const uint8_t* metablock_start, const uint8_t* next_emit,
395     const size_t insertlen, const size_t literal_ratio) {
396   const size_t compressed = (size_t)(next_emit - metablock_start);
397   if (compressed * 50 > insertlen) {
398     return BROTLI_FALSE;
399   } else {
400     return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
401   }
402 }
403 
static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
                                      const size_t storage_ix_start,
                                      size_t* storage_ix, uint8_t* storage) {
  /* Replaces everything emitted since "storage_ix_start" with an
     uncompressed meta-block containing the raw bytes [begin, end). */
  const size_t num_bytes = (size_t)(end - begin);
  RewindBitPosition(storage_ix_start, storage_ix, storage);
  BrotliStoreMetaBlockHeader(num_bytes, 1, storage_ix, storage);
  /* Uncompressed data must start on a byte boundary. */
  *storage_ix = (*storage_ix + 7u) & ~7u;
  memcpy(&storage[*storage_ix >> 3], begin, num_bytes);
  *storage_ix += num_bytes << 3;
  /* Clear the byte the next bit-writer will OR into. */
  storage[*storage_ix >> 3] = 0;
}
415 
/* Seed values for the per-block command/distance histogram (128 slots).
   Most slots start at 1 so that every reachable symbol keeps a non-zero
   count; slots seeded with 0 correspond to symbols this fast encoder never
   emits. NOTE(review): the zero/one layout mirrors the symbols the Emit*
   helpers can produce — verify before changing either side. */
static uint32_t kCmdHistoSeed[128] = {
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 0, 0, 0, 0,
};
424 
BrotliCompressFragmentFastImpl(MemoryManager * m,const uint8_t * input,size_t input_size,BROTLI_BOOL is_last,int * table,size_t table_bits,uint8_t cmd_depth[128],uint16_t cmd_bits[128],size_t * cmd_code_numbits,uint8_t * cmd_code,size_t * storage_ix,uint8_t * storage)425 static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
426     MemoryManager* m, const uint8_t* input, size_t input_size,
427     BROTLI_BOOL is_last, int* table, size_t table_bits, uint8_t cmd_depth[128],
428     uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
429     size_t* storage_ix, uint8_t* storage) {
430   uint32_t cmd_histo[128];
431   const uint8_t* ip_end;
432 
433   /* "next_emit" is a pointer to the first byte that is not covered by a
434      previous copy. Bytes between "next_emit" and the start of the next copy or
435      the end of the input will be emitted as literal bytes. */
436   const uint8_t* next_emit = input;
437   /* Save the start of the first block for position and distance computations.
438   */
439   const uint8_t* base_ip = input;
440 
441   static const size_t kFirstBlockSize = 3 << 15;
442   static const size_t kMergeBlockSize = 1 << 16;
443 
444   const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
445   const size_t kMinMatchLen = 5;
446 
447   const uint8_t* metablock_start = input;
448   size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
449   size_t total_block_size = block_size;
450   /* Save the bit position of the MLEN field of the meta-block header, so that
451      we can update it later if we decide to extend this meta-block. */
452   size_t mlen_storage_ix = *storage_ix + 3;
453 
454   uint8_t lit_depth[256];
455   uint16_t lit_bits[256];
456 
457   size_t literal_ratio;
458 
459   const uint8_t* ip;
460   int last_distance;
461 
462   const size_t shift = 64u - table_bits;
463 
464   BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
465   /* No block splits, no contexts. */
466   BrotliWriteBits(13, 0, storage_ix, storage);
467 
468   literal_ratio = BuildAndStoreLiteralPrefixCode(
469       m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
470   if (BROTLI_IS_OOM(m)) return;
471 
472   {
473     /* Store the pre-compressed command and distance prefix codes. */
474     size_t i;
475     for (i = 0; i + 7 < *cmd_code_numbits; i += 8) {
476       BrotliWriteBits(8, cmd_code[i >> 3], storage_ix, storage);
477     }
478   }
479   BrotliWriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
480                   storage_ix, storage);
481 
482  emit_commands:
483   /* Initialize the command and distance histograms. We will gather
484      statistics of command and distance codes during the processing
485      of this block and use it to update the command and distance
486      prefix codes for the next block. */
487   memcpy(cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));
488 
489   /* "ip" is the input pointer. */
490   ip = input;
491   last_distance = -1;
492   ip_end = input + block_size;
493 
494   if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
495     /* For the last block, we need to keep a 16 bytes margin so that we can be
496        sure that all distances are at most window size - 16.
497        For all other blocks, we only need to keep a margin of 5 bytes so that
498        we don't go over the block size with a copy. */
499     const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
500                                         input_size - kInputMarginBytes);
501     const uint8_t* ip_limit = input + len_limit;
502 
503     uint32_t next_hash;
504     for (next_hash = Hash(++ip, shift); ; ) {
505       /* Step 1: Scan forward in the input looking for a 5-byte-long match.
506          If we get close to exhausting the input then goto emit_remainder.
507 
508          Heuristic match skipping: If 32 bytes are scanned with no matches
509          found, start looking only at every other byte. If 32 more bytes are
510          scanned, look at every third byte, etc.. When a match is found,
511          immediately go back to looking at every byte. This is a small loss
512          (~5% performance, ~0.1% density) for compressible data due to more
513          bookkeeping, but for non-compressible data (such as JPEG) it's a huge
514          win since the compressor quickly "realizes" the data is incompressible
515          and doesn't bother looking for matches everywhere.
516 
517          The "skip" variable keeps track of how many bytes there are since the
518          last match; dividing it by 32 (i.e. right-shifting by five) gives the
519          number of bytes to move ahead for each iteration. */
520       uint32_t skip = 32;
521 
522       const uint8_t* next_ip = ip;
523       const uint8_t* candidate;
524       BROTLI_DCHECK(next_emit < ip);
525 trawl:
526       do {
527         uint32_t hash = next_hash;
528         uint32_t bytes_between_hash_lookups = skip++ >> 5;
529         BROTLI_DCHECK(hash == Hash(next_ip, shift));
530         ip = next_ip;
531         next_ip = ip + bytes_between_hash_lookups;
532         if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
533           goto emit_remainder;
534         }
535         next_hash = Hash(next_ip, shift);
536         candidate = ip - last_distance;
537         if (IsMatch(ip, candidate)) {
538           if (BROTLI_PREDICT_TRUE(candidate < ip)) {
539             table[hash] = (int)(ip - base_ip);
540             break;
541           }
542         }
543         candidate = base_ip + table[hash];
544         BROTLI_DCHECK(candidate >= base_ip);
545         BROTLI_DCHECK(candidate < ip);
546 
547         table[hash] = (int)(ip - base_ip);
548       } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));
549 
550       /* Check copy distance. If candidate is not feasible, continue search.
551          Checking is done outside of hot loop to reduce overhead. */
552       if (ip - candidate > MAX_DISTANCE) goto trawl;
553 
554       /* Step 2: Emit the found match together with the literal bytes from
555          "next_emit" to the bit stream, and then see if we can find a next match
556          immediately afterwards. Repeat until we find no match for the input
557          without emitting some literal bytes. */
558 
559       {
560         /* We have a 5-byte match at ip, and we need to emit bytes in
561            [next_emit, ip). */
562         const uint8_t* base = ip;
563         size_t matched = 5 + FindMatchLengthWithLimit(
564             candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
565         int distance = (int)(base - candidate);  /* > 0 */
566         size_t insert = (size_t)(base - next_emit);
567         ip += matched;
568         BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
569         if (BROTLI_PREDICT_TRUE(insert < 6210)) {
570           EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
571                         storage_ix, storage);
572         } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
573                                              literal_ratio)) {
574           EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
575                                     storage_ix, storage);
576           input_size -= (size_t)(base - input);
577           input = base;
578           next_emit = input;
579           goto next_block;
580         } else {
581           EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
582                             storage_ix, storage);
583         }
584         EmitLiterals(next_emit, insert, lit_depth, lit_bits,
585                      storage_ix, storage);
586         if (distance == last_distance) {
587           BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
588           ++cmd_histo[64];
589         } else {
590           EmitDistance((size_t)distance, cmd_depth, cmd_bits,
591                        cmd_histo, storage_ix, storage);
592           last_distance = distance;
593         }
594         EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
595                                 storage_ix, storage);
596 
597         next_emit = ip;
598         if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
599           goto emit_remainder;
600         }
601         /* We could immediately start working at ip now, but to improve
602            compression we first update "table" with the hashes of some positions
603            within the last copy. */
604         {
605           uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
606           uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
607           uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
608           table[prev_hash] = (int)(ip - base_ip - 3);
609           prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
610           table[prev_hash] = (int)(ip - base_ip - 2);
611           prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
612           table[prev_hash] = (int)(ip - base_ip - 1);
613 
614           candidate = base_ip + table[cur_hash];
615           table[cur_hash] = (int)(ip - base_ip);
616         }
617       }
618 
619       while (IsMatch(ip, candidate)) {
620         /* We have a 5-byte match at ip, and no need to emit any literal bytes
621            prior to ip. */
622         const uint8_t* base = ip;
623         size_t matched = 5 + FindMatchLengthWithLimit(
624             candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
625         if (ip - candidate > MAX_DISTANCE) break;
626         ip += matched;
627         last_distance = (int)(base - candidate);  /* > 0 */
628         BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
629         EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
630                     storage_ix, storage);
631         EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
632                      cmd_histo, storage_ix, storage);
633 
634         next_emit = ip;
635         if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
636           goto emit_remainder;
637         }
638         /* We could immediately start working at ip now, but to improve
639            compression we first update "table" with the hashes of some positions
640            within the last copy. */
641         {
642           uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
643           uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
644           uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
645           table[prev_hash] = (int)(ip - base_ip - 3);
646           prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
647           table[prev_hash] = (int)(ip - base_ip - 2);
648           prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
649           table[prev_hash] = (int)(ip - base_ip - 1);
650 
651           candidate = base_ip + table[cur_hash];
652           table[cur_hash] = (int)(ip - base_ip);
653         }
654       }
655 
656       next_hash = Hash(++ip, shift);
657     }
658   }
659 
660  emit_remainder:
661   BROTLI_DCHECK(next_emit <= ip_end);
662   input += block_size;
663   input_size -= block_size;
664   block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);
665 
666   /* Decide if we want to continue this meta-block instead of emitting the
667      last insert-only command. */
668   if (input_size > 0 &&
669       total_block_size + block_size <= (1 << 20) &&
670       ShouldMergeBlock(input, block_size, lit_depth)) {
671     BROTLI_DCHECK(total_block_size > (1 << 16));
672     /* Update the size of the current meta-block and continue emitting commands.
673        We can do this because the current size and the new size both have 5
674        nibbles. */
675     total_block_size += block_size;
676     UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
677     goto emit_commands;
678   }
679 
680   /* Emit the remaining bytes as literals. */
681   if (next_emit < ip_end) {
682     const size_t insert = (size_t)(ip_end - next_emit);
683     if (BROTLI_PREDICT_TRUE(insert < 6210)) {
684       EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
685                     storage_ix, storage);
686       EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
687     } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
688                                          literal_ratio)) {
689       EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
690                                 storage_ix, storage);
691     } else {
692       EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
693                         storage_ix, storage);
694       EmitLiterals(next_emit, insert, lit_depth, lit_bits,
695                    storage_ix, storage);
696     }
697   }
698   next_emit = ip_end;
699 
700 next_block:
701   /* If we have more data, write a new meta-block header and prefix codes and
702      then continue emitting commands. */
703   if (input_size > 0) {
704     metablock_start = input;
705     block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
706     total_block_size = block_size;
707     /* Save the bit position of the MLEN field of the meta-block header, so that
708        we can update it later if we decide to extend this meta-block. */
709     mlen_storage_ix = *storage_ix + 3;
710     BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
711     /* No block splits, no contexts. */
712     BrotliWriteBits(13, 0, storage_ix, storage);
713     literal_ratio = BuildAndStoreLiteralPrefixCode(
714         m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
715     if (BROTLI_IS_OOM(m)) return;
716     BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
717                                    storage_ix, storage);
718     goto emit_commands;
719   }
720 
721   if (!is_last) {
722     /* If this is not the last block, update the command and distance prefix
723        codes for the next block and store the compressed forms. */
724     cmd_code[0] = 0;
725     *cmd_code_numbits = 0;
726     BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
727                                    cmd_code_numbits, cmd_code);
728   }
729 }
730 
/* Supported hash-table bit widths. X is applied to each width, generating
   one specialization per size (table_size = 1 << bits). */
#define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)

/* Instantiates a BROTLI_NOINLINE wrapper for table-bits value B; each
   wrapper forwards to the shared implementation with B passed as a
   compile-time constant so the hash shift can be folded by the compiler. */
#define BAKE_METHOD_PARAM_(B) \
static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B(             \
    MemoryManager* m, const uint8_t* input, size_t input_size,               \
    BROTLI_BOOL is_last, int* table, uint8_t cmd_depth[128],                 \
    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,     \
    size_t* storage_ix, uint8_t* storage) {                                  \
  BrotliCompressFragmentFastImpl(m, input, input_size, is_last, table, B,    \
      cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix, storage); \
}
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
#undef BAKE_METHOD_PARAM_
744 
745 void BrotliCompressFragmentFast(
746     MemoryManager* m, const uint8_t* input, size_t input_size,
747     BROTLI_BOOL is_last, int* table, size_t table_size, uint8_t cmd_depth[128],
748     uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
749     size_t* storage_ix, uint8_t* storage) {
750   const size_t initial_storage_ix = *storage_ix;
751   const size_t table_bits = Log2FloorNonZero(table_size);
752 
753   if (input_size == 0) {
754     BROTLI_DCHECK(is_last);
755     BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
756     BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
757     *storage_ix = (*storage_ix + 7u) & ~7u;
758     return;
759   }
760 
761   switch (table_bits) {
762 #define CASE_(B)                                                     \
763     case B:                                                          \
764       BrotliCompressFragmentFastImpl ## B(                           \
765           m, input, input_size, is_last, table, cmd_depth, cmd_bits, \
766           cmd_code_numbits, cmd_code, storage_ix, storage);          \
767       break;
768     FOR_TABLE_BITS_(CASE_)
769 #undef CASE_
770     default: BROTLI_DCHECK(0); break;
771   }
772 
773   /* If output is larger than single uncompressed block, rewrite it. */
774   if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
775     EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
776                               storage_ix, storage);
777   }
778 
779   if (is_last) {
780     BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
781     BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
782     *storage_ix = (*storage_ix + 7u) & ~7u;
783   }
784 }
785 
786 #undef FOR_TABLE_BITS_
787 
788 #if defined(__cplusplus) || defined(c_plusplus)
789 }  /* extern "C" */
790 #endif
791