1 /* NOLINT(build/header_guard) */
2 /* Copyright 2016 Google Inc. All Rights Reserved.
3 
4    Distributed under MIT license.
5    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6 */
7 
8 /* template parameters: FN, BUCKET_BITS, NUM_BANKS, BANK_BITS,
9                         NUM_LAST_DISTANCES_TO_CHECK */
10 
11 /* A (forgetful) hash table to the data seen by the compressor, to
12    help create backward references to previous data.
13 
14    Hashes are stored in chains which are bucketed to groups. Group of chains
15    share a storage "bank". When more than "bank size" chain nodes are added,
16    oldest nodes are replaced; this way several chains may share a tail. */
17 
18 #define HashForgetfulChain HASHER()
19 
20 #define BANK_SIZE (1 << BANK_BITS)
21 
22 /* Number of hash buckets. */
23 #define BUCKET_SIZE (1 << BUCKET_BITS)
24 
25 #define CAPPED_CHAINS 0
26 
FN(HashTypeLength)27 static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
FN(StoreLookahead)28 static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
29 
30 /* HashBytes is the function that chooses the bucket to place the address in.*/
FN(HashBytes)31 static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data) {
32   const uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
33   /* The higher bits contain more mixture from the multiplication,
34      so we take our results from there. */
35   return h >> (32 - BUCKET_BITS);
36 }
37 
38 typedef struct FN(Slot) {
39   uint16_t delta;
40   uint16_t next;
41 } FN(Slot);
42 
43 typedef struct FN(Bank) {
44   FN(Slot) slots[BANK_SIZE];
45 } FN(Bank);
46 
47 typedef struct HashForgetfulChain {
48   uint16_t free_slot_idx[NUM_BANKS];  /* Up to 1KiB. Move to dynamic? */
49   size_t max_hops;
50 
51   /* Shortcuts. */
52   void* extra;
53   HasherCommon* common;
54 
55   /* --- Dynamic size members --- */
56 
57   /* uint32_t addr[BUCKET_SIZE]; */
58 
59   /* uint16_t head[BUCKET_SIZE]; */
60 
61   /* Truncated hash used for quick rejection of "distance cache" candidates. */
62   /* uint8_t tiny_hash[65536];*/
63 
64   /* FN(Bank) banks[NUM_BANKS]; */
65 } HashForgetfulChain;
66 
FN(Addr)67 static uint32_t* FN(Addr)(void* extra) {
68   return (uint32_t*)extra;
69 }
70 
FN(Head)71 static uint16_t* FN(Head)(void* extra) {
72   return (uint16_t*)(&FN(Addr)(extra)[BUCKET_SIZE]);
73 }
74 
FN(TinyHash)75 static uint8_t* FN(TinyHash)(void* extra) {
76   return (uint8_t*)(&FN(Head)(extra)[BUCKET_SIZE]);
77 }
78 
FN(Bank)79 static FN(Bank)* FN(Banks)(void* extra) {
80   return (FN(Bank)*)(&FN(TinyHash)(extra)[65536]);
81 }
82 
FN(Initialize)83 static void FN(Initialize)(
84     HasherCommon* common, HashForgetfulChain* BROTLI_RESTRICT self,
85     const BrotliEncoderParams* params) {
86   self->common = common;
87   self->extra = common->extra;
88 
89   self->max_hops = (params->quality > 6 ? 7u : 8u) << (params->quality - 4);
90 }
91 
FN(Prepare)92 static void FN(Prepare)(
93     HashForgetfulChain* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
94     size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
95   uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra);
96   uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra);
97   uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra);
98   /* Partial preparation is 100 times slower (per socket). */
99   size_t partial_prepare_threshold = BUCKET_SIZE >> 6;
100   if (one_shot && input_size <= partial_prepare_threshold) {
101     size_t i;
102     for (i = 0; i < input_size; ++i) {
103       size_t bucket = FN(HashBytes)(&data[i]);
104       /* See InitEmpty comment. */
105       addr[bucket] = 0xCCCCCCCC;
106       head[bucket] = 0xCCCC;
107     }
108   } else {
109     /* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position
110        processed by hasher never reaches 3GB + 64M; this makes all new chains
111        to be terminated after the first node. */
112     memset(addr, 0xCC, sizeof(uint32_t) * BUCKET_SIZE);
113     memset(head, 0, sizeof(uint16_t) * BUCKET_SIZE);
114   }
115   memset(tiny_hash, 0, sizeof(uint8_t) * 65536);
116   memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));
117 }
118 
FN(HashMemAllocInBytes)119 static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
120     const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
121     size_t input_size) {
122   BROTLI_UNUSED(params);
123   BROTLI_UNUSED(one_shot);
124   BROTLI_UNUSED(input_size);
125   return sizeof(uint32_t) * BUCKET_SIZE + sizeof(uint16_t) * BUCKET_SIZE +
126          sizeof(uint8_t) * 65536 + sizeof(FN(Bank)) * NUM_BANKS;
127 }
128 
129 /* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
130    node to corresponding chain; also update tiny_hash for current position. */
FN(Store)131 static BROTLI_INLINE void FN(Store)(HashForgetfulChain* BROTLI_RESTRICT self,
132     const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
133   uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra);
134   uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra);
135   uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra);
136   FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra);
137   const size_t key = FN(HashBytes)(&data[ix & mask]);
138   const size_t bank = key & (NUM_BANKS - 1);
139   const size_t idx = self->free_slot_idx[bank]++ & (BANK_SIZE - 1);
140   size_t delta = ix - addr[key];
141   tiny_hash[(uint16_t)ix] = (uint8_t)key;
142   if (delta > 0xFFFF) delta = CAPPED_CHAINS ? 0 : 0xFFFF;
143   banks[bank].slots[idx].delta = (uint16_t)delta;
144   banks[bank].slots[idx].next = head[key];
145   addr[key] = (uint32_t)ix;
146   head[key] = (uint16_t)idx;
147 }
148 
FN(StoreRange)149 static BROTLI_INLINE void FN(StoreRange)(
150     HashForgetfulChain* BROTLI_RESTRICT self,
151     const uint8_t* BROTLI_RESTRICT data, const size_t mask,
152     const size_t ix_start, const size_t ix_end) {
153   size_t i;
154   for (i = ix_start; i < ix_end; ++i) {
155     FN(Store)(self, data, mask, i);
156   }
157 }
158 
FN(StitchToPreviousBlock)159 static BROTLI_INLINE void FN(StitchToPreviousBlock)(
160     HashForgetfulChain* BROTLI_RESTRICT self,
161     size_t num_bytes, size_t position, const uint8_t* ringbuffer,
162     size_t ring_buffer_mask) {
163   if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
164     /* Prepare the hashes for three last bytes of the last write.
165        These could not be calculated before, since they require knowledge
166        of both the previous and the current block. */
167     FN(Store)(self, ringbuffer, ring_buffer_mask, position - 3);
168     FN(Store)(self, ringbuffer, ring_buffer_mask, position - 2);
169     FN(Store)(self, ringbuffer, ring_buffer_mask, position - 1);
170   }
171 }
172 
FN(PrepareDistanceCache)173 static BROTLI_INLINE void FN(PrepareDistanceCache)(
174     HashForgetfulChain* BROTLI_RESTRICT self,
175     int* BROTLI_RESTRICT distance_cache) {
176   BROTLI_UNUSED(self);
177   PrepareDistanceCache(distance_cache, NUM_LAST_DISTANCES_TO_CHECK);
178 }
179 
180 /* Find a longest backward match of &data[cur_ix] up to the length of
181    max_length and stores the position cur_ix in the hash table.
182 
183    REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
184              values; if this method is invoked repeatedly with the same distance
185              cache values, it is enough to invoke FN(PrepareDistanceCache) once.
186 
187    Does not look for matches longer than max_length.
188    Does not look for matches further away than max_backward.
189    Writes the best match into |out|.
190    |out|->score is updated only if a better match is found. */
FN(FindLongestMatch)191 static BROTLI_INLINE void FN(FindLongestMatch)(
192     HashForgetfulChain* BROTLI_RESTRICT self,
193     const BrotliEncoderDictionary* dictionary,
194     const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
195     const int* BROTLI_RESTRICT distance_cache,
196     const size_t cur_ix, const size_t max_length, const size_t max_backward,
197     const size_t dictionary_distance, const size_t max_distance,
198     HasherSearchResult* BROTLI_RESTRICT out) {
199   uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra);
200   uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra);
201   uint8_t* BROTLI_RESTRICT tiny_hashes = FN(TinyHash)(self->extra);
202   FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra);
203   const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
204   /* Don't accept a short copy from far away. */
205   score_t min_score = out->score;
206   score_t best_score = out->score;
207   size_t best_len = out->len;
208   size_t i;
209   const size_t key = FN(HashBytes)(&data[cur_ix_masked]);
210   const uint8_t tiny_hash = (uint8_t)(key);
211   out->len = 0;
212   out->len_code_delta = 0;
213   /* Try last distance first. */
214   for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {
215     const size_t backward = (size_t)distance_cache[i];
216     size_t prev_ix = (cur_ix - backward);
217     /* For distance code 0 we want to consider 2-byte matches. */
218     if (i > 0 && tiny_hashes[(uint16_t)prev_ix] != tiny_hash) continue;
219     if (prev_ix >= cur_ix || backward > max_backward) {
220       continue;
221     }
222     prev_ix &= ring_buffer_mask;
223     {
224       const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
225                                                   &data[cur_ix_masked],
226                                                   max_length);
227       if (len >= 2) {
228         score_t score = BackwardReferenceScoreUsingLastDistance(len);
229         if (best_score < score) {
230           if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
231           if (best_score < score) {
232             best_score = score;
233             best_len = len;
234             out->len = best_len;
235             out->distance = backward;
236             out->score = best_score;
237           }
238         }
239       }
240     }
241   }
242   {
243     const size_t bank = key & (NUM_BANKS - 1);
244     size_t backward = 0;
245     size_t hops = self->max_hops;
246     size_t delta = cur_ix - addr[key];
247     size_t slot = head[key];
248     while (hops--) {
249       size_t prev_ix;
250       size_t last = slot;
251       backward += delta;
252       if (backward > max_backward || (CAPPED_CHAINS && !delta)) break;
253       prev_ix = (cur_ix - backward) & ring_buffer_mask;
254       slot = banks[bank].slots[last].next;
255       delta = banks[bank].slots[last].delta;
256       if (cur_ix_masked + best_len > ring_buffer_mask ||
257           prev_ix + best_len > ring_buffer_mask ||
258           data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
259         continue;
260       }
261       {
262         const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
263                                                     &data[cur_ix_masked],
264                                                     max_length);
265         if (len >= 4) {
266           /* Comparing for >= 3 does not change the semantics, but just saves
267              for a few unnecessary binary logarithms in backward reference
268              score, since we are not interested in such short matches. */
269           score_t score = BackwardReferenceScore(len, backward);
270           if (best_score < score) {
271             best_score = score;
272             best_len = len;
273             out->len = best_len;
274             out->distance = backward;
275             out->score = best_score;
276           }
277         }
278       }
279     }
280     FN(Store)(self, data, ring_buffer_mask, cur_ix);
281   }
282   if (out->score == min_score) {
283     SearchInStaticDictionary(dictionary,
284         self->common, &data[cur_ix_masked], max_length, dictionary_distance,
285         max_distance, out, BROTLI_FALSE);
286   }
287 }
288 
289 #undef BANK_SIZE
290 #undef BUCKET_SIZE
291 #undef CAPPED_CHAINS
292 
293 #undef HashForgetfulChain
294