1 /* NOLINT(build/header_guard) */
2 /* Copyright 2016 Google Inc. All Rights Reserved.
3 
4    Distributed under MIT license.
5    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6 */
7 
8 /* template parameters: FN, BUCKET_BITS, NUM_BANKS, BANK_BITS,
9                         NUM_LAST_DISTANCES_TO_CHECK */
10 
/* A (forgetful) hash table of the data seen by the compressor, used to
   help create backward references to previous data.

   Hashes are stored in chains, and chains are bucketed into groups. Each
   group of chains shares a storage "bank". Once more than "bank size" chain
   nodes have been added to a bank, the oldest nodes are overwritten; as a
   result several chains may share a tail. */
17 
/* Name of the hasher state struct for the current template instantiation. */
#define HashForgetfulChain HASHER()

/* Number of hash buckets. */
#define BUCKET_SIZE (1 << BUCKET_BITS)

/* Number of chain-node slots in one bank. */
#define BANK_SIZE (1 << BANK_BITS)

/* Selects how a node-to-node distance that does not fit in 16 bits is
   recorded by FN(Store): as 0 when non-zero (a zero delta then stops the
   chain walk in FN(FindLongestMatch)), or saturated to 0xFFFF when zero. */
#define CAPPED_CHAINS 0
26 
FN(HashTypeLength)27 static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
FN(StoreLookahead)28 static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
29 
30 /* HashBytes is the function that chooses the bucket to place the address in.*/
FN(HashBytes)31 static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* data) {
32   const uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
33   /* The higher bits contain more mixture from the multiplication,
34      so we take our results from there. */
35   return h >> (32 - BUCKET_BITS);
36 }
37 
38 typedef struct FN(Slot) {
39   uint16_t delta;
40   uint16_t next;
41 } FN(Slot);
42 
43 typedef struct FN(Bank) {
44   FN(Slot) slots[BANK_SIZE];
45 } FN(Bank);
46 
47 typedef struct HashForgetfulChain {
48   uint32_t addr[BUCKET_SIZE];
49   uint16_t head[BUCKET_SIZE];
50   /* Truncated hash used for quick rejection of "distance cache" candidates. */
51   uint8_t tiny_hash[65536];
52   FN(Bank) banks[NUM_BANKS];
53   uint16_t free_slot_idx[NUM_BANKS];
54   size_t max_hops;
55 } HashForgetfulChain;
56 
FN(Self)57 static BROTLI_INLINE HashForgetfulChain* FN(Self)(HasherHandle handle) {
58   return (HashForgetfulChain*)&(GetHasherCommon(handle)[1]);
59 }
60 
FN(Initialize)61 static void FN(Initialize)(
62     HasherHandle handle, const BrotliEncoderParams* params) {
63   FN(Self)(handle)->max_hops =
64       (params->quality > 6 ? 7u : 8u) << (params->quality - 4);
65 }
66 
FN(Prepare)67 static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
68     size_t input_size, const uint8_t* data) {
69   HashForgetfulChain* self = FN(Self)(handle);
70   /* Partial preparation is 100 times slower (per socket). */
71   size_t partial_prepare_threshold = BUCKET_SIZE >> 6;
72   if (one_shot && input_size <= partial_prepare_threshold) {
73     size_t i;
74     for (i = 0; i < input_size; ++i) {
75       size_t bucket = FN(HashBytes)(&data[i]);
76       /* See InitEmpty comment. */
77       self->addr[bucket] = 0xCCCCCCCC;
78       self->head[bucket] = 0xCCCC;
79     }
80   } else {
81     /* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position
82        processed by hasher never reaches 3GB + 64M; this makes all new chains
83        to be terminated after the first node. */
84     memset(self->addr, 0xCC, sizeof(self->addr));
85     memset(self->head, 0, sizeof(self->head));
86   }
87   memset(self->tiny_hash, 0, sizeof(self->tiny_hash));
88   memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));
89 }
90 
FN(HashMemAllocInBytes)91 static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
92     const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
93     size_t input_size) {
94   BROTLI_UNUSED(params);
95   BROTLI_UNUSED(one_shot);
96   BROTLI_UNUSED(input_size);
97   return sizeof(HashForgetfulChain);
98 }
99 
100 /* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
101    node to corresponding chain; also update tiny_hash for current position. */
FN(Store)102 static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,
103     const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
104   HashForgetfulChain* self = FN(Self)(handle);
105   const size_t key = FN(HashBytes)(&data[ix & mask]);
106   const size_t bank = key & (NUM_BANKS - 1);
107   const size_t idx = self->free_slot_idx[bank]++ & (BANK_SIZE - 1);
108   size_t delta = ix - self->addr[key];
109   self->tiny_hash[(uint16_t)ix] = (uint8_t)key;
110   if (delta > 0xFFFF) delta = CAPPED_CHAINS ? 0 : 0xFFFF;
111   self->banks[bank].slots[idx].delta = (uint16_t)delta;
112   self->banks[bank].slots[idx].next = self->head[key];
113   self->addr[key] = (uint32_t)ix;
114   self->head[key] = (uint16_t)idx;
115 }
116 
FN(StoreRange)117 static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
118     const uint8_t* data, const size_t mask, const size_t ix_start,
119     const size_t ix_end) {
120   size_t i;
121   for (i = ix_start; i < ix_end; ++i) {
122     FN(Store)(handle, data, mask, i);
123   }
124 }
125 
FN(StitchToPreviousBlock)126 static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
127     size_t num_bytes, size_t position, const uint8_t* ringbuffer,
128     size_t ring_buffer_mask) {
129   if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
130     /* Prepare the hashes for three last bytes of the last write.
131        These could not be calculated before, since they require knowledge
132        of both the previous and the current block. */
133     FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 3);
134     FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 2);
135     FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 1);
136   }
137 }
138 
FN(PrepareDistanceCache)139 static BROTLI_INLINE void FN(PrepareDistanceCache)(
140     HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
141   BROTLI_UNUSED(handle);
142   PrepareDistanceCache(distance_cache, NUM_LAST_DISTANCES_TO_CHECK);
143 }
144 
145 /* Find a longest backward match of &data[cur_ix] up to the length of
146    max_length and stores the position cur_ix in the hash table.
147 
148    REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
149              values; if this method is invoked repeatedly with the same distance
150              cache values, it is enough to invoke FN(PrepareDistanceCache) once.
151 
152    Does not look for matches longer than max_length.
153    Does not look for matches further away than max_backward.
154    Writes the best match into |out|.
155    |out|->score is updated only if a better match is found. */
FN(FindLongestMatch)156 static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
157     const BrotliEncoderDictionary* dictionary,
158     const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
159     const int* BROTLI_RESTRICT distance_cache,
160     const size_t cur_ix, const size_t max_length, const size_t max_backward,
161     const size_t gap, const size_t max_distance,
162     HasherSearchResult* BROTLI_RESTRICT out) {
163   HashForgetfulChain* self = FN(Self)(handle);
164   const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
165   /* Don't accept a short copy from far away. */
166   score_t min_score = out->score;
167   score_t best_score = out->score;
168   size_t best_len = out->len;
169   size_t i;
170   const size_t key = FN(HashBytes)(&data[cur_ix_masked]);
171   const uint8_t tiny_hash = (uint8_t)(key);
172   out->len = 0;
173   out->len_code_delta = 0;
174   /* Try last distance first. */
175   for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {
176     const size_t backward = (size_t)distance_cache[i];
177     size_t prev_ix = (cur_ix - backward);
178     /* For distance code 0 we want to consider 2-byte matches. */
179     if (i > 0 && self->tiny_hash[(uint16_t)prev_ix] != tiny_hash) continue;
180     if (prev_ix >= cur_ix || backward > max_backward) {
181       continue;
182     }
183     prev_ix &= ring_buffer_mask;
184     {
185       const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
186                                                   &data[cur_ix_masked],
187                                                   max_length);
188       if (len >= 2) {
189         score_t score = BackwardReferenceScoreUsingLastDistance(len);
190         if (best_score < score) {
191           if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
192           if (best_score < score) {
193             best_score = score;
194             best_len = len;
195             out->len = best_len;
196             out->distance = backward;
197             out->score = best_score;
198           }
199         }
200       }
201     }
202   }
203   {
204     const size_t bank = key & (NUM_BANKS - 1);
205     size_t backward = 0;
206     size_t hops = self->max_hops;
207     size_t delta = cur_ix - self->addr[key];
208     size_t slot = self->head[key];
209     while (hops--) {
210       size_t prev_ix;
211       size_t last = slot;
212       backward += delta;
213       if (backward > max_backward || (CAPPED_CHAINS && !delta)) break;
214       prev_ix = (cur_ix - backward) & ring_buffer_mask;
215       slot = self->banks[bank].slots[last].next;
216       delta = self->banks[bank].slots[last].delta;
217       if (cur_ix_masked + best_len > ring_buffer_mask ||
218           prev_ix + best_len > ring_buffer_mask ||
219           data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
220         continue;
221       }
222       {
223         const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
224                                                     &data[cur_ix_masked],
225                                                     max_length);
226         if (len >= 4) {
227           /* Comparing for >= 3 does not change the semantics, but just saves
228              for a few unnecessary binary logarithms in backward reference
229              score, since we are not interested in such short matches. */
230           score_t score = BackwardReferenceScore(len, backward);
231           if (best_score < score) {
232             best_score = score;
233             best_len = len;
234             out->len = best_len;
235             out->distance = backward;
236             out->score = best_score;
237           }
238         }
239       }
240     }
241     FN(Store)(handle, data, ring_buffer_mask, cur_ix);
242   }
243   if (out->score == min_score) {
244     SearchInStaticDictionary(dictionary,
245         handle, &data[cur_ix_masked], max_length, max_backward + gap,
246         max_distance, out, BROTLI_FALSE);
247   }
248 }
249 
/* Drop the helper macros defined at the top of this file, so that they do
   not outlive this instantiation of the template. */
#undef CAPPED_CHAINS
#undef BUCKET_SIZE
#undef BANK_SIZE

#undef HashForgetfulChain
255