1 /* NOLINT(build/header_guard) */ 2 /* Copyright 2016 Google Inc. All Rights Reserved. 3 4 Distributed under MIT license. 5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 6 */ 7 8 /* template parameters: FN, BUCKET_BITS, NUM_BANKS, BANK_BITS, 9 NUM_LAST_DISTANCES_TO_CHECK */ 10 11 /* A (forgetful) hash table to the data seen by the compressor, to 12 help create backward references to previous data. 13 14 Hashes are stored in chains which are bucketed to groups. Group of chains 15 share a storage "bank". When more than "bank size" chain nodes are added, 16 oldest nodes are replaced; this way several chains may share a tail. */ 17 18 #define HashForgetfulChain HASHER() 19 20 #define BANK_SIZE (1 << BANK_BITS) 21 22 /* Number of hash buckets. */ 23 #define BUCKET_SIZE (1 << BUCKET_BITS) 24 25 #define CAPPED_CHAINS 0 26 27 static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; } 28 static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; } 29 30 /* HashBytes is the function that chooses the bucket to place the address in.*/ 31 static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t *data) { 32 const uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32; 33 /* The higher bits contain more mixture from the multiplication, 34 so we take our results from there. */ 35 return h >> (32 - BUCKET_BITS); 36 } 37 38 typedef struct FN(Slot) { 39 uint16_t delta; 40 uint16_t next; 41 } FN(Slot); 42 43 typedef struct FN(Bank) { 44 FN(Slot) slots[BANK_SIZE]; 45 } FN(Bank); 46 47 typedef struct HashForgetfulChain { 48 uint32_t addr[BUCKET_SIZE]; 49 uint16_t head[BUCKET_SIZE]; 50 /* Truncated hash used for quick rejection of "distance cache" candidates. */ 51 uint8_t tiny_hash[65536]; 52 FN(Bank) banks[NUM_BANKS]; 53 uint16_t free_slot_idx[NUM_BANKS]; 54 size_t max_hops; 55 } HashForgetfulChain; 56 57 static BROTLI_INLINE HashForgetfulChain* FN(Self)(HasherHandle handle) { 58 return (HashForgetfulChain*)&(GetHasherCommon(handle)[1]); 59 } 60 61 static void FN(Initialize)( 62 HasherHandle handle, const BrotliEncoderParams* params) { 63 FN(Self)(handle)->max_hops = 64 (params->quality > 6 ? 7u : 8u) << (params->quality - 4); 65 } 66 67 static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot, 68 size_t input_size, const uint8_t* data) { 69 HashForgetfulChain* self = FN(Self)(handle); 70 /* Partial preparation is 100 times slower (per socket). */ 71 size_t partial_prepare_threshold = BUCKET_SIZE >> 6; 72 if (one_shot && input_size <= partial_prepare_threshold) { 73 size_t i; 74 for (i = 0; i < input_size; ++i) { 75 size_t bucket = FN(HashBytes)(&data[i]); 76 /* See InitEmpty comment. */ 77 self->addr[bucket] = 0xCCCCCCCC; 78 self->head[bucket] = 0xCCCC; 79 } 80 } else { 81 /* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position 82 processed by hasher never reaches 3GB + 64M; this makes all new chains 83 to be terminated after the first node. */ 84 memset(self->addr, 0xCC, sizeof(self->addr)); 85 memset(self->head, 0, sizeof(self->head)); 86 } 87 memset(self->tiny_hash, 0, sizeof(self->tiny_hash)); 88 memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx)); 89 } 90 91 static BROTLI_INLINE size_t FN(HashMemAllocInBytes)( 92 const BrotliEncoderParams* params, BROTLI_BOOL one_shot, 93 size_t input_size) { 94 BROTLI_UNUSED(params); 95 BROTLI_UNUSED(one_shot); 96 BROTLI_UNUSED(input_size); 97 return sizeof(HashForgetfulChain); 98 } 99 100 /* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend 101 node to corresponding chain; also update tiny_hash for current position. */ 102 static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle, 103 const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) { 104 HashForgetfulChain* self = FN(Self)(handle); 105 const size_t key = FN(HashBytes)(&data[ix & mask]); 106 const size_t bank = key & (NUM_BANKS - 1); 107 const size_t idx = self->free_slot_idx[bank]++ & (BANK_SIZE - 1); 108 size_t delta = ix - self->addr[key]; 109 self->tiny_hash[(uint16_t)ix] = (uint8_t)key; 110 if (delta > 0xFFFF) delta = CAPPED_CHAINS ? 0 : 0xFFFF; 111 self->banks[bank].slots[idx].delta = (uint16_t)delta; 112 self->banks[bank].slots[idx].next = self->head[key]; 113 self->addr[key] = (uint32_t)ix; 114 self->head[key] = (uint16_t)idx; 115 } 116 117 static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, 118 const uint8_t *data, const size_t mask, const size_t ix_start, 119 const size_t ix_end) { 120 size_t i; 121 for (i = ix_start; i < ix_end; ++i) { 122 FN(Store)(handle, data, mask, i); 123 } 124 } 125 126 static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle, 127 size_t num_bytes, size_t position, const uint8_t* ringbuffer, 128 size_t ring_buffer_mask) { 129 if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) { 130 /* Prepare the hashes for three last bytes of the last write. 131 These could not be calculated before, since they require knowledge 132 of both the previous and the current block. */ 133 FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 3); 134 FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 2); 135 FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 1); 136 } 137 } 138 139 static BROTLI_INLINE void FN(PrepareDistanceCache)( 140 HasherHandle handle, int* BROTLI_RESTRICT distance_cache) { 141 BROTLI_UNUSED(handle); 142 PrepareDistanceCache(distance_cache, NUM_LAST_DISTANCES_TO_CHECK); 143 } 144 145 /* Find a longest backward match of &data[cur_ix] up to the length of 146 max_length and stores the position cur_ix in the hash table. 147 148 REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache 149 values; if this method is invoked repeatedly with the same distance 150 cache values, it is enough to invoke FN(PrepareDistanceCache) once. 151 152 Does not look for matches longer than max_length. 153 Does not look for matches further away than max_backward. 154 Writes the best match into |out|. 155 |out|->score is updated only if a better match is found. */ 156 static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, 157 const BrotliDictionary* dictionary, const uint16_t* dictionary_hash, 158 const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, 159 const int* BROTLI_RESTRICT distance_cache, 160 const size_t cur_ix, const size_t max_length, const size_t max_backward, 161 const size_t gap, HasherSearchResult* BROTLI_RESTRICT out) { 162 HashForgetfulChain* self = FN(Self)(handle); 163 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; 164 /* Don't accept a short copy from far away. */ 165 score_t min_score = out->score; 166 score_t best_score = out->score; 167 size_t best_len = out->len; 168 size_t i; 169 const size_t key = FN(HashBytes)(&data[cur_ix_masked]); 170 const uint8_t tiny_hash = (uint8_t)(key); 171 out->len = 0; 172 out->len_code_delta = 0; 173 /* Try last distance first. */ 174 for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) { 175 const size_t backward = (size_t)distance_cache[i]; 176 size_t prev_ix = (cur_ix - backward); 177 /* For distance code 0 we want to consider 2-byte matches. */ 178 if (i > 0 && self->tiny_hash[(uint16_t)prev_ix] != tiny_hash) continue; 179 if (prev_ix >= cur_ix || backward > max_backward) { 180 continue; 181 } 182 prev_ix &= ring_buffer_mask; 183 { 184 const size_t len = FindMatchLengthWithLimit(&data[prev_ix], 185 &data[cur_ix_masked], 186 max_length); 187 if (len >= 2) { 188 score_t score = BackwardReferenceScoreUsingLastDistance(len); 189 if (best_score < score) { 190 if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i); 191 if (best_score < score) { 192 best_score = score; 193 best_len = len; 194 out->len = best_len; 195 out->distance = backward; 196 out->score = best_score; 197 } 198 } 199 } 200 } 201 } 202 { 203 const size_t bank = key & (NUM_BANKS - 1); 204 size_t backward = 0; 205 size_t hops = self->max_hops; 206 size_t delta = cur_ix - self->addr[key]; 207 size_t slot = self->head[key]; 208 while (hops--) { 209 size_t prev_ix; 210 size_t last = slot; 211 backward += delta; 212 if (backward > max_backward || (CAPPED_CHAINS && !delta)) break; 213 prev_ix = (cur_ix - backward) & ring_buffer_mask; 214 slot = self->banks[bank].slots[last].next; 215 delta = self->banks[bank].slots[last].delta; 216 if (cur_ix_masked + best_len > ring_buffer_mask || 217 prev_ix + best_len > ring_buffer_mask || 218 data[cur_ix_masked + best_len] != data[prev_ix + best_len]) { 219 continue; 220 } 221 { 222 const size_t len = FindMatchLengthWithLimit(&data[prev_ix], 223 &data[cur_ix_masked], 224 max_length); 225 if (len >= 4) { 226 /* Comparing for >= 3 does not change the semantics, but just saves 227 for a few unnecessary binary logarithms in backward reference 228 score, since we are not interested in such short matches. */ 229 score_t score = BackwardReferenceScore(len, backward); 230 if (best_score < score) { 231 best_score = score; 232 best_len = len; 233 out->len = best_len; 234 out->distance = backward; 235 out->score = best_score; 236 } 237 } 238 } 239 } 240 FN(Store)(handle, data, ring_buffer_mask, cur_ix); 241 } 242 if (out->score == min_score) { 243 SearchInStaticDictionary(dictionary, dictionary_hash, 244 handle, &data[cur_ix_masked], max_length, max_backward + gap, out, 245 BROTLI_FALSE); 246 } 247 } 248 249 #undef BANK_SIZE 250 #undef BUCKET_SIZE 251 #undef CAPPED_CHAINS 252 253 #undef HashForgetfulChain 254