1 /*
2  * Binary Tree match finder with 2-, 3-, and 4-byte hashing
3  *
4  * Authors: Lasse Collin <lasse.collin@tukaani.org>
5  *          Igor Pavlov <http://7-zip.org/>
6  *
7  * This file has been put into the public domain.
8  * You can do whatever you want with this file.
9  */
10 
11 package org.tukaani.xz.lz;
12 
13 import org.tukaani.xz.ArrayCache;
14 
15 final class BT4 extends LZEncoder {
16     private final Hash234 hash;
17     private final int[] tree;
18     private final Matches matches;
19     private final int depthLimit;
20 
21     private final int cyclicSize;
22     private int cyclicPos = -1;
23     private int lzPos;
24 
getMemoryUsage(int dictSize)25     static int getMemoryUsage(int dictSize) {
26         return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 8) + 10;
27     }
28 
BT4(int dictSize, int beforeSizeMin, int readAheadMax, int niceLen, int matchLenMax, int depthLimit, ArrayCache arrayCache)29     BT4(int dictSize, int beforeSizeMin, int readAheadMax,
30             int niceLen, int matchLenMax, int depthLimit,
31             ArrayCache arrayCache) {
32         super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax,
33               arrayCache);
34 
35         cyclicSize = dictSize + 1;
36         lzPos = cyclicSize;
37 
38         hash = new Hash234(dictSize, arrayCache);
39         tree = arrayCache.getIntArray(cyclicSize * 2, false);
40 
41         // Substracting 1 because the shortest match that this match
42         // finder can find is 2 bytes, so there's no need to reserve
43         // space for one-byte matches.
44         matches = new Matches(niceLen - 1);
45 
46         this.depthLimit = depthLimit > 0 ? depthLimit : 16 + niceLen / 2;
47     }
48 
putArraysToCache(ArrayCache arrayCache)49     public void putArraysToCache(ArrayCache arrayCache) {
50         arrayCache.putArray(tree);
51         hash.putArraysToCache(arrayCache);
52         super.putArraysToCache(arrayCache);
53     }
54 
movePos()55     private int movePos() {
56         int avail = movePos(niceLen, 4);
57 
58         if (avail != 0) {
59             if (++lzPos == Integer.MAX_VALUE) {
60                 int normalizationOffset = Integer.MAX_VALUE - cyclicSize;
61                 hash.normalize(normalizationOffset);
62                 normalize(tree, cyclicSize * 2, normalizationOffset);
63                 lzPos -= normalizationOffset;
64             }
65 
66             if (++cyclicPos == cyclicSize)
67                 cyclicPos = 0;
68         }
69 
70         return avail;
71     }
72 
getMatches()73     public Matches getMatches() {
74         matches.count = 0;
75 
76         int matchLenLimit = matchLenMax;
77         int niceLenLimit = niceLen;
78         int avail = movePos();
79 
80         if (avail < matchLenLimit) {
81             if (avail == 0)
82                 return matches;
83 
84             matchLenLimit = avail;
85             if (niceLenLimit > avail)
86                 niceLenLimit = avail;
87         }
88 
89         hash.calcHashes(buf, readPos);
90         int delta2 = lzPos - hash.getHash2Pos();
91         int delta3 = lzPos - hash.getHash3Pos();
92         int currentMatch = hash.getHash4Pos();
93         hash.updateTables(lzPos);
94 
95         int lenBest = 0;
96 
97         // See if the hash from the first two bytes found a match.
98         // The hashing algorithm guarantees that if the first byte
99         // matches, also the second byte does, so there's no need to
100         // test the second byte.
101         if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) {
102             lenBest = 2;
103             matches.len[0] = 2;
104             matches.dist[0] = delta2 - 1;
105             matches.count = 1;
106         }
107 
108         // See if the hash from the first three bytes found a match that
109         // is different from the match possibly found by the two-byte hash.
110         // Also here the hashing algorithm guarantees that if the first byte
111         // matches, also the next two bytes do.
112         if (delta2 != delta3 && delta3 < cyclicSize
113                 && buf[readPos - delta3] == buf[readPos]) {
114             lenBest = 3;
115             matches.dist[matches.count++] = delta3 - 1;
116             delta2 = delta3;
117         }
118 
119         // If a match was found, see how long it is.
120         if (matches.count > 0) {
121             while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2]
122                                               == buf[readPos + lenBest])
123                 ++lenBest;
124 
125             matches.len[matches.count - 1] = lenBest;
126 
127             // Return if it is long enough (niceLen or reached the end of
128             // the dictionary).
129             if (lenBest >= niceLenLimit) {
130                 skip(niceLenLimit, currentMatch);
131                 return matches;
132             }
133         }
134 
135         // Long enough match wasn't found so easily. Look for better matches
136         // from the binary tree.
137         if (lenBest < 3)
138             lenBest = 3;
139 
140         int depth = depthLimit;
141 
142         int ptr0 = (cyclicPos << 1) + 1;
143         int ptr1 = cyclicPos << 1;
144         int len0 = 0;
145         int len1 = 0;
146 
147         while (true) {
148             int delta = lzPos - currentMatch;
149 
150             // Return if the search depth limit has been reached or
151             // if the distance of the potential match exceeds the
152             // dictionary size.
153             if (depth-- == 0 || delta >= cyclicSize) {
154                 tree[ptr0] = 0;
155                 tree[ptr1] = 0;
156                 return matches;
157             }
158 
159             int pair = (cyclicPos - delta
160                         + (delta > cyclicPos ? cyclicSize : 0)) << 1;
161             int len = Math.min(len0, len1);
162 
163             if (buf[readPos + len - delta] == buf[readPos + len]) {
164                 while (++len < matchLenLimit)
165                     if (buf[readPos + len - delta] != buf[readPos + len])
166                         break;
167 
168                 if (len > lenBest) {
169                     lenBest = len;
170                     matches.len[matches.count] = len;
171                     matches.dist[matches.count] = delta - 1;
172                     ++matches.count;
173 
174                     if (len >= niceLenLimit) {
175                         tree[ptr1] = tree[pair];
176                         tree[ptr0] = tree[pair + 1];
177                         return matches;
178                     }
179                 }
180             }
181 
182             if ((buf[readPos + len - delta] & 0xFF)
183                     < (buf[readPos + len] & 0xFF)) {
184                 tree[ptr1] = currentMatch;
185                 ptr1 = pair + 1;
186                 currentMatch = tree[ptr1];
187                 len1 = len;
188             } else {
189                 tree[ptr0] = currentMatch;
190                 ptr0 = pair;
191                 currentMatch = tree[ptr0];
192                 len0 = len;
193             }
194         }
195     }
196 
skip(int niceLenLimit, int currentMatch)197     private void skip(int niceLenLimit, int currentMatch) {
198         int depth = depthLimit;
199 
200         int ptr0 = (cyclicPos << 1) + 1;
201         int ptr1 = cyclicPos << 1;
202         int len0 = 0;
203         int len1 = 0;
204 
205         while (true) {
206             int delta = lzPos - currentMatch;
207 
208             if (depth-- == 0 || delta >= cyclicSize) {
209                 tree[ptr0] = 0;
210                 tree[ptr1] = 0;
211                 return;
212             }
213 
214             int pair = (cyclicPos - delta
215                         + (delta > cyclicPos ? cyclicSize : 0)) << 1;
216             int len = Math.min(len0, len1);
217 
218             if (buf[readPos + len - delta] == buf[readPos + len]) {
219                 // No need to look for longer matches than niceLenLimit
220                 // because we only are updating the tree, not returning
221                 // matches found to the caller.
222                 do {
223                     if (++len == niceLenLimit) {
224                         tree[ptr1] = tree[pair];
225                         tree[ptr0] = tree[pair + 1];
226                         return;
227                     }
228                 } while (buf[readPos + len - delta] == buf[readPos + len]);
229             }
230 
231             if ((buf[readPos + len - delta] & 0xFF)
232                     < (buf[readPos + len] & 0xFF)) {
233                 tree[ptr1] = currentMatch;
234                 ptr1 = pair + 1;
235                 currentMatch = tree[ptr1];
236                 len1 = len;
237             } else {
238                 tree[ptr0] = currentMatch;
239                 ptr0 = pair;
240                 currentMatch = tree[ptr0];
241                 len0 = len;
242             }
243         }
244     }
245 
skip(int len)246     public void skip(int len) {
247         while (len-- > 0) {
248             int niceLenLimit = niceLen;
249             int avail = movePos();
250 
251             if (avail < niceLenLimit) {
252                 if (avail == 0)
253                     continue;
254 
255                 niceLenLimit = avail;
256             }
257 
258             hash.calcHashes(buf, readPos);
259             int currentMatch = hash.getHash4Pos();
260             hash.updateTables(lzPos);
261 
262             skip(niceLenLimit, currentMatch);
263         }
264     }
265 }
266