1 /*
2  * Binary Tree match finder with 2-, 3-, and 4-byte hashing
3  *
4  * Authors: Lasse Collin <lasse.collin@tukaani.org>
5  *          Igor Pavlov <http://7-zip.org/>
6  *
7  * This file has been put into the public domain.
8  * You can do whatever you want with this file.
9  */
10 
11 package org.tukaani.xz.lz;
12 
13 final class BT4 extends LZEncoder {
14     private final Hash234 hash;
15     private final int[] tree;
16     private final Matches matches;
17     private final int depthLimit;
18 
19     private final int cyclicSize;
20     private int cyclicPos = -1;
21     private int lzPos;
22 
getMemoryUsage(int dictSize)23     static int getMemoryUsage(int dictSize) {
24         return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 8) + 10;
25     }
26 
BT4(int dictSize, int beforeSizeMin, int readAheadMax, int niceLen, int matchLenMax, int depthLimit)27     BT4(int dictSize, int beforeSizeMin, int readAheadMax,
28             int niceLen, int matchLenMax, int depthLimit) {
29         super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax);
30 
31         cyclicSize = dictSize + 1;
32         lzPos = cyclicSize;
33 
34         hash = new Hash234(dictSize);
35         tree = new int[cyclicSize * 2];
36 
37         // Substracting 1 because the shortest match that this match
38         // finder can find is 2 bytes, so there's no need to reserve
39         // space for one-byte matches.
40         matches = new Matches(niceLen - 1);
41 
42         this.depthLimit = depthLimit > 0 ? depthLimit : 16 + niceLen / 2;
43     }
44 
movePos()45     private int movePos() {
46         int avail = movePos(niceLen, 4);
47 
48         if (avail != 0) {
49             if (++lzPos == Integer.MAX_VALUE) {
50                 int normalizationOffset = Integer.MAX_VALUE - cyclicSize;
51                 hash.normalize(normalizationOffset);
52                 normalize(tree, normalizationOffset);
53                 lzPos -= normalizationOffset;
54             }
55 
56             if (++cyclicPos == cyclicSize)
57                 cyclicPos = 0;
58         }
59 
60         return avail;
61     }
62 
getMatches()63     public Matches getMatches() {
64         matches.count = 0;
65 
66         int matchLenLimit = matchLenMax;
67         int niceLenLimit = niceLen;
68         int avail = movePos();
69 
70         if (avail < matchLenLimit) {
71             if (avail == 0)
72                 return matches;
73 
74             matchLenLimit = avail;
75             if (niceLenLimit > avail)
76                 niceLenLimit = avail;
77         }
78 
79         hash.calcHashes(buf, readPos);
80         int delta2 = lzPos - hash.getHash2Pos();
81         int delta3 = lzPos - hash.getHash3Pos();
82         int currentMatch = hash.getHash4Pos();
83         hash.updateTables(lzPos);
84 
85         int lenBest = 0;
86 
87         // See if the hash from the first two bytes found a match.
88         // The hashing algorithm guarantees that if the first byte
89         // matches, also the second byte does, so there's no need to
90         // test the second byte.
91         if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) {
92             lenBest = 2;
93             matches.len[0] = 2;
94             matches.dist[0] = delta2 - 1;
95             matches.count = 1;
96         }
97 
98         // See if the hash from the first three bytes found a match that
99         // is different from the match possibly found by the two-byte hash.
100         // Also here the hashing algorithm guarantees that if the first byte
101         // matches, also the next two bytes do.
102         if (delta2 != delta3 && delta3 < cyclicSize
103                 && buf[readPos - delta3] == buf[readPos]) {
104             lenBest = 3;
105             matches.dist[matches.count++] = delta3 - 1;
106             delta2 = delta3;
107         }
108 
109         // If a match was found, see how long it is.
110         if (matches.count > 0) {
111             while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2]
112                                               == buf[readPos + lenBest])
113                 ++lenBest;
114 
115             matches.len[matches.count - 1] = lenBest;
116 
117             // Return if it is long enough (niceLen or reached the end of
118             // the dictionary).
119             if (lenBest >= niceLenLimit) {
120                 skip(niceLenLimit, currentMatch);
121                 return matches;
122             }
123         }
124 
125         // Long enough match wasn't found so easily. Look for better matches
126         // from the binary tree.
127         if (lenBest < 3)
128             lenBest = 3;
129 
130         int depth = depthLimit;
131 
132         int ptr0 = (cyclicPos << 1) + 1;
133         int ptr1 = cyclicPos << 1;
134         int len0 = 0;
135         int len1 = 0;
136 
137         while (true) {
138             int delta = lzPos - currentMatch;
139 
140             // Return if the search depth limit has been reached or
141             // if the distance of the potential match exceeds the
142             // dictionary size.
143             if (depth-- == 0 || delta >= cyclicSize) {
144                 tree[ptr0] = 0;
145                 tree[ptr1] = 0;
146                 return matches;
147             }
148 
149             int pair = (cyclicPos - delta
150                         + (delta > cyclicPos ? cyclicSize : 0)) << 1;
151             int len = Math.min(len0, len1);
152 
153             if (buf[readPos + len - delta] == buf[readPos + len]) {
154                 while (++len < matchLenLimit)
155                     if (buf[readPos + len - delta] != buf[readPos + len])
156                         break;
157 
158                 if (len > lenBest) {
159                     lenBest = len;
160                     matches.len[matches.count] = len;
161                     matches.dist[matches.count] = delta - 1;
162                     ++matches.count;
163 
164                     if (len >= niceLenLimit) {
165                         tree[ptr1] = tree[pair];
166                         tree[ptr0] = tree[pair + 1];
167                         return matches;
168                     }
169                 }
170             }
171 
172             if ((buf[readPos + len - delta] & 0xFF)
173                     < (buf[readPos + len] & 0xFF)) {
174                 tree[ptr1] = currentMatch;
175                 ptr1 = pair + 1;
176                 currentMatch = tree[ptr1];
177                 len1 = len;
178             } else {
179                 tree[ptr0] = currentMatch;
180                 ptr0 = pair;
181                 currentMatch = tree[ptr0];
182                 len0 = len;
183             }
184         }
185     }
186 
skip(int niceLenLimit, int currentMatch)187     private void skip(int niceLenLimit, int currentMatch) {
188         int depth = depthLimit;
189 
190         int ptr0 = (cyclicPos << 1) + 1;
191         int ptr1 = cyclicPos << 1;
192         int len0 = 0;
193         int len1 = 0;
194 
195         while (true) {
196             int delta = lzPos - currentMatch;
197 
198             if (depth-- == 0 || delta >= cyclicSize) {
199                 tree[ptr0] = 0;
200                 tree[ptr1] = 0;
201                 return;
202             }
203 
204             int pair = (cyclicPos - delta
205                         + (delta > cyclicPos ? cyclicSize : 0)) << 1;
206             int len = Math.min(len0, len1);
207 
208             if (buf[readPos + len - delta] == buf[readPos + len]) {
209                 // No need to look for longer matches than niceLenLimit
210                 // because we only are updating the tree, not returning
211                 // matches found to the caller.
212                 do {
213                     if (++len == niceLenLimit) {
214                         tree[ptr1] = tree[pair];
215                         tree[ptr0] = tree[pair + 1];
216                         return;
217                     }
218                 } while (buf[readPos + len - delta] == buf[readPos + len]);
219             }
220 
221             if ((buf[readPos + len - delta] & 0xFF)
222                     < (buf[readPos + len] & 0xFF)) {
223                 tree[ptr1] = currentMatch;
224                 ptr1 = pair + 1;
225                 currentMatch = tree[ptr1];
226                 len1 = len;
227             } else {
228                 tree[ptr0] = currentMatch;
229                 ptr0 = pair;
230                 currentMatch = tree[ptr0];
231                 len0 = len;
232             }
233         }
234     }
235 
skip(int len)236     public void skip(int len) {
237         while (len-- > 0) {
238             int niceLenLimit = niceLen;
239             int avail = movePos();
240 
241             if (avail < niceLenLimit) {
242                 if (avail == 0)
243                     continue;
244 
245                 niceLenLimit = avail;
246             }
247 
248             hash.calcHashes(buf, readPos);
249             int currentMatch = hash.getHash4Pos();
250             hash.updateTables(lzPos);
251 
252             skip(niceLenLimit, currentMatch);
253         }
254     }
255 }
256