1 /*
2  * Hash Chain match finder with 2-, 3-, and 4-byte hashing
3  *
4  * Authors: Lasse Collin <lasse.collin@tukaani.org>
5  *          Igor Pavlov <http://7-zip.org/>
6  *
7  * This file has been put into the public domain.
8  * You can do whatever you want with this file.
9  */
10 
11 package org.tukaani.xz.lz;
12 
13 final class HC4 extends LZEncoder {
14     private final Hash234 hash;
15     private final int[] chain;
16     private final Matches matches;
17     private final int depthLimit;
18 
19     private final int cyclicSize;
20     private int cyclicPos = -1;
21     private int lzPos;
22 
23     /**
24      * Gets approximate memory usage of the match finder as kibibytes.
25      */
getMemoryUsage(int dictSize)26     static int getMemoryUsage(int dictSize) {
27         return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 4) + 10;
28     }
29 
30     /**
31      * Creates a new LZEncoder with the HC4 match finder.
32      * See <code>LZEncoder.getInstance</code> for parameter descriptions.
33      */
HC4(int dictSize, int beforeSizeMin, int readAheadMax, int niceLen, int matchLenMax, int depthLimit)34     HC4(int dictSize, int beforeSizeMin, int readAheadMax,
35             int niceLen, int matchLenMax, int depthLimit) {
36         super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax);
37 
38         hash = new Hash234(dictSize);
39 
40         // +1 because we need dictSize bytes of history + the current byte.
41         cyclicSize = dictSize + 1;
42         chain = new int[cyclicSize];
43         lzPos = cyclicSize;
44 
45         // Substracting 1 because the shortest match that this match
46         // finder can find is 2 bytes, so there's no need to reserve
47         // space for one-byte matches.
48         matches = new Matches(niceLen - 1);
49 
50         // Use a default depth limit if no other value was specified.
51         // The default is just something based on experimentation;
52         // it's nothing magic.
53         this.depthLimit = (depthLimit > 0) ? depthLimit : 4 + niceLen / 4;
54     }
55 
56     /**
57      * Moves to the next byte, checks that there is enough available space,
58      * and possibly normalizes the hash tables and the hash chain.
59      *
60      * @return      number of bytes available, including the current byte
61      */
movePos()62     private int movePos() {
63         int avail = movePos(4, 4);
64 
65         if (avail != 0) {
66             if (++lzPos == Integer.MAX_VALUE) {
67                 int normalizationOffset = Integer.MAX_VALUE - cyclicSize;
68                 hash.normalize(normalizationOffset);
69                 normalize(chain, normalizationOffset);
70                 lzPos -= normalizationOffset;
71             }
72 
73             if (++cyclicPos == cyclicSize)
74                 cyclicPos = 0;
75         }
76 
77         return avail;
78     }
79 
getMatches()80     public Matches getMatches() {
81         matches.count = 0;
82         int matchLenLimit = matchLenMax;
83         int niceLenLimit = niceLen;
84         int avail = movePos();
85 
86         if (avail < matchLenLimit) {
87             if (avail == 0)
88                 return matches;
89 
90             matchLenLimit = avail;
91             if (niceLenLimit > avail)
92                 niceLenLimit = avail;
93         }
94 
95         hash.calcHashes(buf, readPos);
96         int delta2 = lzPos - hash.getHash2Pos();
97         int delta3 = lzPos - hash.getHash3Pos();
98         int currentMatch = hash.getHash4Pos();
99         hash.updateTables(lzPos);
100 
101         chain[cyclicPos] = currentMatch;
102 
103         int lenBest = 0;
104 
105         // See if the hash from the first two bytes found a match.
106         // The hashing algorithm guarantees that if the first byte
107         // matches, also the second byte does, so there's no need to
108         // test the second byte.
109         if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) {
110             lenBest = 2;
111             matches.len[0] = 2;
112             matches.dist[0] = delta2 - 1;
113             matches.count = 1;
114         }
115 
116         // See if the hash from the first three bytes found a match that
117         // is different from the match possibly found by the two-byte hash.
118         // Also here the hashing algorithm guarantees that if the first byte
119         // matches, also the next two bytes do.
120         if (delta2 != delta3 && delta3 < cyclicSize
121                 && buf[readPos - delta3] == buf[readPos]) {
122             lenBest = 3;
123             matches.dist[matches.count++] = delta3 - 1;
124             delta2 = delta3;
125         }
126 
127         // If a match was found, see how long it is.
128         if (matches.count > 0) {
129             while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2]
130                                               == buf[readPos + lenBest])
131                 ++lenBest;
132 
133             matches.len[matches.count - 1] = lenBest;
134 
135             // Return if it is long enough (niceLen or reached the end of
136             // the dictionary).
137             if (lenBest >= niceLenLimit)
138                 return matches;
139         }
140 
141         // Long enough match wasn't found so easily. Look for better matches
142         // from the hash chain.
143         if (lenBest < 3)
144             lenBest = 3;
145 
146         int depth = depthLimit;
147 
148         while (true) {
149             int delta = lzPos - currentMatch;
150 
151             // Return if the search depth limit has been reached or
152             // if the distance of the potential match exceeds the
153             // dictionary size.
154             if (depth-- == 0 || delta >= cyclicSize)
155                 return matches;
156 
157             currentMatch = chain[cyclicPos - delta
158                                  + (delta > cyclicPos ? cyclicSize : 0)];
159 
160             // Test the first byte and the first new byte that would give us
161             // a match that is at least one byte longer than lenBest. This
162             // too short matches get quickly skipped.
163             if (buf[readPos + lenBest - delta] == buf[readPos + lenBest]
164                     && buf[readPos - delta] == buf[readPos]) {
165                 // Calculate the length of the match.
166                 int len = 0;
167                 while (++len < matchLenLimit)
168                     if (buf[readPos + len - delta] != buf[readPos + len])
169                         break;
170 
171                 // Use the match if and only if it is better than the longest
172                 // match found so far.
173                 if (len > lenBest) {
174                     lenBest = len;
175                     matches.len[matches.count] = len;
176                     matches.dist[matches.count] = delta - 1;
177                     ++matches.count;
178 
179                     // Return if it is long enough (niceLen or reached the
180                     // end of the dictionary).
181                     if (len >= niceLenLimit)
182                         return matches;
183                 }
184             }
185         }
186     }
187 
skip(int len)188     public void skip(int len) {
189         assert len >= 0;
190 
191         while (len-- > 0) {
192             if (movePos() != 0) {
193                 // Update the hash chain and hash tables.
194                 hash.calcHashes(buf, readPos);
195                 chain[cyclicPos] = hash.getHash4Pos();
196                 hash.updateTables(lzPos);
197             }
198         }
199     }
200 }
201