1 /*
2  * Hash Chain match finder with 2-, 3-, and 4-byte hashing
3  *
4  * Authors: Lasse Collin <lasse.collin@tukaani.org>
5  *          Igor Pavlov <http://7-zip.org/>
6  *
7  * This file has been put into the public domain.
8  * You can do whatever you want with this file.
9  */
10 
11 package org.tukaani.xz.lz;
12 
13 import org.tukaani.xz.ArrayCache;
14 
15 final class HC4 extends LZEncoder {
16     private final Hash234 hash;
17     private final int[] chain;
18     private final Matches matches;
19     private final int depthLimit;
20 
21     private final int cyclicSize;
22     private int cyclicPos = -1;
23     private int lzPos;
24 
25     /**
26      * Gets approximate memory usage of the match finder as kibibytes.
27      */
getMemoryUsage(int dictSize)28     static int getMemoryUsage(int dictSize) {
29         return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 4) + 10;
30     }
31 
32     /**
33      * Creates a new LZEncoder with the HC4 match finder.
34      * See <code>LZEncoder.getInstance</code> for parameter descriptions.
35      */
HC4(int dictSize, int beforeSizeMin, int readAheadMax, int niceLen, int matchLenMax, int depthLimit, ArrayCache arrayCache)36     HC4(int dictSize, int beforeSizeMin, int readAheadMax,
37             int niceLen, int matchLenMax, int depthLimit,
38             ArrayCache arrayCache) {
39         super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax,
40               arrayCache);
41 
42         hash = new Hash234(dictSize, arrayCache);
43 
44         // +1 because we need dictSize bytes of history + the current byte.
45         cyclicSize = dictSize + 1;
46         chain = arrayCache.getIntArray(cyclicSize, false);
47         lzPos = cyclicSize;
48 
49         // Substracting 1 because the shortest match that this match
50         // finder can find is 2 bytes, so there's no need to reserve
51         // space for one-byte matches.
52         matches = new Matches(niceLen - 1);
53 
54         // Use a default depth limit if no other value was specified.
55         // The default is just something based on experimentation;
56         // it's nothing magic.
57         this.depthLimit = (depthLimit > 0) ? depthLimit : 4 + niceLen / 4;
58     }
59 
putArraysToCache(ArrayCache arrayCache)60     public void putArraysToCache(ArrayCache arrayCache) {
61         arrayCache.putArray(chain);
62         hash.putArraysToCache(arrayCache);
63         super.putArraysToCache(arrayCache);
64     }
65 
66     /**
67      * Moves to the next byte, checks that there is enough available space,
68      * and possibly normalizes the hash tables and the hash chain.
69      *
70      * @return      number of bytes available, including the current byte
71      */
movePos()72     private int movePos() {
73         int avail = movePos(4, 4);
74 
75         if (avail != 0) {
76             if (++lzPos == Integer.MAX_VALUE) {
77                 int normalizationOffset = Integer.MAX_VALUE - cyclicSize;
78                 hash.normalize(normalizationOffset);
79                 normalize(chain, cyclicSize, normalizationOffset);
80                 lzPos -= normalizationOffset;
81             }
82 
83             if (++cyclicPos == cyclicSize)
84                 cyclicPos = 0;
85         }
86 
87         return avail;
88     }
89 
getMatches()90     public Matches getMatches() {
91         matches.count = 0;
92         int matchLenLimit = matchLenMax;
93         int niceLenLimit = niceLen;
94         int avail = movePos();
95 
96         if (avail < matchLenLimit) {
97             if (avail == 0)
98                 return matches;
99 
100             matchLenLimit = avail;
101             if (niceLenLimit > avail)
102                 niceLenLimit = avail;
103         }
104 
105         hash.calcHashes(buf, readPos);
106         int delta2 = lzPos - hash.getHash2Pos();
107         int delta3 = lzPos - hash.getHash3Pos();
108         int currentMatch = hash.getHash4Pos();
109         hash.updateTables(lzPos);
110 
111         chain[cyclicPos] = currentMatch;
112 
113         int lenBest = 0;
114 
115         // See if the hash from the first two bytes found a match.
116         // The hashing algorithm guarantees that if the first byte
117         // matches, also the second byte does, so there's no need to
118         // test the second byte.
119         if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) {
120             lenBest = 2;
121             matches.len[0] = 2;
122             matches.dist[0] = delta2 - 1;
123             matches.count = 1;
124         }
125 
126         // See if the hash from the first three bytes found a match that
127         // is different from the match possibly found by the two-byte hash.
128         // Also here the hashing algorithm guarantees that if the first byte
129         // matches, also the next two bytes do.
130         if (delta2 != delta3 && delta3 < cyclicSize
131                 && buf[readPos - delta3] == buf[readPos]) {
132             lenBest = 3;
133             matches.dist[matches.count++] = delta3 - 1;
134             delta2 = delta3;
135         }
136 
137         // If a match was found, see how long it is.
138         if (matches.count > 0) {
139             while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2]
140                                               == buf[readPos + lenBest])
141                 ++lenBest;
142 
143             matches.len[matches.count - 1] = lenBest;
144 
145             // Return if it is long enough (niceLen or reached the end of
146             // the dictionary).
147             if (lenBest >= niceLenLimit)
148                 return matches;
149         }
150 
151         // Long enough match wasn't found so easily. Look for better matches
152         // from the hash chain.
153         if (lenBest < 3)
154             lenBest = 3;
155 
156         int depth = depthLimit;
157 
158         while (true) {
159             int delta = lzPos - currentMatch;
160 
161             // Return if the search depth limit has been reached or
162             // if the distance of the potential match exceeds the
163             // dictionary size.
164             if (depth-- == 0 || delta >= cyclicSize)
165                 return matches;
166 
167             currentMatch = chain[cyclicPos - delta
168                                  + (delta > cyclicPos ? cyclicSize : 0)];
169 
170             // Test the first byte and the first new byte that would give us
171             // a match that is at least one byte longer than lenBest. This
172             // too short matches get quickly skipped.
173             if (buf[readPos + lenBest - delta] == buf[readPos + lenBest]
174                     && buf[readPos - delta] == buf[readPos]) {
175                 // Calculate the length of the match.
176                 int len = 0;
177                 while (++len < matchLenLimit)
178                     if (buf[readPos + len - delta] != buf[readPos + len])
179                         break;
180 
181                 // Use the match if and only if it is better than the longest
182                 // match found so far.
183                 if (len > lenBest) {
184                     lenBest = len;
185                     matches.len[matches.count] = len;
186                     matches.dist[matches.count] = delta - 1;
187                     ++matches.count;
188 
189                     // Return if it is long enough (niceLen or reached the
190                     // end of the dictionary).
191                     if (len >= niceLenLimit)
192                         return matches;
193                 }
194             }
195         }
196     }
197 
skip(int len)198     public void skip(int len) {
199         assert len >= 0;
200 
201         while (len-- > 0) {
202             if (movePos() != 0) {
203                 // Update the hash chain and hash tables.
204                 hash.calcHashes(buf, readPos);
205                 chain[cyclicPos] = hash.getHash4Pos();
206                 hash.updateTables(lzPos);
207             }
208         }
209     }
210 }
211