1 /* 2 * Binary Tree match finder with 2-, 3-, and 4-byte hashing 3 * 4 * Authors: Lasse Collin <lasse.collin@tukaani.org> 5 * Igor Pavlov <http://7-zip.org/> 6 * 7 * This file has been put into the public domain. 8 * You can do whatever you want with this file. 9 */ 10 11 package org.tukaani.xz.lz; 12 13 import org.tukaani.xz.ArrayCache; 14 15 final class BT4 extends LZEncoder { 16 private final Hash234 hash; 17 private final int[] tree; 18 private final Matches matches; 19 private final int depthLimit; 20 21 private final int cyclicSize; 22 private int cyclicPos = -1; 23 private int lzPos; 24 getMemoryUsage(int dictSize)25 static int getMemoryUsage(int dictSize) { 26 return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 8) + 10; 27 } 28 BT4(int dictSize, int beforeSizeMin, int readAheadMax, int niceLen, int matchLenMax, int depthLimit, ArrayCache arrayCache)29 BT4(int dictSize, int beforeSizeMin, int readAheadMax, 30 int niceLen, int matchLenMax, int depthLimit, 31 ArrayCache arrayCache) { 32 super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax, 33 arrayCache); 34 35 cyclicSize = dictSize + 1; 36 lzPos = cyclicSize; 37 38 hash = new Hash234(dictSize, arrayCache); 39 tree = arrayCache.getIntArray(cyclicSize * 2, false); 40 41 // Substracting 1 because the shortest match that this match 42 // finder can find is 2 bytes, so there's no need to reserve 43 // space for one-byte matches. 44 matches = new Matches(niceLen - 1); 45 46 this.depthLimit = depthLimit > 0 ? depthLimit : 16 + niceLen / 2; 47 } 48 putArraysToCache(ArrayCache arrayCache)49 public void putArraysToCache(ArrayCache arrayCache) { 50 arrayCache.putArray(tree); 51 hash.putArraysToCache(arrayCache); 52 super.putArraysToCache(arrayCache); 53 } 54 movePos()55 private int movePos() { 56 int avail = movePos(niceLen, 4); 57 58 if (avail != 0) { 59 if (++lzPos == Integer.MAX_VALUE) { 60 int normalizationOffset = Integer.MAX_VALUE - cyclicSize; 61 hash.normalize(normalizationOffset); 62 normalize(tree, cyclicSize * 2, normalizationOffset); 63 lzPos -= normalizationOffset; 64 } 65 66 if (++cyclicPos == cyclicSize) 67 cyclicPos = 0; 68 } 69 70 return avail; 71 } 72 getMatches()73 public Matches getMatches() { 74 matches.count = 0; 75 76 int matchLenLimit = matchLenMax; 77 int niceLenLimit = niceLen; 78 int avail = movePos(); 79 80 if (avail < matchLenLimit) { 81 if (avail == 0) 82 return matches; 83 84 matchLenLimit = avail; 85 if (niceLenLimit > avail) 86 niceLenLimit = avail; 87 } 88 89 hash.calcHashes(buf, readPos); 90 int delta2 = lzPos - hash.getHash2Pos(); 91 int delta3 = lzPos - hash.getHash3Pos(); 92 int currentMatch = hash.getHash4Pos(); 93 hash.updateTables(lzPos); 94 95 int lenBest = 0; 96 97 // See if the hash from the first two bytes found a match. 98 // The hashing algorithm guarantees that if the first byte 99 // matches, also the second byte does, so there's no need to 100 // test the second byte. 101 if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) { 102 lenBest = 2; 103 matches.len[0] = 2; 104 matches.dist[0] = delta2 - 1; 105 matches.count = 1; 106 } 107 108 // See if the hash from the first three bytes found a match that 109 // is different from the match possibly found by the two-byte hash. 110 // Also here the hashing algorithm guarantees that if the first byte 111 // matches, also the next two bytes do. 112 if (delta2 != delta3 && delta3 < cyclicSize 113 && buf[readPos - delta3] == buf[readPos]) { 114 lenBest = 3; 115 matches.dist[matches.count++] = delta3 - 1; 116 delta2 = delta3; 117 } 118 119 // If a match was found, see how long it is. 120 if (matches.count > 0) { 121 while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2] 122 == buf[readPos + lenBest]) 123 ++lenBest; 124 125 matches.len[matches.count - 1] = lenBest; 126 127 // Return if it is long enough (niceLen or reached the end of 128 // the dictionary). 129 if (lenBest >= niceLenLimit) { 130 skip(niceLenLimit, currentMatch); 131 return matches; 132 } 133 } 134 135 // Long enough match wasn't found so easily. Look for better matches 136 // from the binary tree. 137 if (lenBest < 3) 138 lenBest = 3; 139 140 int depth = depthLimit; 141 142 int ptr0 = (cyclicPos << 1) + 1; 143 int ptr1 = cyclicPos << 1; 144 int len0 = 0; 145 int len1 = 0; 146 147 while (true) { 148 int delta = lzPos - currentMatch; 149 150 // Return if the search depth limit has been reached or 151 // if the distance of the potential match exceeds the 152 // dictionary size. 153 if (depth-- == 0 || delta >= cyclicSize) { 154 tree[ptr0] = 0; 155 tree[ptr1] = 0; 156 return matches; 157 } 158 159 int pair = (cyclicPos - delta 160 + (delta > cyclicPos ? cyclicSize : 0)) << 1; 161 int len = Math.min(len0, len1); 162 163 if (buf[readPos + len - delta] == buf[readPos + len]) { 164 while (++len < matchLenLimit) 165 if (buf[readPos + len - delta] != buf[readPos + len]) 166 break; 167 168 if (len > lenBest) { 169 lenBest = len; 170 matches.len[matches.count] = len; 171 matches.dist[matches.count] = delta - 1; 172 ++matches.count; 173 174 if (len >= niceLenLimit) { 175 tree[ptr1] = tree[pair]; 176 tree[ptr0] = tree[pair + 1]; 177 return matches; 178 } 179 } 180 } 181 182 if ((buf[readPos + len - delta] & 0xFF) 183 < (buf[readPos + len] & 0xFF)) { 184 tree[ptr1] = currentMatch; 185 ptr1 = pair + 1; 186 currentMatch = tree[ptr1]; 187 len1 = len; 188 } else { 189 tree[ptr0] = currentMatch; 190 ptr0 = pair; 191 currentMatch = tree[ptr0]; 192 len0 = len; 193 } 194 } 195 } 196 skip(int niceLenLimit, int currentMatch)197 private void skip(int niceLenLimit, int currentMatch) { 198 int depth = depthLimit; 199 200 int ptr0 = (cyclicPos << 1) + 1; 201 int ptr1 = cyclicPos << 1; 202 int len0 = 0; 203 int len1 = 0; 204 205 while (true) { 206 int delta = lzPos - currentMatch; 207 208 if (depth-- == 0 || delta >= cyclicSize) { 209 tree[ptr0] = 0; 210 tree[ptr1] = 0; 211 return; 212 } 213 214 int pair = (cyclicPos - delta 215 + (delta > cyclicPos ? cyclicSize : 0)) << 1; 216 int len = Math.min(len0, len1); 217 218 if (buf[readPos + len - delta] == buf[readPos + len]) { 219 // No need to look for longer matches than niceLenLimit 220 // because we only are updating the tree, not returning 221 // matches found to the caller. 222 do { 223 if (++len == niceLenLimit) { 224 tree[ptr1] = tree[pair]; 225 tree[ptr0] = tree[pair + 1]; 226 return; 227 } 228 } while (buf[readPos + len - delta] == buf[readPos + len]); 229 } 230 231 if ((buf[readPos + len - delta] & 0xFF) 232 < (buf[readPos + len] & 0xFF)) { 233 tree[ptr1] = currentMatch; 234 ptr1 = pair + 1; 235 currentMatch = tree[ptr1]; 236 len1 = len; 237 } else { 238 tree[ptr0] = currentMatch; 239 ptr0 = pair; 240 currentMatch = tree[ptr0]; 241 len0 = len; 242 } 243 } 244 } 245 skip(int len)246 public void skip(int len) { 247 while (len-- > 0) { 248 int niceLenLimit = niceLen; 249 int avail = movePos(); 250 251 if (avail < niceLenLimit) { 252 if (avail == 0) 253 continue; 254 255 niceLenLimit = avail; 256 } 257 258 hash.calcHashes(buf, readPos); 259 int currentMatch = hash.getHash4Pos(); 260 hash.updateTables(lzPos); 261 262 skip(niceLenLimit, currentMatch); 263 } 264 } 265 } 266