1 /* 2 * Binary Tree match finder with 2-, 3-, and 4-byte hashing 3 * 4 * Authors: Lasse Collin <lasse.collin@tukaani.org> 5 * Igor Pavlov <http://7-zip.org/> 6 * 7 * This file has been put into the public domain. 8 * You can do whatever you want with this file. 9 */ 10 11 package org.tukaani.xz.lz; 12 13 final class BT4 extends LZEncoder { 14 private final Hash234 hash; 15 private final int[] tree; 16 private final Matches matches; 17 private final int depthLimit; 18 19 private final int cyclicSize; 20 private int cyclicPos = -1; 21 private int lzPos; 22 getMemoryUsage(int dictSize)23 static int getMemoryUsage(int dictSize) { 24 return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 8) + 10; 25 } 26 BT4(int dictSize, int beforeSizeMin, int readAheadMax, int niceLen, int matchLenMax, int depthLimit)27 BT4(int dictSize, int beforeSizeMin, int readAheadMax, 28 int niceLen, int matchLenMax, int depthLimit) { 29 super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax); 30 31 cyclicSize = dictSize + 1; 32 lzPos = cyclicSize; 33 34 hash = new Hash234(dictSize); 35 tree = new int[cyclicSize * 2]; 36 37 // Substracting 1 because the shortest match that this match 38 // finder can find is 2 bytes, so there's no need to reserve 39 // space for one-byte matches. 40 matches = new Matches(niceLen - 1); 41 42 this.depthLimit = depthLimit > 0 ? depthLimit : 16 + niceLen / 2; 43 } 44 movePos()45 private int movePos() { 46 int avail = movePos(niceLen, 4); 47 48 if (avail != 0) { 49 if (++lzPos == Integer.MAX_VALUE) { 50 int normalizationOffset = Integer.MAX_VALUE - cyclicSize; 51 hash.normalize(normalizationOffset); 52 normalize(tree, normalizationOffset); 53 lzPos -= normalizationOffset; 54 } 55 56 if (++cyclicPos == cyclicSize) 57 cyclicPos = 0; 58 } 59 60 return avail; 61 } 62 getMatches()63 public Matches getMatches() { 64 matches.count = 0; 65 66 int matchLenLimit = matchLenMax; 67 int niceLenLimit = niceLen; 68 int avail = movePos(); 69 70 if (avail < matchLenLimit) { 71 if (avail == 0) 72 return matches; 73 74 matchLenLimit = avail; 75 if (niceLenLimit > avail) 76 niceLenLimit = avail; 77 } 78 79 hash.calcHashes(buf, readPos); 80 int delta2 = lzPos - hash.getHash2Pos(); 81 int delta3 = lzPos - hash.getHash3Pos(); 82 int currentMatch = hash.getHash4Pos(); 83 hash.updateTables(lzPos); 84 85 int lenBest = 0; 86 87 // See if the hash from the first two bytes found a match. 88 // The hashing algorithm guarantees that if the first byte 89 // matches, also the second byte does, so there's no need to 90 // test the second byte. 91 if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) { 92 lenBest = 2; 93 matches.len[0] = 2; 94 matches.dist[0] = delta2 - 1; 95 matches.count = 1; 96 } 97 98 // See if the hash from the first three bytes found a match that 99 // is different from the match possibly found by the two-byte hash. 100 // Also here the hashing algorithm guarantees that if the first byte 101 // matches, also the next two bytes do. 102 if (delta2 != delta3 && delta3 < cyclicSize 103 && buf[readPos - delta3] == buf[readPos]) { 104 lenBest = 3; 105 matches.dist[matches.count++] = delta3 - 1; 106 delta2 = delta3; 107 } 108 109 // If a match was found, see how long it is. 110 if (matches.count > 0) { 111 while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2] 112 == buf[readPos + lenBest]) 113 ++lenBest; 114 115 matches.len[matches.count - 1] = lenBest; 116 117 // Return if it is long enough (niceLen or reached the end of 118 // the dictionary). 119 if (lenBest >= niceLenLimit) { 120 skip(niceLenLimit, currentMatch); 121 return matches; 122 } 123 } 124 125 // Long enough match wasn't found so easily. Look for better matches 126 // from the binary tree. 127 if (lenBest < 3) 128 lenBest = 3; 129 130 int depth = depthLimit; 131 132 int ptr0 = (cyclicPos << 1) + 1; 133 int ptr1 = cyclicPos << 1; 134 int len0 = 0; 135 int len1 = 0; 136 137 while (true) { 138 int delta = lzPos - currentMatch; 139 140 // Return if the search depth limit has been reached or 141 // if the distance of the potential match exceeds the 142 // dictionary size. 143 if (depth-- == 0 || delta >= cyclicSize) { 144 tree[ptr0] = 0; 145 tree[ptr1] = 0; 146 return matches; 147 } 148 149 int pair = (cyclicPos - delta 150 + (delta > cyclicPos ? cyclicSize : 0)) << 1; 151 int len = Math.min(len0, len1); 152 153 if (buf[readPos + len - delta] == buf[readPos + len]) { 154 while (++len < matchLenLimit) 155 if (buf[readPos + len - delta] != buf[readPos + len]) 156 break; 157 158 if (len > lenBest) { 159 lenBest = len; 160 matches.len[matches.count] = len; 161 matches.dist[matches.count] = delta - 1; 162 ++matches.count; 163 164 if (len >= niceLenLimit) { 165 tree[ptr1] = tree[pair]; 166 tree[ptr0] = tree[pair + 1]; 167 return matches; 168 } 169 } 170 } 171 172 if ((buf[readPos + len - delta] & 0xFF) 173 < (buf[readPos + len] & 0xFF)) { 174 tree[ptr1] = currentMatch; 175 ptr1 = pair + 1; 176 currentMatch = tree[ptr1]; 177 len1 = len; 178 } else { 179 tree[ptr0] = currentMatch; 180 ptr0 = pair; 181 currentMatch = tree[ptr0]; 182 len0 = len; 183 } 184 } 185 } 186 skip(int niceLenLimit, int currentMatch)187 private void skip(int niceLenLimit, int currentMatch) { 188 int depth = depthLimit; 189 190 int ptr0 = (cyclicPos << 1) + 1; 191 int ptr1 = cyclicPos << 1; 192 int len0 = 0; 193 int len1 = 0; 194 195 while (true) { 196 int delta = lzPos - currentMatch; 197 198 if (depth-- == 0 || delta >= cyclicSize) { 199 tree[ptr0] = 0; 200 tree[ptr1] = 0; 201 return; 202 } 203 204 int pair = (cyclicPos - delta 205 + (delta > cyclicPos ? cyclicSize : 0)) << 1; 206 int len = Math.min(len0, len1); 207 208 if (buf[readPos + len - delta] == buf[readPos + len]) { 209 // No need to look for longer matches than niceLenLimit 210 // because we only are updating the tree, not returning 211 // matches found to the caller. 212 do { 213 if (++len == niceLenLimit) { 214 tree[ptr1] = tree[pair]; 215 tree[ptr0] = tree[pair + 1]; 216 return; 217 } 218 } while (buf[readPos + len - delta] == buf[readPos + len]); 219 } 220 221 if ((buf[readPos + len - delta] & 0xFF) 222 < (buf[readPos + len] & 0xFF)) { 223 tree[ptr1] = currentMatch; 224 ptr1 = pair + 1; 225 currentMatch = tree[ptr1]; 226 len1 = len; 227 } else { 228 tree[ptr0] = currentMatch; 229 ptr0 = pair; 230 currentMatch = tree[ptr0]; 231 len0 = len; 232 } 233 } 234 } 235 skip(int len)236 public void skip(int len) { 237 while (len-- > 0) { 238 int niceLenLimit = niceLen; 239 int avail = movePos(); 240 241 if (avail < niceLenLimit) { 242 if (avail == 0) 243 continue; 244 245 niceLenLimit = avail; 246 } 247 248 hash.calcHashes(buf, readPos); 249 int currentMatch = hash.getHash4Pos(); 250 hash.updateTables(lzPos); 251 252 skip(niceLenLimit, currentMatch); 253 } 254 } 255 } 256