1 /* 2 * LZMAEncoderNormal 3 * 4 * Authors: Lasse Collin <lasse.collin@tukaani.org> 5 * Igor Pavlov <http://7-zip.org/> 6 * 7 * This file has been put into the public domain. 8 * You can do whatever you want with this file. 9 */ 10 11 package org.tukaani.xz.lzma; 12 13 import org.tukaani.xz.ArrayCache; 14 import org.tukaani.xz.lz.LZEncoder; 15 import org.tukaani.xz.lz.Matches; 16 import org.tukaani.xz.rangecoder.RangeEncoder; 17 18 final class LZMAEncoderNormal extends LZMAEncoder { 19 private static final int OPTS = 4096; 20 21 private static final int EXTRA_SIZE_BEFORE = OPTS; 22 private static final int EXTRA_SIZE_AFTER = OPTS; 23 24 private final Optimum[] opts = new Optimum[OPTS]; 25 private int optCur = 0; 26 private int optEnd = 0; 27 28 private Matches matches; 29 30 // These are fields solely to avoid allocating the objects again and 31 // again on each function call. 32 private final int[] repLens = new int[REPS]; 33 private final State nextState = new State(); 34 getMemoryUsage(int dictSize, int extraSizeBefore, int mf)35 static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) { 36 return LZEncoder.getMemoryUsage(dictSize, 37 Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE), 38 EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf) 39 + OPTS * 64 / 1024; 40 } 41 LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb, int dictSize, int extraSizeBefore, int niceLen, int mf, int depthLimit, ArrayCache arrayCache)42 LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb, 43 int dictSize, int extraSizeBefore, 44 int niceLen, int mf, int depthLimit, 45 ArrayCache arrayCache) { 46 super(rc, LZEncoder.getInstance(dictSize, 47 Math.max(extraSizeBefore, 48 EXTRA_SIZE_BEFORE), 49 EXTRA_SIZE_AFTER, 50 niceLen, MATCH_LEN_MAX, 51 mf, depthLimit, arrayCache), 52 lc, lp, pb, dictSize, niceLen); 53 54 for (int i = 0; i < OPTS; ++i) 55 opts[i] = new Optimum(); 56 } 57 reset()58 public void reset() { 59 optCur = 0; 60 optEnd = 0; 61 super.reset(); 62 } 63 64 /** 65 * Converts the opts array from backward indexes to forward indexes. 66 * Then it will be simple to get the next symbol from the array 67 * in later calls to <code>getNextSymbol()</code>. 68 */ convertOpts()69 private int convertOpts() { 70 optEnd = optCur; 71 72 int optPrev = opts[optCur].optPrev; 73 74 do { 75 Optimum opt = opts[optCur]; 76 77 if (opt.prev1IsLiteral) { 78 opts[optPrev].optPrev = optCur; 79 opts[optPrev].backPrev = -1; 80 optCur = optPrev--; 81 82 if (opt.hasPrev2) { 83 opts[optPrev].optPrev = optPrev + 1; 84 opts[optPrev].backPrev = opt.backPrev2; 85 optCur = optPrev; 86 optPrev = opt.optPrev2; 87 } 88 } 89 90 int temp = opts[optPrev].optPrev; 91 opts[optPrev].optPrev = optCur; 92 optCur = optPrev; 93 optPrev = temp; 94 } while (optCur > 0); 95 96 optCur = opts[0].optPrev; 97 back = opts[optCur].backPrev; 98 return optCur; 99 } 100 getNextSymbol()101 int getNextSymbol() { 102 // If there are pending symbols from an earlier call to this 103 // function, return those symbols first. 104 if (optCur < optEnd) { 105 int len = opts[optCur].optPrev - optCur; 106 optCur = opts[optCur].optPrev; 107 back = opts[optCur].backPrev; 108 return len; 109 } 110 111 assert optCur == optEnd; 112 optCur = 0; 113 optEnd = 0; 114 back = -1; 115 116 if (readAhead == -1) 117 matches = getMatches(); 118 119 // Get the number of bytes available in the dictionary, but 120 // not more than the maximum match length. If there aren't 121 // enough bytes remaining to encode a match at all, return 122 // immediately to encode this byte as a literal. 123 int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX); 124 if (avail < MATCH_LEN_MIN) 125 return 1; 126 127 // Get the lengths of repeated matches. 128 int repBest = 0; 129 for (int rep = 0; rep < REPS; ++rep) { 130 repLens[rep] = lz.getMatchLen(reps[rep], avail); 131 132 if (repLens[rep] < MATCH_LEN_MIN) { 133 repLens[rep] = 0; 134 continue; 135 } 136 137 if (repLens[rep] > repLens[repBest]) 138 repBest = rep; 139 } 140 141 // Return if the best repeated match is at least niceLen bytes long. 142 if (repLens[repBest] >= niceLen) { 143 back = repBest; 144 skip(repLens[repBest] - 1); 145 return repLens[repBest]; 146 } 147 148 // Initialize mainLen and mainDist to the longest match found 149 // by the match finder. 150 int mainLen = 0; 151 int mainDist = 0; 152 if (matches.count > 0) { 153 mainLen = matches.len[matches.count - 1]; 154 mainDist = matches.dist[matches.count - 1]; 155 156 // Return if it is at least niceLen bytes long. 157 if (mainLen >= niceLen) { 158 back = mainDist + REPS; 159 skip(mainLen - 1); 160 return mainLen; 161 } 162 } 163 164 int curByte = lz.getByte(0); 165 int matchByte = lz.getByte(reps[0] + 1); 166 167 // If the match finder found no matches and this byte cannot be 168 // encoded as a repeated match (short or long), we must be return 169 // to have the byte encoded as a literal. 170 if (mainLen < MATCH_LEN_MIN && curByte != matchByte 171 && repLens[repBest] < MATCH_LEN_MIN) 172 return 1; 173 174 175 int pos = lz.getPos(); 176 int posState = pos & posMask; 177 178 // Calculate the price of encoding the current byte as a literal. 179 { 180 int prevByte = lz.getByte(1); 181 int literalPrice = literalEncoder.getPrice(curByte, matchByte, 182 prevByte, pos, state); 183 opts[1].set1(literalPrice, 0, -1); 184 } 185 186 int anyMatchPrice = getAnyMatchPrice(state, posState); 187 int anyRepPrice = getAnyRepPrice(anyMatchPrice, state); 188 189 // If it is possible to encode this byte as a short rep, see if 190 // it is cheaper than encoding it as a literal. 191 if (matchByte == curByte) { 192 int shortRepPrice = getShortRepPrice(anyRepPrice, 193 state, posState); 194 if (shortRepPrice < opts[1].price) 195 opts[1].set1(shortRepPrice, 0, 0); 196 } 197 198 // Return if there is neither normal nor long repeated match. Use 199 // a short match instead of a literal if is is possible and cheaper. 200 optEnd = Math.max(mainLen, repLens[repBest]); 201 if (optEnd < MATCH_LEN_MIN) { 202 assert optEnd == 0 : optEnd; 203 back = opts[1].backPrev; 204 return 1; 205 } 206 207 208 // Update the lookup tables for distances and lengths before using 209 // those price calculation functions. (The price function above 210 // don't need these tables.) 211 updatePrices(); 212 213 // Initialize the state and reps of this position in opts[]. 214 // updateOptStateAndReps() will need these to get the new 215 // state and reps for the next byte. 216 opts[0].state.set(state); 217 System.arraycopy(reps, 0, opts[0].reps, 0, REPS); 218 219 // Initialize the prices for latter opts that will be used below. 220 for (int i = optEnd; i >= MATCH_LEN_MIN; --i) 221 opts[i].reset(); 222 223 // Calculate the prices of repeated matches of all lengths. 224 for (int rep = 0; rep < REPS; ++rep) { 225 int repLen = repLens[rep]; 226 if (repLen < MATCH_LEN_MIN) 227 continue; 228 229 int longRepPrice = getLongRepPrice(anyRepPrice, rep, 230 state, posState); 231 do { 232 int price = longRepPrice + repLenEncoder.getPrice(repLen, 233 posState); 234 if (price < opts[repLen].price) 235 opts[repLen].set1(price, 0, rep); 236 } while (--repLen >= MATCH_LEN_MIN); 237 } 238 239 // Calculate the prices of normal matches that are longer than rep0. 240 { 241 int len = Math.max(repLens[0] + 1, MATCH_LEN_MIN); 242 if (len <= mainLen) { 243 int normalMatchPrice = getNormalMatchPrice(anyMatchPrice, 244 state); 245 246 // Set i to the index of the shortest match that is 247 // at least len bytes long. 248 int i = 0; 249 while (len > matches.len[i]) 250 ++i; 251 252 while (true) { 253 int dist = matches.dist[i]; 254 int price = getMatchAndLenPrice(normalMatchPrice, 255 dist, len, posState); 256 if (price < opts[len].price) 257 opts[len].set1(price, 0, dist + REPS); 258 259 if (len == matches.len[i]) 260 if (++i == matches.count) 261 break; 262 263 ++len; 264 } 265 } 266 } 267 268 269 avail = Math.min(lz.getAvail(), OPTS - 1); 270 271 // Get matches for later bytes and optimize the use of LZMA symbols 272 // by calculating the prices and picking the cheapest symbol 273 // combinations. 274 while (++optCur < optEnd) { 275 matches = getMatches(); 276 if (matches.count > 0 277 && matches.len[matches.count - 1] >= niceLen) 278 break; 279 280 --avail; 281 ++pos; 282 posState = pos & posMask; 283 284 updateOptStateAndReps(); 285 anyMatchPrice = opts[optCur].price 286 + getAnyMatchPrice(opts[optCur].state, posState); 287 anyRepPrice = getAnyRepPrice(anyMatchPrice, opts[optCur].state); 288 289 calc1BytePrices(pos, posState, avail, anyRepPrice); 290 291 if (avail >= MATCH_LEN_MIN) { 292 int startLen = calcLongRepPrices(pos, posState, 293 avail, anyRepPrice); 294 if (matches.count > 0) 295 calcNormalMatchPrices(pos, posState, avail, 296 anyMatchPrice, startLen); 297 } 298 } 299 300 return convertOpts(); 301 } 302 303 /** 304 * Updates the state and reps for the current byte in the opts array. 305 */ updateOptStateAndReps()306 private void updateOptStateAndReps() { 307 int optPrev = opts[optCur].optPrev; 308 assert optPrev < optCur; 309 310 if (opts[optCur].prev1IsLiteral) { 311 --optPrev; 312 313 if (opts[optCur].hasPrev2) { 314 opts[optCur].state.set(opts[opts[optCur].optPrev2].state); 315 if (opts[optCur].backPrev2 < REPS) 316 opts[optCur].state.updateLongRep(); 317 else 318 opts[optCur].state.updateMatch(); 319 } else { 320 opts[optCur].state.set(opts[optPrev].state); 321 } 322 323 opts[optCur].state.updateLiteral(); 324 } else { 325 opts[optCur].state.set(opts[optPrev].state); 326 } 327 328 if (optPrev == optCur - 1) { 329 // Must be either a short rep or a literal. 330 assert opts[optCur].backPrev == 0 || opts[optCur].backPrev == -1; 331 332 if (opts[optCur].backPrev == 0) 333 opts[optCur].state.updateShortRep(); 334 else 335 opts[optCur].state.updateLiteral(); 336 337 System.arraycopy(opts[optPrev].reps, 0, 338 opts[optCur].reps, 0, REPS); 339 } else { 340 int back; 341 if (opts[optCur].prev1IsLiteral && opts[optCur].hasPrev2) { 342 optPrev = opts[optCur].optPrev2; 343 back = opts[optCur].backPrev2; 344 opts[optCur].state.updateLongRep(); 345 } else { 346 back = opts[optCur].backPrev; 347 if (back < REPS) 348 opts[optCur].state.updateLongRep(); 349 else 350 opts[optCur].state.updateMatch(); 351 } 352 353 if (back < REPS) { 354 opts[optCur].reps[0] = opts[optPrev].reps[back]; 355 356 int rep; 357 for (rep = 1; rep <= back; ++rep) 358 opts[optCur].reps[rep] = opts[optPrev].reps[rep - 1]; 359 360 for (; rep < REPS; ++rep) 361 opts[optCur].reps[rep] = opts[optPrev].reps[rep]; 362 } else { 363 opts[optCur].reps[0] = back - REPS; 364 System.arraycopy(opts[optPrev].reps, 0, 365 opts[optCur].reps, 1, REPS - 1); 366 } 367 } 368 } 369 370 /** 371 * Calculates prices of a literal, a short rep, and literal + rep0. 372 */ 373 private void calc1BytePrices(int pos, int posState, 374 int avail, int anyRepPrice) { 375 // This will be set to true if using a literal or a short rep. 376 boolean nextIsByte = false; 377 378 int curByte = lz.getByte(0); 379 int matchByte = lz.getByte(opts[optCur].reps[0] + 1); 380 381 // Try a literal. 382 int literalPrice = opts[optCur].price 383 + literalEncoder.getPrice(curByte, matchByte, lz.getByte(1), 384 pos, opts[optCur].state); 385 if (literalPrice < opts[optCur + 1].price) { 386 opts[optCur + 1].set1(literalPrice, optCur, -1); 387 nextIsByte = true; 388 } 389 390 // Try a short rep. 391 if (matchByte == curByte && (opts[optCur + 1].optPrev == optCur 392 || opts[optCur + 1].backPrev != 0)) { 393 int shortRepPrice = getShortRepPrice(anyRepPrice, 394 opts[optCur].state, 395 posState); 396 if (shortRepPrice <= opts[optCur + 1].price) { 397 opts[optCur + 1].set1(shortRepPrice, optCur, 0); 398 nextIsByte = true; 399 } 400 } 401 402 // If neither a literal nor a short rep was the cheapest choice, 403 // try literal + long rep0. 404 if (!nextIsByte && matchByte != curByte && avail > MATCH_LEN_MIN) { 405 int lenLimit = Math.min(niceLen, avail - 1); 406 int len = lz.getMatchLen(1, opts[optCur].reps[0], lenLimit); 407 408 if (len >= MATCH_LEN_MIN) { 409 nextState.set(opts[optCur].state); 410 nextState.updateLiteral(); 411 int nextPosState = (pos + 1) & posMask; 412 int price = literalPrice 413 + getLongRepAndLenPrice(0, len, 414 nextState, nextPosState); 415 416 int i = optCur + 1 + len; 417 while (optEnd < i) 418 opts[++optEnd].reset(); 419 420 if (price < opts[i].price) 421 opts[i].set2(price, optCur, 0); 422 } 423 } 424 } 425 426 /** 427 * Calculates prices of long rep and long rep + literal + rep0. 428 */ 429 private int calcLongRepPrices(int pos, int posState, 430 int avail, int anyRepPrice) { 431 int startLen = MATCH_LEN_MIN; 432 int lenLimit = Math.min(avail, niceLen); 433 434 for (int rep = 0; rep < REPS; ++rep) { 435 int len = lz.getMatchLen(opts[optCur].reps[rep], lenLimit); 436 if (len < MATCH_LEN_MIN) 437 continue; 438 439 while (optEnd < optCur + len) 440 opts[++optEnd].reset(); 441 442 int longRepPrice = getLongRepPrice(anyRepPrice, rep, 443 opts[optCur].state, posState); 444 445 for (int i = len; i >= MATCH_LEN_MIN; --i) { 446 int price = longRepPrice 447 + repLenEncoder.getPrice(i, posState); 448 if (price < opts[optCur + i].price) 449 opts[optCur + i].set1(price, optCur, rep); 450 } 451 452 if (rep == 0) 453 startLen = len + 1; 454 455 int len2Limit = Math.min(niceLen, avail - len - 1); 456 int len2 = lz.getMatchLen(len + 1, opts[optCur].reps[rep], 457 len2Limit); 458 459 if (len2 >= MATCH_LEN_MIN) { 460 // Rep 461 int price = longRepPrice 462 + repLenEncoder.getPrice(len, posState); 463 nextState.set(opts[optCur].state); 464 nextState.updateLongRep(); 465 466 // Literal 467 int curByte = lz.getByte(len, 0); 468 int matchByte = lz.getByte(0); // lz.getByte(len, len) 469 int prevByte = lz.getByte(len, 1); 470 price += literalEncoder.getPrice(curByte, matchByte, prevByte, 471 pos + len, nextState); 472 nextState.updateLiteral(); 473 474 // Rep0 475 int nextPosState = (pos + len + 1) & posMask; 476 price += getLongRepAndLenPrice(0, len2, 477 nextState, nextPosState); 478 479 int i = optCur + len + 1 + len2; 480 while (optEnd < i) 481 opts[++optEnd].reset(); 482 483 if (price < opts[i].price) 484 opts[i].set3(price, optCur, rep, len, 0); 485 } 486 } 487 488 return startLen; 489 } 490 491 /** 492 * Calculates prices of a normal match and normal match + literal + rep0. 493 */ 494 private void calcNormalMatchPrices(int pos, int posState, int avail, 495 int anyMatchPrice, int startLen) { 496 // If the longest match is so long that it would not fit into 497 // the opts array, shorten the matches. 498 if (matches.len[matches.count - 1] > avail) { 499 matches.count = 0; 500 while (matches.len[matches.count] < avail) 501 ++matches.count; 502 503 matches.len[matches.count++] = avail; 504 } 505 506 if (matches.len[matches.count - 1] < startLen) 507 return; 508 509 while (optEnd < optCur + matches.len[matches.count - 1]) 510 opts[++optEnd].reset(); 511 512 int normalMatchPrice = getNormalMatchPrice(anyMatchPrice, 513 opts[optCur].state); 514 515 int match = 0; 516 while (startLen > matches.len[match]) 517 ++match; 518 519 for (int len = startLen; ; ++len) { 520 int dist = matches.dist[match]; 521 522 // Calculate the price of a match of len bytes from the nearest 523 // possible distance. 524 int matchAndLenPrice = getMatchAndLenPrice(normalMatchPrice, 525 dist, len, posState); 526 if (matchAndLenPrice < opts[optCur + len].price) 527 opts[optCur + len].set1(matchAndLenPrice, 528 optCur, dist + REPS); 529 530 if (len != matches.len[match]) 531 continue; 532 533 // Try match + literal + rep0. First get the length of the rep0. 534 int len2Limit = Math.min(niceLen, avail - len - 1); 535 int len2 = lz.getMatchLen(len + 1, dist, len2Limit); 536 537 if (len2 >= MATCH_LEN_MIN) { 538 nextState.set(opts[optCur].state); 539 nextState.updateMatch(); 540 541 // Literal 542 int curByte = lz.getByte(len, 0); 543 int matchByte = lz.getByte(0); // lz.getByte(len, len) 544 int prevByte = lz.getByte(len, 1); 545 int price = matchAndLenPrice 546 + literalEncoder.getPrice(curByte, matchByte, 547 prevByte, pos + len, 548 nextState); 549 nextState.updateLiteral(); 550 551 // Rep0 552 int nextPosState = (pos + len + 1) & posMask; 553 price += getLongRepAndLenPrice(0, len2, 554 nextState, nextPosState); 555 556 int i = optCur + len + 1 + len2; 557 while (optEnd < i) 558 opts[++optEnd].reset(); 559 560 if (price < opts[i].price) 561 opts[i].set3(price, optCur, dist + REPS, len, 0); 562 } 563 564 if (++match == matches.count) 565 break; 566 } 567 } 568 } 569