1 /*
2  * LZMAEncoderNormal
3  *
4  * Authors: Lasse Collin <lasse.collin@tukaani.org>
5  *          Igor Pavlov <http://7-zip.org/>
6  *
7  * This file has been put into the public domain.
8  * You can do whatever you want with this file.
9  */
10 
11 package org.tukaani.xz.lzma;
12 
13 import org.tukaani.xz.ArrayCache;
14 import org.tukaani.xz.lz.LZEncoder;
15 import org.tukaani.xz.lz.Matches;
16 import org.tukaani.xz.rangecoder.RangeEncoder;
17 
18 final class LZMAEncoderNormal extends LZMAEncoder {
19     private static final int OPTS = 4096;
20 
21     private static final int EXTRA_SIZE_BEFORE = OPTS;
22     private static final int EXTRA_SIZE_AFTER = OPTS;
23 
24     private final Optimum[] opts = new Optimum[OPTS];
25     private int optCur = 0;
26     private int optEnd = 0;
27 
28     private Matches matches;
29 
30     // These are fields solely to avoid allocating the objects again and
31     // again on each function call.
32     private final int[] repLens = new int[REPS];
33     private final State nextState = new State();
34 
getMemoryUsage(int dictSize, int extraSizeBefore, int mf)35     static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) {
36         return LZEncoder.getMemoryUsage(dictSize,
37                    Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE),
38                    EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf)
39                + OPTS * 64 / 1024;
40     }
41 
LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb, int dictSize, int extraSizeBefore, int niceLen, int mf, int depthLimit, ArrayCache arrayCache)42     LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb,
43                              int dictSize, int extraSizeBefore,
44                              int niceLen, int mf, int depthLimit,
45                              ArrayCache arrayCache) {
46         super(rc, LZEncoder.getInstance(dictSize,
47                                         Math.max(extraSizeBefore,
48                                                  EXTRA_SIZE_BEFORE),
49                                         EXTRA_SIZE_AFTER,
50                                         niceLen, MATCH_LEN_MAX,
51                                         mf, depthLimit, arrayCache),
52               lc, lp, pb, dictSize, niceLen);
53 
54         for (int i = 0; i < OPTS; ++i)
55             opts[i] = new Optimum();
56     }
57 
reset()58     public void reset() {
59         optCur = 0;
60         optEnd = 0;
61         super.reset();
62     }
63 
64     /**
65      * Converts the opts array from backward indexes to forward indexes.
66      * Then it will be simple to get the next symbol from the array
67      * in later calls to <code>getNextSymbol()</code>.
68      */
convertOpts()69     private int convertOpts() {
70         optEnd = optCur;
71 
72         int optPrev = opts[optCur].optPrev;
73 
74         do {
75             Optimum opt = opts[optCur];
76 
77             if (opt.prev1IsLiteral) {
78                 opts[optPrev].optPrev = optCur;
79                 opts[optPrev].backPrev = -1;
80                 optCur = optPrev--;
81 
82                 if (opt.hasPrev2) {
83                     opts[optPrev].optPrev = optPrev + 1;
84                     opts[optPrev].backPrev = opt.backPrev2;
85                     optCur = optPrev;
86                     optPrev = opt.optPrev2;
87                 }
88             }
89 
90             int temp = opts[optPrev].optPrev;
91             opts[optPrev].optPrev = optCur;
92             optCur = optPrev;
93             optPrev = temp;
94         } while (optCur > 0);
95 
96         optCur = opts[0].optPrev;
97         back = opts[optCur].backPrev;
98         return optCur;
99     }
100 
getNextSymbol()101     int getNextSymbol() {
102         // If there are pending symbols from an earlier call to this
103         // function, return those symbols first.
104         if (optCur < optEnd) {
105             int len = opts[optCur].optPrev - optCur;
106             optCur = opts[optCur].optPrev;
107             back = opts[optCur].backPrev;
108             return len;
109         }
110 
111         assert optCur == optEnd;
112         optCur = 0;
113         optEnd = 0;
114         back = -1;
115 
116         if (readAhead == -1)
117             matches = getMatches();
118 
119         // Get the number of bytes available in the dictionary, but
120         // not more than the maximum match length. If there aren't
121         // enough bytes remaining to encode a match at all, return
122         // immediately to encode this byte as a literal.
123         int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX);
124         if (avail < MATCH_LEN_MIN)
125             return 1;
126 
127         // Get the lengths of repeated matches.
128         int repBest = 0;
129         for (int rep = 0; rep < REPS; ++rep) {
130             repLens[rep] = lz.getMatchLen(reps[rep], avail);
131 
132             if (repLens[rep] < MATCH_LEN_MIN) {
133                 repLens[rep] = 0;
134                 continue;
135             }
136 
137             if (repLens[rep] > repLens[repBest])
138                 repBest = rep;
139         }
140 
141         // Return if the best repeated match is at least niceLen bytes long.
142         if (repLens[repBest] >= niceLen) {
143             back = repBest;
144             skip(repLens[repBest] - 1);
145             return repLens[repBest];
146         }
147 
148         // Initialize mainLen and mainDist to the longest match found
149         // by the match finder.
150         int mainLen = 0;
151         int mainDist = 0;
152         if (matches.count > 0) {
153             mainLen = matches.len[matches.count - 1];
154             mainDist = matches.dist[matches.count - 1];
155 
156             // Return if it is at least niceLen bytes long.
157             if (mainLen >= niceLen) {
158                 back = mainDist + REPS;
159                 skip(mainLen - 1);
160                 return mainLen;
161             }
162         }
163 
164         int curByte = lz.getByte(0);
165         int matchByte = lz.getByte(reps[0] + 1);
166 
167         // If the match finder found no matches and this byte cannot be
168         // encoded as a repeated match (short or long), we must be return
169         // to have the byte encoded as a literal.
170         if (mainLen < MATCH_LEN_MIN && curByte != matchByte
171                 && repLens[repBest] < MATCH_LEN_MIN)
172             return 1;
173 
174 
175         int pos = lz.getPos();
176         int posState = pos & posMask;
177 
178         // Calculate the price of encoding the current byte as a literal.
179         {
180             int prevByte = lz.getByte(1);
181             int literalPrice = literalEncoder.getPrice(curByte, matchByte,
182                                                        prevByte, pos, state);
183             opts[1].set1(literalPrice, 0, -1);
184         }
185 
186         int anyMatchPrice = getAnyMatchPrice(state, posState);
187         int anyRepPrice = getAnyRepPrice(anyMatchPrice, state);
188 
189         // If it is possible to encode this byte as a short rep, see if
190         // it is cheaper than encoding it as a literal.
191         if (matchByte == curByte) {
192             int shortRepPrice = getShortRepPrice(anyRepPrice,
193                                                  state, posState);
194             if (shortRepPrice < opts[1].price)
195                 opts[1].set1(shortRepPrice, 0, 0);
196         }
197 
198         // Return if there is neither normal nor long repeated match. Use
199         // a short match instead of a literal if is is possible and cheaper.
200         optEnd = Math.max(mainLen, repLens[repBest]);
201         if (optEnd < MATCH_LEN_MIN) {
202             assert optEnd == 0 : optEnd;
203             back = opts[1].backPrev;
204             return 1;
205         }
206 
207 
208         // Update the lookup tables for distances and lengths before using
209         // those price calculation functions. (The price function above
210         // don't need these tables.)
211         updatePrices();
212 
213         // Initialize the state and reps of this position in opts[].
214         // updateOptStateAndReps() will need these to get the new
215         // state and reps for the next byte.
216         opts[0].state.set(state);
217         System.arraycopy(reps, 0, opts[0].reps, 0, REPS);
218 
219         // Initialize the prices for latter opts that will be used below.
220         for (int i = optEnd; i >= MATCH_LEN_MIN; --i)
221             opts[i].reset();
222 
223         // Calculate the prices of repeated matches of all lengths.
224         for (int rep = 0; rep < REPS; ++rep) {
225             int repLen = repLens[rep];
226             if (repLen < MATCH_LEN_MIN)
227                 continue;
228 
229             int longRepPrice = getLongRepPrice(anyRepPrice, rep,
230                                                state, posState);
231             do {
232                 int price = longRepPrice + repLenEncoder.getPrice(repLen,
233                                                                   posState);
234                 if (price < opts[repLen].price)
235                     opts[repLen].set1(price, 0, rep);
236             } while (--repLen >= MATCH_LEN_MIN);
237         }
238 
239         // Calculate the prices of normal matches that are longer than rep0.
240         {
241             int len = Math.max(repLens[0] + 1, MATCH_LEN_MIN);
242             if (len <= mainLen) {
243                 int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
244                                                            state);
245 
246                 // Set i to the index of the shortest match that is
247                 // at least len bytes long.
248                 int i = 0;
249                 while (len > matches.len[i])
250                     ++i;
251 
252                 while (true) {
253                     int dist = matches.dist[i];
254                     int price = getMatchAndLenPrice(normalMatchPrice,
255                                                     dist, len, posState);
256                     if (price < opts[len].price)
257                         opts[len].set1(price, 0, dist + REPS);
258 
259                     if (len == matches.len[i])
260                         if (++i == matches.count)
261                             break;
262 
263                     ++len;
264                 }
265             }
266         }
267 
268 
269         avail = Math.min(lz.getAvail(), OPTS - 1);
270 
271         // Get matches for later bytes and optimize the use of LZMA symbols
272         // by calculating the prices and picking the cheapest symbol
273         // combinations.
274         while (++optCur < optEnd) {
275             matches = getMatches();
276             if (matches.count > 0
277                     && matches.len[matches.count - 1] >= niceLen)
278                 break;
279 
280             --avail;
281             ++pos;
282             posState = pos & posMask;
283 
284             updateOptStateAndReps();
285             anyMatchPrice = opts[optCur].price
286                             + getAnyMatchPrice(opts[optCur].state, posState);
287             anyRepPrice = getAnyRepPrice(anyMatchPrice, opts[optCur].state);
288 
289             calc1BytePrices(pos, posState, avail, anyRepPrice);
290 
291             if (avail >= MATCH_LEN_MIN) {
292                 int startLen = calcLongRepPrices(pos, posState,
293                                                  avail, anyRepPrice);
294                 if (matches.count > 0)
295                     calcNormalMatchPrices(pos, posState, avail,
296                                           anyMatchPrice, startLen);
297             }
298         }
299 
300         return convertOpts();
301     }
302 
303     /**
304      * Updates the state and reps for the current byte in the opts array.
305      */
updateOptStateAndReps()306     private void updateOptStateAndReps() {
307         int optPrev = opts[optCur].optPrev;
308         assert optPrev < optCur;
309 
310         if (opts[optCur].prev1IsLiteral) {
311             --optPrev;
312 
313             if (opts[optCur].hasPrev2) {
314                 opts[optCur].state.set(opts[opts[optCur].optPrev2].state);
315                 if (opts[optCur].backPrev2 < REPS)
316                     opts[optCur].state.updateLongRep();
317                 else
318                     opts[optCur].state.updateMatch();
319             } else {
320                 opts[optCur].state.set(opts[optPrev].state);
321             }
322 
323             opts[optCur].state.updateLiteral();
324         } else {
325             opts[optCur].state.set(opts[optPrev].state);
326         }
327 
328         if (optPrev == optCur - 1) {
329             // Must be either a short rep or a literal.
330             assert opts[optCur].backPrev == 0 || opts[optCur].backPrev == -1;
331 
332             if (opts[optCur].backPrev == 0)
333                 opts[optCur].state.updateShortRep();
334             else
335                 opts[optCur].state.updateLiteral();
336 
337             System.arraycopy(opts[optPrev].reps, 0,
338                              opts[optCur].reps, 0, REPS);
339         } else {
340             int back;
341             if (opts[optCur].prev1IsLiteral && opts[optCur].hasPrev2) {
342                 optPrev = opts[optCur].optPrev2;
343                 back = opts[optCur].backPrev2;
344                 opts[optCur].state.updateLongRep();
345             } else {
346                 back = opts[optCur].backPrev;
347                 if (back < REPS)
348                     opts[optCur].state.updateLongRep();
349                 else
350                     opts[optCur].state.updateMatch();
351             }
352 
353             if (back < REPS) {
354                 opts[optCur].reps[0] = opts[optPrev].reps[back];
355 
356                 int rep;
357                 for (rep = 1; rep <= back; ++rep)
358                     opts[optCur].reps[rep] = opts[optPrev].reps[rep - 1];
359 
360                 for (; rep < REPS; ++rep)
361                     opts[optCur].reps[rep] = opts[optPrev].reps[rep];
362             } else {
363                 opts[optCur].reps[0] = back - REPS;
364                 System.arraycopy(opts[optPrev].reps, 0,
365                                  opts[optCur].reps, 1, REPS - 1);
366             }
367         }
368     }
369 
370     /**
371      * Calculates prices of a literal, a short rep, and literal + rep0.
372      */
373     private void calc1BytePrices(int pos, int posState,
374                                  int avail, int anyRepPrice) {
375         // This will be set to true if using a literal or a short rep.
376         boolean nextIsByte = false;
377 
378         int curByte = lz.getByte(0);
379         int matchByte = lz.getByte(opts[optCur].reps[0] + 1);
380 
381         // Try a literal.
382         int literalPrice = opts[optCur].price
383                 + literalEncoder.getPrice(curByte, matchByte, lz.getByte(1),
384                                           pos, opts[optCur].state);
385         if (literalPrice < opts[optCur + 1].price) {
386             opts[optCur + 1].set1(literalPrice, optCur, -1);
387             nextIsByte = true;
388         }
389 
390         // Try a short rep.
391         if (matchByte == curByte && (opts[optCur + 1].optPrev == optCur
392                                       || opts[optCur + 1].backPrev != 0)) {
393             int shortRepPrice = getShortRepPrice(anyRepPrice,
394                                                  opts[optCur].state,
395                                                  posState);
396             if (shortRepPrice <= opts[optCur + 1].price) {
397                 opts[optCur + 1].set1(shortRepPrice, optCur, 0);
398                 nextIsByte = true;
399             }
400         }
401 
402         // If neither a literal nor a short rep was the cheapest choice,
403         // try literal + long rep0.
404         if (!nextIsByte && matchByte != curByte && avail > MATCH_LEN_MIN) {
405             int lenLimit = Math.min(niceLen, avail - 1);
406             int len = lz.getMatchLen(1, opts[optCur].reps[0], lenLimit);
407 
408             if (len >= MATCH_LEN_MIN) {
409                 nextState.set(opts[optCur].state);
410                 nextState.updateLiteral();
411                 int nextPosState = (pos + 1) & posMask;
412                 int price = literalPrice
413                             + getLongRepAndLenPrice(0, len,
414                                                     nextState, nextPosState);
415 
416                 int i = optCur + 1 + len;
417                 while (optEnd < i)
418                     opts[++optEnd].reset();
419 
420                 if (price < opts[i].price)
421                     opts[i].set2(price, optCur, 0);
422             }
423         }
424     }
425 
426     /**
427      * Calculates prices of long rep and long rep + literal + rep0.
428      */
429     private int calcLongRepPrices(int pos, int posState,
430                                   int avail, int anyRepPrice) {
431         int startLen = MATCH_LEN_MIN;
432         int lenLimit = Math.min(avail, niceLen);
433 
434         for (int rep = 0; rep < REPS; ++rep) {
435             int len = lz.getMatchLen(opts[optCur].reps[rep], lenLimit);
436             if (len < MATCH_LEN_MIN)
437                 continue;
438 
439             while (optEnd < optCur + len)
440                 opts[++optEnd].reset();
441 
442             int longRepPrice = getLongRepPrice(anyRepPrice, rep,
443                                                opts[optCur].state, posState);
444 
445             for (int i = len; i >= MATCH_LEN_MIN; --i) {
446                 int price = longRepPrice
447                             + repLenEncoder.getPrice(i, posState);
448                 if (price < opts[optCur + i].price)
449                     opts[optCur + i].set1(price, optCur, rep);
450             }
451 
452             if (rep == 0)
453                 startLen = len + 1;
454 
455             int len2Limit = Math.min(niceLen, avail - len - 1);
456             int len2 = lz.getMatchLen(len + 1, opts[optCur].reps[rep],
457                                       len2Limit);
458 
459             if (len2 >= MATCH_LEN_MIN) {
460                 // Rep
461                 int price = longRepPrice
462                             + repLenEncoder.getPrice(len, posState);
463                 nextState.set(opts[optCur].state);
464                 nextState.updateLongRep();
465 
466                 // Literal
467                 int curByte = lz.getByte(len, 0);
468                 int matchByte = lz.getByte(0); // lz.getByte(len, len)
469                 int prevByte = lz.getByte(len, 1);
470                 price += literalEncoder.getPrice(curByte, matchByte, prevByte,
471                                                  pos + len, nextState);
472                 nextState.updateLiteral();
473 
474                 // Rep0
475                 int nextPosState = (pos + len + 1) & posMask;
476                 price += getLongRepAndLenPrice(0, len2,
477                                                nextState, nextPosState);
478 
479                 int i = optCur + len + 1 + len2;
480                 while (optEnd < i)
481                     opts[++optEnd].reset();
482 
483                 if (price < opts[i].price)
484                     opts[i].set3(price, optCur, rep, len, 0);
485             }
486         }
487 
488         return startLen;
489     }
490 
491     /**
492      * Calculates prices of a normal match and normal match + literal + rep0.
493      */
494     private void calcNormalMatchPrices(int pos, int posState, int avail,
495                                        int anyMatchPrice, int startLen) {
496         // If the longest match is so long that it would not fit into
497         // the opts array, shorten the matches.
498         if (matches.len[matches.count - 1] > avail) {
499             matches.count = 0;
500             while (matches.len[matches.count] < avail)
501                 ++matches.count;
502 
503             matches.len[matches.count++] = avail;
504         }
505 
506         if (matches.len[matches.count - 1] < startLen)
507             return;
508 
509         while (optEnd < optCur + matches.len[matches.count - 1])
510             opts[++optEnd].reset();
511 
512         int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
513                                                    opts[optCur].state);
514 
515         int match = 0;
516         while (startLen > matches.len[match])
517             ++match;
518 
519         for (int len = startLen; ; ++len) {
520             int dist = matches.dist[match];
521 
522             // Calculate the price of a match of len bytes from the nearest
523             // possible distance.
524             int matchAndLenPrice = getMatchAndLenPrice(normalMatchPrice,
525                                                        dist, len, posState);
526             if (matchAndLenPrice < opts[optCur + len].price)
527                 opts[optCur + len].set1(matchAndLenPrice,
528                                         optCur, dist + REPS);
529 
530             if (len != matches.len[match])
531                 continue;
532 
533             // Try match + literal + rep0. First get the length of the rep0.
534             int len2Limit = Math.min(niceLen, avail - len - 1);
535             int len2 = lz.getMatchLen(len + 1, dist, len2Limit);
536 
537             if (len2 >= MATCH_LEN_MIN) {
538                 nextState.set(opts[optCur].state);
539                 nextState.updateMatch();
540 
541                 // Literal
542                 int curByte = lz.getByte(len, 0);
543                 int matchByte = lz.getByte(0); // lz.getByte(len, len)
544                 int prevByte = lz.getByte(len, 1);
545                 int price = matchAndLenPrice
546                         + literalEncoder.getPrice(curByte, matchByte,
547                                                   prevByte, pos + len,
548                                                   nextState);
549                 nextState.updateLiteral();
550 
551                 // Rep0
552                 int nextPosState = (pos + len + 1) & posMask;
553                 price += getLongRepAndLenPrice(0, len2,
554                                                nextState, nextPosState);
555 
556                 int i = optCur + len + 1 + len2;
557                 while (optEnd < i)
558                     opts[++optEnd].reset();
559 
560                 if (price < opts[i].price)
561                     opts[i].set3(price, optCur, dist + REPS, len, 0);
562             }
563 
564             if (++match == matches.count)
565                 break;
566         }
567     }
568 }
569