/*
 * LZMAInputStream
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <http://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */
10 
11 package org.tukaani.xz;
12 
13 import java.io.InputStream;
14 import java.io.DataInputStream;
15 import java.io.IOException;
16 import org.tukaani.xz.lz.LZDecoder;
17 import org.tukaani.xz.rangecoder.RangeDecoderFromStream;
18 import org.tukaani.xz.lzma.LZMADecoder;
19 
/**
 * Decompresses legacy .lzma files and raw LZMA streams (no .lzma header).
 * <p>
 * <b>IMPORTANT:</b> In contrast to other classes in this package, this class
 * reads data from its input stream one byte at a time. If the input stream
 * is, for example, {@link java.io.FileInputStream}, wrapping it into
 * {@link java.io.BufferedInputStream} tends to improve performance a lot.
 * This is not automatically done by this class because there may be use
 * cases where it is desired that this class won't read any bytes past
 * the end of the LZMA stream.
 * <p>
 * Even when using <code>BufferedInputStream</code>, the performance tends
 * to be worse (maybe 10-20&nbsp;% slower) than with {@link LZMA2InputStream}
 * or {@link XZInputStream} (when the .xz file contains LZMA2-compressed data).
 *
 * @since 1.4
 */
37 public class LZMAInputStream extends InputStream {
38     /**
39      * Largest dictionary size supported by this implementation.
40      * <p>
41      * LZMA allows dictionaries up to one byte less than 4 GiB. This
42      * implementation supports only 16 bytes less than 2 GiB. This
43      * limitation is due to Java using signed 32-bit integers for array
44      * indexing. The limitation shouldn't matter much in practice since so
45      * huge dictionaries are not normally used.
46      */
47     public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15;
48 
49     private InputStream in;
50     private ArrayCache arrayCache;
51     private LZDecoder lz;
52     private RangeDecoderFromStream rc;
53     private LZMADecoder lzma;
54 
55     private boolean endReached = false;
56 
57     private final byte[] tempBuf = new byte[1];
58 
59     /**
60      * Number of uncompressed bytes left to be decompressed, or -1 if
61      * the end marker is used.
62      */
63     private long remainingSize;
64 
65     private IOException exception = null;
66 
67     /**
68      * Gets approximate decompressor memory requirements as kibibytes for
69      * the given dictionary size and LZMA properties byte (lc, lp, and pb).
70      *
71      * @param       dictSize    LZMA dictionary size as bytes, should be
72      *                          in the range [<code>0</code>,
73      *                          <code>DICT_SIZE_MAX</code>]
74      *
75      * @param       propsByte   LZMA properties byte that encodes the values
76      *                          of lc, lp, and pb
77      *
78      * @return      approximate memory requirements as kibibytes (KiB)
79      *
80      * @throws      UnsupportedOptionsException
81      *                          if <code>dictSize</code> is outside
82      *                          the range [<code>0</code>,
83      *                          <code>DICT_SIZE_MAX</code>]
84      *
85      * @throws      CorruptedInputException
86      *                          if <code>propsByte</code> is invalid
87      */
getMemoryUsage(int dictSize, byte propsByte)88     public static int getMemoryUsage(int dictSize, byte propsByte)
89             throws UnsupportedOptionsException, CorruptedInputException {
90         if (dictSize < 0 || dictSize > DICT_SIZE_MAX)
91             throw new UnsupportedOptionsException(
92                     "LZMA dictionary is too big for this implementation");
93 
94         int props = propsByte & 0xFF;
95         if (props > (4 * 5 + 4) * 9 + 8)
96             throw new CorruptedInputException("Invalid LZMA properties byte");
97 
98         props %= 9 * 5;
99         int lp = props / 9;
100         int lc = props - lp * 9;
101 
102         return getMemoryUsage(dictSize, lc, lp);
103     }
104 
105     /**
106      * Gets approximate decompressor memory requirements as kibibytes for
107      * the given dictionary size, lc, and lp. Note that pb isn't needed.
108      *
109      * @param       dictSize    LZMA dictionary size as bytes, must be
110      *                          in the range [<code>0</code>,
111      *                          <code>DICT_SIZE_MAX</code>]
112      *
113      * @param       lc          number of literal context bits, must be
114      *                          in the range [0, 8]
115      *
116      * @param       lp          number of literal position bits, must be
117      *                          in the range [0, 4]
118      *
119      * @return      approximate memory requirements as kibibytes (KiB)
120      */
getMemoryUsage(int dictSize, int lc, int lp)121     public static int getMemoryUsage(int dictSize, int lc, int lp) {
122         if (lc < 0 || lc > 8 || lp < 0 || lp > 4)
123             throw new IllegalArgumentException("Invalid lc or lp");
124 
125         // Probability variables have the type "short". There are
126         // 0x300 (768) probability variables in each literal subcoder.
127         // The number of literal subcoders is 2^(lc + lp).
128         //
129         // Roughly 10 KiB for the base state + LZ decoder's dictionary buffer
130         // + sizeof(short) * number probability variables per literal subcoder
131         //   * number of literal subcoders
132         return 10 + getDictSize(dictSize) / 1024
133                + ((2 * 0x300) << (lc + lp)) / 1024;
134     }
135 
getDictSize(int dictSize)136     private static int getDictSize(int dictSize) {
137         if (dictSize < 0 || dictSize > DICT_SIZE_MAX)
138             throw new IllegalArgumentException(
139                     "LZMA dictionary is too big for this implementation");
140 
141         // For performance reasons, use a 4 KiB dictionary if something
142         // smaller was requested. It's a rare situation and the performance
143         // difference isn't huge, and it starts to matter mostly when the
144         // dictionary is just a few bytes. But we need to handle the special
145         // case of dictSize == 0 anyway, which is an allowed value but in
146         // practice means one-byte dictionary.
147         //
148         // Note that using a dictionary bigger than specified in the headers
149         // can hide errors if there is a reference to data beyond the original
150         // dictionary size but is still within 4 KiB.
151         if (dictSize < 4096)
152             dictSize = 4096;
153 
154         // Round dictionary size upward to a multiple of 16. This way LZMA
155         // can use LZDecoder.getPos() for calculating LZMA's posMask.
156         return (dictSize + 15) & ~15;
157     }
158 
159     /**
160      * Creates a new .lzma file format decompressor without
161      * a memory usage limit.
162      *
163      * @param       in          input stream from which .lzma data is read;
164      *                          it might be a good idea to wrap it in
165      *                          <code>BufferedInputStream</code>, see the
166      *                          note at the top of this page
167      *
168      * @throws      CorruptedInputException
169      *                          file is corrupt or perhaps not in
170      *                          the .lzma format at all
171      *
172      * @throws      UnsupportedOptionsException
173      *                          dictionary size or uncompressed size is too
174      *                          big for this implementation
175      *
176      * @throws      EOFException
177      *                          file is truncated or perhaps not in
178      *                          the .lzma format at all
179      *
180      * @throws      IOException may be thrown by <code>in</code>
181      */
LZMAInputStream(InputStream in)182     public LZMAInputStream(InputStream in) throws IOException {
183         this(in, -1);
184     }
185 
186     /**
187      * Creates a new .lzma file format decompressor without
188      * a memory usage limit.
189      * <p>
190      * This is identical to <code>LZMAInputStream(InputStream)</code>
191      * except that this also takes the <code>arrayCache</code> argument.
192      *
193      * @param       in          input stream from which .lzma data is read;
194      *                          it might be a good idea to wrap it in
195      *                          <code>BufferedInputStream</code>, see the
196      *                          note at the top of this page
197      *
198      *
199      * @param       arrayCache  cache to be used for allocating large arrays
200      *
201      * @throws      CorruptedInputException
202      *                          file is corrupt or perhaps not in
203      *                          the .lzma format at all
204      *
205      * @throws      UnsupportedOptionsException
206      *                          dictionary size or uncompressed size is too
207      *                          big for this implementation
208      *
209      * @throws      EOFException
210      *                          file is truncated or perhaps not in
211      *                          the .lzma format at all
212      *
213      * @throws      IOException may be thrown by <code>in</code>
214      *
215      * @since 1.7
216      */
LZMAInputStream(InputStream in, ArrayCache arrayCache)217     public LZMAInputStream(InputStream in, ArrayCache arrayCache)
218             throws IOException {
219         this(in, -1, arrayCache);
220     }
221 
222     /**
223      * Creates a new .lzma file format decompressor with an optional
224      * memory usage limit.
225      *
226      * @param       in          input stream from which .lzma data is read;
227      *                          it might be a good idea to wrap it in
228      *                          <code>BufferedInputStream</code>, see the
229      *                          note at the top of this page
230      *
231      * @param       memoryLimit memory usage limit in kibibytes (KiB)
232      *                          or <code>-1</code> to impose no
233      *                          memory usage limit
234      *
235      * @throws      CorruptedInputException
236      *                          file is corrupt or perhaps not in
237      *                          the .lzma format at all
238      *
239      * @throws      UnsupportedOptionsException
240      *                          dictionary size or uncompressed size is too
241      *                          big for this implementation
242      *
243      * @throws      MemoryLimitException
244      *                          memory usage limit was exceeded
245      *
246      * @throws      EOFException
247      *                          file is truncated or perhaps not in
248      *                          the .lzma format at all
249      *
250      * @throws      IOException may be thrown by <code>in</code>
251      */
LZMAInputStream(InputStream in, int memoryLimit)252     public LZMAInputStream(InputStream in, int memoryLimit)
253             throws IOException {
254         this(in, memoryLimit, ArrayCache.getDefaultCache());
255     }
256 
257     /**
258      * Creates a new .lzma file format decompressor with an optional
259      * memory usage limit.
260      * <p>
261      * This is identical to <code>LZMAInputStream(InputStream, int)</code>
262      * except that this also takes the <code>arrayCache</code> argument.
263      *
264      * @param       in          input stream from which .lzma data is read;
265      *                          it might be a good idea to wrap it in
266      *                          <code>BufferedInputStream</code>, see the
267      *                          note at the top of this page
268      *
269      * @param       memoryLimit memory usage limit in kibibytes (KiB)
270      *                          or <code>-1</code> to impose no
271      *                          memory usage limit
272      *
273      * @param       arrayCache  cache to be used for allocating large arrays
274      *
275      * @throws      CorruptedInputException
276      *                          file is corrupt or perhaps not in
277      *                          the .lzma format at all
278      *
279      * @throws      UnsupportedOptionsException
280      *                          dictionary size or uncompressed size is too
281      *                          big for this implementation
282      *
283      * @throws      MemoryLimitException
284      *                          memory usage limit was exceeded
285      *
286      * @throws      EOFException
287      *                          file is truncated or perhaps not in
288      *                          the .lzma format at all
289      *
290      * @throws      IOException may be thrown by <code>in</code>
291      *
292      * @since 1.7
293      */
LZMAInputStream(InputStream in, int memoryLimit, ArrayCache arrayCache)294     public LZMAInputStream(InputStream in, int memoryLimit,
295                            ArrayCache arrayCache) throws IOException {
296         DataInputStream inData = new DataInputStream(in);
297 
298         // Properties byte (lc, lp, and pb)
299         byte propsByte = inData.readByte();
300 
301         // Dictionary size is an unsigned 32-bit little endian integer.
302         int dictSize = 0;
303         for (int i = 0; i < 4; ++i)
304             dictSize |= inData.readUnsignedByte() << (8 * i);
305 
306         // Uncompressed size is an unsigned 64-bit little endian integer.
307         // The maximum 64-bit value is a special case (becomes -1 here)
308         // which indicates that the end marker is used instead of knowing
309         // the uncompressed size beforehand.
310         long uncompSize = 0;
311         for (int i = 0; i < 8; ++i)
312             uncompSize |= (long)inData.readUnsignedByte() << (8 * i);
313 
314         // Check the memory usage limit.
315         int memoryNeeded = getMemoryUsage(dictSize, propsByte);
316         if (memoryLimit != -1 && memoryNeeded > memoryLimit)
317             throw new MemoryLimitException(memoryNeeded, memoryLimit);
318 
319         initialize(in, uncompSize, propsByte, dictSize, null, arrayCache);
320     }
321 
322     /**
323      * Creates a new input stream that decompresses raw LZMA data (no .lzma
324      * header) from <code>in</code>.
325      * <p>
326      * The caller needs to know if the "end of payload marker (EOPM)" alias
327      * "end of stream marker (EOS marker)" alias "end marker" present.
328      * If the end marker isn't used, the caller must know the exact
329      * uncompressed size of the stream.
330      * <p>
331      * The caller also needs to provide the LZMA properties byte that encodes
332      * the number of literal context bits (lc), literal position bits (lp),
333      * and position bits (pb).
334      * <p>
335      * The dictionary size used when compressing is also needed. Specifying
336      * a too small dictionary size will prevent decompressing the stream.
337      * Specifying a too big dictionary is waste of memory but decompression
338      * will work.
339      * <p>
340      * There is no need to specify a dictionary bigger than
341      * the uncompressed size of the data even if a bigger dictionary
342      * was used when compressing. If you know the uncompressed size
343      * of the data, this might allow saving some memory.
344      *
345      * @param       in          input stream from which compressed
346      *                          data is read
347      *
348      * @param       uncompSize  uncompressed size of the LZMA stream or -1
349      *                          if the end marker is used in the LZMA stream
350      *
351      * @param       propsByte   LZMA properties byte that has the encoded
352      *                          values for literal context bits (lc), literal
353      *                          position bits (lp), and position bits (pb)
354      *
355      * @param       dictSize    dictionary size as bytes, must be in the range
356      *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
357      *
358      * @throws      CorruptedInputException
359      *                          if <code>propsByte</code> is invalid or
360      *                          the first input byte is not 0x00
361      *
362      * @throws      UnsupportedOptionsException
363      *                          dictionary size or uncompressed size is too
364      *                          big for this implementation
365      *
366      *
367      */
LZMAInputStream(InputStream in, long uncompSize, byte propsByte, int dictSize)368     public LZMAInputStream(InputStream in, long uncompSize, byte propsByte,
369                            int dictSize) throws IOException {
370         initialize(in, uncompSize, propsByte, dictSize, null,
371                    ArrayCache.getDefaultCache());
372     }
373 
374     /**
375      * Creates a new input stream that decompresses raw LZMA data (no .lzma
376      * header) from <code>in</code> optionally with a preset dictionary.
377      *
378      * @param       in          input stream from which LZMA-compressed
379      *                          data is read
380      *
381      * @param       uncompSize  uncompressed size of the LZMA stream or -1
382      *                          if the end marker is used in the LZMA stream
383      *
384      * @param       propsByte   LZMA properties byte that has the encoded
385      *                          values for literal context bits (lc), literal
386      *                          position bits (lp), and position bits (pb)
387      *
388      * @param       dictSize    dictionary size as bytes, must be in the range
389      *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
390      *
391      * @param       presetDict  preset dictionary or <code>null</code>
392      *                          to use no preset dictionary
393      *
394      * @throws      CorruptedInputException
395      *                          if <code>propsByte</code> is invalid or
396      *                          the first input byte is not 0x00
397      *
398      * @throws      UnsupportedOptionsException
399      *                          dictionary size or uncompressed size is too
400      *                          big for this implementation
401      *
402      * @throws      EOFException file is truncated or corrupt
403      *
404      * @throws      IOException may be thrown by <code>in</code>
405      */
LZMAInputStream(InputStream in, long uncompSize, byte propsByte, int dictSize, byte[] presetDict)406     public LZMAInputStream(InputStream in, long uncompSize, byte propsByte,
407                            int dictSize, byte[] presetDict)
408             throws IOException {
409         initialize(in, uncompSize, propsByte, dictSize, presetDict,
410                    ArrayCache.getDefaultCache());
411     }
412 
413     /**
414      * Creates a new input stream that decompresses raw LZMA data (no .lzma
415      * header) from <code>in</code> optionally with a preset dictionary.
416      * <p>
417      * This is identical to <code>LZMAInputStream(InputStream, long, byte, int,
418      * byte[])</code> except that this also takes the <code>arrayCache</code>
419      * argument.
420      *
421      * @param       in          input stream from which LZMA-compressed
422      *                          data is read
423      *
424      * @param       uncompSize  uncompressed size of the LZMA stream or -1
425      *                          if the end marker is used in the LZMA stream
426      *
427      * @param       propsByte   LZMA properties byte that has the encoded
428      *                          values for literal context bits (lc), literal
429      *                          position bits (lp), and position bits (pb)
430      *
431      * @param       dictSize    dictionary size as bytes, must be in the range
432      *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
433      *
434      * @param       presetDict  preset dictionary or <code>null</code>
435      *                          to use no preset dictionary
436      *
437      * @param       arrayCache  cache to be used for allocating large arrays
438      *
439      * @throws      CorruptedInputException
440      *                          if <code>propsByte</code> is invalid or
441      *                          the first input byte is not 0x00
442      *
443      * @throws      UnsupportedOptionsException
444      *                          dictionary size or uncompressed size is too
445      *                          big for this implementation
446      *
447      * @throws      EOFException file is truncated or corrupt
448      *
449      * @throws      IOException may be thrown by <code>in</code>
450      *
451      * @since 1.7
452      */
LZMAInputStream(InputStream in, long uncompSize, byte propsByte, int dictSize, byte[] presetDict, ArrayCache arrayCache)453     public LZMAInputStream(InputStream in, long uncompSize, byte propsByte,
454                            int dictSize, byte[] presetDict,
455                            ArrayCache arrayCache)
456             throws IOException {
457         initialize(in, uncompSize, propsByte, dictSize, presetDict,
458                    arrayCache);
459     }
460 
461     /**
462      * Creates a new input stream that decompresses raw LZMA data (no .lzma
463      * header) from <code>in</code> optionally with a preset dictionary.
464      *
465      * @param       in          input stream from which LZMA-compressed
466      *                          data is read
467      *
468      * @param       uncompSize  uncompressed size of the LZMA stream or -1
469      *                          if the end marker is used in the LZMA stream
470      *
471      * @param       lc          number of literal context bits, must be
472      *                          in the range [0, 8]
473      *
474      * @param       lp          number of literal position bits, must be
475      *                          in the range [0, 4]
476      *
477      * @param       pb          number position bits, must be
478      *                          in the range [0, 4]
479      *
480      * @param       dictSize    dictionary size as bytes, must be in the range
481      *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
482      *
483      * @param       presetDict  preset dictionary or <code>null</code>
484      *                          to use no preset dictionary
485      *
486      * @throws      CorruptedInputException
487      *                          if the first input byte is not 0x00
488      *
489      * @throws      EOFException file is truncated or corrupt
490      *
491      * @throws      IOException may be thrown by <code>in</code>
492      */
LZMAInputStream(InputStream in, long uncompSize, int lc, int lp, int pb, int dictSize, byte[] presetDict)493     public LZMAInputStream(InputStream in, long uncompSize,
494                            int lc, int lp, int pb,
495                            int dictSize, byte[] presetDict)
496             throws IOException {
497         initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict,
498                    ArrayCache.getDefaultCache());
499     }
500 
501     /**
502      * Creates a new input stream that decompresses raw LZMA data (no .lzma
503      * header) from <code>in</code> optionally with a preset dictionary.
504      * <p>
505      * This is identical to <code>LZMAInputStream(InputStream, long, int, int,
506      * int, int, byte[])</code> except that this also takes the
507      * <code>arrayCache</code> argument.
508      *
509      * @param       in          input stream from which LZMA-compressed
510      *                          data is read
511      *
512      * @param       uncompSize  uncompressed size of the LZMA stream or -1
513      *                          if the end marker is used in the LZMA stream
514      *
515      * @param       lc          number of literal context bits, must be
516      *                          in the range [0, 8]
517      *
518      * @param       lp          number of literal position bits, must be
519      *                          in the range [0, 4]
520      *
521      * @param       pb          number position bits, must be
522      *                          in the range [0, 4]
523      *
524      * @param       dictSize    dictionary size as bytes, must be in the range
525      *                          [<code>0</code>, <code>DICT_SIZE_MAX</code>]
526      *
527      * @param       presetDict  preset dictionary or <code>null</code>
528      *                          to use no preset dictionary
529      *
530      * @param       arrayCache  cache to be used for allocating large arrays
531      *
532      * @throws      CorruptedInputException
533      *                          if the first input byte is not 0x00
534      *
535      * @throws      EOFException file is truncated or corrupt
536      *
537      * @throws      IOException may be thrown by <code>in</code>
538      *
539      * @since 1.7
540      */
LZMAInputStream(InputStream in, long uncompSize, int lc, int lp, int pb, int dictSize, byte[] presetDict, ArrayCache arrayCache)541     public LZMAInputStream(InputStream in, long uncompSize,
542                            int lc, int lp, int pb,
543                            int dictSize, byte[] presetDict,
544                            ArrayCache arrayCache)
545             throws IOException {
546         initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict,
547                    arrayCache);
548     }
549 
initialize(InputStream in, long uncompSize, byte propsByte, int dictSize, byte[] presetDict, ArrayCache arrayCache)550     private void initialize(InputStream in, long uncompSize, byte propsByte,
551                             int dictSize, byte[] presetDict,
552                             ArrayCache arrayCache)
553             throws IOException {
554         // Validate the uncompressed size since the other "initialize" throws
555         // IllegalArgumentException if uncompSize < -1.
556         if (uncompSize < -1)
557             throw new UnsupportedOptionsException(
558                     "Uncompressed size is too big");
559 
560         // Decode the properties byte. In contrast to LZMA2, there is no
561         // limit of lc + lp <= 4.
562         int props = propsByte & 0xFF;
563         if (props > (4 * 5 + 4) * 9 + 8)
564             throw new CorruptedInputException("Invalid LZMA properties byte");
565 
566         int pb = props / (9 * 5);
567         props -= pb * 9 * 5;
568         int lp = props / 9;
569         int lc = props - lp * 9;
570 
571         // Validate the dictionary size since the other "initialize" throws
572         // IllegalArgumentException if dictSize is not supported.
573         if (dictSize < 0 || dictSize > DICT_SIZE_MAX)
574             throw new UnsupportedOptionsException(
575                     "LZMA dictionary is too big for this implementation");
576 
577         initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict,
578                    arrayCache);
579     }
580 
initialize(InputStream in, long uncompSize, int lc, int lp, int pb, int dictSize, byte[] presetDict, ArrayCache arrayCache)581     private void initialize(InputStream in, long uncompSize,
582                             int lc, int lp, int pb,
583                             int dictSize, byte[] presetDict,
584                             ArrayCache arrayCache)
585             throws IOException {
586         // getDictSize validates dictSize and gives a message in
587         // the exception too, so skip validating dictSize here.
588         if (uncompSize < -1 || lc < 0 || lc > 8 || lp < 0 || lp > 4
589                 || pb < 0 || pb > 4)
590             throw new IllegalArgumentException();
591 
592         this.in = in;
593         this.arrayCache = arrayCache;
594 
595         // If uncompressed size is known, use it to avoid wasting memory for
596         // a uselessly large dictionary buffer.
597         dictSize = getDictSize(dictSize);
598         if (uncompSize >= 0 && dictSize > uncompSize)
599             dictSize = getDictSize((int)uncompSize);
600 
601         lz = new LZDecoder(getDictSize(dictSize), presetDict, arrayCache);
602         rc = new RangeDecoderFromStream(in);
603         lzma = new LZMADecoder(lz, rc, lc, lp, pb);
604 
605         remainingSize = uncompSize;
606     }
607 
608     /**
609      * Decompresses the next byte from this input stream.
610      * <p>
611      * Reading lots of data with <code>read()</code> from this input stream
612      * may be inefficient. Wrap it in <code>java.io.BufferedInputStream</code>
613      * if you need to read lots of data one byte at a time.
614      *
615      * @return      the next decompressed byte, or <code>-1</code>
616      *              to indicate the end of the compressed stream
617      *
618      * @throws      CorruptedInputException
619      *
620      * @throws      XZIOException if the stream has been closed
621      *
622      * @throws      EOFException
623      *                          compressed input is truncated or corrupt
624      *
625      * @throws      IOException may be thrown by <code>in</code>
626      */
read()627     public int read() throws IOException {
628         return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
629     }
630 
631     /**
632      * Decompresses into an array of bytes.
633      * <p>
634      * If <code>len</code> is zero, no bytes are read and <code>0</code>
635      * is returned. Otherwise this will block until <code>len</code>
636      * bytes have been decompressed, the end of the LZMA stream is reached,
637      * or an exception is thrown.
638      *
639      * @param       buf         target buffer for uncompressed data
640      * @param       off         start offset in <code>buf</code>
641      * @param       len         maximum number of uncompressed bytes to read
642      *
643      * @return      number of bytes read, or <code>-1</code> to indicate
644      *              the end of the compressed stream
645      *
646      * @throws      CorruptedInputException
647      *
648      * @throws      XZIOException if the stream has been closed
649      *
650      * @throws      EOFException compressed input is truncated or corrupt
651      *
652      * @throws      IOException may be thrown by <code>in</code>
653      */
read(byte[] buf, int off, int len)654     public int read(byte[] buf, int off, int len) throws IOException {
655         if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
656             throw new IndexOutOfBoundsException();
657 
658         if (len == 0)
659             return 0;
660 
661         if (in == null)
662             throw new XZIOException("Stream closed");
663 
664         if (exception != null)
665             throw exception;
666 
667         if (endReached)
668             return -1;
669 
670         try {
671             int size = 0;
672 
673             while (len > 0) {
674                 // If uncompressed size is known and thus no end marker will
675                 // be present, set the limit so that the uncompressed size
676                 // won't be exceeded.
677                 int copySizeMax = len;
678                 if (remainingSize >= 0 && remainingSize < len)
679                     copySizeMax = (int)remainingSize;
680 
681                 lz.setLimit(copySizeMax);
682 
683                 // Decode into the dictionary buffer.
684                 try {
685                     lzma.decode();
686                 } catch (CorruptedInputException e) {
687                     // The end marker is encoded with a LZMA symbol that
688                     // indicates maximum match distance. This is larger
689                     // than any supported dictionary and thus causes
690                     // CorruptedInputException from LZDecoder.repeat.
691                     if (remainingSize != -1 || !lzma.endMarkerDetected())
692                         throw e;
693 
694                     endReached = true;
695 
696                     // The exception makes lzma.decode() miss the last range
697                     // decoder normalization, so do it here. This might
698                     // cause an IOException if it needs to read a byte
699                     // from the input stream.
700                     rc.normalize();
701                 }
702 
703                 // Copy from the dictionary to buf.
704                 int copiedSize = lz.flush(buf, off);
705                 off += copiedSize;
706                 len -= copiedSize;
707                 size += copiedSize;
708 
709                 if (remainingSize >= 0) {
710                     // Update the number of bytes left to be decompressed.
711                     remainingSize -= copiedSize;
712                     assert remainingSize >= 0;
713 
714                     if (remainingSize == 0)
715                         endReached = true;
716                 }
717 
718                 if (endReached) {
719                     // Checking these helps a lot when catching corrupt
720                     // or truncated .lzma files. LZMA Utils doesn't do
721                     // the first check and thus it accepts many invalid
722                     // files that this implementation and XZ Utils don't.
723                     if (!rc.isFinished() || lz.hasPending())
724                         throw new CorruptedInputException();
725 
726                     putArraysToCache();
727                     return size == 0 ? -1 : size;
728                 }
729             }
730 
731             return size;
732 
733         } catch (IOException e) {
734             exception = e;
735             throw e;
736         }
737     }
738 
putArraysToCache()739     private void putArraysToCache() {
740         if (lz != null) {
741             lz.putArraysToCache(arrayCache);
742             lz = null;
743         }
744     }
745 
746     /**
747      * Closes the stream and calls <code>in.close()</code>.
748      * If the stream was already closed, this does nothing.
749      *
750      * @throws  IOException if thrown by <code>in.close()</code>
751      */
close()752     public void close() throws IOException {
753         if (in != null) {
754             putArraysToCache();
755 
756             try {
757                 in.close();
758             } finally {
759                 in = null;
760             }
761         }
762     }
763 }
764