1 /*
2  * SingleXZInputStream
3  *
4  * Author: Lasse Collin <lasse.collin@tukaani.org>
5  *
6  * This file has been put into the public domain.
7  * You can do whatever you want with this file.
8  */
9 
10 package org.tukaani.xz;
11 
12 import java.io.InputStream;
13 import java.io.DataInputStream;
14 import java.io.IOException;
15 import java.io.EOFException;
16 import org.tukaani.xz.common.DecoderUtil;
17 import org.tukaani.xz.common.StreamFlags;
18 import org.tukaani.xz.index.IndexHash;
19 import org.tukaani.xz.check.Check;
20 
21 /**
22  * Decompresses exactly one XZ Stream in streamed mode (no seeking).
23  * The decompression stops after the first XZ Stream has been decompressed,
24  * and the read position in the input stream is left at the first byte
25  * after the end of the XZ Stream. This can be useful when XZ data has
26  * been stored inside some other file format or protocol.
27  * <p>
28  * Unless you know what you are doing, don't use this class to decompress
29  * standalone .xz files. For that purpose, use <code>XZInputStream</code>.
30  *
31  * <h4>When uncompressed size is known beforehand</h4>
32  * <p>
33  * If you are decompressing complete XZ streams and your application knows
34  * exactly how much uncompressed data there should be, it is good to try
35  * reading one more byte by calling <code>read()</code> and checking
36  * that it returns <code>-1</code>. This way the decompressor will parse the
37  * file footers and verify the integrity checks, giving the caller more
38  * confidence that the uncompressed data is valid.
39  *
40  * @see XZInputStream
41  */
42 public class SingleXZInputStream extends InputStream {
43     private InputStream in;
44     private final ArrayCache arrayCache;
45     private final int memoryLimit;
46     private final StreamFlags streamHeaderFlags;
47     private final Check check;
48     private final boolean verifyCheck;
49     private BlockInputStream blockDecoder = null;
50     private final IndexHash indexHash = new IndexHash();
51     private boolean endReached = false;
52     private IOException exception = null;
53 
54     private final byte[] tempBuf = new byte[1];
55 
56     /**
57      * Reads the Stream Header into a buffer.
58      * This is a helper function for the constructors.
59      */
readStreamHeader(InputStream in)60     private static byte[] readStreamHeader(InputStream in) throws IOException {
61         byte[] streamHeader = new byte[DecoderUtil.STREAM_HEADER_SIZE];
62         new DataInputStream(in).readFully(streamHeader);
63         return streamHeader;
64     }
65 
66     /**
67      * Creates a new XZ decompressor that decompresses exactly one
68      * XZ Stream from <code>in</code> without a memory usage limit.
69      * <p>
70      * This constructor reads and parses the XZ Stream Header (12 bytes)
71      * from <code>in</code>. The header of the first Block is not read
72      * until <code>read</code> is called.
73      *
74      * @param       in          input stream from which XZ-compressed
75      *                          data is read
76      *
77      * @throws      XZFormatException
78      *                          input is not in the XZ format
79      *
80      * @throws      CorruptedInputException
81      *                          XZ header CRC32 doesn't match
82      *
83      * @throws      UnsupportedOptionsException
84      *                          XZ header is valid but specifies options
85      *                          not supported by this implementation
86      *
87      * @throws      EOFException
88      *                          less than 12 bytes of input was available
89      *                          from <code>in</code>
90      *
91      * @throws      IOException may be thrown by <code>in</code>
92      */
SingleXZInputStream(InputStream in)93     public SingleXZInputStream(InputStream in) throws IOException {
94         this(in, -1);
95     }
96 
97     /**
98      * Creates a new XZ decompressor that decompresses exactly one
99      * XZ Stream from <code>in</code> without a memory usage limit.
100      * <p>
101      * This is identical to <code>SingleXZInputStream(InputStream)</code>
102      * except that this also takes the <code>arrayCache</code> argument.
103      *
104      * @param       in          input stream from which XZ-compressed
105      *                          data is read
106      *
107      * @param       arrayCache  cache to be used for allocating large arrays
108      *
109      * @throws      XZFormatException
110      *                          input is not in the XZ format
111      *
112      * @throws      CorruptedInputException
113      *                          XZ header CRC32 doesn't match
114      *
115      * @throws      UnsupportedOptionsException
116      *                          XZ header is valid but specifies options
117      *                          not supported by this implementation
118      *
119      * @throws      EOFException
120      *                          less than 12 bytes of input was available
121      *                          from <code>in</code>
122      *
123      * @throws      IOException may be thrown by <code>in</code>
124      *
125      * @since 1.7
126      */
SingleXZInputStream(InputStream in, ArrayCache arrayCache)127     public SingleXZInputStream(InputStream in, ArrayCache arrayCache)
128             throws IOException {
129         this(in, -1, arrayCache);
130     }
131 
132     /**
133      * Creates a new XZ decompressor that decompresses exactly one
134      * XZ Stream from <code>in</code> with an optional memory usage limit.
135      * <p>
136      * This is identical to <code>SingleXZInputStream(InputStream)</code>
137      * except that this also takes the <code>memoryLimit</code> argument.
138      *
139      * @param       in          input stream from which XZ-compressed
140      *                          data is read
141      *
142      * @param       memoryLimit memory usage limit in kibibytes (KiB)
143      *                          or <code>-1</code> to impose no
144      *                          memory usage limit
145      *
146      * @throws      XZFormatException
147      *                          input is not in the XZ format
148      *
149      * @throws      CorruptedInputException
150      *                          XZ header CRC32 doesn't match
151      *
152      * @throws      UnsupportedOptionsException
153      *                          XZ header is valid but specifies options
154      *                          not supported by this implementation
155      *
156      * @throws      EOFException
157      *                          less than 12 bytes of input was available
158      *                          from <code>in</code>
159      *
160      * @throws      IOException may be thrown by <code>in</code>
161      */
SingleXZInputStream(InputStream in, int memoryLimit)162     public SingleXZInputStream(InputStream in, int memoryLimit)
163             throws IOException {
164         this(in, memoryLimit, true);
165     }
166 
167     /**
168      * Creates a new XZ decompressor that decompresses exactly one
169      * XZ Stream from <code>in</code> with an optional memory usage limit.
170      * <p>
171      * This is identical to <code>SingleXZInputStream(InputStream)</code>
172      * except that this also takes the <code>memoryLimit</code> and
173      * <code>arrayCache</code> arguments.
174      *
175      * @param       in          input stream from which XZ-compressed
176      *                          data is read
177      *
178      * @param       memoryLimit memory usage limit in kibibytes (KiB)
179      *                          or <code>-1</code> to impose no
180      *                          memory usage limit
181      *
182      * @param       arrayCache  cache to be used for allocating large arrays
183      *
184      * @throws      XZFormatException
185      *                          input is not in the XZ format
186      *
187      * @throws      CorruptedInputException
188      *                          XZ header CRC32 doesn't match
189      *
190      * @throws      UnsupportedOptionsException
191      *                          XZ header is valid but specifies options
192      *                          not supported by this implementation
193      *
194      * @throws      EOFException
195      *                          less than 12 bytes of input was available
196      *                          from <code>in</code>
197      *
198      * @throws      IOException may be thrown by <code>in</code>
199      *
200      * @since 1.7
201      */
SingleXZInputStream(InputStream in, int memoryLimit, ArrayCache arrayCache)202     public SingleXZInputStream(InputStream in, int memoryLimit,
203                                ArrayCache arrayCache) throws IOException {
204         this(in, memoryLimit, true, arrayCache);
205     }
206 
207     /**
208      * Creates a new XZ decompressor that decompresses exactly one
209      * XZ Stream from <code>in</code> with an optional memory usage limit
210      * and ability to disable verification of integrity checks.
211      * <p>
212      * This is identical to <code>SingleXZInputStream(InputStream,int)</code>
213      * except that this also takes the <code>verifyCheck</code> argument.
214      * <p>
215      * Note that integrity check verification should almost never be disabled.
216      * Possible reasons to disable integrity check verification:
217      * <ul>
218      *   <li>Trying to recover data from a corrupt .xz file.</li>
219      *   <li>Speeding up decompression. This matters mostly with SHA-256
220      *   or with files that have compressed extremely well. It's recommended
221      *   that integrity checking isn't disabled for performance reasons
222      *   unless the file integrity is verified externally in some other
223      *   way.</li>
224      * </ul>
225      * <p>
226      * <code>verifyCheck</code> only affects the integrity check of
227      * the actual compressed data. The CRC32 fields in the headers
228      * are always verified.
229      *
230      * @param       in          input stream from which XZ-compressed
231      *                          data is read
232      *
233      * @param       memoryLimit memory usage limit in kibibytes (KiB)
234      *                          or <code>-1</code> to impose no
235      *                          memory usage limit
236      *
237      * @param       verifyCheck if <code>true</code>, the integrity checks
238      *                          will be verified; this should almost never
239      *                          be set to <code>false</code>
240      *
241      * @throws      XZFormatException
242      *                          input is not in the XZ format
243      *
244      * @throws      CorruptedInputException
245      *                          XZ header CRC32 doesn't match
246      *
247      * @throws      UnsupportedOptionsException
248      *                          XZ header is valid but specifies options
249      *                          not supported by this implementation
250      *
251      * @throws      EOFException
252      *                          less than 12 bytes of input was available
253      *                          from <code>in</code>
254      *
255      * @throws      IOException may be thrown by <code>in</code>
256      *
257      * @since 1.6
258      */
SingleXZInputStream(InputStream in, int memoryLimit, boolean verifyCheck)259     public SingleXZInputStream(InputStream in, int memoryLimit,
260                                boolean verifyCheck) throws IOException {
261         this(in, memoryLimit, verifyCheck, ArrayCache.getDefaultCache());
262     }
263 
264     /**
265      * Creates a new XZ decompressor that decompresses exactly one
266      * XZ Stream from <code>in</code> with an optional memory usage limit
267      * and ability to disable verification of integrity checks.
268      * <p>
269      * This is identical to
270      * <code>SingleXZInputStream(InputStream,int,boolean)</code>
271      * except that this also takes the <code>arrayCache</code> argument.
272      *
273      * @param       in          input stream from which XZ-compressed
274      *                          data is read
275      *
276      * @param       memoryLimit memory usage limit in kibibytes (KiB)
277      *                          or <code>-1</code> to impose no
278      *                          memory usage limit
279      *
280      * @param       verifyCheck if <code>true</code>, the integrity checks
281      *                          will be verified; this should almost never
282      *                          be set to <code>false</code>
283      *
284      * @param       arrayCache  cache to be used for allocating large arrays
285      *
286      * @throws      XZFormatException
287      *                          input is not in the XZ format
288      *
289      * @throws      CorruptedInputException
290      *                          XZ header CRC32 doesn't match
291      *
292      * @throws      UnsupportedOptionsException
293      *                          XZ header is valid but specifies options
294      *                          not supported by this implementation
295      *
296      * @throws      EOFException
297      *                          less than 12 bytes of input was available
298      *                          from <code>in</code>
299      *
300      * @throws      IOException may be thrown by <code>in</code>
301      *
302      * @since 1.7
303      */
SingleXZInputStream(InputStream in, int memoryLimit, boolean verifyCheck, ArrayCache arrayCache)304     public SingleXZInputStream(InputStream in, int memoryLimit,
305                                boolean verifyCheck, ArrayCache arrayCache)
306             throws IOException {
307         this(in, memoryLimit, verifyCheck, readStreamHeader(in), arrayCache);
308     }
309 
SingleXZInputStream(InputStream in, int memoryLimit, boolean verifyCheck, byte[] streamHeader, ArrayCache arrayCache)310     SingleXZInputStream(InputStream in, int memoryLimit, boolean verifyCheck,
311                         byte[] streamHeader, ArrayCache arrayCache)
312             throws IOException {
313         this.arrayCache = arrayCache;
314         this.in = in;
315         this.memoryLimit = memoryLimit;
316         this.verifyCheck = verifyCheck;
317         streamHeaderFlags = DecoderUtil.decodeStreamHeader(streamHeader);
318         check = Check.getInstance(streamHeaderFlags.checkType);
319     }
320 
321     /**
322      * Gets the ID of the integrity check used in this XZ Stream.
323      *
324      * @return      the Check ID specified in the XZ Stream Header
325      */
getCheckType()326     public int getCheckType() {
327         return streamHeaderFlags.checkType;
328     }
329 
330     /**
331      * Gets the name of the integrity check used in this XZ Stream.
332      *
333      * @return      the name of the check specified in the XZ Stream Header
334      */
getCheckName()335     public String getCheckName() {
336         return check.getName();
337     }
338 
339     /**
340      * Decompresses the next byte from this input stream.
341      * <p>
342      * Reading lots of data with <code>read()</code> from this input stream
343      * may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
344      * if you need to read lots of data one byte at a time.
345      *
346      * @return      the next decompressed byte, or <code>-1</code>
347      *              to indicate the end of the compressed stream
348      *
349      * @throws      CorruptedInputException
350      * @throws      UnsupportedOptionsException
351      * @throws      MemoryLimitException
352      *
353      * @throws      XZIOException if the stream has been closed
354      *
355      * @throws      EOFException
356      *                          compressed input is truncated or corrupt
357      *
358      * @throws      IOException may be thrown by <code>in</code>
359      */
read()360     public int read() throws IOException {
361         return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
362     }
363 
364     /**
365      * Decompresses into an array of bytes.
366      * <p>
367      * If <code>len</code> is zero, no bytes are read and <code>0</code>
368      * is returned. Otherwise this will try to decompress <code>len</code>
369      * bytes of uncompressed data. Less than <code>len</code> bytes may
370      * be read only in the following situations:
371      * <ul>
372      *   <li>The end of the compressed data was reached successfully.</li>
373      *   <li>An error is detected after at least one but less <code>len</code>
374      *       bytes have already been successfully decompressed.
375      *       The next call with non-zero <code>len</code> will immediately
376      *       throw the pending exception.</li>
377      *   <li>An exception is thrown.</li>
378      * </ul>
379      *
380      * @param       buf         target buffer for uncompressed data
381      * @param       off         start offset in <code>buf</code>
382      * @param       len         maximum number of uncompressed bytes to read
383      *
384      * @return      number of bytes read, or <code>-1</code> to indicate
385      *              the end of the compressed stream
386      *
387      * @throws      CorruptedInputException
388      * @throws      UnsupportedOptionsException
389      * @throws      MemoryLimitException
390      *
391      * @throws      XZIOException if the stream has been closed
392      *
393      * @throws      EOFException
394      *                          compressed input is truncated or corrupt
395      *
396      * @throws      IOException may be thrown by <code>in</code>
397      */
read(byte[] buf, int off, int len)398     public int read(byte[] buf, int off, int len) throws IOException {
399         if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
400             throw new IndexOutOfBoundsException();
401 
402         if (len == 0)
403             return 0;
404 
405         if (in == null)
406             throw new XZIOException("Stream closed");
407 
408         if (exception != null)
409             throw exception;
410 
411         if (endReached)
412             return -1;
413 
414         int size = 0;
415 
416         try {
417             while (len > 0) {
418                 if (blockDecoder == null) {
419                     try {
420                         blockDecoder = new BlockInputStream(
421                                 in, check, verifyCheck, memoryLimit, -1, -1,
422                                 arrayCache);
423                     } catch (IndexIndicatorException e) {
424                         indexHash.validate(in);
425                         validateStreamFooter();
426                         endReached = true;
427                         return size > 0 ? size : -1;
428                     }
429                 }
430 
431                 int ret = blockDecoder.read(buf, off, len);
432 
433                 if (ret > 0) {
434                     size += ret;
435                     off += ret;
436                     len -= ret;
437                 } else if (ret == -1) {
438                     indexHash.add(blockDecoder.getUnpaddedSize(),
439                                   blockDecoder.getUncompressedSize());
440                     blockDecoder = null;
441                 }
442             }
443         } catch (IOException e) {
444             exception = e;
445             if (size == 0)
446                 throw e;
447         }
448 
449         return size;
450     }
451 
validateStreamFooter()452     private void validateStreamFooter() throws IOException {
453         byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
454         new DataInputStream(in).readFully(buf);
455         StreamFlags streamFooterFlags = DecoderUtil.decodeStreamFooter(buf);
456 
457         if (!DecoderUtil.areStreamFlagsEqual(streamHeaderFlags,
458                                              streamFooterFlags)
459                 || indexHash.getIndexSize() != streamFooterFlags.backwardSize)
460             throw new CorruptedInputException(
461                     "XZ Stream Footer does not match Stream Header");
462     }
463 
464     /**
465      * Returns the number of uncompressed bytes that can be read
466      * without blocking. The value is returned with an assumption
467      * that the compressed input data will be valid. If the compressed
468      * data is corrupt, <code>CorruptedInputException</code> may get
469      * thrown before the number of bytes claimed to be available have
470      * been read from this input stream.
471      *
472      * @return      the number of uncompressed bytes that can be read
473      *              without blocking
474      */
available()475     public int available() throws IOException {
476         if (in == null)
477             throw new XZIOException("Stream closed");
478 
479         if (exception != null)
480             throw exception;
481 
482         return blockDecoder == null ? 0 : blockDecoder.available();
483     }
484 
485     /**
486      * Closes the stream and calls <code>in.close()</code>.
487      * If the stream was already closed, this does nothing.
488      * <p>
489      * This is equivalent to <code>close(true)</code>.
490      *
491      * @throws  IOException if thrown by <code>in.close()</code>
492      */
close()493     public void close() throws IOException {
494         close(true);
495     }
496 
497     /**
498      * Closes the stream and optionally calls <code>in.close()</code>.
499      * If the stream was already closed, this does nothing.
500      * If <code>close(false)</code> has been called, a further
501      * call of <code>close(true)</code> does nothing (it doesn't call
502      * <code>in.close()</code>).
503      * <p>
504      * If you don't want to close the underlying <code>InputStream</code>,
505      * there is usually no need to worry about closing this stream either;
506      * it's fine to do nothing and let the garbage collector handle it.
507      * However, if you are using {@link ArrayCache}, <code>close(false)</code>
508      * can be useful to put the allocated arrays back to the cache without
509      * closing the underlying <code>InputStream</code>.
510      * <p>
511      * Note that if you successfully reach the end of the stream
512      * (<code>read</code> returns <code>-1</code>), the arrays are
513      * automatically put back to the cache by that <code>read</code> call. In
514      * this situation <code>close(false)</code> is redundant (but harmless).
515      *
516      * @throws  IOException if thrown by <code>in.close()</code>
517      *
518      * @since 1.7
519      */
close(boolean closeInput)520     public void close(boolean closeInput) throws IOException {
521         if (in != null) {
522             if (blockDecoder != null) {
523                 blockDecoder.close();
524                 blockDecoder = null;
525             }
526 
527             try {
528                 if (closeInput)
529                     in.close();
530             } finally {
531                 in = null;
532             }
533         }
534     }
535 }
536