/*
 * SingleXZInputStream
 *
 * Author: Lasse Collin <lasse.collin@tukaani.org>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */
9 
10 package org.tukaani.xz;
11 
12 import java.io.InputStream;
13 import java.io.DataInputStream;
14 import java.io.IOException;
15 import java.io.EOFException;
16 import org.tukaani.xz.common.DecoderUtil;
17 import org.tukaani.xz.common.StreamFlags;
18 import org.tukaani.xz.index.IndexHash;
19 import org.tukaani.xz.check.Check;
20 
/**
 * Decompresses exactly one XZ Stream in streamed mode (no seeking).
 * The decompression stops after the first XZ Stream has been decompressed,
 * and the read position in the input stream is left at the first byte
 * after the end of the XZ Stream. This can be useful when XZ data has
 * been stored inside some other file format or protocol.
 * <p>
 * Unless you know what you are doing, don't use this class to decompress
 * standalone .xz files. For that purpose, use <code>XZInputStream</code>.
 *
 * <h4>When uncompressed size is known beforehand</h4>
 * <p>
 * If you are decompressing complete XZ streams and your application knows
 * exactly how much uncompressed data there should be, it is good to try
 * reading one more byte by calling <code>read()</code> and checking
 * that it returns <code>-1</code>. This way the decompressor will parse the
 * file footers and verify the integrity checks, giving the caller more
 * confidence that the uncompressed data is valid.
 *
 * @see XZInputStream
 */
42 public class SingleXZInputStream extends InputStream {
43     private InputStream in;
44     private final int memoryLimit;
45     private final StreamFlags streamHeaderFlags;
46     private final Check check;
47     private final boolean verifyCheck;
48     private BlockInputStream blockDecoder = null;
49     private final IndexHash indexHash = new IndexHash();
50     private boolean endReached = false;
51     private IOException exception = null;
52 
53     private final byte[] tempBuf = new byte[1];
54 
55     /**
56      * Reads the Stream Header into a buffer.
57      * This is a helper function for the constructors.
58      */
readStreamHeader(InputStream in)59     private static byte[] readStreamHeader(InputStream in) throws IOException {
60         byte[] streamHeader = new byte[DecoderUtil.STREAM_HEADER_SIZE];
61         new DataInputStream(in).readFully(streamHeader);
62         return streamHeader;
63     }
64 
65     /**
66      * Creates a new XZ decompressor that decompresses exactly one
67      * XZ Stream from <code>in</code> without a memory usage limit.
68      * <p>
69      * This constructor reads and parses the XZ Stream Header (12 bytes)
70      * from <code>in</code>. The header of the first Block is not read
71      * until <code>read</code> is called.
72      *
73      * @param       in          input stream from which XZ-compressed
74      *                          data is read
75      *
76      * @throws      XZFormatException
77      *                          input is not in the XZ format
78      *
79      * @throws      CorruptedInputException
80      *                          XZ header CRC32 doesn't match
81      *
82      * @throws      UnsupportedOptionsException
83      *                          XZ header is valid but specifies options
84      *                          not supported by this implementation
85      *
86      * @throws      EOFException
87      *                          less than 12 bytes of input was available
88      *                          from <code>in</code>
89      *
90      * @throws      IOException may be thrown by <code>in</code>
91      */
SingleXZInputStream(InputStream in)92     public SingleXZInputStream(InputStream in) throws IOException {
93         this(in, -1);
94     }
95 
96     /**
97      * Creates a new XZ decompressor that decompresses exactly one
98      * XZ Stream from <code>in</code> with an optional memory usage limit.
99      * <p>
100      * This is identical to <code>SingleXZInputStream(InputStream)</code>
101      * except that this takes also the <code>memoryLimit</code> argument.
102      *
103      * @param       in          input stream from which XZ-compressed
104      *                          data is read
105      *
106      * @param       memoryLimit memory usage limit in kibibytes (KiB)
107      *                          or <code>-1</code> to impose no
108      *                          memory usage limit
109      *
110      * @throws      XZFormatException
111      *                          input is not in the XZ format
112      *
113      * @throws      CorruptedInputException
114      *                          XZ header CRC32 doesn't match
115      *
116      * @throws      UnsupportedOptionsException
117      *                          XZ header is valid but specifies options
118      *                          not supported by this implementation
119      *
120      * @throws      EOFException
121      *                          less than 12 bytes of input was available
122      *                          from <code>in</code>
123      *
124      * @throws      IOException may be thrown by <code>in</code>
125      */
SingleXZInputStream(InputStream in, int memoryLimit)126     public SingleXZInputStream(InputStream in, int memoryLimit)
127             throws IOException {
128         this(in, memoryLimit, true, readStreamHeader(in));
129     }
130 
131     /**
132      * Creates a new XZ decompressor that decompresses exactly one
133      * XZ Stream from <code>in</code> with an optional memory usage limit
134      * and ability to disable verification of integrity checks.
135      * <p>
136      * This is identical to <code>SingleXZInputStream(InputStream,int)</code>
137      * except that this takes also the <code>verifyCheck</code> argument.
138      * <p>
139      * Note that integrity check verification should almost never be disabled.
140      * Possible reasons to disable integrity check verification:
141      * <ul>
142      *   <li>Trying to recover data from a corrupt .xz file.</li>
143      *   <li>Speeding up decompression. This matters mostly with SHA-256
144      *   or with files that have compressed extremely well. It's recommended
145      *   that integrity checking isn't disabled for performance reasons
146      *   unless the file integrity is verified externally in some other
147      *   way.</li>
148      * </ul>
149      * <p>
150      * <code>verifyCheck</code> only affects the integrity check of
151      * the actual compressed data. The CRC32 fields in the headers
152      * are always verified.
153      *
154      * @param       in          input stream from which XZ-compressed
155      *                          data is read
156      *
157      * @param       memoryLimit memory usage limit in kibibytes (KiB)
158      *                          or <code>-1</code> to impose no
159      *                          memory usage limit
160      *
161      * @param       verifyCheck if <code>true</code>, the integrity checks
162      *                          will be verified; this should almost never
163      *                          be set to <code>false</code>
164      *
165      * @throws      XZFormatException
166      *                          input is not in the XZ format
167      *
168      * @throws      CorruptedInputException
169      *                          XZ header CRC32 doesn't match
170      *
171      * @throws      UnsupportedOptionsException
172      *                          XZ header is valid but specifies options
173      *                          not supported by this implementation
174      *
175      * @throws      EOFException
176      *                          less than 12 bytes of input was available
177      *                          from <code>in</code>
178      *
179      * @throws      IOException may be thrown by <code>in</code>
180      *
181      * @since 1.6
182      */
SingleXZInputStream(InputStream in, int memoryLimit, boolean verifyCheck)183     public SingleXZInputStream(InputStream in, int memoryLimit,
184                                boolean verifyCheck) throws IOException {
185         this(in, memoryLimit, verifyCheck, readStreamHeader(in));
186     }
187 
SingleXZInputStream(InputStream in, int memoryLimit, boolean verifyCheck, byte[] streamHeader)188     SingleXZInputStream(InputStream in, int memoryLimit, boolean verifyCheck,
189                         byte[] streamHeader) throws IOException {
190         this.in = in;
191         this.memoryLimit = memoryLimit;
192         this.verifyCheck = verifyCheck;
193         streamHeaderFlags = DecoderUtil.decodeStreamHeader(streamHeader);
194         check = Check.getInstance(streamHeaderFlags.checkType);
195     }
196 
197     /**
198      * Gets the ID of the integrity check used in this XZ Stream.
199      *
200      * @return      the Check ID specified in the XZ Stream Header
201      */
getCheckType()202     public int getCheckType() {
203         return streamHeaderFlags.checkType;
204     }
205 
206     /**
207      * Gets the name of the integrity check used in this XZ Stream.
208      *
209      * @return      the name of the check specified in the XZ Stream Header
210      */
getCheckName()211     public String getCheckName() {
212         return check.getName();
213     }
214 
215     /**
216      * Decompresses the next byte from this input stream.
217      * <p>
218      * Reading lots of data with <code>read()</code> from this input stream
219      * may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
220      * if you need to read lots of data one byte at a time.
221      *
222      * @return      the next decompressed byte, or <code>-1</code>
223      *              to indicate the end of the compressed stream
224      *
225      * @throws      CorruptedInputException
226      * @throws      UnsupportedOptionsException
227      * @throws      MemoryLimitException
228      *
229      * @throws      XZIOException if the stream has been closed
230      *
231      * @throws      EOFException
232      *                          compressed input is truncated or corrupt
233      *
234      * @throws      IOException may be thrown by <code>in</code>
235      */
read()236     public int read() throws IOException {
237         return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
238     }
239 
240     /**
241      * Decompresses into an array of bytes.
242      * <p>
243      * If <code>len</code> is zero, no bytes are read and <code>0</code>
244      * is returned. Otherwise this will try to decompress <code>len</code>
245      * bytes of uncompressed data. Less than <code>len</code> bytes may
246      * be read only in the following situations:
247      * <ul>
248      *   <li>The end of the compressed data was reached successfully.</li>
249      *   <li>An error is detected after at least one but less <code>len</code>
250      *       bytes have already been successfully decompressed.
251      *       The next call with non-zero <code>len</code> will immediately
252      *       throw the pending exception.</li>
253      *   <li>An exception is thrown.</li>
254      * </ul>
255      *
256      * @param       buf         target buffer for uncompressed data
257      * @param       off         start offset in <code>buf</code>
258      * @param       len         maximum number of uncompressed bytes to read
259      *
260      * @return      number of bytes read, or <code>-1</code> to indicate
261      *              the end of the compressed stream
262      *
263      * @throws      CorruptedInputException
264      * @throws      UnsupportedOptionsException
265      * @throws      MemoryLimitException
266      *
267      * @throws      XZIOException if the stream has been closed
268      *
269      * @throws      EOFException
270      *                          compressed input is truncated or corrupt
271      *
272      * @throws      IOException may be thrown by <code>in</code>
273      */
read(byte[] buf, int off, int len)274     public int read(byte[] buf, int off, int len) throws IOException {
275         if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
276             throw new IndexOutOfBoundsException();
277 
278         if (len == 0)
279             return 0;
280 
281         if (in == null)
282             throw new XZIOException("Stream closed");
283 
284         if (exception != null)
285             throw exception;
286 
287         if (endReached)
288             return -1;
289 
290         int size = 0;
291 
292         try {
293             while (len > 0) {
294                 if (blockDecoder == null) {
295                     try {
296                         blockDecoder = new BlockInputStream(
297                                 in, check, verifyCheck, memoryLimit, -1, -1);
298                     } catch (IndexIndicatorException e) {
299                         indexHash.validate(in);
300                         validateStreamFooter();
301                         endReached = true;
302                         return size > 0 ? size : -1;
303                     }
304                 }
305 
306                 int ret = blockDecoder.read(buf, off, len);
307 
308                 if (ret > 0) {
309                     size += ret;
310                     off += ret;
311                     len -= ret;
312                 } else if (ret == -1) {
313                     indexHash.add(blockDecoder.getUnpaddedSize(),
314                                   blockDecoder.getUncompressedSize());
315                     blockDecoder = null;
316                 }
317             }
318         } catch (IOException e) {
319             exception = e;
320             if (size == 0)
321                 throw e;
322         }
323 
324         return size;
325     }
326 
validateStreamFooter()327     private void validateStreamFooter() throws IOException {
328         byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
329         new DataInputStream(in).readFully(buf);
330         StreamFlags streamFooterFlags = DecoderUtil.decodeStreamFooter(buf);
331 
332         if (!DecoderUtil.areStreamFlagsEqual(streamHeaderFlags,
333                                              streamFooterFlags)
334                 || indexHash.getIndexSize() != streamFooterFlags.backwardSize)
335             throw new CorruptedInputException(
336                     "XZ Stream Footer does not match Stream Header");
337     }
338 
339     /**
340      * Returns the number of uncompressed bytes that can be read
341      * without blocking. The value is returned with an assumption
342      * that the compressed input data will be valid. If the compressed
343      * data is corrupt, <code>CorruptedInputException</code> may get
344      * thrown before the number of bytes claimed to be available have
345      * been read from this input stream.
346      *
347      * @return      the number of uncompressed bytes that can be read
348      *              without blocking
349      */
available()350     public int available() throws IOException {
351         if (in == null)
352             throw new XZIOException("Stream closed");
353 
354         if (exception != null)
355             throw exception;
356 
357         return blockDecoder == null ? 0 : blockDecoder.available();
358     }
359 
360     /**
361      * Closes the stream and calls <code>in.close()</code>.
362      * If the stream was already closed, this does nothing.
363      *
364      * @throws  IOException if thrown by <code>in.close()</code>
365      */
close()366     public void close() throws IOException {
367         if (in != null) {
368             try {
369                 in.close();
370             } finally {
371                 in = null;
372             }
373         }
374     }
375 }
376