/*
 * XZInputStream
 *
 * Author: Lasse Collin <lasse.collin@tukaani.org>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

package org.tukaani.xz;

import java.io.InputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.EOFException;
import org.tukaani.xz.common.DecoderUtil;

/**
 * Decompresses a .xz file in streamed mode (no seeking).
 * <p>
 * Use this to decompress regular standalone .xz files. This reads from
 * its input stream until the end of the input or until an error occurs.
 * This supports decompressing concatenated .xz files.
 *
 * <h4>Typical use cases</h4>
 * <p>
 * Getting an input stream to decompress a .xz file:
 * <p><blockquote><pre>
 * InputStream infile = new FileInputStream("foo.xz");
 * XZInputStream inxz = new XZInputStream(infile);
 * </pre></blockquote>
 * <p>
 * It's important to keep in mind that decompressor memory usage depends
 * on the settings used to compress the file. The worst-case memory usage
 * of XZInputStream is currently 1.5 GiB. Still, very few files will
 * require more than about 65 MiB because that's how much decompressing
 * a file created with the highest preset level will need, and only a few
 * people use settings other than the predefined presets.
 * <p>
 * It is possible to specify a memory usage limit for
 * <code>XZInputStream</code>. If decompression requires more memory than
 * the specified limit, MemoryLimitException will be thrown when reading
 * from the stream. For example, the following sets the memory usage limit
 * to 100 MiB:
 * <p><blockquote><pre>
 * InputStream infile = new FileInputStream("foo.xz");
 * XZInputStream inxz = new XZInputStream(infile, 100 * 1024);
 * </pre></blockquote>
 *
 * <h4>When uncompressed size is known beforehand</h4>
 * <p>
 * If you are decompressing complete files and your application knows
 * exactly how much uncompressed data there should be, it is good to try
 * reading one more byte by calling <code>read()</code> and checking
 * that it returns <code>-1</code>. This way the decompressor will parse the
 * file footers and verify the integrity checks, giving the caller more
 * confidence that the uncompressed data is valid. (This advice seems to
 * apply to
 * {@link java.util.zip.GZIPInputStream java.util.zip.GZIPInputStream} too.)
 *
 * @see SingleXZInputStream
 */
public class XZInputStream extends InputStream {
    /** Cache handed to every per-Stream decoder created by this object. */
    private final ArrayCache arrayCache;

    /** Memory usage limit in KiB, or -1 for no limit. */
    private final int memoryLimit;

    /** Underlying compressed input; <code>null</code> once closed. */
    private InputStream in;

    /**
     * Decoder of the current .xz Stream; <code>null</code> between
     * concatenated Streams, after the end of input, and after closing.
     */
    private SingleXZInputStream xzIn;

    /** Whether the integrity checks of the compressed data are verified. */
    private final boolean verifyCheck;

    /** Set once the underlying input has ended at a Stream boundary. */
    private boolean endReached = false;

    /** Pending or already thrown error; rethrown by later calls. */
    private IOException exception = null;

    /** One-byte scratch buffer used by <code>read()</code>. */
    private final byte[] tempBuf = new byte[1];

    /**
     * Creates a new XZ decompressor without a memory usage limit.
     * <p>
     * This constructor reads and parses the XZ Stream Header (12 bytes)
     * from <code>in</code>. The header of the first Block is not read
     * until <code>read</code> is called.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from <code>in</code>
     *
     * @throws      IOException may be thrown by <code>in</code>
     */
    public XZInputStream(InputStream in) throws IOException {
        this(in, -1);
    }

    /**
     * Creates a new XZ decompressor without a memory usage limit.
     * <p>
     * This is identical to <code>XZInputStream(InputStream)</code>
     * except that this takes also the <code>arrayCache</code> argument.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @param       arrayCache  cache to be used for allocating large arrays
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from <code>in</code>
     *
     * @throws      IOException may be thrown by <code>in</code>
     *
     * @since 1.7
     */
    public XZInputStream(InputStream in, ArrayCache arrayCache)
            throws IOException {
        this(in, -1, arrayCache);
    }

    /**
     * Creates a new XZ decompressor with an optional memory usage limit.
     * <p>
     * This is identical to <code>XZInputStream(InputStream)</code> except
     * that this takes also the <code>memoryLimit</code> argument.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @param       memoryLimit memory usage limit in kibibytes (KiB)
     *                          or <code>-1</code> to impose no
     *                          memory usage limit
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from <code>in</code>
     *
     * @throws      IOException may be thrown by <code>in</code>
     */
    public XZInputStream(InputStream in, int memoryLimit) throws IOException {
        this(in, memoryLimit, true);
    }

    /**
     * Creates a new XZ decompressor with an optional memory usage limit
     * and a caller-supplied array cache.
     * <p>
     * This is identical to <code>XZInputStream(InputStream)</code> except
     * that this takes also the <code>memoryLimit</code> and
     * <code>arrayCache</code> arguments.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @param       memoryLimit memory usage limit in kibibytes (KiB)
     *                          or <code>-1</code> to impose no
     *                          memory usage limit
     *
     * @param       arrayCache  cache to be used for allocating large arrays
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from <code>in</code>
     *
     * @throws      IOException may be thrown by <code>in</code>
     *
     * @since 1.7
     */
    public XZInputStream(InputStream in, int memoryLimit,
                         ArrayCache arrayCache) throws IOException {
        this(in, memoryLimit, true, arrayCache);
    }

    /**
     * Creates a new XZ decompressor with an optional memory usage limit
     * and ability to disable verification of integrity checks.
     * <p>
     * This is identical to <code>XZInputStream(InputStream,int)</code> except
     * that this takes also the <code>verifyCheck</code> argument.
     * <p>
     * Note that integrity check verification should almost never be disabled.
     * Possible reasons to disable integrity check verification:
     * <ul>
     *   <li>Trying to recover data from a corrupt .xz file.</li>
     *   <li>Speeding up decompression. This matters mostly with SHA-256
     *   or with files that have compressed extremely well. It's recommended
     *   that integrity checking isn't disabled for performance reasons
     *   unless the file integrity is verified externally in some other
     *   way.</li>
     * </ul>
     * <p>
     * <code>verifyCheck</code> only affects the integrity check of
     * the actual compressed data. The CRC32 fields in the headers
     * are always verified.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @param       memoryLimit memory usage limit in kibibytes (KiB)
     *                          or <code>-1</code> to impose no
     *                          memory usage limit
     *
     * @param       verifyCheck if <code>true</code>, the integrity checks
     *                          will be verified; this should almost never
     *                          be set to <code>false</code>
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from <code>in</code>
     *
     * @throws      IOException may be thrown by <code>in</code>
     *
     * @since 1.6
     */
    public XZInputStream(InputStream in, int memoryLimit, boolean verifyCheck)
            throws IOException {
        this(in, memoryLimit, verifyCheck, ArrayCache.getDefaultCache());
    }

    /**
     * Creates a new XZ decompressor with an optional memory usage limit
     * and ability to disable verification of integrity checks.
     * <p>
     * This is identical to <code>XZInputStream(InputStream,int,boolean)</code>
     * except that this takes also the <code>arrayCache</code> argument.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @param       memoryLimit memory usage limit in kibibytes (KiB)
     *                          or <code>-1</code> to impose no
     *                          memory usage limit
     *
     * @param       verifyCheck if <code>true</code>, the integrity checks
     *                          will be verified; this should almost never
     *                          be set to <code>false</code>
     *
     * @param       arrayCache  cache to be used for allocating large arrays
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from <code>in</code>
     *
     * @throws      IOException may be thrown by <code>in</code>
     *
     * @since 1.7
     */
    public XZInputStream(InputStream in, int memoryLimit, boolean verifyCheck,
                         ArrayCache arrayCache) throws IOException {
        this.arrayCache = arrayCache;
        this.in = in;
        this.memoryLimit = memoryLimit;
        this.verifyCheck = verifyCheck;
        this.xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck,
                                            arrayCache);
    }

    /**
     * Decompresses the next byte from this input stream.
     * <p>
     * Reading lots of data with <code>read()</code> from this input stream
     * may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
     * if you need to read lots of data one byte at a time.
     *
     * @return      the next decompressed byte, or <code>-1</code>
     *              to indicate the end of the compressed stream
     *
     * @throws      CorruptedInputException
     * @throws      UnsupportedOptionsException
     * @throws      MemoryLimitException
     *
     * @throws      XZIOException if the stream has been closed
     *
     * @throws      EOFException
     *                          compressed input is truncated or corrupt
     *
     * @throws      IOException may be thrown by <code>in</code>
     */
    @Override
    public int read() throws IOException {
        // Mask with 0xFF so that the byte is returned as 0-255, keeping
        // -1 unambiguous as the end-of-stream marker.
        return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
    }

    /**
     * Decompresses into an array of bytes.
     * <p>
     * If <code>len</code> is zero, no bytes are read and <code>0</code>
     * is returned. Otherwise this will try to decompress <code>len</code>
     * bytes of uncompressed data. Fewer than <code>len</code> bytes may
     * be read only in the following situations:
     * <ul>
     *   <li>The end of the compressed data was reached successfully.</li>
     *   <li>An error is detected after at least one but fewer than
     *       <code>len</code> bytes have already been successfully
     *       decompressed. The next call with non-zero <code>len</code>
     *       will immediately throw the pending exception.</li>
     *   <li>An exception is thrown.</li>
     * </ul>
     *
     * @param       buf         target buffer for uncompressed data
     * @param       off         start offset in <code>buf</code>
     * @param       len         maximum number of uncompressed bytes to read
     *
     * @return      number of bytes read, or <code>-1</code> to indicate
     *              the end of the compressed stream
     *
     * @throws      IndexOutOfBoundsException
     *                          <code>off</code> and <code>len</code> don't
     *                          describe a valid range within
     *                          <code>buf</code>
     *
     * @throws      CorruptedInputException
     * @throws      UnsupportedOptionsException
     * @throws      MemoryLimitException
     *
     * @throws      XZIOException if the stream has been closed
     *
     * @throws      EOFException
     *                          compressed input is truncated or corrupt
     *
     * @throws      IOException may be thrown by <code>in</code>
     */
    @Override
    public int read(byte[] buf, int off, int len) throws IOException {
        // "off + len < 0" catches integer overflow of the sum.
        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
            throw new IndexOutOfBoundsException();

        if (len == 0)
            return 0;

        if (in == null)
            throw new XZIOException("Stream closed");

        if (exception != null)
            throw exception;

        if (endReached)
            return -1;

        int size = 0;

        try {
            while (len > 0) {
                if (xzIn == null) {
                    // The previous Stream ended; look for a concatenated
                    // Stream or the end of the input.
                    prepareNextStream();
                    if (endReached)
                        return size == 0 ? -1 : size;
                }

                int ret = xzIn.read(buf, off, len);

                if (ret > 0) {
                    size += ret;
                    off += ret;
                    len -= ret;
                } else if (ret == -1) {
                    // End of the current Stream.
                    xzIn = null;
                }
            }
        } catch (IOException e) {
            // Remember the error so that later calls rethrow it. If some
            // data was already decompressed, return that data first.
            exception = e;
            if (size == 0)
                throw e;
        }

        return size;
    }

    /**
     * Skips possible Stream Padding and initializes the decoder for the
     * next concatenated .xz Stream. If the underlying input ends before
     * any further byte is read, <code>endReached</code> is set and
     * <code>xzIn</code> is left as <code>null</code>.
     *
     * @throws      CorruptedInputException
     *                          the data following the previous Stream
     *                          is not a valid XZ Stream Header
     *
     * @throws      EOFException
     *                          the input ends in the middle of a four-byte
     *                          group or in the middle of a Stream Header
     *
     * @throws      IOException may be thrown by <code>in</code>
     */
    private void prepareNextStream() throws IOException {
        DataInputStream inData = new DataInputStream(in);
        byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];

        // The size of Stream Padding must be a multiple of four bytes,
        // all bytes zero.
        do {
            // First try to read one byte to see if we have reached the end
            // of the file.
            int ret = inData.read(buf, 0, 1);
            if (ret == -1) {
                endReached = true;
                return;
            }

            // Since we got one byte of input, there must be at least
            // three more available in a valid file.
            inData.readFully(buf, 1, 3);

        } while (buf[0] == 0 && buf[1] == 0 && buf[2] == 0 && buf[3] == 0);

        // Not all bytes are zero. In a valid Stream it indicates the
        // beginning of the next Stream. Read the rest of the Stream Header
        // and initialize the XZ decoder.
        inData.readFully(buf, 4, DecoderUtil.STREAM_HEADER_SIZE - 4);

        try {
            xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck, buf,
                                           arrayCache);
        } catch (XZFormatException e) {
            // Since this isn't the first .xz Stream, it is more
            // logical to tell that the data is corrupt. The original
            // exception is kept as the cause to help debugging.
            // NOTE(review): assumes CorruptedInputException(String) leaves
            // the cause unset so that initCause() is allowed - confirm.
            CorruptedInputException corrupt = new CorruptedInputException(
                    "Garbage after a valid XZ Stream");
            corrupt.initCause(e);
            throw corrupt;
        }
    }

    /**
     * Returns the number of uncompressed bytes that can be read
     * without blocking. The value is returned with an assumption
     * that the compressed input data will be valid. If the compressed
     * data is corrupt, <code>CorruptedInputException</code> may get
     * thrown before the number of bytes claimed to be available have
     * been read from this input stream.
     *
     * @return      the number of uncompressed bytes that can be read
     *              without blocking
     *
     * @throws      XZIOException if the stream has been closed
     *
     * @throws      IOException if an earlier read has failed; the same
     *                          exception is thrown again
     */
    @Override
    public int available() throws IOException {
        if (in == null)
            throw new XZIOException("Stream closed");

        if (exception != null)
            throw exception;

        // There is no active decoder between Streams or after the end.
        return xzIn == null ? 0 : xzIn.available();
    }

    /**
     * Closes the stream and calls <code>in.close()</code>.
     * If the stream was already closed, this does nothing.
     * <p>
     * This is equivalent to <code>close(true)</code>.
     *
     * @throws  IOException if thrown by <code>in.close()</code>
     */
    @Override
    public void close() throws IOException {
        close(true);
    }

    /**
     * Closes the stream and optionally calls <code>in.close()</code>.
     * If the stream was already closed, this does nothing.
     * If <code>close(false)</code> has been called, a further
     * call of <code>close(true)</code> does nothing (it doesn't call
     * <code>in.close()</code>).
     * <p>
     * If you don't want to close the underlying <code>InputStream</code>,
     * there is usually no need to worry about closing this stream either;
     * it's fine to do nothing and let the garbage collector handle it.
     * However, if you are using {@link ArrayCache}, <code>close(false)</code>
     * can be useful to put the allocated arrays back to the cache without
     * closing the underlying <code>InputStream</code>.
     * <p>
     * Note that if you successfully reach the end of the stream
     * (<code>read</code> returns <code>-1</code>), the arrays are
     * automatically put back to the cache by that <code>read</code> call. In
     * this situation <code>close(false)</code> is redundant (but harmless).
     *
     * @param   closeInput  if <code>true</code>, <code>in.close()</code>
     *                      is called
     *
     * @throws  IOException if thrown while closing the decoder
     *                      or by <code>in.close()</code>
     *
     * @since 1.7
     */
    public void close(boolean closeInput) throws IOException {
        if (in != null) {
            try {
                if (xzIn != null) {
                    try {
                        xzIn.close(false);
                    } finally {
                        xzIn = null;
                    }
                }
            } finally {
                // Release the underlying stream even if closing the
                // decoder failed, and always mark this stream as closed.
                try {
                    if (closeInput)
                        in.close();
                } finally {
                    in = null;
                }
            }
        }
    }
}