1 /* 2 * LZMA2InputStream 3 * 4 * Authors: Lasse Collin <lasse.collin@tukaani.org> 5 * Igor Pavlov <http://7-zip.org/> 6 * 7 * This file has been put into the public domain. 8 * You can do whatever you want with this file. 9 */ 10 11 package org.tukaani.xz; 12 13 import java.io.InputStream; 14 import java.io.DataInputStream; 15 import java.io.IOException; 16 import org.tukaani.xz.lz.LZDecoder; 17 import org.tukaani.xz.rangecoder.RangeDecoderFromBuffer; 18 import org.tukaani.xz.lzma.LZMADecoder; 19 20 /** 21 * Decompresses a raw LZMA2 stream (no XZ headers). 22 */ 23 public class LZMA2InputStream extends InputStream { 24 /** 25 * Smallest valid LZMA2 dictionary size. 26 * <p> 27 * Very tiny dictionaries would be a performance problem, so 28 * the minimum is 4 KiB. 29 */ 30 public static final int DICT_SIZE_MIN = 4096; 31 32 /** 33 * Largest dictionary size supported by this implementation. 34 * <p> 35 * The LZMA2 algorithm allows dictionaries up to one byte less than 4 GiB. 36 * This implementation supports only 16 bytes less than 2 GiB for raw 37 * LZMA2 streams, and for .xz files the maximum is 1.5 GiB. This 38 * limitation is due to Java using signed 32-bit integers for array 39 * indexing. The limitation shouldn't matter much in practice since so 40 * huge dictionaries are not normally used. 41 */ 42 public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15; 43 44 private static final int COMPRESSED_SIZE_MAX = 1 << 16; 45 46 private final ArrayCache arrayCache; 47 private DataInputStream in; 48 49 private LZDecoder lz; 50 private RangeDecoderFromBuffer rc; 51 private LZMADecoder lzma; 52 53 private int uncompressedSize = 0; 54 private boolean isLZMAChunk = false; 55 56 private boolean needDictReset = true; 57 private boolean needProps = true; 58 private boolean endReached = false; 59 60 private IOException exception = null; 61 62 private final byte[] tempBuf = new byte[1]; 63 64 /** 65 * Gets approximate decompressor memory requirements as kibibytes for 66 * the given dictionary size. 67 * 68 * @param dictSize LZMA2 dictionary size as bytes, must be 69 * in the range [<code>DICT_SIZE_MIN</code>, 70 * <code>DICT_SIZE_MAX</code>] 71 * 72 * @return approximate memory requirements as kibibytes (KiB) 73 */ getMemoryUsage(int dictSize)74 public static int getMemoryUsage(int dictSize) { 75 // The base state is around 30-40 KiB (probabilities etc.), 76 // range decoder needs COMPRESSED_SIZE_MAX bytes for buffering, 77 // and LZ decoder needs a dictionary buffer. 78 return 40 + COMPRESSED_SIZE_MAX / 1024 + getDictSize(dictSize) / 1024; 79 } 80 getDictSize(int dictSize)81 private static int getDictSize(int dictSize) { 82 if (dictSize < DICT_SIZE_MIN || dictSize > DICT_SIZE_MAX) 83 throw new IllegalArgumentException( 84 "Unsupported dictionary size " + dictSize); 85 86 // Round dictionary size upward to a multiple of 16. This way LZMA 87 // can use LZDecoder.getPos() for calculating LZMA's posMask. 88 // Note that this check is needed only for raw LZMA2 streams; it is 89 // redundant with .xz. 90 return (dictSize + 15) & ~15; 91 } 92 93 /** 94 * Creates a new input stream that decompresses raw LZMA2 data 95 * from <code>in</code>. 96 * <p> 97 * The caller needs to know the dictionary size used when compressing; 98 * the dictionary size isn't stored as part of a raw LZMA2 stream. 99 * <p> 100 * Specifying a too small dictionary size will prevent decompressing 101 * the stream. Specifying a too big dictionary is waste of memory but 102 * decompression will work. 103 * <p> 104 * There is no need to specify a dictionary bigger than 105 * the uncompressed size of the data even if a bigger dictionary 106 * was used when compressing. If you know the uncompressed size 107 * of the data, this might allow saving some memory. 108 * 109 * @param in input stream from which LZMA2-compressed 110 * data is read 111 * 112 * @param dictSize LZMA2 dictionary size as bytes, must be 113 * in the range [<code>DICT_SIZE_MIN</code>, 114 * <code>DICT_SIZE_MAX</code>] 115 */ LZMA2InputStream(InputStream in, int dictSize)116 public LZMA2InputStream(InputStream in, int dictSize) { 117 this(in, dictSize, null); 118 } 119 120 /** 121 * Creates a new LZMA2 decompressor using a preset dictionary. 122 * <p> 123 * This is like <code>LZMA2InputStream(InputStream, int)</code> except 124 * that the dictionary may be initialized using a preset dictionary. 125 * If a preset dictionary was used when compressing the data, the 126 * same preset dictionary must be provided when decompressing. 127 * 128 * @param in input stream from which LZMA2-compressed 129 * data is read 130 * 131 * @param dictSize LZMA2 dictionary size as bytes, must be 132 * in the range [<code>DICT_SIZE_MIN</code>, 133 * <code>DICT_SIZE_MAX</code>] 134 * 135 * @param presetDict preset dictionary or <code>null</code> 136 * to use no preset dictionary 137 */ LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict)138 public LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict) { 139 this(in, dictSize, presetDict, ArrayCache.getDefaultCache()); 140 } 141 142 /** 143 * Creates a new LZMA2 decompressor using a preset dictionary 144 * and array cache. 145 * <p> 146 * This is like <code>LZMA2InputStream(InputStream, int, byte[])</code> 147 * except that this also takes the <code>arrayCache</code> argument. 148 * 149 * @param in input stream from which LZMA2-compressed 150 * data is read 151 * 152 * @param dictSize LZMA2 dictionary size as bytes, must be 153 * in the range [<code>DICT_SIZE_MIN</code>, 154 * <code>DICT_SIZE_MAX</code>] 155 * 156 * @param presetDict preset dictionary or <code>null</code> 157 * to use no preset dictionary 158 * 159 * @param arrayCache cache to be used for allocating large arrays 160 * 161 * @since 1.7 162 */ LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict, ArrayCache arrayCache)163 LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict, 164 ArrayCache arrayCache) { 165 // Check for null because otherwise null isn't detect 166 // in this constructor. 167 if (in == null) 168 throw new NullPointerException(); 169 170 this.arrayCache = arrayCache; 171 this.in = new DataInputStream(in); 172 this.rc = new RangeDecoderFromBuffer(COMPRESSED_SIZE_MAX, arrayCache); 173 this.lz = new LZDecoder(getDictSize(dictSize), presetDict, arrayCache); 174 175 if (presetDict != null && presetDict.length > 0) 176 needDictReset = false; 177 } 178 179 /** 180 * Decompresses the next byte from this input stream. 181 * <p> 182 * Reading lots of data with <code>read()</code> from this input stream 183 * may be inefficient. Wrap it in <code>java.io.BufferedInputStream</code> 184 * if you need to read lots of data one byte at a time. 185 * 186 * @return the next decompressed byte, or <code>-1</code> 187 * to indicate the end of the compressed stream 188 * 189 * @throws CorruptedInputException 190 * 191 * @throws XZIOException if the stream has been closed 192 * 193 * @throws EOFException 194 * compressed input is truncated or corrupt 195 * 196 * @throws IOException may be thrown by <code>in</code> 197 */ read()198 public int read() throws IOException { 199 return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF); 200 } 201 202 /** 203 * Decompresses into an array of bytes. 204 * <p> 205 * If <code>len</code> is zero, no bytes are read and <code>0</code> 206 * is returned. Otherwise this will block until <code>len</code> 207 * bytes have been decompressed, the end of the LZMA2 stream is reached, 208 * or an exception is thrown. 209 * 210 * @param buf target buffer for uncompressed data 211 * @param off start offset in <code>buf</code> 212 * @param len maximum number of uncompressed bytes to read 213 * 214 * @return number of bytes read, or <code>-1</code> to indicate 215 * the end of the compressed stream 216 * 217 * @throws CorruptedInputException 218 * 219 * @throws XZIOException if the stream has been closed 220 * 221 * @throws EOFException 222 * compressed input is truncated or corrupt 223 * 224 * @throws IOException may be thrown by <code>in</code> 225 */ read(byte[] buf, int off, int len)226 public int read(byte[] buf, int off, int len) throws IOException { 227 if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) 228 throw new IndexOutOfBoundsException(); 229 230 if (len == 0) 231 return 0; 232 233 if (in == null) 234 throw new XZIOException("Stream closed"); 235 236 if (exception != null) 237 throw exception; 238 239 if (endReached) 240 return -1; 241 242 try { 243 int size = 0; 244 245 while (len > 0) { 246 if (uncompressedSize == 0) { 247 decodeChunkHeader(); 248 if (endReached) 249 return size == 0 ? -1 : size; 250 } 251 252 int copySizeMax = Math.min(uncompressedSize, len); 253 254 if (!isLZMAChunk) { 255 lz.copyUncompressed(in, copySizeMax); 256 } else { 257 lz.setLimit(copySizeMax); 258 lzma.decode(); 259 } 260 261 int copiedSize = lz.flush(buf, off); 262 off += copiedSize; 263 len -= copiedSize; 264 size += copiedSize; 265 uncompressedSize -= copiedSize; 266 267 if (uncompressedSize == 0) 268 if (!rc.isFinished() || lz.hasPending()) 269 throw new CorruptedInputException(); 270 } 271 272 return size; 273 274 } catch (IOException e) { 275 exception = e; 276 throw e; 277 } 278 } 279 decodeChunkHeader()280 private void decodeChunkHeader() throws IOException { 281 int control = in.readUnsignedByte(); 282 283 if (control == 0x00) { 284 endReached = true; 285 putArraysToCache(); 286 return; 287 } 288 289 if (control >= 0xE0 || control == 0x01) { 290 needProps = true; 291 needDictReset = false; 292 lz.reset(); 293 } else if (needDictReset) { 294 throw new CorruptedInputException(); 295 } 296 297 if (control >= 0x80) { 298 isLZMAChunk = true; 299 300 uncompressedSize = (control & 0x1F) << 16; 301 uncompressedSize += in.readUnsignedShort() + 1; 302 303 int compressedSize = in.readUnsignedShort() + 1; 304 305 if (control >= 0xC0) { 306 needProps = false; 307 decodeProps(); 308 309 } else if (needProps) { 310 throw new CorruptedInputException(); 311 312 } else if (control >= 0xA0) { 313 lzma.reset(); 314 } 315 316 rc.prepareInputBuffer(in, compressedSize); 317 318 } else if (control > 0x02) { 319 throw new CorruptedInputException(); 320 321 } else { 322 isLZMAChunk = false; 323 uncompressedSize = in.readUnsignedShort() + 1; 324 } 325 } 326 decodeProps()327 private void decodeProps() throws IOException { 328 int props = in.readUnsignedByte(); 329 330 if (props > (4 * 5 + 4) * 9 + 8) 331 throw new CorruptedInputException(); 332 333 int pb = props / (9 * 5); 334 props -= pb * 9 * 5; 335 int lp = props / 9; 336 int lc = props - lp * 9; 337 338 if (lc + lp > 4) 339 throw new CorruptedInputException(); 340 341 lzma = new LZMADecoder(lz, rc, lc, lp, pb); 342 } 343 344 /** 345 * Returns the number of uncompressed bytes that can be read 346 * without blocking. The value is returned with an assumption 347 * that the compressed input data will be valid. If the compressed 348 * data is corrupt, <code>CorruptedInputException</code> may get 349 * thrown before the number of bytes claimed to be available have 350 * been read from this input stream. 351 * <p> 352 * In LZMA2InputStream, the return value will be non-zero when the 353 * decompressor is in the middle of an LZMA2 chunk. The return value 354 * will then be the number of uncompressed bytes remaining from that 355 * chunk. The return value can also be non-zero in the middle of 356 * an uncompressed chunk, but then the return value depends also on 357 * the <code>available()</code> method of the underlying InputStream. 358 * 359 * @return the number of uncompressed bytes that can be read 360 * without blocking 361 */ available()362 public int available() throws IOException { 363 if (in == null) 364 throw new XZIOException("Stream closed"); 365 366 if (exception != null) 367 throw exception; 368 369 return isLZMAChunk ? uncompressedSize 370 : Math.min(uncompressedSize, in.available()); 371 } 372 putArraysToCache()373 private void putArraysToCache() { 374 if (lz != null) { 375 lz.putArraysToCache(arrayCache); 376 lz = null; 377 378 rc.putArraysToCache(arrayCache); 379 rc = null; 380 } 381 } 382 383 /** 384 * Closes the stream and calls <code>in.close()</code>. 385 * If the stream was already closed, this does nothing. 386 * 387 * @throws IOException if thrown by <code>in.close()</code> 388 */ close()389 public void close() throws IOException { 390 if (in != null) { 391 putArraysToCache(); 392 393 try { 394 in.close(); 395 } finally { 396 in = null; 397 } 398 } 399 } 400 } 401