1 /* 2 * LZMA2InputStream 3 * 4 * Authors: Lasse Collin <lasse.collin@tukaani.org> 5 * Igor Pavlov <http://7-zip.org/> 6 * 7 * This file has been put into the public domain. 8 * You can do whatever you want with this file. 9 */ 10 11 package org.tukaani.xz; 12 13 import java.io.InputStream; 14 import java.io.DataInputStream; 15 import java.io.IOException; 16 import org.tukaani.xz.lz.LZDecoder; 17 import org.tukaani.xz.rangecoder.RangeDecoderFromBuffer; 18 import org.tukaani.xz.lzma.LZMADecoder; 19 20 /** 21 * Decompresses a raw LZMA2 stream (no XZ headers). 22 */ 23 public class LZMA2InputStream extends InputStream { 24 /** 25 * Smallest valid LZMA2 dictionary size. 26 * <p> 27 * Very tiny dictionaries would be a performance problem, so 28 * the minimum is 4 KiB. 29 */ 30 public static final int DICT_SIZE_MIN = 4096; 31 32 /** 33 * Largest dictionary size supported by this implementation. 34 * <p> 35 * The LZMA2 algorithm allows dictionaries up to one byte less than 4 GiB. 36 * This implementation supports only 16 bytes less than 2 GiB for raw 37 * LZMA2 streams, and for .xz files the maximum is 1.5 GiB. This 38 * limitation is due to Java using signed 32-bit integers for array 39 * indexing. The limitation shouldn't matter much in practice since so 40 * huge dictionaries are not normally used. 41 */ 42 public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15; 43 44 private static final int COMPRESSED_SIZE_MAX = 1 << 16; 45 46 private DataInputStream in; 47 48 private final LZDecoder lz; 49 private final RangeDecoderFromBuffer rc 50 = new RangeDecoderFromBuffer(COMPRESSED_SIZE_MAX); 51 private LZMADecoder lzma; 52 53 private int uncompressedSize = 0; 54 private boolean isLZMAChunk; 55 56 private boolean needDictReset = true; 57 private boolean needProps = true; 58 private boolean endReached = false; 59 60 private IOException exception = null; 61 62 private final byte[] tempBuf = new byte[1]; 63 64 /** 65 * Gets approximate decompressor memory requirements as kibibytes for 66 * the given dictionary size. 67 * 68 * @param dictSize LZMA2 dictionary size as bytes, must be 69 * in the range [<code>DICT_SIZE_MIN</code>, 70 * <code>DICT_SIZE_MAX</code>] 71 * 72 * @return approximate memory requirements as kibibytes (KiB) 73 */ getMemoryUsage(int dictSize)74 public static int getMemoryUsage(int dictSize) { 75 // The base state is around 30-40 KiB (probabilities etc.), 76 // range decoder needs COMPRESSED_SIZE_MAX bytes for buffering, 77 // and LZ decoder needs a dictionary buffer. 78 return 40 + COMPRESSED_SIZE_MAX / 1024 + getDictSize(dictSize) / 1024; 79 } 80 getDictSize(int dictSize)81 private static int getDictSize(int dictSize) { 82 if (dictSize < DICT_SIZE_MIN || dictSize > DICT_SIZE_MAX) 83 throw new IllegalArgumentException( 84 "Unsupported dictionary size " + dictSize); 85 86 // Round dictionary size upward to a multiple of 16. This way LZMA 87 // can use LZDecoder.getPos() for calculating LZMA's posMask. 88 // Note that this check is needed only for raw LZMA2 streams; it is 89 // redundant with .xz. 90 return (dictSize + 15) & ~15; 91 } 92 93 /** 94 * Creates a new input stream that decompresses raw LZMA2 data 95 * from <code>in</code>. 96 * <p> 97 * The caller needs to know the dictionary size used when compressing; 98 * the dictionary size isn't stored as part of a raw LZMA2 stream. 99 * <p> 100 * Specifying a too small dictionary size will prevent decompressing 101 * the stream. Specifying a too big dictionary is waste of memory but 102 * decompression will work. 103 * <p> 104 * There is no need to specify a dictionary bigger than 105 * the uncompressed size of the data even if a bigger dictionary 106 * was used when compressing. If you know the uncompressed size 107 * of the data, this might allow saving some memory. 108 * 109 * @param in input stream from which LZMA2-compressed 110 * data is read 111 * 112 * @param dictSize LZMA2 dictionary size as bytes, must be 113 * in the range [<code>DICT_SIZE_MIN</code>, 114 * <code>DICT_SIZE_MAX</code>] 115 */ LZMA2InputStream(InputStream in, int dictSize)116 public LZMA2InputStream(InputStream in, int dictSize) { 117 this(in, dictSize, null); 118 } 119 120 /** 121 * Creates a new LZMA2 decompressor using a preset dictionary. 122 * <p> 123 * This is like <code>LZMA2InputStream(InputStream, int)</code> except 124 * that the dictionary may be initialized using a preset dictionary. 125 * If a preset dictionary was used when compressing the data, the 126 * same preset dictionary must be provided when decompressing. 127 * 128 * @param in input stream from which LZMA2-compressed 129 * data is read 130 * 131 * @param dictSize LZMA2 dictionary size as bytes, must be 132 * in the range [<code>DICT_SIZE_MIN</code>, 133 * <code>DICT_SIZE_MAX</code>] 134 * 135 * @param presetDict preset dictionary or <code>null</code> 136 * to use no preset dictionary 137 */ LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict)138 public LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict) { 139 // Check for null because otherwise null isn't detect 140 // in this constructor. 141 if (in == null) 142 throw new NullPointerException(); 143 144 this.in = new DataInputStream(in); 145 this.lz = new LZDecoder(getDictSize(dictSize), presetDict); 146 147 if (presetDict != null && presetDict.length > 0) 148 needDictReset = false; 149 } 150 151 /** 152 * Decompresses the next byte from this input stream. 153 * <p> 154 * Reading lots of data with <code>read()</code> from this input stream 155 * may be inefficient. Wrap it in <code>java.io.BufferedInputStream</code> 156 * if you need to read lots of data one byte at a time. 157 * 158 * @return the next decompressed byte, or <code>-1</code> 159 * to indicate the end of the compressed stream 160 * 161 * @throws CorruptedInputException 162 * 163 * @throws XZIOException if the stream has been closed 164 * 165 * @throws EOFException 166 * compressed input is truncated or corrupt 167 * 168 * @throws IOException may be thrown by <code>in</code> 169 */ read()170 public int read() throws IOException { 171 return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF); 172 } 173 174 /** 175 * Decompresses into an array of bytes. 176 * <p> 177 * If <code>len</code> is zero, no bytes are read and <code>0</code> 178 * is returned. Otherwise this will block until <code>len</code> 179 * bytes have been decompressed, the end of the LZMA2 stream is reached, 180 * or an exception is thrown. 181 * 182 * @param buf target buffer for uncompressed data 183 * @param off start offset in <code>buf</code> 184 * @param len maximum number of uncompressed bytes to read 185 * 186 * @return number of bytes read, or <code>-1</code> to indicate 187 * the end of the compressed stream 188 * 189 * @throws CorruptedInputException 190 * 191 * @throws XZIOException if the stream has been closed 192 * 193 * @throws EOFException 194 * compressed input is truncated or corrupt 195 * 196 * @throws IOException may be thrown by <code>in</code> 197 */ read(byte[] buf, int off, int len)198 public int read(byte[] buf, int off, int len) throws IOException { 199 if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) 200 throw new IndexOutOfBoundsException(); 201 202 if (len == 0) 203 return 0; 204 205 if (in == null) 206 throw new XZIOException("Stream closed"); 207 208 if (exception != null) 209 throw exception; 210 211 if (endReached) 212 return -1; 213 214 try { 215 int size = 0; 216 217 while (len > 0) { 218 if (uncompressedSize == 0) { 219 decodeChunkHeader(); 220 if (endReached) 221 return size == 0 ? -1 : size; 222 } 223 224 int copySizeMax = Math.min(uncompressedSize, len); 225 226 if (!isLZMAChunk) { 227 lz.copyUncompressed(in, copySizeMax); 228 } else { 229 lz.setLimit(copySizeMax); 230 lzma.decode(); 231 if (!rc.isInBufferOK()) 232 throw new CorruptedInputException(); 233 } 234 235 int copiedSize = lz.flush(buf, off); 236 off += copiedSize; 237 len -= copiedSize; 238 size += copiedSize; 239 uncompressedSize -= copiedSize; 240 241 if (uncompressedSize == 0) 242 if (!rc.isFinished() || lz.hasPending()) 243 throw new CorruptedInputException(); 244 } 245 246 return size; 247 248 } catch (IOException e) { 249 exception = e; 250 throw e; 251 } 252 } 253 decodeChunkHeader()254 private void decodeChunkHeader() throws IOException { 255 int control = in.readUnsignedByte(); 256 257 if (control == 0x00) { 258 endReached = true; 259 return; 260 } 261 262 if (control >= 0xE0 || control == 0x01) { 263 needProps = true; 264 needDictReset = false; 265 lz.reset(); 266 } else if (needDictReset) { 267 throw new CorruptedInputException(); 268 } 269 270 if (control >= 0x80) { 271 isLZMAChunk = true; 272 273 uncompressedSize = (control & 0x1F) << 16; 274 uncompressedSize += in.readUnsignedShort() + 1; 275 276 int compressedSize = in.readUnsignedShort() + 1; 277 278 if (control >= 0xC0) { 279 needProps = false; 280 decodeProps(); 281 282 } else if (needProps) { 283 throw new CorruptedInputException(); 284 285 } else if (control >= 0xA0) { 286 lzma.reset(); 287 } 288 289 rc.prepareInputBuffer(in, compressedSize); 290 291 } else if (control > 0x02) { 292 throw new CorruptedInputException(); 293 294 } else { 295 isLZMAChunk = false; 296 uncompressedSize = in.readUnsignedShort() + 1; 297 } 298 } 299 decodeProps()300 private void decodeProps() throws IOException { 301 int props = in.readUnsignedByte(); 302 303 if (props > (4 * 5 + 4) * 9 + 8) 304 throw new CorruptedInputException(); 305 306 int pb = props / (9 * 5); 307 props -= pb * 9 * 5; 308 int lp = props / 9; 309 int lc = props - lp * 9; 310 311 if (lc + lp > 4) 312 throw new CorruptedInputException(); 313 314 lzma = new LZMADecoder(lz, rc, lc, lp, pb); 315 } 316 317 /** 318 * Returns the number of uncompressed bytes that can be read 319 * without blocking. The value is returned with an assumption 320 * that the compressed input data will be valid. If the compressed 321 * data is corrupt, <code>CorruptedInputException</code> may get 322 * thrown before the number of bytes claimed to be available have 323 * been read from this input stream. 324 * <p> 325 * In LZMA2InputStream, the return value will be non-zero when the 326 * decompressor is in the middle of an LZMA2 chunk. The return value 327 * will then be the number of uncompressed bytes remaining from that 328 * chunk. 329 * 330 * @return the number of uncompressed bytes that can be read 331 * without blocking 332 */ available()333 public int available() throws IOException { 334 if (in == null) 335 throw new XZIOException("Stream closed"); 336 337 if (exception != null) 338 throw exception; 339 340 return uncompressedSize; 341 } 342 343 /** 344 * Closes the stream and calls <code>in.close()</code>. 345 * If the stream was already closed, this does nothing. 346 * 347 * @throws IOException if thrown by <code>in.close()</code> 348 */ close()349 public void close() throws IOException { 350 if (in != null) { 351 try { 352 in.close(); 353 } finally { 354 in = null; 355 } 356 } 357 } 358 } 359