1 /*
2  * XZOutputStream
3  *
4  * Author: Lasse Collin <lasse.collin@tukaani.org>
5  *
6  * This file has been put into the public domain.
7  * You can do whatever you want with this file.
8  */
9 
10 package org.tukaani.xz;
11 
12 import java.io.OutputStream;
13 import java.io.IOException;
14 import org.tukaani.xz.common.EncoderUtil;
15 import org.tukaani.xz.common.StreamFlags;
16 import org.tukaani.xz.check.Check;
17 import org.tukaani.xz.index.IndexEncoder;
18 
19 /**
20  * Compresses into the .xz file format.
21  *
22  * <h4>Examples</h4>
23  * <p>
24  * Getting an output stream to compress with LZMA2 using the default
25  * settings and the default integrity check type (CRC64):
26  * <p><blockquote><pre>
27  * FileOutputStream outfile = new FileOutputStream("foo.xz");
28  * XZOutputStream outxz = new XZOutputStream(outfile, new LZMA2Options());
29  * </pre></blockquote>
30  * <p>
31  * Using the preset level <code>8</code> for LZMA2 (the default
32  * is <code>6</code>) and SHA-256 instead of CRC64 for integrity checking:
33  * <p><blockquote><pre>
34  * XZOutputStream outxz = new XZOutputStream(outfile, new LZMA2Options(8),
35  *                                           XZ.CHECK_SHA256);
36  * </pre></blockquote>
37  * <p>
38  * Using the x86 BCJ filter together with LZMA2 to compress x86 executables
39  * and printing the memory usage information before creating the
40  * XZOutputStream:
41  * <p><blockquote><pre>
42  * X86Options x86 = new X86Options();
43  * LZMA2Options lzma2 = new LZMA2Options();
44  * FilterOptions[] options = { x86, lzma2 };
45  * System.out.println("Encoder memory usage: "
46  *                    + FilterOptions.getEncoderMemoryUsage(options)
47  *                    + " KiB");
48  * System.out.println("Decoder memory usage: "
49  *                    + FilterOptions.getDecoderMemoryUsage(options)
50  *                    + " KiB");
51  * XZOutputStream outxz = new XZOutputStream(outfile, options);
52  * </pre></blockquote>
53  */
54 public class XZOutputStream extends FinishableOutputStream {
55     private OutputStream out;
56     private final StreamFlags streamFlags = new StreamFlags();
57     private final Check check;
58     private final IndexEncoder index = new IndexEncoder();
59 
60     private BlockOutputStream blockEncoder = null;
61     private FilterEncoder[] filters;
62 
63     /**
64      * True if the current filter chain supports flushing.
65      * If it doesn't support flushing, <code>flush()</code>
66      * will use <code>endBlock()</code> as a fallback.
67      */
68     private boolean filtersSupportFlushing;
69 
70     private IOException exception = null;
71     private boolean finished = false;
72 
73     private final byte[] tempBuf = new byte[1];
74 
75     /**
76      * Creates a new XZ compressor using one filter and CRC64 as
77      * the integrity check. This constructor is equivalent to passing
78      * a single-member FilterOptions array to
79      * <code>XZOutputStream(OutputStream, FilterOptions[])</code>.
80      *
81      * @param       out         output stream to which the compressed data
82      *                          will be written
83      *
84      * @param       filterOptions
85      *                          filter options to use
86      *
87      * @throws      UnsupportedOptionsException
88      *                          invalid filter chain
89      *
90      * @throws      IOException may be thrown from <code>out</code>
91      */
XZOutputStream(OutputStream out, FilterOptions filterOptions)92     public XZOutputStream(OutputStream out, FilterOptions filterOptions)
93             throws IOException {
94         this(out, filterOptions, XZ.CHECK_CRC64);
95     }
96 
97     /**
98      * Creates a new XZ compressor using one filter and the specified
99      * integrity check type. This constructor is equivalent to
100      * passing a single-member FilterOptions array to
101      * <code>XZOutputStream(OutputStream, FilterOptions[], int)</code>.
102      *
103      * @param       out         output stream to which the compressed data
104      *                          will be written
105      *
106      * @param       filterOptions
107      *                          filter options to use
108      *
109      * @param       checkType   type of the integrity check,
110      *                          for example XZ.CHECK_CRC32
111      *
112      * @throws      UnsupportedOptionsException
113      *                          invalid filter chain
114      *
115      * @throws      IOException may be thrown from <code>out</code>
116      */
XZOutputStream(OutputStream out, FilterOptions filterOptions, int checkType)117     public XZOutputStream(OutputStream out, FilterOptions filterOptions,
118                           int checkType) throws IOException {
119         this(out, new FilterOptions[] { filterOptions }, checkType);
120     }
121 
122     /**
123      * Creates a new XZ compressor using 1-4 filters and CRC64 as
124      * the integrity check. This constructor is equivalent
125      * <code>XZOutputStream(out, filterOptions, XZ.CHECK_CRC64)</code>.
126      *
127      * @param       out         output stream to which the compressed data
128      *                          will be written
129      *
130      * @param       filterOptions
131      *                          array of filter options to use
132      *
133      * @throws      UnsupportedOptionsException
134      *                          invalid filter chain
135      *
136      * @throws      IOException may be thrown from <code>out</code>
137      */
XZOutputStream(OutputStream out, FilterOptions[] filterOptions)138     public XZOutputStream(OutputStream out, FilterOptions[] filterOptions)
139             throws IOException {
140         this(out, filterOptions, XZ.CHECK_CRC64);
141     }
142 
143     /**
144      * Creates a new XZ compressor using 1-4 filters and the specified
145      * integrity check type.
146      *
147      * @param       out         output stream to which the compressed data
148      *                          will be written
149      *
150      * @param       filterOptions
151      *                          array of filter options to use
152      *
153      * @param       checkType   type of the integrity check,
154      *                          for example XZ.CHECK_CRC32
155      *
156      * @throws      UnsupportedOptionsException
157      *                          invalid filter chain
158      *
159      * @throws      IOException may be thrown from <code>out</code>
160      */
XZOutputStream(OutputStream out, FilterOptions[] filterOptions, int checkType)161     public XZOutputStream(OutputStream out, FilterOptions[] filterOptions,
162                           int checkType) throws IOException {
163         this.out = out;
164         updateFilters(filterOptions);
165 
166         streamFlags.checkType = checkType;
167         check = Check.getInstance(checkType);
168 
169         encodeStreamHeader();
170     }
171 
172     /**
173      * Updates the filter chain with a single filter.
174      * This is equivalent to passing a single-member FilterOptions array
175      * to <code>updateFilters(FilterOptions[])</code>.
176      *
177      * @param       filterOptions
178      *                          new filter to use
179      *
180      * @throws      UnsupportedOptionsException
181      *                          unsupported filter chain, or trying to change
182      *                          the filter chain in the middle of a Block
183      */
updateFilters(FilterOptions filterOptions)184     public void updateFilters(FilterOptions filterOptions)
185             throws XZIOException {
186         FilterOptions[] opts = new FilterOptions[1];
187         opts[0] = filterOptions;
188         updateFilters(opts);
189     }
190 
191     /**
192      * Updates the filter chain with 1-4 filters.
193      * <p>
194      * Currently this cannot be used to update e.g. LZMA2 options in the
195      * middle of a XZ Block. Use <code>endBlock()</code> to finish the
196      * current XZ Block before calling this function. The new filter chain
197      * will then be used for the next XZ Block.
198      *
199      * @param       filterOptions
200      *                          new filter chain to use
201      *
202      * @throws      UnsupportedOptionsException
203      *                          unsupported filter chain, or trying to change
204      *                          the filter chain in the middle of a Block
205      */
updateFilters(FilterOptions[] filterOptions)206     public void updateFilters(FilterOptions[] filterOptions)
207             throws XZIOException {
208         if (blockEncoder != null)
209             throw new UnsupportedOptionsException("Changing filter options "
210                     + "in the middle of a XZ Block not implemented");
211 
212         if (filterOptions.length < 1 || filterOptions.length > 4)
213             throw new UnsupportedOptionsException(
214                         "XZ filter chain must be 1-4 filters");
215 
216         filtersSupportFlushing = true;
217         FilterEncoder[] newFilters = new FilterEncoder[filterOptions.length];
218         for (int i = 0; i < filterOptions.length; ++i) {
219             newFilters[i] = filterOptions[i].getFilterEncoder();
220             filtersSupportFlushing &= newFilters[i].supportsFlushing();
221         }
222 
223         RawCoder.validate(newFilters);
224         filters = newFilters;
225     }
226 
227     /**
228      * Writes one byte to be compressed.
229      *
230      * @throws      XZIOException
231      *                          XZ Stream has grown too big
232      *
233      * @throws      XZIOException
234      *                          <code>finish()</code> or <code>close()</code>
235      *                          was already called
236      *
237      * @throws      IOException may be thrown by the underlying output stream
238      */
write(int b)239     public void write(int b) throws IOException {
240         tempBuf[0] = (byte)b;
241         write(tempBuf, 0, 1);
242     }
243 
244     /**
245      * Writes an array of bytes to be compressed.
246      * The compressors tend to do internal buffering and thus the written
247      * data won't be readable from the compressed output immediately.
248      * Use <code>flush()</code> to force everything written so far to
249      * be written to the underlaying output stream, but be aware that
250      * flushing reduces compression ratio.
251      *
252      * @param       buf         buffer of bytes to be written
253      * @param       off         start offset in <code>buf</code>
254      * @param       len         number of bytes to write
255      *
256      * @throws      XZIOException
257      *                          XZ Stream has grown too big: total file size
258      *                          about 8&nbsp;EiB or the Index field exceeds
259      *                          16&nbsp;GiB; you shouldn't reach these sizes
260      *                          in practice
261      *
262      * @throws      XZIOException
263      *                          <code>finish()</code> or <code>close()</code>
264      *                          was already called and len &gt; 0
265      *
266      * @throws      IOException may be thrown by the underlying output stream
267      */
write(byte[] buf, int off, int len)268     public void write(byte[] buf, int off, int len) throws IOException {
269         if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
270             throw new IndexOutOfBoundsException();
271 
272         if (exception != null)
273             throw exception;
274 
275         if (finished)
276             throw new XZIOException("Stream finished or closed");
277 
278         try {
279             if (blockEncoder == null)
280                 blockEncoder = new BlockOutputStream(out, filters, check);
281 
282             blockEncoder.write(buf, off, len);
283         } catch (IOException e) {
284             exception = e;
285             throw e;
286         }
287     }
288 
289     /**
290      * Finishes the current XZ Block (but not the whole XZ Stream).
291      * This doesn't flush the stream so it's possible that not all data will
292      * be decompressible from the output stream when this function returns.
293      * Call also <code>flush()</code> if flushing is wanted in addition to
294      * finishing the current XZ Block.
295      * <p>
296      * If there is no unfinished Block open, this function will do nothing.
297      * (No empty XZ Block will be created.)
298      * <p>
299      * This function can be useful, for example, to create
300      * random-accessible .xz files.
301      * <p>
302      * Starting a new XZ Block means that the encoder state is reset.
303      * Doing this very often will increase the size of the compressed
304      * file a lot (more than plain <code>flush()</code> would do).
305      *
306      * @throws      XZIOException
307      *                          XZ Stream has grown too big
308      *
309      * @throws      XZIOException
310      *                          stream finished or closed
311      *
312      * @throws      IOException may be thrown by the underlying output stream
313      */
endBlock()314     public void endBlock() throws IOException {
315         if (exception != null)
316             throw exception;
317 
318         if (finished)
319             throw new XZIOException("Stream finished or closed");
320 
321         // NOTE: Once there is threading with multiple Blocks, it's possible
322         // that this function will be more like a barrier that returns
323         // before the last Block has been finished.
324         if (blockEncoder != null) {
325             try {
326                 blockEncoder.finish();
327                 index.add(blockEncoder.getUnpaddedSize(),
328                           blockEncoder.getUncompressedSize());
329                 blockEncoder = null;
330             } catch (IOException e) {
331                 exception = e;
332                 throw e;
333             }
334         }
335     }
336 
337     /**
338      * Flushes the encoder and calls <code>out.flush()</code>.
339      * All buffered pending data will then be decompressible from
340      * the output stream.
341      * <p>
342      * Calling this function very often may increase the compressed
343      * file size a lot. The filter chain options may affect the size
344      * increase too. For example, with LZMA2 the HC4 match finder has
345      * smaller penalty with flushing than BT4.
346      * <p>
347      * Some filters don't support flushing. If the filter chain has
348      * such a filter, <code>flush()</code> will call <code>endBlock()</code>
349      * before flushing.
350      *
351      * @throws      XZIOException
352      *                          XZ Stream has grown too big
353      *
354      * @throws      XZIOException
355      *                          stream finished or closed
356      *
357      * @throws      IOException may be thrown by the underlying output stream
358      */
flush()359     public void flush() throws IOException {
360         if (exception != null)
361             throw exception;
362 
363         if (finished)
364             throw new XZIOException("Stream finished or closed");
365 
366         try {
367             if (blockEncoder != null) {
368                 if (filtersSupportFlushing) {
369                     // This will eventually call out.flush() so
370                     // no need to do it here again.
371                     blockEncoder.flush();
372                 } else {
373                     endBlock();
374                     out.flush();
375                 }
376             } else {
377                 out.flush();
378             }
379         } catch (IOException e) {
380             exception = e;
381             throw e;
382         }
383     }
384 
385     /**
386      * Finishes compression without closing the underlying stream.
387      * No more data can be written to this stream after finishing
388      * (calling <code>write</code> with an empty buffer is OK).
389      * <p>
390      * Repeated calls to <code>finish()</code> do nothing unless
391      * an exception was thrown by this stream earlier. In that case
392      * the same exception is thrown again.
393      * <p>
394      * After finishing, the stream may be closed normally with
395      * <code>close()</code>. If the stream will be closed anyway, there
396      * usually is no need to call <code>finish()</code> separately.
397      *
398      * @throws      XZIOException
399      *                          XZ Stream has grown too big
400      *
401      * @throws      IOException may be thrown by the underlying output stream
402      */
finish()403     public void finish() throws IOException {
404         if (!finished) {
405             // This checks for pending exceptions so we don't need to
406             // worry about it here.
407             endBlock();
408 
409             try {
410                 index.encode(out);
411                 encodeStreamFooter();
412             } catch (IOException e) {
413                 exception = e;
414                 throw e;
415             }
416 
417             // Set it to true only if everything goes fine. Setting it earlier
418             // would cause repeated calls to finish() do nothing instead of
419             // throwing an exception to indicate an earlier error.
420             finished = true;
421         }
422     }
423 
424     /**
425      * Finishes compression and closes the underlying stream.
426      * The underlying stream <code>out</code> is closed even if finishing
427      * fails. If both finishing and closing fail, the exception thrown
428      * by <code>finish()</code> is thrown and the exception from the failed
429      * <code>out.close()</code> is lost.
430      *
431      * @throws      XZIOException
432      *                          XZ Stream has grown too big
433      *
434      * @throws      IOException may be thrown by the underlying output stream
435      */
close()436     public void close() throws IOException {
437         if (out != null) {
438             // If finish() throws an exception, it stores the exception to
439             // the variable "exception". So we can ignore the possible
440             // exception here.
441             try {
442                 finish();
443             } catch (IOException e) {}
444 
445             try {
446                 out.close();
447             } catch (IOException e) {
448                 // Remember the exception but only if there is no previous
449                 // pending exception.
450                 if (exception == null)
451                     exception = e;
452             }
453 
454             out = null;
455         }
456 
457         if (exception != null)
458             throw exception;
459     }
460 
encodeStreamFlags(byte[] buf, int off)461     private void encodeStreamFlags(byte[] buf, int off) {
462         buf[off] = 0x00;
463         buf[off + 1] = (byte)streamFlags.checkType;
464     }
465 
encodeStreamHeader()466     private void encodeStreamHeader() throws IOException {
467         out.write(XZ.HEADER_MAGIC);
468 
469         byte[] buf = new byte[2];
470         encodeStreamFlags(buf, 0);
471         out.write(buf);
472 
473         EncoderUtil.writeCRC32(out, buf);
474     }
475 
encodeStreamFooter()476     private void encodeStreamFooter() throws IOException {
477         byte[] buf = new byte[6];
478         long backwardSize = index.getIndexSize() / 4 - 1;
479         for (int i = 0; i < 4; ++i)
480             buf[i] = (byte)(backwardSize >>> (i * 8));
481 
482         encodeStreamFlags(buf, 4);
483 
484         EncoderUtil.writeCRC32(out, buf);
485         out.write(buf);
486         out.write(XZ.FOOTER_MAGIC);
487     }
488 }
489