1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements.  See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership.  The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License.  You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied.  See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 package org.apache.commons.compress.archivers;
20 
21 import java.io.ByteArrayInputStream;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.io.OutputStream;
25 import java.security.AccessController;
26 import java.security.PrivilegedAction;
27 import java.util.ArrayList;
28 import java.util.Collections;
29 import java.util.Iterator;
30 import java.util.Locale;
31 import java.util.Set;
32 import java.util.SortedMap;
33 import java.util.TreeMap;
34 
35 import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
36 import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
37 import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream;
38 import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
39 import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream;
40 import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
41 import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
42 import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream;
43 import org.apache.commons.compress.archivers.sevenz.SevenZFile;
44 import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
45 import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
46 import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
47 import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
48 import org.apache.commons.compress.utils.IOUtils;
49 import org.apache.commons.compress.utils.Lists;
50 import org.apache.commons.compress.utils.ServiceLoaderIterator;
51 import org.apache.commons.compress.utils.Sets;
52 
53 /**
54  * Factory to create Archive[In|Out]putStreams from names or the first bytes of
55  * the InputStream. In order to add other implementations, you should extend
56  * ArchiveStreamFactory and override the appropriate methods (and call their
57  * implementation from super of course).
58  *
59  * Compressing a ZIP-File:
60  *
61  * <pre>
62  * final OutputStream out = Files.newOutputStream(output.toPath());
63  * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out);
64  *
65  * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml"));
66  * IOUtils.copy(Files.newInputStream(file1.toPath()), os);
67  * os.closeArchiveEntry();
68  *
69  * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml"));
70  * IOUtils.copy(Files.newInputStream(file2.toPath()), os);
71  * os.closeArchiveEntry();
72  * os.close();
73  * </pre>
74  *
75  * Decompressing a ZIP-File:
76  *
77  * <pre>
78  * final InputStream is = Files.newInputStream(input.toPath());
79  * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is);
80  * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry();
81  * OutputStream out = Files.newOutputStream(dir.toPath().resolve(entry.getName()));
82  * IOUtils.copy(in, out);
83  * out.close();
84  * in.close();
85  * </pre>
86  * @Immutable provided that the deprecated method setEntryEncoding is not used.
87  * @ThreadSafe even if the deprecated method setEntryEncoding is used
88  */
89 public class ArchiveStreamFactory implements ArchiveStreamProvider {
90 
91     private static final int TAR_HEADER_SIZE = 512;
92 
93     private static final int DUMP_SIGNATURE_SIZE = 32;
94 
95     private static final int SIGNATURE_SIZE = 12;
96 
97     private static final ArchiveStreamFactory SINGLETON = new ArchiveStreamFactory();
98 
99     /**
100      * Constant (value {@value}) used to identify the AR archive format.
101      * @since 1.1
102      */
103     public static final String AR = "ar";
104 
105     /**
106      * Constant (value {@value}) used to identify the ARJ archive format.
107      * Not supported as an output stream type.
108      * @since 1.6
109      */
110     public static final String ARJ = "arj";
111 
112     /**
113      * Constant (value {@value}) used to identify the CPIO archive format.
114      * @since 1.1
115      */
116     public static final String CPIO = "cpio";
117 
118     /**
119      * Constant (value {@value}) used to identify the Unix DUMP archive format.
120      * Not supported as an output stream type.
121      * @since 1.3
122      */
123     public static final String DUMP = "dump";
124 
125     /**
126      * Constant (value {@value}) used to identify the JAR archive format.
127      * @since 1.1
128      */
129     public static final String JAR = "jar";
130 
    /**
     * Constant (value {@value}) used to identify the TAR archive format.
     * @since 1.1
     */
135     public static final String TAR = "tar";
136 
137     /**
138      * Constant (value {@value}) used to identify the ZIP archive format.
139      * @since 1.1
140      */
141     public static final String ZIP = "zip";
142 
143     /**
144      * Constant (value {@value}) used to identify the 7z archive format.
145      * @since 1.8
146      */
147     public static final String SEVEN_Z = "7z";
148 
149     /**
150      * Entry encoding, null for the platform default.
151      */
152     private final String encoding;
153 
154     /**
155      * Entry encoding, null for the default.
156      */
157     private volatile String entryEncoding;
158 
159     private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders;
160 
161     private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders;
162 
findArchiveStreamProviders()163     private static ArrayList<ArchiveStreamProvider> findArchiveStreamProviders() {
164         return Lists.newArrayList(serviceLoaderIterator());
165     }
166 
putAll(Set<String> names, ArchiveStreamProvider provider, TreeMap<String, ArchiveStreamProvider> map)167     static void putAll(Set<String> names, ArchiveStreamProvider provider,
168             TreeMap<String, ArchiveStreamProvider> map) {
169         for (String name : names) {
170             map.put(toKey(name), provider);
171         }
172     }
173 
serviceLoaderIterator()174     private static Iterator<ArchiveStreamProvider> serviceLoaderIterator() {
175         return new ServiceLoaderIterator<>(ArchiveStreamProvider.class);
176     }
177 
toKey(final String name)178     private static String toKey(final String name) {
179         return name.toUpperCase(Locale.ROOT);
180     }
181 
182     /**
183      * Constructs a new sorted map from input stream provider names to provider
184      * objects.
185      *
186      * <p>
187      * The map returned by this method will have one entry for each provider for
188      * which support is available in the current Java virtual machine. If two or
189      * more supported provider have the same name then the resulting map will
190      * contain just one of them; which one it will contain is not specified.
191      * </p>
192      *
193      * <p>
194      * The invocation of this method, and the subsequent use of the resulting
195      * map, may cause time-consuming disk or network I/O operations to occur.
196      * This method is provided for applications that need to enumerate all of
197      * the available providers, for example to allow user provider selection.
198      * </p>
199      *
200      * <p>
201      * This method may return different results at different times if new
202      * providers are dynamically made available to the current Java virtual
203      * machine.
204      * </p>
205      *
206      * @return An immutable, map from names to provider objects
207      * @since 1.13
208      */
findAvailableArchiveInputStreamProviders()209     public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() {
210         return AccessController.doPrivileged(new PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>() {
211             @Override
212             public SortedMap<String, ArchiveStreamProvider> run() {
213                 TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
214                 putAll(SINGLETON.getInputStreamArchiveNames(), SINGLETON, map);
215                 for (ArchiveStreamProvider provider : findArchiveStreamProviders()) {
216                     putAll(provider.getInputStreamArchiveNames(), provider, map);
217                 }
218                 return map;
219             }
220         });
221     }
222 
223     /**
224      * Constructs a new sorted map from output stream provider names to provider
225      * objects.
226      *
227      * <p>
228      * The map returned by this method will have one entry for each provider for
229      * which support is available in the current Java virtual machine. If two or
230      * more supported provider have the same name then the resulting map will
231      * contain just one of them; which one it will contain is not specified.
232      * </p>
233      *
234      * <p>
235      * The invocation of this method, and the subsequent use of the resulting
236      * map, may cause time-consuming disk or network I/O operations to occur.
237      * This method is provided for applications that need to enumerate all of
238      * the available providers, for example to allow user provider selection.
239      * </p>
240      *
241      * <p>
242      * This method may return different results at different times if new
243      * providers are dynamically made available to the current Java virtual
244      * machine.
245      * </p>
246      *
247      * @return An immutable, map from names to provider objects
248      * @since 1.13
249      */
250     public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() {
251         return AccessController.doPrivileged(new PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>() {
252             @Override
253             public SortedMap<String, ArchiveStreamProvider> run() {
254                 TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
255                 putAll(SINGLETON.getOutputStreamArchiveNames(), SINGLETON, map);
256                 for (ArchiveStreamProvider provider : findArchiveStreamProviders()) {
257                     putAll(provider.getOutputStreamArchiveNames(), provider, map);
258                 }
259                 return map;
260             }
261         });
262     }
263 
264     /**
265      * Create an instance using the platform default encoding.
266      */
267     public ArchiveStreamFactory() {
268         this(null);
269     }
270 
271     /**
272      * Create an instance using the specified encoding.
273      *
274      * @param encoding the encoding to be used.
275      *
276      * @since 1.10
277      */
278     public ArchiveStreamFactory(final String encoding) {
279         super();
280         this.encoding = encoding;
281         // Also set the original field so can continue to use it.
282         this.entryEncoding = encoding;
283     }
284 
285     /**
286      * Returns the encoding to use for arj, jar, zip, dump, cpio and tar
287      * files, or null for the archiver default.
288      *
289      * @return entry encoding, or null for the archiver default
290      * @since 1.5
291      */
292     public String getEntryEncoding() {
293         return entryEncoding;
294     }
295 
296     /**
297      * Sets the encoding to use for arj, jar, zip, dump, cpio and tar files. Use null for the archiver default.
298      *
299      * @param entryEncoding the entry encoding, null uses the archiver default.
300      * @since 1.5
301      * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding
302      * @throws IllegalStateException if the constructor {@link #ArchiveStreamFactory(String)}
303      * was used to specify the factory encoding.
304      */
305     @Deprecated
306     public void setEntryEncoding(final String entryEncoding) {
307         // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway
308         if (encoding != null) {
309             throw new IllegalStateException("Cannot overide encoding set by the constructor");
310         }
311         this.entryEncoding = entryEncoding;
312     }
313 
314     /**
315      * Creates an archive input stream from an archiver name and an input stream.
316      *
317      * @param archiverName the archive name,
318      * i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or {@value #SEVEN_Z}
319      * @param in the input stream
320      * @return the archive input stream
321      * @throws ArchiveException if the archiver name is not known
322      * @throws StreamingNotSupportedException if the format cannot be
323      * read from a stream
324      * @throws IllegalArgumentException if the archiver name or stream is null
325      */
326     public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in)
327             throws ArchiveException {
328         return createArchiveInputStream(archiverName, in, entryEncoding);
329     }
330 
331     @Override
332     public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in,
333             final String actualEncoding) throws ArchiveException {
334 
335         if (archiverName == null) {
336             throw new IllegalArgumentException("Archivername must not be null.");
337         }
338 
339         if (in == null) {
340             throw new IllegalArgumentException("InputStream must not be null.");
341         }
342 
343         if (AR.equalsIgnoreCase(archiverName)) {
344             return new ArArchiveInputStream(in);
345         }
346         if (ARJ.equalsIgnoreCase(archiverName)) {
347             if (actualEncoding != null) {
348                 return new ArjArchiveInputStream(in, actualEncoding);
349             }
350             return new ArjArchiveInputStream(in);
351         }
352         if (ZIP.equalsIgnoreCase(archiverName)) {
353             if (actualEncoding != null) {
354                 return new ZipArchiveInputStream(in, actualEncoding);
355             }
356             return new ZipArchiveInputStream(in);
357         }
358         if (TAR.equalsIgnoreCase(archiverName)) {
359             if (actualEncoding != null) {
360                 return new TarArchiveInputStream(in, actualEncoding);
361             }
362             return new TarArchiveInputStream(in);
363         }
364         if (JAR.equalsIgnoreCase(archiverName)) {
365             if (actualEncoding != null) {
366                 return new JarArchiveInputStream(in, actualEncoding);
367             }
368             return new JarArchiveInputStream(in);
369         }
370         if (CPIO.equalsIgnoreCase(archiverName)) {
371             if (actualEncoding != null) {
372                 return new CpioArchiveInputStream(in, actualEncoding);
373             }
374             return new CpioArchiveInputStream(in);
375         }
376         if (DUMP.equalsIgnoreCase(archiverName)) {
377             if (actualEncoding != null) {
378                 return new DumpArchiveInputStream(in, actualEncoding);
379             }
380             return new DumpArchiveInputStream(in);
381         }
382         if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
383             throw new StreamingNotSupportedException(SEVEN_Z);
384         }
385 
386         final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName));
387         if (archiveStreamProvider != null) {
388             return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding);
389         }
390 
391         throw new ArchiveException("Archiver: " + archiverName + " not found.");
392     }
393 
394     /**
395      * Creates an archive output stream from an archiver name and an output stream.
396      *
397      * @param archiverName the archive name,
398      * i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO}
399      * @param out the output stream
400      * @return the archive output stream
401      * @throws ArchiveException if the archiver name is not known
402      * @throws StreamingNotSupportedException if the format cannot be
403      * written to a stream
404      * @throws IllegalArgumentException if the archiver name or stream is null
405      */
406     public ArchiveOutputStream createArchiveOutputStream(final String archiverName, final OutputStream out)
407             throws ArchiveException {
408         return createArchiveOutputStream(archiverName, out, entryEncoding);
409     }
410 
411     @Override
412     public ArchiveOutputStream createArchiveOutputStream(
413             final String archiverName, final OutputStream out, final String actualEncoding)
414             throws ArchiveException {
415         if (archiverName == null) {
416             throw new IllegalArgumentException("Archivername must not be null.");
417         }
418         if (out == null) {
419             throw new IllegalArgumentException("OutputStream must not be null.");
420         }
421 
422         if (AR.equalsIgnoreCase(archiverName)) {
423             return new ArArchiveOutputStream(out);
424         }
425         if (ZIP.equalsIgnoreCase(archiverName)) {
426             final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out);
427             if (actualEncoding != null) {
428                 zip.setEncoding(actualEncoding);
429             }
430             return zip;
431         }
432         if (TAR.equalsIgnoreCase(archiverName)) {
433             if (actualEncoding != null) {
434                 return new TarArchiveOutputStream(out, actualEncoding);
435             }
436             return new TarArchiveOutputStream(out);
437         }
438         if (JAR.equalsIgnoreCase(archiverName)) {
439             if (actualEncoding != null) {
440                 return new JarArchiveOutputStream(out, actualEncoding);
441             }
442             return new JarArchiveOutputStream(out);
443         }
444         if (CPIO.equalsIgnoreCase(archiverName)) {
445             if (actualEncoding != null) {
446                 return new CpioArchiveOutputStream(out, actualEncoding);
447             }
448             return new CpioArchiveOutputStream(out);
449         }
450         if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
451             throw new StreamingNotSupportedException(SEVEN_Z);
452         }
453 
454         final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName));
455         if (archiveStreamProvider != null) {
456             return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding);
457         }
458 
459         throw new ArchiveException("Archiver: " + archiverName + " not found.");
460     }
461 
462     /**
463      * Create an archive input stream from an input stream, autodetecting
464      * the archive type from the first few bytes of the stream. The InputStream
465      * must support marks, like BufferedInputStream.
466      *
467      * @param in the input stream
468      * @return the archive input stream
469      * @throws ArchiveException if the archiver name is not known
470      * @throws StreamingNotSupportedException if the format cannot be
471      * read from a stream
472      * @throws IllegalArgumentException if the stream is null or does not support mark
473      */
474     public ArchiveInputStream createArchiveInputStream(final InputStream in)
475             throws ArchiveException {
476         return createArchiveInputStream(detect(in), in);
477     }
478 
479     /**
480      * Try to determine the type of Archiver
481      * @param in input stream
482      * @return type of archiver if found
483      * @throws ArchiveException if an archiver cannot be detected in the stream
484      * @since 1.14
485      */
486     public static String detect(InputStream in) throws ArchiveException {
487         if (in == null) {
488             throw new IllegalArgumentException("Stream must not be null.");
489         }
490 
491         if (!in.markSupported()) {
492             throw new IllegalArgumentException("Mark is not supported.");
493         }
494 
495         final byte[] signature = new byte[SIGNATURE_SIZE];
496         in.mark(signature.length);
497         int signatureLength = -1;
498         try {
499             signatureLength = IOUtils.readFully(in, signature);
500             in.reset();
501         } catch (IOException e) {
502             throw new ArchiveException("IOException while reading signature.", e);
503         }
504 
505         if (ZipArchiveInputStream.matches(signature, signatureLength)) {
506             return ZIP;
507         } else if (JarArchiveInputStream.matches(signature, signatureLength)) {
508             return JAR;
509         } else if (ArArchiveInputStream.matches(signature, signatureLength)) {
510             return AR;
511         } else if (CpioArchiveInputStream.matches(signature, signatureLength)) {
512             return CPIO;
513         } else if (ArjArchiveInputStream.matches(signature, signatureLength)) {
514             return ARJ;
515         } else if (SevenZFile.matches(signature, signatureLength)) {
516             return SEVEN_Z;
517         }
518 
519         // Dump needs a bigger buffer to check the signature;
520         final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE];
521         in.mark(dumpsig.length);
522         try {
523             signatureLength = IOUtils.readFully(in, dumpsig);
524             in.reset();
525         } catch (IOException e) {
526             throw new ArchiveException("IOException while reading dump signature", e);
527         }
528         if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) {
529             return DUMP;
530         }
531 
532         // Tar needs an even bigger buffer to check the signature; read the first block
533         final byte[] tarHeader = new byte[TAR_HEADER_SIZE];
534         in.mark(tarHeader.length);
535         try {
536             signatureLength = IOUtils.readFully(in, tarHeader);
537             in.reset();
538         } catch (IOException e) {
539             throw new ArchiveException("IOException while reading tar signature", e);
540         }
541         if (TarArchiveInputStream.matches(tarHeader, signatureLength)) {
542             return TAR;
543         }
544 
545         // COMPRESS-117 - improve auto-recognition
546         if (signatureLength >= TAR_HEADER_SIZE) {
547             TarArchiveInputStream tais = null;
548             try {
549                 tais = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader));
550                 // COMPRESS-191 - verify the header checksum
551                 if (tais.getNextTarEntry().isCheckSumOK()) {
552                     return TAR;
553                 }
554             } catch (final Exception e) { // NOPMD // NOSONAR
555                 // can generate IllegalArgumentException as well
556                 // as IOException
557                 // autodetection, simply not a TAR
558                 // ignored
559             } finally {
560                 IOUtils.closeQuietly(tais);
561             }
562         }
563         throw new ArchiveException("No Archiver found for the stream signature");
564     }
565 
566     public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() {
567         if (archiveInputStreamProviders == null) {
568             archiveInputStreamProviders = Collections
569                     .unmodifiableSortedMap(findAvailableArchiveInputStreamProviders());
570         }
571         return archiveInputStreamProviders;
572     }
573 
574     public SortedMap<String, ArchiveStreamProvider> getArchiveOutputStreamProviders() {
575         if (archiveOutputStreamProviders == null) {
576             archiveOutputStreamProviders = Collections
577                     .unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders());
578         }
579         return archiveOutputStreamProviders;
580     }
581 
582     @Override
583     public Set<String> getInputStreamArchiveNames() {
584         return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z);
585     }
586 
587     @Override
588     public Set<String> getOutputStreamArchiveNames() {
589         return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z);
590     }
591 
592 }
593