1 // Copyright 2016 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.google.archivepatcher.generator;
16 
17 import com.google.archivepatcher.shared.ByteArrayInputStreamFactory;
18 import com.google.archivepatcher.shared.DefaultDeflateCompatibilityWindow;
19 import com.google.archivepatcher.shared.JreDeflateParameters;
20 import com.google.archivepatcher.shared.MultiViewInputStreamFactory;
21 import com.google.archivepatcher.shared.RandomAccessFileInputStreamFactory;
22 import java.io.File;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.util.ArrayList;
26 import java.util.Arrays;
27 import java.util.Collections;
28 import java.util.HashMap;
29 import java.util.List;
30 import java.util.Map;
31 import java.util.zip.Deflater;
32 import java.util.zip.DeflaterOutputStream;
33 import java.util.zip.Inflater;
34 import java.util.zip.InflaterInputStream;
35 import java.util.zip.ZipException;
36 
37 /**
38  * Divines information about the compression used for a resource that has been compressed with a
39  * deflate-compatible algorithm. This implementation produces results that are valid within the
40  * {@link DefaultDeflateCompatibilityWindow}.
41  */
42 public class DefaultDeflateCompressionDiviner {
43 
44   /** The levels to try for each strategy, in the order to attempt them. */
45   private static final Map<Integer, List<Integer>> LEVELS_BY_STRATEGY = getLevelsByStrategy();
46 
47   /**
48    * A simple struct that contains a {@link MinimalZipEntry} describing a specific entry from a zip
49    * archive along with an optional accompanying {@link JreDeflateParameters} describing the
50    * original compression settings that were used to generate the compressed delivery in that entry.
51    */
52   public static class DivinationResult {
53     /**
54      * The {@link MinimalZipEntry} for the result; never null.
55      */
56     public final MinimalZipEntry minimalZipEntry;
57 
58     /**
59      * The {@link JreDeflateParameters} for the result, possibly null. This value is only set if
60      * {@link MinimalZipEntry#isDeflateCompressed()} is true <em>and</em> the compression settings
61      * were successfully divined.
62      */
63     public final JreDeflateParameters divinedParameters;
64 
65     /**
66      * Creates a new result with the specified fields.
67      * @param minimalZipEntry the zip entry
68      * @param divinedParameters the parameters
69      */
DivinationResult( MinimalZipEntry minimalZipEntry, JreDeflateParameters divinedParameters)70     public DivinationResult(
71         MinimalZipEntry minimalZipEntry, JreDeflateParameters divinedParameters) {
72       if (minimalZipEntry == null) {
73         throw new IllegalArgumentException("minimalZipEntry cannot be null");
74       }
75       this.minimalZipEntry = minimalZipEntry;
76       this.divinedParameters = divinedParameters;
77     }
78   }
79 
80   /**
81    * Load the specified archive and attempt to divine deflate parameters for all entries within.
82    * @param archiveFile the archive file to work on
83    * @return a list of results for each entry in the archive, in file order (not central directory
84    * order). There is exactly one result per entry, regardless of whether or not that entry is
85    * compressed. Callers can filter results by checking
86    * {@link MinimalZipEntry#getCompressionMethod()} to see if the result is or is not compressed,
87    * and by checking whether a non-null {@link JreDeflateParameters} was obtained.
88    * @throws IOException if unable to read or parse the file
89    * @see DivinationResult
90    */
divineDeflateParameters(File archiveFile)91   public List<DivinationResult> divineDeflateParameters(File archiveFile) throws IOException {
92     List<DivinationResult> results = new ArrayList<>();
93     for (MinimalZipEntry minimalZipEntry : MinimalZipArchive.listEntries(archiveFile)) {
94       JreDeflateParameters divinedParameters = null;
95       if (minimalZipEntry.isDeflateCompressed()) {
96         // TODO(pasc): Reuse streams to avoid churning file descriptors
97         MultiViewInputStreamFactory isFactory =
98             new RandomAccessFileInputStreamFactory(
99                 archiveFile,
100                 minimalZipEntry.getFileOffsetOfCompressedData(),
101                 minimalZipEntry.getCompressedSize());
102 
103         // Keep small entries in memory to avoid unnecessary file I/O.
104         if (minimalZipEntry.getCompressedSize() < (100 * 1024)) {
105           try (InputStream is = isFactory.newStream()) {
106             byte[] compressedBytes = new byte[(int) minimalZipEntry.getCompressedSize()];
107             is.read(compressedBytes);
108             divinedParameters =
109                 divineDeflateParameters(new ByteArrayInputStreamFactory(compressedBytes));
110           } catch (Exception ignore) {
111             divinedParameters = null;
112           }
113         } else {
114           divinedParameters = divineDeflateParameters(isFactory);
115         }
116       }
117       results.add(new DivinationResult(minimalZipEntry, divinedParameters));
118     }
119     return results;
120   }
121 
122   /**
123    * Returns an unmodifiable map whose keys are deflate strategies and whose values are the levels
124    * that make sense to try with the corresponding strategy, in the recommended testing order.
125    *
126    * <ul>
127    *   <li>For strategy 0, levels 1 through 9 (inclusive) are included.
128    *   <li>For strategy 1, levels 4 through 9 (inclusive) are included. Levels 1, 2 and 3 are
129    *       excluded because they behave the same under strategy 0.
130    *   <li>For strategy 2, only level 1 is included because the level is ignored under strategy 2.
131    * </ul>
132    *
133    * @return such a mapping
134    */
getLevelsByStrategy()135   private static Map<Integer, List<Integer>> getLevelsByStrategy() {
136     final Map<Integer, List<Integer>> levelsByStrategy = new HashMap<>();
137     // The best order for the levels is simply the order of popularity in the world, which is
138     // expected to be default (6), maximum compression (9), and fastest (1).
139     // The rest of the levels are rarely encountered and their order is mostly irrelevant.
140     levelsByStrategy.put(0, Collections.unmodifiableList(Arrays.asList(6, 9, 1, 4, 2, 3, 5, 7, 8)));
141     levelsByStrategy.put(1, Collections.unmodifiableList(Arrays.asList(6, 9, 4, 5, 7, 8)));
142     // Strategy 2 does not have the concept of levels, so vacuously call it 1.
143     levelsByStrategy.put(2, Collections.singletonList(1));
144     return Collections.unmodifiableMap(levelsByStrategy);
145   }
146 
147   /**
148    * Determines the original {@link JreDeflateParameters} that were used to compress a given piece
149    * of deflated delivery.
150    *
151    * @param compressedDataInputStreamFactory a {@link MultiViewInputStreamFactory} that can provide
152    *     multiple independent {@link InputStream} instances for the compressed delivery.
153    * @return the parameters that can be used to replicate the compressed delivery in the {@link
154    *     DefaultDeflateCompatibilityWindow}, if any; otherwise <code>null</code>. Note that <code>
155    *     null</code> is also returned in the case of <em>corrupt</em> zip delivery since, by definition,
156    *     it cannot be replicated via any combination of normal deflate parameters.
157    * @throws IOException if there is a problem reading the delivery, i.e. if the file contents are
158    *     changed while reading
159    */
divineDeflateParameters( MultiViewInputStreamFactory compressedDataInputStreamFactory)160   public JreDeflateParameters divineDeflateParameters(
161       MultiViewInputStreamFactory compressedDataInputStreamFactory) throws IOException {
162     byte[] copyBuffer = new byte[32 * 1024];
163     // Iterate over all relevant combinations of nowrap, strategy and level.
164     for (boolean nowrap : new boolean[] {true, false}) {
165       Inflater inflater = new Inflater(nowrap);
166       Deflater deflater = new Deflater(0, nowrap);
167 
168       strategy_loop:
169       for (int strategy : new int[] {0, 1, 2}) {
170         deflater.setStrategy(strategy);
171         for (int level : LEVELS_BY_STRATEGY.get(strategy)) {
172           deflater.setLevel(level);
173           inflater.reset();
174           deflater.reset();
175           try {
176             if (matches(inflater, deflater, compressedDataInputStreamFactory, copyBuffer)) {
177               end(inflater, deflater);
178               return JreDeflateParameters.of(level, strategy, nowrap);
179             }
180           } catch (ZipException e) {
181             // Parse error in input. The only possibilities are corruption or the wrong nowrap.
182             // Skip all remaining levels and strategies.
183             break strategy_loop;
184           }
185         }
186       }
187       end(inflater, deflater);
188     }
189     return null;
190   }
191 
192   /**
193    * Closes the (de)compressor and discards any unprocessed input. This method should be called when
194    * the (de)compressor is no longer being used. Once this method is called, the behavior
195    * De/Inflater is undefined.
196    *
197    * @see Inflater#end
198    * @see Deflater#end
199    */
end(Inflater inflater, Deflater deflater)200   private static void end(Inflater inflater, Deflater deflater) {
201     inflater.end();
202     deflater.end();
203   }
204 
205   /**
206    * Checks whether the specified deflater will produce the same compressed delivery as the byte
207    * stream.
208    *
209    * @param inflater the inflater for uncompressing the stream
210    * @param deflater the deflater for recompressing the output of the inflater
211    * @param copyBuffer buffer to use for copying bytes between the inflater and the deflater
212    * @return true if the specified deflater reproduces the bytes in compressedDataIn, otherwise
213    *     false
214    * @throws IOException if anything goes wrong; in particular, {@link ZipException} is thrown if
215    *     there is a problem parsing compressedDataIn
216    */
matches( Inflater inflater, Deflater deflater, MultiViewInputStreamFactory compressedDataInputStreamFactory, byte[] copyBuffer)217   private boolean matches(
218       Inflater inflater,
219       Deflater deflater,
220       MultiViewInputStreamFactory compressedDataInputStreamFactory,
221       byte[] copyBuffer)
222       throws IOException {
223 
224     try (MatchingOutputStream matcher =
225             new MatchingOutputStream(
226                 compressedDataInputStreamFactory.newStream(), copyBuffer.length);
227         InflaterInputStream inflaterIn =
228             new InflaterInputStream(
229                 compressedDataInputStreamFactory.newStream(), inflater, copyBuffer.length);
230         DeflaterOutputStream out = new DeflaterOutputStream(matcher, deflater, copyBuffer.length)) {
231       int numRead;
232       while ((numRead = inflaterIn.read(copyBuffer)) >= 0) {
233         out.write(copyBuffer, 0, numRead);
234       }
235       // When done, all bytes have been successfully recompressed. For sanity, check that
236       // the matcher has consumed the same number of bytes and arrived at EOF as well.
237       out.finish();
238       out.flush();
239       matcher.expectEof();
240       // At this point the delivery in the compressed output stream was a perfect match for the
241       // delivery in the compressed input stream; the answer has been found.
242       return true;
243     } catch (MismatchException e) {
244       // Fast-fail case when the compressed output stream doesn't match the compressed input
245       // stream. These are not the parameters you're looking for!
246       return false;
247     }
248   }
249 }
250