1 // Copyright 2016 Google Inc. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package com.google.archivepatcher.generator; 16 17 import com.google.archivepatcher.shared.ByteArrayInputStreamFactory; 18 import com.google.archivepatcher.shared.DefaultDeflateCompatibilityWindow; 19 import com.google.archivepatcher.shared.JreDeflateParameters; 20 import com.google.archivepatcher.shared.MultiViewInputStreamFactory; 21 import com.google.archivepatcher.shared.RandomAccessFileInputStreamFactory; 22 import java.io.File; 23 import java.io.IOException; 24 import java.io.InputStream; 25 import java.util.ArrayList; 26 import java.util.Arrays; 27 import java.util.Collections; 28 import java.util.HashMap; 29 import java.util.List; 30 import java.util.Map; 31 import java.util.zip.Deflater; 32 import java.util.zip.DeflaterOutputStream; 33 import java.util.zip.Inflater; 34 import java.util.zip.InflaterInputStream; 35 import java.util.zip.ZipException; 36 37 /** 38 * Divines information about the compression used for a resource that has been compressed with a 39 * deflate-compatible algorithm. This implementation produces results that are valid within the 40 * {@link DefaultDeflateCompatibilityWindow}. 41 */ 42 public class DefaultDeflateCompressionDiviner { 43 44 /** The levels to try for each strategy, in the order to attempt them. */ 45 private static final Map<Integer, List<Integer>> LEVELS_BY_STRATEGY = getLevelsByStrategy(); 46 47 /** 48 * A simple struct that contains a {@link MinimalZipEntry} describing a specific entry from a zip 49 * archive along with an optional accompanying {@link JreDeflateParameters} describing the 50 * original compression settings that were used to generate the compressed delivery in that entry. 51 */ 52 public static class DivinationResult { 53 /** 54 * The {@link MinimalZipEntry} for the result; never null. 55 */ 56 public final MinimalZipEntry minimalZipEntry; 57 58 /** 59 * The {@link JreDeflateParameters} for the result, possibly null. This value is only set if 60 * {@link MinimalZipEntry#isDeflateCompressed()} is true <em>and</em> the compression settings 61 * were successfully divined. 62 */ 63 public final JreDeflateParameters divinedParameters; 64 65 /** 66 * Creates a new result with the specified fields. 67 * @param minimalZipEntry the zip entry 68 * @param divinedParameters the parameters 69 */ DivinationResult( MinimalZipEntry minimalZipEntry, JreDeflateParameters divinedParameters)70 public DivinationResult( 71 MinimalZipEntry minimalZipEntry, JreDeflateParameters divinedParameters) { 72 if (minimalZipEntry == null) { 73 throw new IllegalArgumentException("minimalZipEntry cannot be null"); 74 } 75 this.minimalZipEntry = minimalZipEntry; 76 this.divinedParameters = divinedParameters; 77 } 78 } 79 80 /** 81 * Load the specified archive and attempt to divine deflate parameters for all entries within. 82 * @param archiveFile the archive file to work on 83 * @return a list of results for each entry in the archive, in file order (not central directory 84 * order). There is exactly one result per entry, regardless of whether or not that entry is 85 * compressed. Callers can filter results by checking 86 * {@link MinimalZipEntry#getCompressionMethod()} to see if the result is or is not compressed, 87 * and by checking whether a non-null {@link JreDeflateParameters} was obtained. 88 * @throws IOException if unable to read or parse the file 89 * @see DivinationResult 90 */ divineDeflateParameters(File archiveFile)91 public List<DivinationResult> divineDeflateParameters(File archiveFile) throws IOException { 92 List<DivinationResult> results = new ArrayList<>(); 93 for (MinimalZipEntry minimalZipEntry : MinimalZipArchive.listEntries(archiveFile)) { 94 JreDeflateParameters divinedParameters = null; 95 if (minimalZipEntry.isDeflateCompressed()) { 96 // TODO(pasc): Reuse streams to avoid churning file descriptors 97 MultiViewInputStreamFactory isFactory = 98 new RandomAccessFileInputStreamFactory( 99 archiveFile, 100 minimalZipEntry.getFileOffsetOfCompressedData(), 101 minimalZipEntry.getCompressedSize()); 102 103 // Keep small entries in memory to avoid unnecessary file I/O. 104 if (minimalZipEntry.getCompressedSize() < (100 * 1024)) { 105 try (InputStream is = isFactory.newStream()) { 106 byte[] compressedBytes = new byte[(int) minimalZipEntry.getCompressedSize()]; 107 is.read(compressedBytes); 108 divinedParameters = 109 divineDeflateParameters(new ByteArrayInputStreamFactory(compressedBytes)); 110 } catch (Exception ignore) { 111 divinedParameters = null; 112 } 113 } else { 114 divinedParameters = divineDeflateParameters(isFactory); 115 } 116 } 117 results.add(new DivinationResult(minimalZipEntry, divinedParameters)); 118 } 119 return results; 120 } 121 122 /** 123 * Returns an unmodifiable map whose keys are deflate strategies and whose values are the levels 124 * that make sense to try with the corresponding strategy, in the recommended testing order. 125 * 126 * <ul> 127 * <li>For strategy 0, levels 1 through 9 (inclusive) are included. 128 * <li>For strategy 1, levels 4 through 9 (inclusive) are included. Levels 1, 2 and 3 are 129 * excluded because they behave the same under strategy 0. 130 * <li>For strategy 2, only level 1 is included because the level is ignored under strategy 2. 131 * </ul> 132 * 133 * @return such a mapping 134 */ getLevelsByStrategy()135 private static Map<Integer, List<Integer>> getLevelsByStrategy() { 136 final Map<Integer, List<Integer>> levelsByStrategy = new HashMap<>(); 137 // The best order for the levels is simply the order of popularity in the world, which is 138 // expected to be default (6), maximum compression (9), and fastest (1). 139 // The rest of the levels are rarely encountered and their order is mostly irrelevant. 140 levelsByStrategy.put(0, Collections.unmodifiableList(Arrays.asList(6, 9, 1, 4, 2, 3, 5, 7, 8))); 141 levelsByStrategy.put(1, Collections.unmodifiableList(Arrays.asList(6, 9, 4, 5, 7, 8))); 142 // Strategy 2 does not have the concept of levels, so vacuously call it 1. 143 levelsByStrategy.put(2, Collections.singletonList(1)); 144 return Collections.unmodifiableMap(levelsByStrategy); 145 } 146 147 /** 148 * Determines the original {@link JreDeflateParameters} that were used to compress a given piece 149 * of deflated delivery. 150 * 151 * @param compressedDataInputStreamFactory a {@link MultiViewInputStreamFactory} that can provide 152 * multiple independent {@link InputStream} instances for the compressed delivery. 153 * @return the parameters that can be used to replicate the compressed delivery in the {@link 154 * DefaultDeflateCompatibilityWindow}, if any; otherwise <code>null</code>. Note that <code> 155 * null</code> is also returned in the case of <em>corrupt</em> zip delivery since, by definition, 156 * it cannot be replicated via any combination of normal deflate parameters. 157 * @throws IOException if there is a problem reading the delivery, i.e. if the file contents are 158 * changed while reading 159 */ divineDeflateParameters( MultiViewInputStreamFactory compressedDataInputStreamFactory)160 public JreDeflateParameters divineDeflateParameters( 161 MultiViewInputStreamFactory compressedDataInputStreamFactory) throws IOException { 162 byte[] copyBuffer = new byte[32 * 1024]; 163 // Iterate over all relevant combinations of nowrap, strategy and level. 164 for (boolean nowrap : new boolean[] {true, false}) { 165 Inflater inflater = new Inflater(nowrap); 166 Deflater deflater = new Deflater(0, nowrap); 167 168 strategy_loop: 169 for (int strategy : new int[] {0, 1, 2}) { 170 deflater.setStrategy(strategy); 171 for (int level : LEVELS_BY_STRATEGY.get(strategy)) { 172 deflater.setLevel(level); 173 inflater.reset(); 174 deflater.reset(); 175 try { 176 if (matches(inflater, deflater, compressedDataInputStreamFactory, copyBuffer)) { 177 end(inflater, deflater); 178 return JreDeflateParameters.of(level, strategy, nowrap); 179 } 180 } catch (ZipException e) { 181 // Parse error in input. The only possibilities are corruption or the wrong nowrap. 182 // Skip all remaining levels and strategies. 183 break strategy_loop; 184 } 185 } 186 } 187 end(inflater, deflater); 188 } 189 return null; 190 } 191 192 /** 193 * Closes the (de)compressor and discards any unprocessed input. This method should be called when 194 * the (de)compressor is no longer being used. Once this method is called, the behavior 195 * De/Inflater is undefined. 196 * 197 * @see Inflater#end 198 * @see Deflater#end 199 */ end(Inflater inflater, Deflater deflater)200 private static void end(Inflater inflater, Deflater deflater) { 201 inflater.end(); 202 deflater.end(); 203 } 204 205 /** 206 * Checks whether the specified deflater will produce the same compressed delivery as the byte 207 * stream. 208 * 209 * @param inflater the inflater for uncompressing the stream 210 * @param deflater the deflater for recompressing the output of the inflater 211 * @param copyBuffer buffer to use for copying bytes between the inflater and the deflater 212 * @return true if the specified deflater reproduces the bytes in compressedDataIn, otherwise 213 * false 214 * @throws IOException if anything goes wrong; in particular, {@link ZipException} is thrown if 215 * there is a problem parsing compressedDataIn 216 */ matches( Inflater inflater, Deflater deflater, MultiViewInputStreamFactory compressedDataInputStreamFactory, byte[] copyBuffer)217 private boolean matches( 218 Inflater inflater, 219 Deflater deflater, 220 MultiViewInputStreamFactory compressedDataInputStreamFactory, 221 byte[] copyBuffer) 222 throws IOException { 223 224 try (MatchingOutputStream matcher = 225 new MatchingOutputStream( 226 compressedDataInputStreamFactory.newStream(), copyBuffer.length); 227 InflaterInputStream inflaterIn = 228 new InflaterInputStream( 229 compressedDataInputStreamFactory.newStream(), inflater, copyBuffer.length); 230 DeflaterOutputStream out = new DeflaterOutputStream(matcher, deflater, copyBuffer.length)) { 231 int numRead; 232 while ((numRead = inflaterIn.read(copyBuffer)) >= 0) { 233 out.write(copyBuffer, 0, numRead); 234 } 235 // When done, all bytes have been successfully recompressed. For sanity, check that 236 // the matcher has consumed the same number of bytes and arrived at EOF as well. 237 out.finish(); 238 out.flush(); 239 matcher.expectEof(); 240 // At this point the delivery in the compressed output stream was a perfect match for the 241 // delivery in the compressed input stream; the answer has been found. 242 return true; 243 } catch (MismatchException e) { 244 // Fast-fail case when the compressed output stream doesn't match the compressed input 245 // stream. These are not the parameters you're looking for! 246 return false; 247 } 248 } 249 } 250