// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.archivepatcher.generator;

import com.google.archivepatcher.shared.RandomAccessFileInputStream;

import java.io.IOException;
import java.io.InputStream;
import java.util.zip.ZipException;

/**
 * A minimal set of zip-parsing utilities just adequate to produce a {@link MinimalZipEntry} and
 * update it. This parser is neither robust nor exhaustive. The parser is built to understand
 * version 2.0 of the ZIP specification, with the notable exception that it does not have support
 * for encrypted central directories.
 * <p>
 * The offsets, lengths and fields that this parser understands and exposes are based on version
 * 6.3.3 of the ZIP specification (the most recent available at the time of this writing), which may
 * be found at the following URL:
 * <br><ul><li>https://www.pkware.com/documents/APPNOTE/APPNOTE-6.3.3.TXT</li></ul>
 * <p>
 * Please note that the parser does not attempt to verify the version-needed-to-extract field, since
 * there is no guarantee that all ZIP implementations have set the value correctly to the minimum
 * needed to truly support extraction.
 */
class MinimalZipParser {

  /**
   * Standard 32-bit signature for a "end-of-central-directory" record in a ZIP-like archive. This
   * is the value obtained when the four signature bytes are read in little-endian order.
   */
  public static final int EOCD_SIGNATURE = 0x06054b50;

  /**
   * Standard 32-bit signature for a "central directory entry" record in a ZIP-like archive. This is
   * the value obtained when the four signature bytes are read in little-endian order.
   */
  public static final int CENTRAL_DIRECTORY_ENTRY_SIGNATURE = 0x02014b50;

  /**
   * Standard 32-bit signature for a "local file entry" in a ZIP-like archive. This is the value
   * obtained when the four signature bytes are read in little-endian order.
   */
  public static final int LOCAL_ENTRY_SIGNATURE = 0x04034b50;

  /**
   * Mask selecting bit 11 (counting from bit 0) of the 16-bit general-purpose flags field.
   */
  private static final int GENERAL_PURPOSE_FLAG_BIT_11_MASK = 0x1 << 11;

  /**
   * Read exactly one byte, throwing an exception if unsuccessful.
   * @param in the stream to read from
   * @return the byte read, in the range 0-255
   * @throws IOException if EOF is reached
   */
  private static int readByteOrDie(InputStream in) throws IOException {
    int result = in.read();
    if (result == -1) {
      throw new IOException("EOF");
    }
    return result;
  }

  /**
   * Skips exactly the specified number of bytes, throwing an exception if unsuccessful.
   * @param in the stream to read from
   * @param numBytes the number of bytes to skip
   * @throws IOException if EOF is reached or no more bytes can be skipped
   */
  private static void skipOrDie(InputStream in, long numBytes) throws IOException {
    long numLeft = numBytes;
    long numSkipped;
    // skip() may skip fewer bytes than requested, so keep going until it makes no more progress.
    while ((numSkipped = in.skip(numLeft)) > 0) {
      numLeft -= numSkipped;
    }
    if (numLeft != 0) {
      throw new IOException("Unable to skip");
    }
  }

  /**
   * Reads 2 bytes from the current offset as an unsigned, 16-bit little-endian value.
   * @param in the stream to read from
   * @return the value as a java int
   * @throws IOException if unable to read
   */
  private static int read16BitUnsigned(InputStream in) throws IOException {
    int value = readByteOrDie(in);
    value |= readByteOrDie(in) << 8;
    return value;
  }

  /**
   * Reads 4 bytes from the current offset as an unsigned, 32-bit little-endian value.
   * @param in the stream to read from
   * @return the value as a java long
   * @throws IOException if unable to read
   */
  private static long read32BitUnsigned(InputStream in) throws IOException {
    long value = readByteOrDie(in);
    value |= ((long) readByteOrDie(in)) << 8;
    value |= ((long) readByteOrDie(in)) << 16;
    value |= ((long) readByteOrDie(in)) << 24;
    return value;
  }

  /**
   * Read exactly the specified amount of data into the specified buffer, throwing an exception if
   * unsuccessful.
   * @param in the stream to read from
   * @param buffer the buffer to fill
   * @param offset the offset at which to start writing to the buffer
   * @param length the number of bytes to place into the buffer from the input stream
   * @throws IOException if EOF is reached before {@code length} bytes have been read
   * @throws IllegalArgumentException if {@code length} is negative
   */
  private static void readOrDie(InputStream in, byte[] buffer, int offset, int length)
      throws IOException {
    if (length < 0) {
      throw new IllegalArgumentException("length must be >= 0");
    }
    int numRead = 0;
    while (numRead < length) {
      int readThisRound = in.read(buffer, offset + numRead, length - numRead);
      // The EOF marker is the value returned by this read, not the running total; it must be
      // checked before it is added to numRead.
      if (readThisRound == -1) {
        throw new IOException("EOF");
      }
      numRead += readThisRound;
    }
  }

  /**
   * Parse one central directory entry, starting at the current file position.
   * @param in the input stream to read from, assumed to start at the first byte of the entry
   * @return the entry that was parsed
   * @throws IOException if unable to complete the parsing
   */
  public static MinimalZipEntry parseCentralDirectoryEntry(InputStream in) throws IOException {
    // *** 4 bytes encode the CENTRAL_DIRECTORY_ENTRY_SIGNATURE, verify for sanity
    //     2 bytes encode the version-made-by, ignore
    //     2 bytes encode the version-needed-to-extract, ignore
    // *** 2 bytes encode the general-purpose flags, read for language encoding. [READ THIS]
    // *** 2 bytes encode the compression method, [READ THIS]
    //     2 bytes encode the MSDOS last modified file time, ignore
    //     2 bytes encode the MSDOS last modified file date, ignore
    // *** 4 bytes encode the CRC32 of the uncompressed data [READ THIS]
    // *** 4 bytes encode the compressed size [READ THIS]
    // *** 4 bytes encode the uncompressed size [READ THIS]
    // *** 2 bytes encode the length of the file name [READ THIS]
    // *** 2 bytes encode the length of the extras, needed to skip the bytes later [READ THIS]
    // *** 2 bytes encode the length of the comment, needed to skip the bytes later [READ THIS]
    //     2 bytes encode the disk number, ignore
    //     2 bytes encode the internal file attributes, ignore
    //     4 bytes encode the external file attributes, ignore
    // *** 4 bytes encode the offset of the local section entry, where the data is [READ THIS]
    //     n bytes encode the file name
    //     n bytes encode the extras
    //     n bytes encode the comment
    if (((int) read32BitUnsigned(in)) != CENTRAL_DIRECTORY_ENTRY_SIGNATURE) {
      throw new ZipException("Bad central directory header");
    }
    skipOrDie(in, 2 + 2); // Skip version stuff
    int generalPurposeFlags = read16BitUnsigned(in);
    int compressionMethod = read16BitUnsigned(in);
    skipOrDie(in, 2 + 2); // Skip MSDOS junk
    long crc32OfUncompressedData = read32BitUnsigned(in);
    long compressedSize = read32BitUnsigned(in);
    long uncompressedSize = read32BitUnsigned(in);
    int fileNameLength = read16BitUnsigned(in);
    int extrasLength = read16BitUnsigned(in);
    int commentLength = read16BitUnsigned(in);
    skipOrDie(in, 2 + 2 + 4); // Skip the disk number and file attributes
    long fileOffsetOfLocalEntry = read32BitUnsigned(in);
    byte[] fileNameBuffer = new byte[fileNameLength];
    readOrDie(in, fileNameBuffer, 0, fileNameBuffer.length);
    skipOrDie(in, extrasLength + commentLength);
    // General purpose flag bit 11 is an important hint for the character set used for file names.
    // Bits are numbered from zero, so bit 11 corresponds to the mask 0x0800.
    boolean generalPurposeFlagBit11 =
        (generalPurposeFlags & GENERAL_PURPOSE_FLAG_BIT_11_MASK) != 0;
    return new MinimalZipEntry(
        compressionMethod,
        crc32OfUncompressedData,
        compressedSize,
        uncompressedSize,
        fileNameBuffer,
        generalPurposeFlagBit11,
        fileOffsetOfLocalEntry);
  }

  /**
   * Parses one local file entry and returns the offset from the first byte at which the compressed
   * data begins
   * @param in the input stream to read from, assumed to start at the first byte of the entry
   * @return as described
   * @throws IOException if unable to complete the parsing
   */
  public static long parseLocalEntryAndGetCompressedDataOffset(InputStream in) throws IOException {
    // *** 4 bytes encode the LOCAL_ENTRY_SIGNATURE, verify for sanity
    //     2 bytes encode the version-needed-to-extract, ignore
    //     2 bytes encode the general-purpose flags, ignore
    //     2 bytes encode the compression method, ignore (redundant with central directory)
    //     2 bytes encode the MSDOS last modified file time, ignore
    //     2 bytes encode the MSDOS last modified file date, ignore
    //     4 bytes encode the CRC32 of the uncompressed data, ignore (redundant with central directory)
    //     4 bytes encode the compressed size, ignore (redundant with central directory)
    //     4 bytes encode the uncompressed size, ignore (redundant with central directory)
    // *** 2 bytes encode the length of the file name, needed to skip the bytes later [READ THIS]
    // *** 2 bytes encode the length of the extras, needed to skip the bytes later [READ THIS]
    //     The rest is the data, which is the main attraction here.
    if (((int) read32BitUnsigned(in)) != LOCAL_ENTRY_SIGNATURE) {
      throw new ZipException("Bad local entry header");
    }
    int junkLength = 2 + 2 + 2 + 2 + 2 + 4 + 4 + 4;
    skipOrDie(in, junkLength); // Skip everything up to the length of the file name
    final int fileNameLength = read16BitUnsigned(in);
    final int extrasLength = read16BitUnsigned(in);

    // The file name is already known and will match the central directory, so no need to read it.
    // The extra field length can be different here versus in the central directory and is used for
    // things like zipaligning APKs. This single value is the critical part as it dictates where the
    // actual DATA for the entry begins.
    // Signature (4) + skipped fields + the two 2-byte length fields + the variable-length parts.
    return 4 + junkLength + 2 + 2 + fileNameLength + extrasLength;
  }

  /**
   * Find the end-of-central-directory record by scanning backwards from the end of a file looking
   * for the signature of the record.
   * @param in the file to read from
   * @param searchBufferLength the length of the search buffer, starting from the end of the file
   * @return the offset in the file at which the first byte of the EOCD signature is located, or -1
   * if the signature is not found in the search buffer
   * @throws IOException if there is a problem reading
   */
  public static long locateStartOfEocd(RandomAccessFileInputStream in, int searchBufferLength)
      throws IOException {
    // Never try to buffer more than the file actually contains.
    final int maxBufferSize = (int) Math.min(searchBufferLength, in.length());
    final byte[] buffer = new byte[maxBufferSize];
    final long rangeStart = in.length() - buffer.length;
    // NOTE(review): setRange presumably positions the stream at the tail of the file so the read
    // below fills the buffer with the last buffer.length bytes - confirm against
    // RandomAccessFileInputStream.
    in.setRange(rangeStart, buffer.length);
    readOrDie(in, buffer, 0, buffer.length);
    int offset = locateStartOfEocd(buffer);
    if (offset == -1) {
      return -1;
    }
    // Translate the buffer-relative offset back into a file offset.
    return rangeStart + offset;
  }

  /**
   * Find the end-of-central-directory record by scanning backwards looking for the signature of the
   * record.
   * @param buffer the buffer in which to search
   * @return the offset in the buffer at which the first byte of the EOCD signature is located, or
   * -1 if the complete signature is not found
   */
  public static int locateStartOfEocd(byte[] buffer) {
    int last4Bytes = 0; // This is the 32 bits of data from the file
    for (int offset = buffer.length - 1; offset >= 0; offset--) {
      // Walking backwards and shifting left means the byte at the lowest offset ends up in the
      // low-order position, i.e. the accumulator holds the little-endian value that starts at
      // the current offset.
      last4Bytes <<= 8;
      // Mask so that a byte >= 0x80 is not sign-extended into the upper bits.
      last4Bytes |= buffer[offset] & 0xff;
      if (last4Bytes == EOCD_SIGNATURE) {
        return offset;
      }
    }
    return -1;
  }

  /**
   * Parse the end-of-central-directory record and return the critical information from it.
   * @param in the input stream to read from, assumed to start at the first byte of the entry
   * @return the metadata
   * @throws IOException if unable to read
   * @throws ZipException if the metadata indicates this is a zip64 archive, which is not supported
   */
  public static MinimalCentralDirectoryMetadata parseEocd(InputStream in)
      throws IOException, ZipException {
    if (((int) read32BitUnsigned(in)) != EOCD_SIGNATURE) {
      throw new ZipException("Bad eocd header");
    }

    // *** 4 bytes encode EOCD_SIGNATURE, ignore (already found and verified).
    //     2 bytes encode disk number for this archive, ignore.
    //     2 bytes encode disk number for the central directory, ignore.
    //     2 bytes encode num entries in the central directory on this disk, ignore.
    // *** 2 bytes encode num entries in the central directory overall [READ THIS]
    // *** 4 bytes encode the length of the central directory [READ THIS]
    // *** 4 bytes encode the file offset of the central directory [READ THIS]
    //     2 bytes encode the length of the zip file comment, ignore.
    //     Everything else from here to the EOF is the zip file comment, or junk. Ignore.
    skipOrDie(in, 2 + 2 + 2);
    int numEntriesInCentralDirectory = read16BitUnsigned(in);
    if (numEntriesInCentralDirectory == 0xffff) {
      // If 0xffff, this is a zip64 archive and this code doesn't handle that.
      throw new ZipException("No support for zip64");
    }
    long lengthOfCentralDirectory = read32BitUnsigned(in);
    long offsetOfCentralDirectory = read32BitUnsigned(in);
    return new MinimalCentralDirectoryMetadata(
        numEntriesInCentralDirectory, offsetOfCentralDirectory, lengthOfCentralDirectory);
  }
}