1 // Copyright 2016 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.google.archivepatcher.generator;
16 
17 import com.google.archivepatcher.shared.RandomAccessFileInputStream;
18 
19 import java.io.IOException;
20 import java.io.InputStream;
21 import java.util.zip.ZipException;
22 
23 /**
24  * A minimal set of zip-parsing utilities just adequate to produce a {@link MinimalZipEntry} and
25  * update it. This parser is neither robust nor exhaustive. The parser is built to understand
26  * version 2.0 of the ZIP specification, with the notable exception that it does not have support
27  * for encrypted central directories.
28  * <p>
29  * The offsets, lengths and fields that this parser understands and exposes are based on version
30  * 6.3.3 of the ZIP specification (the most recent available at the time of this writing), which may
31  * be found at the following URL:
32  * <br><ul><li>https://www.pkware.com/documents/APPNOTE/APPNOTE-6.3.3.TXT</li></ul>
33  * <p>
34  * Please note that the parser does not attempt to verify the version-needed-to-extract field, since
35  * there is no guarantee that all ZIP implementations have set the value correctly to the minimum
36  * needed to truly support extraction.
37  */
38 class MinimalZipParser {
39 
40   /**
41    * Standard 32-bit signature for a "end-of-central-directory" record in a ZIP-like archive. This
42    * is in little-endian order.
43    */
44   public static final int EOCD_SIGNATURE = 0x06054b50;
45 
46   /**
47    * Standard 32-bit signature for a "central directory entry" record in a ZIP-like archive. This is
48    * in little-endian order.
49    */
50   public static final int CENTRAL_DIRECTORY_ENTRY_SIGNATURE = 0x02014b50;
51 
52   /**
53    * Standard 32-bit signature for a "local file entry" in a ZIP-like archive. This is in
54    * little-endian order.
55    */
56   public static final int LOCAL_ENTRY_SIGNATURE = 0x04034b50;
57 
58   /**
59    * Read exactly one byte, throwing an exception if unsuccessful.
60    * @param in the stream to read from
61    * @return the byte read
62    * @throws IOException if EOF is reached
63    */
readByteOrDie(InputStream in)64   private static int readByteOrDie(InputStream in) throws IOException {
65     int result = in.read();
66     if (result == -1) {
67       throw new IOException("EOF");
68     }
69     return result;
70   }
71 
72   /**
73    * Skips exactly the specified number of bytes, throwing an exception if unsuccessful.
74    * @param in the stream to read from
75    * @param numBytes the number of bytes to skip
76    * @throws IOException if EOF is reached or no more bytes can be skipped
77    */
skipOrDie(InputStream in, long numBytes)78   private static void skipOrDie(InputStream in, long numBytes) throws IOException {
79     long numLeft = numBytes;
80     long numSkipped = 0;
81     while ((numSkipped = in.skip(numLeft)) > 0) {
82       numLeft -= numSkipped;
83     }
84     if (numLeft != 0) {
85       throw new IOException("Unable to skip");
86     }
87   }
88 
89   /**
90    * Reads 2 bytes from the current offset as an unsigned, 32-bit little-endian value.
91    * @param in the stream to read from
92    * @return the value as a java int
93    * @throws IOException if unable to read
94    */
read16BitUnsigned(InputStream in)95   private static int read16BitUnsigned(InputStream in) throws IOException {
96     int value = readByteOrDie(in);
97     value |= readByteOrDie(in) << 8;
98     return value;
99   }
100 
101   /**
102    * Reads 4 bytes from the current offset as an unsigned, 32-bit little-endian value.
103    * @param in the stream to read from
104    * @return the value as a java long
105    * @throws IOException if unable to read
106    */
read32BitUnsigned(InputStream in)107   private static long read32BitUnsigned(InputStream in) throws IOException {
108     long value = readByteOrDie(in);
109     value |= ((long) readByteOrDie(in)) << 8;
110     value |= ((long) readByteOrDie(in)) << 16;
111     value |= ((long) readByteOrDie(in)) << 24;
112     return value;
113   }
114 
115   /**
116    * Read exactly the specified amount of data into the specified buffer, throwing an exception if
117    * unsuccessful.
118    * @param in the stream to read from
119    * @param buffer the buffer to file
120    * @param offset the offset at which to start writing to the buffer
121    * @param length the number of bytes to place into the buffer from the input stream
122    * @throws IOException if unable to read
123    */
readOrDie(InputStream in, byte[] buffer, int offset, int length)124   private static void readOrDie(InputStream in, byte[] buffer, int offset, int length)
125       throws IOException {
126     if (length < 0) {
127       throw new IllegalArgumentException("length must be >= 0");
128     }
129     int numRead = 0;
130     while (numRead < length) {
131       int readThisRound = in.read(buffer, offset + numRead, length - numRead);
132       if (numRead == -1) {
133         throw new IOException("EOF");
134       }
135       numRead += readThisRound;
136     }
137   }
138 
139   /**
140    * Parse one central directory entry, starting at the current file position.
141    * @param in the input stream to read from, assumed to start at the first byte of the entry
142    * @return the entry that was parsed
143    * @throws IOException if unable to complete the parsing
144    */
parseCentralDirectoryEntry(InputStream in)145   public static MinimalZipEntry parseCentralDirectoryEntry(InputStream in) throws IOException {
146     // *** 4 bytes encode the CENTRAL_DIRECTORY_ENTRY_SIGNATURE, verify for sanity
147     // 2 bytes encode the version-made-by, ignore
148     // 2 bytes encode the version-needed-to-extract, ignore
149     // *** 2 bytes encode the general-purpose flags, read for language encoding. [READ THIS]
150     // *** 2 bytes encode the compression method, [READ THIS]
151     // 2 bytes encode the MSDOS last modified file time, ignore
152     // 2 bytes encode the MSDOS last modified file date, ignore
153     // *** 4 bytes encode the CRC32 of the uncompressed data [READ THIS]
154     // *** 4 bytes encode the compressed size [READ THIS]
155     // *** 4 bytes encode the uncompressed size [READ THIS]
156     // *** 2 bytes encode the length of the file name [READ THIS]
157     // *** 2 bytes encode the length of the extras, needed to skip the bytes later [READ THIS]
158     // *** 2 bytes encode the length of the comment, needed to skip the bytes later [READ THIS]
159     // 2 bytes encode the disk number, ignore
160     // 2 bytes encode the internal file attributes, ignore
161     // 4 bytes encode the external file attributes, ignore
162     // *** 4 bytes encode the offset of the local section entry, where the data is [READ THIS]
163     // n bytes encode the file name
164     // n bytes encode the extras
165     // n bytes encode the comment
166     if (((int) read32BitUnsigned(in)) != CENTRAL_DIRECTORY_ENTRY_SIGNATURE) {
167       throw new ZipException("Bad central directory header");
168     }
169     skipOrDie(in, 2 + 2); // Skip version stuff
170     int generalPurposeFlags = read16BitUnsigned(in);
171     int compressionMethod = read16BitUnsigned(in);
172     skipOrDie(in, 2 + 2); // Skip MSDOS junk
173     long crc32OfUncompressedData = read32BitUnsigned(in);
174     long compressedSize = read32BitUnsigned(in);
175     long uncompressedSize = read32BitUnsigned(in);
176     int fileNameLength = read16BitUnsigned(in);
177     int extrasLength = read16BitUnsigned(in);
178     int commentLength = read16BitUnsigned(in);
179     skipOrDie(in, 2 + 2 + 4); // Skip the disk number and file attributes
180     long fileOffsetOfLocalEntry = read32BitUnsigned(in);
181     byte[] fileNameBuffer = new byte[fileNameLength];
182     readOrDie(in, fileNameBuffer, 0, fileNameBuffer.length);
183     skipOrDie(in, extrasLength + commentLength);
184     // General purpose flag bit 11 is an important hint for the character set used for file names.
185     boolean generalPurposeFlagBit11 = (generalPurposeFlags & (0x1 << 10)) != 0;
186     return new MinimalZipEntry(
187         compressionMethod,
188         crc32OfUncompressedData,
189         compressedSize,
190         uncompressedSize,
191         fileNameBuffer,
192         generalPurposeFlagBit11,
193         fileOffsetOfLocalEntry);
194   }
195 
196   /**
197    * Parses one local file entry and returns the offset from the first byte at which the compressed
198    * data begins
199    * @param in the input stream to read from, assumed to start at the first byte of the entry
200    * @return as described
201    * @throws IOException if unable to complete the parsing
202    */
parseLocalEntryAndGetCompressedDataOffset(InputStream in)203   public static long parseLocalEntryAndGetCompressedDataOffset(InputStream in) throws IOException {
204     // *** 4 bytes encode the LOCAL_ENTRY_SIGNATURE, verify for sanity
205     // 2 bytes encode the version-needed-to-extract, ignore
206     // 2 bytes encode the general-purpose flags, ignore
207     // 2 bytes encode the compression method, ignore (redundant with central directory)
208     // 2 bytes encode the MSDOS last modified file time, ignore
209     // 2 bytes encode the MSDOS last modified file date, ignore
210     // 4 bytes encode the CRC32 of the uncompressed data, ignore (redundant with central directory)
211     // 4 bytes encode the compressed size, ignore (redundant with central directory)
212     // 4 bytes encode the uncompressed size, ignore (redundant with central directory)
213     // *** 2 bytes encode the length of the file name, needed to skip the bytes later [READ THIS]
214     // *** 2 bytes encode the length of the extras, needed to skip the bytes later [READ THIS]
215     // The rest is the data, which is the main attraction here.
216     if (((int) read32BitUnsigned(in)) != LOCAL_ENTRY_SIGNATURE) {
217       throw new ZipException("Bad local entry header");
218     }
219     int junkLength = 2 + 2 + 2 + 2 + 2 + 4 + 4 + 4;
220     skipOrDie(in, junkLength); // Skip everything up to the length of the file name
221     final int fileNameLength = read16BitUnsigned(in);
222     final int extrasLength = read16BitUnsigned(in);
223 
224     // The file name is already known and will match the central directory, so no need to read it.
225     // The extra field length can be different here versus in the central directory and is used for
226     // things like zipaligning APKs. This single value is the critical part as it dictates where the
227     // actual DATA for the entry begins.
228     return 4 + junkLength + 2 + 2 + fileNameLength + extrasLength;
229   }
230 
231   /**
232    * Find the end-of-central-directory record by scanning backwards from the end of a file looking
233    * for the signature of the record.
234    * @param in the file to read from
235    * @param searchBufferLength the length of the search buffer, starting from the end of the file
236    * @return the offset in the file at which the first byte of the EOCD signature is located, or -1
237    * if the signature is not found in the search buffer
238    * @throws IOException if there is a problem reading
239    */
locateStartOfEocd(RandomAccessFileInputStream in, int searchBufferLength)240   public static long locateStartOfEocd(RandomAccessFileInputStream in, int searchBufferLength)
241       throws IOException {
242     final int maxBufferSize = (int) Math.min(searchBufferLength, in.length());
243     final byte[] buffer = new byte[maxBufferSize];
244     final long rangeStart = in.length() - buffer.length;
245     in.setRange(rangeStart, buffer.length);
246     readOrDie(in, buffer, 0, buffer.length);
247     int offset = locateStartOfEocd(buffer);
248     if (offset == -1) {
249       return -1;
250     }
251     return rangeStart + offset;
252   }
253 
254   /**
255    * Find the end-of-central-directory record by scanning backwards looking for the signature of the
256    * record.
257    * @param buffer the buffer in which to search
258    * @return the offset in the buffer at which the first byte of the EOCD signature is located, or
259    * -1 if the complete signature is not found
260    */
locateStartOfEocd(byte[] buffer)261   public static int locateStartOfEocd(byte[] buffer) {
262     int last4Bytes = 0; // This is the 32 bits of data from the file
263     for (int offset = buffer.length - 1; offset >= 0; offset--) {
264       last4Bytes <<= 8;
265       last4Bytes |= buffer[offset];
266       if (last4Bytes == EOCD_SIGNATURE) {
267         return offset;
268       }
269     }
270     return -1;
271   }
272 
273   /**
274    * Parse the end-of-central-directory record and return the critical information from it.
275    * @param in the input stream to read from, assumed to start at the first byte of the entry
276    * @return the metadata
277    * @throws IOException if unable to read
278    * @throws ZipException if the metadata indicates this is a zip64 archive, which is not supported
279    */
parseEocd(InputStream in)280   public static MinimalCentralDirectoryMetadata parseEocd(InputStream in)
281       throws IOException, ZipException {
282     if (((int) read32BitUnsigned(in)) != EOCD_SIGNATURE) {
283       throw new ZipException("Bad eocd header");
284     }
285 
286     // *** 4 bytes encode EOCD_SIGNATURE, ignore (already found and verified).
287     // 2 bytes encode disk number for this archive, ignore.
288     // 2 bytes encode disk number for the central directory, ignore.
289     // 2 bytes encode num entries in the central directory on this disk, ignore.
290     // *** 2 bytes encode num entries in the central directory overall [READ THIS]
291     // *** 4 bytes encode the length of the central directory [READ THIS]
292     // *** 4 bytes encode the file offset of the central directory [READ THIS]
293     // 2 bytes encode the length of the zip file comment, ignore.
294     // Everything else from here to the EOF is the zip file comment, or junk. Ignore.
295     skipOrDie(in, 2 + 2 + 2);
296     int numEntriesInCentralDirectory = read16BitUnsigned(in);
297     if (numEntriesInCentralDirectory == 0xffff) {
298       // If 0xffff, this is a zip64 archive and this code doesn't handle that.
299       throw new ZipException("No support for zip64");
300     }
301     long lengthOfCentralDirectory = read32BitUnsigned(in);
302     long offsetOfCentralDirectory = read32BitUnsigned(in);
303     return new MinimalCentralDirectoryMetadata(
304         numEntriesInCentralDirectory, offsetOfCentralDirectory, lengthOfCentralDirectory);
305   }
306 }
307