1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 /*
20  * Read-only access to Zip archives, with minimal heap allocation.
21  */
22 
23 #include <stdint.h>
24 #include <string.h>
25 #include <sys/cdefs.h>
26 #include <sys/types.h>
27 
28 #include <functional>
29 #include <string>
30 #include <string_view>
31 
32 #include "android-base/off64_t.h"
33 
/* Compression methods understood by this library. */
enum {
  kCompressStored = 0,    // entry data is stored without compression
  kCompressDeflated = 8,  // entry data is compressed with standard deflate
};
39 
// This struct holds the common information of a zip entry other than the
// entry size. The compressed and uncompressed lengths are handled separately
// in the derived structs.
//
// Every field has a default member initializer so that a default-constructed
// entry never exposes indeterminate values.
struct ZipEntryCommon {
  // Compression method. One of kCompressStored or kCompressDeflated.
  // See also `gpbf` for deflate subtypes.
  uint16_t method{0};

  // Modification time. The zipfile format specifies
  // that the first two little endian bytes contain the time
  // and the last two little endian bytes contain the date.
  // See `GetModificationTime`. Use signed integer to avoid the
  // sub-overflow.
  // TODO: should be overridden by extra time field, if present.
  int32_t mod_time{0};

  // Returns `mod_time` as a broken-down struct tm.
  struct tm GetModificationTime() const;

  // Suggested Unix mode for this entry, from the zip archive if created on
  // Unix, or a default otherwise. See also `external_file_attributes`.
  mode_t unix_mode{0};

  // 1 if this entry contains a data descriptor segment, 0
  // otherwise.
  uint8_t has_data_descriptor{0};

  // Crc32 value of this ZipEntry. This information might
  // either be stored in the local file header or in a special
  // Data descriptor footer at the end of the file entry.
  uint32_t crc32{0};

  // True if the values of uncompressed length and compressed length are
  // stored in the zip64 extended info of the extra field.
  bool zip64_format_size{false};

  // The offset to the start of data for this ZipEntry.
  off64_t offset{0};

  // The version of zip and the host file system this came from (for zipinfo).
  uint16_t version_made_by{0};

  // The raw attributes, whose interpretation depends on the host
  // file system in `version_made_by` (for zipinfo). See also `unix_mode`.
  uint32_t external_file_attributes{0};

  // Specifics about the deflation (for zipinfo).
  uint16_t gpbf{0};
  // Whether this entry is believed to be text or binary (for zipinfo).
  bool is_text{false};
};
91 
92 struct ZipEntry64;
93 // Many users of the library assume the entry size is capped at UNIT32_MAX. So we keep
94 // the interface for the old ZipEntry here; and we could switch them over to the new
95 // ZipEntry64 later.
96 struct ZipEntry : public ZipEntryCommon {
97   // Compressed length of this ZipEntry. The maximum value is UNIT32_MAX.
98   // Might be present either in the local file header or in the data
99   // descriptor footer.
100   uint32_t compressed_length{0};
101 
102   // Uncompressed length of this ZipEntry. The maximum value is UNIT32_MAX.
103   // Might be present either in the local file header or in the data
104   // descriptor footer.
105   uint32_t uncompressed_length{0};
106 
107   // Copies the contents of a ZipEntry64 object to a 32 bits ZipEntry. Returns 0 if the
108   // size of the entry fits into uint32_t, returns a negative error code
109   // (kUnsupportedEntrySize) otherwise.
110   static int32_t CopyFromZipEntry64(ZipEntry* dst, const ZipEntry64* src);
111 
112  private:
113   ZipEntry& operator=(const ZipEntryCommon& other) {
114     ZipEntryCommon::operator=(other);
115     return *this;
116   }
117 };
118 
119 // Represents information about a zip entry in a zip file.
120 struct ZipEntry64 : public ZipEntryCommon {
121   // Compressed length of this ZipEntry. The maximum value is UNIT64_MAX.
122   // Might be present either in the local file header, the zip64 extended field,
123   // or in the data descriptor footer.
124   uint64_t compressed_length{0};
125 
126   // Uncompressed length of this ZipEntry. The maximum value is UNIT64_MAX.
127   // Might be present either in the local file header, the zip64 extended field,
128   // or in the data descriptor footer.
129   uint64_t uncompressed_length{0};
130 
131   explicit ZipEntry64() = default;
ZipEntry64ZipEntry64132   explicit ZipEntry64(const ZipEntry& zip_entry) : ZipEntryCommon(zip_entry) {
133     compressed_length = zip_entry.compressed_length;
134     uncompressed_length = zip_entry.uncompressed_length;
135   }
136 };
137 
// ZipArchive is only forward-declared here; callers refer to an archive
// through a pointer-typed handle.
struct ZipArchive;
using ZipArchiveHandle = ZipArchive*;
140 
141 /*
142  * Open a Zip archive, and sets handle to the value of the opaque
143  * handle for the file. This handle must be released by calling
144  * CloseArchive with this handle.
145  *
146  * Returns 0 on success, and negative values on failure.
147  */
148 int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle);
149 
150 /*
151  * Like OpenArchive, but takes a file descriptor open for reading
152  * at the start of the file.  The descriptor must be mappable (this does
153  * not allow access to a stream).
154  *
155  * Sets handle to the value of the opaque handle for this file descriptor.
156  * This handle must be released by calling CloseArchive with this handle.
157  *
158  * If assume_ownership parameter is 'true' calling CloseArchive will close
159  * the file.
160  *
161  * This function maps and scans the central directory and builds a table
162  * of entries for future lookups.
163  *
164  * "debugFileName" will appear in error messages, but is not otherwise used.
165  *
166  * Returns 0 on success, and negative values on failure.
167  */
168 int32_t OpenArchiveFd(const int fd, const char* debugFileName, ZipArchiveHandle* handle,
169                       bool assume_ownership = true);
170 
171 int32_t OpenArchiveFdRange(const int fd, const char* debugFileName, ZipArchiveHandle* handle,
172                            off64_t length, off64_t offset, bool assume_ownership = true);
173 
174 int32_t OpenArchiveFromMemory(const void* address, size_t length, const char* debugFileName,
175                               ZipArchiveHandle* handle);
176 /*
177  * Close archive, releasing resources associated with it. This will
178  * unmap the central directory of the zipfile and free all internal
179  * data structures associated with the file. It is an error to use
180  * this handle for any further operations without an intervening
181  * call to one of the OpenArchive variants.
182  */
183 void CloseArchive(ZipArchiveHandle archive);
184 
/** Summary of an archive, as returned by GetArchiveInfo(). */
struct ZipArchiveInfo {
  /** Size of the archive itself, in bytes. Used by zipinfo. */
  off64_t archive_size;
  /** Number of entries contained in the archive. */
  uint64_t entry_count;
};
192 
193 /**
194  * Returns information about the given archive.
195  */
196 ZipArchiveInfo GetArchiveInfo(ZipArchiveHandle archive);
197 
198 /*
199  * Find an entry in the Zip archive, by name. |data| must be non-null.
200  *
201  * Returns 0 if an entry is found, and populates |data| with information
202  * about this entry. Returns negative values otherwise.
203  *
204  * It's important to note that |data->crc32|, |data->compLen| and
205  * |data->uncompLen| might be set to values from the central directory
206  * if this file entry contains a data descriptor footer. To verify crc32s
207  * and length, a call to VerifyCrcAndLengths must be made after entry data
208  * has been processed.
209  *
210  * On non-Windows platforms this method does not modify internal state and
211  * can be called concurrently.
212  */
213 int32_t FindEntry(const ZipArchiveHandle archive, const std::string_view entryName,
214                   ZipEntry64* data);
215 
216 /*
217  * Start iterating over all entries of a zip file. The order of iteration
218  * is not guaranteed to be the same as the order of elements
219  * in the central directory but is stable for a given zip file. |cookie| will
220  * contain the value of an opaque cookie which can be used to make one or more
221  * calls to Next. All calls to StartIteration must be matched by a call to
222  * EndIteration to free any allocated memory.
223  *
224  * This method also accepts optional prefix and suffix to restrict iteration to
225  * entry names that start with |optional_prefix| or end with |optional_suffix|.
226  *
227  * Returns 0 on success and negative values on failure.
228  */
229 int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
230                        const std::string_view optional_prefix = "",
231                        const std::string_view optional_suffix = "");
232 
233 /*
234  * Start iterating over all entries of a zip file. Use the matcher functor to
235  * restrict iteration to entry names that make the functor return true.
236  *
237  * Returns 0 on success and negative values on failure.
238  */
239 int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
240                        std::function<bool(std::string_view entry_name)> matcher);
241 
242 /*
243  * Advance to the next element in the zipfile in iteration order.
244  *
245  * Returns 0 on success, -1 if there are no more elements in this
246  * archive and lower negative values on failure.
247  */
248 int32_t Next(void* cookie, ZipEntry64* data, std::string_view* name);
249 int32_t Next(void* cookie, ZipEntry64* data, std::string* name);
250 
/*
 * Finishes an iteration over the entries of a zip file and frees the memory
 * that StartIteration allocated for |cookie|.
 */
void EndIteration(void* cookie);
256 
257 /*
258  * Uncompress and write an entry to an open file identified by |fd|.
259  * |entry->uncompressed_length| bytes will be written to the file at
260  * its current offset, and the file will be truncated at the end of
261  * the uncompressed data (no truncation if |fd| references a block
262  * device).
263  *
264  * Returns 0 on success and negative values on failure.
265  */
266 int32_t ExtractEntryToFile(ZipArchiveHandle archive, const ZipEntry64* entry, int fd);
267 
268 /**
269  * Uncompress a given zip entry to the memory region at |begin| and of
270  * size |size|. This size is expected to be the same as the *declared*
271  * uncompressed length of the zip entry. It is an error if the *actual*
272  * number of uncompressed bytes differs from this number.
273  *
274  * Returns 0 on success and negative values on failure.
275  */
276 int32_t ExtractToMemory(ZipArchiveHandle archive, const ZipEntry64* entry, uint8_t* begin,
277                         size_t size);
278 
279 int GetFileDescriptor(const ZipArchiveHandle archive);
280 
281 /**
282  * Returns the offset of the zip archive in the backing file descriptor, or 0 if the zip archive is
283  * not backed by a file descriptor.
284  */
285 off64_t GetFileDescriptorOffset(const ZipArchiveHandle archive);
286 
/**
 * Returns a string describing |error_code|.
 * NOTE(review): presumably |error_code| is one of the negative values
 * returned by the functions declared in this header - confirm.
 */
const char* ErrorCodeString(int32_t error_code);
288 
289 // Many users of libziparchive assume the entry size to be 32 bits long. So we keep these
290 // interfaces that use 32 bit ZipEntry to make old code work. TODO(xunchang) Remove the 32 bit
291 // wrapper functions once we switch all users to recognize ZipEntry64.
292 int32_t FindEntry(const ZipArchiveHandle archive, const std::string_view entryName, ZipEntry* data);
293 int32_t Next(void* cookie, ZipEntry* data, std::string* name);
294 int32_t Next(void* cookie, ZipEntry* data, std::string_view* name);
295 int32_t ExtractEntryToFile(ZipArchiveHandle archive, const ZipEntry* entry, int fd);
296 int32_t ExtractToMemory(ZipArchiveHandle archive, const ZipEntry* entry, uint8_t* begin,
297                         size_t size);
298 
299 //
300 // This gets defined for the version of the library that need to control all
301 // code accessing the zip file. Details in incfs_support/signal_handling.h
302 //
303 #if !ZIPARCHIVE_DISABLE_CALLBACK_API
304 
305 #if !defined(_WIN32)
306 typedef bool (*ProcessZipEntryFunction)(const uint8_t* buf, size_t buf_size, void* cookie);
307 
308 /*
309  * Stream the uncompressed data through the supplied function,
310  * passing cookie to it each time it gets called.
311  */
312 int32_t ProcessZipEntryContents(ZipArchiveHandle archive, const ZipEntry* entry,
313                                 ProcessZipEntryFunction func, void* cookie);
314 int32_t ProcessZipEntryContents(ZipArchiveHandle archive, const ZipEntry64* entry,
315                                 ProcessZipEntryFunction func, void* cookie);
316 #endif  // !defined(_WIN32)
317 
318 #endif  // !ZIPARCHIVE_DISABLE_CALLBACK_API
319 
320 namespace zip_archive {
321 
// Abstract, non-copyable sink for data. Concrete writers derive from this
// class and implement Append().
class Writer {
 public:
  // Hands |buf_size| bytes starting at |buf| to the writer.
  // NOTE(review): presumably returns true on success - confirm.
  virtual bool Append(uint8_t* buf, size_t buf_size) = 0;
  virtual ~Writer();

 protected:
  // Only derived classes may construct a Writer.
  Writer() = default;

 private:
  // Copying is disabled.
  Writer(const Writer&) = delete;
  void operator=(const Writer&) = delete;
};
334 
// Abstract, non-copyable source of data. Concrete readers derive from this
// class and implement ReadAtOffset().
class Reader {
 public:
  // Reads |len| bytes located at |offset| into |buf|.
  // NOTE(review): presumably returns true on success - confirm.
  virtual bool ReadAtOffset(uint8_t* buf, size_t len, off64_t offset) const = 0;
  virtual ~Reader();

 protected:
  // Only derived classes may construct a Reader.
  Reader() = default;

 private:
  // Copying is disabled.
  Reader(const Reader&) = delete;
  void operator=(const Reader&) = delete;
};
347 
348 //
349 // This gets defined for the version of the library that need to control all
350 // code accessing the zip file. Details in incfs_support/signal_handling.h
351 //
352 #if !ZIPARCHIVE_DISABLE_CALLBACK_API
353 
354 /**
355  * Uncompress a given zip entry to given |writer|.
356  *
357  * Returns 0 on success and negative values on failure.
358  */
359 int32_t ExtractToWriter(ZipArchiveHandle handle, const ZipEntry64* entry,
360                         zip_archive::Writer* writer);
361 
362 #endif  // !ZIPARCHIVE_DISABLE_CALLBACK_API
363 
364 /*
365  * Inflates the first |compressed_length| bytes of |reader| to a given |writer|.
366  * |crc_out| is set to the CRC32 checksum of the uncompressed data.
367  *
368  * Returns 0 on success and negative values on failure, for example if |reader|
369  * cannot supply the right amount of data, or if the number of bytes written to
370  * data does not match |uncompressed_length|.
371  *
372  * If |crc_out| is not nullptr, it is set to the crc32 checksum of the
373  * uncompressed data.
374  *
375  * NOTE: in the IncFS version of the library this function remains
376  * unprotected, because the data |reader| is supplying is under the full reader's
377  * control; it's the reader's duty to ensure it is available and OK to access.
378  */
379 int32_t Inflate(const Reader& reader, const uint64_t compressed_length,
380                 const uint64_t uncompressed_length, Writer* writer, uint64_t* crc_out);
381 
382 }  // namespace zip_archive
383