1 /*
2  * Copyright 2006 The Android Open Source Project
3  *
4  * Simple Zip file support.
5  */
6 #include "safe_iop.h"
7 #include "zlib.h"
8 
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <limits.h>
12 #include <stdint.h>     // for uintptr_t
13 #include <stdlib.h>
14 #include <sys/stat.h>   // for S_ISLNK()
15 #include <unistd.h>
16 
17 #define LOG_TAG "minzip"
18 #include "Zip.h"
19 #include "Bits.h"
20 #include "Log.h"
21 #include "DirUtil.h"
22 
23 #undef NDEBUG   // do this after including Log.h
24 #include <assert.h>
25 
26 #define SORT_ENTRIES 1
27 
/*
 * Zip record layout constants (java.util.zip naming convention).
 *
 * For each record type, xxxSIG is the 4-byte signature as read
 * little-endian, xxxHDR is the size of the fixed-length header, and the
 * remaining names are byte offsets of fields from the start of the record.
 */
enum {
    /* Central directory file header ("PK\001\002"). */
    CENSIG = 0x02014b50,
    CENHDR = 46,
    CENVEM = 4,     // version made by
    CENVER = 6,     // version needed to extract
    CENFLG = 8,     // general purpose flags
    CENHOW = 10,    // compression method
    CENTIM = 12,    // modification time
    CENCRC = 16,    // crc-32 of the uncompressed data
    CENSIZ = 20,    // compressed size
    CENLEN = 24,    // uncompressed size
    CENNAM = 28,    // file name length
    CENEXT = 30,    // extra field length
    CENCOM = 32,    // comment length
    CENDSK = 34,    // disk number start
    CENATT = 36,    // internal file attributes
    CENATX = 38,    // external file attributes
    CENOFF = 42,    // offset of the local header

    /* End of central directory record ("PK\005\006"). */
    ENDSIG = 0x06054b50,
    ENDHDR = 22,
    ENDSUB = 8,     // number of entries on this disk
    ENDTOT = 10,    // total number of entries
    ENDSIZ = 12,    // central directory size
    ENDOFF = 16,    // offset of the first central directory header
    ENDCOM = 20,    // archive comment length

    /* Data descriptor ("PK\007\010"). */
    EXTSIG = 0x08074b50,
    EXTHDR = 16,
    EXTCRC = 4,     // crc-32 of the uncompressed data
    EXTSIZ = 8,     // compressed size
    EXTLEN = 12,    // uncompressed size

    /* Local file header ("PK\003\004"). */
    LOCSIG = 0x04034b50,
    LOCHDR = 30,
    LOCVER = 4,     // version needed to extract
    LOCFLG = 6,     // general purpose flags
    LOCHOW = 8,     // compression method
    LOCTIM = 10,    // modification time
    LOCCRC = 14,    // crc-32 of the uncompressed data
    LOCSIZ = 18,    // compressed size
    LOCLEN = 22,    // uncompressed size
    LOCNAM = 26,    // file name length
    LOCEXT = 28,    // extra field length

    /* Compression methods. */
    STORED = 0,
    DEFLATED = 8,

    /* Value of the high byte of CENVEM, kept in place (i.e. 3 << 8). */
    CENVEM_UNIX = 0x0300,
};
85 
86 
87 /*
88  * For debugging, dump the contents of a ZipEntry.
89  */
90 #if 0
91 static void dumpEntry(const ZipEntry* pEntry)
92 {
93     LOGI(" %p '%.*s'\n", pEntry->fileName,pEntry->fileNameLen,pEntry->fileName);
94     LOGI("   off=%ld comp=%ld uncomp=%ld how=%d\n", pEntry->offset,
95         pEntry->compLen, pEntry->uncompLen, pEntry->compression);
96 }
97 #endif
98 
99 /*
100  * (This is a mzHashTableLookup callback.)
101  *
102  * Compare two ZipEntry structs, by name.
103  */
hashcmpZipEntry(const void * ventry1,const void * ventry2)104 static int hashcmpZipEntry(const void* ventry1, const void* ventry2)
105 {
106     const ZipEntry* entry1 = (const ZipEntry*) ventry1;
107     const ZipEntry* entry2 = (const ZipEntry*) ventry2;
108 
109     if (entry1->fileNameLen != entry2->fileNameLen)
110         return entry1->fileNameLen - entry2->fileNameLen;
111     return memcmp(entry1->fileName, entry2->fileName, entry1->fileNameLen);
112 }
113 
114 /*
115  * (This is a mzHashTableLookup callback.)
116  *
117  * find a ZipEntry struct by name.
118  */
hashcmpZipName(const void * ventry,const void * vname)119 static int hashcmpZipName(const void* ventry, const void* vname)
120 {
121     const ZipEntry* entry = (const ZipEntry*) ventry;
122     const char* name = (const char*) vname;
123     unsigned int nameLen = strlen(name);
124 
125     if (entry->fileNameLen != nameLen)
126         return entry->fileNameLen - nameLen;
127     return memcmp(entry->fileName, name, nameLen);
128 }
129 
/*
 * Hash the first "nameLen" bytes of "name" (which need not be
 * NUL-terminated) with a multiply-by-31 rolling hash.
 *
 * The seed is 2 rather than 0 so the result is not expected to line up
 * with any other hash function that might be in use.
 */
static unsigned int computeHash(const char* name, int nameLen)
{
    unsigned int hash = 2;
    const char* cursor = name;
    int remaining = nameLen;

    for (; remaining != 0; remaining--) {
        hash = hash * 31 + *cursor;
        cursor++;
    }

    return hash;
}
145 
/*
 * Register "pEntry" in "pHash" under the hash of its file name.
 *
 * If the lookup hands back some entry other than the one passed in, a
 * warning naming that entry is logged and nothing else is done.
 */
static void addEntryToHashTable(HashTable* pHash, ZipEntry* pEntry)
{
    const unsigned int nameHash =
        computeHash(pEntry->fileName, pEntry->fileNameLen);
    const ZipEntry* stored = (const ZipEntry*)mzHashTableLookup(pHash,
                nameHash, pEntry, hashcmpZipEntry, true);

    if (stored == pEntry) {
        return;
    }

    /* keep going */
    LOGW("WARNING: duplicate entry '%.*s' in Zip\n",
        stored->fileNameLen, stored->fileName);
}
159 
validFilename(const char * fileName,unsigned int fileNameLen)160 static int validFilename(const char *fileName, unsigned int fileNameLen)
161 {
162     // Forbid super long filenames.
163     if (fileNameLen >= PATH_MAX) {
164         LOGW("Filename too long (%d chatacters)\n", fileNameLen);
165         return 0;
166     }
167 
168     // Require all characters to be printable ASCII (no NUL, no UTF-8, etc).
169     unsigned int i;
170     for (i = 0; i < fileNameLen; ++i) {
171         if (fileName[i] < 32 || fileName[i] >= 127) {
172             LOGW("Filename contains invalid character '\%03o'\n", fileName[i]);
173             return 0;
174         }
175     }
176 
177     return 1;
178 }
179 
180 /*
181  * Parse the contents of a Zip archive.  After confirming that the file
182  * is in fact a Zip, we scan out the contents of the central directory and
183  * store it in a hash table.
184  *
185  * Returns "true" on success.
186  */
parseZipArchive(ZipArchive * pArchive)187 static bool parseZipArchive(ZipArchive* pArchive)
188 {
189     bool result = false;
190     const unsigned char* ptr;
191     unsigned int i, numEntries, cdOffset;
192     unsigned int val;
193 
194     /*
195      * The first 4 bytes of the file will either be the local header
196      * signature for the first file (LOCSIG) or, if the archive doesn't
197      * have any files in it, the end-of-central-directory signature (ENDSIG).
198      */
199     val = get4LE(pArchive->addr);
200     if (val == ENDSIG) {
201         LOGI("Found Zip archive, but it looks empty\n");
202         goto bail;
203     } else if (val != LOCSIG) {
204         LOGV("Not a Zip archive (found 0x%08x)\n", val);
205         goto bail;
206     }
207 
208     /*
209      * Find the EOCD.  We'll find it immediately unless they have a file
210      * comment.
211      */
212     ptr = pArchive->addr + pArchive->length - ENDHDR;
213 
214     while (ptr >= (const unsigned char*) pArchive->addr) {
215         if (*ptr == (ENDSIG & 0xff) && get4LE(ptr) == ENDSIG)
216             break;
217         ptr--;
218     }
219     if (ptr < (const unsigned char*) pArchive->addr) {
220         LOGI("Could not find end-of-central-directory in Zip\n");
221         goto bail;
222     }
223 
224     /*
225      * There are two interesting items in the EOCD block: the number of
226      * entries in the file, and the file offset of the start of the
227      * central directory.
228      */
229     numEntries = get2LE(ptr + ENDSUB);
230     cdOffset = get4LE(ptr + ENDOFF);
231 
232     LOGVV("numEntries=%d cdOffset=%d\n", numEntries, cdOffset);
233     if (numEntries == 0 || cdOffset >= pArchive->length) {
234         LOGW("Invalid entries=%d offset=%d (len=%zd)\n",
235             numEntries, cdOffset, pArchive->length);
236         goto bail;
237     }
238 
239     /*
240      * Create data structures to hold entries.
241      */
242     pArchive->numEntries = numEntries;
243     pArchive->pEntries = (ZipEntry*) calloc(numEntries, sizeof(ZipEntry));
244     pArchive->pHash = mzHashTableCreate(mzHashSize(numEntries), NULL);
245     if (pArchive->pEntries == NULL || pArchive->pHash == NULL)
246         goto bail;
247 
248     ptr = pArchive->addr + cdOffset;
249     for (i = 0; i < numEntries; i++) {
250         ZipEntry* pEntry;
251         unsigned int fileNameLen, extraLen, commentLen, localHdrOffset;
252         const unsigned char* localHdr;
253         const char *fileName;
254 
255         if (ptr + CENHDR > (const unsigned char*)pArchive->addr + pArchive->length) {
256             LOGW("Ran off the end (at %d)\n", i);
257             goto bail;
258         }
259         if (get4LE(ptr) != CENSIG) {
260             LOGW("Missed a central dir sig (at %d)\n", i);
261             goto bail;
262         }
263 
264         localHdrOffset = get4LE(ptr + CENOFF);
265         fileNameLen = get2LE(ptr + CENNAM);
266         extraLen = get2LE(ptr + CENEXT);
267         commentLen = get2LE(ptr + CENCOM);
268         fileName = (const char*)ptr + CENHDR;
269         if (fileName + fileNameLen > (const char*)pArchive->addr + pArchive->length) {
270             LOGW("Filename ran off the end (at %d)\n", i);
271             goto bail;
272         }
273         if (!validFilename(fileName, fileNameLen)) {
274             LOGW("Invalid filename (at %d)\n", i);
275             goto bail;
276         }
277 
278 #if SORT_ENTRIES
279         /* Figure out where this entry should go (binary search).
280          */
281         if (i > 0) {
282             int low, high;
283 
284             low = 0;
285             high = i - 1;
286             while (low <= high) {
287                 int mid;
288                 int diff;
289                 int diffLen;
290 
291                 mid = low + ((high - low) / 2); // avoid overflow
292 
293                 if (pArchive->pEntries[mid].fileNameLen < fileNameLen) {
294                     diffLen = pArchive->pEntries[mid].fileNameLen;
295                 } else {
296                     diffLen = fileNameLen;
297                 }
298                 diff = strncmp(pArchive->pEntries[mid].fileName, fileName,
299                         diffLen);
300                 if (diff == 0) {
301                     diff = pArchive->pEntries[mid].fileNameLen - fileNameLen;
302                 }
303                 if (diff < 0) {
304                     low = mid + 1;
305                 } else if (diff > 0) {
306                     high = mid - 1;
307                 } else {
308                     high = mid;
309                     break;
310                 }
311             }
312 
313             unsigned int target = high + 1;
314             assert(target <= i);
315             if (target != i) {
316                 /* It belongs somewhere other than at the end of
317                  * the list.  Make some room at [target].
318                  */
319                 memmove(pArchive->pEntries + target + 1,
320                         pArchive->pEntries + target,
321                         (i - target) * sizeof(ZipEntry));
322             }
323             pEntry = &pArchive->pEntries[target];
324         } else {
325             pEntry = &pArchive->pEntries[0];
326         }
327 #else
328         pEntry = &pArchive->pEntries[i];
329 #endif
330         pEntry->fileNameLen = fileNameLen;
331         pEntry->fileName = fileName;
332 
333         pEntry->compLen = get4LE(ptr + CENSIZ);
334         pEntry->uncompLen = get4LE(ptr + CENLEN);
335         pEntry->compression = get2LE(ptr + CENHOW);
336         pEntry->modTime = get4LE(ptr + CENTIM);
337         pEntry->crc32 = get4LE(ptr + CENCRC);
338 
339         /* These two are necessary for finding the mode of the file.
340          */
341         pEntry->versionMadeBy = get2LE(ptr + CENVEM);
342         if ((pEntry->versionMadeBy & 0xff00) != 0 &&
343                 (pEntry->versionMadeBy & 0xff00) != CENVEM_UNIX)
344         {
345             LOGW("Incompatible \"version made by\": 0x%02x (at %d)\n",
346                     pEntry->versionMadeBy >> 8, i);
347             goto bail;
348         }
349         pEntry->externalFileAttributes = get4LE(ptr + CENATX);
350 
351         // Perform pArchive->addr + localHdrOffset, ensuring that it won't
352         // overflow. This is needed because localHdrOffset is untrusted.
353         if (!safe_add((uintptr_t *)&localHdr, (uintptr_t)pArchive->addr,
354             (uintptr_t)localHdrOffset)) {
355             LOGW("Integer overflow adding in parseZipArchive\n");
356             goto bail;
357         }
358         if ((uintptr_t)localHdr + LOCHDR >
359             (uintptr_t)pArchive->addr + pArchive->length) {
360             LOGW("Bad offset to local header: %d (at %d)\n", localHdrOffset, i);
361             goto bail;
362         }
363         if (get4LE(localHdr) != LOCSIG) {
364             LOGW("Missed a local header sig (at %d)\n", i);
365             goto bail;
366         }
367         pEntry->offset = localHdrOffset + LOCHDR
368             + get2LE(localHdr + LOCNAM) + get2LE(localHdr + LOCEXT);
369         if (!safe_add(NULL, pEntry->offset, pEntry->compLen)) {
370             LOGW("Integer overflow adding in parseZipArchive\n");
371             goto bail;
372         }
373         if ((size_t)pEntry->offset + pEntry->compLen > pArchive->length) {
374             LOGW("Data ran off the end (at %d)\n", i);
375             goto bail;
376         }
377 
378 #if !SORT_ENTRIES
379         /* Add to hash table; no need to lock here.
380          * Can't do this now if we're sorting, because entries
381          * will move around.
382          */
383         addEntryToHashTable(pArchive->pHash, pEntry);
384 #endif
385 
386         //dumpEntry(pEntry);
387         ptr += CENHDR + fileNameLen + extraLen + commentLen;
388     }
389 
390 #if SORT_ENTRIES
391     /* If we're sorting, we have to wait until all entries
392      * are in their final places, otherwise the pointers will
393      * probably point to the wrong things.
394      */
395     for (i = 0; i < numEntries; i++) {
396         /* Add to hash table; no need to lock here.
397          */
398         addEntryToHashTable(pArchive->pHash, &pArchive->pEntries[i]);
399     }
400 #endif
401 
402     result = true;
403 
404 bail:
405     if (!result) {
406         mzHashTableFree(pArchive->pHash);
407         pArchive->pHash = NULL;
408     }
409     return result;
410 }
411 
412 /*
413  * Open a Zip archive and scan out the contents.
414  *
415  * The easiest way to do this is to mmap() the whole thing and do the
416  * traditional backward scan for central directory.  Since the EOCD is
417  * a relatively small bit at the end, we should end up only touching a
418  * small set of pages.
419  *
420  * This will be called on non-Zip files, especially during startup, so
421  * we don't want to be too noisy about failures.  (Do we want a "quiet"
422  * flag?)
423  *
424  * On success, we fill out the contents of "pArchive".
425  */
mzOpenZipArchive(unsigned char * addr,size_t length,ZipArchive * pArchive)426 int mzOpenZipArchive(unsigned char* addr, size_t length, ZipArchive* pArchive)
427 {
428     int err;
429 
430     if (length < ENDHDR) {
431         err = -1;
432         LOGV("File '%s' too small to be zip (%zd)\n", fileName, map.length);
433         goto bail;
434     }
435 
436     pArchive->addr = addr;
437     pArchive->length = length;
438 
439     if (!parseZipArchive(pArchive)) {
440         err = -1;
441         LOGV("Parsing '%s' failed\n", fileName);
442         goto bail;
443     }
444 
445     err = 0;
446 
447 bail:
448     if (err != 0)
449         mzCloseZipArchive(pArchive);
450     return err;
451 }
452 
453 /*
454  * Close a ZipArchive, closing the file and freeing the contents.
455  *
456  * NOTE: the ZipArchive may not have been fully created.
457  */
mzCloseZipArchive(ZipArchive * pArchive)458 void mzCloseZipArchive(ZipArchive* pArchive)
459 {
460     LOGV("Closing archive %p\n", pArchive);
461 
462     free(pArchive->pEntries);
463 
464     mzHashTableFree(pArchive->pHash);
465 
466     pArchive->pHash = NULL;
467     pArchive->pEntries = NULL;
468 }
469 
470 /*
471  * Find a matching entry.
472  *
473  * Returns NULL if no matching entry found.
474  */
mzFindZipEntry(const ZipArchive * pArchive,const char * entryName)475 const ZipEntry* mzFindZipEntry(const ZipArchive* pArchive,
476         const char* entryName)
477 {
478     unsigned int itemHash = computeHash(entryName, strlen(entryName));
479 
480     return (const ZipEntry*)mzHashTableLookup(pArchive->pHash,
481                 itemHash, (char*) entryName, hashcmpZipName, false);
482 }
483 
484 /*
485  * Return true if the entry is a symbolic link.
486  */
mzIsZipEntrySymlink(const ZipEntry * pEntry)487 static bool mzIsZipEntrySymlink(const ZipEntry* pEntry)
488 {
489     if ((pEntry->versionMadeBy & 0xff00) == CENVEM_UNIX) {
490         return S_ISLNK(pEntry->externalFileAttributes >> 16);
491     }
492     return false;
493 }
494 
495 /* Call processFunction on the uncompressed data of a STORED entry.
496  */
processStoredEntry(const ZipArchive * pArchive,const ZipEntry * pEntry,ProcessZipEntryContentsFunction processFunction,void * cookie)497 static bool processStoredEntry(const ZipArchive *pArchive,
498     const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction,
499     void *cookie)
500 {
501     return processFunction(pArchive->addr + pEntry->offset, pEntry->uncompLen, cookie);
502 }
503 
/* Inflate a DEFLATED entry and feed the output to processFunction.
 *
 * The compressed bytes are read directly from the mapped archive and
 * inflated into a 32 KB stack buffer.  processFunction is called each
 * time that buffer fills up, and once more at end-of-stream if any
 * output is pending.
 *
 * Returns true only if inflation reached the end of the stream, every
 * processFunction call returned true, and the total number of bytes
 * produced equals pEntry->uncompLen.
 */
static bool processDeflatedEntry(const ZipArchive *pArchive,
    const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction,
    void *cookie)
{
    long result = -1;
    unsigned char procBuf[32 * 1024];
    z_stream zstream;
    int zerr;

    /*
     * Initialize the zlib stream.
     */
    memset(&zstream, 0, sizeof(zstream));
    zstream.zalloc = Z_NULL;
    zstream.zfree = Z_NULL;
    zstream.opaque = Z_NULL;
    zstream.next_in = pArchive->addr + pEntry->offset;
    zstream.avail_in = pEntry->compLen;
    zstream.next_out = (Bytef*) procBuf;
    zstream.avail_out = sizeof(procBuf);
    zstream.data_type = Z_UNKNOWN;

    /*
     * Use the undocumented "negative window bits" feature to tell zlib
     * that there's no zlib header waiting for it.
     */
    zerr = inflateInit2(&zstream, -MAX_WBITS);
    if (zerr != Z_OK) {
        if (zerr == Z_VERSION_ERROR) {
            LOGE("Installed zlib is not compatible with linked version (%s)\n",
                ZLIB_VERSION);
        } else {
            LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr);
        }
        goto bail;
    }

    /*
     * Loop while we have data.
     */
    do {
        /* uncompress the data */
        zerr = inflate(&zstream, Z_NO_FLUSH);
        if (zerr != Z_OK && zerr != Z_STREAM_END) {
            LOGD("zlib inflate call failed (zerr=%d)\n", zerr);
            goto z_bail;
        }

        /* write when we're full or when we're done */
        if (zstream.avail_out == 0 ||
            (zerr == Z_STREAM_END && zstream.avail_out != sizeof(procBuf)))
        {
            long procSize = zstream.next_out - procBuf;
            LOGVV("+++ processing %d bytes\n", (int) procSize);
            bool ret = processFunction(procBuf, procSize, cookie);
            if (!ret) {
                LOGW("Process function elected to fail (in inflate)\n");
                goto z_bail;
            }

            /* hand the whole buffer back to zlib */
            zstream.next_out = procBuf;
            zstream.avail_out = sizeof(procBuf);
        }
    } while (zerr == Z_OK);

    assert(zerr == Z_STREAM_END);       /* other errors should've been caught */

    // success!
    result = zstream.total_out;

z_bail:
    inflateEnd(&zstream);        /* free up any allocated structures */

bail:
    /* result stays -1 on any failure above, which never equals a valid
     * uncompLen and so is reported as failure here.
     */
    if (result != pEntry->uncompLen) {
        if (result != -1)        // error already shown?
            LOGW("Size mismatch on inflated file (%ld vs %ld)\n",
                result, pEntry->uncompLen);
        return false;
    }
    return true;
}
590 
591 /*
592  * Stream the uncompressed data through the supplied function,
593  * passing cookie to it each time it gets called.  processFunction
594  * may be called more than once.
595  *
596  * If processFunction returns false, the operation is abandoned and
597  * mzProcessZipEntryContents() immediately returns false.
598  *
599  * This is useful for calculating the hash of an entry's uncompressed contents.
600  */
mzProcessZipEntryContents(const ZipArchive * pArchive,const ZipEntry * pEntry,ProcessZipEntryContentsFunction processFunction,void * cookie)601 bool mzProcessZipEntryContents(const ZipArchive *pArchive,
602     const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction,
603     void *cookie)
604 {
605     bool ret = false;
606     off_t oldOff;
607 
608     switch (pEntry->compression) {
609     case STORED:
610         ret = processStoredEntry(pArchive, pEntry, processFunction, cookie);
611         break;
612     case DEFLATED:
613         ret = processDeflatedEntry(pArchive, pEntry, processFunction, cookie);
614         break;
615     default:
616         LOGE("Unsupported compression type %d for entry '%s'\n",
617                 pEntry->compression, pEntry->fileName);
618         break;
619     }
620 
621     return ret;
622 }
623 
/*
 * Process-function callback that folds each chunk into a running CRC-32.
 * "crc" points at the unsigned long accumulator, which is updated in
 * place.  Always returns true.
 */
static bool crcProcessFunction(const unsigned char *data, int dataLen,
        void *crc)
{
    unsigned long *runningCrc = (unsigned long *)crc;

    *runningCrc = crc32(*runningCrc, data, dataLen);
    return true;
}
630 
/* Destination state for copyProcessFunction. */
typedef struct {
    char *buf;      // where the next chunk will be written
    int bufLen;     // bytes still available at buf
} CopyProcessArgs;

/*
 * Process-function callback that appends each chunk to a caller-supplied
 * buffer described by a CopyProcessArgs cookie.
 *
 * Returns true after copying the chunk and advancing the cookie.  Returns
 * false, leaving the cookie untouched, if the chunk does not fit in the
 * remaining space or if dataLen is negative (which would otherwise turn
 * into an enormous size when passed to memcpy).
 */
static bool copyProcessFunction(const unsigned char *data, int dataLen,
        void *cookie)
{
    CopyProcessArgs *args = (CopyProcessArgs *)cookie;

    if (dataLen < 0 || dataLen > args->bufLen) {
        return false;
    }

    memcpy(args->buf, data, dataLen);
    args->buf += dataLen;
    args->bufLen -= dataLen;
    return true;
}
648 
649 /*
650  * Read an entry into a buffer allocated by the caller.
651  */
mzReadZipEntry(const ZipArchive * pArchive,const ZipEntry * pEntry,char * buf,int bufLen)652 bool mzReadZipEntry(const ZipArchive* pArchive, const ZipEntry* pEntry,
653         char *buf, int bufLen)
654 {
655     CopyProcessArgs args;
656     bool ret;
657 
658     args.buf = buf;
659     args.bufLen = bufLen;
660     ret = mzProcessZipEntryContents(pArchive, pEntry, copyProcessFunction,
661             (void *)&args);
662     if (!ret) {
663         LOGE("Can't extract entry to buffer.\n");
664         return false;
665     }
666     return true;
667 }
668 
/*
 * Process-function callback that writes each chunk to a file descriptor.
 * The descriptor travels in "cookie", cast through intptr_t.
 *
 * write() is retried on interruption (TEMP_FAILURE_RETRY) and repeated
 * until the whole chunk has been written.  Returns false, after logging,
 * if a write returns zero or an error, or reports more bytes than asked.
 */
static bool writeProcessFunction(const unsigned char *data, int dataLen,
                                 void *cookie)
{
    const int fd = (int)(intptr_t)cookie;
    ssize_t written = 0;

    if (dataLen == 0) {
        return true;
    }

    for (;;) {
        ssize_t n = TEMP_FAILURE_RETRY(write(fd, data+written, dataLen-written));
        if (n <= 0) {
            LOGE("Error writing %zd bytes from zip file from %p: %s\n",
                 dataLen-written, data+written, strerror(errno));
            return false;
        }

        written += n;
        if (written == dataLen) {
            return true;
        }
        if (written > dataLen) {
            LOGE("write overrun?  (%zd bytes instead of %d)\n",
                 written, dataLen);
            return false;
        }
    }
}
694 
695 /*
696  * Uncompress "pEntry" in "pArchive" to "fd" at the current offset.
697  */
mzExtractZipEntryToFile(const ZipArchive * pArchive,const ZipEntry * pEntry,int fd)698 bool mzExtractZipEntryToFile(const ZipArchive *pArchive,
699     const ZipEntry *pEntry, int fd)
700 {
701     bool ret = mzProcessZipEntryContents(pArchive, pEntry, writeProcessFunction,
702                                          (void*)(intptr_t)fd);
703     if (!ret) {
704         LOGE("Can't extract entry to file.\n");
705         return false;
706     }
707     return true;
708 }
709 
/* Destination state for bufferProcessFunction. */
typedef struct {
    unsigned char* buffer;  // where the next chunk will be written
    long len;               // bytes still expected / available at buffer
} BufferExtractCookie;

/*
 * Process-function callback that appends each chunk to the memory buffer
 * described by a BufferExtractCookie.
 *
 * Returns true after copying the chunk and advancing the cookie.  Returns
 * false, leaving the cookie untouched, if dataLen is negative or larger
 * than the remaining space.  The destination is only sized for the
 * entry's declared uncompressed length, so a stream that produces more
 * than that must not be written.
 */
static bool bufferProcessFunction(const unsigned char *data, int dataLen,
    void *cookie) {
    BufferExtractCookie *bec = (BufferExtractCookie*)cookie;

    if (dataLen < 0 || dataLen > bec->len) {
        return false;
    }

    memmove(bec->buffer, data, dataLen);
    bec->buffer += dataLen;
    bec->len -= dataLen;

    return true;
}
725 
726 /*
727  * Uncompress "pEntry" in "pArchive" to buffer, which must be large
728  * enough to hold mzGetZipEntryUncomplen(pEntry) bytes.
729  */
mzExtractZipEntryToBuffer(const ZipArchive * pArchive,const ZipEntry * pEntry,unsigned char * buffer)730 bool mzExtractZipEntryToBuffer(const ZipArchive *pArchive,
731     const ZipEntry *pEntry, unsigned char *buffer)
732 {
733     BufferExtractCookie bec;
734     bec.buffer = buffer;
735     bec.len = mzGetZipEntryUncompLen(pEntry);
736 
737     bool ret = mzProcessZipEntryContents(pArchive, pEntry,
738         bufferProcessFunction, (void*)&bec);
739     if (!ret || bec.len != 0) {
740         LOGE("Can't extract entry to memory buffer.\n");
741         return false;
742     }
743     return true;
744 }
745 
746 
747 /* Helper state to make path translation easier and less malloc-happy.
748  */
749 typedef struct {
750     const char *targetDir;
751     const char *zipDir;
752     char *buf;
753     int targetDirLen;
754     int zipDirLen;
755     int bufLen;
756 } MzPathHelper;
757 
758 /* Given the values of targetDir and zipDir in the helper,
759  * return the target filename of the provided entry.
760  * The helper must be initialized first.
761  */
targetEntryPath(MzPathHelper * helper,ZipEntry * pEntry)762 static const char *targetEntryPath(MzPathHelper *helper, ZipEntry *pEntry)
763 {
764     int needLen;
765     bool firstTime = (helper->buf == NULL);
766 
767     /* target file <-- targetDir + / + entry[zipDirLen:]
768      */
769     needLen = helper->targetDirLen + 1 +
770             pEntry->fileNameLen - helper->zipDirLen + 1;
771     if (needLen > helper->bufLen) {
772         char *newBuf;
773 
774         needLen *= 2;
775         newBuf = (char *)realloc(helper->buf, needLen);
776         if (newBuf == NULL) {
777             return NULL;
778         }
779         helper->buf = newBuf;
780         helper->bufLen = needLen;
781     }
782 
783     /* Every path will start with the target path and a slash.
784      */
785     if (firstTime) {
786         char *p = helper->buf;
787         memcpy(p, helper->targetDir, helper->targetDirLen);
788         p += helper->targetDirLen;
789         if (p == helper->buf || p[-1] != '/') {
790             helper->targetDirLen += 1;
791             *p++ = '/';
792         }
793     }
794 
795     /* Replace the custom part of the path with the appropriate
796      * part of the entry's path.
797      */
798     char *epath = helper->buf + helper->targetDirLen;
799     memcpy(epath, pEntry->fileName + helper->zipDirLen,
800             pEntry->fileNameLen - helper->zipDirLen);
801     epath += pEntry->fileNameLen - helper->zipDirLen;
802     *epath = '\0';
803 
804     return helper->buf;
805 }
806 
807 /*
808  * Inflate all entries under zipDir to the directory specified by
809  * targetDir, which must exist and be a writable directory.
810  *
811  * The immediate children of zipDir will become the immediate
812  * children of targetDir; e.g., if the archive contains the entries
813  *
814  *     a/b/c/one
815  *     a/b/c/two
816  *     a/b/c/d/three
817  *
818  * and mzExtractRecursive(a, "a/b/c", "/tmp") is called, the resulting
819  * files will be
820  *
821  *     /tmp/one
822  *     /tmp/two
823  *     /tmp/d/three
824  *
825  * Returns true on success, false on failure.
826  */
mzExtractRecursive(const ZipArchive * pArchive,const char * zipDir,const char * targetDir,const struct utimbuf * timestamp,void (* callback)(const char * fn,void *),void * cookie,struct selabel_handle * sehnd)827 bool mzExtractRecursive(const ZipArchive *pArchive,
828                         const char *zipDir, const char *targetDir,
829                         const struct utimbuf *timestamp,
830                         void (*callback)(const char *fn, void *), void *cookie,
831                         struct selabel_handle *sehnd)
832 {
833     if (zipDir[0] == '/') {
834         LOGE("mzExtractRecursive(): zipDir must be a relative path.\n");
835         return false;
836     }
837     if (targetDir[0] != '/') {
838         LOGE("mzExtractRecursive(): targetDir must be an absolute path.\n");
839         return false;
840     }
841 
842     unsigned int zipDirLen;
843     char *zpath;
844 
845     zipDirLen = strlen(zipDir);
846     zpath = (char *)malloc(zipDirLen + 2);
847     if (zpath == NULL) {
848         LOGE("Can't allocate %d bytes for zip path\n", zipDirLen + 2);
849         return false;
850     }
851     /* If zipDir is empty, we'll extract the entire zip file.
852      * Otherwise, canonicalize the path.
853      */
854     if (zipDirLen > 0) {
855         /* Make sure there's (hopefully, exactly one) slash at the
856          * end of the path.  This way we don't need to worry about
857          * accidentally extracting "one/twothree" when a path like
858          * "one/two" is specified.
859          */
860         memcpy(zpath, zipDir, zipDirLen);
861         if (zpath[zipDirLen-1] != '/') {
862             zpath[zipDirLen++] = '/';
863         }
864     }
865     zpath[zipDirLen] = '\0';
866 
867     /* Set up the helper structure that we'll use to assemble paths.
868      */
869     MzPathHelper helper;
870     helper.targetDir = targetDir;
871     helper.targetDirLen = strlen(helper.targetDir);
872     helper.zipDir = zpath;
873     helper.zipDirLen = strlen(helper.zipDir);
874     helper.buf = NULL;
875     helper.bufLen = 0;
876 
877     /* Walk through the entries and extract anything whose path begins
878      * with zpath.
879     //TODO: since the entries are sorted, binary search for the first match
880     //      and stop after the first non-match.
881      */
882     unsigned int i;
883     bool seenMatch = false;
884     int ok = true;
885     int extractCount = 0;
886     for (i = 0; i < pArchive->numEntries; i++) {
887         ZipEntry *pEntry = pArchive->pEntries + i;
888         if (pEntry->fileNameLen < zipDirLen) {
889        //TODO: look out for a single empty directory entry that matches zpath, but
890        //      missing the trailing slash.  Most zip files seem to include
891        //      the trailing slash, but I think it's legal to leave it off.
892        //      e.g., zpath "a/b/", entry "a/b", with no children of the entry.
893             /* No chance of matching.
894              */
895 #if SORT_ENTRIES
896             if (seenMatch) {
897                 /* Since the entries are sorted, we can give up
898                  * on the first mismatch after the first match.
899                  */
900                 break;
901             }
902 #endif
903             continue;
904         }
905         /* If zpath is empty, this strncmp() will match everything,
906          * which is what we want.
907          */
908         if (strncmp(pEntry->fileName, zpath, zipDirLen) != 0) {
909 #if SORT_ENTRIES
910             if (seenMatch) {
911                 /* Since the entries are sorted, we can give up
912                  * on the first mismatch after the first match.
913                  */
914                 break;
915             }
916 #endif
917             continue;
918         }
919         /* This entry begins with zipDir, so we'll extract it.
920          */
921         seenMatch = true;
922 
923         /* Find the target location of the entry.
924          */
925         const char *targetFile = targetEntryPath(&helper, pEntry);
926         if (targetFile == NULL) {
927             LOGE("Can't assemble target path for \"%.*s\"\n",
928                     pEntry->fileNameLen, pEntry->fileName);
929             ok = false;
930             break;
931         }
932 
933 #define UNZIP_DIRMODE 0755
934 #define UNZIP_FILEMODE 0644
935         /*
936          * Create the file or directory. We ignore directory entries
937          * because we recursively create paths to each file entry we encounter
938          * in the zip archive anyway.
939          *
940          * NOTE: A "directory entry" in a zip archive is just a zero length
941          * entry that ends in a "/". They're not mandatory and many tools get
942          * rid of them. We need to process them only if we want to preserve
943          * empty directories from the archive.
944          */
945         if (pEntry->fileName[pEntry->fileNameLen-1] != '/') {
946             /* This is not a directory.  First, make sure that
947              * the containing directory exists.
948              */
949             int ret = dirCreateHierarchy(
950                     targetFile, UNZIP_DIRMODE, timestamp, true, sehnd);
951             if (ret != 0) {
952                 LOGE("Can't create containing directory for \"%s\": %s\n",
953                         targetFile, strerror(errno));
954                 ok = false;
955                 break;
956             }
957 
958             /*
959              * The entry is a regular file or a symlink. Open the target for writing.
960              *
961              * TODO: This behavior for symlinks seems rather bizarre. For a
962              * symlink foo/bar/baz -> foo/tar/taz, we will create a file called
963              * "foo/bar/baz" whose contents are the literal "foo/tar/taz". We
964              * warn about this for now and preserve older behavior.
965              */
966             if (mzIsZipEntrySymlink(pEntry)) {
967                 LOGE("Symlink entry \"%.*s\" will be output as a regular file.",
968                      pEntry->fileNameLen, pEntry->fileName);
969             }
970 
971             char *secontext = NULL;
972 
973             if (sehnd) {
974                 selabel_lookup(sehnd, &secontext, targetFile, UNZIP_FILEMODE);
975                 setfscreatecon(secontext);
976             }
977 
978             int fd = open(targetFile, O_CREAT|O_WRONLY|O_TRUNC|O_SYNC,
979                 UNZIP_FILEMODE);
980 
981             if (secontext) {
982                 freecon(secontext);
983                 setfscreatecon(NULL);
984             }
985 
986             if (fd < 0) {
987                 LOGE("Can't create target file \"%s\": %s\n",
988                         targetFile, strerror(errno));
989                 ok = false;
990                 break;
991             }
992 
993             bool ok = mzExtractZipEntryToFile(pArchive, pEntry, fd);
994             if (ok) {
995                 ok = (fsync(fd) == 0);
996             }
997             if (close(fd) != 0) {
998                 ok = false;
999             }
1000             if (!ok) {
1001                 LOGE("Error extracting \"%s\"\n", targetFile);
1002                 ok = false;
1003                 break;
1004             }
1005 
1006             if (timestamp != NULL && utime(targetFile, timestamp)) {
1007                 LOGE("Error touching \"%s\"\n", targetFile);
1008                 ok = false;
1009                 break;
1010             }
1011 
1012             LOGV("Extracted file \"%s\"\n", targetFile);
1013             ++extractCount;
1014         }
1015 
1016         if (callback != NULL) callback(targetFile, cookie);
1017     }
1018 
1019     LOGD("Extracted %d file(s)\n", extractCount);
1020 
1021     free(helper.buf);
1022     free(zpath);
1023 
1024     return ok;
1025 }
1026