1 /*
2  * Copyright (C) 2006 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 //
18 // Access to Zip archives.
19 //
20 
21 #define LOG_TAG "zip"
22 
23 #include <androidfw/ZipUtils.h>
24 #include <utils/Log.h>
25 
26 #include "ZipFile.h"
27 
28 #include <zlib.h>
29 #define DEF_MEM_LEVEL 8                // normally in zutil.h?
30 
31 #include <memory.h>
32 #include <sys/stat.h>
33 #include <errno.h>
34 #include <assert.h>
35 
36 using namespace android;
37 
38 /*
39  * Some environments require the "b", some choke on it.
40  */
41 #define FILE_OPEN_RO        "rb"
42 #define FILE_OPEN_RW        "r+b"
43 #define FILE_OPEN_RW_CREATE "w+b"
44 
45 /* should live somewhere else? */
errnoToStatus(int err)46 static status_t errnoToStatus(int err)
47 {
48     if (err == ENOENT)
49         return NAME_NOT_FOUND;
50     else if (err == EACCES)
51         return PERMISSION_DENIED;
52     else
53         return UNKNOWN_ERROR;
54 }
55 
56 /*
57  * Open a file and parse its guts.
58  */
open(const char * zipFileName,int flags)59 status_t ZipFile::open(const char* zipFileName, int flags)
60 {
61     bool newArchive = false;
62 
63     assert(mZipFp == NULL);     // no reopen
64 
65     if ((flags & kOpenTruncate))
66         flags |= kOpenCreate;           // trunc implies create
67 
68     if ((flags & kOpenReadOnly) && (flags & kOpenReadWrite))
69         return INVALID_OPERATION;       // not both
70     if (!((flags & kOpenReadOnly) || (flags & kOpenReadWrite)))
71         return INVALID_OPERATION;       // not neither
72     if ((flags & kOpenCreate) && !(flags & kOpenReadWrite))
73         return INVALID_OPERATION;       // create requires write
74 
75     if (flags & kOpenTruncate) {
76         newArchive = true;
77     } else {
78         newArchive = (access(zipFileName, F_OK) != 0);
79         if (!(flags & kOpenCreate) && newArchive) {
80             /* not creating, must already exist */
81             ALOGD("File %s does not exist", zipFileName);
82             return NAME_NOT_FOUND;
83         }
84     }
85 
86     /* open the file */
87     const char* openflags;
88     if (flags & kOpenReadWrite) {
89         if (newArchive)
90             openflags = FILE_OPEN_RW_CREATE;
91         else
92             openflags = FILE_OPEN_RW;
93     } else {
94         openflags = FILE_OPEN_RO;
95     }
96     mZipFp = fopen(zipFileName, openflags);
97     if (mZipFp == NULL) {
98         int err = errno;
99         ALOGD("fopen failed: %d\n", err);
100         return errnoToStatus(err);
101     }
102 
103     status_t result;
104     if (!newArchive) {
105         /*
106          * Load the central directory.  If that fails, then this probably
107          * isn't a Zip archive.
108          */
109         result = readCentralDir();
110     } else {
111         /*
112          * Newly-created.  The EndOfCentralDir constructor actually
113          * sets everything to be the way we want it (all zeroes).  We
114          * set mNeedCDRewrite so that we create *something* if the
115          * caller doesn't add any files.  (We could also just unlink
116          * the file if it's brand new and nothing was added, but that's
117          * probably doing more than we really should -- the user might
118          * have a need for empty zip files.)
119          */
120         mNeedCDRewrite = true;
121         result = NO_ERROR;
122     }
123 
124     if (flags & kOpenReadOnly)
125         mReadOnly = true;
126     else
127         assert(!mReadOnly);
128 
129     return result;
130 }
131 
132 /*
133  * Return the Nth entry in the archive.
134  */
getEntryByIndex(int idx) const135 ZipEntry* ZipFile::getEntryByIndex(int idx) const
136 {
137     if (idx < 0 || idx >= (int) mEntries.size())
138         return NULL;
139 
140     return mEntries[idx];
141 }
142 
143 /*
144  * Find an entry by name.
145  */
getEntryByName(const char * fileName) const146 ZipEntry* ZipFile::getEntryByName(const char* fileName) const
147 {
148     /*
149      * Do a stupid linear string-compare search.
150      *
151      * There are various ways to speed this up, especially since it's rare
152      * to intermingle changes to the archive with "get by name" calls.  We
153      * don't want to sort the mEntries vector itself, however, because
154      * it's used to recreate the Central Directory.
155      *
156      * (Hash table works, parallel list of pointers in sorted order is good.)
157      */
158     int idx;
159 
160     for (idx = mEntries.size()-1; idx >= 0; idx--) {
161         ZipEntry* pEntry = mEntries[idx];
162         if (!pEntry->getDeleted() &&
163             strcmp(fileName, pEntry->getFileName()) == 0)
164         {
165             return pEntry;
166         }
167     }
168 
169     return NULL;
170 }
171 
172 /*
173  * Empty the mEntries vector.
174  */
discardEntries(void)175 void ZipFile::discardEntries(void)
176 {
177     int count = mEntries.size();
178 
179     while (--count >= 0)
180         delete mEntries[count];
181 
182     mEntries.clear();
183 }
184 
185 
186 /*
187  * Find the central directory and read the contents.
188  *
189  * The fun thing about ZIP archives is that they may or may not be
190  * readable from start to end.  In some cases, notably for archives
191  * that were written to stdout, the only length information is in the
192  * central directory at the end of the file.
193  *
194  * Of course, the central directory can be followed by a variable-length
195  * comment field, so we have to scan through it backwards.  The comment
196  * is at most 64K, plus we have 18 bytes for the end-of-central-dir stuff
197  * itself, plus apparently sometimes people throw random junk on the end
198  * just for the fun of it.
199  *
200  * This is all a little wobbly.  If the wrong value ends up in the EOCD
201  * area, we're hosed.  This appears to be the way that everbody handles
202  * it though, so we're in pretty good company if this fails.
203  */
readCentralDir(void)204 status_t ZipFile::readCentralDir(void)
205 {
206     status_t result = NO_ERROR;
207     unsigned char* buf = NULL;
208     off_t fileLength, seekStart;
209     long readAmount;
210     int i;
211 
212     fseek(mZipFp, 0, SEEK_END);
213     fileLength = ftell(mZipFp);
214     rewind(mZipFp);
215 
216     /* too small to be a ZIP archive? */
217     if (fileLength < EndOfCentralDir::kEOCDLen) {
218         ALOGD("Length is %ld -- too small\n", (long)fileLength);
219         result = INVALID_OPERATION;
220         goto bail;
221     }
222 
223     buf = new unsigned char[EndOfCentralDir::kMaxEOCDSearch];
224     if (buf == NULL) {
225         ALOGD("Failure allocating %d bytes for EOCD search",
226              EndOfCentralDir::kMaxEOCDSearch);
227         result = NO_MEMORY;
228         goto bail;
229     }
230 
231     if (fileLength > EndOfCentralDir::kMaxEOCDSearch) {
232         seekStart = fileLength - EndOfCentralDir::kMaxEOCDSearch;
233         readAmount = EndOfCentralDir::kMaxEOCDSearch;
234     } else {
235         seekStart = 0;
236         readAmount = (long) fileLength;
237     }
238     if (fseek(mZipFp, seekStart, SEEK_SET) != 0) {
239         ALOGD("Failure seeking to end of zip at %ld", (long) seekStart);
240         result = UNKNOWN_ERROR;
241         goto bail;
242     }
243 
244     /* read the last part of the file into the buffer */
245     if (fread(buf, 1, readAmount, mZipFp) != (size_t) readAmount) {
246         ALOGD("short file? wanted %ld\n", readAmount);
247         result = UNKNOWN_ERROR;
248         goto bail;
249     }
250 
251     /* find the end-of-central-dir magic */
252     for (i = readAmount - 4; i >= 0; i--) {
253         if (buf[i] == 0x50 &&
254             ZipEntry::getLongLE(&buf[i]) == EndOfCentralDir::kSignature)
255         {
256             ALOGV("+++ Found EOCD at buf+%d\n", i);
257             break;
258         }
259     }
260     if (i < 0) {
261         ALOGD("EOCD not found, not Zip\n");
262         result = INVALID_OPERATION;
263         goto bail;
264     }
265 
266     /* extract eocd values */
267     result = mEOCD.readBuf(buf + i, readAmount - i);
268     if (result != NO_ERROR) {
269         ALOGD("Failure reading %ld bytes of EOCD values", readAmount - i);
270         goto bail;
271     }
272     //mEOCD.dump();
273 
274     if (mEOCD.mDiskNumber != 0 || mEOCD.mDiskWithCentralDir != 0 ||
275         mEOCD.mNumEntries != mEOCD.mTotalNumEntries)
276     {
277         ALOGD("Archive spanning not supported\n");
278         result = INVALID_OPERATION;
279         goto bail;
280     }
281 
282     /*
283      * So far so good.  "mCentralDirSize" is the size in bytes of the
284      * central directory, so we can just seek back that far to find it.
285      * We can also seek forward mCentralDirOffset bytes from the
286      * start of the file.
287      *
288      * We're not guaranteed to have the rest of the central dir in the
289      * buffer, nor are we guaranteed that the central dir will have any
290      * sort of convenient size.  We need to skip to the start of it and
291      * read the header, then the other goodies.
292      *
293      * The only thing we really need right now is the file comment, which
294      * we're hoping to preserve.
295      */
296     if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
297         ALOGD("Failure seeking to central dir offset %ld\n",
298              mEOCD.mCentralDirOffset);
299         result = UNKNOWN_ERROR;
300         goto bail;
301     }
302 
303     /*
304      * Loop through and read the central dir entries.
305      */
306     ALOGV("Scanning %d entries...\n", mEOCD.mTotalNumEntries);
307     int entry;
308     for (entry = 0; entry < mEOCD.mTotalNumEntries; entry++) {
309         ZipEntry* pEntry = new ZipEntry;
310 
311         result = pEntry->initFromCDE(mZipFp);
312         if (result != NO_ERROR) {
313             ALOGD("initFromCDE failed\n");
314             delete pEntry;
315             goto bail;
316         }
317 
318         mEntries.add(pEntry);
319     }
320 
321 
322     /*
323      * If all went well, we should now be back at the EOCD.
324      */
325     {
326         unsigned char checkBuf[4];
327         if (fread(checkBuf, 1, 4, mZipFp) != 4) {
328             ALOGD("EOCD check read failed\n");
329             result = INVALID_OPERATION;
330             goto bail;
331         }
332         if (ZipEntry::getLongLE(checkBuf) != EndOfCentralDir::kSignature) {
333             ALOGD("EOCD read check failed\n");
334             result = UNKNOWN_ERROR;
335             goto bail;
336         }
337         ALOGV("+++ EOCD read check passed\n");
338     }
339 
340 bail:
341     delete[] buf;
342     return result;
343 }
344 
345 
346 /*
347  * Add a new file to the archive.
348  *
349  * This requires creating and populating a ZipEntry structure, and copying
350  * the data into the file at the appropriate position.  The "appropriate
351  * position" is the current location of the central directory, which we
352  * casually overwrite (we can put it back later).
353  *
354  * If we were concerned about safety, we would want to make all changes
355  * in a temp file and then overwrite the original after everything was
356  * safely written.  Not really a concern for us.
357  */
addCommon(const char * fileName,const void * data,size_t size,const char * storageName,int sourceType,int compressionMethod,ZipEntry ** ppEntry)358 status_t ZipFile::addCommon(const char* fileName, const void* data, size_t size,
359     const char* storageName, int sourceType, int compressionMethod,
360     ZipEntry** ppEntry)
361 {
362     ZipEntry* pEntry = NULL;
363     status_t result = NO_ERROR;
364     long lfhPosn, startPosn, endPosn, uncompressedLen;
365     FILE* inputFp = NULL;
366     unsigned long crc;
367     time_t modWhen;
368 
369     if (mReadOnly)
370         return INVALID_OPERATION;
371 
372     assert(compressionMethod == ZipEntry::kCompressDeflated ||
373            compressionMethod == ZipEntry::kCompressStored);
374 
375     /* make sure we're in a reasonable state */
376     assert(mZipFp != NULL);
377     assert(mEntries.size() == mEOCD.mTotalNumEntries);
378 
379     /* make sure it doesn't already exist */
380     if (getEntryByName(storageName) != NULL)
381         return ALREADY_EXISTS;
382 
383     if (!data) {
384         inputFp = fopen(fileName, FILE_OPEN_RO);
385         if (inputFp == NULL)
386             return errnoToStatus(errno);
387     }
388 
389     if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
390         result = UNKNOWN_ERROR;
391         goto bail;
392     }
393 
394     pEntry = new ZipEntry;
395     pEntry->initNew(storageName, NULL);
396 
397     /*
398      * From here on out, failures are more interesting.
399      */
400     mNeedCDRewrite = true;
401 
402     /*
403      * Write the LFH, even though it's still mostly blank.  We need it
404      * as a place-holder.  In theory the LFH isn't necessary, but in
405      * practice some utilities demand it.
406      */
407     lfhPosn = ftell(mZipFp);
408     pEntry->mLFH.write(mZipFp);
409     startPosn = ftell(mZipFp);
410 
411     /*
412      * Copy the data in, possibly compressing it as we go.
413      */
414     if (sourceType == ZipEntry::kCompressStored) {
415         if (compressionMethod == ZipEntry::kCompressDeflated) {
416             bool failed = false;
417             result = compressFpToFp(mZipFp, inputFp, data, size, &crc);
418             if (result != NO_ERROR) {
419                 ALOGD("compression failed, storing\n");
420                 failed = true;
421             } else {
422                 /*
423                  * Make sure it has compressed "enough".  This probably ought
424                  * to be set through an API call, but I don't expect our
425                  * criteria to change over time.
426                  */
427                 long src = inputFp ? ftell(inputFp) : size;
428                 long dst = ftell(mZipFp) - startPosn;
429                 if (dst + (dst / 10) > src) {
430                     ALOGD("insufficient compression (src=%ld dst=%ld), storing\n",
431                         src, dst);
432                     failed = true;
433                 }
434             }
435 
436             if (failed) {
437                 compressionMethod = ZipEntry::kCompressStored;
438                 if (inputFp) rewind(inputFp);
439                 fseek(mZipFp, startPosn, SEEK_SET);
440                 /* fall through to kCompressStored case */
441             }
442         }
443         /* handle "no compression" request, or failed compression from above */
444         if (compressionMethod == ZipEntry::kCompressStored) {
445             if (inputFp) {
446                 result = copyFpToFp(mZipFp, inputFp, &crc);
447             } else {
448                 result = copyDataToFp(mZipFp, data, size, &crc);
449             }
450             if (result != NO_ERROR) {
451                 // don't need to truncate; happens in CDE rewrite
452                 ALOGD("failed copying data in\n");
453                 goto bail;
454             }
455         }
456 
457         // currently seeked to end of file
458         uncompressedLen = inputFp ? ftell(inputFp) : size;
459     } else if (sourceType == ZipEntry::kCompressDeflated) {
460         /* we should support uncompressed-from-compressed, but it's not
461          * important right now */
462         assert(compressionMethod == ZipEntry::kCompressDeflated);
463 
464         bool scanResult;
465         int method;
466         long compressedLen;
467 
468         scanResult = ZipUtils::examineGzip(inputFp, &method, &uncompressedLen,
469                         &compressedLen, &crc);
470         if (!scanResult || method != ZipEntry::kCompressDeflated) {
471             ALOGD("this isn't a deflated gzip file?");
472             result = UNKNOWN_ERROR;
473             goto bail;
474         }
475 
476         result = copyPartialFpToFp(mZipFp, inputFp, compressedLen, NULL);
477         if (result != NO_ERROR) {
478             ALOGD("failed copying gzip data in\n");
479             goto bail;
480         }
481     } else {
482         assert(false);
483         result = UNKNOWN_ERROR;
484         goto bail;
485     }
486 
487     /*
488      * We could write the "Data Descriptor", but there doesn't seem to
489      * be any point since we're going to go back and write the LFH.
490      *
491      * Update file offsets.
492      */
493     endPosn = ftell(mZipFp);            // seeked to end of compressed data
494 
495     /*
496      * Success!  Fill out new values.
497      */
498     pEntry->setDataInfo(uncompressedLen, endPosn - startPosn, crc,
499         compressionMethod);
500     modWhen = getModTime(inputFp ? fileno(inputFp) : fileno(mZipFp));
501     pEntry->setModWhen(modWhen);
502     pEntry->setLFHOffset(lfhPosn);
503     mEOCD.mNumEntries++;
504     mEOCD.mTotalNumEntries++;
505     mEOCD.mCentralDirSize = 0;      // mark invalid; set by flush()
506     mEOCD.mCentralDirOffset = endPosn;
507 
508     /*
509      * Go back and write the LFH.
510      */
511     if (fseek(mZipFp, lfhPosn, SEEK_SET) != 0) {
512         result = UNKNOWN_ERROR;
513         goto bail;
514     }
515     pEntry->mLFH.write(mZipFp);
516 
517     /*
518      * Add pEntry to the list.
519      */
520     mEntries.add(pEntry);
521     if (ppEntry != NULL)
522         *ppEntry = pEntry;
523     pEntry = NULL;
524 
525 bail:
526     if (inputFp != NULL)
527         fclose(inputFp);
528     delete pEntry;
529     return result;
530 }
531 
532 /*
533  * Add an entry by copying it from another zip file.  If "padding" is
534  * nonzero, the specified number of bytes will be added to the "extra"
535  * field in the header.
536  *
537  * If "ppEntry" is non-NULL, a pointer to the new entry will be returned.
538  */
add(const ZipFile * pSourceZip,const ZipEntry * pSourceEntry,int padding,ZipEntry ** ppEntry)539 status_t ZipFile::add(const ZipFile* pSourceZip, const ZipEntry* pSourceEntry,
540     int padding, ZipEntry** ppEntry)
541 {
542     ZipEntry* pEntry = NULL;
543     status_t result;
544     long lfhPosn, endPosn;
545 
546     if (mReadOnly)
547         return INVALID_OPERATION;
548 
549     /* make sure we're in a reasonable state */
550     assert(mZipFp != NULL);
551     assert(mEntries.size() == mEOCD.mTotalNumEntries);
552 
553     if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
554         result = UNKNOWN_ERROR;
555         goto bail;
556     }
557 
558     pEntry = new ZipEntry;
559     if (pEntry == NULL) {
560         result = NO_MEMORY;
561         goto bail;
562     }
563 
564     result = pEntry->initFromExternal(pSourceZip, pSourceEntry);
565     if (result != NO_ERROR)
566         goto bail;
567     if (padding != 0) {
568         result = pEntry->addPadding(padding);
569         if (result != NO_ERROR)
570             goto bail;
571     }
572 
573     /*
574      * From here on out, failures are more interesting.
575      */
576     mNeedCDRewrite = true;
577 
578     /*
579      * Write the LFH.  Since we're not recompressing the data, we already
580      * have all of the fields filled out.
581      */
582     lfhPosn = ftell(mZipFp);
583     pEntry->mLFH.write(mZipFp);
584 
585     /*
586      * Copy the data over.
587      *
588      * If the "has data descriptor" flag is set, we want to copy the DD
589      * fields as well.  This is a fixed-size area immediately following
590      * the data.
591      */
592     if (fseek(pSourceZip->mZipFp, pSourceEntry->getFileOffset(), SEEK_SET) != 0)
593     {
594         result = UNKNOWN_ERROR;
595         goto bail;
596     }
597 
598     off_t copyLen;
599     copyLen = pSourceEntry->getCompressedLen();
600     if ((pSourceEntry->mLFH.mGPBitFlag & ZipEntry::kUsesDataDescr) != 0)
601         copyLen += ZipEntry::kDataDescriptorLen;
602 
603     if (copyPartialFpToFp(mZipFp, pSourceZip->mZipFp, copyLen, NULL)
604         != NO_ERROR)
605     {
606         ALOGW("copy of '%s' failed\n", pEntry->mCDE.mFileName);
607         result = UNKNOWN_ERROR;
608         goto bail;
609     }
610 
611     /*
612      * Update file offsets.
613      */
614     endPosn = ftell(mZipFp);
615 
616     /*
617      * Success!  Fill out new values.
618      */
619     pEntry->setLFHOffset(lfhPosn);      // sets mCDE.mLocalHeaderRelOffset
620     mEOCD.mNumEntries++;
621     mEOCD.mTotalNumEntries++;
622     mEOCD.mCentralDirSize = 0;      // mark invalid; set by flush()
623     mEOCD.mCentralDirOffset = endPosn;
624 
625     /*
626      * Add pEntry to the list.
627      */
628     mEntries.add(pEntry);
629     if (ppEntry != NULL)
630         *ppEntry = pEntry;
631     pEntry = NULL;
632 
633     result = NO_ERROR;
634 
635 bail:
636     delete pEntry;
637     return result;
638 }
639 
640 /*
641  * Copy all of the bytes in "src" to "dst".
642  *
643  * On exit, "srcFp" will be seeked to the end of the file, and "dstFp"
644  * will be seeked immediately past the data.
645  */
copyFpToFp(FILE * dstFp,FILE * srcFp,unsigned long * pCRC32)646 status_t ZipFile::copyFpToFp(FILE* dstFp, FILE* srcFp, unsigned long* pCRC32)
647 {
648     unsigned char tmpBuf[32768];
649     size_t count;
650 
651     *pCRC32 = crc32(0L, Z_NULL, 0);
652 
653     while (1) {
654         count = fread(tmpBuf, 1, sizeof(tmpBuf), srcFp);
655         if (ferror(srcFp) || ferror(dstFp))
656             return errnoToStatus(errno);
657         if (count == 0)
658             break;
659 
660         *pCRC32 = crc32(*pCRC32, tmpBuf, count);
661 
662         if (fwrite(tmpBuf, 1, count, dstFp) != count) {
663             ALOGD("fwrite %d bytes failed\n", (int) count);
664             return UNKNOWN_ERROR;
665         }
666     }
667 
668     return NO_ERROR;
669 }
670 
671 /*
672  * Copy all of the bytes in "src" to "dst".
673  *
674  * On exit, "dstFp" will be seeked immediately past the data.
675  */
copyDataToFp(FILE * dstFp,const void * data,size_t size,unsigned long * pCRC32)676 status_t ZipFile::copyDataToFp(FILE* dstFp,
677     const void* data, size_t size, unsigned long* pCRC32)
678 {
679     size_t count;
680 
681     *pCRC32 = crc32(0L, Z_NULL, 0);
682     if (size > 0) {
683         *pCRC32 = crc32(*pCRC32, (const unsigned char*)data, size);
684         if (fwrite(data, 1, size, dstFp) != size) {
685             ALOGD("fwrite %d bytes failed\n", (int) size);
686             return UNKNOWN_ERROR;
687         }
688     }
689 
690     return NO_ERROR;
691 }
692 
693 /*
694  * Copy some of the bytes in "src" to "dst".
695  *
696  * If "pCRC32" is NULL, the CRC will not be computed.
697  *
698  * On exit, "srcFp" will be seeked to the end of the file, and "dstFp"
699  * will be seeked immediately past the data just written.
700  */
copyPartialFpToFp(FILE * dstFp,FILE * srcFp,long length,unsigned long * pCRC32)701 status_t ZipFile::copyPartialFpToFp(FILE* dstFp, FILE* srcFp, long length,
702     unsigned long* pCRC32)
703 {
704     unsigned char tmpBuf[32768];
705     size_t count;
706 
707     if (pCRC32 != NULL)
708         *pCRC32 = crc32(0L, Z_NULL, 0);
709 
710     while (length) {
711         long readSize;
712 
713         readSize = sizeof(tmpBuf);
714         if (readSize > length)
715             readSize = length;
716 
717         count = fread(tmpBuf, 1, readSize, srcFp);
718         if ((long) count != readSize) {     // error or unexpected EOF
719             ALOGD("fread %d bytes failed\n", (int) readSize);
720             return UNKNOWN_ERROR;
721         }
722 
723         if (pCRC32 != NULL)
724             *pCRC32 = crc32(*pCRC32, tmpBuf, count);
725 
726         if (fwrite(tmpBuf, 1, count, dstFp) != count) {
727             ALOGD("fwrite %d bytes failed\n", (int) count);
728             return UNKNOWN_ERROR;
729         }
730 
731         length -= readSize;
732     }
733 
734     return NO_ERROR;
735 }
736 
737 /*
738  * Compress all of the data in "srcFp" and write it to "dstFp".
739  *
740  * On exit, "srcFp" will be seeked to the end of the file, and "dstFp"
741  * will be seeked immediately past the compressed data.
742  */
compressFpToFp(FILE * dstFp,FILE * srcFp,const void * data,size_t size,unsigned long * pCRC32)743 status_t ZipFile::compressFpToFp(FILE* dstFp, FILE* srcFp,
744     const void* data, size_t size, unsigned long* pCRC32)
745 {
746     status_t result = NO_ERROR;
747     const size_t kBufSize = 32768;
748     unsigned char* inBuf = NULL;
749     unsigned char* outBuf = NULL;
750     z_stream zstream;
751     bool atEof = false;     // no feof() aviailable yet
752     unsigned long crc;
753     int zerr;
754 
755     /*
756      * Create an input buffer and an output buffer.
757      */
758     inBuf = new unsigned char[kBufSize];
759     outBuf = new unsigned char[kBufSize];
760     if (inBuf == NULL || outBuf == NULL) {
761         result = NO_MEMORY;
762         goto bail;
763     }
764 
765     /*
766      * Initialize the zlib stream.
767      */
768     memset(&zstream, 0, sizeof(zstream));
769     zstream.zalloc = Z_NULL;
770     zstream.zfree = Z_NULL;
771     zstream.opaque = Z_NULL;
772     zstream.next_in = NULL;
773     zstream.avail_in = 0;
774     zstream.next_out = outBuf;
775     zstream.avail_out = kBufSize;
776     zstream.data_type = Z_UNKNOWN;
777 
778     zerr = deflateInit2(&zstream, Z_BEST_COMPRESSION,
779         Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
780     if (zerr != Z_OK) {
781         result = UNKNOWN_ERROR;
782         if (zerr == Z_VERSION_ERROR) {
783             ALOGE("Installed zlib is not compatible with linked version (%s)\n",
784                 ZLIB_VERSION);
785         } else {
786             ALOGD("Call to deflateInit2 failed (zerr=%d)\n", zerr);
787         }
788         goto bail;
789     }
790 
791     crc = crc32(0L, Z_NULL, 0);
792 
793     /*
794      * Loop while we have data.
795      */
796     do {
797         size_t getSize;
798         int flush;
799 
800         /* only read if the input buffer is empty */
801         if (zstream.avail_in == 0 && !atEof) {
802             ALOGV("+++ reading %d bytes\n", (int)kBufSize);
803             if (data) {
804                 getSize = size > kBufSize ? kBufSize : size;
805                 memcpy(inBuf, data, getSize);
806                 data = ((const char*)data) + getSize;
807                 size -= getSize;
808             } else {
809                 getSize = fread(inBuf, 1, kBufSize, srcFp);
810                 if (ferror(srcFp)) {
811                     ALOGD("deflate read failed (errno=%d)\n", errno);
812                     goto z_bail;
813                 }
814             }
815             if (getSize < kBufSize) {
816                 ALOGV("+++  got %d bytes, EOF reached\n",
817                     (int)getSize);
818                 atEof = true;
819             }
820 
821             crc = crc32(crc, inBuf, getSize);
822 
823             zstream.next_in = inBuf;
824             zstream.avail_in = getSize;
825         }
826 
827         if (atEof)
828             flush = Z_FINISH;       /* tell zlib that we're done */
829         else
830             flush = Z_NO_FLUSH;     /* more to come! */
831 
832         zerr = deflate(&zstream, flush);
833         if (zerr != Z_OK && zerr != Z_STREAM_END) {
834             ALOGD("zlib deflate call failed (zerr=%d)\n", zerr);
835             result = UNKNOWN_ERROR;
836             goto z_bail;
837         }
838 
839         /* write when we're full or when we're done */
840         if (zstream.avail_out == 0 ||
841             (zerr == Z_STREAM_END && zstream.avail_out != (uInt) kBufSize))
842         {
843             ALOGV("+++ writing %d bytes\n", (int) (zstream.next_out - outBuf));
844             if (fwrite(outBuf, 1, zstream.next_out - outBuf, dstFp) !=
845                 (size_t)(zstream.next_out - outBuf))
846             {
847                 ALOGD("write %d failed in deflate\n",
848                     (int) (zstream.next_out - outBuf));
849                 goto z_bail;
850             }
851 
852             zstream.next_out = outBuf;
853             zstream.avail_out = kBufSize;
854         }
855     } while (zerr == Z_OK);
856 
857     assert(zerr == Z_STREAM_END);       /* other errors should've been caught */
858 
859     *pCRC32 = crc;
860 
861 z_bail:
862     deflateEnd(&zstream);        /* free up any allocated structures */
863 
864 bail:
865     delete[] inBuf;
866     delete[] outBuf;
867 
868     return result;
869 }
870 
871 /*
872  * Mark an entry as deleted.
873  *
874  * We will eventually need to crunch the file down, but if several files
875  * are being removed (perhaps as part of an "update" process) we can make
876  * things considerably faster by deferring the removal to "flush" time.
877  */
remove(ZipEntry * pEntry)878 status_t ZipFile::remove(ZipEntry* pEntry)
879 {
880     /*
881      * Should verify that pEntry is actually part of this archive, and
882      * not some stray ZipEntry from a different file.
883      */
884 
885     /* mark entry as deleted, and mark archive as dirty */
886     pEntry->setDeleted();
887     mNeedCDRewrite = true;
888     return NO_ERROR;
889 }
890 
891 /*
892  * Flush any pending writes.
893  *
894  * In particular, this will crunch out deleted entries, and write the
895  * Central Directory and EOCD if we have stomped on them.
896  */
flush(void)897 status_t ZipFile::flush(void)
898 {
899     status_t result = NO_ERROR;
900     long eocdPosn;
901     int i, count;
902 
903     if (mReadOnly)
904         return INVALID_OPERATION;
905     if (!mNeedCDRewrite)
906         return NO_ERROR;
907 
908     assert(mZipFp != NULL);
909 
910     result = crunchArchive();
911     if (result != NO_ERROR)
912         return result;
913 
914     if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0)
915         return UNKNOWN_ERROR;
916 
917     count = mEntries.size();
918     for (i = 0; i < count; i++) {
919         ZipEntry* pEntry = mEntries[i];
920         pEntry->mCDE.write(mZipFp);
921     }
922 
923     eocdPosn = ftell(mZipFp);
924     mEOCD.mCentralDirSize = eocdPosn - mEOCD.mCentralDirOffset;
925 
926     mEOCD.write(mZipFp);
927 
928     /*
929      * If we had some stuff bloat up during compression and get replaced
930      * with plain files, or if we deleted some entries, there's a lot
931      * of wasted space at the end of the file.  Remove it now.
932      */
933     if (ftruncate(fileno(mZipFp), ftell(mZipFp)) != 0) {
934         ALOGW("ftruncate failed %ld: %s\n", ftell(mZipFp), strerror(errno));
935         // not fatal
936     }
937 
938     /* should we clear the "newly added" flag in all entries now? */
939 
940     mNeedCDRewrite = false;
941     return NO_ERROR;
942 }
943 
944 /*
945  * Crunch deleted files out of an archive by shifting the later files down.
946  *
947  * Because we're not using a temp file, we do the operation inside the
948  * current file.
949  */
crunchArchive(void)950 status_t ZipFile::crunchArchive(void)
951 {
952     status_t result = NO_ERROR;
953     int i, count;
954     long delCount, adjust;
955 
956 #if 0
957     printf("CONTENTS:\n");
958     for (i = 0; i < (int) mEntries.size(); i++) {
959         printf(" %d: lfhOff=%ld del=%d\n",
960             i, mEntries[i]->getLFHOffset(), mEntries[i]->getDeleted());
961     }
962     printf("  END is %ld\n", (long) mEOCD.mCentralDirOffset);
963 #endif
964 
965     /*
966      * Roll through the set of files, shifting them as appropriate.  We
967      * could probably get a slight performance improvement by sliding
968      * multiple files down at once (because we could use larger reads
969      * when operating on batches of small files), but it's not that useful.
970      */
971     count = mEntries.size();
972     delCount = adjust = 0;
973     for (i = 0; i < count; i++) {
974         ZipEntry* pEntry = mEntries[i];
975         long span;
976 
977         if (pEntry->getLFHOffset() != 0) {
978             long nextOffset;
979 
980             /* Get the length of this entry by finding the offset
981              * of the next entry.  Directory entries don't have
982              * file offsets, so we need to find the next non-directory
983              * entry.
984              */
985             nextOffset = 0;
986             for (int ii = i+1; nextOffset == 0 && ii < count; ii++)
987                 nextOffset = mEntries[ii]->getLFHOffset();
988             if (nextOffset == 0)
989                 nextOffset = mEOCD.mCentralDirOffset;
990             span = nextOffset - pEntry->getLFHOffset();
991 
992             assert(span >= ZipEntry::LocalFileHeader::kLFHLen);
993         } else {
994             /* This is a directory entry.  It doesn't have
995              * any actual file contents, so there's no need to
996              * move anything.
997              */
998             span = 0;
999         }
1000 
1001         //printf("+++ %d: off=%ld span=%ld del=%d [count=%d]\n",
1002         //    i, pEntry->getLFHOffset(), span, pEntry->getDeleted(), count);
1003 
1004         if (pEntry->getDeleted()) {
1005             adjust += span;
1006             delCount++;
1007 
1008             delete pEntry;
1009             mEntries.removeAt(i);
1010 
1011             /* adjust loop control */
1012             count--;
1013             i--;
1014         } else if (span != 0 && adjust > 0) {
1015             /* shuffle this entry back */
1016             //printf("+++ Shuffling '%s' back %ld\n",
1017             //    pEntry->getFileName(), adjust);
1018             result = filemove(mZipFp, pEntry->getLFHOffset() - adjust,
1019                         pEntry->getLFHOffset(), span);
1020             if (result != NO_ERROR) {
1021                 /* this is why you use a temp file */
1022                 ALOGE("error during crunch - archive is toast\n");
1023                 return result;
1024             }
1025 
1026             pEntry->setLFHOffset(pEntry->getLFHOffset() - adjust);
1027         }
1028     }
1029 
1030     /*
1031      * Fix EOCD info.  We have to wait until the end to do some of this
1032      * because we use mCentralDirOffset to determine "span" for the
1033      * last entry.
1034      */
1035     mEOCD.mCentralDirOffset -= adjust;
1036     mEOCD.mNumEntries -= delCount;
1037     mEOCD.mTotalNumEntries -= delCount;
1038     mEOCD.mCentralDirSize = 0;  // mark invalid; set by flush()
1039 
1040     assert(mEOCD.mNumEntries == mEOCD.mTotalNumEntries);
1041     assert(mEOCD.mNumEntries == count);
1042 
1043     return result;
1044 }
1045 
1046 /*
1047  * Works like memmove(), but on pieces of a file.
1048  */
filemove(FILE * fp,off_t dst,off_t src,size_t n)1049 status_t ZipFile::filemove(FILE* fp, off_t dst, off_t src, size_t n)
1050 {
1051     if (dst == src || n <= 0)
1052         return NO_ERROR;
1053 
1054     unsigned char readBuf[32768];
1055 
1056     if (dst < src) {
1057         /* shift stuff toward start of file; must read from start */
1058         while (n != 0) {
1059             size_t getSize = sizeof(readBuf);
1060             if (getSize > n)
1061                 getSize = n;
1062 
1063             if (fseek(fp, (long) src, SEEK_SET) != 0) {
1064                 ALOGD("filemove src seek %ld failed\n", (long) src);
1065                 return UNKNOWN_ERROR;
1066             }
1067 
1068             if (fread(readBuf, 1, getSize, fp) != getSize) {
1069                 ALOGD("filemove read %ld off=%ld failed\n",
1070                     (long) getSize, (long) src);
1071                 return UNKNOWN_ERROR;
1072             }
1073 
1074             if (fseek(fp, (long) dst, SEEK_SET) != 0) {
1075                 ALOGD("filemove dst seek %ld failed\n", (long) dst);
1076                 return UNKNOWN_ERROR;
1077             }
1078 
1079             if (fwrite(readBuf, 1, getSize, fp) != getSize) {
1080                 ALOGD("filemove write %ld off=%ld failed\n",
1081                     (long) getSize, (long) dst);
1082                 return UNKNOWN_ERROR;
1083             }
1084 
1085             src += getSize;
1086             dst += getSize;
1087             n -= getSize;
1088         }
1089     } else {
1090         /* shift stuff toward end of file; must read from end */
1091         assert(false);      // write this someday, maybe
1092         return UNKNOWN_ERROR;
1093     }
1094 
1095     return NO_ERROR;
1096 }
1097 
1098 
1099 /*
1100  * Get the modification time from a file descriptor.
1101  */
getModTime(int fd)1102 time_t ZipFile::getModTime(int fd)
1103 {
1104     struct stat sb;
1105 
1106     if (fstat(fd, &sb) < 0) {
1107         ALOGD("HEY: fstat on fd %d failed\n", fd);
1108         return (time_t) -1;
1109     }
1110 
1111     return sb.st_mtime;
1112 }
1113 
1114 
#if 0       /* this is a bad idea */
/*
 * Hand out a duplicate of the Zip file's descriptor.
 *
 * Refused (INVALID_OPERATION) unless the archive was opened read-only:
 * between calls to flush() a writable archive's contents are in an
 * uncertain state.  The duplicate should only be used for reads.
 *
 * Returns the new descriptor, or the negative result of dup() on failure.
 */
int ZipFile::getZipFd(void) const
{
    if (!mReadOnly)
        return INVALID_OPERATION;
    assert(mZipFp != NULL);

    const int dupFd = dup(fileno(mZipFp));
    if (dupFd < 0)
        ALOGD("didn't work, errno=%d\n", errno);

    return dupFd;
}
#endif
1138 
1139 
#if 0
/*
 * Expand data.
 *
 * Disabled placeholder for expanding an entry into a caller-supplied
 * buffer.  It was never implemented: the body ignores both arguments and
 * always returns false.
 */
bool ZipFile::uncompress(const ZipEntry* pEntry, void* buf) const
{
    return false;
}
#endif
1149 
1150 // free the memory when you're done
uncompress(const ZipEntry * entry)1151 void* ZipFile::uncompress(const ZipEntry* entry)
1152 {
1153     size_t unlen = entry->getUncompressedLen();
1154     size_t clen = entry->getCompressedLen();
1155 
1156     void* buf = malloc(unlen);
1157     if (buf == NULL) {
1158         return NULL;
1159     }
1160 
1161     fseek(mZipFp, 0, SEEK_SET);
1162 
1163     off_t offset = entry->getFileOffset();
1164     if (fseek(mZipFp, offset, SEEK_SET) != 0) {
1165         goto bail;
1166     }
1167 
1168     switch (entry->getCompressionMethod())
1169     {
1170         case ZipEntry::kCompressStored: {
1171             ssize_t amt = fread(buf, 1, unlen, mZipFp);
1172             if (amt != (ssize_t)unlen) {
1173                 goto bail;
1174             }
1175 #if 0
1176             printf("data...\n");
1177             const unsigned char* p = (unsigned char*)buf;
1178             const unsigned char* end = p+unlen;
1179             for (int i=0; i<32 && p < end; i++) {
1180                 printf("0x%08x ", (int)(offset+(i*0x10)));
1181                 for (int j=0; j<0x10 && p < end; j++) {
1182                     printf(" %02x", *p);
1183                     p++;
1184                 }
1185                 printf("\n");
1186             }
1187 #endif
1188 
1189             }
1190             break;
1191         case ZipEntry::kCompressDeflated: {
1192             if (!ZipUtils::inflateToBuffer(mZipFp, buf, unlen, clen)) {
1193                 goto bail;
1194             }
1195             }
1196             break;
1197         default:
1198             goto bail;
1199     }
1200     return buf;
1201 
1202 bail:
1203     free(buf);
1204     return NULL;
1205 }
1206 
1207 
1208 /*
1209  * ===========================================================================
1210  *      ZipFile::EndOfCentralDir
1211  * ===========================================================================
1212  */
1213 
1214 /*
1215  * Read the end-of-central-dir fields.
1216  *
1217  * "buf" should be positioned at the EOCD signature, and should contain
1218  * the entire EOCD area including the comment.
1219  */
readBuf(const unsigned char * buf,int len)1220 status_t ZipFile::EndOfCentralDir::readBuf(const unsigned char* buf, int len)
1221 {
1222     /* don't allow re-use */
1223     assert(mComment == NULL);
1224 
1225     if (len < kEOCDLen) {
1226         /* looks like ZIP file got truncated */
1227         ALOGD(" Zip EOCD: expected >= %d bytes, found %d\n",
1228             kEOCDLen, len);
1229         return INVALID_OPERATION;
1230     }
1231 
1232     /* this should probably be an assert() */
1233     if (ZipEntry::getLongLE(&buf[0x00]) != kSignature)
1234         return UNKNOWN_ERROR;
1235 
1236     mDiskNumber = ZipEntry::getShortLE(&buf[0x04]);
1237     mDiskWithCentralDir = ZipEntry::getShortLE(&buf[0x06]);
1238     mNumEntries = ZipEntry::getShortLE(&buf[0x08]);
1239     mTotalNumEntries = ZipEntry::getShortLE(&buf[0x0a]);
1240     mCentralDirSize = ZipEntry::getLongLE(&buf[0x0c]);
1241     mCentralDirOffset = ZipEntry::getLongLE(&buf[0x10]);
1242     mCommentLen = ZipEntry::getShortLE(&buf[0x14]);
1243 
1244     // TODO: validate mCentralDirOffset
1245 
1246     if (mCommentLen > 0) {
1247         if (kEOCDLen + mCommentLen > len) {
1248             ALOGD("EOCD(%d) + comment(%d) exceeds len (%d)\n",
1249                 kEOCDLen, mCommentLen, len);
1250             return UNKNOWN_ERROR;
1251         }
1252         mComment = new unsigned char[mCommentLen];
1253         memcpy(mComment, buf + kEOCDLen, mCommentLen);
1254     }
1255 
1256     return NO_ERROR;
1257 }
1258 
1259 /*
1260  * Write an end-of-central-directory section.
1261  */
write(FILE * fp)1262 status_t ZipFile::EndOfCentralDir::write(FILE* fp)
1263 {
1264     unsigned char buf[kEOCDLen];
1265 
1266     ZipEntry::putLongLE(&buf[0x00], kSignature);
1267     ZipEntry::putShortLE(&buf[0x04], mDiskNumber);
1268     ZipEntry::putShortLE(&buf[0x06], mDiskWithCentralDir);
1269     ZipEntry::putShortLE(&buf[0x08], mNumEntries);
1270     ZipEntry::putShortLE(&buf[0x0a], mTotalNumEntries);
1271     ZipEntry::putLongLE(&buf[0x0c], mCentralDirSize);
1272     ZipEntry::putLongLE(&buf[0x10], mCentralDirOffset);
1273     ZipEntry::putShortLE(&buf[0x14], mCommentLen);
1274 
1275     if (fwrite(buf, 1, kEOCDLen, fp) != kEOCDLen)
1276         return UNKNOWN_ERROR;
1277     if (mCommentLen > 0) {
1278         assert(mComment != NULL);
1279         if (fwrite(mComment, mCommentLen, 1, fp) != mCommentLen)
1280             return UNKNOWN_ERROR;
1281     }
1282 
1283     return NO_ERROR;
1284 }
1285 
1286 /*
1287  * Dump the contents of an EndOfCentralDir object.
1288  */
dump(void) const1289 void ZipFile::EndOfCentralDir::dump(void) const
1290 {
1291     ALOGD(" EndOfCentralDir contents:\n");
1292     ALOGD("  diskNum=%u diskWCD=%u numEnt=%u totalNumEnt=%u\n",
1293         mDiskNumber, mDiskWithCentralDir, mNumEntries, mTotalNumEntries);
1294     ALOGD("  centDirSize=%lu centDirOff=%lu commentLen=%u\n",
1295         mCentralDirSize, mCentralDirOffset, mCommentLen);
1296 }
1297 
1298