1 /*
2  * Copyright (C) 2006 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 //
18 // Access to Zip archives.
19 //
20 
21 #define LOG_TAG "zip"
22 
23 #include <androidfw/ZipUtils.h>
24 #include <utils/Log.h>
25 
26 #include "ZipFile.h"
27 
28 #include <zlib.h>
29 #define DEF_MEM_LEVEL 8                // normally in zutil.h?
30 
31 #include "zopfli/deflate.h"
32 
33 #include <memory.h>
34 #include <sys/stat.h>
35 #include <errno.h>
36 #include <assert.h>
37 #include <inttypes.h>
38 
39 using namespace android;
40 
41 /*
42  * Some environments require the "b", some choke on it.
43  */
44 #define FILE_OPEN_RO        "rb"
45 #define FILE_OPEN_RW        "r+b"
46 #define FILE_OPEN_RW_CREATE "w+b"
47 
48 /* should live somewhere else? */
errnoToStatus(int err)49 static status_t errnoToStatus(int err)
50 {
51     if (err == ENOENT)
52         return NAME_NOT_FOUND;
53     else if (err == EACCES)
54         return PERMISSION_DENIED;
55     else
56         return UNKNOWN_ERROR;
57 }
58 
59 /*
60  * Open a file and parse its guts.
61  */
open(const char * zipFileName,int flags)62 status_t ZipFile::open(const char* zipFileName, int flags)
63 {
64     bool newArchive = false;
65 
66     assert(mZipFp == NULL);     // no reopen
67 
68     if ((flags & kOpenTruncate))
69         flags |= kOpenCreate;           // trunc implies create
70 
71     if ((flags & kOpenReadOnly) && (flags & kOpenReadWrite))
72         return INVALID_OPERATION;       // not both
73     if (!((flags & kOpenReadOnly) || (flags & kOpenReadWrite)))
74         return INVALID_OPERATION;       // not neither
75     if ((flags & kOpenCreate) && !(flags & kOpenReadWrite))
76         return INVALID_OPERATION;       // create requires write
77 
78     if (flags & kOpenTruncate) {
79         newArchive = true;
80     } else {
81         newArchive = (access(zipFileName, F_OK) != 0);
82         if (!(flags & kOpenCreate) && newArchive) {
83             /* not creating, must already exist */
84             ALOGD("File %s does not exist", zipFileName);
85             return NAME_NOT_FOUND;
86         }
87     }
88 
89     /* open the file */
90     const char* openflags;
91     if (flags & kOpenReadWrite) {
92         if (newArchive)
93             openflags = FILE_OPEN_RW_CREATE;
94         else
95             openflags = FILE_OPEN_RW;
96     } else {
97         openflags = FILE_OPEN_RO;
98     }
99     mZipFp = fopen(zipFileName, openflags);
100     if (mZipFp == NULL) {
101         int err = errno;
102         ALOGD("fopen failed: %d\n", err);
103         return errnoToStatus(err);
104     }
105 
106     status_t result;
107     if (!newArchive) {
108         /*
109          * Load the central directory.  If that fails, then this probably
110          * isn't a Zip archive.
111          */
112         result = readCentralDir();
113     } else {
114         /*
115          * Newly-created.  The EndOfCentralDir constructor actually
116          * sets everything to be the way we want it (all zeroes).  We
117          * set mNeedCDRewrite so that we create *something* if the
118          * caller doesn't add any files.  (We could also just unlink
119          * the file if it's brand new and nothing was added, but that's
120          * probably doing more than we really should -- the user might
121          * have a need for empty zip files.)
122          */
123         mNeedCDRewrite = true;
124         result = NO_ERROR;
125     }
126 
127     if (flags & kOpenReadOnly)
128         mReadOnly = true;
129     else
130         assert(!mReadOnly);
131 
132     return result;
133 }
134 
135 /*
136  * Return the Nth entry in the archive.
137  */
getEntryByIndex(int idx) const138 ZipEntry* ZipFile::getEntryByIndex(int idx) const
139 {
140     if (idx < 0 || idx >= (int) mEntries.size())
141         return NULL;
142 
143     return mEntries[idx];
144 }
145 
146 /*
147  * Find an entry by name.
148  */
getEntryByName(const char * fileName) const149 ZipEntry* ZipFile::getEntryByName(const char* fileName) const
150 {
151     /*
152      * Do a stupid linear string-compare search.
153      *
154      * There are various ways to speed this up, especially since it's rare
155      * to intermingle changes to the archive with "get by name" calls.  We
156      * don't want to sort the mEntries vector itself, however, because
157      * it's used to recreate the Central Directory.
158      *
159      * (Hash table works, parallel list of pointers in sorted order is good.)
160      */
161     int idx;
162 
163     for (idx = mEntries.size()-1; idx >= 0; idx--) {
164         ZipEntry* pEntry = mEntries[idx];
165         if (!pEntry->getDeleted() &&
166             strcmp(fileName, pEntry->getFileName()) == 0)
167         {
168             return pEntry;
169         }
170     }
171 
172     return NULL;
173 }
174 
175 /*
176  * Empty the mEntries vector.
177  */
discardEntries(void)178 void ZipFile::discardEntries(void)
179 {
180     int count = mEntries.size();
181 
182     while (--count >= 0)
183         delete mEntries[count];
184 
185     mEntries.clear();
186 }
187 
188 
189 /*
190  * Find the central directory and read the contents.
191  *
192  * The fun thing about ZIP archives is that they may or may not be
193  * readable from start to end.  In some cases, notably for archives
194  * that were written to stdout, the only length information is in the
195  * central directory at the end of the file.
196  *
197  * Of course, the central directory can be followed by a variable-length
198  * comment field, so we have to scan through it backwards.  The comment
199  * is at most 64K, plus we have 18 bytes for the end-of-central-dir stuff
200  * itself, plus apparently sometimes people throw random junk on the end
201  * just for the fun of it.
202  *
203  * This is all a little wobbly.  If the wrong value ends up in the EOCD
 * area, we're hosed.  This appears to be the way that everybody handles
205  * it though, so we're in pretty good company if this fails.
206  */
readCentralDir(void)207 status_t ZipFile::readCentralDir(void)
208 {
209     status_t result = NO_ERROR;
210     uint8_t* buf = NULL;
211     off_t fileLength, seekStart;
212     long readAmount;
213     int i;
214 
215     fseek(mZipFp, 0, SEEK_END);
216     fileLength = ftell(mZipFp);
217     rewind(mZipFp);
218 
219     /* too small to be a ZIP archive? */
220     if (fileLength < EndOfCentralDir::kEOCDLen) {
221         ALOGD("Length is %ld -- too small\n", (long)fileLength);
222         result = INVALID_OPERATION;
223         goto bail;
224     }
225 
226     buf = new uint8_t[EndOfCentralDir::kMaxEOCDSearch];
227     if (buf == NULL) {
228         ALOGD("Failure allocating %d bytes for EOCD search",
229              EndOfCentralDir::kMaxEOCDSearch);
230         result = NO_MEMORY;
231         goto bail;
232     }
233 
234     if (fileLength > EndOfCentralDir::kMaxEOCDSearch) {
235         seekStart = fileLength - EndOfCentralDir::kMaxEOCDSearch;
236         readAmount = EndOfCentralDir::kMaxEOCDSearch;
237     } else {
238         seekStart = 0;
239         readAmount = (long) fileLength;
240     }
241     if (fseek(mZipFp, seekStart, SEEK_SET) != 0) {
242         ALOGD("Failure seeking to end of zip at %ld", (long) seekStart);
243         result = UNKNOWN_ERROR;
244         goto bail;
245     }
246 
247     /* read the last part of the file into the buffer */
248     if (fread(buf, 1, readAmount, mZipFp) != (size_t) readAmount) {
249         ALOGD("short file? wanted %ld\n", readAmount);
250         result = UNKNOWN_ERROR;
251         goto bail;
252     }
253 
254     /* find the end-of-central-dir magic */
255     for (i = readAmount - 4; i >= 0; i--) {
256         if (buf[i] == 0x50 &&
257             ZipEntry::getLongLE(&buf[i]) == EndOfCentralDir::kSignature)
258         {
259             ALOGV("+++ Found EOCD at buf+%d\n", i);
260             break;
261         }
262     }
263     if (i < 0) {
264         ALOGD("EOCD not found, not Zip\n");
265         result = INVALID_OPERATION;
266         goto bail;
267     }
268 
269     /* extract eocd values */
270     result = mEOCD.readBuf(buf + i, readAmount - i);
271     if (result != NO_ERROR) {
272         ALOGD("Failure reading %ld bytes of EOCD values", readAmount - i);
273         goto bail;
274     }
275     //mEOCD.dump();
276 
277     if (mEOCD.mDiskNumber != 0 || mEOCD.mDiskWithCentralDir != 0 ||
278         mEOCD.mNumEntries != mEOCD.mTotalNumEntries)
279     {
280         ALOGD("Archive spanning not supported\n");
281         result = INVALID_OPERATION;
282         goto bail;
283     }
284 
285     /*
286      * So far so good.  "mCentralDirSize" is the size in bytes of the
287      * central directory, so we can just seek back that far to find it.
288      * We can also seek forward mCentralDirOffset bytes from the
289      * start of the file.
290      *
291      * We're not guaranteed to have the rest of the central dir in the
292      * buffer, nor are we guaranteed that the central dir will have any
293      * sort of convenient size.  We need to skip to the start of it and
294      * read the header, then the other goodies.
295      *
296      * The only thing we really need right now is the file comment, which
297      * we're hoping to preserve.
298      */
299     if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
300         ALOGD("Failure seeking to central dir offset %" PRIu32 "\n",
301              mEOCD.mCentralDirOffset);
302         result = UNKNOWN_ERROR;
303         goto bail;
304     }
305 
306     /*
307      * Loop through and read the central dir entries.
308      */
309     ALOGV("Scanning %" PRIu16 " entries...\n", mEOCD.mTotalNumEntries);
310     int entry;
311     for (entry = 0; entry < mEOCD.mTotalNumEntries; entry++) {
312         ZipEntry* pEntry = new ZipEntry;
313 
314         result = pEntry->initFromCDE(mZipFp);
315         if (result != NO_ERROR) {
316             ALOGD("initFromCDE failed\n");
317             delete pEntry;
318             goto bail;
319         }
320 
321         mEntries.add(pEntry);
322     }
323 
324 
325     /*
326      * If all went well, we should now be back at the EOCD.
327      */
328     {
329         uint8_t checkBuf[4];
330         if (fread(checkBuf, 1, 4, mZipFp) != 4) {
331             ALOGD("EOCD check read failed\n");
332             result = INVALID_OPERATION;
333             goto bail;
334         }
335         if (ZipEntry::getLongLE(checkBuf) != EndOfCentralDir::kSignature) {
336             ALOGD("EOCD read check failed\n");
337             result = UNKNOWN_ERROR;
338             goto bail;
339         }
340         ALOGV("+++ EOCD read check passed\n");
341     }
342 
343 bail:
344     delete[] buf;
345     return result;
346 }
347 
348 
349 /*
350  * Add a new file to the archive.
351  *
352  * This requires creating and populating a ZipEntry structure, and copying
353  * the data into the file at the appropriate position.  The "appropriate
354  * position" is the current location of the central directory, which we
355  * casually overwrite (we can put it back later).
356  *
357  * If we were concerned about safety, we would want to make all changes
358  * in a temp file and then overwrite the original after everything was
359  * safely written.  Not really a concern for us.
360  */
addCommon(const char * fileName,const void * data,size_t size,const char * storageName,int sourceType,int compressionMethod,ZipEntry ** ppEntry)361 status_t ZipFile::addCommon(const char* fileName, const void* data, size_t size,
362     const char* storageName, int sourceType, int compressionMethod,
363     ZipEntry** ppEntry)
364 {
365     ZipEntry* pEntry = NULL;
366     status_t result = NO_ERROR;
367     long lfhPosn, startPosn, endPosn, uncompressedLen;
368     FILE* inputFp = NULL;
369     uint32_t crc;
370     time_t modWhen;
371 
372     if (mReadOnly)
373         return INVALID_OPERATION;
374 
375     assert(compressionMethod == ZipEntry::kCompressDeflated ||
376            compressionMethod == ZipEntry::kCompressStored);
377 
378     /* make sure we're in a reasonable state */
379     assert(mZipFp != NULL);
380     assert(mEntries.size() == mEOCD.mTotalNumEntries);
381 
382     /* make sure it doesn't already exist */
383     if (getEntryByName(storageName) != NULL)
384         return ALREADY_EXISTS;
385 
386     if (!data) {
387         inputFp = fopen(fileName, FILE_OPEN_RO);
388         if (inputFp == NULL)
389             return errnoToStatus(errno);
390     }
391 
392     if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
393         result = UNKNOWN_ERROR;
394         goto bail;
395     }
396 
397     pEntry = new ZipEntry;
398     pEntry->initNew(storageName, NULL);
399 
400     /*
401      * From here on out, failures are more interesting.
402      */
403     mNeedCDRewrite = true;
404 
405     /*
406      * Write the LFH, even though it's still mostly blank.  We need it
407      * as a place-holder.  In theory the LFH isn't necessary, but in
408      * practice some utilities demand it.
409      */
410     lfhPosn = ftell(mZipFp);
411     pEntry->mLFH.write(mZipFp);
412     startPosn = ftell(mZipFp);
413 
414     /*
415      * Copy the data in, possibly compressing it as we go.
416      */
417     if (sourceType == ZipEntry::kCompressStored) {
418         if (compressionMethod == ZipEntry::kCompressDeflated) {
419             bool failed = false;
420             result = compressFpToFp(mZipFp, inputFp, data, size, &crc);
421             if (result != NO_ERROR) {
422                 ALOGD("compression failed, storing\n");
423                 failed = true;
424             } else {
425                 /*
426                  * Make sure it has compressed "enough".  This probably ought
427                  * to be set through an API call, but I don't expect our
428                  * criteria to change over time.
429                  */
430                 long src = inputFp ? ftell(inputFp) : size;
431                 long dst = ftell(mZipFp) - startPosn;
432                 if (dst + (dst / 10) > src) {
433                     ALOGD("insufficient compression (src=%ld dst=%ld), storing\n",
434                         src, dst);
435                     failed = true;
436                 }
437             }
438 
439             if (failed) {
440                 compressionMethod = ZipEntry::kCompressStored;
441                 if (inputFp) rewind(inputFp);
442                 fseek(mZipFp, startPosn, SEEK_SET);
443                 /* fall through to kCompressStored case */
444             }
445         }
446         /* handle "no compression" request, or failed compression from above */
447         if (compressionMethod == ZipEntry::kCompressStored) {
448             if (inputFp) {
449                 result = copyFpToFp(mZipFp, inputFp, &crc);
450             } else {
451                 result = copyDataToFp(mZipFp, data, size, &crc);
452             }
453             if (result != NO_ERROR) {
454                 // don't need to truncate; happens in CDE rewrite
455                 ALOGD("failed copying data in\n");
456                 goto bail;
457             }
458         }
459 
460         // currently seeked to end of file
461         uncompressedLen = inputFp ? ftell(inputFp) : size;
462     } else if (sourceType == ZipEntry::kCompressDeflated) {
463         /* we should support uncompressed-from-compressed, but it's not
464          * important right now */
465         assert(compressionMethod == ZipEntry::kCompressDeflated);
466 
467         bool scanResult;
468         int method;
469         long compressedLen;
470         unsigned long longcrc;
471 
472         scanResult = ZipUtils::examineGzip(inputFp, &method, &uncompressedLen,
473                         &compressedLen, &longcrc);
474         if (!scanResult || method != ZipEntry::kCompressDeflated) {
475             ALOGD("this isn't a deflated gzip file?");
476             result = UNKNOWN_ERROR;
477             goto bail;
478         }
479         crc = longcrc;
480 
481         result = copyPartialFpToFp(mZipFp, inputFp, compressedLen, NULL);
482         if (result != NO_ERROR) {
483             ALOGD("failed copying gzip data in\n");
484             goto bail;
485         }
486     } else {
487         assert(false);
488         result = UNKNOWN_ERROR;
489         goto bail;
490     }
491 
492     /*
493      * We could write the "Data Descriptor", but there doesn't seem to
494      * be any point since we're going to go back and write the LFH.
495      *
496      * Update file offsets.
497      */
498     endPosn = ftell(mZipFp);            // seeked to end of compressed data
499 
500     /*
501      * Success!  Fill out new values.
502      */
503     pEntry->setDataInfo(uncompressedLen, endPosn - startPosn, crc,
504         compressionMethod);
505     modWhen = getModTime(inputFp ? fileno(inputFp) : fileno(mZipFp));
506     pEntry->setModWhen(modWhen);
507     pEntry->setLFHOffset(lfhPosn);
508     mEOCD.mNumEntries++;
509     mEOCD.mTotalNumEntries++;
510     mEOCD.mCentralDirSize = 0;      // mark invalid; set by flush()
511     mEOCD.mCentralDirOffset = endPosn;
512 
513     /*
514      * Go back and write the LFH.
515      */
516     if (fseek(mZipFp, lfhPosn, SEEK_SET) != 0) {
517         result = UNKNOWN_ERROR;
518         goto bail;
519     }
520     pEntry->mLFH.write(mZipFp);
521 
522     /*
523      * Add pEntry to the list.
524      */
525     mEntries.add(pEntry);
526     if (ppEntry != NULL)
527         *ppEntry = pEntry;
528     pEntry = NULL;
529 
530 bail:
531     if (inputFp != NULL)
532         fclose(inputFp);
533     delete pEntry;
534     return result;
535 }
536 
537 /*
538  * Add an entry by copying it from another zip file.  If "padding" is
539  * nonzero, the specified number of bytes will be added to the "extra"
540  * field in the header.
541  *
542  * If "ppEntry" is non-NULL, a pointer to the new entry will be returned.
543  */
add(const ZipFile * pSourceZip,const ZipEntry * pSourceEntry,int padding,ZipEntry ** ppEntry)544 status_t ZipFile::add(const ZipFile* pSourceZip, const ZipEntry* pSourceEntry,
545     int padding, ZipEntry** ppEntry)
546 {
547     ZipEntry* pEntry = NULL;
548     status_t result;
549     long lfhPosn, endPosn;
550 
551     if (mReadOnly)
552         return INVALID_OPERATION;
553 
554     /* make sure we're in a reasonable state */
555     assert(mZipFp != NULL);
556     assert(mEntries.size() == mEOCD.mTotalNumEntries);
557 
558     if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
559         result = UNKNOWN_ERROR;
560         goto bail;
561     }
562 
563     pEntry = new ZipEntry;
564     if (pEntry == NULL) {
565         result = NO_MEMORY;
566         goto bail;
567     }
568 
569     result = pEntry->initFromExternal(pSourceEntry);
570     if (result != NO_ERROR)
571         goto bail;
572     if (padding != 0) {
573         result = pEntry->addPadding(padding);
574         if (result != NO_ERROR)
575             goto bail;
576     }
577 
578     /*
579      * From here on out, failures are more interesting.
580      */
581     mNeedCDRewrite = true;
582 
583     /*
584      * Write the LFH.  Since we're not recompressing the data, we already
585      * have all of the fields filled out.
586      */
587     lfhPosn = ftell(mZipFp);
588     pEntry->mLFH.write(mZipFp);
589 
590     /*
591      * Copy the data over.
592      *
593      * If the "has data descriptor" flag is set, we want to copy the DD
594      * fields as well.  This is a fixed-size area immediately following
595      * the data.
596      */
597     if (fseek(pSourceZip->mZipFp, pSourceEntry->getFileOffset(), SEEK_SET) != 0)
598     {
599         result = UNKNOWN_ERROR;
600         goto bail;
601     }
602 
603     off_t copyLen;
604     copyLen = pSourceEntry->getCompressedLen();
605     if ((pSourceEntry->mLFH.mGPBitFlag & ZipEntry::kUsesDataDescr) != 0)
606         copyLen += ZipEntry::kDataDescriptorLen;
607 
608     if (copyPartialFpToFp(mZipFp, pSourceZip->mZipFp, copyLen, NULL)
609         != NO_ERROR)
610     {
611         ALOGW("copy of '%s' failed\n", pEntry->mCDE.mFileName);
612         result = UNKNOWN_ERROR;
613         goto bail;
614     }
615 
616     /*
617      * Update file offsets.
618      */
619     endPosn = ftell(mZipFp);
620 
621     /*
622      * Success!  Fill out new values.
623      */
624     pEntry->setLFHOffset(lfhPosn);      // sets mCDE.mLocalHeaderRelOffset
625     mEOCD.mNumEntries++;
626     mEOCD.mTotalNumEntries++;
627     mEOCD.mCentralDirSize = 0;      // mark invalid; set by flush()
628     mEOCD.mCentralDirOffset = endPosn;
629 
630     /*
631      * Add pEntry to the list.
632      */
633     mEntries.add(pEntry);
634     if (ppEntry != NULL)
635         *ppEntry = pEntry;
636     pEntry = NULL;
637 
638     result = NO_ERROR;
639 
640 bail:
641     delete pEntry;
642     return result;
643 }
644 
645 /*
646  * Add an entry by copying it from another zip file, recompressing with
647  * Zopfli if already compressed.
648  *
649  * If "ppEntry" is non-NULL, a pointer to the new entry will be returned.
650  */
addRecompress(const ZipFile * pSourceZip,const ZipEntry * pSourceEntry,ZipEntry ** ppEntry)651 status_t ZipFile::addRecompress(const ZipFile* pSourceZip, const ZipEntry* pSourceEntry,
652     ZipEntry** ppEntry)
653 {
654     ZipEntry* pEntry = NULL;
655     status_t result;
656     long lfhPosn, startPosn, endPosn, uncompressedLen;
657 
658     if (mReadOnly)
659         return INVALID_OPERATION;
660 
661     /* make sure we're in a reasonable state */
662     assert(mZipFp != NULL);
663     assert(mEntries.size() == mEOCD.mTotalNumEntries);
664 
665     if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
666         result = UNKNOWN_ERROR;
667         goto bail;
668     }
669 
670     pEntry = new ZipEntry;
671     if (pEntry == NULL) {
672         result = NO_MEMORY;
673         goto bail;
674     }
675 
676     result = pEntry->initFromExternal(pSourceEntry);
677     if (result != NO_ERROR)
678         goto bail;
679 
680     /*
681      * From here on out, failures are more interesting.
682      */
683     mNeedCDRewrite = true;
684 
685     /*
686      * Write the LFH, even though it's still mostly blank.  We need it
687      * as a place-holder.  In theory the LFH isn't necessary, but in
688      * practice some utilities demand it.
689      */
690     lfhPosn = ftell(mZipFp);
691     pEntry->mLFH.write(mZipFp);
692     startPosn = ftell(mZipFp);
693 
694     /*
695      * Copy the data over.
696      *
697      * If the "has data descriptor" flag is set, we want to copy the DD
698      * fields as well.  This is a fixed-size area immediately following
699      * the data.
700      */
701     if (fseek(pSourceZip->mZipFp, pSourceEntry->getFileOffset(), SEEK_SET) != 0)
702     {
703         result = UNKNOWN_ERROR;
704         goto bail;
705     }
706 
707     uncompressedLen = pSourceEntry->getUncompressedLen();
708 
709     if (pSourceEntry->isCompressed()) {
710         void *buf = pSourceZip->uncompress(pSourceEntry);
711         if (buf == NULL) {
712             result = NO_MEMORY;
713             goto bail;
714         }
715         long startPosn = ftell(mZipFp);
716         uint32_t crc;
717         if (compressFpToFp(mZipFp, NULL, buf, uncompressedLen, &crc) != NO_ERROR) {
718             ALOGW("recompress of '%s' failed\n", pEntry->mCDE.mFileName);
719             result = UNKNOWN_ERROR;
720             free(buf);
721             goto bail;
722         }
723         long endPosn = ftell(mZipFp);
724         pEntry->setDataInfo(uncompressedLen, endPosn - startPosn,
725             pSourceEntry->getCRC32(), ZipEntry::kCompressDeflated);
726         free(buf);
727     } else {
728         off_t copyLen;
729         copyLen = pSourceEntry->getCompressedLen();
730         if ((pSourceEntry->mLFH.mGPBitFlag & ZipEntry::kUsesDataDescr) != 0)
731             copyLen += ZipEntry::kDataDescriptorLen;
732 
733         if (copyPartialFpToFp(mZipFp, pSourceZip->mZipFp, copyLen, NULL)
734             != NO_ERROR)
735         {
736             ALOGW("copy of '%s' failed\n", pEntry->mCDE.mFileName);
737             result = UNKNOWN_ERROR;
738             goto bail;
739         }
740     }
741 
742     /*
743      * Update file offsets.
744      */
745     endPosn = ftell(mZipFp);
746 
747     /*
748      * Success!  Fill out new values.
749      */
750     pEntry->setLFHOffset(lfhPosn);
751     mEOCD.mNumEntries++;
752     mEOCD.mTotalNumEntries++;
753     mEOCD.mCentralDirSize = 0;      // mark invalid; set by flush()
754     mEOCD.mCentralDirOffset = endPosn;
755 
756     /*
757      * Go back and write the LFH.
758      */
759     if (fseek(mZipFp, lfhPosn, SEEK_SET) != 0) {
760         result = UNKNOWN_ERROR;
761         goto bail;
762     }
763     pEntry->mLFH.write(mZipFp);
764 
765     /*
766      * Add pEntry to the list.
767      */
768     mEntries.add(pEntry);
769     if (ppEntry != NULL)
770         *ppEntry = pEntry;
771     pEntry = NULL;
772 
773     result = NO_ERROR;
774 
775 bail:
776     delete pEntry;
777     return result;
778 }
779 
780 /*
781  * Copy all of the bytes in "src" to "dst".
782  *
783  * On exit, "srcFp" will be seeked to the end of the file, and "dstFp"
784  * will be seeked immediately past the data.
785  */
copyFpToFp(FILE * dstFp,FILE * srcFp,uint32_t * pCRC32)786 status_t ZipFile::copyFpToFp(FILE* dstFp, FILE* srcFp, uint32_t* pCRC32)
787 {
788     uint8_t tmpBuf[32768];
789     size_t count;
790 
791     *pCRC32 = crc32(0L, Z_NULL, 0);
792 
793     while (1) {
794         count = fread(tmpBuf, 1, sizeof(tmpBuf), srcFp);
795         if (ferror(srcFp) || ferror(dstFp))
796             return errnoToStatus(errno);
797         if (count == 0)
798             break;
799 
800         *pCRC32 = crc32(*pCRC32, tmpBuf, count);
801 
802         if (fwrite(tmpBuf, 1, count, dstFp) != count) {
803             ALOGD("fwrite %d bytes failed\n", (int) count);
804             return UNKNOWN_ERROR;
805         }
806     }
807 
808     return NO_ERROR;
809 }
810 
811 /*
 * Copy "size" bytes from the "data" buffer to "dstFp".
813  *
814  * On exit, "dstFp" will be seeked immediately past the data.
815  */
copyDataToFp(FILE * dstFp,const void * data,size_t size,uint32_t * pCRC32)816 status_t ZipFile::copyDataToFp(FILE* dstFp,
817     const void* data, size_t size, uint32_t* pCRC32)
818 {
819     size_t count;
820 
821     *pCRC32 = crc32(0L, Z_NULL, 0);
822     if (size > 0) {
823         *pCRC32 = crc32(*pCRC32, (const unsigned char*)data, size);
824         if (fwrite(data, 1, size, dstFp) != size) {
825             ALOGD("fwrite %d bytes failed\n", (int) size);
826             return UNKNOWN_ERROR;
827         }
828     }
829 
830     return NO_ERROR;
831 }
832 
833 /*
834  * Copy some of the bytes in "src" to "dst".
835  *
836  * If "pCRC32" is NULL, the CRC will not be computed.
837  *
 * On exit, "srcFp" will be seeked immediately past the bytes that were
 * read, and "dstFp" will be seeked immediately past the data just written.
840  */
copyPartialFpToFp(FILE * dstFp,FILE * srcFp,long length,uint32_t * pCRC32)841 status_t ZipFile::copyPartialFpToFp(FILE* dstFp, FILE* srcFp, long length,
842     uint32_t* pCRC32)
843 {
844     uint8_t tmpBuf[32768];
845     size_t count;
846 
847     if (pCRC32 != NULL)
848         *pCRC32 = crc32(0L, Z_NULL, 0);
849 
850     while (length) {
851         long readSize;
852 
853         readSize = sizeof(tmpBuf);
854         if (readSize > length)
855             readSize = length;
856 
857         count = fread(tmpBuf, 1, readSize, srcFp);
858         if ((long) count != readSize) {     // error or unexpected EOF
859             ALOGD("fread %d bytes failed\n", (int) readSize);
860             return UNKNOWN_ERROR;
861         }
862 
863         if (pCRC32 != NULL)
864             *pCRC32 = crc32(*pCRC32, tmpBuf, count);
865 
866         if (fwrite(tmpBuf, 1, count, dstFp) != count) {
867             ALOGD("fwrite %d bytes failed\n", (int) count);
868             return UNKNOWN_ERROR;
869         }
870 
871         length -= readSize;
872     }
873 
874     return NO_ERROR;
875 }
876 
877 /*
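 * Compress all of the data in "srcFp" and write it to "dstFp".  If "data"
 * is non-NULL, the "size" bytes it points to are compressed instead and
 * "srcFp" is not read.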
879  *
880  * On exit, "srcFp" will be seeked to the end of the file, and "dstFp"
881  * will be seeked immediately past the compressed data.
882  */
compressFpToFp(FILE * dstFp,FILE * srcFp,const void * data,size_t size,uint32_t * pCRC32)883 status_t ZipFile::compressFpToFp(FILE* dstFp, FILE* srcFp,
884     const void* data, size_t size, uint32_t* pCRC32)
885 {
886     status_t result = NO_ERROR;
887     const size_t kBufSize = 1024 * 1024;
888     uint8_t* inBuf = NULL;
889     uint8_t* outBuf = NULL;
890     size_t outSize = 0;
891     bool atEof = false;     // no feof() aviailable yet
892     uint32_t crc;
893     ZopfliOptions options;
894     unsigned char bp = 0;
895 
896     ZopfliInitOptions(&options);
897 
898     crc = crc32(0L, Z_NULL, 0);
899 
900     if (data) {
901         crc = crc32(crc, (const unsigned char*)data, size);
902         ZopfliDeflate(&options, 2, true, (const unsigned char*)data, size, &bp,
903             &outBuf, &outSize);
904     } else {
905         /*
906          * Create an input buffer and an output buffer.
907          */
908         inBuf = new uint8_t[kBufSize];
909         if (inBuf == NULL) {
910             result = NO_MEMORY;
911             goto bail;
912         }
913 
914         /*
915          * Loop while we have data.
916          */
917         do {
918             size_t getSize;
919             getSize = fread(inBuf, 1, kBufSize, srcFp);
920             if (ferror(srcFp)) {
921                 ALOGD("deflate read failed (errno=%d)\n", errno);
922                 delete[] inBuf;
923                 goto bail;
924             }
925             if (getSize < kBufSize) {
926                 ALOGV("+++  got %d bytes, EOF reached\n",
927                     (int)getSize);
928                 atEof = true;
929             }
930 
931             crc = crc32(crc, inBuf, getSize);
932             ZopfliDeflate(&options, 2, atEof, inBuf, getSize, &bp, &outBuf, &outSize);
933         } while (!atEof);
934         delete[] inBuf;
935     }
936 
937     ALOGV("+++ writing %d bytes\n", (int)outSize);
938     if (fwrite(outBuf, 1, outSize, dstFp) != outSize) {
939         ALOGD("write %d failed in deflate\n", (int)outSize);
940         goto bail;
941     }
942 
943     *pCRC32 = crc;
944 
945 bail:
946     free(outBuf);
947 
948     return result;
949 }
950 
951 /*
952  * Mark an entry as deleted.
953  *
954  * We will eventually need to crunch the file down, but if several files
955  * are being removed (perhaps as part of an "update" process) we can make
956  * things considerably faster by deferring the removal to "flush" time.
957  */
remove(ZipEntry * pEntry)958 status_t ZipFile::remove(ZipEntry* pEntry)
959 {
960     /*
961      * Should verify that pEntry is actually part of this archive, and
962      * not some stray ZipEntry from a different file.
963      */
964 
965     /* mark entry as deleted, and mark archive as dirty */
966     pEntry->setDeleted();
967     mNeedCDRewrite = true;
968     return NO_ERROR;
969 }
970 
971 /*
972  * Flush any pending writes.
973  *
974  * In particular, this will crunch out deleted entries, and write the
975  * Central Directory and EOCD if we have stomped on them.
976  */
flush(void)977 status_t ZipFile::flush(void)
978 {
979     status_t result = NO_ERROR;
980     long eocdPosn;
981     int i, count;
982 
983     if (mReadOnly)
984         return INVALID_OPERATION;
985     if (!mNeedCDRewrite)
986         return NO_ERROR;
987 
988     assert(mZipFp != NULL);
989 
990     result = crunchArchive();
991     if (result != NO_ERROR)
992         return result;
993 
994     if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0)
995         return UNKNOWN_ERROR;
996 
997     count = mEntries.size();
998     for (i = 0; i < count; i++) {
999         ZipEntry* pEntry = mEntries[i];
1000         pEntry->mCDE.write(mZipFp);
1001     }
1002 
1003     eocdPosn = ftell(mZipFp);
1004     mEOCD.mCentralDirSize = eocdPosn - mEOCD.mCentralDirOffset;
1005 
1006     mEOCD.write(mZipFp);
1007 
1008     /*
1009      * If we had some stuff bloat up during compression and get replaced
1010      * with plain files, or if we deleted some entries, there's a lot
1011      * of wasted space at the end of the file.  Remove it now.
1012      */
1013     if (ftruncate(fileno(mZipFp), ftell(mZipFp)) != 0) {
1014         ALOGW("ftruncate failed %ld: %s\n", ftell(mZipFp), strerror(errno));
1015         // not fatal
1016     }
1017 
1018     /* should we clear the "newly added" flag in all entries now? */
1019 
1020     mNeedCDRewrite = false;
1021     return NO_ERROR;
1022 }
1023 
1024 /*
1025  * Crunch deleted files out of an archive by shifting the later files down.
1026  *
1027  * Because we're not using a temp file, we do the operation inside the
1028  * current file.
1029  */
crunchArchive(void)1030 status_t ZipFile::crunchArchive(void)
1031 {
1032     status_t result = NO_ERROR;
1033     int i, count;
1034     long delCount, adjust;
1035 
1036 #if 0
1037     printf("CONTENTS:\n");
1038     for (i = 0; i < (int) mEntries.size(); i++) {
1039         printf(" %d: lfhOff=%ld del=%d\n",
1040             i, mEntries[i]->getLFHOffset(), mEntries[i]->getDeleted());
1041     }
1042     printf("  END is %ld\n", (long) mEOCD.mCentralDirOffset);
1043 #endif
1044 
1045     /*
1046      * Roll through the set of files, shifting them as appropriate.  We
1047      * could probably get a slight performance improvement by sliding
1048      * multiple files down at once (because we could use larger reads
1049      * when operating on batches of small files), but it's not that useful.
1050      */
1051     count = mEntries.size();
1052     delCount = adjust = 0;
1053     for (i = 0; i < count; i++) {
1054         ZipEntry* pEntry = mEntries[i];
1055         long span;
1056 
1057         if (pEntry->getLFHOffset() != 0) {
1058             long nextOffset;
1059 
1060             /* Get the length of this entry by finding the offset
1061              * of the next entry.  Directory entries don't have
1062              * file offsets, so we need to find the next non-directory
1063              * entry.
1064              */
1065             nextOffset = 0;
1066             for (int ii = i+1; nextOffset == 0 && ii < count; ii++)
1067                 nextOffset = mEntries[ii]->getLFHOffset();
1068             if (nextOffset == 0)
1069                 nextOffset = mEOCD.mCentralDirOffset;
1070             span = nextOffset - pEntry->getLFHOffset();
1071 
1072             assert(span >= ZipEntry::LocalFileHeader::kLFHLen);
1073         } else {
1074             /* This is a directory entry.  It doesn't have
1075              * any actual file contents, so there's no need to
1076              * move anything.
1077              */
1078             span = 0;
1079         }
1080 
1081         //printf("+++ %d: off=%ld span=%ld del=%d [count=%d]\n",
1082         //    i, pEntry->getLFHOffset(), span, pEntry->getDeleted(), count);
1083 
1084         if (pEntry->getDeleted()) {
1085             adjust += span;
1086             delCount++;
1087 
1088             delete pEntry;
1089             mEntries.removeAt(i);
1090 
1091             /* adjust loop control */
1092             count--;
1093             i--;
1094         } else if (span != 0 && adjust > 0) {
1095             /* shuffle this entry back */
1096             //printf("+++ Shuffling '%s' back %ld\n",
1097             //    pEntry->getFileName(), adjust);
1098             result = filemove(mZipFp, pEntry->getLFHOffset() - adjust,
1099                         pEntry->getLFHOffset(), span);
1100             if (result != NO_ERROR) {
1101                 /* this is why you use a temp file */
1102                 ALOGE("error during crunch - archive is toast\n");
1103                 return result;
1104             }
1105 
1106             pEntry->setLFHOffset(pEntry->getLFHOffset() - adjust);
1107         }
1108     }
1109 
1110     /*
1111      * Fix EOCD info.  We have to wait until the end to do some of this
1112      * because we use mCentralDirOffset to determine "span" for the
1113      * last entry.
1114      */
1115     mEOCD.mCentralDirOffset -= adjust;
1116     mEOCD.mNumEntries -= delCount;
1117     mEOCD.mTotalNumEntries -= delCount;
1118     mEOCD.mCentralDirSize = 0;  // mark invalid; set by flush()
1119 
1120     assert(mEOCD.mNumEntries == mEOCD.mTotalNumEntries);
1121     assert(mEOCD.mNumEntries == count);
1122 
1123     return result;
1124 }
1125 
1126 /*
1127  * Works like memmove(), but on pieces of a file.
1128  */
filemove(FILE * fp,off_t dst,off_t src,size_t n)1129 status_t ZipFile::filemove(FILE* fp, off_t dst, off_t src, size_t n)
1130 {
1131     if (dst == src || n <= 0)
1132         return NO_ERROR;
1133 
1134     uint8_t readBuf[32768];
1135 
1136     if (dst < src) {
1137         /* shift stuff toward start of file; must read from start */
1138         while (n != 0) {
1139             size_t getSize = sizeof(readBuf);
1140             if (getSize > n)
1141                 getSize = n;
1142 
1143             if (fseek(fp, (long) src, SEEK_SET) != 0) {
1144                 ALOGD("filemove src seek %ld failed\n", (long) src);
1145                 return UNKNOWN_ERROR;
1146             }
1147 
1148             if (fread(readBuf, 1, getSize, fp) != getSize) {
1149                 ALOGD("filemove read %ld off=%ld failed\n",
1150                     (long) getSize, (long) src);
1151                 return UNKNOWN_ERROR;
1152             }
1153 
1154             if (fseek(fp, (long) dst, SEEK_SET) != 0) {
1155                 ALOGD("filemove dst seek %ld failed\n", (long) dst);
1156                 return UNKNOWN_ERROR;
1157             }
1158 
1159             if (fwrite(readBuf, 1, getSize, fp) != getSize) {
1160                 ALOGD("filemove write %ld off=%ld failed\n",
1161                     (long) getSize, (long) dst);
1162                 return UNKNOWN_ERROR;
1163             }
1164 
1165             src += getSize;
1166             dst += getSize;
1167             n -= getSize;
1168         }
1169     } else {
1170         /* shift stuff toward end of file; must read from end */
1171         assert(false);      // write this someday, maybe
1172         return UNKNOWN_ERROR;
1173     }
1174 
1175     return NO_ERROR;
1176 }
1177 
1178 
1179 /*
1180  * Get the modification time from a file descriptor.
1181  */
getModTime(int fd)1182 time_t ZipFile::getModTime(int fd)
1183 {
1184     struct stat sb;
1185 
1186     if (fstat(fd, &sb) < 0) {
1187         ALOGD("HEY: fstat on fd %d failed\n", fd);
1188         return (time_t) -1;
1189     }
1190 
1191     return sb.st_mtime;
1192 }
1193 
1194 
#if 0       /* this is a bad idea */
/*
 * Get a copy of the Zip file descriptor.
 *
 * We don't allow this if the file was opened read-write because we tend
 * to leave the file contents in an uncertain state between calls to
 * flush().  The duplicated file descriptor should only be valid for reads.
 *
 * Returns INVALID_OPERATION if the archive is not read-only; otherwise
 * returns the result of dup(), which is negative on failure (the failure
 * is logged but still passed back to the caller).
 *
 * This function is currently compiled out.
 */
int ZipFile::getZipFd(void) const
{
    if (!mReadOnly)
        return INVALID_OPERATION;
    assert(mZipFp != NULL);

    /* duplicate the descriptor underlying the stdio stream */
    int fd;
    fd = dup(fileno(mZipFp));
    if (fd < 0) {
        ALOGD("didn't work, errno=%d\n", errno);
    }

    return fd;
}
#endif
1218 
1219 
#if 0
/*
 * Expand data.
 *
 * Placeholder for a variant that would expand into a caller-supplied
 * buffer.  It is compiled out and, as written, does nothing but
 * return false.
 */
bool ZipFile::uncompress(const ZipEntry* pEntry, void* buf) const
{
    return false;
}
#endif
1229 
1230 // free the memory when you're done
uncompress(const ZipEntry * entry) const1231 void* ZipFile::uncompress(const ZipEntry* entry) const
1232 {
1233     size_t unlen = entry->getUncompressedLen();
1234     size_t clen = entry->getCompressedLen();
1235 
1236     void* buf = malloc(unlen);
1237     if (buf == NULL) {
1238         return NULL;
1239     }
1240 
1241     fseek(mZipFp, 0, SEEK_SET);
1242 
1243     off_t offset = entry->getFileOffset();
1244     if (fseek(mZipFp, offset, SEEK_SET) != 0) {
1245         goto bail;
1246     }
1247 
1248     switch (entry->getCompressionMethod())
1249     {
1250         case ZipEntry::kCompressStored: {
1251             ssize_t amt = fread(buf, 1, unlen, mZipFp);
1252             if (amt != (ssize_t)unlen) {
1253                 goto bail;
1254             }
1255 #if 0
1256             printf("data...\n");
1257             const unsigned char* p = (unsigned char*)buf;
1258             const unsigned char* end = p+unlen;
1259             for (int i=0; i<32 && p < end; i++) {
1260                 printf("0x%08x ", (int)(offset+(i*0x10)));
1261                 for (int j=0; j<0x10 && p < end; j++) {
1262                     printf(" %02x", *p);
1263                     p++;
1264                 }
1265                 printf("\n");
1266             }
1267 #endif
1268 
1269             }
1270             break;
1271         case ZipEntry::kCompressDeflated: {
1272             if (!ZipUtils::inflateToBuffer(mZipFp, buf, unlen, clen)) {
1273                 goto bail;
1274             }
1275             }
1276             break;
1277         default:
1278             goto bail;
1279     }
1280     return buf;
1281 
1282 bail:
1283     free(buf);
1284     return NULL;
1285 }
1286 
1287 
1288 /*
1289  * ===========================================================================
1290  *      ZipFile::EndOfCentralDir
1291  * ===========================================================================
1292  */
1293 
1294 /*
1295  * Read the end-of-central-dir fields.
1296  *
1297  * "buf" should be positioned at the EOCD signature, and should contain
1298  * the entire EOCD area including the comment.
1299  */
readBuf(const uint8_t * buf,int len)1300 status_t ZipFile::EndOfCentralDir::readBuf(const uint8_t* buf, int len)
1301 {
1302     /* don't allow re-use */
1303     assert(mComment == NULL);
1304 
1305     if (len < kEOCDLen) {
1306         /* looks like ZIP file got truncated */
1307         ALOGD(" Zip EOCD: expected >= %d bytes, found %d\n",
1308             kEOCDLen, len);
1309         return INVALID_OPERATION;
1310     }
1311 
1312     /* this should probably be an assert() */
1313     if (ZipEntry::getLongLE(&buf[0x00]) != kSignature)
1314         return UNKNOWN_ERROR;
1315 
1316     mDiskNumber = ZipEntry::getShortLE(&buf[0x04]);
1317     mDiskWithCentralDir = ZipEntry::getShortLE(&buf[0x06]);
1318     mNumEntries = ZipEntry::getShortLE(&buf[0x08]);
1319     mTotalNumEntries = ZipEntry::getShortLE(&buf[0x0a]);
1320     mCentralDirSize = ZipEntry::getLongLE(&buf[0x0c]);
1321     mCentralDirOffset = ZipEntry::getLongLE(&buf[0x10]);
1322     mCommentLen = ZipEntry::getShortLE(&buf[0x14]);
1323 
1324     // TODO: validate mCentralDirOffset
1325 
1326     if (mCommentLen > 0) {
1327         if (kEOCDLen + mCommentLen > len) {
1328             ALOGD("EOCD(%d) + comment(%" PRIu16 ") exceeds len (%d)\n",
1329                 kEOCDLen, mCommentLen, len);
1330             return UNKNOWN_ERROR;
1331         }
1332         mComment = new uint8_t[mCommentLen];
1333         memcpy(mComment, buf + kEOCDLen, mCommentLen);
1334     }
1335 
1336     return NO_ERROR;
1337 }
1338 
1339 /*
1340  * Write an end-of-central-directory section.
1341  */
write(FILE * fp)1342 status_t ZipFile::EndOfCentralDir::write(FILE* fp)
1343 {
1344     uint8_t buf[kEOCDLen];
1345 
1346     ZipEntry::putLongLE(&buf[0x00], kSignature);
1347     ZipEntry::putShortLE(&buf[0x04], mDiskNumber);
1348     ZipEntry::putShortLE(&buf[0x06], mDiskWithCentralDir);
1349     ZipEntry::putShortLE(&buf[0x08], mNumEntries);
1350     ZipEntry::putShortLE(&buf[0x0a], mTotalNumEntries);
1351     ZipEntry::putLongLE(&buf[0x0c], mCentralDirSize);
1352     ZipEntry::putLongLE(&buf[0x10], mCentralDirOffset);
1353     ZipEntry::putShortLE(&buf[0x14], mCommentLen);
1354 
1355     if (fwrite(buf, 1, kEOCDLen, fp) != kEOCDLen)
1356         return UNKNOWN_ERROR;
1357     if (mCommentLen > 0) {
1358         assert(mComment != NULL);
1359         if (fwrite(mComment, mCommentLen, 1, fp) != mCommentLen)
1360             return UNKNOWN_ERROR;
1361     }
1362 
1363     return NO_ERROR;
1364 }
1365 
1366 /*
1367  * Dump the contents of an EndOfCentralDir object.
1368  */
dump(void) const1369 void ZipFile::EndOfCentralDir::dump(void) const
1370 {
1371     ALOGD(" EndOfCentralDir contents:\n");
1372     ALOGD("  diskNum=%" PRIu16 " diskWCD=%" PRIu16 " numEnt=%" PRIu16 " totalNumEnt=%" PRIu16 "\n",
1373         mDiskNumber, mDiskWithCentralDir, mNumEntries, mTotalNumEntries);
1374     ALOGD("  centDirSize=%" PRIu32 " centDirOff=%" PRIu32 " commentLen=%" PRIu32 "\n",
1375         mCentralDirSize, mCentralDirOffset, mCommentLen);
1376 }
1377 
1378