1 // Copyright 2007 Alan Donovan. All rights reserved.
2 //
3 // Author: Alan Donovan <adonovan@google.com>
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License");
6 // you may not use this file except in compliance with the License.
7 // You may obtain a copy of the License at
8 //
9 //    http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 // zip.cc -- .zip (.jar) file reading/writing routines.
18 //
19 
20 // See README.txt for details.
21 //
22 // See http://www.pkware.com/documents/casestudies/APPNOTE.TXT
23 // for definition of PKZIP file format.
24 
25 #define _FILE_OFFSET_BITS 64  // Support zip files larger than 2GB
26 
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <stddef.h>
30 #include <stdint.h>
31 #include <stdarg.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <sys/mman.h>
36 #include <unistd.h>
37 #include <limits.h>
38 #include <limits>
39 #include <vector>
40 
41 #include "zip.h"
42 #include <zlib.h>
43 
// Magic numbers that open each kind of ZIP record. They are read and written
// as little-endian 32-bit values, so on disk each one starts with the bytes
// 'P' 'K'.
#define LOCAL_FILE_HEADER_SIGNATURE           0x04034b50
#define CENTRAL_FILE_HEADER_SIGNATURE         0x02014b50
#define END_OF_CENTRAL_DIR_SIGNATURE          0x06054b50
#define DATA_DESCRIPTOR_SIGNATURE             0x08074b50

// version to extract: 1.0 - default value from APPNOTE.TXT.
// Output JAR files contain no extra ZIP features, so this is enough.
#define ZIP_VERSION_TO_EXTRACT                10
#define COMPRESSION_METHOD_STORED             0   // no compression
#define COMPRESSION_METHOD_DEFLATED           8

// General purpose bit flags understood by the reader.
// NOTE: despite its name, GENERAL_PURPOSE_BIT_FLAG_COMPRESSED (bit 3) is the
// flag the reader tests to decide whether a data descriptor follows the
// entry's data and has to be skipped.
#define GENERAL_PURPOSE_BIT_FLAG_COMPRESSED (1 << 3)
#define GENERAL_PURPOSE_BIT_FLAG_UTF8_ENCODED (1 << 11)
#define GENERAL_PURPOSE_BIT_FLAG_COMPRESSION_SPEED ((1 << 2) | (1 << 1))
// Union of the flags above; a local file header that sets any other bit is
// rejected as unsupported.
#define GENERAL_PURPOSE_BIT_FLAG_SUPPORTED \
  (GENERAL_PURPOSE_BIT_FLAG_COMPRESSED \
  | GENERAL_PURPOSE_BIT_FLAG_UTF8_ENCODED \
  | GENERAL_PURPOSE_BIT_FLAG_COMPRESSION_SPEED)
62 
63 namespace devtools_ijar {
// In the absence of ZIP64 support, zip files are limited to 4GB.
// http://www.info-zip.org/FAQ.html#limits
// The limit is therefore the largest value a 32-bit unsigned field can hold.
static const u8 kMaximumOutputSize = std::numeric_limits<uint32_t>::max();

// Forward declaration: parses one central directory entry starting at p and
// advances p to the next entry. The definition, together with its full
// contract, appears further down in this file.
static bool ProcessCentralDirEntry(const u1 *&p,
                                   size_t *compressed_size,
                                   size_t *uncompressed_size,
                                   char *filename,
                                   size_t filename_size,
                                   u4 *attr,
                                   u4 *offset);
75 
76 //
77 // A class representing a ZipFile for reading. Its public API is exposed
78 // using the ZipExtractor abstract class.
79 //
80 class InputZipFile : public ZipExtractor {
81  public:
82   InputZipFile(ZipExtractorProcessor *processor, int fd, off_t in_length,
83                off_t in_offset, const u1* zipdata_in, const u1* central_dir);
84   virtual ~InputZipFile();
85 
GetError()86   virtual const char* GetError() {
87     if (errmsg[0] == 0) {
88       return NULL;
89     }
90     return errmsg;
91   }
92 
93   virtual bool ProcessNext();
94   virtual void Reset();
GetSize()95   virtual size_t GetSize() {
96     return in_length_;
97   }
98 
99   virtual u8 CalculateOutputLength();
100 
101  private:
102   ZipExtractorProcessor *processor;
103 
104   int fd_in;  // Input file descripor
105 
106   // InputZipFile is responsible for maintaining the following
107   // pointers. They are allocated by the Create() method before
108   // the object is actually created using mmap.
109   const u1 * const zipdata_in_;   // start of input file mmap
110   const u1 * zipdata_in_mapped_;  // start of still mapped region
111   const u1 * const central_dir_;  // central directory in input file
112 
113   size_t in_length_;  // size of the input file
114   size_t in_offset_;  // offset  the input file
115 
116   const u1 *p;  // input cursor
117 
118   const u1* central_dir_current_;  // central dir input cursor
119 
120   // Buffer size is initially INITIAL_BUFFER_SIZE. It doubles in size every
121   // time it is found too small, until it reaches MAX_BUFFER_SIZE. If that is
122   // not enough, we bail out. We only decompress class files, so they should
123   // be smaller than 64K anyway, but we give a little leeway.
124   // MAX_BUFFER_SIZE must be bigger than the size of the biggest file in the
125   // ZIP. It is set to 128M here so we can uncompress the Bazel server with
126   // this library.
127   static const size_t INITIAL_BUFFER_SIZE = 256 * 1024;  // 256K
128   static const size_t MAX_BUFFER_SIZE = 128 * 1024 * 1024;
129   static const size_t MAX_MAPPED_REGION = 32 * 1024 * 1024;
130 
131   // These metadata fields are the fields of the ZIP header of the file being
132   // processed.
133   u2 extract_version_;
134   u2 general_purpose_bit_flag_;
135   u2 compression_method_;
136   u4 uncompressed_size_;
137   u4 compressed_size_;
138   u2 file_name_length_;
139   u2 extra_field_length_;
140   const u1 *file_name_;
141   const u1 *extra_field_;
142 
143   // Administration of memory reserved for decompressed data. We use the same
144   // buffer for each file to avoid some malloc()/free() calls and free the
145   // memory only in the dtor. C-style memory management is used so that we
146   // can call realloc.
147   u1 *uncompressed_data_;
148   size_t uncompressed_data_allocated_;
149 
150   // Copy of the last filename entry - Null-terminated.
151   char filename[PATH_MAX];
152   // The external file attribute field
153   u4 attr;
154 
155   // last error
156   char errmsg[4*PATH_MAX];
157 
error(const char * fmt,...)158   int error(const char *fmt, ...) {
159     va_list ap;
160     va_start(ap, fmt);
161     vsnprintf(errmsg, 4*PATH_MAX, fmt, ap);
162     va_end(ap);
163     return -1;
164   }
165 
166   // Check that at least n bytes remain in the input file, otherwise
167   // abort with an error message.  "state" is the name of the field
168   // we're about to read, for diagnostics.
EnsureRemaining(size_t n,const char * state)169   int EnsureRemaining(size_t n, const char *state) {
170     size_t in_offset = p - zipdata_in_;
171     size_t remaining = in_length_ - in_offset;
172     if (n > remaining) {
173       return error("Premature end of file (at offset %zd, state=%s); "
174                    "expected %zd more bytes but found %zd.\n",
175                    in_offset, state, n, remaining);
176     }
177     return 0;
178   }
179 
180   // Read one entry from input zip file
181   int ProcessLocalFileEntry(size_t compressed_size, size_t uncompressed_size);
182 
183   // Uncompress a file from the archive using zlib. The pointer returned
184   // is owned by InputZipFile, so it must not be freed. Advances the input
185   // cursor to the first byte after the compressed data.
186   u1* UncompressFile();
187 
188   // Skip a file
189   int SkipFile(const bool compressed);
190 
191   // Process a file
192   int ProcessFile(const bool compressed);
193 };
194 
195 //
196 // A class implementing ZipBuilder that represent an open zip file for writing.
197 //
198 class OutputZipFile : public ZipBuilder {
199  public:
OutputZipFile(int fd,u1 * const zipdata_out)200   OutputZipFile(int fd, u1 * const zipdata_out) :
201       fd_out(fd),
202       zipdata_out_(zipdata_out),
203       q(zipdata_out) {
204     errmsg[0] = 0;
205   }
206 
GetError()207   virtual const char* GetError() {
208     if (errmsg[0] == 0) {
209       return NULL;
210     }
211     return errmsg;
212   }
213 
~OutputZipFile()214   virtual ~OutputZipFile() { Finish(); }
215   virtual u1* NewFile(const char* filename, const u4 attr);
216   virtual int FinishFile(size_t filelength, bool compress = false,
217                          bool compute_crc = false);
218   virtual int WriteEmptyFile(const char *filename);
GetSize()219   virtual size_t GetSize() {
220     return Offset(q);
221   }
GetNumberFiles()222   virtual int GetNumberFiles() {
223     return entries_.size();
224   }
225   virtual int Finish();
226 
227  private:
228   struct LocalFileEntry {
229     // Start of the local header (in the output buffer).
230     size_t local_header_offset;
231 
232     // Sizes of the file entry
233     size_t uncompressed_length;
234     size_t compressed_length;
235 
236     // Compression method
237     u2 compression_method;
238 
239     // CRC32
240     u4 crc32;
241 
242     // external attributes field
243     u4 external_attr;
244 
245     // Start/length of the file_name in the local header.
246     u1 *file_name;
247     u2 file_name_length;
248 
249     // Start/length of the extra_field in the local header.
250     const u1 *extra_field;
251     u2 extra_field_length;
252   };
253 
254   int fd_out;  // file descriptor for the output file
255 
256   // OutputZipFile is responsible for maintaining the following
257   // pointers. They are allocated by the Create() method before
258   // the object is actually created using mmap.
259   u1 * const zipdata_out_;        // start of output file mmap
260   u1 *q;  // output cursor
261 
262   u1 *header_ptr;  // Current pointer to "compression method" entry.
263 
264   // List of entries to write the central directory
265   std::vector<LocalFileEntry*> entries_;
266 
267   // last error
268   char errmsg[4*PATH_MAX];
269 
error(const char * fmt,...)270   int error(const char *fmt, ...) {
271     va_list ap;
272     va_start(ap, fmt);
273     vsnprintf(errmsg, 4*PATH_MAX, fmt, ap);
274     va_end(ap);
275     return -1;
276   }
277 
278   // Write the ZIP central directory structure for each local file
279   // entry in "entries".
280   void WriteCentralDirectory();
281 
282   // Returns the offset of the pointer relative to the start of the
283   // output zip file.
Offset(const u1 * const x)284   size_t Offset(const u1 *const x) {
285     return x - zipdata_out_;
286   }
287 
288   // Write ZIP file header in the output. Since the compressed size is not
289   // known in advance, it must be recorded later. This method returns a pointer
290   // to "compressed size" in the file header that should be passed to
291   // WriteFileSizeInLocalFileHeader() later.
292   u1* WriteLocalFileHeader(const char *filename, const u4 attr);
293 
294   // Fill in the "compressed size" and "uncompressed size" fields in a local
295   // file header previously written by WriteLocalFileHeader().
296   size_t WriteFileSizeInLocalFileHeader(u1 *header_ptr,
297                                         size_t out_length,
298                                         bool compress = false,
299                                         const u4 crc = 0);
300 };
301 
302 //
303 // Implementation of InputZipFile
304 //
ProcessNext()305 bool InputZipFile::ProcessNext() {
306   // Process the next entry in the central directory. Also make sure that the
307   // content pointer is in sync.
308   size_t compressed, uncompressed;
309   u4 offset;
310   if (!ProcessCentralDirEntry(central_dir_current_, &compressed, &uncompressed,
311                               filename, PATH_MAX, &attr, &offset)) {
312     return false;
313   }
314 
315   // There might be an offset specified in the central directory that does
316   // not match the file offset, if so, correct the pointer.
317   if (offset != 0 && (p != (zipdata_in_ + in_offset_ + offset))) {
318     p = zipdata_in_ + offset;
319   }
320 
321   if (EnsureRemaining(4, "signature") < 0) {
322     return false;
323   }
324   u4 signature = get_u4le(p);
325   if (signature == LOCAL_FILE_HEADER_SIGNATURE) {
326     if (ProcessLocalFileEntry(compressed, uncompressed) < 0) {
327       return false;
328     }
329   } else {
330     error("local file header signature for file %s not found\n", filename);
331     return false;
332   }
333 
334   return true;
335 }
336 
ProcessLocalFileEntry(size_t compressed_size,size_t uncompressed_size)337 int InputZipFile::ProcessLocalFileEntry(
338     size_t compressed_size, size_t uncompressed_size) {
339   if (EnsureRemaining(26, "extract_version") < 0) {
340     return -1;
341   }
342   extract_version_ = get_u2le(p);
343   general_purpose_bit_flag_ = get_u2le(p);
344 
345   if ((general_purpose_bit_flag_ & ~GENERAL_PURPOSE_BIT_FLAG_SUPPORTED) != 0) {
346     return error("Unsupported value (0x%04x) in general purpose bit flag.\n",
347                  general_purpose_bit_flag_);
348   }
349 
350   compression_method_ = get_u2le(p);
351 
352   if (compression_method_ != COMPRESSION_METHOD_DEFLATED &&
353       compression_method_ != COMPRESSION_METHOD_STORED) {
354     return error("Unsupported compression method (%d).\n",
355                  compression_method_);
356   }
357 
358   // skip over: last_mod_file_time, last_mod_file_date, crc32
359   p += 2 + 2 + 4;
360   compressed_size_ = get_u4le(p);
361   uncompressed_size_ = get_u4le(p);
362   file_name_length_ = get_u2le(p);
363   extra_field_length_ = get_u2le(p);
364 
365   if (EnsureRemaining(file_name_length_, "file_name") < 0) {
366     return -1;
367   }
368   file_name_ = p;
369   p += file_name_length_;
370 
371   if (EnsureRemaining(extra_field_length_, "extra_field") < 0) {
372     return -1;
373   }
374   extra_field_ = p;
375   p += extra_field_length_;
376 
377   bool is_compressed = compression_method_ == COMPRESSION_METHOD_DEFLATED;
378 
379   // If the zip is compressed, compressed and uncompressed size members are
380   // zero in the local file header. If not, check that they are the same as the
381   // lengths from the central directory, otherwise, just believe the central
382   // directory
383   if (compressed_size_ == 0) {
384     compressed_size_ = compressed_size;
385   } else {
386     if (compressed_size_ != compressed_size) {
387       return error("central directory and file header inconsistent\n");
388     }
389   }
390 
391   if (uncompressed_size_ == 0) {
392     uncompressed_size_ = uncompressed_size;
393   } else {
394     if (uncompressed_size_ != uncompressed_size) {
395       return error("central directory and file header inconsistent\n");
396     }
397   }
398 
399   if (processor->Accept(filename, attr)) {
400     if (ProcessFile(is_compressed) < 0) {
401       return -1;
402     }
403   } else {
404     if (SkipFile(is_compressed) < 0) {
405       return -1;
406     }
407   }
408 
409   if (general_purpose_bit_flag_ & GENERAL_PURPOSE_BIT_FLAG_COMPRESSED) {
410     // Skip the data descriptor. Some implementations do not put the signature
411     // here, so check if the next 4 bytes are a signature, and if so, skip the
412     // next 12 bytes (for CRC, compressed/uncompressed size), otherwise skip
413     // the next 8 bytes (because the value just read was the CRC).
414     u4 signature = get_u4le(p);
415     if (signature == DATA_DESCRIPTOR_SIGNATURE) {
416       p += 4 * 3;
417     } else {
418       p += 4 * 2;
419     }
420   }
421 
422   if (p > zipdata_in_mapped_ + MAX_MAPPED_REGION) {
423     munmap(const_cast<u1 *>(zipdata_in_mapped_), MAX_MAPPED_REGION);
424     zipdata_in_mapped_ += MAX_MAPPED_REGION;
425   }
426 
427   return 0;
428 }
429 
SkipFile(const bool compressed)430 int InputZipFile::SkipFile(const bool compressed) {
431   if (!compressed) {
432     // In this case, compressed_size_ == uncompressed_size_ (since the file is
433     // uncompressed), so we can use either.
434     if (compressed_size_ != uncompressed_size_) {
435       return error("compressed size != uncompressed size, although the file "
436                    "is uncompressed.\n");
437     }
438   }
439 
440   if (EnsureRemaining(compressed_size_, "file_data") < 0) {
441     return -1;
442   }
443   p += compressed_size_;
444   return 0;
445 }
446 
UncompressFile()447 u1* InputZipFile::UncompressFile() {
448   size_t in_offset = p - zipdata_in_;
449   size_t remaining = in_length_ - in_offset;
450   z_stream stream;
451 
452   stream.zalloc = Z_NULL;
453   stream.zfree = Z_NULL;
454   stream.opaque = Z_NULL;
455   stream.avail_in = remaining;
456   stream.next_in = (Bytef *) p;
457 
458   int ret = inflateInit2(&stream, -MAX_WBITS);
459   if (ret != Z_OK) {
460     error("inflateInit: %d\n", ret);
461     return NULL;
462   }
463 
464   int uncompressed_until_now = 0;
465 
466   while (true) {
467     stream.avail_out = uncompressed_data_allocated_ - uncompressed_until_now;
468     stream.next_out = uncompressed_data_ + uncompressed_until_now;
469     int old_avail_out = stream.avail_out;
470 
471     ret = inflate(&stream, Z_SYNC_FLUSH);
472     int uncompressed_now = old_avail_out - stream.avail_out;
473     uncompressed_until_now += uncompressed_now;
474 
475     switch (ret) {
476       case Z_STREAM_END: {
477         // zlib said that there is no more data to decompress.
478 
479         u1 *new_p = reinterpret_cast<u1*>(stream.next_in);
480         compressed_size_ = new_p - p;
481         uncompressed_size_ = uncompressed_until_now;
482         p = new_p;
483         inflateEnd(&stream);
484         return uncompressed_data_;
485       }
486 
487       case Z_OK: {
488         // zlib said that there is no more room in the buffer allocated for
489         // the decompressed data. Enlarge that buffer and try again.
490 
491         if (uncompressed_data_allocated_ == MAX_BUFFER_SIZE) {
492           error("ijar does not support decompressing files "
493                 "larger than %dMB.\n",
494                 (int) (MAX_BUFFER_SIZE/(1024*1024)));
495           return NULL;
496         }
497 
498         uncompressed_data_allocated_ *= 2;
499         if (uncompressed_data_allocated_ > MAX_BUFFER_SIZE) {
500           uncompressed_data_allocated_ = MAX_BUFFER_SIZE;
501         }
502 
503         uncompressed_data_ = reinterpret_cast<u1*>(
504             realloc(uncompressed_data_, uncompressed_data_allocated_));
505         break;
506       }
507 
508       case Z_DATA_ERROR:
509       case Z_BUF_ERROR:
510       case Z_STREAM_ERROR:
511       case Z_NEED_DICT:
512       default: {
513         error("zlib returned error code %d during inflate.\n", ret);
514         return NULL;
515       }
516     }
517   }
518 }
519 
ProcessFile(const bool compressed)520 int InputZipFile::ProcessFile(const bool compressed) {
521   const u1 *file_data;
522   if (compressed) {
523     file_data = UncompressFile();
524     if (file_data == NULL) {
525       return -1;
526     }
527   } else {
528     // In this case, compressed_size_ == uncompressed_size_ (since the file is
529     // uncompressed), so we can use either.
530     if (compressed_size_ != uncompressed_size_) {
531       return error("compressed size != uncompressed size, although the file "
532                    "is uncompressed.\n");
533     }
534 
535     if (EnsureRemaining(compressed_size_, "file_data") < 0) {
536       return -1;
537     }
538     file_data = p;
539     p += compressed_size_;
540   }
541   processor->Process(filename, attr, file_data, uncompressed_size_);
542   return 0;
543 }
544 
545 
546 // Reads and returns some metadata of the next file from the central directory:
547 // - compressed size
548 // - uncompressed size
549 // - whether the entry is a class file (to be included in the output).
550 // Precondition: p points to the beginning of an entry in the central dir
551 // Postcondition: p points to the beginning of the next entry in the central dir
552 // Returns true if the central directory contains another file and false if not.
553 // Of course, in the latter case, the size output variables are not changed.
554 // Note that the central directory is always followed by another data structure
555 // that has a signature, so parsing it this way is safe.
ProcessCentralDirEntry(const u1 * & p,size_t * compressed_size,size_t * uncompressed_size,char * filename,size_t filename_size,u4 * attr,u4 * offset)556 static bool ProcessCentralDirEntry(
557     const u1 *&p, size_t *compressed_size, size_t *uncompressed_size,
558     char *filename, size_t filename_size, u4 *attr, u4 *offset) {
559   u4 signature = get_u4le(p);
560   if (signature != CENTRAL_FILE_HEADER_SIGNATURE) {
561     return false;
562   }
563 
564   p += 16;  // skip to 'compressed size' field
565   *compressed_size = get_u4le(p);
566   *uncompressed_size = get_u4le(p);
567   u2 file_name_length = get_u2le(p);
568   u2 extra_field_length = get_u2le(p);
569   u2 file_comment_length = get_u2le(p);
570   p += 4;  // skip to external file attributes field
571   *attr = get_u4le(p);
572   *offset = get_u4le(p);
573   {
574     size_t len = (file_name_length < filename_size)
575       ? file_name_length
576       : (filename_size - 1);
577     memcpy(reinterpret_cast<void*>(filename), p, len);
578     filename[len] = 0;
579   }
580   p += file_name_length;
581   p += extra_field_length;
582   p += file_comment_length;
583   return true;
584 }
585 
586 // Gives a maximum bound on the size of the interface JAR. Basically, adds
587 // the difference between the compressed and uncompressed sizes to the size
588 // of the input file.
CalculateOutputLength()589 u8 InputZipFile::CalculateOutputLength() {
590   const u1* current = central_dir_;
591 
592   u8 compressed_size = 0;
593   u8 uncompressed_size = 0;
594   u8 skipped_compressed_size = 0;
595   u4 attr;
596   u4 offset;
597   char filename[PATH_MAX];
598 
599   while (true) {
600     size_t file_compressed, file_uncompressed;
601     if (!ProcessCentralDirEntry(current,
602                                 &file_compressed, &file_uncompressed,
603                                 filename, PATH_MAX, &attr, &offset)) {
604       break;
605     }
606 
607     if (processor->Accept(filename, attr)) {
608       compressed_size += (u8) file_compressed;
609       uncompressed_size += (u8) file_uncompressed;
610     } else {
611       skipped_compressed_size += file_compressed;
612     }
613   }
614 
615   // The worst case is when the output is simply the input uncompressed. The
616   // metadata in the zip file will stay the same, so the file will grow by the
617   // difference between the compressed and uncompressed sizes.
618   return (u8) in_length_ - skipped_compressed_size
619       + (uncompressed_size - compressed_size);
620 }
621 
622 // Given the data in the zip file, returns the offset of the central directory
623 // and the number of files contained in it.
FindZipCentralDirectory(const u1 * bytes,size_t in_length,u4 * offset,const u1 ** central_dir)624 bool FindZipCentralDirectory(const u1* bytes, size_t in_length,
625                              u4* offset, const u1** central_dir) {
626   static const int MAX_COMMENT_LENGTH = 0xffff;
627   static const int CENTRAL_DIR_LOCATOR_SIZE = 22;
628   // Maximum distance of start of central dir locator from end of file
629   static const int MAX_DELTA = MAX_COMMENT_LENGTH + CENTRAL_DIR_LOCATOR_SIZE;
630   const u1* last_pos_to_check = in_length < MAX_DELTA
631       ? bytes
632       : bytes + (in_length - MAX_DELTA);
633   const u1* current;
634   bool found = false;
635 
636   for (current = bytes + in_length - CENTRAL_DIR_LOCATOR_SIZE;
637        current >= last_pos_to_check;
638        current-- ) {
639     const u1* p = current;
640     if (get_u4le(p) != END_OF_CENTRAL_DIR_SIGNATURE) {
641       continue;
642     }
643 
644     p += 16;  // skip to comment length field
645     u2 comment_length = get_u2le(p);
646 
647     // Does the comment go exactly till the end of the file?
648     if (current + comment_length + CENTRAL_DIR_LOCATOR_SIZE
649         != bytes + in_length) {
650       continue;
651     }
652 
653     // Hooray, we found it!
654     found = true;
655     break;
656   }
657 
658   if (!found) {
659     fprintf(stderr, "file is invalid or corrupted (missing end of central "
660                     "directory record)\n");
661     return false;
662   }
663 
664   const u1* end_of_central_dir = current;
665   get_u4le(current);  // central directory locator signature, already checked
666   u2 number_of_this_disk = get_u2le(current);
667   u2 disk_with_central_dir = get_u2le(current);
668   u2 central_dir_entries_on_this_disk = get_u2le(current);
669   u2 central_dir_entries = get_u2le(current);
670   u4 central_dir_size = get_u4le(current);
671   u4 central_dir_offset = get_u4le(current);
672   u2 file_comment_length = get_u2le(current);
673   current += file_comment_length;  // set current to the end of the central dir
674 
675   if (number_of_this_disk != 0
676     || disk_with_central_dir != 0
677     || central_dir_entries_on_this_disk != central_dir_entries) {
678     fprintf(stderr, "multi-disk JAR files are not supported\n");
679     return false;
680   }
681 
682   // Do not change output values before determining that they are OK.
683   *offset = central_dir_offset;
684   // Central directory start can then be used to determine the actual
685   // starts of the zip file (which can be different in case of a non-zip
686   // header like for auto-extractable binaries).
687   *central_dir = end_of_central_dir - central_dir_size;
688   return true;
689 }
690 
Reset()691 void InputZipFile::Reset() {
692   central_dir_current_ = central_dir_;
693   zipdata_in_mapped_ = zipdata_in_;
694   p = zipdata_in_ + in_offset_;
695 }
696 
ProcessAll()697 int ZipExtractor::ProcessAll() {
698   while (ProcessNext()) {}
699   if (GetError() != NULL) {
700     return -1;
701   }
702   return 0;
703 }
704 
Create(const char * filename,ZipExtractorProcessor * processor)705 ZipExtractor* ZipExtractor::Create(const char* filename,
706                                    ZipExtractorProcessor *processor) {
707   int fd_in = open(filename, O_RDONLY);
708   if (fd_in < 0) {
709     return NULL;
710   }
711 
712   off_t length = lseek(fd_in, 0, SEEK_END);
713   if (length < 0) {
714     return NULL;
715   }
716 
717   void *zipdata_in = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd_in, 0);
718   if (zipdata_in == MAP_FAILED) {
719     return NULL;
720   }
721 
722   u4 central_dir_offset;
723   const u1 *central_dir = NULL;
724 
725   if (!devtools_ijar::FindZipCentralDirectory(
726           static_cast<const u1*>(zipdata_in), length,
727           &central_dir_offset, &central_dir)) {
728     errno = EIO;  // we don't really have a good error number
729     return NULL;
730   }
731   const u1 *zipdata_start = static_cast<const u1*>(zipdata_in);
732   off_t offset = - static_cast<off_t>(zipdata_start
733                                       + central_dir_offset
734                                       - central_dir);
735 
736   return new InputZipFile(processor, fd_in, length, offset,
737                           zipdata_start, central_dir);
738 }
739 
InputZipFile(ZipExtractorProcessor * processor,int fd,off_t in_length,off_t in_offset,const u1 * zipdata_in,const u1 * central_dir)740 InputZipFile::InputZipFile(ZipExtractorProcessor *processor, int fd,
741                            off_t in_length, off_t in_offset,
742                            const u1* zipdata_in, const u1* central_dir)
743   : processor(processor), fd_in(fd),
744     zipdata_in_(zipdata_in), zipdata_in_mapped_(zipdata_in),
745     central_dir_(central_dir), in_length_(in_length), in_offset_(in_offset),
746     p(zipdata_in + in_offset), central_dir_current_(central_dir) {
747   uncompressed_data_allocated_ = INITIAL_BUFFER_SIZE;
748   uncompressed_data_ =
749     reinterpret_cast<u1*>(malloc(uncompressed_data_allocated_));
750   errmsg[0] = 0;
751 }
752 
~InputZipFile()753 InputZipFile::~InputZipFile() {
754   free(uncompressed_data_);
755   close(fd_in);
756 }
757 
758 
759 //
760 // Implementation of OutputZipFile
761 //
WriteEmptyFile(const char * filename)762 int OutputZipFile::WriteEmptyFile(const char *filename) {
763   const u1* file_name = (const u1*) filename;
764   size_t file_name_length = strlen(filename);
765 
766   LocalFileEntry *entry = new LocalFileEntry;
767   entry->local_header_offset = Offset(q);
768   entry->external_attr = 0;
769   entry->crc32 = 0;
770 
771   // Output the ZIP local_file_header:
772   put_u4le(q, LOCAL_FILE_HEADER_SIGNATURE);
773   put_u2le(q, 10);  // extract_version
774   put_u2le(q, 0);  // general_purpose_bit_flag
775   put_u2le(q, 0);  // compression_method
776   put_u2le(q, 0);  // last_mod_file_time
777   put_u2le(q, 0);  // last_mod_file_date
778   put_u4le(q, entry->crc32);  // crc32
779   put_u4le(q, 0);  // compressed_size
780   put_u4le(q, 0);  // uncompressed_size
781   put_u2le(q, file_name_length);
782   put_u2le(q, 0);  // extra_field_length
783   put_n(q, file_name, file_name_length);
784 
785   entry->file_name_length = file_name_length;
786   entry->extra_field_length = 0;
787   entry->compressed_length = 0;
788   entry->uncompressed_length = 0;
789   entry->compression_method = 0;
790   entry->extra_field = (const u1 *)"";
791   entry->file_name = (u1*) strdup((const char *) file_name);
792   entries_.push_back(entry);
793 
794   return 0;
795 }
796 
WriteCentralDirectory()797 void OutputZipFile::WriteCentralDirectory() {
798   // central directory:
799   const u1 *central_directory_start = q;
800   for (size_t ii = 0; ii < entries_.size(); ++ii) {
801     LocalFileEntry *entry = entries_[ii];
802     put_u4le(q, CENTRAL_FILE_HEADER_SIGNATURE);
803     put_u2le(q, 0);  // version made by
804 
805     put_u2le(q, ZIP_VERSION_TO_EXTRACT);  // version to extract
806     put_u2le(q, 0);  // general purpose bit flag
807     put_u2le(q, entry->compression_method);  // compression method:
808     put_u2le(q, 0);                          // last_mod_file_time
809     put_u2le(q, 0);  // last_mod_file_date
810     put_u4le(q, entry->crc32);  // crc32
811     put_u4le(q, entry->compressed_length);    // compressed_size
812     put_u4le(q, entry->uncompressed_length);  // uncompressed_size
813     put_u2le(q, entry->file_name_length);
814     put_u2le(q, entry->extra_field_length);
815 
816     put_u2le(q, 0);  // file comment length
817     put_u2le(q, 0);  // disk number start
818     put_u2le(q, 0);  // internal file attributes
819     put_u4le(q, entry->external_attr);  // external file attributes
820     // relative offset of local header:
821     put_u4le(q, entry->local_header_offset);
822 
823     put_n(q, entry->file_name, entry->file_name_length);
824     put_n(q, entry->extra_field, entry->extra_field_length);
825   }
826   u4 central_directory_size = q - central_directory_start;
827 
828   put_u4le(q, END_OF_CENTRAL_DIR_SIGNATURE);
829   put_u2le(q, 0);  // number of this disk
830   put_u2le(q, 0);  // number of the disk with the start of the central directory
831   put_u2le(q, entries_.size());  // # central dir entries on this disk
832   put_u2le(q, entries_.size());  // total # entries in the central directory
833   put_u4le(q, central_directory_size);  // size of the central directory
834   put_u4le(q, Offset(central_directory_start));  // offset of start of central
835                                                  // directory wrt starting disk
836   put_u2le(q, 0);  // .ZIP file comment length
837 }
838 
WriteLocalFileHeader(const char * filename,const u4 attr)839 u1* OutputZipFile::WriteLocalFileHeader(const char* filename, const u4 attr) {
840   off_t file_name_length_ = strlen(filename);
841   LocalFileEntry *entry = new LocalFileEntry;
842   entry->local_header_offset = Offset(q);
843   entry->file_name_length = file_name_length_;
844   entry->file_name = new u1[file_name_length_];
845   entry->external_attr = attr;
846   memcpy(entry->file_name, filename, file_name_length_);
847   entry->extra_field_length = 0;
848   entry->extra_field = (const u1 *)"";
849 
850   // Output the ZIP local_file_header:
851   put_u4le(q, LOCAL_FILE_HEADER_SIGNATURE);
852   put_u2le(q, ZIP_VERSION_TO_EXTRACT);     // version to extract
853   put_u2le(q, 0);                          // general purpose bit flag
854   u1 *header_ptr = q;
855   put_u2le(q, COMPRESSION_METHOD_STORED);  // compression method = placeholder
856   put_u2le(q, 0);                          // last_mod_file_time
857   put_u2le(q, 0);                          // last_mod_file_date
858   put_u4le(q, entry->crc32);               // crc32
859   put_u4le(q, 0);  // compressed_size = placeholder
860   put_u4le(q, 0);  // uncompressed_size = placeholder
861   put_u2le(q, entry->file_name_length);
862   put_u2le(q, entry->extra_field_length);
863 
864   put_n(q, entry->file_name, entry->file_name_length);
865   put_n(q, entry->extra_field, entry->extra_field_length);
866   entries_.push_back(entry);
867 
868   return header_ptr;
869 }
870 
871 // Try to compress a file entry in memory using the deflate algorithm.
872 // It will compress buf (of size length) unless the compressed size is bigger
873 // than the input size. The result will overwrite the content of buf and the
874 // final size is returned.
TryDeflate(u1 * buf,size_t length)875 size_t TryDeflate(u1 *buf, size_t length) {
876   u1 *outbuf = reinterpret_cast<u1 *>(malloc(length));
877   z_stream stream;
878 
879   // Initialize the z_stream strcut for reading from buf and wrinting in outbuf.
880   stream.zalloc = Z_NULL;
881   stream.zfree = Z_NULL;
882   stream.opaque = Z_NULL;
883   stream.total_in = length;
884   stream.avail_in = length;
885   stream.total_out = length;
886   stream.avail_out = length;
887   stream.next_in = buf;
888   stream.next_out = outbuf;
889 
890   // deflateInit2 negative windows size prevent the zlib wrapper to be used.
891   if (deflateInit2(&stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
892                   -MAX_WBITS, 8, Z_DEFAULT_STRATEGY) != Z_OK) {
893     // Failure to compress => return the buffer uncompressed
894     free(outbuf);
895     return length;
896   }
897 
898   if (deflate(&stream, Z_FINISH) == Z_STREAM_END) {
899     // Compression successful and fits in outbuf, let's copy the result in buf.
900     length = stream.total_out;
901     memcpy(buf, outbuf, length);
902   }
903 
904   deflateEnd(&stream);
905   free(outbuf);
906 
907   // Return the length of the resulting buffer
908   return length;
909 }
910 
WriteFileSizeInLocalFileHeader(u1 * header_ptr,size_t out_length,bool compress,const u4 crc)911 size_t OutputZipFile::WriteFileSizeInLocalFileHeader(u1 *header_ptr,
912                                                      size_t out_length,
913                                                      bool compress,
914                                                      const u4 crc) {
915   size_t compressed_size = out_length;
916   if (compress) {
917     compressed_size = TryDeflate(q, out_length);
918   }
919   // compression method
920   if (compressed_size < out_length) {
921     put_u2le(header_ptr, COMPRESSION_METHOD_DEFLATED);
922   } else {
923     put_u2le(header_ptr, COMPRESSION_METHOD_STORED);
924   }
925   header_ptr += 4;
926   put_u4le(header_ptr, crc);              // crc32
927   put_u4le(header_ptr, compressed_size);  // compressed_size
928   put_u4le(header_ptr, out_length);       // uncompressed_size
929   return compressed_size;
930 }
931 
Finish()932 int OutputZipFile::Finish() {
933   if (fd_out > 0) {
934     WriteCentralDirectory();
935     if (ftruncate(fd_out, GetSize()) < 0) {
936       return error("ftruncate(fd_out, GetSize()): %s", strerror(errno));
937     }
938     if (close(fd_out) < 0) {
939       return error("close(fd_out): %s", strerror(errno));
940     }
941     fd_out = -1;
942   }
943   return 0;
944 }
945 
NewFile(const char * filename,const u4 attr)946 u1* OutputZipFile::NewFile(const char* filename, const u4 attr) {
947   header_ptr = WriteLocalFileHeader(filename, attr);
948   return q;
949 }
950 
FinishFile(size_t filelength,bool compress,bool compute_crc)951 int OutputZipFile::FinishFile(size_t filelength, bool compress,
952                               bool compute_crc) {
953   u4 crc = 0;
954   if (compute_crc) {
955     crc = crc32(crc, q, filelength);
956   }
957   size_t compressed_size =
958       WriteFileSizeInLocalFileHeader(header_ptr, filelength, compress, crc);
959   entries_.back()->crc32 = crc;
960   entries_.back()->compressed_length = compressed_size;
961   entries_.back()->uncompressed_length = filelength;
962   if (compressed_size < filelength) {
963     entries_.back()->compression_method = COMPRESSION_METHOD_DEFLATED;
964   } else {
965     entries_.back()->compression_method = COMPRESSION_METHOD_STORED;
966   }
967   q += compressed_size;
968   return 0;
969 }
970 
Create(const char * zip_file,u8 estimated_size)971 ZipBuilder* ZipBuilder::Create(const char* zip_file, u8 estimated_size) {
972   if (estimated_size > kMaximumOutputSize) {
973     fprintf(stderr,
974             "Uncompressed input jar has size %llu, "
975             "which exceeds the maximum supported output size %llu.\n"
976             "Assuming that ijar will be smaller and hoping for the best.\n",
977             estimated_size, kMaximumOutputSize);
978     estimated_size = kMaximumOutputSize;
979   }
980 
981   int fd_out = open(zip_file, O_CREAT|O_RDWR|O_TRUNC, 0644);
982   if (fd_out < 0) {
983     return NULL;
984   }
985 
986   // Create mmap-able sparse file
987   if (ftruncate(fd_out, estimated_size) < 0) {
988     return NULL;
989   }
990 
991   // Ensure that any buffer overflow in JarStripper will result in
992   // SIGSEGV or SIGBUS by over-allocating beyond the end of the file.
993   size_t mmap_length = std::min(estimated_size + sysconf(_SC_PAGESIZE),
994                                 (u8) std::numeric_limits<size_t>::max());
995 
996   void *zipdata_out = mmap(NULL, mmap_length, PROT_WRITE,
997                            MAP_SHARED, fd_out, 0);
998   if (zipdata_out == MAP_FAILED) {
999     fprintf(stderr, "output_length=%llu\n", estimated_size);
1000     return NULL;
1001   }
1002 
1003   return new OutputZipFile(fd_out, (u1*) zipdata_out);
1004 }
1005 
EstimateSize(char ** files)1006 u8 ZipBuilder::EstimateSize(char **files) {
1007   struct stat statst;
1008   // Digital signature field size = 6, End of central directory = 22, Total = 28
1009   u8 size = 28;
1010   // Count the size of all the files in the input to estimate the size of the
1011   // output.
1012   for (int i = 0; files[i] != NULL; i++) {
1013     if (stat(files[i], &statst) != 0) {
1014       fprintf(stderr, "File %s does not seem to exist.", files[i]);
1015       return 0;
1016     }
1017     size += statst.st_size;
1018     // Add sizes of Zip meta data
1019     // local file header = 30 bytes
1020     // data descriptor = 12 bytes
1021     // central directory descriptor = 46 bytes
1022     //    Total: 88bytes
1023     size += 88;
1024     // The filename is stored twice (once in the central directory
1025     // and once in the local file header).
1026     size += strlen(files[i]) * 2;
1027   }
1028   return size;
1029 }
1030 
1031 }  // namespace devtools_ijar
1032