1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "third_party/zlib/google/zip_reader.h"
6 
7 #include <utility>
8 
9 #include "base/bind.h"
10 #include "base/files/file.h"
11 #include "base/logging.h"
12 #include "base/macros.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "base/threading/sequenced_task_runner_handle.h"
16 #include "build/build_config.h"
17 #include "third_party/zlib/google/zip_internal.h"
18 
19 #if defined(USE_SYSTEM_MINIZIP)
20 #include <minizip/unzip.h>
21 #else
22 #include "third_party/zlib/contrib/minizip/unzip.h"
23 #if defined(OS_WIN)
24 #include "third_party/zlib/contrib/minizip/iowin32.h"
25 #endif  // defined(OS_WIN)
26 #endif  // defined(USE_SYSTEM_MINIZIP)
27 
28 namespace zip {
29 
30 namespace {
31 
32 // StringWriterDelegate --------------------------------------------------------
33 
34 // A writer delegate that writes no more than |max_read_bytes| to a given
35 // std::string.
36 class StringWriterDelegate : public WriterDelegate {
37  public:
38   StringWriterDelegate(size_t max_read_bytes, std::string* output);
39   ~StringWriterDelegate() override;
40 
41   // WriterDelegate methods:
42 
43   // Returns true.
44   bool PrepareOutput() override;
45 
46   // Appends |num_bytes| bytes from |data| to the output string. Returns false
47   // if |num_bytes| will cause the string to exceed |max_read_bytes|.
48   bool WriteBytes(const char* data, int num_bytes) override;
49 
50   void SetTimeModified(const base::Time& time) override;
51 
52  private:
53   size_t max_read_bytes_;
54   std::string* output_;
55 
56   DISALLOW_COPY_AND_ASSIGN(StringWriterDelegate);
57 };
58 
StringWriterDelegate(size_t max_read_bytes,std::string * output)59 StringWriterDelegate::StringWriterDelegate(size_t max_read_bytes,
60                                            std::string* output)
61     : max_read_bytes_(max_read_bytes),
62       output_(output) {
63 }
64 
~StringWriterDelegate()65 StringWriterDelegate::~StringWriterDelegate() {
66 }
67 
PrepareOutput()68 bool StringWriterDelegate::PrepareOutput() {
69   return true;
70 }
71 
WriteBytes(const char * data,int num_bytes)72 bool StringWriterDelegate::WriteBytes(const char* data, int num_bytes) {
73   if (output_->size() + num_bytes > max_read_bytes_)
74     return false;
75   output_->append(data, num_bytes);
76   return true;
77 }
78 
SetTimeModified(const base::Time & time)79 void StringWriterDelegate::SetTimeModified(const base::Time& time) {
80   // Do nothing.
81 }
82 
83 }  // namespace
84 
85 // TODO(satorux): The implementation assumes that file names in zip files
86 // are encoded in UTF-8. This is true for zip files created by Zip()
87 // function in zip.h, but not true for user-supplied random zip files.
EntryInfo(const std::string & file_name_in_zip,const unz_file_info & raw_file_info)88 ZipReader::EntryInfo::EntryInfo(const std::string& file_name_in_zip,
89                                 const unz_file_info& raw_file_info)
90     : file_path_(base::FilePath::FromUTF8Unsafe(file_name_in_zip)),
91       is_directory_(false),
92       is_unsafe_(false),
93       is_encrypted_(false) {
94   original_size_ = raw_file_info.uncompressed_size;
95 
96   // Directory entries in zip files end with "/".
97   is_directory_ = base::EndsWith(file_name_in_zip, "/",
98                                  base::CompareCase::INSENSITIVE_ASCII);
99 
100   // Check the file name here for directory traversal issues.
101   is_unsafe_ = file_path_.ReferencesParent();
102 
103   // We also consider that the file name is unsafe, if it's invalid UTF-8.
104   std::u16string file_name_utf16;
105   if (!base::UTF8ToUTF16(file_name_in_zip.data(), file_name_in_zip.size(),
106                          &file_name_utf16)) {
107     is_unsafe_ = true;
108   }
109 
110   // We also consider that the file name is unsafe, if it's absolute.
111   // On Windows, IsAbsolute() returns false for paths starting with "/".
112   if (file_path_.IsAbsolute() ||
113       base::StartsWith(file_name_in_zip, "/",
114                        base::CompareCase::INSENSITIVE_ASCII))
115     is_unsafe_ = true;
116 
117   // Whether the file is encrypted is bit 0 of the flag.
118   is_encrypted_ = raw_file_info.flag & 1;
119 
120   // Construct the last modified time. The timezone info is not present in
121   // zip files, so we construct the time as local time.
122   base::Time::Exploded exploded_time = {};  // Zero-clear.
123   exploded_time.year = raw_file_info.tmu_date.tm_year;
124   // The month in zip file is 0-based, whereas ours is 1-based.
125   exploded_time.month = raw_file_info.tmu_date.tm_mon + 1;
126   exploded_time.day_of_month = raw_file_info.tmu_date.tm_mday;
127   exploded_time.hour = raw_file_info.tmu_date.tm_hour;
128   exploded_time.minute = raw_file_info.tmu_date.tm_min;
129   exploded_time.second = raw_file_info.tmu_date.tm_sec;
130   exploded_time.millisecond = 0;
131 
132   if (!base::Time::FromLocalExploded(exploded_time, &last_modified_))
133     last_modified_ = base::Time::UnixEpoch();
134 }
135 
ZipReader()136 ZipReader::ZipReader() {
137   Reset();
138 }
139 
~ZipReader()140 ZipReader::~ZipReader() {
141   Close();
142 }
143 
Open(const base::FilePath & zip_file_path)144 bool ZipReader::Open(const base::FilePath& zip_file_path) {
145   DCHECK(!zip_file_);
146 
147   // Use of "Unsafe" function does not look good, but there is no way to do
148   // this safely on Linux. See file_util.h for details.
149   zip_file_ = internal::OpenForUnzipping(zip_file_path.AsUTF8Unsafe());
150   if (!zip_file_) {
151     return false;
152   }
153 
154   return OpenInternal();
155 }
156 
OpenFromPlatformFile(base::PlatformFile zip_fd)157 bool ZipReader::OpenFromPlatformFile(base::PlatformFile zip_fd) {
158   DCHECK(!zip_file_);
159 
160 #if defined(OS_POSIX)
161   zip_file_ = internal::OpenFdForUnzipping(zip_fd);
162 #elif defined(OS_WIN)
163   zip_file_ = internal::OpenHandleForUnzipping(zip_fd);
164 #endif
165   if (!zip_file_) {
166     return false;
167   }
168 
169   return OpenInternal();
170 }
171 
OpenFromString(const std::string & data)172 bool ZipReader::OpenFromString(const std::string& data) {
173   zip_file_ = internal::PrepareMemoryForUnzipping(data);
174   if (!zip_file_)
175     return false;
176   return OpenInternal();
177 }
178 
Close()179 void ZipReader::Close() {
180   if (zip_file_) {
181     unzClose(zip_file_);
182   }
183   Reset();
184 }
185 
HasMore()186 bool ZipReader::HasMore() {
187   return !reached_end_;
188 }
189 
AdvanceToNextEntry()190 bool ZipReader::AdvanceToNextEntry() {
191   DCHECK(zip_file_);
192 
193   // Should not go further if we already reached the end.
194   if (reached_end_)
195     return false;
196 
197   unz_file_pos position = {};
198   if (unzGetFilePos(zip_file_, &position) != UNZ_OK)
199     return false;
200   const int current_entry_index = position.num_of_file;
201   // If we are currently at the last entry, then the next position is the
202   // end of the zip file, so mark that we reached the end.
203   if (current_entry_index + 1 == num_entries_) {
204     reached_end_ = true;
205   } else {
206     DCHECK_LT(current_entry_index + 1, num_entries_);
207     if (unzGoToNextFile(zip_file_) != UNZ_OK) {
208       return false;
209     }
210   }
211   current_entry_info_.reset();
212   return true;
213 }
214 
OpenCurrentEntryInZip()215 bool ZipReader::OpenCurrentEntryInZip() {
216   DCHECK(zip_file_);
217 
218   unz_file_info raw_file_info = {};
219   char raw_file_name_in_zip[internal::kZipMaxPath] = {};
220   const int result = unzGetCurrentFileInfo(zip_file_,
221                                            &raw_file_info,
222                                            raw_file_name_in_zip,
223                                            sizeof(raw_file_name_in_zip) - 1,
224                                            NULL,  // extraField.
225                                            0,  // extraFieldBufferSize.
226                                            NULL,  // szComment.
227                                            0);  // commentBufferSize.
228   if (result != UNZ_OK)
229     return false;
230   if (raw_file_name_in_zip[0] == '\0')
231     return false;
232   current_entry_info_.reset(
233       new EntryInfo(raw_file_name_in_zip, raw_file_info));
234   return true;
235 }
236 
ExtractCurrentEntry(WriterDelegate * delegate,uint64_t num_bytes_to_extract) const237 bool ZipReader::ExtractCurrentEntry(WriterDelegate* delegate,
238                                     uint64_t num_bytes_to_extract) const {
239   DCHECK(zip_file_);
240 
241   const int open_result = unzOpenCurrentFile(zip_file_);
242   if (open_result != UNZ_OK)
243     return false;
244 
245   if (!delegate->PrepareOutput())
246     return false;
247   std::unique_ptr<char[]> buf(new char[internal::kZipBufSize]);
248 
249   uint64_t remaining_capacity = num_bytes_to_extract;
250   bool entire_file_extracted = false;
251 
252   while (remaining_capacity > 0) {
253     const int num_bytes_read =
254         unzReadCurrentFile(zip_file_, buf.get(), internal::kZipBufSize);
255 
256     if (num_bytes_read == 0) {
257       entire_file_extracted = true;
258       break;
259     } else if (num_bytes_read < 0) {
260       // If num_bytes_read < 0, then it's a specific UNZ_* error code.
261       break;
262     } else if (num_bytes_read > 0) {
263       uint64_t num_bytes_to_write = std::min<uint64_t>(
264           remaining_capacity, base::checked_cast<uint64_t>(num_bytes_read));
265       if (!delegate->WriteBytes(buf.get(), num_bytes_to_write))
266         break;
267       if (remaining_capacity == base::checked_cast<uint64_t>(num_bytes_read)) {
268         // Ensures function returns true if the entire file has been read.
269         entire_file_extracted =
270             (unzReadCurrentFile(zip_file_, buf.get(), 1) == 0);
271       }
272       CHECK_GE(remaining_capacity, num_bytes_to_write);
273       remaining_capacity -= num_bytes_to_write;
274     }
275   }
276 
277   unzCloseCurrentFile(zip_file_);
278 
279   if (entire_file_extracted &&
280       current_entry_info()->last_modified() != base::Time::UnixEpoch()) {
281     delegate->SetTimeModified(current_entry_info()->last_modified());
282   }
283 
284   return entire_file_extracted;
285 }
286 
ExtractCurrentEntryToFilePathAsync(const base::FilePath & output_file_path,SuccessCallback success_callback,FailureCallback failure_callback,const ProgressCallback & progress_callback)287 void ZipReader::ExtractCurrentEntryToFilePathAsync(
288     const base::FilePath& output_file_path,
289     SuccessCallback success_callback,
290     FailureCallback failure_callback,
291     const ProgressCallback& progress_callback) {
292   DCHECK(zip_file_);
293   DCHECK(current_entry_info_.get());
294 
295   // If this is a directory, just create it and return.
296   if (current_entry_info()->is_directory()) {
297     if (base::CreateDirectory(output_file_path)) {
298       base::SequencedTaskRunnerHandle::Get()->PostTask(
299           FROM_HERE, std::move(success_callback));
300     } else {
301       DVLOG(1) << "Unzip failed: unable to create directory.";
302       base::SequencedTaskRunnerHandle::Get()->PostTask(
303           FROM_HERE, std::move(failure_callback));
304     }
305     return;
306   }
307 
308   if (unzOpenCurrentFile(zip_file_) != UNZ_OK) {
309     DVLOG(1) << "Unzip failed: unable to open current zip entry.";
310     base::SequencedTaskRunnerHandle::Get()->PostTask(
311         FROM_HERE, std::move(failure_callback));
312     return;
313   }
314 
315   base::FilePath output_dir_path = output_file_path.DirName();
316   if (!base::CreateDirectory(output_dir_path)) {
317     DVLOG(1) << "Unzip failed: unable to create containing directory.";
318     base::SequencedTaskRunnerHandle::Get()->PostTask(
319         FROM_HERE, std::move(failure_callback));
320     return;
321   }
322 
323   const int flags = base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE;
324   base::File output_file(output_file_path, flags);
325 
326   if (!output_file.IsValid()) {
327     DVLOG(1) << "Unzip failed: unable to create platform file at "
328              << output_file_path.value();
329     base::SequencedTaskRunnerHandle::Get()->PostTask(
330         FROM_HERE, std::move(failure_callback));
331     return;
332   }
333 
334   base::SequencedTaskRunnerHandle::Get()->PostTask(
335       FROM_HERE,
336       base::BindOnce(&ZipReader::ExtractChunk, weak_ptr_factory_.GetWeakPtr(),
337                      std::move(output_file), std::move(success_callback),
338                      std::move(failure_callback), progress_callback,
339                      0 /* initial offset */));
340 }
341 
ExtractCurrentEntryToString(uint64_t max_read_bytes,std::string * output) const342 bool ZipReader::ExtractCurrentEntryToString(uint64_t max_read_bytes,
343                                             std::string* output) const {
344   DCHECK(output);
345   DCHECK(zip_file_);
346 
347   if (max_read_bytes == 0) {
348     output->clear();
349     return true;
350   }
351 
352   if (current_entry_info()->is_directory()) {
353     output->clear();
354     return true;
355   }
356 
357   // The original_size() is the best hint for the real size, so it saves
358   // doing reallocations for the common case when the uncompressed size is
359   // correct. However, we need to assume that the uncompressed size could be
360   // incorrect therefore this function needs to read as much data as possible.
361   std::string contents;
362   contents.reserve(
363       static_cast<size_t>(std::min(base::checked_cast<int64_t>(max_read_bytes),
364                                    current_entry_info()->original_size())));
365 
366   StringWriterDelegate writer(max_read_bytes, &contents);
367   if (!ExtractCurrentEntry(&writer, max_read_bytes)) {
368     if (contents.length() < max_read_bytes) {
369       // There was an error in extracting entry. If ExtractCurrentEntry()
370       // returns false, the entire file was not read - in which case
371       // contents.length() should equal |max_read_bytes| unless an error
372       // occurred which caused extraction to be aborted.
373       output->clear();
374     } else {
375       // |num_bytes| is less than the length of current entry.
376       output->swap(contents);
377     }
378     return false;
379   }
380   output->swap(contents);
381   return true;
382 }
383 
OpenInternal()384 bool ZipReader::OpenInternal() {
385   DCHECK(zip_file_);
386 
387   unz_global_info zip_info = {};  // Zero-clear.
388   if (unzGetGlobalInfo(zip_file_, &zip_info) != UNZ_OK) {
389     return false;
390   }
391   num_entries_ = zip_info.number_entry;
392   if (num_entries_ < 0)
393     return false;
394 
395   // We are already at the end if the zip file is empty.
396   reached_end_ = (num_entries_ == 0);
397   return true;
398 }
399 
Reset()400 void ZipReader::Reset() {
401   zip_file_ = NULL;
402   num_entries_ = 0;
403   reached_end_ = false;
404   current_entry_info_.reset();
405 }
406 
ExtractChunk(base::File output_file,SuccessCallback success_callback,FailureCallback failure_callback,const ProgressCallback & progress_callback,const int64_t offset)407 void ZipReader::ExtractChunk(base::File output_file,
408                              SuccessCallback success_callback,
409                              FailureCallback failure_callback,
410                              const ProgressCallback& progress_callback,
411                              const int64_t offset) {
412   char buffer[internal::kZipBufSize];
413 
414   const int num_bytes_read = unzReadCurrentFile(zip_file_,
415                                                 buffer,
416                                                 internal::kZipBufSize);
417 
418   if (num_bytes_read == 0) {
419     unzCloseCurrentFile(zip_file_);
420     std::move(success_callback).Run();
421   } else if (num_bytes_read < 0) {
422     DVLOG(1) << "Unzip failed: error while reading zipfile "
423              << "(" << num_bytes_read << ")";
424     std::move(failure_callback).Run();
425   } else {
426     if (num_bytes_read != output_file.Write(offset, buffer, num_bytes_read)) {
427       DVLOG(1) << "Unzip failed: unable to write all bytes to target.";
428       std::move(failure_callback).Run();
429       return;
430     }
431 
432     int64_t current_progress = offset + num_bytes_read;
433 
434     progress_callback.Run(current_progress);
435 
436     base::SequencedTaskRunnerHandle::Get()->PostTask(
437         FROM_HERE,
438         base::BindOnce(&ZipReader::ExtractChunk, weak_ptr_factory_.GetWeakPtr(),
439                        std::move(output_file), std::move(success_callback),
440                        std::move(failure_callback), progress_callback,
441                        current_progress));
442   }
443 }
444 
445 // FileWriterDelegate ----------------------------------------------------------
446 
FileWriterDelegate(base::File * file)447 FileWriterDelegate::FileWriterDelegate(base::File* file) : file_(file) {}
448 
FileWriterDelegate(std::unique_ptr<base::File> file)449 FileWriterDelegate::FileWriterDelegate(std::unique_ptr<base::File> file)
450     : file_(file.get()), owned_file_(std::move(file)) {}
451 
~FileWriterDelegate()452 FileWriterDelegate::~FileWriterDelegate() {
453   if (!file_->SetLength(file_length_)) {
454     DVPLOG(1) << "Failed updating length of written file";
455   }
456 }
457 
PrepareOutput()458 bool FileWriterDelegate::PrepareOutput() {
459   return file_->Seek(base::File::FROM_BEGIN, 0) >= 0;
460 }
461 
WriteBytes(const char * data,int num_bytes)462 bool FileWriterDelegate::WriteBytes(const char* data, int num_bytes) {
463   int bytes_written = file_->WriteAtCurrentPos(data, num_bytes);
464   if (bytes_written > 0)
465     file_length_ += bytes_written;
466   return bytes_written == num_bytes;
467 }
468 
SetTimeModified(const base::Time & time)469 void FileWriterDelegate::SetTimeModified(const base::Time& time) {
470   file_->SetTimes(base::Time::Now(), time);
471 }
472 
473 // FilePathWriterDelegate ------------------------------------------------------
474 
FilePathWriterDelegate(const base::FilePath & output_file_path)475 FilePathWriterDelegate::FilePathWriterDelegate(
476     const base::FilePath& output_file_path)
477     : output_file_path_(output_file_path) {}
478 
~FilePathWriterDelegate()479 FilePathWriterDelegate::~FilePathWriterDelegate() {}
480 
PrepareOutput()481 bool FilePathWriterDelegate::PrepareOutput() {
482   // We can't rely on parent directory entries being specified in the
483   // zip, so we make sure they are created.
484   if (!base::CreateDirectory(output_file_path_.DirName()))
485     return false;
486 
487   file_.Initialize(output_file_path_,
488                    base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE);
489   return file_.IsValid();
490 }
491 
WriteBytes(const char * data,int num_bytes)492 bool FilePathWriterDelegate::WriteBytes(const char* data, int num_bytes) {
493   return num_bytes == file_.WriteAtCurrentPos(data, num_bytes);
494 }
495 
SetTimeModified(const base::Time & time)496 void FilePathWriterDelegate::SetTimeModified(const base::Time& time) {
497   file_.Close();
498   base::TouchFile(output_file_path_, base::Time::Now(), time);
499 }
500 
501 }  // namespace zip
502