1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/zlib/google/zip_reader.h"
6
7 #include <utility>
8
9 #include "base/bind.h"
10 #include "base/files/file.h"
11 #include "base/logging.h"
12 #include "base/macros.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "base/threading/sequenced_task_runner_handle.h"
16 #include "build/build_config.h"
17 #include "third_party/zlib/google/zip_internal.h"
18
19 #if defined(USE_SYSTEM_MINIZIP)
20 #include <minizip/unzip.h>
21 #else
22 #include "third_party/zlib/contrib/minizip/unzip.h"
23 #if defined(OS_WIN)
24 #include "third_party/zlib/contrib/minizip/iowin32.h"
25 #endif // defined(OS_WIN)
26 #endif // defined(USE_SYSTEM_MINIZIP)
27
28 namespace zip {
29
30 namespace {
31
32 // StringWriterDelegate --------------------------------------------------------
33
34 // A writer delegate that writes no more than |max_read_bytes| to a given
35 // std::string.
36 class StringWriterDelegate : public WriterDelegate {
37 public:
38 StringWriterDelegate(size_t max_read_bytes, std::string* output);
39 ~StringWriterDelegate() override;
40
41 // WriterDelegate methods:
42
43 // Returns true.
44 bool PrepareOutput() override;
45
46 // Appends |num_bytes| bytes from |data| to the output string. Returns false
47 // if |num_bytes| will cause the string to exceed |max_read_bytes|.
48 bool WriteBytes(const char* data, int num_bytes) override;
49
50 void SetTimeModified(const base::Time& time) override;
51
52 private:
53 size_t max_read_bytes_;
54 std::string* output_;
55
56 DISALLOW_COPY_AND_ASSIGN(StringWriterDelegate);
57 };
58
StringWriterDelegate(size_t max_read_bytes,std::string * output)59 StringWriterDelegate::StringWriterDelegate(size_t max_read_bytes,
60 std::string* output)
61 : max_read_bytes_(max_read_bytes),
62 output_(output) {
63 }
64
~StringWriterDelegate()65 StringWriterDelegate::~StringWriterDelegate() {
66 }
67
PrepareOutput()68 bool StringWriterDelegate::PrepareOutput() {
69 return true;
70 }
71
WriteBytes(const char * data,int num_bytes)72 bool StringWriterDelegate::WriteBytes(const char* data, int num_bytes) {
73 if (output_->size() + num_bytes > max_read_bytes_)
74 return false;
75 output_->append(data, num_bytes);
76 return true;
77 }
78
SetTimeModified(const base::Time & time)79 void StringWriterDelegate::SetTimeModified(const base::Time& time) {
80 // Do nothing.
81 }
82
83 } // namespace
84
85 // TODO(satorux): The implementation assumes that file names in zip files
86 // are encoded in UTF-8. This is true for zip files created by Zip()
87 // function in zip.h, but not true for user-supplied random zip files.
EntryInfo(const std::string & file_name_in_zip,const unz_file_info & raw_file_info)88 ZipReader::EntryInfo::EntryInfo(const std::string& file_name_in_zip,
89 const unz_file_info& raw_file_info)
90 : file_path_(base::FilePath::FromUTF8Unsafe(file_name_in_zip)),
91 is_directory_(false),
92 is_unsafe_(false),
93 is_encrypted_(false) {
94 original_size_ = raw_file_info.uncompressed_size;
95
96 // Directory entries in zip files end with "/".
97 is_directory_ = base::EndsWith(file_name_in_zip, "/",
98 base::CompareCase::INSENSITIVE_ASCII);
99
100 // Check the file name here for directory traversal issues.
101 is_unsafe_ = file_path_.ReferencesParent();
102
103 // We also consider that the file name is unsafe, if it's invalid UTF-8.
104 std::u16string file_name_utf16;
105 if (!base::UTF8ToUTF16(file_name_in_zip.data(), file_name_in_zip.size(),
106 &file_name_utf16)) {
107 is_unsafe_ = true;
108 }
109
110 // We also consider that the file name is unsafe, if it's absolute.
111 // On Windows, IsAbsolute() returns false for paths starting with "/".
112 if (file_path_.IsAbsolute() ||
113 base::StartsWith(file_name_in_zip, "/",
114 base::CompareCase::INSENSITIVE_ASCII))
115 is_unsafe_ = true;
116
117 // Whether the file is encrypted is bit 0 of the flag.
118 is_encrypted_ = raw_file_info.flag & 1;
119
120 // Construct the last modified time. The timezone info is not present in
121 // zip files, so we construct the time as local time.
122 base::Time::Exploded exploded_time = {}; // Zero-clear.
123 exploded_time.year = raw_file_info.tmu_date.tm_year;
124 // The month in zip file is 0-based, whereas ours is 1-based.
125 exploded_time.month = raw_file_info.tmu_date.tm_mon + 1;
126 exploded_time.day_of_month = raw_file_info.tmu_date.tm_mday;
127 exploded_time.hour = raw_file_info.tmu_date.tm_hour;
128 exploded_time.minute = raw_file_info.tmu_date.tm_min;
129 exploded_time.second = raw_file_info.tmu_date.tm_sec;
130 exploded_time.millisecond = 0;
131
132 if (!base::Time::FromLocalExploded(exploded_time, &last_modified_))
133 last_modified_ = base::Time::UnixEpoch();
134 }
135
ZipReader()136 ZipReader::ZipReader() {
137 Reset();
138 }
139
~ZipReader()140 ZipReader::~ZipReader() {
141 Close();
142 }
143
Open(const base::FilePath & zip_file_path)144 bool ZipReader::Open(const base::FilePath& zip_file_path) {
145 DCHECK(!zip_file_);
146
147 // Use of "Unsafe" function does not look good, but there is no way to do
148 // this safely on Linux. See file_util.h for details.
149 zip_file_ = internal::OpenForUnzipping(zip_file_path.AsUTF8Unsafe());
150 if (!zip_file_) {
151 return false;
152 }
153
154 return OpenInternal();
155 }
156
OpenFromPlatformFile(base::PlatformFile zip_fd)157 bool ZipReader::OpenFromPlatformFile(base::PlatformFile zip_fd) {
158 DCHECK(!zip_file_);
159
160 #if defined(OS_POSIX)
161 zip_file_ = internal::OpenFdForUnzipping(zip_fd);
162 #elif defined(OS_WIN)
163 zip_file_ = internal::OpenHandleForUnzipping(zip_fd);
164 #endif
165 if (!zip_file_) {
166 return false;
167 }
168
169 return OpenInternal();
170 }
171
OpenFromString(const std::string & data)172 bool ZipReader::OpenFromString(const std::string& data) {
173 zip_file_ = internal::PrepareMemoryForUnzipping(data);
174 if (!zip_file_)
175 return false;
176 return OpenInternal();
177 }
178
Close()179 void ZipReader::Close() {
180 if (zip_file_) {
181 unzClose(zip_file_);
182 }
183 Reset();
184 }
185
HasMore()186 bool ZipReader::HasMore() {
187 return !reached_end_;
188 }
189
AdvanceToNextEntry()190 bool ZipReader::AdvanceToNextEntry() {
191 DCHECK(zip_file_);
192
193 // Should not go further if we already reached the end.
194 if (reached_end_)
195 return false;
196
197 unz_file_pos position = {};
198 if (unzGetFilePos(zip_file_, &position) != UNZ_OK)
199 return false;
200 const int current_entry_index = position.num_of_file;
201 // If we are currently at the last entry, then the next position is the
202 // end of the zip file, so mark that we reached the end.
203 if (current_entry_index + 1 == num_entries_) {
204 reached_end_ = true;
205 } else {
206 DCHECK_LT(current_entry_index + 1, num_entries_);
207 if (unzGoToNextFile(zip_file_) != UNZ_OK) {
208 return false;
209 }
210 }
211 current_entry_info_.reset();
212 return true;
213 }
214
OpenCurrentEntryInZip()215 bool ZipReader::OpenCurrentEntryInZip() {
216 DCHECK(zip_file_);
217
218 unz_file_info raw_file_info = {};
219 char raw_file_name_in_zip[internal::kZipMaxPath] = {};
220 const int result = unzGetCurrentFileInfo(zip_file_,
221 &raw_file_info,
222 raw_file_name_in_zip,
223 sizeof(raw_file_name_in_zip) - 1,
224 NULL, // extraField.
225 0, // extraFieldBufferSize.
226 NULL, // szComment.
227 0); // commentBufferSize.
228 if (result != UNZ_OK)
229 return false;
230 if (raw_file_name_in_zip[0] == '\0')
231 return false;
232 current_entry_info_.reset(
233 new EntryInfo(raw_file_name_in_zip, raw_file_info));
234 return true;
235 }
236
ExtractCurrentEntry(WriterDelegate * delegate,uint64_t num_bytes_to_extract) const237 bool ZipReader::ExtractCurrentEntry(WriterDelegate* delegate,
238 uint64_t num_bytes_to_extract) const {
239 DCHECK(zip_file_);
240
241 const int open_result = unzOpenCurrentFile(zip_file_);
242 if (open_result != UNZ_OK)
243 return false;
244
245 if (!delegate->PrepareOutput())
246 return false;
247 std::unique_ptr<char[]> buf(new char[internal::kZipBufSize]);
248
249 uint64_t remaining_capacity = num_bytes_to_extract;
250 bool entire_file_extracted = false;
251
252 while (remaining_capacity > 0) {
253 const int num_bytes_read =
254 unzReadCurrentFile(zip_file_, buf.get(), internal::kZipBufSize);
255
256 if (num_bytes_read == 0) {
257 entire_file_extracted = true;
258 break;
259 } else if (num_bytes_read < 0) {
260 // If num_bytes_read < 0, then it's a specific UNZ_* error code.
261 break;
262 } else if (num_bytes_read > 0) {
263 uint64_t num_bytes_to_write = std::min<uint64_t>(
264 remaining_capacity, base::checked_cast<uint64_t>(num_bytes_read));
265 if (!delegate->WriteBytes(buf.get(), num_bytes_to_write))
266 break;
267 if (remaining_capacity == base::checked_cast<uint64_t>(num_bytes_read)) {
268 // Ensures function returns true if the entire file has been read.
269 entire_file_extracted =
270 (unzReadCurrentFile(zip_file_, buf.get(), 1) == 0);
271 }
272 CHECK_GE(remaining_capacity, num_bytes_to_write);
273 remaining_capacity -= num_bytes_to_write;
274 }
275 }
276
277 unzCloseCurrentFile(zip_file_);
278
279 if (entire_file_extracted &&
280 current_entry_info()->last_modified() != base::Time::UnixEpoch()) {
281 delegate->SetTimeModified(current_entry_info()->last_modified());
282 }
283
284 return entire_file_extracted;
285 }
286
ExtractCurrentEntryToFilePathAsync(const base::FilePath & output_file_path,SuccessCallback success_callback,FailureCallback failure_callback,const ProgressCallback & progress_callback)287 void ZipReader::ExtractCurrentEntryToFilePathAsync(
288 const base::FilePath& output_file_path,
289 SuccessCallback success_callback,
290 FailureCallback failure_callback,
291 const ProgressCallback& progress_callback) {
292 DCHECK(zip_file_);
293 DCHECK(current_entry_info_.get());
294
295 // If this is a directory, just create it and return.
296 if (current_entry_info()->is_directory()) {
297 if (base::CreateDirectory(output_file_path)) {
298 base::SequencedTaskRunnerHandle::Get()->PostTask(
299 FROM_HERE, std::move(success_callback));
300 } else {
301 DVLOG(1) << "Unzip failed: unable to create directory.";
302 base::SequencedTaskRunnerHandle::Get()->PostTask(
303 FROM_HERE, std::move(failure_callback));
304 }
305 return;
306 }
307
308 if (unzOpenCurrentFile(zip_file_) != UNZ_OK) {
309 DVLOG(1) << "Unzip failed: unable to open current zip entry.";
310 base::SequencedTaskRunnerHandle::Get()->PostTask(
311 FROM_HERE, std::move(failure_callback));
312 return;
313 }
314
315 base::FilePath output_dir_path = output_file_path.DirName();
316 if (!base::CreateDirectory(output_dir_path)) {
317 DVLOG(1) << "Unzip failed: unable to create containing directory.";
318 base::SequencedTaskRunnerHandle::Get()->PostTask(
319 FROM_HERE, std::move(failure_callback));
320 return;
321 }
322
323 const int flags = base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE;
324 base::File output_file(output_file_path, flags);
325
326 if (!output_file.IsValid()) {
327 DVLOG(1) << "Unzip failed: unable to create platform file at "
328 << output_file_path.value();
329 base::SequencedTaskRunnerHandle::Get()->PostTask(
330 FROM_HERE, std::move(failure_callback));
331 return;
332 }
333
334 base::SequencedTaskRunnerHandle::Get()->PostTask(
335 FROM_HERE,
336 base::BindOnce(&ZipReader::ExtractChunk, weak_ptr_factory_.GetWeakPtr(),
337 std::move(output_file), std::move(success_callback),
338 std::move(failure_callback), progress_callback,
339 0 /* initial offset */));
340 }
341
ExtractCurrentEntryToString(uint64_t max_read_bytes,std::string * output) const342 bool ZipReader::ExtractCurrentEntryToString(uint64_t max_read_bytes,
343 std::string* output) const {
344 DCHECK(output);
345 DCHECK(zip_file_);
346
347 if (max_read_bytes == 0) {
348 output->clear();
349 return true;
350 }
351
352 if (current_entry_info()->is_directory()) {
353 output->clear();
354 return true;
355 }
356
357 // The original_size() is the best hint for the real size, so it saves
358 // doing reallocations for the common case when the uncompressed size is
359 // correct. However, we need to assume that the uncompressed size could be
360 // incorrect therefore this function needs to read as much data as possible.
361 std::string contents;
362 contents.reserve(
363 static_cast<size_t>(std::min(base::checked_cast<int64_t>(max_read_bytes),
364 current_entry_info()->original_size())));
365
366 StringWriterDelegate writer(max_read_bytes, &contents);
367 if (!ExtractCurrentEntry(&writer, max_read_bytes)) {
368 if (contents.length() < max_read_bytes) {
369 // There was an error in extracting entry. If ExtractCurrentEntry()
370 // returns false, the entire file was not read - in which case
371 // contents.length() should equal |max_read_bytes| unless an error
372 // occurred which caused extraction to be aborted.
373 output->clear();
374 } else {
375 // |num_bytes| is less than the length of current entry.
376 output->swap(contents);
377 }
378 return false;
379 }
380 output->swap(contents);
381 return true;
382 }
383
OpenInternal()384 bool ZipReader::OpenInternal() {
385 DCHECK(zip_file_);
386
387 unz_global_info zip_info = {}; // Zero-clear.
388 if (unzGetGlobalInfo(zip_file_, &zip_info) != UNZ_OK) {
389 return false;
390 }
391 num_entries_ = zip_info.number_entry;
392 if (num_entries_ < 0)
393 return false;
394
395 // We are already at the end if the zip file is empty.
396 reached_end_ = (num_entries_ == 0);
397 return true;
398 }
399
Reset()400 void ZipReader::Reset() {
401 zip_file_ = NULL;
402 num_entries_ = 0;
403 reached_end_ = false;
404 current_entry_info_.reset();
405 }
406
ExtractChunk(base::File output_file,SuccessCallback success_callback,FailureCallback failure_callback,const ProgressCallback & progress_callback,const int64_t offset)407 void ZipReader::ExtractChunk(base::File output_file,
408 SuccessCallback success_callback,
409 FailureCallback failure_callback,
410 const ProgressCallback& progress_callback,
411 const int64_t offset) {
412 char buffer[internal::kZipBufSize];
413
414 const int num_bytes_read = unzReadCurrentFile(zip_file_,
415 buffer,
416 internal::kZipBufSize);
417
418 if (num_bytes_read == 0) {
419 unzCloseCurrentFile(zip_file_);
420 std::move(success_callback).Run();
421 } else if (num_bytes_read < 0) {
422 DVLOG(1) << "Unzip failed: error while reading zipfile "
423 << "(" << num_bytes_read << ")";
424 std::move(failure_callback).Run();
425 } else {
426 if (num_bytes_read != output_file.Write(offset, buffer, num_bytes_read)) {
427 DVLOG(1) << "Unzip failed: unable to write all bytes to target.";
428 std::move(failure_callback).Run();
429 return;
430 }
431
432 int64_t current_progress = offset + num_bytes_read;
433
434 progress_callback.Run(current_progress);
435
436 base::SequencedTaskRunnerHandle::Get()->PostTask(
437 FROM_HERE,
438 base::BindOnce(&ZipReader::ExtractChunk, weak_ptr_factory_.GetWeakPtr(),
439 std::move(output_file), std::move(success_callback),
440 std::move(failure_callback), progress_callback,
441 current_progress));
442 }
443 }
444
445 // FileWriterDelegate ----------------------------------------------------------
446
FileWriterDelegate(base::File * file)447 FileWriterDelegate::FileWriterDelegate(base::File* file) : file_(file) {}
448
FileWriterDelegate(std::unique_ptr<base::File> file)449 FileWriterDelegate::FileWriterDelegate(std::unique_ptr<base::File> file)
450 : file_(file.get()), owned_file_(std::move(file)) {}
451
~FileWriterDelegate()452 FileWriterDelegate::~FileWriterDelegate() {
453 if (!file_->SetLength(file_length_)) {
454 DVPLOG(1) << "Failed updating length of written file";
455 }
456 }
457
PrepareOutput()458 bool FileWriterDelegate::PrepareOutput() {
459 return file_->Seek(base::File::FROM_BEGIN, 0) >= 0;
460 }
461
WriteBytes(const char * data,int num_bytes)462 bool FileWriterDelegate::WriteBytes(const char* data, int num_bytes) {
463 int bytes_written = file_->WriteAtCurrentPos(data, num_bytes);
464 if (bytes_written > 0)
465 file_length_ += bytes_written;
466 return bytes_written == num_bytes;
467 }
468
SetTimeModified(const base::Time & time)469 void FileWriterDelegate::SetTimeModified(const base::Time& time) {
470 file_->SetTimes(base::Time::Now(), time);
471 }
472
473 // FilePathWriterDelegate ------------------------------------------------------
474
FilePathWriterDelegate(const base::FilePath & output_file_path)475 FilePathWriterDelegate::FilePathWriterDelegate(
476 const base::FilePath& output_file_path)
477 : output_file_path_(output_file_path) {}
478
~FilePathWriterDelegate()479 FilePathWriterDelegate::~FilePathWriterDelegate() {}
480
PrepareOutput()481 bool FilePathWriterDelegate::PrepareOutput() {
482 // We can't rely on parent directory entries being specified in the
483 // zip, so we make sure they are created.
484 if (!base::CreateDirectory(output_file_path_.DirName()))
485 return false;
486
487 file_.Initialize(output_file_path_,
488 base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE);
489 return file_.IsValid();
490 }
491
WriteBytes(const char * data,int num_bytes)492 bool FilePathWriterDelegate::WriteBytes(const char* data, int num_bytes) {
493 return num_bytes == file_.WriteAtCurrentPos(data, num_bytes);
494 }
495
SetTimeModified(const base::Time & time)496 void FilePathWriterDelegate::SetTimeModified(const base::Time& time) {
497 file_.Close();
498 base::TouchFile(output_file_path_, base::Time::Now(), time);
499 }
500
501 } // namespace zip
502