1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 #ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ 5 #define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ 6 7 #include <stddef.h> 8 #include <stdint.h> 9 10 #include <memory> 11 #include <string> 12 13 #include "base/callback.h" 14 #include "base/files/file.h" 15 #include "base/files/file_path.h" 16 #include "base/files/file_util.h" 17 #include "base/macros.h" 18 #include "base/memory/weak_ptr.h" 19 #include "base/time/time.h" 20 21 #if defined(USE_SYSTEM_MINIZIP) 22 #include <minizip/unzip.h> 23 #else 24 #include "third_party/zlib/contrib/minizip/unzip.h" 25 #endif 26 27 namespace zip { 28 29 // A delegate interface used to stream out an entry; see 30 // ZipReader::ExtractCurrentEntry. 31 class WriterDelegate { 32 public: ~WriterDelegate()33 virtual ~WriterDelegate() {} 34 35 // Invoked once before any data is streamed out to pave the way (e.g., to open 36 // the output file). Return false on failure to cancel extraction. 37 virtual bool PrepareOutput() = 0; 38 39 // Invoked to write the next chunk of data. Return false on failure to cancel 40 // extraction. 41 virtual bool WriteBytes(const char* data, int num_bytes) = 0; 42 43 // Sets the last-modified time of the data. 44 virtual void SetTimeModified(const base::Time& time) = 0; 45 }; 46 47 // This class is used for reading zip files. A typical use case of this 48 // class is to scan entries in a zip file and extract them. The code will 49 // look like: 50 // 51 // ZipReader reader; 52 // reader.Open(zip_file_path); 53 // while (reader.HasMore()) { 54 // reader.OpenCurrentEntryInZip(); 55 // const base::FilePath& entry_path = 56 // reader.current_entry_info()->file_path(); 57 // auto writer = CreateFilePathWriterDelegate(extract_dir, entry_path); 58 // reader.ExtractCurrentEntry(writer, std::numeric_limits<uint64_t>::max()); 59 // reader.AdvanceToNextEntry(); 60 // } 61 // 62 // For simplicity, error checking is omitted in the example code above. The 63 // production code should check return values from all of these functions. 64 // 65 class ZipReader { 66 public: 67 // A callback that is called when the operation is successful. 68 using SuccessCallback = base::OnceClosure; 69 // A callback that is called when the operation fails. 70 using FailureCallback = base::OnceClosure; 71 // A callback that is called periodically during the operation with the number 72 // of bytes that have been processed so far. 73 using ProgressCallback = base::RepeatingCallback<void(int64_t)>; 74 75 // This class represents information of an entry (file or directory) in 76 // a zip file. 77 class EntryInfo { 78 public: 79 EntryInfo(const std::string& filename_in_zip, 80 const unz_file_info& raw_file_info); 81 82 // Returns the file path. The path is usually relative like 83 // "foo/bar.txt", but if it's absolute, is_unsafe() returns true. file_path()84 const base::FilePath& file_path() const { return file_path_; } 85 86 // Returns the size of the original file (i.e. after uncompressed). 87 // Returns 0 if the entry is a directory. 88 // Note: this value should not be trusted, because it is stored as metadata 89 // in the zip archive and can be different from the real uncompressed size. original_size()90 int64_t original_size() const { return original_size_; } 91 92 // Returns the last modified time. If the time stored in the zip file was 93 // not valid, the unix epoch will be returned. 94 // 95 // The time stored in the zip archive uses the MS-DOS date and time format. 96 // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx 97 // As such the following limitations apply: 98 // * only years from 1980 to 2107 can be represented. 99 // * the time stamp has a 2 second resolution. 100 // * there's no timezone information, so the time is interpreted as local. last_modified()101 base::Time last_modified() const { return last_modified_; } 102 103 // Returns true if the entry is a directory. is_directory()104 bool is_directory() const { return is_directory_; } 105 106 // Returns true if the entry is unsafe, like having ".." or invalid 107 // UTF-8 characters in its file name, or the file path is absolute. is_unsafe()108 bool is_unsafe() const { return is_unsafe_; } 109 110 // Returns true if the entry is encrypted. is_encrypted()111 bool is_encrypted() const { return is_encrypted_; } 112 113 private: 114 const base::FilePath file_path_; 115 int64_t original_size_; 116 base::Time last_modified_; 117 bool is_directory_; 118 bool is_unsafe_; 119 bool is_encrypted_; 120 DISALLOW_COPY_AND_ASSIGN(EntryInfo); 121 }; 122 123 ZipReader(); 124 ~ZipReader(); 125 126 // Opens the zip file specified by |zip_file_path|. Returns true on 127 // success. 128 bool Open(const base::FilePath& zip_file_path); 129 130 // Opens the zip file referred to by the platform file |zip_fd|, without 131 // taking ownership of |zip_fd|. Returns true on success. 132 bool OpenFromPlatformFile(base::PlatformFile zip_fd); 133 134 // Opens the zip data stored in |data|. This class uses a weak reference to 135 // the given sring while extracting files, i.e. the caller should keep the 136 // string until it finishes extracting files. 137 bool OpenFromString(const std::string& data); 138 139 // Closes the currently opened zip file. This function is called in the 140 // destructor of the class, so you usually don't need to call this. 141 void Close(); 142 143 // Returns true if there is at least one entry to read. This function is 144 // used to scan entries with AdvanceToNextEntry(), like: 145 // 146 // while (reader.HasMore()) { 147 // // Do something with the current file here. 148 // reader.AdvanceToNextEntry(); 149 // } 150 bool HasMore(); 151 152 // Advances the next entry. Returns true on success. 153 bool AdvanceToNextEntry(); 154 155 // Opens the current entry in the zip file. On success, returns true and 156 // updates the the current entry state (i.e. current_entry_info() is 157 // updated). This function should be called before operations over the 158 // current entry like ExtractCurrentEntryToFile(). 159 // 160 // Note that there is no CloseCurrentEntryInZip(). The the current entry 161 // state is reset automatically as needed. 162 bool OpenCurrentEntryInZip(); 163 164 // Extracts |num_bytes_to_extract| bytes of the current entry to |delegate|, 165 // starting from the beginning of the entry. Return value specifies whether 166 // the entire file was extracted. 167 bool ExtractCurrentEntry(WriterDelegate* delegate, 168 uint64_t num_bytes_to_extract) const; 169 170 // Asynchronously extracts the current entry to the given output file path. 171 // If the current entry is a directory it just creates the directory 172 // synchronously instead. OpenCurrentEntryInZip() must be called beforehand. 173 // success_callback will be called on success and failure_callback will be 174 // called on failure. progress_callback will be called at least once. 175 // Callbacks will be posted to the current MessageLoop in-order. 176 void ExtractCurrentEntryToFilePathAsync( 177 const base::FilePath& output_file_path, 178 SuccessCallback success_callback, 179 FailureCallback failure_callback, 180 const ProgressCallback& progress_callback); 181 182 // Extracts the current entry into memory. If the current entry is a 183 // directory, the |output| parameter is set to the empty string. If the 184 // current entry is a file, the |output| parameter is filled with its 185 // contents. OpenCurrentEntryInZip() must be called beforehand. Note: the 186 // |output| parameter can be filled with a big amount of data, avoid passing 187 // it around by value, but by reference or pointer. Note: the value returned 188 // by EntryInfo::original_size() cannot be trusted, so the real size of the 189 // uncompressed contents can be different. |max_read_bytes| limits the ammount 190 // of memory used to carry the entry. Returns true if the entire content is 191 // read. If the entry is bigger than |max_read_bytes|, returns false and 192 // |output| is filled with |max_read_bytes| of data. If an error occurs, 193 // returns false, and |output| is set to the empty string. 194 bool ExtractCurrentEntryToString(uint64_t max_read_bytes, 195 std::string* output) const; 196 197 // Returns the current entry info. Returns NULL if the current entry is 198 // not yet opened. OpenCurrentEntryInZip() must be called beforehand. current_entry_info()199 EntryInfo* current_entry_info() const { 200 return current_entry_info_.get(); 201 } 202 203 // Returns the number of entries in the zip file. 204 // Open() must be called beforehand. num_entries()205 int num_entries() const { return num_entries_; } 206 207 private: 208 // Common code used both in Open and OpenFromFd. 209 bool OpenInternal(); 210 211 // Resets the internal state. 212 void Reset(); 213 214 // Extracts a chunk of the file to the target. Will post a task for the next 215 // chunk and success/failure/progress callbacks as necessary. 216 void ExtractChunk(base::File target_file, 217 SuccessCallback success_callback, 218 FailureCallback failure_callback, 219 const ProgressCallback& progress_callback, 220 const int64_t offset); 221 222 unzFile zip_file_; 223 int num_entries_; 224 bool reached_end_; 225 std::unique_ptr<EntryInfo> current_entry_info_; 226 227 base::WeakPtrFactory<ZipReader> weak_ptr_factory_{this}; 228 229 DISALLOW_COPY_AND_ASSIGN(ZipReader); 230 }; 231 232 // A writer delegate that writes to a given File. 233 class FileWriterDelegate : public WriterDelegate { 234 public: 235 // Constructs a FileWriterDelegate that manipulates |file|. The delegate will 236 // not own |file|, therefore the caller must guarantee |file| will outlive the 237 // delegate. 238 explicit FileWriterDelegate(base::File* file); 239 240 // Constructs a FileWriterDelegate that takes ownership of |file|. 241 explicit FileWriterDelegate(std::unique_ptr<base::File> file); 242 243 // Truncates the file to the number of bytes written. 244 ~FileWriterDelegate() override; 245 246 // WriterDelegate methods: 247 248 // Seeks to the beginning of the file, returning false if the seek fails. 249 bool PrepareOutput() override; 250 251 // Writes |num_bytes| bytes of |data| to the file, returning false on error or 252 // if not all bytes could be written. 253 bool WriteBytes(const char* data, int num_bytes) override; 254 255 // Sets the last-modified time of the data. 256 void SetTimeModified(const base::Time& time) override; 257 258 // Return the actual size of the file. file_length()259 int64_t file_length() { return file_length_; } 260 261 private: 262 // The file the delegate modifies. 263 base::File* file_; 264 265 // The delegate can optionally own the file it modifies, in which case 266 // owned_file_ is set and file_ is an alias for owned_file_. 267 std::unique_ptr<base::File> owned_file_; 268 269 int64_t file_length_ = 0; 270 271 DISALLOW_COPY_AND_ASSIGN(FileWriterDelegate); 272 }; 273 274 // A writer delegate that writes a file at a given path. 275 class FilePathWriterDelegate : public WriterDelegate { 276 public: 277 explicit FilePathWriterDelegate(const base::FilePath& output_file_path); 278 ~FilePathWriterDelegate() override; 279 280 // WriterDelegate methods: 281 282 // Creates the output file and any necessary intermediate directories. 283 bool PrepareOutput() override; 284 285 // Writes |num_bytes| bytes of |data| to the file, returning false if not all 286 // bytes could be written. 287 bool WriteBytes(const char* data, int num_bytes) override; 288 289 // Sets the last-modified time of the data. 290 void SetTimeModified(const base::Time& time) override; 291 292 private: 293 base::FilePath output_file_path_; 294 base::File file_; 295 296 DISALLOW_COPY_AND_ASSIGN(FilePathWriterDelegate); 297 }; 298 299 } // namespace zip 300 301 #endif // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_ 302