1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 #ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
5 #define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
6 
7 #include <stddef.h>
8 #include <stdint.h>
9 
10 #include <memory>
11 #include <string>
12 
13 #include "base/callback.h"
14 #include "base/files/file.h"
15 #include "base/files/file_path.h"
16 #include "base/files/file_util.h"
17 #include "base/macros.h"
18 #include "base/memory/weak_ptr.h"
19 #include "base/time/time.h"
20 
21 #if defined(USE_SYSTEM_MINIZIP)
22 #include <minizip/unzip.h>
23 #else
24 #include "third_party/zlib/contrib/minizip/unzip.h"
25 #endif
26 
27 namespace zip {
28 
29 // A delegate interface used to stream out an entry; see
30 // ZipReader::ExtractCurrentEntry.
31 class WriterDelegate {
32  public:
~WriterDelegate()33   virtual ~WriterDelegate() {}
34 
35   // Invoked once before any data is streamed out to pave the way (e.g., to open
36   // the output file). Return false on failure to cancel extraction.
37   virtual bool PrepareOutput() = 0;
38 
39   // Invoked to write the next chunk of data. Return false on failure to cancel
40   // extraction.
41   virtual bool WriteBytes(const char* data, int num_bytes) = 0;
42 
43   // Sets the last-modified time of the data.
44   virtual void SetTimeModified(const base::Time& time) = 0;
45 };
46 
47 // This class is used for reading zip files. A typical use case of this
48 // class is to scan entries in a zip file and extract them. The code will
49 // look like:
50 //
51 //   ZipReader reader;
52 //   reader.Open(zip_file_path);
53 //   while (reader.HasMore()) {
54 //     reader.OpenCurrentEntryInZip();
55 //     const base::FilePath& entry_path =
56 //        reader.current_entry_info()->file_path();
57 //     auto writer = CreateFilePathWriterDelegate(extract_dir, entry_path);
58 //     reader.ExtractCurrentEntry(writer, std::numeric_limits<uint64_t>::max());
59 //     reader.AdvanceToNextEntry();
60 //   }
61 //
62 // For simplicity, error checking is omitted in the example code above. The
63 // production code should check return values from all of these functions.
64 //
65 class ZipReader {
66  public:
67   // A callback that is called when the operation is successful.
68   using SuccessCallback = base::OnceClosure;
69   // A callback that is called when the operation fails.
70   using FailureCallback = base::OnceClosure;
71   // A callback that is called periodically during the operation with the number
72   // of bytes that have been processed so far.
73   using ProgressCallback = base::RepeatingCallback<void(int64_t)>;
74 
75   // This class represents information of an entry (file or directory) in
76   // a zip file.
77   class EntryInfo {
78    public:
79     EntryInfo(const std::string& filename_in_zip,
80               const unz_file_info& raw_file_info);
81 
82     // Returns the file path. The path is usually relative like
83     // "foo/bar.txt", but if it's absolute, is_unsafe() returns true.
file_path()84     const base::FilePath& file_path() const { return file_path_; }
85 
86     // Returns the size of the original file (i.e. after uncompressed).
87     // Returns 0 if the entry is a directory.
88     // Note: this value should not be trusted, because it is stored as metadata
89     // in the zip archive and can be different from the real uncompressed size.
original_size()90     int64_t original_size() const { return original_size_; }
91 
92     // Returns the last modified time. If the time stored in the zip file was
93     // not valid, the unix epoch will be returned.
94     //
95     // The time stored in the zip archive uses the MS-DOS date and time format.
96     // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx
97     // As such the following limitations apply:
98     // * only years from 1980 to 2107 can be represented.
99     // * the time stamp has a 2 second resolution.
100     // * there's no timezone information, so the time is interpreted as local.
last_modified()101     base::Time last_modified() const { return last_modified_; }
102 
103     // Returns true if the entry is a directory.
is_directory()104     bool is_directory() const { return is_directory_; }
105 
106     // Returns true if the entry is unsafe, like having ".." or invalid
107     // UTF-8 characters in its file name, or the file path is absolute.
is_unsafe()108     bool is_unsafe() const { return is_unsafe_; }
109 
110     // Returns true if the entry is encrypted.
is_encrypted()111     bool is_encrypted() const { return is_encrypted_; }
112 
113    private:
114     const base::FilePath file_path_;
115     int64_t original_size_;
116     base::Time last_modified_;
117     bool is_directory_;
118     bool is_unsafe_;
119     bool is_encrypted_;
120     DISALLOW_COPY_AND_ASSIGN(EntryInfo);
121   };
122 
123   ZipReader();
124   ~ZipReader();
125 
126   // Opens the zip file specified by |zip_file_path|. Returns true on
127   // success.
128   bool Open(const base::FilePath& zip_file_path);
129 
130   // Opens the zip file referred to by the platform file |zip_fd|, without
131   // taking ownership of |zip_fd|. Returns true on success.
132   bool OpenFromPlatformFile(base::PlatformFile zip_fd);
133 
134   // Opens the zip data stored in |data|. This class uses a weak reference to
135   // the given sring while extracting files, i.e. the caller should keep the
136   // string until it finishes extracting files.
137   bool OpenFromString(const std::string& data);
138 
139   // Closes the currently opened zip file. This function is called in the
140   // destructor of the class, so you usually don't need to call this.
141   void Close();
142 
143   // Returns true if there is at least one entry to read. This function is
144   // used to scan entries with AdvanceToNextEntry(), like:
145   //
146   // while (reader.HasMore()) {
147   //   // Do something with the current file here.
148   //   reader.AdvanceToNextEntry();
149   // }
150   bool HasMore();
151 
152   // Advances the next entry. Returns true on success.
153   bool AdvanceToNextEntry();
154 
155   // Opens the current entry in the zip file. On success, returns true and
156   // updates the the current entry state (i.e. current_entry_info() is
157   // updated). This function should be called before operations over the
158   // current entry like ExtractCurrentEntryToFile().
159   //
160   // Note that there is no CloseCurrentEntryInZip(). The the current entry
161   // state is reset automatically as needed.
162   bool OpenCurrentEntryInZip();
163 
164   // Extracts |num_bytes_to_extract| bytes of the current entry to |delegate|,
165   // starting from the beginning of the entry. Return value specifies whether
166   // the entire file was extracted.
167   bool ExtractCurrentEntry(WriterDelegate* delegate,
168                            uint64_t num_bytes_to_extract) const;
169 
170   // Asynchronously extracts the current entry to the given output file path.
171   // If the current entry is a directory it just creates the directory
172   // synchronously instead.  OpenCurrentEntryInZip() must be called beforehand.
173   // success_callback will be called on success and failure_callback will be
174   // called on failure.  progress_callback will be called at least once.
175   // Callbacks will be posted to the current MessageLoop in-order.
176   void ExtractCurrentEntryToFilePathAsync(
177       const base::FilePath& output_file_path,
178       SuccessCallback success_callback,
179       FailureCallback failure_callback,
180       const ProgressCallback& progress_callback);
181 
182   // Extracts the current entry into memory. If the current entry is a
183   // directory, the |output| parameter is set to the empty string. If the
184   // current entry is a file, the |output| parameter is filled with its
185   // contents. OpenCurrentEntryInZip() must be called beforehand. Note: the
186   // |output| parameter can be filled with a big amount of data, avoid passing
187   // it around by value, but by reference or pointer. Note: the value returned
188   // by EntryInfo::original_size() cannot be trusted, so the real size of the
189   // uncompressed contents can be different. |max_read_bytes| limits the ammount
190   // of memory used to carry the entry. Returns true if the entire content is
191   // read. If the entry is bigger than |max_read_bytes|, returns false and
192   // |output| is filled with |max_read_bytes| of data. If an error occurs,
193   // returns false, and |output| is set to the empty string.
194   bool ExtractCurrentEntryToString(uint64_t max_read_bytes,
195                                    std::string* output) const;
196 
197   // Returns the current entry info. Returns NULL if the current entry is
198   // not yet opened. OpenCurrentEntryInZip() must be called beforehand.
current_entry_info()199   EntryInfo* current_entry_info() const {
200     return current_entry_info_.get();
201   }
202 
203   // Returns the number of entries in the zip file.
204   // Open() must be called beforehand.
num_entries()205   int num_entries() const { return num_entries_; }
206 
207  private:
208   // Common code used both in Open and OpenFromFd.
209   bool OpenInternal();
210 
211   // Resets the internal state.
212   void Reset();
213 
214   // Extracts a chunk of the file to the target.  Will post a task for the next
215   // chunk and success/failure/progress callbacks as necessary.
216   void ExtractChunk(base::File target_file,
217                     SuccessCallback success_callback,
218                     FailureCallback failure_callback,
219                     const ProgressCallback& progress_callback,
220                     const int64_t offset);
221 
222   unzFile zip_file_;
223   int num_entries_;
224   bool reached_end_;
225   std::unique_ptr<EntryInfo> current_entry_info_;
226 
227   base::WeakPtrFactory<ZipReader> weak_ptr_factory_{this};
228 
229   DISALLOW_COPY_AND_ASSIGN(ZipReader);
230 };
231 
232 // A writer delegate that writes to a given File.
233 class FileWriterDelegate : public WriterDelegate {
234  public:
235   // Constructs a FileWriterDelegate that manipulates |file|. The delegate will
236   // not own |file|, therefore the caller must guarantee |file| will outlive the
237   // delegate.
238   explicit FileWriterDelegate(base::File* file);
239 
240   // Constructs a FileWriterDelegate that takes ownership of |file|.
241   explicit FileWriterDelegate(std::unique_ptr<base::File> file);
242 
243   // Truncates the file to the number of bytes written.
244   ~FileWriterDelegate() override;
245 
246   // WriterDelegate methods:
247 
248   // Seeks to the beginning of the file, returning false if the seek fails.
249   bool PrepareOutput() override;
250 
251   // Writes |num_bytes| bytes of |data| to the file, returning false on error or
252   // if not all bytes could be written.
253   bool WriteBytes(const char* data, int num_bytes) override;
254 
255   // Sets the last-modified time of the data.
256   void SetTimeModified(const base::Time& time) override;
257 
258   // Return the actual size of the file.
file_length()259   int64_t file_length() { return file_length_; }
260 
261  private:
262   // The file the delegate modifies.
263   base::File* file_;
264 
265   // The delegate can optionally own the file it modifies, in which case
266   // owned_file_ is set and file_ is an alias for owned_file_.
267   std::unique_ptr<base::File> owned_file_;
268 
269   int64_t file_length_ = 0;
270 
271   DISALLOW_COPY_AND_ASSIGN(FileWriterDelegate);
272 };
273 
274 // A writer delegate that writes a file at a given path.
275 class FilePathWriterDelegate : public WriterDelegate {
276  public:
277   explicit FilePathWriterDelegate(const base::FilePath& output_file_path);
278   ~FilePathWriterDelegate() override;
279 
280   // WriterDelegate methods:
281 
282   // Creates the output file and any necessary intermediate directories.
283   bool PrepareOutput() override;
284 
285   // Writes |num_bytes| bytes of |data| to the file, returning false if not all
286   // bytes could be written.
287   bool WriteBytes(const char* data, int num_bytes) override;
288 
289   // Sets the last-modified time of the data.
290   void SetTimeModified(const base::Time& time) override;
291 
292  private:
293   base::FilePath output_file_path_;
294   base::File file_;
295 
296   DISALLOW_COPY_AND_ASSIGN(FilePathWriterDelegate);
297 };
298 
299 }  // namespace zip
300 
301 #endif  // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
302