1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *    http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <stdint.h>
20 
21 #include <map>
22 #include <memory>
23 #include <string_view>
24 #include <utility>
25 
26 #include "zip_error.h"
27 
28 // This class is the interface of the central directory entries map. The map
29 // helps to locate a particular cd entry based on the filename.
30 class CdEntryMapInterface {
31  public:
32   virtual ~CdEntryMapInterface() = default;
33   // Adds an entry to the map. The |name| should internally points to the
34   // filename field of a cd entry. And |start| points to the beginning of the
35   // central directory. Returns 0 on success.
36   virtual ZipError AddToMap(std::string_view name, const uint8_t* start) = 0;
37   // For the zip entry |entryName|, finds the offset of its filename field in
38   // the central directory. Returns a pair of [status, offset]. The value of
39   // the status is 0 on success.
40   virtual std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
41                                                          const uint8_t* cd_start) const = 0;
42   // Resets the iterator to the beginning of the map.
43   virtual void ResetIteration() = 0;
44   // Returns the [name, cd offset] of the current element. Also increments the
45   // iterator to points to the next element. Returns an empty pair we have read
46   // past boundary.
47   virtual std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) = 0;
48 };
49 
50 /**
51  * More space efficient string representation of strings in an mmaped zipped
52  * file than std::string_view. Using std::string_view as an entry in the
53  * ZipArchive hash table wastes space. std::string_view stores a pointer to a
54  * string (on 64 bit, 8 bytes) and the length to read from that pointer,
55  * 2 bytes. Because of alignment, the structure consumes 16 bytes, wasting
56  * 6 bytes.
57  *
58  * ZipStringOffset stores a 4 byte offset from a fixed location in the memory
59  * mapped file instead of the entire address, consuming 8 bytes with alignment.
60  */
61 struct ZipStringOffset {
62   uint32_t name_offset;
63   uint16_t name_length;
64 
ToStringViewZipStringOffset65   const std::string_view ToStringView(const uint8_t* start) const {
66     return std::string_view{reinterpret_cast<const char*>(start + name_offset), name_length};
67   }
68 };
69 
70 // This implementation of CdEntryMap uses an array hash table. It uses less
71 // memory than std::map; and it's used as the default implementation for zip
72 // archives without zip64 extension.
73 class CdEntryMapZip32 : public CdEntryMapInterface {
74  public:
75   static std::unique_ptr<CdEntryMapInterface> Create(uint16_t num_entries);
76 
77   ZipError AddToMap(std::string_view name, const uint8_t* start) override;
78   std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
79                                                  const uint8_t* cd_start) const override;
80   void ResetIteration() override;
81   std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;
82 
83  private:
84   explicit CdEntryMapZip32(uint16_t num_entries);
85 
86   // We know how many entries are in the Zip archive, so we can have a
87   // fixed-size hash table. We define a load factor of 0.75 and over
88   // allocate so the maximum number entries can never be higher than
89   // ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
90   uint32_t hash_table_size_{0};
91   std::unique_ptr<ZipStringOffset[], decltype(&free)> hash_table_{nullptr, free};
92 
93   // The position of element for the current iteration.
94   uint32_t current_position_{0};
95 };
96 
97 // This implementation of CdEntryMap uses a std::map
98 class CdEntryMapZip64 : public CdEntryMapInterface {
99  public:
100   static std::unique_ptr<CdEntryMapInterface> Create();
101 
102   ZipError AddToMap(std::string_view name, const uint8_t* start) override;
103   std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
104                                                  const uint8_t* cd_start) const override;
105   void ResetIteration() override;
106   std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;
107 
108  private:
109   CdEntryMapZip64() = default;
110 
111   std::map<std::string_view, uint64_t> entry_table_;
112 
113   std::map<std::string_view, uint64_t>::iterator iterator_;
114 };
115