1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Allows memory-mapping a full file or a specific region within the file.
16 // It also supports efficiently switching the region being mapped.
17 //
18 // Note on Performance:
19 // It supports different optimized strategies for common patterns on both
20 // read-only and read-write files. This includes using read-ahead buffers for
21 // faster reads as well as background-sync vs manual-sync of changes to disk.
22 // For more details, see comments at MemoryMappedFile::Strategy.
23 //
// Usage:
//
// auto mmapped_file = std::make_unique<MemoryMappedFile>(
//     filesystem, "/file.pb", MemoryMappedFile::READ_WRITE_AUTO_SYNC);
// mmapped_file->Remap(0, 16 * 1024);  // load the first 16K of the file.
//
// char read_byte = mmapped_file->region()[100];
// mmapped_file->mutable_region()[10] = write_byte;
//
// mmapped_file->PersistToDisk();  // Optional; immediately writes changes to
//                                 // disk.
//
// mmapped_file->Remap(16 * 1024, 16 * 1024);  // load the next 16K.
// mmapped_file->mutable_region()[10] = write_byte;
// mmapped_file.reset();
38 
39 #ifndef ICING_FILE_MEMORY_MAPPED_FILE_H_
40 #define ICING_FILE_MEMORY_MAPPED_FILE_H_
41 
42 #include <unistd.h>
43 
44 #include <cstdint>
45 #include <memory>
46 #include <string>
47 #include <string_view>
48 
49 #include "icing/text_classifier/lib3/utils/base/status.h"
50 #include "icing/file/filesystem.h"
51 
52 namespace icing {
53 namespace lib {
54 
55 class MemoryMappedFile {
56  public:
system_page_size()57   static size_t __attribute__((const)) system_page_size() {
58     static const size_t page_size = sysconf(_SC_PAGE_SIZE);
59     return page_size;
60   }
61 
62   enum Strategy {
63     // Memory map a read-only file into a read-only memory region.
64     READ_ONLY,
65 
66     // Memory map a read-write file into a writable memory region. Any changes
67     // made to the region are automatically flushed to the underlying file in
68     // the background.
69     READ_WRITE_AUTO_SYNC,
70 
71     // Memory map a read-write file into a writable memory region. Changes made
72     // to this region will never be auto-synced to the underlying file. Unless
73     // the caller explicitly calls PersistToDisk(), all changes will be lost
74     // when the
75     // MemoryMappedFile is destroyed.
76     READ_WRITE_MANUAL_SYNC,
77   };
78 
79   // file_path : Full path of the file that needs to be memory-mapped.
80   MemoryMappedFile(const Filesystem& filesystem, std::string_view file_path,
81                    Strategy mmap_strategy);
82   MemoryMappedFile(const MemoryMappedFile& other) = delete;
83   MemoryMappedFile(MemoryMappedFile&& other);
84   MemoryMappedFile& operator=(const MemoryMappedFile& other) = delete;
85   MemoryMappedFile& operator=(MemoryMappedFile&& other);
86   // Frees any region that is still memory-mapped region.
87   ~MemoryMappedFile();
88 
89   // Memory-map the newly specified region within the file specified by
90   // file_offset and mmap_size. Unmaps any previously mmapped region.
91   //
92   // Returns any encountered IO error.
93   libtextclassifier3::Status Remap(size_t file_offset, size_t mmap_size);
94 
95   // unmap and free-up the region that has currently been memory mapped.
96   void Unmap();
97 
98   // Explicitly persist any changes made to the currently mapped region to disk.
99   //
100   // NOTE: This is only valid if Strategy=READ_WRITE was used.
101   //
102   // Returns:
103   //   OK on success
104   //   INTERNAL on I/O error
105   //   FAILED_PRECONDITION if Strategy is not implemented
106   libtextclassifier3::Status PersistToDisk();
107 
108   // Advise the system to help it optimize the memory-mapped region for
109   // upcoming read/write operations.
110   //
111   // NOTE: See linux documentation of madvise() for additional details.
112   enum AccessPattern {
113     // Future memory access are expected to be in random order. So, readhead
114     // will have limited impact on latency.
115     ACCESS_RANDOM,
116 
117     // Future memory access are expected to be sequential. So, some readahead
118     // can greatly improve latency.
119     ACCESS_SEQUENTIAL,
120 
121     // Future memory access is expected to be high-volume and all over the file.
122     // So, preloading the whole region into memory would greatly improve
123     // latency.
124     ACCESS_ALL,
125 
126     // Future memory access is expected to be rare. So, it is best to free up
127     // as much of preloaded memory as possible.
128     ACCESS_NONE,
129   };
130   libtextclassifier3::Status OptimizeFor(AccessPattern access_pattern);
131 
132   // Accessors to the memory-mapped region. Returns null if nothing is mapped.
region()133   const char* region() const { return region_; }
mutable_region()134   char* mutable_region() { return region_; }
135 
region_size()136   size_t region_size() const { return region_size_; }
strategy()137   Strategy strategy() const { return strategy_; }
138 
139  private:
140   // Swaps the contents of this with other.
141   void Swap(MemoryMappedFile* other);
142 
143   // Cached constructor params.
144   const Filesystem* filesystem_;
145   std::string file_path_;
146   Strategy strategy_;
147 
148   // Offset within the file at which the current memory-mapped region starts.
149   size_t file_offset_ = 0;
150 
151   // Region that is currently memory-mapped.
152   char* region_ = nullptr;
153   size_t region_size_ = 0;
154 
155   // The actual size of the region we mmapped. As the requested region might not
156   // align with system pages, we often mmap more bytes than requested.
157   size_t adjusted_mmap_size_ = 0;
158 
159   // Raw pointer (or error) returned by calls to mmap().
160   void* mmap_result_ = nullptr;
161 };
162 
163 }  // namespace lib
164 }  // namespace icing
165 
166 #endif  // ICING_FILE_MEMORY_MAPPED_FILE_H_
167