1 #include <algorithm>
2 #include <functional>
3 #include <utility>
4
5 #include "tail.h"
6
7 namespace marisa {
8
Tail()9 Tail::Tail() : buf_() {}
10
build(const Vector<String> & keys,Vector<UInt32> * offsets,int mode)11 void Tail::build(const Vector<String> &keys,
12 Vector<UInt32> *offsets, int mode) {
13 switch (mode) {
14 case MARISA_BINARY_TAIL: {
15 build_binary_tail(keys, offsets);
16 return;
17 }
18 case MARISA_TEXT_TAIL: {
19 if (!build_text_tail(keys, offsets)) {
20 build_binary_tail(keys, offsets);
21 }
22 return;
23 }
24 default: {
25 MARISA_THROW(MARISA_PARAM_ERROR);
26 }
27 }
28 }
29
mmap(Mapper * mapper,const char * filename,long offset,int whence)30 void Tail::mmap(Mapper *mapper, const char *filename,
31 long offset, int whence) {
32 if (mapper == NULL) {
33 MARISA_THROW(MARISA_PARAM_ERROR);
34 }
35 Mapper temp_mapper;
36 temp_mapper.open(filename, offset, whence);
37 map(temp_mapper);
38 temp_mapper.swap(mapper);
39 }
40
map(const void * ptr,std::size_t size)41 void Tail::map(const void *ptr, std::size_t size) {
42 Mapper mapper(ptr, size);
43 map(mapper);
44 }
45
map(Mapper & mapper)46 void Tail::map(Mapper &mapper) {
47 Tail temp;
48 temp.buf_.map(mapper);
49 temp.swap(this);
50 }
51
load(const char * filename,long offset,int whence)52 void Tail::load(const char *filename, long offset, int whence) {
53 Reader reader;
54 reader.open(filename, offset, whence);
55 read(reader);
56 }
57
fread(::FILE * file)58 void Tail::fread(::FILE *file) {
59 Reader reader(file);
60 read(reader);
61 }
62
read(int fd)63 void Tail::read(int fd) {
64 Reader reader(fd);
65 read(reader);
66 }
67
read(std::istream & stream)68 void Tail::read(std::istream &stream) {
69 Reader reader(&stream);
70 read(reader);
71 }
72
read(Reader & reader)73 void Tail::read(Reader &reader) {
74 Tail temp;
75 temp.buf_.read(reader);
76 temp.swap(this);
77 }
78
save(const char * filename,bool trunc_flag,long offset,int whence) const79 void Tail::save(const char *filename, bool trunc_flag,
80 long offset, int whence) const {
81 Writer writer;
82 writer.open(filename, trunc_flag, offset, whence);
83 write(writer);
84 }
85
fwrite(::FILE * file) const86 void Tail::fwrite(::FILE *file) const {
87 Writer writer(file);
88 write(writer);
89 }
90
write(int fd) const91 void Tail::write(int fd) const {
92 Writer writer(fd);
93 write(writer);
94 }
95
write(std::ostream & stream) const96 void Tail::write(std::ostream &stream) const {
97 Writer writer(&stream);
98 write(writer);
99 }
100
write(Writer & writer) const101 void Tail::write(Writer &writer) const {
102 buf_.write(writer);
103 }
104
clear()105 void Tail::clear() {
106 Tail().swap(this);
107 }
108
swap(Tail * rhs)109 void Tail::swap(Tail *rhs) {
110 buf_.swap(&rhs->buf_);
111 }
112
build_binary_tail(const Vector<String> & keys,Vector<UInt32> * offsets)113 void Tail::build_binary_tail(const Vector<String> &keys,
114 Vector<UInt32> *offsets) {
115 if (keys.empty()) {
116 build_empty_tail(offsets);
117 return;
118 }
119
120 Vector<UInt8> buf;
121 buf.push_back('\0');
122
123 Vector<UInt32> temp_offsets;
124 temp_offsets.resize(keys.size() + 1);
125
126 for (std::size_t i = 0; i < keys.size(); ++i) {
127 temp_offsets[i] = (UInt32)buf.size();
128 for (std::size_t j = 0; j < keys[i].length(); ++j) {
129 buf.push_back(keys[i][j]);
130 }
131 }
132 temp_offsets.back() = (UInt32)buf.size();
133 buf.shrink();
134
135 if (offsets != NULL) {
136 temp_offsets.swap(offsets);
137 }
138 buf_.swap(&buf);
139 }
140
build_text_tail(const Vector<String> & keys,Vector<UInt32> * offsets)141 bool Tail::build_text_tail(const Vector<String> &keys,
142 Vector<UInt32> *offsets) {
143 if (keys.empty()) {
144 build_empty_tail(offsets);
145 return true;
146 }
147
148 typedef std::pair<RString, UInt32> KeyIdPair;
149 Vector<KeyIdPair> pairs;
150 pairs.resize(keys.size());
151 for (std::size_t i = 0; i < keys.size(); ++i) {
152 for (std::size_t j = 0; j < keys[i].length(); ++j) {
153 if (keys[i][j] == '\0') {
154 return false;
155 }
156 }
157 pairs[i].first = RString(keys[i]);
158 pairs[i].second = (UInt32)i;
159 }
160 std::sort(pairs.begin(), pairs.end(), std::greater<KeyIdPair>());
161
162 Vector<UInt8> buf;
163 buf.push_back('T');
164
165 Vector<UInt32> temp_offsets;
166 temp_offsets.resize(pairs.size(), 1);
167
168 const KeyIdPair dummy_key;
169 const KeyIdPair *last = &dummy_key;
170 for (std::size_t i = 0; i < pairs.size(); ++i) {
171 const KeyIdPair &cur = pairs[i];
172 std::size_t match = 0;
173 while ((match < cur.first.length()) && (match < last->first.length()) &&
174 last->first[match] == cur.first[match]) {
175 ++match;
176 }
177 if ((match == cur.first.length()) && (last->first.length() != 0)) {
178 temp_offsets[cur.second] = (UInt32)(temp_offsets[last->second]
179 + (last->first.length() - match));
180 } else {
181 temp_offsets[cur.second] = (UInt32)buf.size();
182 for (std::size_t j = 1; j <= cur.first.length(); ++j) {
183 buf.push_back(cur.first[cur.first.length() - j]);
184 }
185 buf.push_back('\0');
186 }
187 last = &cur;
188 }
189 buf.shrink();
190
191 if (offsets != NULL) {
192 temp_offsets.swap(offsets);
193 }
194 buf_.swap(&buf);
195 return true;
196 }
197
build_empty_tail(Vector<UInt32> * offsets)198 void Tail::build_empty_tail(Vector<UInt32> *offsets) {
199 buf_.clear();
200 if (offsets != NULL) {
201 offsets->clear();
202 }
203 }
204
205 } // namespace marisa
206