1 #include <algorithm>
2 #include <functional>
3 #include <utility>
4 
5 #include "tail.h"
6 
7 namespace marisa {
8 
Tail()9 Tail::Tail() : buf_() {}
10 
build(const Vector<String> & keys,Vector<UInt32> * offsets,int mode)11 void Tail::build(const Vector<String> &keys,
12     Vector<UInt32> *offsets, int mode) {
13   switch (mode) {
14     case MARISA_BINARY_TAIL: {
15       build_binary_tail(keys, offsets);
16       return;
17     }
18     case MARISA_TEXT_TAIL: {
19       if (!build_text_tail(keys, offsets)) {
20         build_binary_tail(keys, offsets);
21       }
22       return;
23     }
24     default: {
25       MARISA_THROW(MARISA_PARAM_ERROR);
26     }
27   }
28 }
29 
mmap(Mapper * mapper,const char * filename,long offset,int whence)30 void Tail::mmap(Mapper *mapper, const char *filename,
31     long offset, int whence) {
32   if (mapper == NULL) {
33     MARISA_THROW(MARISA_PARAM_ERROR);
34   }
35   Mapper temp_mapper;
36   temp_mapper.open(filename, offset, whence);
37   map(temp_mapper);
38   temp_mapper.swap(mapper);
39 }
40 
map(const void * ptr,std::size_t size)41 void Tail::map(const void *ptr, std::size_t size) {
42   Mapper mapper(ptr, size);
43   map(mapper);
44 }
45 
map(Mapper & mapper)46 void Tail::map(Mapper &mapper) {
47   Tail temp;
48   temp.buf_.map(mapper);
49   temp.swap(this);
50 }
51 
load(const char * filename,long offset,int whence)52 void Tail::load(const char *filename, long offset, int whence) {
53   Reader reader;
54   reader.open(filename, offset, whence);
55   read(reader);
56 }
57 
fread(::FILE * file)58 void Tail::fread(::FILE *file) {
59   Reader reader(file);
60   read(reader);
61 }
62 
read(int fd)63 void Tail::read(int fd) {
64   Reader reader(fd);
65   read(reader);
66 }
67 
read(std::istream & stream)68 void Tail::read(std::istream &stream) {
69   Reader reader(&stream);
70   read(reader);
71 }
72 
read(Reader & reader)73 void Tail::read(Reader &reader) {
74   Tail temp;
75   temp.buf_.read(reader);
76   temp.swap(this);
77 }
78 
save(const char * filename,bool trunc_flag,long offset,int whence) const79 void Tail::save(const char *filename, bool trunc_flag,
80     long offset, int whence) const {
81   Writer writer;
82   writer.open(filename, trunc_flag, offset, whence);
83   write(writer);
84 }
85 
fwrite(::FILE * file) const86 void Tail::fwrite(::FILE *file) const {
87   Writer writer(file);
88   write(writer);
89 }
90 
write(int fd) const91 void Tail::write(int fd) const {
92   Writer writer(fd);
93   write(writer);
94 }
95 
write(std::ostream & stream) const96 void Tail::write(std::ostream &stream) const {
97   Writer writer(&stream);
98   write(writer);
99 }
100 
write(Writer & writer) const101 void Tail::write(Writer &writer) const {
102   buf_.write(writer);
103 }
104 
clear()105 void Tail::clear() {
106   Tail().swap(this);
107 }
108 
swap(Tail * rhs)109 void Tail::swap(Tail *rhs) {
110   buf_.swap(&rhs->buf_);
111 }
112 
build_binary_tail(const Vector<String> & keys,Vector<UInt32> * offsets)113 void Tail::build_binary_tail(const Vector<String> &keys,
114     Vector<UInt32> *offsets) {
115   if (keys.empty()) {
116     build_empty_tail(offsets);
117     return;
118   }
119 
120   Vector<UInt8> buf;
121   buf.push_back('\0');
122 
123   Vector<UInt32> temp_offsets;
124   temp_offsets.resize(keys.size() + 1);
125 
126   for (std::size_t i = 0; i < keys.size(); ++i) {
127     temp_offsets[i] = (UInt32)buf.size();
128     for (std::size_t j = 0; j < keys[i].length(); ++j) {
129       buf.push_back(keys[i][j]);
130     }
131   }
132   temp_offsets.back() = (UInt32)buf.size();
133   buf.shrink();
134 
135   if (offsets != NULL) {
136     temp_offsets.swap(offsets);
137   }
138   buf_.swap(&buf);
139 }
140 
build_text_tail(const Vector<String> & keys,Vector<UInt32> * offsets)141 bool Tail::build_text_tail(const Vector<String> &keys,
142     Vector<UInt32> *offsets) {
143   if (keys.empty()) {
144     build_empty_tail(offsets);
145     return true;
146   }
147 
148   typedef std::pair<RString, UInt32> KeyIdPair;
149   Vector<KeyIdPair> pairs;
150   pairs.resize(keys.size());
151   for (std::size_t i = 0; i < keys.size(); ++i) {
152     for (std::size_t j = 0; j < keys[i].length(); ++j) {
153       if (keys[i][j] == '\0') {
154         return false;
155       }
156     }
157     pairs[i].first = RString(keys[i]);
158     pairs[i].second = (UInt32)i;
159   }
160   std::sort(pairs.begin(), pairs.end(), std::greater<KeyIdPair>());
161 
162   Vector<UInt8> buf;
163   buf.push_back('T');
164 
165   Vector<UInt32> temp_offsets;
166   temp_offsets.resize(pairs.size(), 1);
167 
168   const KeyIdPair dummy_key;
169   const KeyIdPair *last = &dummy_key;
170   for (std::size_t i = 0; i < pairs.size(); ++i) {
171     const KeyIdPair &cur = pairs[i];
172     std::size_t match = 0;
173     while ((match < cur.first.length()) && (match < last->first.length()) &&
174         last->first[match] == cur.first[match]) {
175       ++match;
176     }
177     if ((match == cur.first.length()) && (last->first.length() != 0)) {
178       temp_offsets[cur.second] = (UInt32)(temp_offsets[last->second]
179           + (last->first.length() - match));
180     } else {
181       temp_offsets[cur.second] = (UInt32)buf.size();
182       for (std::size_t j = 1; j <= cur.first.length(); ++j) {
183         buf.push_back(cur.first[cur.first.length() - j]);
184       }
185       buf.push_back('\0');
186     }
187     last = &cur;
188   }
189   buf.shrink();
190 
191   if (offsets != NULL) {
192     temp_offsets.swap(offsets);
193   }
194   buf_.swap(&buf);
195   return true;
196 }
197 
build_empty_tail(Vector<UInt32> * offsets)198 void Tail::build_empty_tail(Vector<UInt32> *offsets) {
199   buf_.clear();
200   if (offsets != NULL) {
201     offsets->clear();
202   }
203 }
204 
205 }  // namespace marisa
206