1 #include <cstdlib>
2 #include <ctime>
3 #include <sstream>
4 #include <string>
5 #include <vector>
6 
7 #include <marisa/grimoire/vector/pop-count.h>
8 #include <marisa/grimoire/vector/rank-index.h>
9 #include <marisa/grimoire/vector.h>
10 
11 #include "marisa-assert.h"
12 
13 namespace {
14 
15 #if MARISA_WORD_SIZE == 64
TestPopCount()16 void TestPopCount() {
17   TEST_START();
18 
19   {
20     marisa::grimoire::vector::PopCount count(0);
21     ASSERT(count.lo8() == 0);
22     ASSERT(count.lo16() == 0);
23     ASSERT(count.lo24() == 0);
24     ASSERT(count.lo32() == 0);
25     ASSERT(count.lo40() == 0);
26     ASSERT(count.lo48() == 0);
27     ASSERT(count.lo56() == 0);
28     ASSERT(count.lo64() == 0);
29   }
30 
31   {
32     marisa::grimoire::vector::PopCount count(0xFFFFFFFFFFFFFFFFULL);
33     ASSERT(count.lo8() == 8);
34     ASSERT(count.lo16() == 16);
35     ASSERT(count.lo24() == 24);
36     ASSERT(count.lo32() == 32);
37     ASSERT(count.lo40() == 40);
38     ASSERT(count.lo48() == 48);
39     ASSERT(count.lo56() == 56);
40     ASSERT(count.lo64() == 64);
41   }
42 
43   {
44     marisa::grimoire::vector::PopCount count(0xFF7F3F1F0F070301ULL);
45     ASSERT(count.lo8() == 1);
46     ASSERT(count.lo16() == 3);
47     ASSERT(count.lo24() == 6);
48     ASSERT(count.lo32() == 10);
49     ASSERT(count.lo40() == 15);
50     ASSERT(count.lo48() == 21);
51     ASSERT(count.lo56() == 28);
52     ASSERT(count.lo64() == 36);
53   }
54 
55   TEST_END();
56 }
57 #else  // MARISA_WORD_SIZE == 64
58 void TestPopCount() {
59   TEST_START();
60 
61   {
62     marisa::grimoire::vector::PopCount count(0);
63     ASSERT(count.lo8() == 0);
64     ASSERT(count.lo16() == 0);
65     ASSERT(count.lo24() == 0);
66     ASSERT(count.lo32() == 0);
67   }
68 
69   {
70     marisa::grimoire::vector::PopCount count(0xFFFFFFFFU);
71     ASSERT(count.lo8() == 8);
72     ASSERT(count.lo16() == 16);
73     ASSERT(count.lo24() == 24);
74     ASSERT(count.lo32() == 32);
75   }
76 
77   {
78     marisa::grimoire::vector::PopCount count(0xFF3F0F03U);
79     ASSERT(count.lo8() == 2);
80     ASSERT(count.lo16() == 6);
81     ASSERT(count.lo24() == 12);
82     ASSERT(count.lo32() == 20);
83   }
84 
85   TEST_END();
86 }
87 #endif  // MARISA_WORD_SIZE == 64
88 
TestRankIndex()89 void TestRankIndex() {
90   TEST_START();
91 
92   marisa::grimoire::vector::RankIndex rank;
93 
94   ASSERT(rank.abs() == 0);
95   ASSERT(rank.rel1() == 0);
96   ASSERT(rank.rel2() == 0);
97   ASSERT(rank.rel3() == 0);
98   ASSERT(rank.rel4() == 0);
99   ASSERT(rank.rel5() == 0);
100   ASSERT(rank.rel6() == 0);
101   ASSERT(rank.rel7() == 0);
102 
103   rank.set_abs(10000);
104   rank.set_rel1(64);
105   rank.set_rel2(128);
106   rank.set_rel3(192);
107   rank.set_rel4(256);
108   rank.set_rel5(320);
109   rank.set_rel6(384);
110   rank.set_rel7(448);
111 
112   ASSERT(rank.abs() == 10000);
113   ASSERT(rank.rel1() == 64);
114   ASSERT(rank.rel2() == 128);
115   ASSERT(rank.rel3() == 192);
116   ASSERT(rank.rel4() == 256);
117   ASSERT(rank.rel5() == 320);
118   ASSERT(rank.rel6() == 384);
119   ASSERT(rank.rel7() == 448);
120 
121   TEST_END();
122 }
123 
TestVector()124 void TestVector() {
125   TEST_START();
126 
127   std::vector<int> values;
128   for (std::size_t i = 0; i < 10000; ++i) {
129     values.push_back(std::rand());
130   }
131 
132   marisa::grimoire::Vector<int> vec;
133 
134   ASSERT(vec.max_size() == (MARISA_SIZE_MAX / sizeof(int)));
135   ASSERT(vec.size() == 0);
136   ASSERT(vec.capacity() == 0);
137   ASSERT(!vec.fixed());
138   ASSERT(vec.empty());
139   ASSERT(vec.total_size() == 0);
140   ASSERT(vec.io_size() == sizeof(marisa::UInt64));
141 
142   for (std::size_t i = 0; i < values.size(); ++i) {
143     vec.push_back(values[i]);
144     ASSERT(vec[i] == values[i]);
145     ASSERT(static_cast<const marisa::grimoire::Vector<int> &>(vec)[i]
146         == values[i]);
147   }
148 
149   ASSERT(vec.size() == values.size());
150   ASSERT(vec.capacity() >= vec.size());
151   ASSERT(!vec.empty());
152   ASSERT(vec.total_size() == (sizeof(int) * values.size()));
153   ASSERT(vec.io_size() == sizeof(marisa::UInt64)
154       + ((sizeof(int) * values.size())));
155 
156   ASSERT(static_cast<const marisa::grimoire::Vector<int> &>(vec).front()
157       == values.front());
158   ASSERT(static_cast<const marisa::grimoire::Vector<int> &>(vec).back()
159       == values.back());
160   ASSERT(vec.front() == values.front());
161   ASSERT(vec.back() == values.back());
162 
163   vec.shrink();
164 
165   ASSERT(vec.size() == values.size());
166   ASSERT(vec.capacity() == vec.size());
167   for (std::size_t i = 0; i < values.size(); ++i) {
168     ASSERT(vec[i] == values[i]);
169     ASSERT(static_cast<const marisa::grimoire::Vector<int> &>(vec)[i]
170         == values[i]);
171   }
172 
173   {
174     marisa::grimoire::Writer writer;
175     writer.open("vector-test.dat");
176     vec.write(writer);
177   }
178   vec.clear();
179 
180   ASSERT(vec.empty());
181   ASSERT(vec.capacity() == 0);
182 
183   {
184     marisa::grimoire::Mapper mapper;
185     mapper.open("vector-test.dat");
186     vec.map(mapper);
187 
188     ASSERT(vec.size() == values.size());
189     ASSERT(vec.capacity() == 0);
190     ASSERT(vec.fixed());
191     ASSERT(!vec.empty());
192     ASSERT(vec.total_size() == (sizeof(int) * values.size()));
193     ASSERT(vec.io_size() == sizeof(marisa::UInt64)
194         + ((sizeof(int) * values.size())));
195 
196     for (std::size_t i = 0; i < values.size(); ++i) {
197       ASSERT(static_cast<const marisa::grimoire::Vector<int> &>(vec)[i]
198           == values[i]);
199     }
200 
201     vec.clear();
202   }
203 
204   {
205     marisa::grimoire::Reader reader;
206     reader.open("vector-test.dat");
207     vec.read(reader);
208   }
209 
210   ASSERT(vec.size() == values.size());
211   ASSERT(vec.capacity() == vec.size());
212   ASSERT(!vec.fixed());
213   ASSERT(!vec.empty());
214   ASSERT(vec.total_size() == (sizeof(int) * values.size()));
215   ASSERT(vec.io_size() == sizeof(marisa::UInt64)
216       + ((sizeof(int) * values.size())));
217 
218   for (std::size_t i = 0; i < values.size(); ++i) {
219     ASSERT(vec[i] == values[i]);
220     ASSERT(static_cast<const marisa::grimoire::Vector<int> &>(vec)[i]
221         == values[i]);
222   }
223 
224   vec.clear();
225 
226   vec.push_back(0);
227   ASSERT(vec.capacity() == 1);
228   vec.push_back(1);
229   ASSERT(vec.capacity() == 2);
230   vec.push_back(2);
231   ASSERT(vec.capacity() == 4);
232   vec.resize(5);
233   ASSERT(vec.capacity() == 8);
234   vec.resize(100);
235   ASSERT(vec.capacity() == 100);
236 
237   EXCEPT(vec.resize(MARISA_SIZE_MAX), MARISA_SIZE_ERROR);
238 
239   vec.fix();
240   ASSERT(vec.fixed());
241   EXCEPT(vec.fix(), MARISA_STATE_ERROR);
242   EXCEPT(vec.push_back(0), MARISA_STATE_ERROR);
243   EXCEPT(vec.resize(0), MARISA_STATE_ERROR);
244   EXCEPT(vec.reserve(0), MARISA_STATE_ERROR);
245 
246   TEST_END();
247 }
248 
TestFlatVector()249 void TestFlatVector() {
250   TEST_START();
251 
252   marisa::grimoire::FlatVector vec;
253 
254   ASSERT(vec.value_size() == 0);
255   ASSERT(vec.mask() == 0);
256   ASSERT(vec.size() == 0);
257   ASSERT(vec.empty());
258   ASSERT(vec.total_size() == 0);
259   ASSERT(vec.io_size() == (sizeof(marisa::UInt64) * 3));
260 
261   marisa::grimoire::Vector<marisa::UInt32> values;
262   vec.build(values);
263 
264   ASSERT(vec.value_size() == 0);
265   ASSERT(vec.mask() == 0);
266   ASSERT(vec.size() == 0);
267   ASSERT(vec.empty());
268   ASSERT(vec.total_size() == 0);
269   ASSERT(vec.io_size() == (sizeof(marisa::UInt64) * 3));
270 
271   values.push_back(0);
272   vec.build(values);
273 
274   ASSERT(vec.value_size() == 0);
275   ASSERT(vec.mask() == 0);
276   ASSERT(vec.size() == 1);
277   ASSERT(!vec.empty());
278   ASSERT(vec.total_size() == 8);
279   ASSERT(vec.io_size() == (sizeof(marisa::UInt64) * 4));
280   ASSERT(vec[0] == 0);
281 
282   values.push_back(255);
283   vec.build(values);
284 
285   ASSERT(vec.value_size() == 8);
286   ASSERT(vec.mask() == 0xFF);
287   ASSERT(vec.size() == 2);
288   ASSERT(vec[0] == 0);
289   ASSERT(vec[1] == 255);
290 
291   values.push_back(65536);
292   vec.build(values);
293 
294   ASSERT(vec.value_size() == 17);
295   ASSERT(vec.mask() == 0x1FFFF);
296   ASSERT(vec.size() == 3);
297   ASSERT(vec[0] == 0);
298   ASSERT(vec[1] == 255);
299   ASSERT(vec[2] == 65536);
300 
301   {
302     marisa::grimoire::Writer writer;
303     writer.open("vector-test.dat");
304     vec.write(writer);
305   }
306 
307   vec.clear();
308 
309   ASSERT(vec.value_size() == 0);
310   ASSERT(vec.mask() == 0);
311   ASSERT(vec.size() == 0);
312 
313   {
314     marisa::grimoire::Mapper mapper;
315     mapper.open("vector-test.dat");
316     vec.map(mapper);
317 
318     ASSERT(vec.value_size() == 17);
319     ASSERT(vec.mask() == 0x1FFFF);
320     ASSERT(vec.size() == 3);
321     ASSERT(vec[0] == 0);
322     ASSERT(vec[1] == 255);
323     ASSERT(vec[2] == 65536);
324 
325     vec.clear();
326   }
327 
328   {
329     marisa::grimoire::Reader reader;
330     reader.open("vector-test.dat");
331     vec.read(reader);
332   }
333 
334   ASSERT(vec.value_size() == 17);
335   ASSERT(vec.mask() == 0x1FFFF);
336   ASSERT(vec.size() == 3);
337   ASSERT(vec[0] == 0);
338   ASSERT(vec[1] == 255);
339   ASSERT(vec[2] == 65536);
340 
341   values.clear();
342   for (std::size_t i = 0; i < 10000; ++i) {
343     values.push_back(static_cast<marisa::UInt32>(std::rand()));
344   }
345   vec.build(values);
346 
347   ASSERT(vec.size() == values.size());
348   for (std::size_t i = 0; i < vec.size(); ++i) {
349     ASSERT(vec[i] == values[i]);
350   }
351 
352   TEST_END();
353 }
354 
TestBitVector(std::size_t size)355 void TestBitVector(std::size_t size) {
356   marisa::grimoire::BitVector bv;
357 
358   ASSERT(bv.size() == 0);
359   ASSERT(bv.empty());
360   ASSERT(bv.total_size() == 0);
361   ASSERT(bv.io_size() == sizeof(marisa::UInt64) * 5);
362 
363   std::vector<bool> bits(size);
364   std::vector<std::size_t> zeros, ones;
365   for (std::size_t i = 0; i < size; ++i) {
366     const bool bit = (std::rand() % 2) == 0;
367     bits[i] = bit;
368     bv.push_back(bit);
369     (bit ? ones : zeros).push_back(i);
370     ASSERT(bv[i] == bits[i]);
371   }
372 
373   ASSERT(bv.size() == bits.size());
374   ASSERT((size == 0) || !bv.empty());
375 
376   bv.build(true, true);
377 
378   std::size_t num_zeros = 0, num_ones = 0;
379   for (std::size_t i = 0; i < bits.size(); ++i) {
380     ASSERT(bv[i] == bits[i]);
381     ASSERT(bv.rank0(i) == num_zeros);
382     ASSERT(bv.rank1(i) == num_ones);
383     ++(bv[i] ? num_ones : num_zeros);
384   }
385   for (std::size_t i = 0; i < zeros.size(); ++i) {
386     ASSERT(bv.select0(i) == zeros[i]);
387   }
388   for (std::size_t i = 0; i < ones.size(); ++i) {
389     ASSERT(bv.select1(i) == ones[i]);
390   }
391   ASSERT(bv.num_0s() == num_zeros);
392   ASSERT(bv.num_1s() == num_ones);
393 
394   std::stringstream stream;
395   {
396     marisa::grimoire::Writer writer;
397     writer.open(stream);
398     bv.write(writer);
399   }
400 
401   bv.clear();
402 
403   ASSERT(bv.size() == 0);
404   ASSERT(bv.empty());
405   ASSERT(bv.total_size() == 0);
406   ASSERT(bv.io_size() == sizeof(marisa::UInt64) * 5);
407 
408   {
409     marisa::grimoire::Reader reader;
410     reader.open(stream);
411     bv.read(reader);
412   }
413 
414   ASSERT(bv.size() == bits.size());
415 
416   num_zeros = 0, num_ones = 0;
417   for (std::size_t i = 0; i < bits.size(); ++i) {
418     ASSERT(bv[i] == bits[i]);
419     ASSERT(bv.rank0(i) == num_zeros);
420     ASSERT(bv.rank1(i) == num_ones);
421     ++(bv[i] ? num_ones : num_zeros);
422   }
423   for (std::size_t i = 0; i < zeros.size(); ++i) {
424     ASSERT(bv.select0(i) == zeros[i]);
425   }
426   for (std::size_t i = 0; i < ones.size(); ++i) {
427     ASSERT(bv.select1(i) == ones[i]);
428   }
429   ASSERT(bv.num_0s() == num_zeros);
430   ASSERT(bv.num_1s() == num_ones);
431 }
432 
TestBitVector()433 void TestBitVector() {
434   TEST_START();
435 
436   TestBitVector(0);
437   TestBitVector(1);
438   TestBitVector(511);
439   TestBitVector(512);
440   TestBitVector(513);
441 
442   for (int i = 0; i < 100; ++i) {
443     TestBitVector(std::rand() % 4096);
444   }
445 
446   TEST_END();
447 }
448 
449 }  // namespace
450 
main()451 int main() try {
452   std::srand((unsigned int)std::time(NULL));
453 
454   TestPopCount();
455   TestPopCount();
456   TestRankIndex();
457 
458   TestVector();
459   TestFlatVector();
460   TestBitVector();
461 
462   return 0;
463 } catch (const marisa::Exception &ex) {
464   std::cerr << ex.what() << std::endl;
465   throw;
466 }
467