#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <string>

#include <marisa.h>

#include "cmdopt.h"
8 
9 namespace {
10 
// Upper bound on the number of matches printed per query; overwritten by the
// `-n' option while parsing the command line.
std::size_t max_num_results(10);
// Selects how the dictionary is opened: true memory-maps the file, false
// loads it through Trie::load().
bool mmap_flag(true);
13 
// Writes the usage summary and the option list to standard error.
//
// cmd: program name shown in the "Usage:" line (main passes argv[0]).
void print_help(const char *cmd) {
  std::ostream &err = std::cerr;

  err << "Usage: " << cmd << " [OPTION]... DIC\n\n";
  err << "Options:\n";
  err << "  -n, --max-num-results=[N]  limit the number of results to N"
         " (default: 10)\n";
  err << "                             0: no limit\n";
  err << "  -m, --mmap-dictionary  use memory-mapped I/O to load a dictionary"
         " (default)\n";
  err << "  -r, --read-dictionary  read an entire dictionary into memory\n";
  err << "  -h, --help             print this help\n";
  // Trailing blank line, followed by a flush.
  err << std::endl;
}
26 
common_prefix_search(const char * const * args,std::size_t num_args)27 int common_prefix_search(const char * const *args, std::size_t num_args) {
28   if (num_args == 0) {
29     std::cerr << "error: dictionary is not specified" << std::endl;
30     return 10;
31   } else if (num_args > 1) {
32     std::cerr << "error: more than one dictionaries are specified"
33         << std::endl;
34     return 11;
35   }
36 
37   marisa::Trie trie;
38   if (mmap_flag) {
39     try {
40       trie.mmap(args[0]);
41     } catch (const marisa::Exception &ex) {
42       std::cerr << ex.what() << ": failed to mmap a dictionary file: "
43           << args[0] << std::endl;
44       return 20;
45     }
46   } else {
47     try {
48       trie.load(args[0]);
49     } catch (const marisa::Exception &ex) {
50       std::cerr << ex.what() << ": failed to load a dictionary file: "
51           << args[0] << std::endl;
52       return 21;
53     }
54   }
55 
56   marisa::Agent agent;
57   marisa::Keyset keyset;
58   std::string str;
59   while (std::getline(std::cin, str)) {
60     try {
61       agent.set_query(str.c_str(), str.length());
62       while (trie.common_prefix_search(agent)) {
63         keyset.push_back(agent.key());
64       }
65       if (keyset.empty()) {
66         std::cout << "not found" << std::endl;
67       } else {
68         std::cout << keyset.size() << " found" << std::endl;
69         const std::size_t end = std::min(max_num_results, keyset.size());
70         for (std::size_t i = 0; i < end; ++i) {
71           std::cout << keyset[i].id() << '\t';
72           std::cout.write(keyset[i].ptr(),
73               static_cast<std::streamsize>(keyset[i].length())) << '\t';
74           std::cout << str << '\n';
75         }
76       }
77       keyset.reset();
78     } catch (const marisa::Exception &ex) {
79       std::cerr << ex.what() << ": common_prefix_search() failed: "
80           << str << std::endl;
81       return 30;
82     }
83 
84     if (!std::cout) {
85       std::cerr << "error: failed to write results to standard output"
86           << std::endl;
87       return 31;
88     }
89   }
90 
91   return 0;
92 }
93 
94 }  // namespace
95 
main(int argc,char * argv[])96 int main(int argc, char *argv[]) {
97   std::ios::sync_with_stdio(false);
98 
99   ::cmdopt_option long_options[] = {
100     { "max-num-results", 1, NULL, 'n' },
101     { "mmap-dictionary", 0, NULL, 'm' },
102     { "read-dictionary", 0, NULL, 'r' },
103     { "help", 0, NULL, 'h' },
104     { NULL, 0, NULL, 0 }
105   };
106   ::cmdopt_t cmdopt;
107   ::cmdopt_init(&cmdopt, argc, argv, "n:mrh", long_options);
108   int label;
109   while ((label = ::cmdopt_get(&cmdopt)) != -1) {
110     switch (label) {
111       case 'n': {
112         char *end_of_value;
113         const long value = std::strtol(cmdopt.optarg, &end_of_value, 10);
114         if ((*end_of_value != '\0') || (value < 0)) {
115           std::cerr << "error: option `-n' with an invalid argument: "
116               << cmdopt.optarg << std::endl;
117         }
118         if ((value == 0) || ((unsigned long long)value > MARISA_SIZE_MAX)) {
119           max_num_results = MARISA_SIZE_MAX;
120         } else {
121           max_num_results = (std::size_t)value;
122         }
123         break;
124       }
125       case 'm': {
126         mmap_flag = true;
127         break;
128       }
129       case 'r': {
130         mmap_flag = false;
131         break;
132       }
133       case 'h': {
134         print_help(argv[0]);
135         return 0;
136       }
137       default: {
138         return 1;
139       }
140     }
141   }
142   return common_prefix_search(cmdopt.argv + cmdopt.optind,
143       static_cast<std::size_t>(cmdopt.argc - cmdopt.optind));
144 }
145