1 #ifndef MARISA_BASE_H_
2 #define MARISA_BASE_H_
3 
4 // Old Visual C++ does not provide stdint.h.
5 #ifndef _MSC_VER
6  #include <stdint.h>
7 #endif  // _MSC_VER
8 
9 #ifdef __cplusplus
10  #include <cstddef>
11 #else  // __cplusplus
12  #include <stddef.h>
13 #endif  // __cplusplus
14 
15 #ifdef __cplusplus
16 extern "C" {
17 #endif  // __cplusplus
18 
// Fixed-width unsigned integer types used by the C and C++ interfaces.
#ifndef _MSC_VER
// <stdint.h> has been included: alias the standard exact-width types.
typedef uint8_t  marisa_uint8;
typedef uint16_t marisa_uint16;
typedef uint32_t marisa_uint32;
typedef uint64_t marisa_uint64;
#else  // _MSC_VER
// Visual C++: <stdint.h> is not included, so rely on the compiler's sized
// integer keywords instead.
typedef unsigned __int8  marisa_uint8;
typedef unsigned __int16 marisa_uint16;
typedef unsigned __int32 marisa_uint32;
typedef unsigned __int64 marisa_uint64;
#endif  // _MSC_VER
30 
// MARISA_WORD_SIZE is the machine word width in bits, either 32 or 64. It is
// chosen from macros predefined by the compiler:
//  - a list of well-known 64-bit targets identified by name, and
//  - __LP64__ / _LP64, which GCC-compatible compilers define whenever long and
//    pointers are 64 bits wide. This covers 64-bit architectures that are not
//    listed by name (for example RISC-V 64) instead of silently treating them
//    as 32-bit.
// Anything else falls back to 32.
#if defined(_WIN64) || defined(__amd64__) || defined(__x86_64__) || \
    defined(__ia64__) || defined(__ppc64__) || defined(__powerpc64__) || \
    defined(__sparc64__) || defined(__mips64__) || defined(__aarch64__) || \
    defined(__s390x__) || defined(__LP64__) || defined(_LP64)
 #define MARISA_WORD_SIZE 64
#else  // defined(_WIN64), etc.
 #define MARISA_WORD_SIZE 32
#endif  // defined(_WIN64), etc.
39 
40 //#define MARISA_WORD_SIZE  (sizeof(void *) * 8)
41 
// Largest value of each unsigned type. Converting -1 to an unsigned type
// yields the value with all bits set, i.e. the maximum of that type.
#define MARISA_UINT8_MAX  ((marisa_uint8)-1)
#define MARISA_UINT16_MAX ((marisa_uint16)-1)
#define MARISA_UINT32_MAX ((marisa_uint32)-1)
#define MARISA_UINT64_MAX ((marisa_uint64)-1)
#define MARISA_SIZE_MAX   ((size_t)-1)

// Reserved "invalid" values. Link IDs and key IDs use the largest 32-bit
// value; MARISA_INVALID_EXTRA drops the low 8 bits of it, leaving the largest
// 24-bit value (0x00FFFFFF).
#define MARISA_INVALID_LINK_ID MARISA_UINT32_MAX
#define MARISA_INVALID_KEY_ID  MARISA_UINT32_MAX
#define MARISA_INVALID_EXTRA   (MARISA_UINT32_MAX >> 8)
51 
// Error codes are defined as members of marisa_error_code. This library throws
// an exception with one of the error codes when an error occurs.
typedef enum marisa_error_code_ {
  // MARISA_OK means that a requested operation has succeeded. In practice, an
  // exception never has MARISA_OK because it is not an error.
  MARISA_OK           = 0,

  // MARISA_STATE_ERROR means that an object was not ready for a requested
  // operation. For example, an operation to modify a fixed vector throws an
  // exception with MARISA_STATE_ERROR.
  MARISA_STATE_ERROR  = 1,

  // MARISA_NULL_ERROR means that an invalid NULL pointer has been given.
  MARISA_NULL_ERROR   = 2,

  // MARISA_BOUND_ERROR means that an operation has tried to access an out of
  // range address.
  MARISA_BOUND_ERROR  = 3,

  // MARISA_RANGE_ERROR means that an out of range value has appeared in
  // operation.
  MARISA_RANGE_ERROR  = 4,

  // MARISA_CODE_ERROR means that an undefined code has appeared in operation.
  MARISA_CODE_ERROR   = 5,

  // MARISA_RESET_ERROR means that a smart pointer has tried to reset itself.
  MARISA_RESET_ERROR  = 6,

  // MARISA_SIZE_ERROR means that a size has exceeded a library limitation.
  MARISA_SIZE_ERROR   = 7,

  // MARISA_MEMORY_ERROR means that a memory allocation has failed.
  MARISA_MEMORY_ERROR = 8,

  // MARISA_IO_ERROR means that an I/O operation has failed.
  MARISA_IO_ERROR     = 9,

  // MARISA_FORMAT_ERROR means that input was in invalid format.
  // (No comma after the last enumerator: C89 and C++98/03 reject it.)
  MARISA_FORMAT_ERROR = 10
} marisa_error_code;
93 
// Min/max values, flags and masks for dictionary settings are defined below.
// Please note that unspecified settings will be replaced with the default
// settings. For example, 0 is equivalent to (MARISA_DEFAULT_NUM_TRIES |
// MARISA_DEFAULT_CACHE | MARISA_DEFAULT_TAIL | MARISA_DEFAULT_ORDER).
98 
// A dictionary consists of 3 tries by default. Usually more tries make a
// dictionary more space-efficient but less time-efficient.
typedef enum marisa_num_tries_ {
  MARISA_MIN_NUM_TRIES     = 0x00001,
  MARISA_MAX_NUM_TRIES     = 0x0007F,
  // No comma after the last enumerator: C89 and C++98/03 reject it.
  MARISA_DEFAULT_NUM_TRIES = 0x00003
} marisa_num_tries;
106 
// Search functions are accelerated with a cache. marisa_cache_level lists the
// available cache size options: a larger cache enables faster search but
// takes more space. Each option occupies its own bit.
typedef enum marisa_cache_level_ {
  MARISA_HUGE_CACHE    = 1 << 7,   // 0x00080
  MARISA_LARGE_CACHE   = 1 << 8,   // 0x00100
  MARISA_NORMAL_CACHE  = 1 << 9,   // 0x00200
  MARISA_SMALL_CACHE   = 1 << 10,  // 0x00400
  MARISA_TINY_CACHE    = 1 << 11,  // 0x00800
  MARISA_DEFAULT_CACHE = MARISA_NORMAL_CACHE
} marisa_cache_level;
118 
// This library provides 2 kinds of TAIL implementations.
typedef enum marisa_tail_mode_ {
  // MARISA_TEXT_TAIL merges last labels as zero-terminated strings. So, it is
  // available if and only if the last labels do not contain a NUL character.
  // If MARISA_TEXT_TAIL is specified and a NUL character exists in the last
  // labels, the setting is automatically switched to MARISA_BINARY_TAIL.
  MARISA_TEXT_TAIL         = 0x01000,

  // MARISA_BINARY_TAIL also merges last labels but as byte sequences. It uses
  // a bit vector to detect the end of a sequence, instead of NUL characters.
  // So, MARISA_BINARY_TAIL requires a larger space if the average length of
  // labels is greater than 8.
  MARISA_BINARY_TAIL       = 0x02000,

  // No comma after the last enumerator: C89 and C++98/03 reject it.
  MARISA_DEFAULT_TAIL      = MARISA_TEXT_TAIL
} marisa_tail_mode;
135 
// The arrangement of nodes affects the time cost of matching and the order of
// predictive search.
typedef enum marisa_node_order_ {
  // MARISA_LABEL_ORDER arranges nodes in ascending label order.
  // MARISA_LABEL_ORDER is useful if an application needs to predict keys in
  // label order.
  MARISA_LABEL_ORDER       = 0x10000,

  // MARISA_WEIGHT_ORDER arranges nodes in descending weight order.
  // MARISA_WEIGHT_ORDER is generally a better choice because it enables faster
  // matching.
  MARISA_WEIGHT_ORDER      = 0x20000,

  // No comma after the last enumerator: C89 and C++98/03 reject it.
  MARISA_DEFAULT_ORDER     = MARISA_WEIGHT_ORDER
} marisa_node_order;
151 
// Bit masks over a combined configuration value. Each of the first four masks
// covers the bits used by one group of settings (number of tries, cache
// level, tail mode, node order); MARISA_CONFIG_MASK is their union.
typedef enum marisa_config_mask_ {
  MARISA_NUM_TRIES_MASK   = 0x0007F,
  MARISA_CACHE_LEVEL_MASK = 0x00F80,
  MARISA_TAIL_MODE_MASK   = 0x0F000,
  MARISA_NODE_ORDER_MASK  = 0xF0000,
  MARISA_CONFIG_MASK      = MARISA_NUM_TRIES_MASK | MARISA_CACHE_LEVEL_MASK |
                            MARISA_TAIL_MODE_MASK | MARISA_NODE_ORDER_MASK
} marisa_config_mask;
159 
160 #ifdef __cplusplus
161 }  // extern "C"
162 #endif  // __cplusplus
163 
164 #ifdef __cplusplus
165 
166 // `std::swap` is in <utility> since C++ 11 but in <algorithm> in C++ 98:
167 #if __cplusplus >= 201103L
168  #include <utility>
169 #else
170  #include <algorithm>
171 #endif
172 namespace marisa {
173 
174 typedef ::marisa_uint8  UInt8;
175 typedef ::marisa_uint16 UInt16;
176 typedef ::marisa_uint32 UInt32;
177 typedef ::marisa_uint64 UInt64;
178 
179 typedef ::marisa_error_code ErrorCode;
180 
181 typedef ::marisa_cache_level CacheLevel;
182 typedef ::marisa_tail_mode TailMode;
183 typedef ::marisa_node_order NodeOrder;
184 
185 using std::swap;
186 
187 }  // namespace marisa
188 #endif  // __cplusplus
189 
190 #ifdef __cplusplus
191  #include "marisa/exception.h"
192  #include "marisa/scoped-ptr.h"
193  #include "marisa/scoped-array.h"
194 #endif  // __cplusplus
195 
196 #endif  // MARISA_BASE_H_
197