1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (C) 2002-2006 IBM, Inc. All Rights Reserved.
4 *
5 ********************************************************************/
6
/**
 * This program demos collation-based string searching with the ICU
 * usearch API.
 */
10
/**
 * Usage text printed when -help / -? is given or option parsing fails.
 * Every line ends with '\n' so adjacent literals do not run together.
 */
const char gHelpString[] =
    "usage: strsrch [options*] -source source_string -pattern pattern_string\n"
    "-help Display this message.\n"
    "-locale name ICU locale to use. Default is en_US\n"
    "-rules rule Collation rules (overrides locale)\n"
    "-french French accent ordering\n"
    "-norm Normalizing mode on\n"
    "-shifted Shifted mode\n"
    "-lower Lower case first\n"
    "-upper Upper case first\n"
    "-case Enable separate case level\n"
    "-level n Sort level, 1 to 5, for Primary, Secondary, Tertiary, Quaternary, Identical\n"
    "-source string Source string\n"
    "-pattern string Pattern string to look for in source\n"
    "-overlap Enable searching to be done on overlapping patterns\n"
    "-canonical Enable searching to be done matching canonical equivalent patterns\n"
    "Example strsrch -rules \\u0026b\\u003ca -source a\\u0020b\\u0020bc -pattern b\n"
    "The format \\uXXXX is supported for the rules and comparison strings\n"
    ;
30
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <unicode/utypes.h>
#include <unicode/ucol.h>
#include <unicode/usearch.h>
#include <unicode/ustring.h>
39
40 /**
41 * Command line option variables
42 * These global variables are set according to the options specified
43 * on the command line by the user.
44 */
45 char * opt_locale = "en_US";
46 char * opt_rules = 0;
47 UBool opt_help = FALSE;
48 UBool opt_norm = FALSE;
49 UBool opt_french = FALSE;
50 UBool opt_shifted = FALSE;
51 UBool opt_lower = FALSE;
52 UBool opt_upper = FALSE;
53 UBool opt_case = FALSE;
54 UBool opt_overlap = FALSE;
55 UBool opt_canonical = FALSE;
56 int opt_level = 0;
57 char * opt_source = "International Components for Unicode";
58 char * opt_pattern = "Unicode";
59 UCollator * collator = 0;
60 UStringSearch * search = 0;
61 UChar rules[100];
62 UChar source[100];
63 UChar pattern[100];
64
65 /**
66 * Definitions for the command line options
67 */
68 struct OptSpec {
69 const char *name;
70 enum {FLAG, NUM, STRING} type;
71 void *pVar;
72 };
73
74 OptSpec opts[] = {
75 {"-locale", OptSpec::STRING, &opt_locale},
76 {"-rules", OptSpec::STRING, &opt_rules},
77 {"-source", OptSpec::STRING, &opt_source},
78 {"-pattern", OptSpec::STRING, &opt_pattern},
79 {"-norm", OptSpec::FLAG, &opt_norm},
80 {"-french", OptSpec::FLAG, &opt_french},
81 {"-shifted", OptSpec::FLAG, &opt_shifted},
82 {"-lower", OptSpec::FLAG, &opt_lower},
83 {"-upper", OptSpec::FLAG, &opt_upper},
84 {"-case", OptSpec::FLAG, &opt_case},
85 {"-level", OptSpec::NUM, &opt_level},
86 {"-overlap", OptSpec::FLAG, &opt_overlap},
87 {"-canonical", OptSpec::FLAG, &opt_canonical},
88 {"-help", OptSpec::FLAG, &opt_help},
89 {"-?", OptSpec::FLAG, &opt_help},
90 {0, OptSpec::FLAG, 0}
91 };
92
93 /**
94 * processOptions() Function to read the command line options.
95 */
processOptions(int argc,const char ** argv,OptSpec opts[])96 UBool processOptions(int argc, const char **argv, OptSpec opts[])
97 {
98 for (int argNum = 1; argNum < argc; argNum ++) {
99 const char *pArgName = argv[argNum];
100 OptSpec *pOpt;
101 for (pOpt = opts; pOpt->name != 0; pOpt ++) {
102 if (strcmp(pOpt->name, pArgName) == 0) {
103 switch (pOpt->type) {
104 case OptSpec::FLAG:
105 *(UBool *)(pOpt->pVar) = TRUE;
106 break;
107 case OptSpec::STRING:
108 argNum ++;
109 if (argNum >= argc) {
110 fprintf(stderr, "value expected for \"%s\" option.\n",
111 pOpt->name);
112 return FALSE;
113 }
114 *(const char **)(pOpt->pVar) = argv[argNum];
115 break;
116 case OptSpec::NUM:
117 argNum ++;
118 if (argNum >= argc) {
119 fprintf(stderr, "value expected for \"%s\" option.\n",
120 pOpt->name);
121 return FALSE;
122 }
123 char *endp;
124 int i = strtol(argv[argNum], &endp, 0);
125 if (endp == argv[argNum]) {
126 fprintf(stderr,
127 "integer value expected for \"%s\" option.\n",
128 pOpt->name);
129 return FALSE;
130 }
131 *(int *)(pOpt->pVar) = i;
132 }
133 break;
134 }
135 }
136 if (pOpt->name == 0)
137 {
138 fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
139 return FALSE;
140 }
141 }
142 return TRUE;
143 }
144
145 /**
146 * Creates a collator
147 */
processCollator()148 UBool processCollator()
149 {
150 // Set up an ICU collator
151 UErrorCode status = U_ZERO_ERROR;
152
153 if (opt_rules != 0) {
154 u_unescape(opt_rules, rules, 100);
155 collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY,
156 NULL, &status);
157 }
158 else {
159 collator = ucol_open(opt_locale, &status);
160 }
161 if (U_FAILURE(status)) {
162 fprintf(stderr, "Collator creation failed.: %d\n", status);
163 return FALSE;
164 }
165 if (status == U_USING_DEFAULT_WARNING) {
166 fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n",
167 opt_locale);
168 }
169 if (status == U_USING_FALLBACK_WARNING) {
170 fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n",
171 opt_locale);
172 }
173 if (opt_norm) {
174 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
175 }
176 if (opt_french) {
177 ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
178 }
179 if (opt_lower) {
180 ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_LOWER_FIRST,
181 &status);
182 }
183 if (opt_upper) {
184 ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_UPPER_FIRST,
185 &status);
186 }
187 if (opt_case) {
188 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &status);
189 }
190 if (opt_shifted) {
191 ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
192 &status);
193 }
194 if (opt_level != 0) {
195 switch (opt_level) {
196 case 1:
197 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
198 break;
199 case 2:
200 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_SECONDARY,
201 &status);
202 break;
203 case 3:
204 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &status);
205 break;
206 case 4:
207 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_QUATERNARY,
208 &status);
209 break;
210 case 5:
211 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_IDENTICAL,
212 &status);
213 break;
214 default:
215 fprintf(stderr, "-level param must be between 1 and 5\n");
216 return FALSE;
217 }
218 }
219 if (U_FAILURE(status)) {
220 fprintf(stderr, "Collator attribute setting failed.: %d\n", status);
221 return FALSE;
222 }
223 return TRUE;
224 }
225
226 /**
227 * Creates a string search
228 */
processStringSearch()229 UBool processStringSearch()
230 {
231 u_unescape(opt_source, source, 100);
232 u_unescape(opt_pattern, pattern, 100);
233 UErrorCode status = U_ZERO_ERROR;
234 search = usearch_openFromCollator(pattern, -1, source, -1, collator, NULL,
235 &status);
236 if (U_FAILURE(status)) {
237 return FALSE;
238 }
239 if (opt_overlap == TRUE) {
240 usearch_setAttribute(search, USEARCH_OVERLAP, USEARCH_ON, &status);
241 }
242 if (opt_canonical == TRUE) {
243 usearch_setAttribute(search, USEARCH_CANONICAL_MATCH, USEARCH_ON,
244 &status);
245 }
246 if (U_FAILURE(status)) {
247 fprintf(stderr, "Error setting search attributes\n");
248 return FALSE;
249 }
250 return TRUE;
251 }
252
findPattern()253 UBool findPattern()
254 {
255 UErrorCode status = U_ZERO_ERROR;
256 int32_t offset = usearch_next(search, &status);
257 if (offset == USEARCH_DONE) {
258 fprintf(stdout, "Pattern not found in source\n");
259 }
260 while (offset != USEARCH_DONE) {
261 fprintf(stdout, "Pattern found at offset %d size %d\n", offset,
262 usearch_getMatchedLength(search));
263 offset = usearch_next(search, &status);
264 }
265 if (U_FAILURE(status)) {
266 fprintf(stderr, "Error in searching for pattern %d\n", status);
267 return FALSE;
268 }
269 fprintf(stdout, "End of search\n");
270 return TRUE;
271 }
272
273 /**
274 * Main -- process command line, read in and pre-process the test file,
275 * call other functions to do the actual tests.
276 */
main(int argc,const char ** argv)277 int main(int argc, const char** argv)
278 {
279 if (processOptions(argc, argv, opts) != TRUE || opt_help) {
280 printf(gHelpString);
281 return -1;
282 }
283
284 if (processCollator() != TRUE) {
285 fprintf(stderr, "Error creating collator\n");
286 return -1;
287 }
288
289 if (processStringSearch() != TRUE) {
290 fprintf(stderr, "Error creating string search\n");
291 return -1;
292 }
293
294 fprintf(stdout, "Finding pattern %s in source %s\n", opt_pattern,
295 opt_source);
296
297 findPattern();
298 ucol_close(collator);
299 usearch_close(search);
300 return 0;
301 }
302