1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (C) 2002-2006 IBM, Inc.   All Rights Reserved.
4  *
5  ********************************************************************/
6 
/**
 * This program demos collation-based string search
 */
10 
/**
 * Usage text printed by main() when -help / -? is given or when option
 * parsing fails.  Adjacent literals are concatenated by the compiler, so
 * every entry must carry its own trailing newline.
 */
const char gHelpString[] =
    "usage: strsrch [options*] -source source_string -pattern pattern_string\n"
    "-help            Display this message.\n"
    "-locale name     ICU locale to use.  Default is en_US\n"
    "-rules rule      Collation rules string (overrides locale)\n"
    "-french          French accent ordering\n"
    "-norm            Normalizing mode on\n"
    "-shifted         Shifted mode\n"
    "-lower           Lower case first\n"
    "-upper           Upper case first\n"
    "-case            Enable separate case level\n"
    "-level n         Sort level, 1 to 5, for Primary, Secondary, Tertiary, Quaternary, Identical\n"
    "-source string   Source string\n"
    "-pattern string  Pattern string to look for in source\n"
    "-overlap         Enable searching to be done on overlapping patterns\n"
    "-canonical       Enable searching to be done matching canonical equivalent patterns\n"
    "Example strsrch -rules \\u0026b\\u003ca -source a\\u0020b\\u0020bc -pattern b\n"
    "The format \\uXXXX is supported for the rules and comparison strings\n"
    ;
30 
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <unicode/utypes.h>
#include <unicode/ucol.h>
#include <unicode/usearch.h>
#include <unicode/ustring.h>
39 
40 /**
41  * Command line option variables
42  *    These global variables are set according to the options specified
43  *    on the command line by the user.
44  */
45 char * opt_locale      = "en_US";
46 char * opt_rules       = 0;
47 UBool  opt_help        = FALSE;
48 UBool  opt_norm        = FALSE;
49 UBool  opt_french      = FALSE;
50 UBool  opt_shifted     = FALSE;
51 UBool  opt_lower       = FALSE;
52 UBool  opt_upper       = FALSE;
53 UBool  opt_case        = FALSE;
54 UBool  opt_overlap     = FALSE;
55 UBool  opt_canonical   = FALSE;
56 int    opt_level       = 0;
57 char * opt_source      = "International Components for Unicode";
58 char * opt_pattern     = "Unicode";
59 UCollator * collator   = 0;
60 UStringSearch * search = 0;
61 UChar rules[100];
62 UChar source[100];
63 UChar pattern[100];
64 
65 /**
66  * Definitions for the command line options
67  */
68 struct OptSpec {
69     const char *name;
70     enum {FLAG, NUM, STRING} type;
71     void *pVar;
72 };
73 
// Table of the options this program recognizes.  Each entry gives the option
// text, the kind of value it takes, and the address of the global variable
// that receives the value.  processOptions() scans the table from the top and
// stops at the entry whose name is 0.
OptSpec opts[] = {
    {"-locale",      OptSpec::STRING, &opt_locale},
    {"-rules",       OptSpec::STRING, &opt_rules},
	{"-source",      OptSpec::STRING, &opt_source},
    {"-pattern",     OptSpec::STRING, &opt_pattern},
    {"-norm",        OptSpec::FLAG,   &opt_norm},
    {"-french",      OptSpec::FLAG,   &opt_french},
    {"-shifted",     OptSpec::FLAG,   &opt_shifted},
    {"-lower",       OptSpec::FLAG,   &opt_lower},
    {"-upper",       OptSpec::FLAG,   &opt_upper},
    {"-case",        OptSpec::FLAG,   &opt_case},
    {"-level",       OptSpec::NUM,    &opt_level},
	{"-overlap",     OptSpec::FLAG,   &opt_overlap},
	{"-canonical",   OptSpec::FLAG,   &opt_canonical},
    // -help and -? are two spellings of the same flag.
    {"-help",        OptSpec::FLAG,   &opt_help},
    {"-?",           OptSpec::FLAG,   &opt_help},
    // End-of-table sentinel: a null name terminates the scan.
    {0, OptSpec::FLAG, 0}
};
92 
93 /**
94  * processOptions()  Function to read the command line options.
95  */
processOptions(int argc,const char ** argv,OptSpec opts[])96 UBool processOptions(int argc, const char **argv, OptSpec opts[])
97 {
98     for (int argNum = 1; argNum < argc; argNum ++) {
99         const char *pArgName = argv[argNum];
100         OptSpec *pOpt;
101         for (pOpt = opts;  pOpt->name != 0; pOpt ++) {
102             if (strcmp(pOpt->name, pArgName) == 0) {
103                 switch (pOpt->type) {
104                 case OptSpec::FLAG:
105                     *(UBool *)(pOpt->pVar) = TRUE;
106                     break;
107                 case OptSpec::STRING:
108                     argNum ++;
109                     if (argNum >= argc) {
110                         fprintf(stderr, "value expected for \"%s\" option.\n",
111 							    pOpt->name);
112                         return FALSE;
113                     }
114                     *(const char **)(pOpt->pVar) = argv[argNum];
115                     break;
116                 case OptSpec::NUM:
117                     argNum ++;
118                     if (argNum >= argc) {
119                         fprintf(stderr, "value expected for \"%s\" option.\n",
120 							    pOpt->name);
121                         return FALSE;
122                     }
123                     char *endp;
124                     int i = strtol(argv[argNum], &endp, 0);
125                     if (endp == argv[argNum]) {
126                         fprintf(stderr,
127 							    "integer value expected for \"%s\" option.\n",
128 								pOpt->name);
129                         return FALSE;
130                     }
131                     *(int *)(pOpt->pVar) = i;
132                 }
133                 break;
134             }
135         }
136         if (pOpt->name == 0)
137         {
138             fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
139             return FALSE;
140         }
141     }
142 	return TRUE;
143 }
144 
145 /**
146  * Creates a collator
147  */
processCollator()148 UBool processCollator()
149 {
150 	// Set up an ICU collator
151     UErrorCode status = U_ZERO_ERROR;
152 
153     if (opt_rules != 0) {
154 		u_unescape(opt_rules, rules, 100);
155         collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY,
156 			                  NULL, &status);
157     }
158     else {
159         collator = ucol_open(opt_locale, &status);
160     }
161 	if (U_FAILURE(status)) {
162         fprintf(stderr, "Collator creation failed.: %d\n", status);
163         return FALSE;
164     }
165     if (status == U_USING_DEFAULT_WARNING) {
166         fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n",
167 			    opt_locale);
168     }
169     if (status == U_USING_FALLBACK_WARNING) {
170         fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n",
171 			    opt_locale);
172     }
173     if (opt_norm) {
174         ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
175     }
176     if (opt_french) {
177         ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
178     }
179     if (opt_lower) {
180         ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_LOWER_FIRST,
181 			              &status);
182     }
183     if (opt_upper) {
184         ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_UPPER_FIRST,
185 			              &status);
186     }
187     if (opt_case) {
188         ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &status);
189     }
190     if (opt_shifted) {
191         ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
192 			              &status);
193     }
194     if (opt_level != 0) {
195         switch (opt_level) {
196         case 1:
197             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
198             break;
199         case 2:
200             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_SECONDARY,
201 				              &status);
202             break;
203         case 3:
204             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &status);
205             break;
206         case 4:
207             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_QUATERNARY,
208 				              &status);
209             break;
210         case 5:
211             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_IDENTICAL,
212 				              &status);
213             break;
214         default:
215             fprintf(stderr, "-level param must be between 1 and 5\n");
216             return FALSE;
217         }
218     }
219     if (U_FAILURE(status)) {
220         fprintf(stderr, "Collator attribute setting failed.: %d\n", status);
221         return FALSE;
222     }
223 	return TRUE;
224 }
225 
226 /**
227  * Creates a string search
228  */
processStringSearch()229 UBool processStringSearch()
230 {
231 	u_unescape(opt_source, source, 100);
232 	u_unescape(opt_pattern, pattern, 100);
233 	UErrorCode status = U_ZERO_ERROR;
234 	search = usearch_openFromCollator(pattern, -1, source, -1, collator, NULL,
235 		                              &status);
236 	if (U_FAILURE(status)) {
237 		return FALSE;
238 	}
239 	if (opt_overlap == TRUE) {
240 		usearch_setAttribute(search, USEARCH_OVERLAP, USEARCH_ON, &status);
241 	}
242 	if (opt_canonical == TRUE) {
243 		usearch_setAttribute(search, USEARCH_CANONICAL_MATCH, USEARCH_ON,
244 			                 &status);
245 	}
246 	if (U_FAILURE(status)) {
247 		fprintf(stderr, "Error setting search attributes\n");
248 		return FALSE;
249 	}
250 	return TRUE;
251 }
252 
findPattern()253 UBool findPattern()
254 {
255 	UErrorCode status = U_ZERO_ERROR;
256 	int32_t offset = usearch_next(search, &status);
257 	if (offset == USEARCH_DONE) {
258 		fprintf(stdout, "Pattern not found in source\n");
259 	}
260 	while (offset != USEARCH_DONE) {
261 		fprintf(stdout, "Pattern found at offset %d size %d\n", offset,
262 				usearch_getMatchedLength(search));
263 		offset = usearch_next(search, &status);
264 	}
265 	if (U_FAILURE(status)) {
266 		fprintf(stderr, "Error in searching for pattern %d\n", status);
267 		return FALSE;
268 	}
269 	fprintf(stdout, "End of search\n");
270 	return TRUE;
271 }
272 
273 /**
274  * Main   --  process command line, read in and pre-process the test file,
275  *            call other functions to do the actual tests.
276  */
main(int argc,const char ** argv)277 int main(int argc, const char** argv)
278 {
279     if (processOptions(argc, argv, opts) != TRUE || opt_help) {
280         printf(gHelpString);
281         return -1;
282     }
283 
284     if (processCollator() != TRUE) {
285 		fprintf(stderr, "Error creating collator\n");
286 		return -1;
287 	}
288 
289 	if (processStringSearch() != TRUE) {
290 		fprintf(stderr, "Error creating string search\n");
291 		return -1;
292 	}
293 
294 	fprintf(stdout, "Finding pattern %s in source %s\n", opt_pattern,
295 		    opt_source);
296 
297 	findPattern();
298 	ucol_close(collator);
299 	usearch_close(search);
300 	return 0;
301 }
302