1 /*************************************************************************
2  * Copyright (C) 2016 and later: Unicode, Inc. and others.
3  * License & terms of use: http://www.unicode.org/copyright.html#License
4  *
5  *************************************************************************
6  *************************************************************************
7  * COPYRIGHT:
8  * Copyright (C) 2002-2006 IBM, Inc.   All Rights Reserved.
9  *
10  *************************************************************************/
11 
/**
 * This program demos collation-based string searching
 */
15 
/**
 * Usage text shown for -help / -? and whenever option parsing fails.
 * The literals are concatenated by the compiler, so every line must carry
 * its own trailing '\n'.
 */
const char gHelpString[] =
    "usage: strsrch [options*] -source source_string -pattern pattern_string\n"
    "-help            Display this message.\n"
    "-locale name     ICU locale to use.  Default is en_US\n"
    "-rules rule      Collation rules string (overrides locale)\n"
    "-french          French accent ordering\n"
    "-norm            Normalizing mode on\n"
    "-shifted         Shifted mode\n"
    "-lower           Lower case first\n"
    "-upper           Upper case first\n"
    "-case            Enable separate case level\n"
    "-level n         Sort level, 1 to 5, for Primary, Secondary, Tertiary, Quaternary, Identical\n"
    "-source string   Source string\n"
    "-pattern string  Pattern string to look for in source\n"
    "-overlap         Enable searching to be done on overlapping patterns\n"
    "-canonical       Enable searching to be done matching canonical equivalent patterns\n"
    "Example strsrch -rules \\u0026b\\u003ca -source a\\u0020b\\u0020bc -pattern b\n"
    "The format \\uXXXX is supported for the rules and comparison strings\n"
    ;
35 
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <unicode/utypes.h>
#include <unicode/ucol.h>
#include <unicode/usearch.h>
#include <unicode/ustring.h>
44 
45 /**
46  * Command line option variables
47  *    These global variables are set according to the options specified
48  *    on the command line by the user.
49  */
50 char * opt_locale      = "en_US";
51 char * opt_rules       = 0;
52 UBool  opt_help        = FALSE;
53 UBool  opt_norm        = FALSE;
54 UBool  opt_french      = FALSE;
55 UBool  opt_shifted     = FALSE;
56 UBool  opt_lower       = FALSE;
57 UBool  opt_upper       = FALSE;
58 UBool  opt_case        = FALSE;
59 UBool  opt_overlap     = FALSE;
60 UBool  opt_canonical   = FALSE;
61 int    opt_level       = 0;
62 char * opt_source      = "International Components for Unicode";
63 char * opt_pattern     = "Unicode";
64 UCollator * collator   = 0;
65 UStringSearch * search = 0;
66 UChar rules[100];
67 UChar source[100];
68 UChar pattern[100];
69 
/**
 * Definitions for the command line options
 *
 * One OptSpec describes a single option: the text the user types, what kind
 * of value (if any) follows it, and where the parsed value is stored.
 */
struct OptSpec {
    // Option text as typed on the command line; a null name ends the table.
    const char *name;
    // FLAG takes no argument, NUM takes an integer argument,
    // STRING takes a string argument.
    enum {FLAG, NUM, STRING} type;
    // Address of the destination variable. processOptions() writes through
    // it as UBool (FLAG), int (NUM) or const char * (STRING).
    void *pVar;
};
78 
/**
 * Table of every option the program accepts, pairing each name with its
 * kind and the global variable that receives its value.  The final entry,
 * whose name is null, terminates the table.
 */
OptSpec opts[] = {
    {"-locale",      OptSpec::STRING, &opt_locale},
    {"-rules",       OptSpec::STRING, &opt_rules},
	{"-source",      OptSpec::STRING, &opt_source},
    {"-pattern",     OptSpec::STRING, &opt_pattern},
    {"-norm",        OptSpec::FLAG,   &opt_norm},
    {"-french",      OptSpec::FLAG,   &opt_french},
    {"-shifted",     OptSpec::FLAG,   &opt_shifted},
    {"-lower",       OptSpec::FLAG,   &opt_lower},
    {"-upper",       OptSpec::FLAG,   &opt_upper},
    {"-case",        OptSpec::FLAG,   &opt_case},
    {"-level",       OptSpec::NUM,    &opt_level},
	{"-overlap",     OptSpec::FLAG,   &opt_overlap},
	{"-canonical",   OptSpec::FLAG,   &opt_canonical},
    {"-help",        OptSpec::FLAG,   &opt_help},
    // "-?" is an alias: it sets the same variable as "-help".
    {"-?",           OptSpec::FLAG,   &opt_help},
    // End-of-table sentinel (null name).
    {0, OptSpec::FLAG, 0}
};
97 
98 /**
99  * processOptions()  Function to read the command line options.
100  */
processOptions(int argc,const char ** argv,OptSpec opts[])101 UBool processOptions(int argc, const char **argv, OptSpec opts[])
102 {
103     for (int argNum = 1; argNum < argc; argNum ++) {
104         const char *pArgName = argv[argNum];
105         OptSpec *pOpt;
106         for (pOpt = opts;  pOpt->name != 0; pOpt ++) {
107             if (strcmp(pOpt->name, pArgName) == 0) {
108                 switch (pOpt->type) {
109                 case OptSpec::FLAG:
110                     *(UBool *)(pOpt->pVar) = TRUE;
111                     break;
112                 case OptSpec::STRING:
113                     argNum ++;
114                     if (argNum >= argc) {
115                         fprintf(stderr, "value expected for \"%s\" option.\n",
116 							    pOpt->name);
117                         return FALSE;
118                     }
119                     *(const char **)(pOpt->pVar) = argv[argNum];
120                     break;
121                 case OptSpec::NUM:
122                     argNum ++;
123                     if (argNum >= argc) {
124                         fprintf(stderr, "value expected for \"%s\" option.\n",
125 							    pOpt->name);
126                         return FALSE;
127                     }
128                     char *endp;
129                     int i = strtol(argv[argNum], &endp, 0);
130                     if (endp == argv[argNum]) {
131                         fprintf(stderr,
132 							    "integer value expected for \"%s\" option.\n",
133 								pOpt->name);
134                         return FALSE;
135                     }
136                     *(int *)(pOpt->pVar) = i;
137                 }
138                 break;
139             }
140         }
141         if (pOpt->name == 0)
142         {
143             fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
144             return FALSE;
145         }
146     }
147 	return TRUE;
148 }
149 
150 /**
151  * Creates a collator
152  */
processCollator()153 UBool processCollator()
154 {
155 	// Set up an ICU collator
156     UErrorCode status = U_ZERO_ERROR;
157 
158     if (opt_rules != 0) {
159 		u_unescape(opt_rules, rules, 100);
160         collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY,
161 			                  NULL, &status);
162     }
163     else {
164         collator = ucol_open(opt_locale, &status);
165     }
166 	if (U_FAILURE(status)) {
167         fprintf(stderr, "Collator creation failed.: %d\n", status);
168         return FALSE;
169     }
170     if (status == U_USING_DEFAULT_WARNING) {
171         fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n",
172 			    opt_locale);
173     }
174     if (status == U_USING_FALLBACK_WARNING) {
175         fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n",
176 			    opt_locale);
177     }
178     if (opt_norm) {
179         ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
180     }
181     if (opt_french) {
182         ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
183     }
184     if (opt_lower) {
185         ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_LOWER_FIRST,
186 			              &status);
187     }
188     if (opt_upper) {
189         ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_UPPER_FIRST,
190 			              &status);
191     }
192     if (opt_case) {
193         ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &status);
194     }
195     if (opt_shifted) {
196         ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
197 			              &status);
198     }
199     if (opt_level != 0) {
200         switch (opt_level) {
201         case 1:
202             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
203             break;
204         case 2:
205             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_SECONDARY,
206 				              &status);
207             break;
208         case 3:
209             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &status);
210             break;
211         case 4:
212             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_QUATERNARY,
213 				              &status);
214             break;
215         case 5:
216             ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_IDENTICAL,
217 				              &status);
218             break;
219         default:
220             fprintf(stderr, "-level param must be between 1 and 5\n");
221             return FALSE;
222         }
223     }
224     if (U_FAILURE(status)) {
225         fprintf(stderr, "Collator attribute setting failed.: %d\n", status);
226         return FALSE;
227     }
228 	return TRUE;
229 }
230 
231 /**
232  * Creates a string search
233  */
processStringSearch()234 UBool processStringSearch()
235 {
236 	u_unescape(opt_source, source, 100);
237 	u_unescape(opt_pattern, pattern, 100);
238 	UErrorCode status = U_ZERO_ERROR;
239 	search = usearch_openFromCollator(pattern, -1, source, -1, collator, NULL,
240 		                              &status);
241 	if (U_FAILURE(status)) {
242 		return FALSE;
243 	}
244 	if (opt_overlap == TRUE) {
245 		usearch_setAttribute(search, USEARCH_OVERLAP, USEARCH_ON, &status);
246 	}
247 	if (opt_canonical == TRUE) {
248 		usearch_setAttribute(search, USEARCH_CANONICAL_MATCH, USEARCH_ON,
249 			                 &status);
250 	}
251 	if (U_FAILURE(status)) {
252 		fprintf(stderr, "Error setting search attributes\n");
253 		return FALSE;
254 	}
255 	return TRUE;
256 }
257 
findPattern()258 UBool findPattern()
259 {
260 	UErrorCode status = U_ZERO_ERROR;
261 	int32_t offset = usearch_next(search, &status);
262 	if (offset == USEARCH_DONE) {
263 		fprintf(stdout, "Pattern not found in source\n");
264 	}
265 	while (offset != USEARCH_DONE) {
266 		fprintf(stdout, "Pattern found at offset %d size %d\n", offset,
267 				usearch_getMatchedLength(search));
268 		offset = usearch_next(search, &status);
269 	}
270 	if (U_FAILURE(status)) {
271 		fprintf(stderr, "Error in searching for pattern %d\n", status);
272 		return FALSE;
273 	}
274 	fprintf(stdout, "End of search\n");
275 	return TRUE;
276 }
277 
278 /**
279  * Main   --  process command line, read in and pre-process the test file,
280  *            call other functions to do the actual tests.
281  */
main(int argc,const char ** argv)282 int main(int argc, const char** argv)
283 {
284     if (processOptions(argc, argv, opts) != TRUE || opt_help) {
285         printf(gHelpString);
286         return -1;
287     }
288 
289     if (processCollator() != TRUE) {
290 		fprintf(stderr, "Error creating collator\n");
291 		return -1;
292 	}
293 
294 	if (processStringSearch() != TRUE) {
295 		fprintf(stderr, "Error creating string search\n");
296 		return -1;
297 	}
298 
299 	fprintf(stdout, "Finding pattern %s in source %s\n", opt_pattern,
300 		    opt_source);
301 
302 	findPattern();
303 	ucol_close(collator);
304 	usearch_close(search);
305 	return 0;
306 }
307