1 /*************************************************************************
2 *
3 * Copyright (C) 2016 and later: Unicode, Inc. and others.
4 * License & terms of use: http://www.unicode.org/copyright.html#License
5 *
6 *************************************************************************
7 *************************************************************************
8 * COPYRIGHT:
9 * Copyright (C) 2002-2006 IBM, Inc. All Rights Reserved.
10 *
11 *************************************************************************/
12
13 /**
14 * This program demos string collation
15 */
16
/**
 * Usage text written to stdout by main() when -help / -? is given or when
 * the command line cannot be parsed.
 */
const char gHelpString[] =
    "usage: coll [options*] -source source_string -target target_string\n"
    "-help Display this message.\n"
    "-locale name ICU locale to use. Default is en_US\n"
    /* The value is handed to the collator as a rule string, not a file. */
    "-rules rule Collation rules (overrides locale)\n"
    "-french French accent ordering\n"
    "-norm Normalizing mode on\n"
    "-shifted Shifted mode\n"
    "-lower Lower case first\n"
    "-upper Upper case first\n"
    "-case Enable separate case level\n"
    "-level n Sort level, 1 to 5, for Primary, Secondary, Tertiary, Quaternary, Identical\n"
    "-source string Source string for comparison\n"
    "-target string Target string for comparison\n"
    "Example coll -rules \\u0026b\\u003ca -source a -target b\n"
    "The format \\uXXXX is supported for the rules and comparison strings\n"
    ;
34
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <unicode/utypes.h>
#include <unicode/ucol.h>
#include <unicode/ustring.h>
42
43 /**
44 * Command line option variables
45 * These global variables are set according to the options specified
46 * on the command line by the user.
47 */
48 char * opt_locale = "en_US";
49 char * opt_rules = 0;
50 UBool opt_help = FALSE;
51 UBool opt_norm = FALSE;
52 UBool opt_french = FALSE;
53 UBool opt_shifted = FALSE;
54 UBool opt_lower = FALSE;
55 UBool opt_upper = FALSE;
56 UBool opt_case = FALSE;
57 int opt_level = 0;
58 char * opt_source = "abc";
59 char * opt_target = "abd";
60 UCollator * collator = 0;
61
/**
 * One row of the command line option table.
 *
 * name  - the option exactly as it is typed on the command line; a row
 *         whose name is 0 marks the end of the table.
 * type  - how processOptions() treats the option:
 *           FLAG    stores TRUE into a UBool,
 *           NUM     parses the following argument into an int,
 *           STRING  stores a pointer to the following argument.
 * pVar  - address of the variable that receives the value; its real type
 *         is implied by 'type'.
 */
struct OptSpec {
    const char *name;
    enum {
        FLAG,
        NUM,
        STRING
    } type;
    void *pVar;
};
70
71 OptSpec opts[] = {
72 {"-locale", OptSpec::STRING, &opt_locale},
73 {"-rules", OptSpec::STRING, &opt_rules},
74 {"-source", OptSpec::STRING, &opt_source},
75 {"-target", OptSpec::STRING, &opt_target},
76 {"-norm", OptSpec::FLAG, &opt_norm},
77 {"-french", OptSpec::FLAG, &opt_french},
78 {"-shifted", OptSpec::FLAG, &opt_shifted},
79 {"-lower", OptSpec::FLAG, &opt_lower},
80 {"-upper", OptSpec::FLAG, &opt_upper},
81 {"-case", OptSpec::FLAG, &opt_case},
82 {"-level", OptSpec::NUM, &opt_level},
83 {"-help", OptSpec::FLAG, &opt_help},
84 {"-?", OptSpec::FLAG, &opt_help},
85 {0, OptSpec::FLAG, 0}
86 };
87
88 /**
89 * processOptions() Function to read the command line options.
90 */
processOptions(int argc,const char ** argv,OptSpec opts[])91 UBool processOptions(int argc, const char **argv, OptSpec opts[])
92 {
93 for (int argNum = 1; argNum < argc; argNum ++) {
94 const char *pArgName = argv[argNum];
95 OptSpec *pOpt;
96 for (pOpt = opts; pOpt->name != 0; pOpt ++) {
97 if (strcmp(pOpt->name, pArgName) == 0) {
98 switch (pOpt->type) {
99 case OptSpec::FLAG:
100 *(UBool *)(pOpt->pVar) = TRUE;
101 break;
102 case OptSpec::STRING:
103 argNum ++;
104 if (argNum >= argc) {
105 fprintf(stderr, "value expected for \"%s\" option.\n",
106 pOpt->name);
107 return FALSE;
108 }
109 *(const char **)(pOpt->pVar) = argv[argNum];
110 break;
111 case OptSpec::NUM:
112 argNum ++;
113 if (argNum >= argc) {
114 fprintf(stderr, "value expected for \"%s\" option.\n",
115 pOpt->name);
116 return FALSE;
117 }
118 char *endp;
119 int i = strtol(argv[argNum], &endp, 0);
120 if (endp == argv[argNum]) {
121 fprintf(stderr,
122 "integer value expected for \"%s\" option.\n",
123 pOpt->name);
124 return FALSE;
125 }
126 *(int *)(pOpt->pVar) = i;
127 }
128 break;
129 }
130 }
131 if (pOpt->name == 0)
132 {
133 fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
134 return FALSE;
135 }
136 }
137 return TRUE;
138 }
139
140 /**
141 * ICU string comparison
142 */
strcmp()143 int strcmp()
144 {
145 UChar source[100];
146 UChar target[100];
147 u_unescape(opt_source, source, 100);
148 u_unescape(opt_target, target, 100);
149 UCollationResult result = ucol_strcoll(collator, source, -1, target, -1);
150 if (result == UCOL_LESS) {
151 return -1;
152 }
153 else if (result == UCOL_GREATER) {
154 return 1;
155 }
156 return 0;
157 }
158
159 /**
160 * Creates a collator
161 */
processCollator()162 UBool processCollator()
163 {
164 // Set up an ICU collator
165 UErrorCode status = U_ZERO_ERROR;
166 UChar rules[100];
167
168 if (opt_rules != 0) {
169 u_unescape(opt_rules, rules, 100);
170 collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY,
171 NULL, &status);
172 }
173 else {
174 collator = ucol_open(opt_locale, &status);
175 }
176 if (U_FAILURE(status)) {
177 fprintf(stderr, "Collator creation failed.: %d\n", status);
178 return FALSE;
179 }
180 if (status == U_USING_DEFAULT_WARNING) {
181 fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n",
182 opt_locale);
183 }
184 if (status == U_USING_FALLBACK_WARNING) {
185 fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n",
186 opt_locale);
187 }
188 if (opt_norm) {
189 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
190 }
191 if (opt_french) {
192 ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
193 }
194 if (opt_lower) {
195 ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_LOWER_FIRST,
196 &status);
197 }
198 if (opt_upper) {
199 ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_UPPER_FIRST,
200 &status);
201 }
202 if (opt_case) {
203 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &status);
204 }
205 if (opt_shifted) {
206 ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
207 &status);
208 }
209 if (opt_level != 0) {
210 switch (opt_level) {
211 case 1:
212 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
213 break;
214 case 2:
215 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_SECONDARY,
216 &status);
217 break;
218 case 3:
219 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &status);
220 break;
221 case 4:
222 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_QUATERNARY,
223 &status);
224 break;
225 case 5:
226 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_IDENTICAL,
227 &status);
228 break;
229 default:
230 fprintf(stderr, "-level param must be between 1 and 5\n");
231 return FALSE;
232 }
233 }
234 if (U_FAILURE(status)) {
235 fprintf(stderr, "Collator attribute setting failed.: %d\n", status);
236 return FALSE;
237 }
238 return TRUE;
239 }
240
241 /**
242 * Main -- process command line, read in and pre-process the test file,
243 * call other functions to do the actual tests.
244 */
main(int argc,const char ** argv)245 int main(int argc, const char** argv)
246 {
247 if (processOptions(argc, argv, opts) != TRUE || opt_help) {
248 printf(gHelpString);
249 return -1;
250 }
251
252 if (processCollator() != TRUE) {
253 fprintf(stderr, "Error creating collator for comparison\n");
254 return -1;
255 }
256
257 fprintf(stdout, "Comparing source=%s and target=%s\n", opt_source,
258 opt_target);
259 int result = strcmp();
260 if (result == 0) {
261 fprintf(stdout, "source is equals to target\n");
262 }
263 else if (result < 0) {
264 fprintf(stdout, "source is less than target\n");
265 }
266 else {
267 fprintf(stdout, "source is greater than target\n");
268 }
269
270 ucol_close(collator);
271 return 0;
272 }
273