1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1997-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * File ULOC.CPP
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 04/01/97 aliu Creation.
15 * 08/21/98 stephen JDK 1.2 sync
16 * 12/08/98 rtg New Locale implementation and C API
17 * 03/15/99 damiba overhaul.
18 * 04/06/99 stephen changed setDefault() to realloc and copy
19 * 06/14/99 stephen Changed calls to ures_open for new params
20 * 07/21/99 stephen Modified setDefault() to propagate to C++
21 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
22 * brought canonicalization code into line with spec
23 *****************************************************************************/
24
25 /*
26 POSIX's locale format, from putil.c: [no spaces]
27
28 ll [ _CC ] [ . MM ] [ @ VV]
29
30 l = lang, C = ctry, M = charmap, V = variant
31 */
32
33 #include "unicode/bytestream.h"
34 #include "unicode/errorcode.h"
35 #include "unicode/stringpiece.h"
36 #include "unicode/utypes.h"
37 #include "unicode/ustring.h"
38 #include "unicode/uloc.h"
39
40 #include "bytesinkutil.h"
41 #include "putilimp.h"
42 #include "ustr_imp.h"
43 #include "ulocimp.h"
44 #include "umutex.h"
45 #include "cstring.h"
46 #include "cmemory.h"
47 #include "locmap.h"
48 #include "uarrsort.h"
49 #include "uenumimp.h"
50 #include "uassert.h"
51 #include "charstr.h"
52
53 U_NAMESPACE_USE
54
55 /* ### Declarations **************************************************/
56
/*
 * Locale stuff from locid.cpp.
 * These two functions are only declared here (with U_CFUNC linkage); their
 * definitions are not part of this file.
 */
U_CFUNC void locale_set_default(const char *id);
U_CFUNC const char *locale_get_default(void);
60
61 /* ### Data tables **************************************************/
62
63 /**
64 * Table of language codes, both 2- and 3-letter, with preference
65 * given to 2-letter codes where possible. Includes 3-letter codes
66 * that lack a 2-letter equivalent.
67 *
68 * This list must be in sorted order. This list is returned directly
69 * to the user by some API.
70 *
71 * This list must be kept in sync with LANGUAGES_3, with corresponding
72 * entries matched.
73 *
74 * This table should be terminated with a NULL entry, followed by a
75 * second list, and another NULL entry. The first list is visible to
76 * user code when this array is returned by API. The second list
77 * contains codes we support, but do not expose through user API.
78 *
79 * Notes
80 *
81 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
82 * include the revisions up to 2001/7/27 *CWB*
83 *
84 * The 3 character codes are the terminology codes like RFC 3066. This
85 * is compatible with prior ICU codes
86 *
 * "in", "iw", "ji", "jw" and "sh" have been withdrawn. They are still in
 * the table, but have been moved to the end of it because their 3-character
 * codes duplicate those of other entries. This avoids bad searches going
 * from 3-character to 2-character codes.
91 *
92 * The range qaa-qtz is reserved for local use
93 */
94 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
95 /* ISO639 table version is 20150505 */
96 /* Subsequent hand addition of selected languages */
/*
 * Layout: { visible codes..., NULL, obsolete codes..., NULL }.
 * Each entry is index-aligned with the entry at the same position in
 * LANGUAGES_3, so any insertion or removal must be mirrored there.
 */
static const char * const LANGUAGES[] = {
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
    "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
    "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
    "ca",  "cad", "car", "cay", "cch", "ccp", "ce",  "ceb", "cgg",
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
    "cs",  "csb", "cu",  "cv",  "cy",
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
    "dyo", "dyu", "dz",  "dzg",
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
    "ext",
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fy",
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
    "gur", "guz", "gv",  "gwi",
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
    "hup", "hy",  "hz",
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jv",
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
    "kv",  "kw",  "ky",
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
    "ml",  "mn",  "mnc", "mni", "mo",
    "moh", "mos", "mr",  "mrj",
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "my",  "mye", "myv", "mzn",
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
    "pon", "prg", "pro", "ps",  "pt",
    "qu",  "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
    "rw",  "rwk",
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
    "sv",  "sw",  "swb", "swc", "syc", "syr", "szl",
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr", "tl",
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tpi",
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vo",
    "vot", "vro", "vun",
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
    "xal", "xh",  "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
    "zun", "zxx", "zza",
NULL, /* end of the list that is visible to user code */
    "in",  "iw",  "ji",  "jw",  "sh",                          /* obsolete language codes */
NULL  /* end of the second (non-exposed) list */
};
187
/*
 * Deprecated 2-letter language codes and, at the same index, the code that
 * replaces each one. Both arrays end with two NULL entries.
 * NOTE(review): the lookup code that pairs these arrays is outside this
 * chunk; the index alignment is inferred from the matching order and length.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};
194
195 /**
196 * Table of 3-letter language codes.
197 *
198 * This is a lookup table used to convert 3-letter language codes to
199 * their 2-letter equivalent, where possible. It must be kept in sync
200 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
201 * same language as LANGUAGES_3[i]. The commented-out lines are
202 * copied from LANGUAGES to make eyeballing this baby easier.
203 *
204 * Where a 3-letter language code has no 2-letter equivalent, the
205 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
206 *
207 * This table should be terminated with a NULL entry, followed by a
208 * second list, and another NULL entry. The two lists correspond to
209 * the two lists in LANGUAGES.
210 */
211 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
212 /* ISO639 table version is 20150505 */
213 /* Subsequent hand addition of selected languages */
/*
 * Layout mirrors LANGUAGES: { codes..., NULL, codes for the obsolete
 * 2-letter entries..., NULL }. The entry at index i is the 3-letter form of
 * LANGUAGES[i]; codes without a 2-letter form appear unchanged in both tables.
 */
static const char * const LANGUAGES_3[] = {
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
    "ces", "csb", "chu", "chv", "cym",
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
    "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
    "ext",
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fry",
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
    "gur", "guz", "glv", "gwi",
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
    "hup", "hye", "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
    "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mal", "mon", "mnc", "mni", "mol",
    "moh", "mos", "mar", "mrj",
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "mya", "mye", "myv", "mzn",
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
    "pon", "prg", "pro", "pus", "por",
    "que", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
    "kin", "rwk",
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
    "swe", "swa", "swb", "swc", "syc", "syr", "szl",
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
    "vot", "vro", "vun",
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
    "xal", "xho", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
    "zun", "zxx", "zza",
NULL, /* end of the first list; pairs with the first NULL in LANGUAGES */
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
NULL  /* end of the second list */
};
305
306 /**
307 * Table of 2-letter country codes.
308 *
309 * This list must be in sorted order. This list is returned directly
310 * to the user by some API.
311 *
312 * This list must be kept in sync with COUNTRIES_3, with corresponding
313 * entries matched.
314 *
315 * This table should be terminated with a NULL entry, followed by a
316 * second list, and another NULL entry. The first list is visible to
317 * user code when this array is returned by API. The second list
318 * contains codes we support, but do not expose through user API.
319 *
320 * Notes:
321 *
322 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
323 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
324 * new codes keeping the old ones for compatibility updated to include
325 * 1999/12/03 revisions *CWB*
326 *
327 * RO(ROM) is now RO(ROU) according to
328 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
329 */
/*
 * Layout: { visible codes..., NULL, obsolete codes..., NULL }.
 * Each entry is index-aligned with the entry at the same position in
 * COUNTRIES_3, so any insertion or removal must be mirrored there.
 */
static const char * const COUNTRIES[] = {
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
NULL, /* end of the list that is visible to user code */
    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
NULL  /* end of the second (non-exposed) list */
};
365
/*
 * Deprecated country codes and, at the same index, the code that replaces
 * each one (the commented row inside REPLACEMENT_COUNTRIES repeats the
 * deprecated codes so the pairing can be checked by eye). Both arrays end
 * with two NULL entries.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
};
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL  /* replacement country codes */
};
373
374 /**
375 * Table of 3-letter country codes.
376 *
377 * This is a lookup table used to convert 3-letter country codes to
378 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
379 * For all valid i, COUNTRIES[i] must refer to the same country as
380 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
381 * to make eyeballing this baby easier.
382 *
383 * This table should be terminated with a NULL entry, followed by a
384 * second list, and another NULL entry. The two lists correspond to
385 * the two lists in COUNTRIES.
386 */
/*
 * Layout mirrors COUNTRIES: { codes..., NULL, codes for the obsolete
 * 2-letter entries..., NULL }. Each commented row repeats the matching
 * COUNTRIES row so the two tables can be compared by eye.
 */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",     */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL, /* end of the first list; pairs with the first NULL in COUNTRIES */
/*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
NULL  /* end of the second list */
};
453
/**
 * One row of a locale-ID canonicalization table: a literal input ID and the
 * ID that should be produced in its place.
 */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
} CanonicalizationMap;
458
/**
 * A map to canonicalize locale IDs.  This handles a variety of
 * different semantic kinds of transformations.
 *
 * Each row pairs an input ID with its canonical replacement. Rows whose
 * extra subtag is longer than 3 characters spell it after a double
 * underscore; the 3-character ones (GAN, WUU, YUE, and MIN_NAN) use a
 * single underscore, per the in-table note that such subtags are not
 * treated as variants.
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    { "art__LOJBAN",    "jbo" }, /* registered name */
    { "hy__AREVELA",    "hy" }, /* Registered IANA variant */
    { "hy__AREVMDA",    "hyw" }, /* Registered IANA variant */
    { "zh__GUOYU",      "zh" }, /* registered name */
    { "zh__HAKKA",      "hak" }, /* registered name */
    { "zh__XIANG",      "hsn" }, /* registered name */
    // subtags with 3 chars won't be treated as variants.
    { "zh_GAN",         "gan" }, /* registered name */
    { "zh_MIN_NAN",     "nan" }, /* registered name */
    { "zh_WUU",         "wuu" }, /* registered name */
    { "zh_YUE",         "yue" }, /* registered name */
};
476
477 /* ### BCP47 Conversion *******************************************/
/*
 * Test if the locale id has a BCP47 extension and does not have '@'.
 *
 * True when `id` is non-NULL, contains no '@', and its shortest subtag (as
 * reported by getShortestSubtagLength) is exactly one character long.
 *
 * The subtag check is applied to the macro argument itself. It previously
 * referred to a caller-scope variable literally named `localeID`, which only
 * compiled when such a variable existed and inspected the wrong string when
 * the argument was anything else.
 *
 * Note: `id` is evaluated more than once; pass an expression without side
 * effects.
 */
#define _hasBCP47Extension(id) \
    ((id) != NULL && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)

/*
 * Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails.
 *
 * On success `finalID` is set to `buffer` (filled by uloc_forLanguageTag).
 * Otherwise `finalID` is set to the untouched `id`; this covers a
 * non-positive result, a failure status, and an unterminated result. An
 * unterminated result (U_STRING_NOT_TERMINATED_WARNING) is promoted to
 * U_BUFFER_OVERFLOW_ERROR.
 */
#define _ConvertBCP47(finalID, id, buffer, length,err) UPRV_BLOCK_MACRO_BEGIN { \
    if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \
            U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \
        finalID=id; \
        if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \
    } else { \
        finalID=buffer; \
    } \
} UPRV_BLOCK_MACRO_END
490 /* Gets the size of the shortest subtag in the given localeID. */
getShortestSubtagLength(const char * localeID)491 static int32_t getShortestSubtagLength(const char *localeID) {
492 int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
493 int32_t length = localeIDLength;
494 int32_t tmpLength = 0;
495 int32_t i;
496 UBool reset = TRUE;
497
498 for (i = 0; i < localeIDLength; i++) {
499 if (localeID[i] != '_' && localeID[i] != '-') {
500 if (reset) {
501 tmpLength = 0;
502 reset = FALSE;
503 }
504 tmpLength++;
505 } else {
506 if (tmpLength != 0 && tmpLength < length) {
507 length = tmpLength;
508 }
509 reset = TRUE;
510 }
511 }
512
513 return length;
514 }
515
516 /* ### Keywords **************************************************/
/* True when c is one of the characters '0'..'9'. */
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
/* True when c is a letter according to uprv_isASCIILetter, or a digit. */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
/* Punctuation/symbols allowed in legacy key values */
#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

/* Size, in chars, of the fixed buffers that hold one keyword name plus its terminating NUL. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Maximum number of distinct keywords ulocimp_getKeywords will collect from one locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25
524
525 U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char * localeID)526 locale_getKeywordsStart(const char *localeID) {
527 const char *result = NULL;
528 if((result = uprv_strchr(localeID, '@')) != NULL) {
529 return result;
530 }
531 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
532 else {
533 /* We do this because the @ sign is variant, and the @ sign used on one
534 EBCDIC machine won't be compiled the same way on other EBCDIC based
535 machines. */
536 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
537 const uint8_t *charToFind = ebcdicSigns;
538 while(*charToFind) {
539 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
540 return result;
541 }
542 charToFind++;
543 }
544 }
545 #endif
546 return NULL;
547 }
548
549 /**
550 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
551 * @param keywordName incoming name to be canonicalized
552 * @param status return status (keyword too long)
553 * @return length of the keyword name
554 */
locale_canonKeywordName(char * buf,const char * keywordName,UErrorCode * status)555 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
556 {
557 int32_t keywordNameLen = 0;
558
559 for (; *keywordName != 0; keywordName++) {
560 if (!UPRV_ISALPHANUM(*keywordName)) {
561 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
562 return 0;
563 }
564 if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
565 buf[keywordNameLen++] = uprv_tolower(*keywordName);
566 } else {
567 /* keyword name too long for internal buffer */
568 *status = U_INTERNAL_PROGRAM_ERROR;
569 return 0;
570 }
571 }
572 if (keywordNameLen == 0) {
573 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
574 return 0;
575 }
576 buf[keywordNameLen] = 0; /* terminate */
577
578 return keywordNameLen;
579 }
580
/**
 * One keyword=value pair found while parsing the keyword section of a
 * locale ID. The keyword name is stored by value; the value is only
 * referenced, as a pointer/length pair, so it is not NUL-terminated.
 */
typedef struct {
    char keyword[ULOC_KEYWORD_BUFFER_LEN]; /* keyword name, NUL-terminated */
    int32_t keywordLen;                    /* length of keyword, excluding the NUL */
    const char *valueStart;                /* first character of the value */
    int32_t valueLen;                      /* number of characters in the value */
} KeywordStruct;
587
588 static int32_t U_CALLCONV
compareKeywordStructs(const void *,const void * left,const void * right)589 compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
590 const char* leftString = ((const KeywordStruct *)left)->keyword;
591 const char* rightString = ((const KeywordStruct *)right)->keyword;
592 return uprv_strcmp(leftString, rightString);
593 }
594
595 U_CFUNC void
ulocimp_getKeywords(const char * localeID,char prev,ByteSink & sink,UBool valuesToo,UErrorCode * status)596 ulocimp_getKeywords(const char *localeID,
597 char prev,
598 ByteSink& sink,
599 UBool valuesToo,
600 UErrorCode *status)
601 {
602 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
603
604 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
605 int32_t numKeywords = 0;
606 const char* pos = localeID;
607 const char* equalSign = NULL;
608 const char* semicolon = NULL;
609 int32_t i = 0, j, n;
610
611 if(prev == '@') { /* start of keyword definition */
612 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
613 do {
614 UBool duplicate = FALSE;
615 /* skip leading spaces */
616 while(*pos == ' ') {
617 pos++;
618 }
619 if (!*pos) { /* handle trailing "; " */
620 break;
621 }
622 if(numKeywords == maxKeywords) {
623 *status = U_INTERNAL_PROGRAM_ERROR;
624 return;
625 }
626 equalSign = uprv_strchr(pos, '=');
627 semicolon = uprv_strchr(pos, ';');
628 /* lack of '=' [foo@currency] is illegal */
629 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
630 if(!equalSign || (semicolon && semicolon<equalSign)) {
631 *status = U_INVALID_FORMAT_ERROR;
632 return;
633 }
634 /* need to normalize both keyword and keyword name */
635 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
636 /* keyword name too long for internal buffer */
637 *status = U_INTERNAL_PROGRAM_ERROR;
638 return;
639 }
640 for(i = 0, n = 0; i < equalSign - pos; ++i) {
641 if (pos[i] != ' ') {
642 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
643 }
644 }
645
646 /* zero-length keyword is an error. */
647 if (n == 0) {
648 *status = U_INVALID_FORMAT_ERROR;
649 return;
650 }
651
652 keywordList[numKeywords].keyword[n] = 0;
653 keywordList[numKeywords].keywordLen = n;
654 /* now grab the value part. First we skip the '=' */
655 equalSign++;
656 /* then we leading spaces */
657 while(*equalSign == ' ') {
658 equalSign++;
659 }
660
661 /* Premature end or zero-length value */
662 if (!*equalSign || equalSign == semicolon) {
663 *status = U_INVALID_FORMAT_ERROR;
664 return;
665 }
666
667 keywordList[numKeywords].valueStart = equalSign;
668
669 pos = semicolon;
670 i = 0;
671 if(pos) {
672 while(*(pos - i - 1) == ' ') {
673 i++;
674 }
675 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
676 pos++;
677 } else {
678 i = (int32_t)uprv_strlen(equalSign);
679 while(i && equalSign[i-1] == ' ') {
680 i--;
681 }
682 keywordList[numKeywords].valueLen = i;
683 }
684 /* If this is a duplicate keyword, then ignore it */
685 for (j=0; j<numKeywords; ++j) {
686 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
687 duplicate = TRUE;
688 break;
689 }
690 }
691 if (!duplicate) {
692 ++numKeywords;
693 }
694 } while(pos);
695
696 /* now we have a list of keywords */
697 /* we need to sort it */
698 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
699
700 /* Now construct the keyword part */
701 for(i = 0; i < numKeywords; i++) {
702 sink.Append(keywordList[i].keyword, keywordList[i].keywordLen);
703 if(valuesToo) {
704 sink.Append("=", 1);
705 sink.Append(keywordList[i].valueStart, keywordList[i].valueLen);
706 if(i < numKeywords - 1) {
707 sink.Append(";", 1);
708 }
709 } else {
710 sink.Append("\0", 1);
711 }
712 }
713 }
714 }
715
716 U_CAPI int32_t U_EXPORT2
uloc_getKeywordValue(const char * localeID,const char * keywordName,char * buffer,int32_t bufferCapacity,UErrorCode * status)717 uloc_getKeywordValue(const char* localeID,
718 const char* keywordName,
719 char* buffer, int32_t bufferCapacity,
720 UErrorCode* status)
721 {
722 if (U_FAILURE(*status)) {
723 return 0;
724 }
725
726 CheckedArrayByteSink sink(buffer, bufferCapacity);
727 ulocimp_getKeywordValue(localeID, keywordName, sink, status);
728
729 int32_t reslen = sink.NumberOfBytesAppended();
730
731 if (U_FAILURE(*status)) {
732 return reslen;
733 }
734
735 if (sink.Overflowed()) {
736 *status = U_BUFFER_OVERFLOW_ERROR;
737 } else {
738 u_terminateChars(buffer, bufferCapacity, reslen, status);
739 }
740
741 return reslen;
742 }
743
744 U_CAPI void U_EXPORT2
ulocimp_getKeywordValue(const char * localeID,const char * keywordName,icu::ByteSink & sink,UErrorCode * status)745 ulocimp_getKeywordValue(const char* localeID,
746 const char* keywordName,
747 icu::ByteSink& sink,
748 UErrorCode* status)
749 {
750 const char* startSearchHere = NULL;
751 const char* nextSeparator = NULL;
752 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
753 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
754
755 if(status && U_SUCCESS(*status) && localeID) {
756 char tempBuffer[ULOC_FULLNAME_CAPACITY];
757 const char* tmpLocaleID;
758
759 if (keywordName == NULL || keywordName[0] == 0) {
760 *status = U_ILLEGAL_ARGUMENT_ERROR;
761 return;
762 }
763
764 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
765 if(U_FAILURE(*status)) {
766 return;
767 }
768
769 if (_hasBCP47Extension(localeID)) {
770 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
771 } else {
772 tmpLocaleID=localeID;
773 }
774
775 startSearchHere = locale_getKeywordsStart(tmpLocaleID);
776 if(startSearchHere == NULL) {
777 /* no keywords, return at once */
778 return;
779 }
780
781 /* find the first keyword */
782 while(startSearchHere) {
783 const char* keyValueTail;
784 int32_t keyValueLen;
785
786 startSearchHere++; /* skip @ or ; */
787 nextSeparator = uprv_strchr(startSearchHere, '=');
788 if(!nextSeparator) {
789 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
790 return;
791 }
792 /* strip leading & trailing spaces (TC decided to tolerate these) */
793 while(*startSearchHere == ' ') {
794 startSearchHere++;
795 }
796 keyValueTail = nextSeparator;
797 while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
798 keyValueTail--;
799 }
800 /* now keyValueTail points to first char after the keyName */
801 /* copy & normalize keyName from locale */
802 if (startSearchHere == keyValueTail) {
803 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
804 return;
805 }
806 keyValueLen = 0;
807 while (startSearchHere < keyValueTail) {
808 if (!UPRV_ISALPHANUM(*startSearchHere)) {
809 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
810 return;
811 }
812 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
813 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
814 } else {
815 /* keyword name too long for internal buffer */
816 *status = U_INTERNAL_PROGRAM_ERROR;
817 return;
818 }
819 }
820 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
821
822 startSearchHere = uprv_strchr(nextSeparator, ';');
823
824 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
825 /* current entry matches the keyword. */
826 nextSeparator++; /* skip '=' */
827 /* First strip leading & trailing spaces (TC decided to tolerate these) */
828 while(*nextSeparator == ' ') {
829 nextSeparator++;
830 }
831 keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
832 while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
833 keyValueTail--;
834 }
835 /* Now copy the value, but check well-formedness */
836 if (nextSeparator == keyValueTail) {
837 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
838 return;
839 }
840 while (nextSeparator < keyValueTail) {
841 if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
842 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
843 return;
844 }
845 /* Should we lowercase value to return here? Tests expect as-is. */
846 sink.Append(nextSeparator++, 1);
847 }
848 return;
849 }
850 }
851 }
852 }
853
854 U_CAPI int32_t U_EXPORT2
uloc_setKeywordValue(const char * keywordName,const char * keywordValue,char * buffer,int32_t bufferCapacity,UErrorCode * status)855 uloc_setKeywordValue(const char* keywordName,
856 const char* keywordValue,
857 char* buffer, int32_t bufferCapacity,
858 UErrorCode* status)
859 {
860 /* TODO: sorting. removal. */
861 int32_t keywordNameLen;
862 int32_t keywordValueLen;
863 int32_t bufLen;
864 int32_t needLen = 0;
865 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
866 char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
867 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
868 int32_t rc;
869 char* nextSeparator = NULL;
870 char* nextEqualsign = NULL;
871 char* startSearchHere = NULL;
872 char* keywordStart = NULL;
873 CharString updatedKeysAndValues;
874 UBool handledInputKeyAndValue = FALSE;
875 char keyValuePrefix = '@';
876
877 if(U_FAILURE(*status)) {
878 return -1;
879 }
880 if (*status == U_STRING_NOT_TERMINATED_WARNING) {
881 *status = U_ZERO_ERROR;
882 }
883 if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
884 *status = U_ILLEGAL_ARGUMENT_ERROR;
885 return 0;
886 }
887 bufLen = (int32_t)uprv_strlen(buffer);
888 if(bufferCapacity<bufLen) {
889 /* The capacity is less than the length?! Is this NULL terminated? */
890 *status = U_ILLEGAL_ARGUMENT_ERROR;
891 return 0;
892 }
893 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
894 if(U_FAILURE(*status)) {
895 return 0;
896 }
897
898 keywordValueLen = 0;
899 if(keywordValue) {
900 while (*keywordValue != 0) {
901 if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
902 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
903 return 0;
904 }
905 if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
906 /* Should we force lowercase in value to set? */
907 keywordValueBuffer[keywordValueLen++] = *keywordValue++;
908 } else {
909 /* keywordValue too long for internal buffer */
910 *status = U_INTERNAL_PROGRAM_ERROR;
911 return 0;
912 }
913 }
914 }
915 keywordValueBuffer[keywordValueLen] = 0; /* terminate */
916
917 startSearchHere = (char*)locale_getKeywordsStart(buffer);
918 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
919 if(keywordValueLen == 0) { /* no keywords = nothing to remove */
920 U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
921 return bufLen;
922 }
923
924 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
925 if(startSearchHere) { /* had a single @ */
926 needLen--; /* already had the @ */
927 /* startSearchHere points at the @ */
928 } else {
929 startSearchHere=buffer+bufLen;
930 }
931 if(needLen >= bufferCapacity) {
932 *status = U_BUFFER_OVERFLOW_ERROR;
933 return needLen; /* no change */
934 }
935 *startSearchHere++ = '@';
936 uprv_strcpy(startSearchHere, keywordNameBuffer);
937 startSearchHere += keywordNameLen;
938 *startSearchHere++ = '=';
939 uprv_strcpy(startSearchHere, keywordValueBuffer);
940 U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
941 return needLen;
942 } /* end shortcut - no @ */
943
944 keywordStart = startSearchHere;
945 /* search for keyword */
946 while(keywordStart) {
947 const char* keyValueTail;
948 int32_t keyValueLen;
949
950 keywordStart++; /* skip @ or ; */
951 nextEqualsign = uprv_strchr(keywordStart, '=');
952 if (!nextEqualsign) {
953 *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
954 return 0;
955 }
956 /* strip leading & trailing spaces (TC decided to tolerate these) */
957 while(*keywordStart == ' ') {
958 keywordStart++;
959 }
960 keyValueTail = nextEqualsign;
961 while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
962 keyValueTail--;
963 }
964 /* now keyValueTail points to first char after the keyName */
965 /* copy & normalize keyName from locale */
966 if (keywordStart == keyValueTail) {
967 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
968 return 0;
969 }
970 keyValueLen = 0;
971 while (keywordStart < keyValueTail) {
972 if (!UPRV_ISALPHANUM(*keywordStart)) {
973 *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
974 return 0;
975 }
976 if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
977 localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
978 } else {
979 /* keyword name too long for internal buffer */
980 *status = U_INTERNAL_PROGRAM_ERROR;
981 return 0;
982 }
983 }
984 localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
985
986 nextSeparator = uprv_strchr(nextEqualsign, ';');
987
988 /* start processing the value part */
989 nextEqualsign++; /* skip '=' */
990 /* First strip leading & trailing spaces (TC decided to tolerate these) */
991 while(*nextEqualsign == ' ') {
992 nextEqualsign++;
993 }
994 keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
995 while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
996 keyValueTail--;
997 }
998 if (nextEqualsign == keyValueTail) {
999 *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
1000 return 0;
1001 }
1002
1003 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1004 if(rc == 0) {
1005 /* Current entry matches the input keyword. Update the entry */
1006 if(keywordValueLen > 0) { /* updating a value */
1007 updatedKeysAndValues.append(keyValuePrefix, *status);
1008 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1009 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1010 updatedKeysAndValues.append('=', *status);
1011 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1012 } /* else removing this entry, don't emit anything */
1013 handledInputKeyAndValue = TRUE;
1014 } else {
1015 /* input keyword sorts earlier than current entry, add before current entry */
1016 if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
1017 /* insert new entry at this location */
1018 updatedKeysAndValues.append(keyValuePrefix, *status);
1019 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1020 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1021 updatedKeysAndValues.append('=', *status);
1022 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1023 handledInputKeyAndValue = TRUE;
1024 }
1025 /* copy the current entry */
1026 updatedKeysAndValues.append(keyValuePrefix, *status);
1027 keyValuePrefix = ';'; /* for any subsequent key-value pair */
1028 updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
1029 updatedKeysAndValues.append('=', *status);
1030 updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
1031 }
1032 if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
1033 /* append new entry at the end, it sorts later than existing entries */
1034 updatedKeysAndValues.append(keyValuePrefix, *status);
1035 /* skip keyValuePrefix update, no subsequent key-value pair */
1036 updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1037 updatedKeysAndValues.append('=', *status);
1038 updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1039 handledInputKeyAndValue = TRUE;
1040 }
1041 keywordStart = nextSeparator;
1042 } /* end loop searching */
1043
1044 /* Any error from updatedKeysAndValues.append above would be internal and not due to
1045 * problems with the passed-in locale. So if we did encounter problems with the
1046 * passed-in locale above, those errors took precedence and overrode any error
1047 * status from updatedKeysAndValues.append, and also caused a return of 0. If there
1048 * are errors here they are from updatedKeysAndValues.append; they do cause an
1049 * error return but the passed-in locale is unmodified and the original bufLen is
1050 * returned.
1051 */
1052 if (!handledInputKeyAndValue || U_FAILURE(*status)) {
1053 /* if input key/value specified removal of a keyword not present in locale, or
1054 * there was an error in CharString.append, leave original locale alone. */
1055 U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
1056 return bufLen;
1057 }
1058
1059 // needLen = length of the part before '@'
1060 needLen = (int32_t)(startSearchHere - buffer);
1061 // Check to see can we fit the startSearchHere, if not, return
1062 // U_BUFFER_OVERFLOW_ERROR without copy updatedKeysAndValues into it.
1063 // We do this because this API function does not behave like most others:
1064 // It promises never to set a U_STRING_NOT_TERMINATED_WARNING.
1065 // When the contents fits but without the terminating NUL, in this case we need to not change
1066 // the buffer contents and return with a buffer overflow error.
1067 int32_t appendLength = updatedKeysAndValues.length();
1068 if (appendLength >= bufferCapacity - needLen) {
1069 *status = U_BUFFER_OVERFLOW_ERROR;
1070 return needLen + appendLength;
1071 }
1072 needLen += updatedKeysAndValues.extract(
1073 startSearchHere, bufferCapacity - needLen, *status);
1074 U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
1075 return needLen;
1076 }
1077
1078 /* ### ID parsing implementation **************************************************/
1079
/* TRUE if 'a' is one of the letters that can start a special prefix:
 * 'x', 'X', 'i' or 'I'. The argument is evaluated more than once, so it
 * must not have side effects. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* returns TRUE if one of the special prefixes is here (s=string)
   'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1090
1091 /**
1092 * Lookup 'key' in the array 'list'. The array 'list' should contain
1093 * a NULL entry, followed by more entries, and a second NULL entry.
1094 *
1095 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1096 * COUNTRIES_3.
1097 */
_findIndex(const char * const * list,const char * key)1098 static int16_t _findIndex(const char* const* list, const char* key)
1099 {
1100 const char* const* anchor = list;
1101 int32_t pass = 0;
1102
1103 /* Make two passes through two NULL-terminated arrays at 'list' */
1104 while (pass++ < 2) {
1105 while (*list) {
1106 if (uprv_strcmp(key, *list) == 0) {
1107 return (int16_t)(list - anchor);
1108 }
1109 list++;
1110 }
1111 ++list; /* skip final NULL *CWB*/
1112 }
1113 return -1;
1114 }
1115
1116 U_CFUNC const char*
uloc_getCurrentCountryID(const char * oldID)1117 uloc_getCurrentCountryID(const char* oldID){
1118 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1119 if (offset >= 0) {
1120 return REPLACEMENT_COUNTRIES[offset];
1121 }
1122 return oldID;
1123 }
1124 U_CFUNC const char*
uloc_getCurrentLanguageID(const char * oldID)1125 uloc_getCurrentLanguageID(const char* oldID){
1126 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1127 if (offset >= 0) {
1128 return REPLACEMENT_LANGUAGES[offset];
1129 }
1130 return oldID;
1131 }
1132 /*
1133 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1134 * avoid duplicating code to handle the earlier locale ID pieces
1135 * in the functions for the later ones by
1136 * setting the *pEnd pointer to where they stopped parsing
1137 *
1138 * TODO try to use this in Locale
1139 */
1140 CharString U_EXPORT2
ulocimp_getLanguage(const char * localeID,const char ** pEnd,UErrorCode & status)1141 ulocimp_getLanguage(const char *localeID,
1142 const char **pEnd,
1143 UErrorCode &status) {
1144 CharString result;
1145
1146 if (uprv_stricmp(localeID, "root") == 0) {
1147 localeID += 4;
1148 } else if (uprv_strnicmp(localeID, "und", 3) == 0 &&
1149 (localeID[3] == '\0' ||
1150 localeID[3] == '-' ||
1151 localeID[3] == '_' ||
1152 localeID[3] == '@')) {
1153 localeID += 3;
1154 }
1155
1156 /* if it starts with i- or x- then copy that prefix */
1157 if(_isIDPrefix(localeID)) {
1158 result.append((char)uprv_tolower(*localeID), status);
1159 result.append('-', status);
1160 localeID+=2;
1161 }
1162
1163 /* copy the language as far as possible and count its length */
1164 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1165 result.append((char)uprv_tolower(*localeID), status);
1166 localeID++;
1167 }
1168
1169 if(result.length()==3) {
1170 /* convert 3 character code to 2 character code if possible *CWB*/
1171 int32_t offset = _findIndex(LANGUAGES_3, result.data());
1172 if(offset>=0) {
1173 result.clear();
1174 result.append(LANGUAGES[offset], status);
1175 }
1176 }
1177
1178 if(pEnd!=NULL) {
1179 *pEnd=localeID;
1180 }
1181
1182 return result;
1183 }
1184
1185 CharString U_EXPORT2
ulocimp_getScript(const char * localeID,const char ** pEnd,UErrorCode & status)1186 ulocimp_getScript(const char *localeID,
1187 const char **pEnd,
1188 UErrorCode &status) {
1189 CharString result;
1190 int32_t idLen = 0;
1191
1192 if (pEnd != NULL) {
1193 *pEnd = localeID;
1194 }
1195
1196 /* copy the second item as far as possible and count its length */
1197 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1198 && uprv_isASCIILetter(localeID[idLen])) {
1199 idLen++;
1200 }
1201
1202 /* If it's exactly 4 characters long, then it's a script and not a country. */
1203 if (idLen == 4) {
1204 int32_t i;
1205 if (pEnd != NULL) {
1206 *pEnd = localeID+idLen;
1207 }
1208 if (idLen >= 1) {
1209 result.append((char)uprv_toupper(*(localeID++)), status);
1210 }
1211 for (i = 1; i < idLen; i++) {
1212 result.append((char)uprv_tolower(*(localeID++)), status);
1213 }
1214 }
1215
1216 return result;
1217 }
1218
1219 CharString U_EXPORT2
ulocimp_getCountry(const char * localeID,const char ** pEnd,UErrorCode & status)1220 ulocimp_getCountry(const char *localeID,
1221 const char **pEnd,
1222 UErrorCode &status) {
1223 CharString result;
1224 int32_t idLen=0;
1225
1226 /* copy the country as far as possible and count its length */
1227 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1228 result.append((char)uprv_toupper(localeID[idLen]), status);
1229 idLen++;
1230 }
1231
1232 /* the country should be either length 2 or 3 */
1233 if (idLen == 2 || idLen == 3) {
1234 /* convert 3 character code to 2 character code if possible *CWB*/
1235 if(idLen==3) {
1236 int32_t offset = _findIndex(COUNTRIES_3, result.data());
1237 if(offset>=0) {
1238 result.clear();
1239 result.append(COUNTRIES[offset], status);
1240 }
1241 }
1242 localeID+=idLen;
1243 } else {
1244 result.clear();
1245 }
1246
1247 if(pEnd!=NULL) {
1248 *pEnd=localeID;
1249 }
1250
1251 return result;
1252 }
1253
1254 /**
1255 * @param needSeparator if true, then add leading '_' if any variants
1256 * are added to 'variant'
1257 */
1258 static void
_getVariant(const char * localeID,char prev,ByteSink & sink,UBool needSeparator)1259 _getVariant(const char *localeID,
1260 char prev,
1261 ByteSink& sink,
1262 UBool needSeparator) {
1263 UBool hasVariant = FALSE;
1264
1265 /* get one or more variant tags and separate them with '_' */
1266 if(_isIDSeparator(prev)) {
1267 /* get a variant string after a '-' or '_' */
1268 while(!_isTerminator(*localeID)) {
1269 if (needSeparator) {
1270 sink.Append("_", 1);
1271 needSeparator = FALSE;
1272 }
1273 char c = (char)uprv_toupper(*localeID);
1274 if (c == '-') c = '_';
1275 sink.Append(&c, 1);
1276 hasVariant = TRUE;
1277 localeID++;
1278 }
1279 }
1280
1281 /* if there is no variant tag after a '-' or '_' then look for '@' */
1282 if(!hasVariant) {
1283 if(prev=='@') {
1284 /* keep localeID */
1285 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1286 ++localeID; /* point after the '@' */
1287 } else {
1288 return;
1289 }
1290 while(!_isTerminator(*localeID)) {
1291 if (needSeparator) {
1292 sink.Append("_", 1);
1293 needSeparator = FALSE;
1294 }
1295 char c = (char)uprv_toupper(*localeID);
1296 if (c == '-' || c == ',') c = '_';
1297 sink.Append(&c, 1);
1298 localeID++;
1299 }
1300 }
1301 }
1302
1303 /* Keyword enumeration */
1304
/**
 * Per-enumeration state for the keyword UEnumeration.
 */
struct UKeywordsContext {
    /** Owned copy of the keyword list: NUL-terminated strings stored back to
     *  back, ended by an empty string. Freed by uloc_kw_closeKeywords(). */
    char* keywords;
    /** Next keyword to hand out; points into 'keywords'. */
    char* current;
};
1309
1310 U_CDECL_BEGIN
1311
1312 static void U_CALLCONV
uloc_kw_closeKeywords(UEnumeration * enumerator)1313 uloc_kw_closeKeywords(UEnumeration *enumerator) {
1314 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1315 uprv_free(enumerator->context);
1316 uprv_free(enumerator);
1317 }
1318
1319 static int32_t U_CALLCONV
uloc_kw_countKeywords(UEnumeration * en,UErrorCode *)1320 uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1321 char *kw = ((UKeywordsContext *)en->context)->keywords;
1322 int32_t result = 0;
1323 while(*kw) {
1324 result++;
1325 kw += uprv_strlen(kw)+1;
1326 }
1327 return result;
1328 }
1329
1330 static const char * U_CALLCONV
uloc_kw_nextKeyword(UEnumeration * en,int32_t * resultLength,UErrorCode *)1331 uloc_kw_nextKeyword(UEnumeration* en,
1332 int32_t* resultLength,
1333 UErrorCode* /*status*/) {
1334 const char* result = ((UKeywordsContext *)en->context)->current;
1335 int32_t len = 0;
1336 if(*result) {
1337 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1338 ((UKeywordsContext *)en->context)->current += len+1;
1339 } else {
1340 result = NULL;
1341 }
1342 if (resultLength) {
1343 *resultLength = len;
1344 }
1345 return result;
1346 }
1347
1348 static void U_CALLCONV
uloc_kw_resetKeywords(UEnumeration * en,UErrorCode *)1349 uloc_kw_resetKeywords(UEnumeration* en,
1350 UErrorCode* /*status*/) {
1351 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1352 }
1353
1354 U_CDECL_END
1355
1356
1357 static const UEnumeration gKeywordsEnum = {
1358 NULL,
1359 NULL,
1360 uloc_kw_closeKeywords,
1361 uloc_kw_countKeywords,
1362 uenum_unextDefault,
1363 uloc_kw_nextKeyword,
1364 uloc_kw_resetKeywords
1365 };
1366
1367 U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywordList(const char * keywordList,int32_t keywordListSize,UErrorCode * status)1368 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1369 {
1370 LocalMemory<UKeywordsContext> myContext;
1371 LocalMemory<UEnumeration> result;
1372
1373 if (U_FAILURE(*status)) {
1374 return nullptr;
1375 }
1376 myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));
1377 result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
1378 if (myContext.isNull() || result.isNull()) {
1379 *status = U_MEMORY_ALLOCATION_ERROR;
1380 return nullptr;
1381 }
1382 uprv_memcpy(result.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));
1383 myContext->keywords = static_cast<char *>(uprv_malloc(keywordListSize+1));
1384 if (myContext->keywords == nullptr) {
1385 *status = U_MEMORY_ALLOCATION_ERROR;
1386 return nullptr;
1387 }
1388 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1389 myContext->keywords[keywordListSize] = 0;
1390 myContext->current = myContext->keywords;
1391 result->context = myContext.orphan();
1392 return result.orphan();
1393 }
1394
1395 U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywords(const char * localeID,UErrorCode * status)1396 uloc_openKeywords(const char* localeID,
1397 UErrorCode* status)
1398 {
1399 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1400 const char* tmpLocaleID;
1401
1402 if(status==NULL || U_FAILURE(*status)) {
1403 return 0;
1404 }
1405
1406 if (_hasBCP47Extension(localeID)) {
1407 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1408 } else {
1409 if (localeID==NULL) {
1410 localeID=uloc_getDefault();
1411 }
1412 tmpLocaleID=localeID;
1413 }
1414
1415 /* Skip the language */
1416 ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *status);
1417 if (U_FAILURE(*status)) {
1418 return 0;
1419 }
1420
1421 if(_isIDSeparator(*tmpLocaleID)) {
1422 const char *scriptID;
1423 /* Skip the script if available */
1424 ulocimp_getScript(tmpLocaleID+1, &scriptID, *status);
1425 if (U_FAILURE(*status)) {
1426 return 0;
1427 }
1428 if(scriptID != tmpLocaleID+1) {
1429 /* Found optional script */
1430 tmpLocaleID = scriptID;
1431 }
1432 /* Skip the Country */
1433 if (_isIDSeparator(*tmpLocaleID)) {
1434 ulocimp_getCountry(tmpLocaleID+1, &tmpLocaleID, *status);
1435 if (U_FAILURE(*status)) {
1436 return 0;
1437 }
1438 }
1439 }
1440
1441 /* keywords are located after '@' */
1442 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1443 CharString keywords;
1444 CharStringByteSink sink(&keywords);
1445 ulocimp_getKeywords(tmpLocaleID+1, '@', sink, FALSE, status);
1446 if (U_FAILURE(*status)) {
1447 return NULL;
1448 }
1449 return uloc_openKeywordList(keywords.data(), keywords.length(), status);
1450 }
1451 return NULL;
1452 }
1453
1454
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* TRUE if any bit of 'mask' is set in 'options'. Both arguments are
 * parenthesized so that compound expressions such as (A | B) expand
 * correctly. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The tag "i-default", deliberately stored without a terminating NUL so
 * that I_DEFAULT_LENGTH is the number of characters in the tag. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
1463
1464 /**
1465 * Canonicalize the given localeID, to level 1 or to level 2,
1466 * depending on the options. To specify level 1, pass in options=0.
1467 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1468 *
1469 * This is the code underlying uloc_getName and uloc_canonicalize.
1470 */
1471 static void
_canonicalize(const char * localeID,ByteSink & sink,uint32_t options,UErrorCode * err)1472 _canonicalize(const char* localeID,
1473 ByteSink& sink,
1474 uint32_t options,
1475 UErrorCode* err) {
1476 int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
1477 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1478 const char* origLocaleID;
1479 const char* tmpLocaleID;
1480 const char* keywordAssign = NULL;
1481 const char* separatorIndicator = NULL;
1482
1483 if (U_FAILURE(*err)) {
1484 return;
1485 }
1486
1487 if (_hasBCP47Extension(localeID)) {
1488 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1489 } else {
1490 if (localeID==NULL) {
1491 localeID=uloc_getDefault();
1492 }
1493 tmpLocaleID=localeID;
1494 }
1495
1496 origLocaleID=tmpLocaleID;
1497
1498 /* get all pieces, one after another, and separate with '_' */
1499 CharString tag = ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
1500
1501 if (tag.length() == I_DEFAULT_LENGTH &&
1502 uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) {
1503 tag.clear();
1504 tag.append(uloc_getDefault(), *err);
1505 } else if(_isIDSeparator(*tmpLocaleID)) {
1506 const char *scriptID;
1507
1508 ++fieldCount;
1509 tag.append('_', *err);
1510
1511 CharString script = ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
1512 tag.append(script, *err);
1513 scriptSize = script.length();
1514 if(scriptSize > 0) {
1515 /* Found optional script */
1516 tmpLocaleID = scriptID;
1517 ++fieldCount;
1518 if (_isIDSeparator(*tmpLocaleID)) {
1519 /* If there is something else, then we add the _ */
1520 tag.append('_', *err);
1521 }
1522 }
1523
1524 if (_isIDSeparator(*tmpLocaleID)) {
1525 const char *cntryID;
1526
1527 CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
1528 tag.append(country, *err);
1529 if (!country.isEmpty()) {
1530 /* Found optional country */
1531 tmpLocaleID = cntryID;
1532 }
1533 if(_isIDSeparator(*tmpLocaleID)) {
1534 /* If there is something else, then we add the _ if we found country before. */
1535 if (!_isIDSeparator(*(tmpLocaleID+1))) {
1536 ++fieldCount;
1537 tag.append('_', *err);
1538 }
1539
1540 variantSize = -tag.length();
1541 {
1542 CharStringByteSink s(&tag);
1543 _getVariant(tmpLocaleID+1, *tmpLocaleID, s, FALSE);
1544 }
1545 variantSize += tag.length();
1546 if (variantSize > 0) {
1547 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1548 }
1549 }
1550 }
1551 }
1552
1553 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1554 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1555 UBool done = FALSE;
1556 do {
1557 char c = *tmpLocaleID;
1558 switch (c) {
1559 case 0:
1560 case '@':
1561 done = TRUE;
1562 break;
1563 default:
1564 tag.append(c, *err);
1565 ++tmpLocaleID;
1566 break;
1567 }
1568 } while (!done);
1569 }
1570
1571 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1572 After this, tmpLocaleID either points to '@' or is NULL */
1573 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1574 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1575 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1576 }
1577
1578 /* Copy POSIX-style variant, if any [mr@FOO] */
1579 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1580 tmpLocaleID != NULL && keywordAssign == NULL) {
1581 for (;;) {
1582 char c = *tmpLocaleID;
1583 if (c == 0) {
1584 break;
1585 }
1586 tag.append(c, *err);
1587 ++tmpLocaleID;
1588 }
1589 }
1590
1591 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1592 /* Handle @FOO variant if @ is present and not followed by = */
1593 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1594 /* Add missing '_' if needed */
1595 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1596 do {
1597 tag.append('_', *err);
1598 ++fieldCount;
1599 } while(fieldCount<2);
1600 }
1601
1602 int32_t posixVariantSize = -tag.length();
1603 {
1604 CharStringByteSink s(&tag);
1605 _getVariant(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0));
1606 }
1607 posixVariantSize += tag.length();
1608 if (posixVariantSize > 0) {
1609 variantSize += posixVariantSize;
1610 }
1611 }
1612
1613 /* Look up the ID in the canonicalization map */
1614 for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
1615 StringPiece id(CANONICALIZE_MAP[j].id);
1616 if (tag == id) {
1617 if (id.empty() && tmpLocaleID != NULL) {
1618 break; /* Don't remap "" if keywords present */
1619 }
1620 tag.clear();
1621 tag.append(CANONICALIZE_MAP[j].canonicalID, *err);
1622 break;
1623 }
1624 }
1625 }
1626
1627 sink.Append(tag.data(), tag.length());
1628
1629 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1630 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1631 (!separatorIndicator || separatorIndicator > keywordAssign)) {
1632 sink.Append("@", 1);
1633 ++fieldCount;
1634 ulocimp_getKeywords(tmpLocaleID+1, '@', sink, TRUE, err);
1635 }
1636 }
1637 }
1638
1639 /* ### ID parsing API **************************************************/
1640
1641 U_CAPI int32_t U_EXPORT2
uloc_getParent(const char * localeID,char * parent,int32_t parentCapacity,UErrorCode * err)1642 uloc_getParent(const char* localeID,
1643 char* parent,
1644 int32_t parentCapacity,
1645 UErrorCode* err)
1646 {
1647 const char *lastUnderscore;
1648 int32_t i;
1649
1650 if (U_FAILURE(*err))
1651 return 0;
1652
1653 if (localeID == NULL)
1654 localeID = uloc_getDefault();
1655
1656 lastUnderscore=uprv_strrchr(localeID, '_');
1657 if(lastUnderscore!=NULL) {
1658 i=(int32_t)(lastUnderscore-localeID);
1659 } else {
1660 i=0;
1661 }
1662
1663 if (i > 0) {
1664 if (uprv_strnicmp(localeID, "und_", 4) == 0) {
1665 localeID += 3;
1666 i -= 3;
1667 uprv_memmove(parent, localeID, uprv_min(i, parentCapacity));
1668 } else if (parent != localeID) {
1669 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1670 }
1671 }
1672
1673 return u_terminateChars(parent, parentCapacity, i, err);
1674 }
1675
1676 U_CAPI int32_t U_EXPORT2
uloc_getLanguage(const char * localeID,char * language,int32_t languageCapacity,UErrorCode * err)1677 uloc_getLanguage(const char* localeID,
1678 char* language,
1679 int32_t languageCapacity,
1680 UErrorCode* err)
1681 {
1682 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1683
1684 if (err==NULL || U_FAILURE(*err)) {
1685 return 0;
1686 }
1687
1688 if(localeID==NULL) {
1689 localeID=uloc_getDefault();
1690 }
1691
1692 return ulocimp_getLanguage(localeID, NULL, *err).extract(language, languageCapacity, *err);
1693 }
1694
1695 U_CAPI int32_t U_EXPORT2
uloc_getScript(const char * localeID,char * script,int32_t scriptCapacity,UErrorCode * err)1696 uloc_getScript(const char* localeID,
1697 char* script,
1698 int32_t scriptCapacity,
1699 UErrorCode* err)
1700 {
1701 if(err==NULL || U_FAILURE(*err)) {
1702 return 0;
1703 }
1704
1705 if(localeID==NULL) {
1706 localeID=uloc_getDefault();
1707 }
1708
1709 /* skip the language */
1710 ulocimp_getLanguage(localeID, &localeID, *err);
1711 if (U_FAILURE(*err)) {
1712 return 0;
1713 }
1714
1715 if(_isIDSeparator(*localeID)) {
1716 return ulocimp_getScript(localeID+1, NULL, *err).extract(script, scriptCapacity, *err);
1717 }
1718 return u_terminateChars(script, scriptCapacity, 0, err);
1719 }
1720
1721 U_CAPI int32_t U_EXPORT2
uloc_getCountry(const char * localeID,char * country,int32_t countryCapacity,UErrorCode * err)1722 uloc_getCountry(const char* localeID,
1723 char* country,
1724 int32_t countryCapacity,
1725 UErrorCode* err)
1726 {
1727 if(err==NULL || U_FAILURE(*err)) {
1728 return 0;
1729 }
1730
1731 if(localeID==NULL) {
1732 localeID=uloc_getDefault();
1733 }
1734
1735 /* Skip the language */
1736 ulocimp_getLanguage(localeID, &localeID, *err);
1737 if (U_FAILURE(*err)) {
1738 return 0;
1739 }
1740
1741 if(_isIDSeparator(*localeID)) {
1742 const char *scriptID;
1743 /* Skip the script if available */
1744 ulocimp_getScript(localeID+1, &scriptID, *err);
1745 if (U_FAILURE(*err)) {
1746 return 0;
1747 }
1748 if(scriptID != localeID+1) {
1749 /* Found optional script */
1750 localeID = scriptID;
1751 }
1752 if(_isIDSeparator(*localeID)) {
1753 return ulocimp_getCountry(localeID+1, NULL, *err).extract(country, countryCapacity, *err);
1754 }
1755 }
1756 return u_terminateChars(country, countryCapacity, 0, err);
1757 }
1758
1759 U_CAPI int32_t U_EXPORT2
uloc_getVariant(const char * localeID,char * variant,int32_t variantCapacity,UErrorCode * err)1760 uloc_getVariant(const char* localeID,
1761 char* variant,
1762 int32_t variantCapacity,
1763 UErrorCode* err)
1764 {
1765 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1766 const char* tmpLocaleID;
1767 int32_t i=0;
1768
1769 if(err==NULL || U_FAILURE(*err)) {
1770 return 0;
1771 }
1772
1773 if (_hasBCP47Extension(localeID)) {
1774 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1775 } else {
1776 if (localeID==NULL) {
1777 localeID=uloc_getDefault();
1778 }
1779 tmpLocaleID=localeID;
1780 }
1781
1782 /* Skip the language */
1783 ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
1784 if (U_FAILURE(*err)) {
1785 return 0;
1786 }
1787
1788 if(_isIDSeparator(*tmpLocaleID)) {
1789 const char *scriptID;
1790 /* Skip the script if available */
1791 ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
1792 if (U_FAILURE(*err)) {
1793 return 0;
1794 }
1795 if(scriptID != tmpLocaleID+1) {
1796 /* Found optional script */
1797 tmpLocaleID = scriptID;
1798 }
1799 /* Skip the Country */
1800 if (_isIDSeparator(*tmpLocaleID)) {
1801 const char *cntryID;
1802 ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
1803 if (U_FAILURE(*err)) {
1804 return 0;
1805 }
1806 if (cntryID != tmpLocaleID+1) {
1807 /* Found optional country */
1808 tmpLocaleID = cntryID;
1809 }
1810 if(_isIDSeparator(*tmpLocaleID)) {
1811 /* If there was no country ID, skip a possible extra IDSeparator */
1812 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
1813 tmpLocaleID++;
1814 }
1815
1816 CheckedArrayByteSink sink(variant, variantCapacity);
1817 _getVariant(tmpLocaleID+1, *tmpLocaleID, sink, FALSE);
1818
1819 i = sink.NumberOfBytesAppended();
1820
1821 if (U_FAILURE(*err)) {
1822 return i;
1823 }
1824
1825 if (sink.Overflowed()) {
1826 *err = U_BUFFER_OVERFLOW_ERROR;
1827 return i;
1828 }
1829 }
1830 }
1831 }
1832
1833 return u_terminateChars(variant, variantCapacity, i, err);
1834 }
1835
1836 U_CAPI int32_t U_EXPORT2
uloc_getName(const char * localeID,char * name,int32_t nameCapacity,UErrorCode * err)1837 uloc_getName(const char* localeID,
1838 char* name,
1839 int32_t nameCapacity,
1840 UErrorCode* err)
1841 {
1842 if (U_FAILURE(*err)) {
1843 return 0;
1844 }
1845
1846 CheckedArrayByteSink sink(name, nameCapacity);
1847 ulocimp_getName(localeID, sink, err);
1848
1849 int32_t reslen = sink.NumberOfBytesAppended();
1850
1851 if (U_FAILURE(*err)) {
1852 return reslen;
1853 }
1854
1855 if (sink.Overflowed()) {
1856 *err = U_BUFFER_OVERFLOW_ERROR;
1857 } else {
1858 u_terminateChars(name, nameCapacity, reslen, err);
1859 }
1860
1861 return reslen;
1862 }
1863
1864 U_CAPI void U_EXPORT2
ulocimp_getName(const char * localeID,ByteSink & sink,UErrorCode * err)1865 ulocimp_getName(const char* localeID,
1866 ByteSink& sink,
1867 UErrorCode* err)
1868 {
1869 _canonicalize(localeID, sink, 0, err);
1870 }
1871
1872 U_CAPI int32_t U_EXPORT2
uloc_getBaseName(const char * localeID,char * name,int32_t nameCapacity,UErrorCode * err)1873 uloc_getBaseName(const char* localeID,
1874 char* name,
1875 int32_t nameCapacity,
1876 UErrorCode* err)
1877 {
1878 if (U_FAILURE(*err)) {
1879 return 0;
1880 }
1881
1882 CheckedArrayByteSink sink(name, nameCapacity);
1883 ulocimp_getBaseName(localeID, sink, err);
1884
1885 int32_t reslen = sink.NumberOfBytesAppended();
1886
1887 if (U_FAILURE(*err)) {
1888 return reslen;
1889 }
1890
1891 if (sink.Overflowed()) {
1892 *err = U_BUFFER_OVERFLOW_ERROR;
1893 } else {
1894 u_terminateChars(name, nameCapacity, reslen, err);
1895 }
1896
1897 return reslen;
1898 }
1899
1900 U_CAPI void U_EXPORT2
ulocimp_getBaseName(const char * localeID,ByteSink & sink,UErrorCode * err)1901 ulocimp_getBaseName(const char* localeID,
1902 ByteSink& sink,
1903 UErrorCode* err)
1904 {
1905 _canonicalize(localeID, sink, _ULOC_STRIP_KEYWORDS, err);
1906 }
1907
1908 U_CAPI int32_t U_EXPORT2
uloc_canonicalize(const char * localeID,char * name,int32_t nameCapacity,UErrorCode * err)1909 uloc_canonicalize(const char* localeID,
1910 char* name,
1911 int32_t nameCapacity,
1912 UErrorCode* err)
1913 {
1914 if (U_FAILURE(*err)) {
1915 return 0;
1916 }
1917
1918 CheckedArrayByteSink sink(name, nameCapacity);
1919 ulocimp_canonicalize(localeID, sink, err);
1920
1921 int32_t reslen = sink.NumberOfBytesAppended();
1922
1923 if (U_FAILURE(*err)) {
1924 return reslen;
1925 }
1926
1927 if (sink.Overflowed()) {
1928 *err = U_BUFFER_OVERFLOW_ERROR;
1929 } else {
1930 u_terminateChars(name, nameCapacity, reslen, err);
1931 }
1932
1933 return reslen;
1934 }
1935
1936 U_CAPI void U_EXPORT2
ulocimp_canonicalize(const char * localeID,ByteSink & sink,UErrorCode * err)1937 ulocimp_canonicalize(const char* localeID,
1938 ByteSink& sink,
1939 UErrorCode* err)
1940 {
1941 _canonicalize(localeID, sink, _ULOC_CANONICALIZE, err);
1942 }
1943
1944 U_CAPI const char* U_EXPORT2
uloc_getISO3Language(const char * localeID)1945 uloc_getISO3Language(const char* localeID)
1946 {
1947 int16_t offset;
1948 char lang[ULOC_LANG_CAPACITY];
1949 UErrorCode err = U_ZERO_ERROR;
1950
1951 if (localeID == NULL)
1952 {
1953 localeID = uloc_getDefault();
1954 }
1955 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
1956 if (U_FAILURE(err))
1957 return "";
1958 offset = _findIndex(LANGUAGES, lang);
1959 if (offset < 0)
1960 return "";
1961 return LANGUAGES_3[offset];
1962 }
1963
1964 U_CAPI const char* U_EXPORT2
uloc_getISO3Country(const char * localeID)1965 uloc_getISO3Country(const char* localeID)
1966 {
1967 int16_t offset;
1968 char cntry[ULOC_LANG_CAPACITY];
1969 UErrorCode err = U_ZERO_ERROR;
1970
1971 if (localeID == NULL)
1972 {
1973 localeID = uloc_getDefault();
1974 }
1975 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
1976 if (U_FAILURE(err))
1977 return "";
1978 offset = _findIndex(COUNTRIES, cntry);
1979 if (offset < 0)
1980 return "";
1981
1982 return COUNTRIES_3[offset];
1983 }
1984
1985 U_CAPI uint32_t U_EXPORT2
uloc_getLCID(const char * localeID)1986 uloc_getLCID(const char* localeID)
1987 {
1988 UErrorCode status = U_ZERO_ERROR;
1989 char langID[ULOC_FULLNAME_CAPACITY];
1990 uint32_t lcid = 0;
1991
1992 /* Check for incomplete id. */
1993 if (!localeID || uprv_strlen(localeID) < 2) {
1994 return 0;
1995 }
1996
1997 // First, attempt Windows platform lookup if available, but fall
1998 // through to catch any special cases (ICU vs Windows name differences).
1999 lcid = uprv_convertToLCIDPlatform(localeID, &status);
2000 if (U_FAILURE(status)) {
2001 return 0;
2002 }
2003 if (lcid > 0) {
2004 // Windows found an LCID, return that
2005 return lcid;
2006 }
2007
2008 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2009 if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
2010 return 0;
2011 }
2012
2013 if (uprv_strchr(localeID, '@')) {
2014 // uprv_convertToLCID does not support keywords other than collation.
2015 // Remove all keywords except collation.
2016 int32_t len;
2017 char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2018
2019 CharString collVal;
2020 {
2021 CharStringByteSink sink(&collVal);
2022 ulocimp_getKeywordValue(localeID, "collation", sink, &status);
2023 }
2024
2025 if (U_SUCCESS(status) && !collVal.isEmpty()) {
2026 len = uloc_getBaseName(localeID, tmpLocaleID,
2027 UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
2028
2029 if (U_SUCCESS(status) && len > 0) {
2030 tmpLocaleID[len] = 0;
2031
2032 len = uloc_setKeywordValue("collation", collVal.data(), tmpLocaleID,
2033 UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
2034
2035 if (U_SUCCESS(status) && len > 0) {
2036 tmpLocaleID[len] = 0;
2037 return uprv_convertToLCID(langID, tmpLocaleID, &status);
2038 }
2039 }
2040 }
2041
2042 // fall through - all keywords are simply ignored
2043 status = U_ZERO_ERROR;
2044 }
2045
2046 return uprv_convertToLCID(langID, localeID, &status);
2047 }
2048
2049 U_CAPI int32_t U_EXPORT2
uloc_getLocaleForLCID(uint32_t hostid,char * locale,int32_t localeCapacity,UErrorCode * status)2050 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2051 UErrorCode *status)
2052 {
2053 return uprv_convertToPosix(hostid, locale, localeCapacity, status);
2054 }
2055
2056 /* ### Default locale **************************************************/
2057
2058 U_CAPI const char* U_EXPORT2
uloc_getDefault()2059 uloc_getDefault()
2060 {
2061 return locale_get_default();
2062 }
2063
2064 U_CAPI void U_EXPORT2
uloc_setDefault(const char * newDefaultLocale,UErrorCode * err)2065 uloc_setDefault(const char* newDefaultLocale,
2066 UErrorCode* err)
2067 {
2068 if (U_FAILURE(*err))
2069 return;
2070 /* the error code isn't currently used for anything by this function*/
2071
2072 /* propagate change to C++ */
2073 locale_set_default(newDefaultLocale);
2074 }
2075
2076 /**
2077 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2078 * to an array of pointers to arrays of char. All of these pointers are owned
2079 * by ICU-- do not delete them, and do not write through them. The array is
2080 * terminated with a null pointer.
2081 */
2082 U_CAPI const char* const* U_EXPORT2
uloc_getISOLanguages()2083 uloc_getISOLanguages()
2084 {
2085 return LANGUAGES;
2086 }
2087
2088 /**
2089 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2090 * pointer to an array of pointers to arrays of char. All of these pointers are
2091 * owned by ICU-- do not delete them, and do not write through them. The array is
2092 * terminated with a null pointer.
2093 */
2094 U_CAPI const char* const* U_EXPORT2
uloc_getISOCountries()2095 uloc_getISOCountries()
2096 {
2097 return COUNTRIES;
2098 }
2099
2100 U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleKey(const char * keyword)2101 uloc_toUnicodeLocaleKey(const char* keyword)
2102 {
2103 const char* bcpKey = ulocimp_toBcpKey(keyword);
2104 if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
2105 // unknown keyword, but syntax is fine..
2106 return keyword;
2107 }
2108 return bcpKey;
2109 }
2110
2111 U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleType(const char * keyword,const char * value)2112 uloc_toUnicodeLocaleType(const char* keyword, const char* value)
2113 {
2114 const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
2115 if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
2116 // unknown keyword, but syntax is fine..
2117 return value;
2118 }
2119 return bcpType;
2120 }
2121
2122 static UBool
isWellFormedLegacyKey(const char * legacyKey)2123 isWellFormedLegacyKey(const char* legacyKey)
2124 {
2125 const char* p = legacyKey;
2126 while (*p) {
2127 if (!UPRV_ISALPHANUM(*p)) {
2128 return FALSE;
2129 }
2130 p++;
2131 }
2132 return TRUE;
2133 }
2134
2135 static UBool
isWellFormedLegacyType(const char * legacyType)2136 isWellFormedLegacyType(const char* legacyType)
2137 {
2138 const char* p = legacyType;
2139 int32_t alphaNumLen = 0;
2140 while (*p) {
2141 if (*p == '_' || *p == '/' || *p == '-') {
2142 if (alphaNumLen == 0) {
2143 return FALSE;
2144 }
2145 alphaNumLen = 0;
2146 } else if (UPRV_ISALPHANUM(*p)) {
2147 alphaNumLen++;
2148 } else {
2149 return FALSE;
2150 }
2151 p++;
2152 }
2153 return (alphaNumLen != 0);
2154 }
2155
2156 U_CAPI const char* U_EXPORT2
uloc_toLegacyKey(const char * keyword)2157 uloc_toLegacyKey(const char* keyword)
2158 {
2159 const char* legacyKey = ulocimp_toLegacyKey(keyword);
2160 if (legacyKey == NULL) {
2161 // Checks if the specified locale key is well-formed with the legacy locale syntax.
2162 //
2163 // Note:
2164 // LDML/CLDR provides some definition of keyword syntax in
2165 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2166 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2167 // Keys can only consist of [0-9a-zA-Z].
2168 if (isWellFormedLegacyKey(keyword)) {
2169 return keyword;
2170 }
2171 }
2172 return legacyKey;
2173 }
2174
2175 U_CAPI const char* U_EXPORT2
uloc_toLegacyType(const char * keyword,const char * value)2176 uloc_toLegacyType(const char* keyword, const char* value)
2177 {
2178 const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
2179 if (legacyType == NULL) {
2180 // Checks if the specified locale type is well-formed with the legacy locale syntax.
2181 //
2182 // Note:
2183 // LDML/CLDR provides some definition of keyword syntax in
2184 // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2185 // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2186 // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
2187 // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
2188 if (isWellFormedLegacyType(value)) {
2189 return value;
2190 }
2191 }
2192 return legacyType;
2193 }
2194
2195 /*eof*/
2196