1 /*
2 **********************************************************************
3 * Copyright (C) 1997-2015, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * File ULOC.CPP
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 04/01/97 aliu Creation.
13 * 08/21/98 stephen JDK 1.2 sync
14 * 12/08/98 rtg New Locale implementation and C API
15 * 03/15/99 damiba overhaul.
16 * 04/06/99 stephen changed setDefault() to realloc and copy
17 * 06/14/99 stephen Changed calls to ures_open for new params
18 * 07/21/99 stephen Modified setDefault() to propagate to C++
19 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20 * brought canonicalization code into line with spec
21 *****************************************************************************/
22
23 /*
24 POSIX's locale format, from putil.c: [no spaces]
25
26 ll [ _CC ] [ . MM ] [ @ VV]
27
28 l = lang, C = ctry, M = charmap, V = variant
29 */
30
31 #include "unicode/utypes.h"
32 #include "unicode/ustring.h"
33 #include "unicode/uloc.h"
34
35 #include "putilimp.h"
36 #include "ustr_imp.h"
37 #include "ulocimp.h"
38 #include "umutex.h"
39 #include "cstring.h"
40 #include "cmemory.h"
41 #include "locmap.h"
42 #include "uarrsort.h"
43 #include "uenumimp.h"
44 #include "uassert.h"
45
46 #include <stdio.h> /* for sprintf */
47
48 /* ### Declarations **************************************************/
49
/*
 * Locale stuff from locid.cpp.
 * These are declarations only; none of the three functions is defined in
 * the part of the file that precedes their first use.
 */
U_CFUNC void locale_set_default(const char *id);
U_CFUNC const char *locale_get_default(void);
/*
 * Keyword extraction entry point. Its parameter list is the same as that of
 * the static _getKeywords() in this file, minus addKeyword/addValue.
 */
U_CFUNC int32_t
locale_getKeywords(const char *localeID,
                   char prev,
                   char *keywords, int32_t keywordCapacity,
                   char *values, int32_t valuesCapacity, int32_t *valLen,
                   UBool valuesToo,
                   UErrorCode *status);
60
61 /* ### Data tables **************************************************/
62
63 /**
64 * Table of language codes, both 2- and 3-letter, with preference
65 * given to 2-letter codes where possible. Includes 3-letter codes
66 * that lack a 2-letter equivalent.
67 *
68 * This list must be in sorted order. This list is returned directly
69 * to the user by some API.
70 *
71 * This list must be kept in sync with LANGUAGES_3, with corresponding
72 * entries matched.
73 *
74 * This table should be terminated with a NULL entry, followed by a
75 * second list, and another NULL entry. The first list is visible to
76 * user code when this array is returned by API. The second list
77 * contains codes we support, but do not expose through user API.
78 *
79 * Notes
80 *
81 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
82 * include the revisions up to 2001/7/27 *CWB*
83 *
84 * The 3 character codes are the terminology codes like RFC 3066. This
85 * is compatible with prior ICU codes
86 *
87 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
88 * table but now at the end of the table because 3 character codes are
89 * duplicates. This avoids bad searches going from 3 to 2 character
90 * codes.
91 *
92 * The range qaa-qtz is reserved for local use
93 */
94 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
95 /* ISO639 table version is 20150505 */
/* Layout: sorted API-visible codes, NULL, obsolete codes (not exposed), NULL. */
static const char * const LANGUAGES[] = {
    "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
    "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
    "aln", "alt", "am", "an", "ang", "anp", "ar", "arc",
    "arn", "aro", "arp", "arq", "arw", "ary", "arz", "as",
    "asa", "ase", "ast", "av", "avk", "awa", "ay", "az",
    "ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
    "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
    "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
    "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
    "ca", "cad", "car", "cay", "cch", "ce", "ceb", "cgg",
    "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
    "cs", "csb", "cu", "cv", "cy",
    "da", "dak", "dar", "dav", "de", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
    "dyo", "dyu", "dz", "dzg",
    "ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx",
    "en", "enm", "eo", "es", "esu", "et", "eu", "ewo",
    "ext",
    "fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj",
    "fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fy",
    "ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
    "gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc",
    "gur", "guz", "gv", "gwi",
    "ha", "hai", "hak", "haw", "he", "hi", "hif", "hil",
    "hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu",
    "hup", "hy", "hz",
    "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik",
    "ilo", "inh", "io", "is", "it", "iu", "izh",
    "ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jv",
    "ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp",
    "kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk",
    "kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi",
    "kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl",
    "kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut",
    "kv", "kw", "ky",
    "la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn",
    "lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo",
    "lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui",
    "lun", "luo", "lus", "luy", "lv", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
    "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
    "ml", "mn", "mnc", "mni", "moh", "mos", "mr", "mrj",
    "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "my", "mye", "myv", "mzn",
    "na", "nan", "nap", "naq", "nb", "nd", "nds", "ne",
    "new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn",
    "nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso",
    "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi",
    "oc", "oj", "om", "or", "os", "osa", "ota",
    "pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt",
    "pon", "prg", "pro", "ps", "pt",
    "qu", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro",
    "rof", "rom", "rtm", "ru", "rue", "rug", "rup",
    "rw", "rwk",
    "sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh",
    "se", "see", "seh", "sei", "sel", "ses", "sg", "sga",
    "sgs", "shi", "shn", "shu", "si", "sid", "sk",
    "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
    "sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
    "ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
    "sv", "sw", "swb", "swc", "syc", "syr", "szl",
    "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
    "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
    "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
    "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
    "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
    "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
    "vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo",
    "vot", "vro", "vun",
    "wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu",
    "xal", "xh", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue",
    "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
    "zun", "zxx", "zza",
NULL, /* terminates the first (API-visible) list */
    "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
NULL /* terminates the second (internal-only) list */
};
185
/*
 * Deprecated 2-letter language codes and, at the same index in
 * REPLACEMENT_LANGUAGES, the code that supersedes each one
 * ("in" -> "id", "iw" -> "he", "ji" -> "yi", "jw" -> "jv").
 * Both arrays end with two NULL entries.
 * NOTE(review): the code that walks these arrays is outside this part of
 * the file; the index-wise pairing is inferred from the parallel layout.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};
192
193 /**
194 * Table of 3-letter language codes.
195 *
196 * This is a lookup table used to convert 3-letter language codes to
197 * their 2-letter equivalent, where possible. It must be kept in sync
198 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
199 * same language as LANGUAGES_3[i]. The commented-out lines are
200 * copied from LANGUAGES to make eyeballing this baby easier.
201 *
202 * Where a 3-letter language code has no 2-letter equivalent, the
203 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
204 *
205 * This table should be terminated with a NULL entry, followed by a
206 * second list, and another NULL entry. The two lists correspond to
207 * the two lists in LANGUAGES.
208 */
209 /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
210 /* ISO639 table version is 20150505 */
/* Layout mirrors LANGUAGES entry for entry: primary list, NULL, obsolete list, NULL. */
static const char * const LANGUAGES_3[] = {
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
    "arn", "aro", "arp", "arq", "arw", "ary", "arz", "asm",
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "car", "cay", "cch", "che", "ceb", "cgg",
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
    "ces", "csb", "chu", "chv", "cym",
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
    "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
    "ext",
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fry",
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
    "gur", "guz", "glv", "gwi",
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
    "hup", "hye", "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
    "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj",
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "mya", "mye", "myv", "mzn",
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
    "pon", "prg", "pro", "pus", "por",
    "que", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
    "kin", "rwk",
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
    "swe", "swa", "swb", "swc", "syc", "syr", "szl",
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
    "vot", "vro", "vun",
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
    "xal", "xho", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
    "zun", "zxx", "zza",
NULL, /* terminates the first list (matches the first NULL in LANGUAGES) */
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    /* NOTE(review): "jw" is paired with "jaw" here, while "jv" in the primary
       list is paired with "jav" -- confirm the legacy spelling is intentional. */
    "ind", "heb", "yid", "jaw", "srp",
NULL /* terminates the second list (matches the final NULL in LANGUAGES) */
};
301
302 /**
303 * Table of 2-letter country codes.
304 *
305 * This list must be in sorted order. This list is returned directly
306 * to the user by some API.
307 *
308 * This list must be kept in sync with COUNTRIES_3, with corresponding
309 * entries matched.
310 *
311 * This table should be terminated with a NULL entry, followed by a
312 * second list, and another NULL entry. The first list is visible to
313 * user code when this array is returned by API. The second list
314 * contains codes we support, but do not expose through user API.
315 *
316 * Notes:
317 *
318 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
319 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
320 * new codes keeping the old ones for compatibility updated to include
321 * 1999/12/03 revisions *CWB*
322 *
323 * RO(ROM) is now RO(ROU) according to
324 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
325 */
/* Layout: sorted API-visible codes, NULL, obsolete codes (not exposed), NULL. */
static const char * const COUNTRIES[] = {
    "AD", "AE", "AF", "AG", "AI", "AL", "AM",
    "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
    "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
    "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
    "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
    "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
    "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
    "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
    "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
    "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
    "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
    "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
    "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
    "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
    "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
    "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
    "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
    "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
    "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
    "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
    "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
    "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
    "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
    "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
    "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
    "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
    "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
    "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
    "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
    "WS", "YE", "YT", "ZA", "ZM", "ZW",
NULL, /* terminates the first (API-visible) list */
    /* "RO" appears again below so that the retired 3-letter code "ROM" at the
       same index of COUNTRIES_3 has a 2-letter counterpart. */
    "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
NULL /* terminates the second (internal-only) list */
};
361
/*
 * Deprecated 2-letter country codes and, at the same index in
 * REPLACEMENT_COUNTRIES, the code that supersedes each one. The commented
 * row inside REPLACEMENT_COUNTRIES repeats the deprecated codes so the
 * pairing can be checked by eye. Both arrays end with two NULL entries.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
};
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL  /* replacement country codes */
};
369
370 /**
371 * Table of 3-letter country codes.
372 *
373 * This is a lookup table used to convert 3-letter country codes to
374 * their 2-letter equivalent. It must be kept in sync with COUNTRIES.
375 * For all valid i, COUNTRIES[i] must refer to the same country as
376 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
377 * to make eyeballing this baby easier.
378 *
379 * This table should be terminated with a NULL entry, followed by a
380 * second list, and another NULL entry. The two lists correspond to
381 * the two lists in COUNTRIES.
382 */
/* Layout mirrors COUNTRIES entry for entry: primary list, NULL, obsolete list, NULL.
   Each commented row is the matching row of COUNTRIES. */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",     */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL, /* terminates the first list (matches the first NULL in COUNTRIES) */
/*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
NULL /* terminates the second list (matches the final NULL in COUNTRIES) */
};
449
/**
 * One row of CANONICALIZE_MAP: a whole-ID rewrite rule, optionally
 * accompanied by a keyword/value pair.
 */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if keyword==NULL */
} CanonicalizationMap;
456
/**
 * A map to canonicalize locale IDs.  This handles a variety of
 * different semantic kinds of transformations.
 *
 * Each row gives an input ID, the canonical ID that replaces it and,
 * for some rows, a keyword and value (e.g. the *_PREEURO rows carry
 * "currency" with the legacy currency code). Rows without a keyword
 * have NULL in both of the last two fields. The trailing comment on a
 * row records where the input spelling comes from.
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    { "", "en_US_POSIX", NULL, NULL }, /* .NET name */
    { "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */
    { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
    { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */
    { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
    { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */
    { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
    { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */
    { "de_AT_PREEURO", "de_AT", "currency", "ATS" },
    { "de_DE_PREEURO", "de_DE", "currency", "DEM" },
    { "de_LU_PREEURO", "de_LU", "currency", "LUF" },
    { "el_GR_PREEURO", "el_GR", "currency", "GRD" },
    { "en_BE_PREEURO", "en_BE", "currency", "BEF" },
    { "en_IE_PREEURO", "en_IE", "currency", "IEP" },
    { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
    { "es_ES_PREEURO", "es_ES", "currency", "ESP" },
    { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
    { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
    { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
    { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
    { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
    { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
    { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
    { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */
    { "it_IT_PREEURO", "it_IT", "currency", "ITL" },
    { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
    { "nb_NO_NY", "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
    { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
    { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
    { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
    { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
    { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */
    { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
    { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
    { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
    { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
    { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */
    { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */
    { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */
    { "zh_GAN", "gan", NULL, NULL }, /* registered name */
    { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */
    { "zh_HAKKA", "hak", NULL, NULL }, /* registered name */
    { "zh_MIN_NAN", "nan", NULL, NULL }, /* registered name */
    { "zh_WUU", "wuu", NULL, NULL }, /* registered name */
    { "zh_XIANG", "hsn", NULL, NULL }, /* registered name */
    { "zh_YUE", "yue", NULL, NULL }, /* registered name */
};
509
/**
 * One row of VARIANT_MAP: a variant name together with the keyword and
 * value associated with it.
 */
typedef struct VariantMap {
    const char *variant;          /* input ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if keyword==NULL */
} VariantMap;
515
/**
 * Variant names that have a keyword/value equivalent
 * (e.g. "EURO" corresponds to currency=EUR).
 */
static const VariantMap VARIANT_MAP[] = {
    { "EURO",   "currency", "EUR" },
    { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
    { "STROKE", "collation", "stroke" }  /* Solaris variant */
};
521
522 /* ### BCP47 Conversion *******************************************/
/*
 * Test if the locale id looks like a BCP47 tag carrying an extension:
 * id must be non-NULL, contain no '@', and getShortestSubtagLength(id)
 * must be 1 (i.e. it has a one-character subtag such as the "u" in
 * "de-u-co-phonebk"). Every check is made against the macro argument
 * itself, so the macro does not depend on the name of any variable at
 * the expansion site. Note that id is evaluated more than once.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
/*
 * Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails.
 *
 * Calls uloc_forLanguageTag(id, buffer, length, NULL, err). If that returns
 * a value <= 0, or *err indicates failure afterwards, finalID is set to id;
 * otherwise finalID is set to buffer. The macro never resets *err.
 *
 * This expands to a bare if/else statement (not an expression and not a
 * do/while(0) wrapper), so use it only as a complete statement inside a
 * braced block.
 */
#define _ConvertBCP47(finalID, id, buffer, length,err) \
        if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
            finalID=id; \
        } else { \
            finalID=buffer; \
        }
532 /* Gets the size of the shortest subtag in the given localeID. */
getShortestSubtagLength(const char * localeID)533 static int32_t getShortestSubtagLength(const char *localeID) {
534 int32_t localeIDLength = uprv_strlen(localeID);
535 int32_t length = localeIDLength;
536 int32_t tmpLength = 0;
537 int32_t i;
538 UBool reset = TRUE;
539
540 for (i = 0; i < localeIDLength; i++) {
541 if (localeID[i] != '_' && localeID[i] != '-') {
542 if (reset) {
543 tmpLength = 0;
544 reset = FALSE;
545 }
546 tmpLength++;
547 } else {
548 if (tmpLength != 0 && tmpLength < length) {
549 length = tmpLength;
550 }
551 reset = TRUE;
552 }
553 }
554
555 return length;
556 }
557
558 /* ### Keywords **************************************************/
559
/* Size, terminating NUL included, of the buffer that holds one canonicalized
   keyword name (see KeywordStruct.keyword); names that would not fit are
   rejected with U_INTERNAL_PROGRAM_ERROR. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Maximum number of distinct keywords _getKeywords() collects from a single
   locale ID; exceeding it is reported as U_INTERNAL_PROGRAM_ERROR. */
#define ULOC_MAX_NO_KEYWORDS 25
562
563 U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char * localeID)564 locale_getKeywordsStart(const char *localeID) {
565 const char *result = NULL;
566 if((result = uprv_strchr(localeID, '@')) != NULL) {
567 return result;
568 }
569 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
570 else {
571 /* We do this because the @ sign is variant, and the @ sign used on one
572 EBCDIC machine won't be compiled the same way on other EBCDIC based
573 machines. */
574 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
575 const uint8_t *charToFind = ebcdicSigns;
576 while(*charToFind) {
577 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
578 return result;
579 }
580 charToFind++;
581 }
582 }
583 #endif
584 return NULL;
585 }
586
587 /**
588 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
589 * @param keywordName incoming name to be canonicalized
590 * @param status return status (keyword too long)
591 * @return length of the keyword name
592 */
locale_canonKeywordName(char * buf,const char * keywordName,UErrorCode * status)593 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
594 {
595 int32_t i;
596 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
597
598 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
599 /* keyword name too long for internal buffer */
600 *status = U_INTERNAL_PROGRAM_ERROR;
601 return 0;
602 }
603
604 /* normalize the keyword name */
605 for(i = 0; i < keywordNameLen; i++) {
606 buf[i] = uprv_tolower(keywordName[i]);
607 }
608 buf[i] = 0;
609
610 return keywordNameLen;
611 }
612
/* One keyword=value pair as collected by _getKeywords(). */
typedef struct {
    char keyword[ULOC_KEYWORD_BUFFER_LEN]; /* lowercased, space-free, zero-terminated keyword name */
    int32_t keywordLen;                    /* length of keyword, terminator excluded */
    const char *valueStart;                /* first character of the value; points into the caller's
                                              string (not copied, not zero-terminated at valueLen) */
    int32_t valueLen;                      /* length of the value with trailing spaces excluded */
} KeywordStruct;
619
620 static int32_t U_CALLCONV
compareKeywordStructs(const void *,const void * left,const void * right)621 compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
622 const char* leftString = ((const KeywordStruct *)left)->keyword;
623 const char* rightString = ((const KeywordStruct *)right)->keyword;
624 return uprv_strcmp(leftString, rightString);
625 }
626
627 /**
628 * Both addKeyword and addValue must already be in canonical form.
629 * Either both addKeyword and addValue are NULL, or neither is NULL.
630 * If they are not NULL they must be zero terminated.
 * If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
632 */
633 static int32_t
_getKeywords(const char * localeID,char prev,char * keywords,int32_t keywordCapacity,char * values,int32_t valuesCapacity,int32_t * valLen,UBool valuesToo,const char * addKeyword,const char * addValue,UErrorCode * status)634 _getKeywords(const char *localeID,
635 char prev,
636 char *keywords, int32_t keywordCapacity,
637 char *values, int32_t valuesCapacity, int32_t *valLen,
638 UBool valuesToo,
639 const char* addKeyword,
640 const char* addValue,
641 UErrorCode *status)
642 {
643 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
644
645 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
646 int32_t numKeywords = 0;
647 const char* pos = localeID;
648 const char* equalSign = NULL;
649 const char* semicolon = NULL;
650 int32_t i = 0, j, n;
651 int32_t keywordsLen = 0;
652 int32_t valuesLen = 0;
653
654 if(prev == '@') { /* start of keyword definition */
655 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
656 do {
657 UBool duplicate = FALSE;
658 /* skip leading spaces */
659 while(*pos == ' ') {
660 pos++;
661 }
662 if (!*pos) { /* handle trailing "; " */
663 break;
664 }
665 if(numKeywords == maxKeywords) {
666 *status = U_INTERNAL_PROGRAM_ERROR;
667 return 0;
668 }
669 equalSign = uprv_strchr(pos, '=');
670 semicolon = uprv_strchr(pos, ';');
671 /* lack of '=' [foo@currency] is illegal */
672 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
673 if(!equalSign || (semicolon && semicolon<equalSign)) {
674 *status = U_INVALID_FORMAT_ERROR;
675 return 0;
676 }
677 /* need to normalize both keyword and keyword name */
678 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
679 /* keyword name too long for internal buffer */
680 *status = U_INTERNAL_PROGRAM_ERROR;
681 return 0;
682 }
683 for(i = 0, n = 0; i < equalSign - pos; ++i) {
684 if (pos[i] != ' ') {
685 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
686 }
687 }
688
689 /* zero-length keyword is an error. */
690 if (n == 0) {
691 *status = U_INVALID_FORMAT_ERROR;
692 return 0;
693 }
694
695 keywordList[numKeywords].keyword[n] = 0;
696 keywordList[numKeywords].keywordLen = n;
697 /* now grab the value part. First we skip the '=' */
698 equalSign++;
            /* then we skip leading spaces */
700 while(*equalSign == ' ') {
701 equalSign++;
702 }
703
704 /* Premature end or zero-length value */
705 if (!*equalSign || equalSign == semicolon) {
706 *status = U_INVALID_FORMAT_ERROR;
707 return 0;
708 }
709
710 keywordList[numKeywords].valueStart = equalSign;
711
712 pos = semicolon;
713 i = 0;
714 if(pos) {
715 while(*(pos - i - 1) == ' ') {
716 i++;
717 }
718 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
719 pos++;
720 } else {
721 i = (int32_t)uprv_strlen(equalSign);
722 while(i && equalSign[i-1] == ' ') {
723 i--;
724 }
725 keywordList[numKeywords].valueLen = i;
726 }
727 /* If this is a duplicate keyword, then ignore it */
728 for (j=0; j<numKeywords; ++j) {
729 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
730 duplicate = TRUE;
731 break;
732 }
733 }
734 if (!duplicate) {
735 ++numKeywords;
736 }
737 } while(pos);
738
739 /* Handle addKeyword/addValue. */
740 if (addKeyword != NULL) {
741 UBool duplicate = FALSE;
742 U_ASSERT(addValue != NULL);
743 /* Search for duplicate; if found, do nothing. Explicit keyword
744 overrides addKeyword. */
745 for (j=0; j<numKeywords; ++j) {
746 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
747 duplicate = TRUE;
748 break;
749 }
750 }
751 if (!duplicate) {
752 if (numKeywords == maxKeywords) {
753 *status = U_INTERNAL_PROGRAM_ERROR;
754 return 0;
755 }
756 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
757 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
758 keywordList[numKeywords].valueStart = addValue;
759 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
760 ++numKeywords;
761 }
762 } else {
763 U_ASSERT(addValue == NULL);
764 }
765
766 /* now we have a list of keywords */
767 /* we need to sort it */
768 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
769
770 /* Now construct the keyword part */
771 for(i = 0; i < numKeywords; i++) {
772 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
773 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
774 if(valuesToo) {
775 keywords[keywordsLen + keywordList[i].keywordLen] = '=';
776 } else {
777 keywords[keywordsLen + keywordList[i].keywordLen] = 0;
778 }
779 }
780 keywordsLen += keywordList[i].keywordLen + 1;
781 if(valuesToo) {
782 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
783 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
784 }
785 keywordsLen += keywordList[i].valueLen;
786
787 if(i < numKeywords - 1) {
788 if(keywordsLen < keywordCapacity) {
789 keywords[keywordsLen] = ';';
790 }
791 keywordsLen++;
792 }
793 }
794 if(values) {
795 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
796 uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
797 values[valuesLen + keywordList[i].valueLen] = 0;
798 }
799 valuesLen += keywordList[i].valueLen + 1;
800 }
801 }
802 if(values) {
803 values[valuesLen] = 0;
804 if(valLen) {
805 *valLen = valuesLen;
806 }
807 }
808 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
809 } else {
810 return 0;
811 }
812 }
813
814 U_CFUNC int32_t
locale_getKeywords(const char * localeID,char prev,char * keywords,int32_t keywordCapacity,char * values,int32_t valuesCapacity,int32_t * valLen,UBool valuesToo,UErrorCode * status)815 locale_getKeywords(const char *localeID,
816 char prev,
817 char *keywords, int32_t keywordCapacity,
818 char *values, int32_t valuesCapacity, int32_t *valLen,
819 UBool valuesToo,
820 UErrorCode *status) {
821 return _getKeywords(localeID, prev, keywords, keywordCapacity,
822 values, valuesCapacity, valLen, valuesToo,
823 NULL, NULL, status);
824 }
825
826 U_CAPI int32_t U_EXPORT2
uloc_getKeywordValue(const char * localeID,const char * keywordName,char * buffer,int32_t bufferCapacity,UErrorCode * status)827 uloc_getKeywordValue(const char* localeID,
828 const char* keywordName,
829 char* buffer, int32_t bufferCapacity,
830 UErrorCode* status)
831 {
832 const char* startSearchHere = NULL;
833 const char* nextSeparator = NULL;
834 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
835 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
836 int32_t i = 0;
837 int32_t result = 0;
838
839 if(status && U_SUCCESS(*status) && localeID) {
840 char tempBuffer[ULOC_FULLNAME_CAPACITY];
841 const char* tmpLocaleID;
842
843 if (_hasBCP47Extension(localeID)) {
844 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
845 } else {
846 tmpLocaleID=localeID;
847 }
848
849 startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
850 if(startSearchHere == NULL) {
851 /* no keywords, return at once */
852 return 0;
853 }
854
855 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
856 if(U_FAILURE(*status)) {
857 return 0;
858 }
859
860 /* find the first keyword */
861 while(startSearchHere) {
862 startSearchHere++;
863 /* skip leading spaces (allowed?) */
864 while(*startSearchHere == ' ') {
865 startSearchHere++;
866 }
867 nextSeparator = uprv_strchr(startSearchHere, '=');
868 /* need to normalize both keyword and keyword name */
869 if(!nextSeparator) {
870 break;
871 }
872 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
873 /* keyword name too long for internal buffer */
874 *status = U_INTERNAL_PROGRAM_ERROR;
875 return 0;
876 }
877 for(i = 0; i < nextSeparator - startSearchHere; i++) {
878 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
879 }
880 /* trim trailing spaces */
881 while(startSearchHere[i-1] == ' ') {
882 i--;
883 U_ASSERT(i>=0);
884 }
885 localeKeywordNameBuffer[i] = 0;
886
887 startSearchHere = uprv_strchr(nextSeparator, ';');
888
889 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
890 nextSeparator++;
891 while(*nextSeparator == ' ') {
892 nextSeparator++;
893 }
894 /* we actually found the keyword. Copy the value */
895 if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
896 while(*(startSearchHere-1) == ' ') {
897 startSearchHere--;
898 }
899 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
900 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
901 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
902 i = (int32_t)uprv_strlen(nextSeparator);
903 while(nextSeparator[i - 1] == ' ') {
904 i--;
905 }
906 uprv_strncpy(buffer, nextSeparator, i);
907 result = u_terminateChars(buffer, bufferCapacity, i, status);
908 } else {
909 /* give a bigger buffer, please */
910 *status = U_BUFFER_OVERFLOW_ERROR;
911 if(startSearchHere) {
912 result = (int32_t)(startSearchHere - nextSeparator);
913 } else {
914 result = (int32_t)uprv_strlen(nextSeparator);
915 }
916 }
917 return result;
918 }
919 }
920 }
921 return 0;
922 }
923
924 U_CAPI int32_t U_EXPORT2
uloc_setKeywordValue(const char * keywordName,const char * keywordValue,char * buffer,int32_t bufferCapacity,UErrorCode * status)925 uloc_setKeywordValue(const char* keywordName,
926 const char* keywordValue,
927 char* buffer, int32_t bufferCapacity,
928 UErrorCode* status)
929 {
930 /* TODO: sorting. removal. */
931 int32_t keywordNameLen;
932 int32_t keywordValueLen;
933 int32_t bufLen;
934 int32_t needLen = 0;
935 int32_t foundValueLen;
936 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
937 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
938 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
939 int32_t i = 0;
940 int32_t rc;
941 char* nextSeparator = NULL;
942 char* nextEqualsign = NULL;
943 char* startSearchHere = NULL;
944 char* keywordStart = NULL;
945 char *insertHere = NULL;
946 if(U_FAILURE(*status)) {
947 return -1;
948 }
949 if(bufferCapacity>1) {
950 bufLen = (int32_t)uprv_strlen(buffer);
951 } else {
952 *status = U_ILLEGAL_ARGUMENT_ERROR;
953 return 0;
954 }
955 if(bufferCapacity<bufLen) {
956 /* The capacity is less than the length?! Is this NULL terminated? */
957 *status = U_ILLEGAL_ARGUMENT_ERROR;
958 return 0;
959 }
960 if(keywordValue && !*keywordValue) {
961 keywordValue = NULL;
962 }
963 if(keywordValue) {
964 keywordValueLen = (int32_t)uprv_strlen(keywordValue);
965 } else {
966 keywordValueLen = 0;
967 }
968 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
969 if(U_FAILURE(*status)) {
970 return 0;
971 }
972 startSearchHere = (char*)locale_getKeywordsStart(buffer);
973 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
974 if(!keywordValue) { /* no keywords = nothing to remove */
975 return bufLen;
976 }
977
978 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
979 if(startSearchHere) { /* had a single @ */
980 needLen--; /* already had the @ */
981 /* startSearchHere points at the @ */
982 } else {
983 startSearchHere=buffer+bufLen;
984 }
985 if(needLen >= bufferCapacity) {
986 *status = U_BUFFER_OVERFLOW_ERROR;
987 return needLen; /* no change */
988 }
989 *startSearchHere = '@';
990 startSearchHere++;
991 uprv_strcpy(startSearchHere, keywordNameBuffer);
992 startSearchHere += keywordNameLen;
993 *startSearchHere = '=';
994 startSearchHere++;
995 uprv_strcpy(startSearchHere, keywordValue);
996 startSearchHere+=keywordValueLen;
997 return needLen;
998 } /* end shortcut - no @ */
999
1000 keywordStart = startSearchHere;
1001 /* search for keyword */
1002 while(keywordStart) {
1003 keywordStart++;
1004 /* skip leading spaces (allowed?) */
1005 while(*keywordStart == ' ') {
1006 keywordStart++;
1007 }
1008 nextEqualsign = uprv_strchr(keywordStart, '=');
1009 /* need to normalize both keyword and keyword name */
1010 if(!nextEqualsign) {
1011 break;
1012 }
1013 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
1014 /* keyword name too long for internal buffer */
1015 *status = U_INTERNAL_PROGRAM_ERROR;
1016 return 0;
1017 }
1018 for(i = 0; i < nextEqualsign - keywordStart; i++) {
1019 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
1020 }
1021 /* trim trailing spaces */
1022 while(keywordStart[i-1] == ' ') {
1023 i--;
1024 }
1025 U_ASSERT(i>=0 && i<ULOC_KEYWORD_BUFFER_LEN);
1026 localeKeywordNameBuffer[i] = 0;
1027
1028 nextSeparator = uprv_strchr(nextEqualsign, ';');
1029 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1030 if(rc == 0) {
1031 nextEqualsign++;
1032 while(*nextEqualsign == ' ') {
1033 nextEqualsign++;
1034 }
1035 /* we actually found the keyword. Change the value */
1036 if (nextSeparator) {
1037 keywordAtEnd = 0;
1038 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
1039 } else {
1040 keywordAtEnd = 1;
1041 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
1042 }
1043 if(keywordValue) { /* adding a value - not removing */
1044 if(foundValueLen == keywordValueLen) {
1045 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1046 return bufLen; /* no change in size */
1047 } else if(foundValueLen > keywordValueLen) {
1048 int32_t delta = foundValueLen - keywordValueLen;
1049 if(nextSeparator) { /* RH side */
1050 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1051 }
1052 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1053 bufLen -= delta;
1054 buffer[bufLen]=0;
1055 return bufLen;
1056 } else { /* FVL < KVL */
1057 int32_t delta = keywordValueLen - foundValueLen;
1058 if((bufLen+delta) >= bufferCapacity) {
1059 *status = U_BUFFER_OVERFLOW_ERROR;
1060 return bufLen+delta;
1061 }
1062 if(nextSeparator) { /* RH side */
1063 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1064 }
1065 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1066 bufLen += delta;
1067 buffer[bufLen]=0;
1068 return bufLen;
1069 }
1070 } else { /* removing a keyword */
1071 if(keywordAtEnd) {
1072 /* zero out the ';' or '@' just before startSearchhere */
1073 keywordStart[-1] = 0;
1074 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
1075 } else {
1076 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1077 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
1078 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
1079 }
1080 }
1081 } else if(rc<0){ /* end match keyword */
1082 /* could insert at this location. */
1083 insertHere = keywordStart;
1084 }
1085 keywordStart = nextSeparator;
1086 } /* end loop searching */
1087
1088 if(!keywordValue) {
1089 return bufLen; /* removal of non-extant keyword - no change */
1090 }
1091
1092 /* we know there is at least one keyword. */
1093 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1094 if(needLen >= bufferCapacity) {
1095 *status = U_BUFFER_OVERFLOW_ERROR;
1096 return needLen; /* no change */
1097 }
1098
1099 if(insertHere) {
1100 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1101 keywordStart = insertHere;
1102 } else {
1103 keywordStart = buffer+bufLen;
1104 *keywordStart = ';';
1105 keywordStart++;
1106 }
1107 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1108 keywordStart += keywordNameLen;
1109 *keywordStart = '=';
1110 keywordStart++;
1111 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1112 keywordStart+=keywordValueLen;
1113 if(insertHere) {
1114 *keywordStart = ';';
1115 keywordStart++;
1116 }
1117 buffer[needLen]=0;
1118 return needLen;
1119 }
1120
1121 /* ### ID parsing implementation **************************************************/
1122
/* TRUE if a is one of the letters that can start a special prefix: x, X, i, I.
   The argument is parenthesized so that an expression can be passed safely;
   it is still evaluated more than once, so it must have no side effects. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* returns TRUE if one of the special prefixes is here (s=string):
   'x-' or 'i-', i.e. a prefix letter followed by an ID separator
   (_isIDSeparator is defined elsewhere in this file) */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant.
 * TRUE for NUL, '.' and '@'. The argument is evaluated more than once.
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
1133
_strnchr(const char * str,int32_t len,char c)1134 static char* _strnchr(const char* str, int32_t len, char c) {
1135 U_ASSERT(str != 0 && len >= 0);
1136 while (len-- != 0) {
1137 char d = *str;
1138 if (d == c) {
1139 return (char*) str;
1140 } else if (d == 0) {
1141 break;
1142 }
1143 ++str;
1144 }
1145 return NULL;
1146 }
1147
1148 /**
1149 * Lookup 'key' in the array 'list'. The array 'list' should contain
1150 * a NULL entry, followed by more entries, and a second NULL entry.
1151 *
1152 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1153 * COUNTRIES_3.
1154 */
_findIndex(const char * const * list,const char * key)1155 static int16_t _findIndex(const char* const* list, const char* key)
1156 {
1157 const char* const* anchor = list;
1158 int32_t pass = 0;
1159
1160 /* Make two passes through two NULL-terminated arrays at 'list' */
1161 while (pass++ < 2) {
1162 while (*list) {
1163 if (uprv_strcmp(key, *list) == 0) {
1164 return (int16_t)(list - anchor);
1165 }
1166 list++;
1167 }
1168 ++list; /* skip final NULL *CWB*/
1169 }
1170 return -1;
1171 }
1172
1173 /* count the length of src while copying it to dest; return strlen(src) */
1174 static inline int32_t
_copyCount(char * dest,int32_t destCapacity,const char * src)1175 _copyCount(char *dest, int32_t destCapacity, const char *src) {
1176 const char *anchor;
1177 char c;
1178
1179 anchor=src;
1180 for(;;) {
1181 if((c=*src)==0) {
1182 return (int32_t)(src-anchor);
1183 }
1184 if(destCapacity<=0) {
1185 return (int32_t)((src-anchor)+uprv_strlen(src));
1186 }
1187 ++src;
1188 *dest++=c;
1189 --destCapacity;
1190 }
1191 }
1192
1193 U_CFUNC const char*
uloc_getCurrentCountryID(const char * oldID)1194 uloc_getCurrentCountryID(const char* oldID){
1195 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1196 if (offset >= 0) {
1197 return REPLACEMENT_COUNTRIES[offset];
1198 }
1199 return oldID;
1200 }
1201 U_CFUNC const char*
uloc_getCurrentLanguageID(const char * oldID)1202 uloc_getCurrentLanguageID(const char* oldID){
1203 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1204 if (offset >= 0) {
1205 return REPLACEMENT_LANGUAGES[offset];
1206 }
1207 return oldID;
1208 }
1209 /*
1210 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1211 * avoid duplicating code to handle the earlier locale ID pieces
1212 * in the functions for the later ones by
1213 * setting the *pEnd pointer to where they stopped parsing
1214 *
1215 * TODO try to use this in Locale
1216 */
1217 U_CFUNC int32_t
ulocimp_getLanguage(const char * localeID,char * language,int32_t languageCapacity,const char ** pEnd)1218 ulocimp_getLanguage(const char *localeID,
1219 char *language, int32_t languageCapacity,
1220 const char **pEnd) {
1221 int32_t i=0;
1222 int32_t offset;
1223 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1224
1225 /* if it starts with i- or x- then copy that prefix */
1226 if(_isIDPrefix(localeID)) {
1227 if(i<languageCapacity) {
1228 language[i]=(char)uprv_tolower(*localeID);
1229 }
1230 if(i<languageCapacity) {
1231 language[i+1]='-';
1232 }
1233 i+=2;
1234 localeID+=2;
1235 }
1236
1237 /* copy the language as far as possible and count its length */
1238 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1239 if(i<languageCapacity) {
1240 language[i]=(char)uprv_tolower(*localeID);
1241 }
1242 if(i<3) {
1243 U_ASSERT(i>=0);
1244 lang[i]=(char)uprv_tolower(*localeID);
1245 }
1246 i++;
1247 localeID++;
1248 }
1249
1250 if(i==3) {
1251 /* convert 3 character code to 2 character code if possible *CWB*/
1252 offset=_findIndex(LANGUAGES_3, lang);
1253 if(offset>=0) {
1254 i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1255 }
1256 }
1257
1258 if(pEnd!=NULL) {
1259 *pEnd=localeID;
1260 }
1261 return i;
1262 }
1263
1264 U_CFUNC int32_t
ulocimp_getScript(const char * localeID,char * script,int32_t scriptCapacity,const char ** pEnd)1265 ulocimp_getScript(const char *localeID,
1266 char *script, int32_t scriptCapacity,
1267 const char **pEnd)
1268 {
1269 int32_t idLen = 0;
1270
1271 if (pEnd != NULL) {
1272 *pEnd = localeID;
1273 }
1274
1275 /* copy the second item as far as possible and count its length */
1276 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1277 && uprv_isASCIILetter(localeID[idLen])) {
1278 idLen++;
1279 }
1280
1281 /* If it's exactly 4 characters long, then it's a script and not a country. */
1282 if (idLen == 4) {
1283 int32_t i;
1284 if (pEnd != NULL) {
1285 *pEnd = localeID+idLen;
1286 }
1287 if(idLen > scriptCapacity) {
1288 idLen = scriptCapacity;
1289 }
1290 if (idLen >= 1) {
1291 script[0]=(char)uprv_toupper(*(localeID++));
1292 }
1293 for (i = 1; i < idLen; i++) {
1294 script[i]=(char)uprv_tolower(*(localeID++));
1295 }
1296 }
1297 else {
1298 idLen = 0;
1299 }
1300 return idLen;
1301 }
1302
1303 U_CFUNC int32_t
ulocimp_getCountry(const char * localeID,char * country,int32_t countryCapacity,const char ** pEnd)1304 ulocimp_getCountry(const char *localeID,
1305 char *country, int32_t countryCapacity,
1306 const char **pEnd)
1307 {
1308 int32_t idLen=0;
1309 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1310 int32_t offset;
1311
1312 /* copy the country as far as possible and count its length */
1313 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1314 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
1315 cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
1316 }
1317 idLen++;
1318 }
1319
1320 /* the country should be either length 2 or 3 */
1321 if (idLen == 2 || idLen == 3) {
1322 UBool gotCountry = FALSE;
1323 /* convert 3 character code to 2 character code if possible *CWB*/
1324 if(idLen==3) {
1325 offset=_findIndex(COUNTRIES_3, cnty);
1326 if(offset>=0) {
1327 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1328 gotCountry = TRUE;
1329 }
1330 }
1331 if (!gotCountry) {
1332 int32_t i = 0;
1333 for (i = 0; i < idLen; i++) {
1334 if (i < countryCapacity) {
1335 country[i]=(char)uprv_toupper(localeID[i]);
1336 }
1337 }
1338 }
1339 localeID+=idLen;
1340 } else {
1341 idLen = 0;
1342 }
1343
1344 if(pEnd!=NULL) {
1345 *pEnd=localeID;
1346 }
1347
1348 return idLen;
1349 }
1350
1351 /**
1352 * @param needSeparator if true, then add leading '_' if any variants
1353 * are added to 'variant'
1354 */
1355 static int32_t
_getVariantEx(const char * localeID,char prev,char * variant,int32_t variantCapacity,UBool needSeparator)1356 _getVariantEx(const char *localeID,
1357 char prev,
1358 char *variant, int32_t variantCapacity,
1359 UBool needSeparator) {
1360 int32_t i=0;
1361
1362 /* get one or more variant tags and separate them with '_' */
1363 if(_isIDSeparator(prev)) {
1364 /* get a variant string after a '-' or '_' */
1365 while(!_isTerminator(*localeID)) {
1366 if (needSeparator) {
1367 if (i<variantCapacity) {
1368 variant[i] = '_';
1369 }
1370 ++i;
1371 needSeparator = FALSE;
1372 }
1373 if(i<variantCapacity) {
1374 variant[i]=(char)uprv_toupper(*localeID);
1375 if(variant[i]=='-') {
1376 variant[i]='_';
1377 }
1378 }
1379 i++;
1380 localeID++;
1381 }
1382 }
1383
1384 /* if there is no variant tag after a '-' or '_' then look for '@' */
1385 if(i==0) {
1386 if(prev=='@') {
1387 /* keep localeID */
1388 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1389 ++localeID; /* point after the '@' */
1390 } else {
1391 return 0;
1392 }
1393 while(!_isTerminator(*localeID)) {
1394 if (needSeparator) {
1395 if (i<variantCapacity) {
1396 variant[i] = '_';
1397 }
1398 ++i;
1399 needSeparator = FALSE;
1400 }
1401 if(i<variantCapacity) {
1402 variant[i]=(char)uprv_toupper(*localeID);
1403 if(variant[i]=='-' || variant[i]==',') {
1404 variant[i]='_';
1405 }
1406 }
1407 i++;
1408 localeID++;
1409 }
1410 }
1411
1412 return i;
1413 }
1414
1415 static int32_t
_getVariant(const char * localeID,char prev,char * variant,int32_t variantCapacity)1416 _getVariant(const char *localeID,
1417 char prev,
1418 char *variant, int32_t variantCapacity) {
1419 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1420 }
1421
1422 /**
1423 * Delete ALL instances of a variant from the given list of one or
1424 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1425 * @param variants the source string of one or more variants,
1426 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1427 * terminated; if it is, trailing zero will NOT be maintained.
1428 * @param variantsLen length of variants
1429 * @param toDelete variant to delete, without separators, e.g. "EURO"
1430 * or "PREEURO"; not zero terminated
1431 * @param toDeleteLen length of toDelete
1432 * @return number of characters deleted from variants
1433 */
1434 static int32_t
_deleteVariant(char * variants,int32_t variantsLen,const char * toDelete,int32_t toDeleteLen)1435 _deleteVariant(char* variants, int32_t variantsLen,
1436 const char* toDelete, int32_t toDeleteLen)
1437 {
1438 int32_t delta = 0; /* number of chars deleted */
1439 for (;;) {
1440 UBool flag = FALSE;
1441 if (variantsLen < toDeleteLen) {
1442 return delta;
1443 }
1444 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1445 (variantsLen == toDeleteLen ||
1446 (flag=(variants[toDeleteLen] == '_'))))
1447 {
1448 int32_t d = toDeleteLen + (flag?1:0);
1449 variantsLen -= d;
1450 delta += d;
1451 if (variantsLen > 0) {
1452 uprv_memmove(variants, variants+d, variantsLen);
1453 }
1454 } else {
1455 char* p = _strnchr(variants, variantsLen, '_');
1456 if (p == NULL) {
1457 return delta;
1458 }
1459 ++p;
1460 variantsLen -= (int32_t)(p - variants);
1461 variants = p;
1462 }
1463 }
1464 }
1465
1466 /* Keyword enumeration */
1467
/*
 * Per-enumeration state stored in UEnumeration::context by
 * uloc_openKeywordList() and released by uloc_kw_closeKeywords().
 */
typedef struct UKeywordsContext {
    /* Heap copy of the keyword list, freed by uloc_kw_closeKeywords().
       It is walked as consecutive NUL-terminated strings and the walk
       stops at the first empty string. */
    char* keywords;
    /* Keyword that uloc_kw_nextKeyword() will return next; set back to
       'keywords' by uloc_kw_resetKeywords(). */
    char* current;
} UKeywordsContext;
1472
1473 static void U_CALLCONV
uloc_kw_closeKeywords(UEnumeration * enumerator)1474 uloc_kw_closeKeywords(UEnumeration *enumerator) {
1475 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1476 uprv_free(enumerator->context);
1477 uprv_free(enumerator);
1478 }
1479
1480 static int32_t U_CALLCONV
uloc_kw_countKeywords(UEnumeration * en,UErrorCode *)1481 uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1482 char *kw = ((UKeywordsContext *)en->context)->keywords;
1483 int32_t result = 0;
1484 while(*kw) {
1485 result++;
1486 kw += uprv_strlen(kw)+1;
1487 }
1488 return result;
1489 }
1490
1491 static const char* U_CALLCONV
uloc_kw_nextKeyword(UEnumeration * en,int32_t * resultLength,UErrorCode *)1492 uloc_kw_nextKeyword(UEnumeration* en,
1493 int32_t* resultLength,
1494 UErrorCode* /*status*/) {
1495 const char* result = ((UKeywordsContext *)en->context)->current;
1496 int32_t len = 0;
1497 if(*result) {
1498 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1499 ((UKeywordsContext *)en->context)->current += len+1;
1500 } else {
1501 result = NULL;
1502 }
1503 if (resultLength) {
1504 *resultLength = len;
1505 }
1506 return result;
1507 }
1508
1509 static void U_CALLCONV
uloc_kw_resetKeywords(UEnumeration * en,UErrorCode *)1510 uloc_kw_resetKeywords(UEnumeration* en,
1511 UErrorCode* /*status*/) {
1512 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1513 }
1514
/*
 * Template that uloc_openKeywordList() copies into every enumeration it
 * creates; the copy then gets its own context.
 * NOTE(review): the initializer is positional and follows the UEnumeration
 * layout declared in uenumimp.h, which is not visible here -- confirm the
 * field order before editing.
 */
static const UEnumeration gKeywordsEnum = {
    NULL,
    NULL, /* presumably the context slot; uloc_openKeywordList() assigns result->context after copying */
    uloc_kw_closeKeywords,
    uloc_kw_countKeywords,
    uenum_unextDefault,
    uloc_kw_nextKeyword,
    uloc_kw_resetKeywords
};
1524
1525 U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywordList(const char * keywordList,int32_t keywordListSize,UErrorCode * status)1526 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1527 {
1528 UKeywordsContext *myContext = NULL;
1529 UEnumeration *result = NULL;
1530
1531 if(U_FAILURE(*status)) {
1532 return NULL;
1533 }
1534 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1535 /* Null pointer test */
1536 if (result == NULL) {
1537 *status = U_MEMORY_ALLOCATION_ERROR;
1538 return NULL;
1539 }
1540 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1541 myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
1542 if (myContext == NULL) {
1543 *status = U_MEMORY_ALLOCATION_ERROR;
1544 uprv_free(result);
1545 return NULL;
1546 }
1547 myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1548 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1549 myContext->keywords[keywordListSize] = 0;
1550 myContext->current = myContext->keywords;
1551 result->context = myContext;
1552 return result;
1553 }
1554
1555 U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywords(const char * localeID,UErrorCode * status)1556 uloc_openKeywords(const char* localeID,
1557 UErrorCode* status)
1558 {
1559 int32_t i=0;
1560 char keywords[256];
1561 int32_t keywordsCapacity = 256;
1562 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1563 const char* tmpLocaleID;
1564
1565 if(status==NULL || U_FAILURE(*status)) {
1566 return 0;
1567 }
1568
1569 if (_hasBCP47Extension(localeID)) {
1570 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1571 } else {
1572 if (localeID==NULL) {
1573 localeID=uloc_getDefault();
1574 }
1575 tmpLocaleID=localeID;
1576 }
1577
1578 /* Skip the language */
1579 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1580 if(_isIDSeparator(*tmpLocaleID)) {
1581 const char *scriptID;
1582 /* Skip the script if available */
1583 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1584 if(scriptID != tmpLocaleID+1) {
1585 /* Found optional script */
1586 tmpLocaleID = scriptID;
1587 }
1588 /* Skip the Country */
1589 if (_isIDSeparator(*tmpLocaleID)) {
1590 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1591 if(_isIDSeparator(*tmpLocaleID)) {
1592 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
1593 }
1594 }
1595 }
1596
1597 /* keywords are located after '@' */
1598 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1599 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1600 }
1601
1602 if(i) {
1603 return uloc_openKeywordList(keywords, i, status);
1604 } else {
1605 return NULL;
1606 }
1607 }
1608
1609
/* bit-flags for 'options' parameter of _canonicalize */
/* NOTE(review): no use of _ULOC_STRIP_KEYWORDS is visible near these
   definitions; presumably it makes _canonicalize drop the keyword section --
   confirm. */
#define _ULOC_STRIP_KEYWORDS 0x2
/* Selects the fuller ("level 2") canonicalization in _canonicalize. */
#define _ULOC_CANONICALIZE 0x1

/* Nonzero if any bit of 'mask' is set in 'options'. The arguments are not
   parenthesized in the expansion, so pass only simple expressions. */
#define OPTION_SET(options, mask) ((options & mask) != 0)

/* The ID "i-default", deliberately without a terminating NUL: _canonicalize
   compares it with uprv_strncmp() using an explicit length. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
/* Number of characters in i_default (9). The value has type size_t. */
#define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
1618
1619 /**
1620 * Canonicalize the given localeID, to level 1 or to level 2,
1621 * depending on the options. To specify level 1, pass in options=0.
1622 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1623 *
1624 * This is the code underlying uloc_getName and uloc_canonicalize.
1625 */
1626 static int32_t
_canonicalize(const char * localeID,char * result,int32_t resultCapacity,uint32_t options,UErrorCode * err)1627 _canonicalize(const char* localeID,
1628 char* result,
1629 int32_t resultCapacity,
1630 uint32_t options,
1631 UErrorCode* err) {
1632 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1633 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1634 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1635 const char* origLocaleID;
1636 const char* tmpLocaleID;
1637 const char* keywordAssign = NULL;
1638 const char* separatorIndicator = NULL;
1639 const char* addKeyword = NULL;
1640 const char* addValue = NULL;
1641 char* name;
1642 char* variant = NULL; /* pointer into name, or NULL */
1643
1644 if (U_FAILURE(*err)) {
1645 return 0;
1646 }
1647
1648 if (_hasBCP47Extension(localeID)) {
1649 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1650 } else {
1651 if (localeID==NULL) {
1652 localeID=uloc_getDefault();
1653 }
1654 tmpLocaleID=localeID;
1655 }
1656
1657 origLocaleID=tmpLocaleID;
1658
1659 /* if we are doing a full canonicalization, then put results in
1660 localeBuffer, if necessary; otherwise send them to result. */
1661 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1662 (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
1663 name = localeBuffer;
1664 nameCapacity = (int32_t)sizeof(localeBuffer);
1665 } else {
1666 name = result;
1667 nameCapacity = resultCapacity;
1668 }
1669
1670 /* get all pieces, one after another, and separate with '_' */
1671 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
1672
1673 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1674 const char *d = uloc_getDefault();
1675
1676 len = (int32_t)uprv_strlen(d);
1677
1678 if (name != NULL) {
1679 uprv_strncpy(name, d, len);
1680 }
1681 } else if(_isIDSeparator(*tmpLocaleID)) {
1682 const char *scriptID;
1683
1684 ++fieldCount;
1685 if(len<nameCapacity) {
1686 name[len]='_';
1687 }
1688 ++len;
1689
1690 scriptSize=ulocimp_getScript(tmpLocaleID+1,
1691 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
1692 if(scriptSize > 0) {
1693 /* Found optional script */
1694 tmpLocaleID = scriptID;
1695 ++fieldCount;
1696 len+=scriptSize;
1697 if (_isIDSeparator(*tmpLocaleID)) {
1698 /* If there is something else, then we add the _ */
1699 if(len<nameCapacity) {
1700 name[len]='_';
1701 }
1702 ++len;
1703 }
1704 }
1705
1706 if (_isIDSeparator(*tmpLocaleID)) {
1707 const char *cntryID;
1708 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1709 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
1710 if (cntrySize > 0) {
1711 /* Found optional country */
1712 tmpLocaleID = cntryID;
1713 len+=cntrySize;
1714 }
1715 if(_isIDSeparator(*tmpLocaleID)) {
1716 /* If there is something else, then we add the _ if we found country before. */
1717 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
1718 ++fieldCount;
1719 if(len<nameCapacity) {
1720 name[len]='_';
1721 }
1722 ++len;
1723 }
1724
1725 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1726 (len<nameCapacity ? name+len : NULL), nameCapacity-len);
1727 if (variantSize > 0) {
1728 variant = len<nameCapacity ? name+len : NULL;
1729 len += variantSize;
1730 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1731 }
1732 }
1733 }
1734 }
1735
1736 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1737 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1738 UBool done = FALSE;
1739 do {
1740 char c = *tmpLocaleID;
1741 switch (c) {
1742 case 0:
1743 case '@':
1744 done = TRUE;
1745 break;
1746 default:
1747 if (len<nameCapacity) {
1748 name[len] = c;
1749 }
1750 ++len;
1751 ++tmpLocaleID;
1752 break;
1753 }
1754 } while (!done);
1755 }
1756
1757 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1758 After this, tmpLocaleID either points to '@' or is NULL */
1759 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1760 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1761 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1762 }
1763
1764 /* Copy POSIX-style variant, if any [mr@FOO] */
1765 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1766 tmpLocaleID != NULL && keywordAssign == NULL) {
1767 for (;;) {
1768 char c = *tmpLocaleID;
1769 if (c == 0) {
1770 break;
1771 }
1772 if (len<nameCapacity) {
1773 name[len] = c;
1774 }
1775 ++len;
1776 ++tmpLocaleID;
1777 }
1778 }
1779
1780 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1781 /* Handle @FOO variant if @ is present and not followed by = */
1782 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1783 int32_t posixVariantSize;
1784 /* Add missing '_' if needed */
1785 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1786 do {
1787 if(len<nameCapacity) {
1788 name[len]='_';
1789 }
1790 ++len;
1791 ++fieldCount;
1792 } while(fieldCount<2);
1793 }
1794 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
1795 (UBool)(variantSize > 0));
1796 if (posixVariantSize > 0) {
1797 if (variant == NULL) {
1798 variant = name+len;
1799 }
1800 len += posixVariantSize;
1801 variantSize += posixVariantSize;
1802 }
1803 }
1804
1805 /* Handle generic variants first */
1806 if (variant) {
1807 for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
1808 const char* variantToCompare = VARIANT_MAP[j].variant;
1809 int32_t n = (int32_t)uprv_strlen(variantToCompare);
1810 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1811 len -= variantLen;
1812 if (variantLen > 0) {
1813 if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
1814 --len;
1815 }
1816 addKeyword = VARIANT_MAP[j].keyword;
1817 addValue = VARIANT_MAP[j].value;
1818 break;
1819 }
1820 }
1821 if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
1822 --len;
1823 }
1824 }
1825
1826 /* Look up the ID in the canonicalization map */
1827 for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1828 const char* id = CANONICALIZE_MAP[j].id;
1829 int32_t n = (int32_t)uprv_strlen(id);
1830 if (len == n && uprv_strncmp(name, id, n) == 0) {
1831 if (n == 0 && tmpLocaleID != NULL) {
1832 break; /* Don't remap "" if keywords present */
1833 }
1834 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1835 if (CANONICALIZE_MAP[j].keyword) {
1836 addKeyword = CANONICALIZE_MAP[j].keyword;
1837 addValue = CANONICALIZE_MAP[j].value;
1838 }
1839 break;
1840 }
1841 }
1842 }
1843
1844 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1845 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1846 (!separatorIndicator || separatorIndicator > keywordAssign)) {
1847 if(len<nameCapacity) {
1848 name[len]='@';
1849 }
1850 ++len;
1851 ++fieldCount;
1852 len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1853 NULL, 0, NULL, TRUE, addKeyword, addValue, err);
1854 } else if (addKeyword != NULL) {
1855 U_ASSERT(addValue != NULL && len < nameCapacity);
1856 /* inelegant but works -- later make _getKeywords do this? */
1857 len += _copyCount(name+len, nameCapacity-len, "@");
1858 len += _copyCount(name+len, nameCapacity-len, addKeyword);
1859 len += _copyCount(name+len, nameCapacity-len, "=");
1860 len += _copyCount(name+len, nameCapacity-len, addValue);
1861 }
1862 }
1863
1864 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1865 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1866 }
1867
1868 return u_terminateChars(result, resultCapacity, len, err);
1869 }
1870
1871 /* ### ID parsing API **************************************************/
1872
1873 U_CAPI int32_t U_EXPORT2
uloc_getParent(const char * localeID,char * parent,int32_t parentCapacity,UErrorCode * err)1874 uloc_getParent(const char* localeID,
1875 char* parent,
1876 int32_t parentCapacity,
1877 UErrorCode* err)
1878 {
1879 const char *lastUnderscore;
1880 int32_t i;
1881
1882 if (U_FAILURE(*err))
1883 return 0;
1884
1885 if (localeID == NULL)
1886 localeID = uloc_getDefault();
1887
1888 lastUnderscore=uprv_strrchr(localeID, '_');
1889 if(lastUnderscore!=NULL) {
1890 i=(int32_t)(lastUnderscore-localeID);
1891 } else {
1892 i=0;
1893 }
1894
1895 if(i>0 && parent != localeID) {
1896 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1897 }
1898 return u_terminateChars(parent, parentCapacity, i, err);
1899 }
1900
1901 U_CAPI int32_t U_EXPORT2
uloc_getLanguage(const char * localeID,char * language,int32_t languageCapacity,UErrorCode * err)1902 uloc_getLanguage(const char* localeID,
1903 char* language,
1904 int32_t languageCapacity,
1905 UErrorCode* err)
1906 {
1907 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1908 int32_t i=0;
1909
1910 if (err==NULL || U_FAILURE(*err)) {
1911 return 0;
1912 }
1913
1914 if(localeID==NULL) {
1915 localeID=uloc_getDefault();
1916 }
1917
1918 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
1919 return u_terminateChars(language, languageCapacity, i, err);
1920 }
1921
1922 U_CAPI int32_t U_EXPORT2
uloc_getScript(const char * localeID,char * script,int32_t scriptCapacity,UErrorCode * err)1923 uloc_getScript(const char* localeID,
1924 char* script,
1925 int32_t scriptCapacity,
1926 UErrorCode* err)
1927 {
1928 int32_t i=0;
1929
1930 if(err==NULL || U_FAILURE(*err)) {
1931 return 0;
1932 }
1933
1934 if(localeID==NULL) {
1935 localeID=uloc_getDefault();
1936 }
1937
1938 /* skip the language */
1939 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1940 if(_isIDSeparator(*localeID)) {
1941 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
1942 }
1943 return u_terminateChars(script, scriptCapacity, i, err);
1944 }
1945
1946 U_CAPI int32_t U_EXPORT2
uloc_getCountry(const char * localeID,char * country,int32_t countryCapacity,UErrorCode * err)1947 uloc_getCountry(const char* localeID,
1948 char* country,
1949 int32_t countryCapacity,
1950 UErrorCode* err)
1951 {
1952 int32_t i=0;
1953
1954 if(err==NULL || U_FAILURE(*err)) {
1955 return 0;
1956 }
1957
1958 if(localeID==NULL) {
1959 localeID=uloc_getDefault();
1960 }
1961
1962 /* Skip the language */
1963 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1964 if(_isIDSeparator(*localeID)) {
1965 const char *scriptID;
1966 /* Skip the script if available */
1967 ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
1968 if(scriptID != localeID+1) {
1969 /* Found optional script */
1970 localeID = scriptID;
1971 }
1972 if(_isIDSeparator(*localeID)) {
1973 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
1974 }
1975 }
1976 return u_terminateChars(country, countryCapacity, i, err);
1977 }
1978
1979 U_CAPI int32_t U_EXPORT2
uloc_getVariant(const char * localeID,char * variant,int32_t variantCapacity,UErrorCode * err)1980 uloc_getVariant(const char* localeID,
1981 char* variant,
1982 int32_t variantCapacity,
1983 UErrorCode* err)
1984 {
1985 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1986 const char* tmpLocaleID;
1987 int32_t i=0;
1988
1989 if(err==NULL || U_FAILURE(*err)) {
1990 return 0;
1991 }
1992
1993 if (_hasBCP47Extension(localeID)) {
1994 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1995 } else {
1996 if (localeID==NULL) {
1997 localeID=uloc_getDefault();
1998 }
1999 tmpLocaleID=localeID;
2000 }
2001
2002 /* Skip the language */
2003 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
2004 if(_isIDSeparator(*tmpLocaleID)) {
2005 const char *scriptID;
2006 /* Skip the script if available */
2007 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
2008 if(scriptID != tmpLocaleID+1) {
2009 /* Found optional script */
2010 tmpLocaleID = scriptID;
2011 }
2012 /* Skip the Country */
2013 if (_isIDSeparator(*tmpLocaleID)) {
2014 const char *cntryID;
2015 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
2016 if (cntryID != tmpLocaleID+1) {
2017 /* Found optional country */
2018 tmpLocaleID = cntryID;
2019 }
2020 if(_isIDSeparator(*tmpLocaleID)) {
2021 /* If there was no country ID, skip a possible extra IDSeparator */
2022 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
2023 tmpLocaleID++;
2024 }
2025 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
2026 }
2027 }
2028 }
2029
2030 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2031 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2032 /*
2033 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2034 i=_getVariant(localeID+1, '@', variant, variantCapacity);
2035 }
2036 */
2037 return u_terminateChars(variant, variantCapacity, i, err);
2038 }
2039
2040 U_CAPI int32_t U_EXPORT2
uloc_getName(const char * localeID,char * name,int32_t nameCapacity,UErrorCode * err)2041 uloc_getName(const char* localeID,
2042 char* name,
2043 int32_t nameCapacity,
2044 UErrorCode* err)
2045 {
2046 return _canonicalize(localeID, name, nameCapacity, 0, err);
2047 }
2048
2049 U_CAPI int32_t U_EXPORT2
uloc_getBaseName(const char * localeID,char * name,int32_t nameCapacity,UErrorCode * err)2050 uloc_getBaseName(const char* localeID,
2051 char* name,
2052 int32_t nameCapacity,
2053 UErrorCode* err)
2054 {
2055 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
2056 }
2057
2058 U_CAPI int32_t U_EXPORT2
uloc_canonicalize(const char * localeID,char * name,int32_t nameCapacity,UErrorCode * err)2059 uloc_canonicalize(const char* localeID,
2060 char* name,
2061 int32_t nameCapacity,
2062 UErrorCode* err)
2063 {
2064 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
2065 }
2066
2067 U_CAPI const char* U_EXPORT2
uloc_getISO3Language(const char * localeID)2068 uloc_getISO3Language(const char* localeID)
2069 {
2070 int16_t offset;
2071 char lang[ULOC_LANG_CAPACITY];
2072 UErrorCode err = U_ZERO_ERROR;
2073
2074 if (localeID == NULL)
2075 {
2076 localeID = uloc_getDefault();
2077 }
2078 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
2079 if (U_FAILURE(err))
2080 return "";
2081 offset = _findIndex(LANGUAGES, lang);
2082 if (offset < 0)
2083 return "";
2084 return LANGUAGES_3[offset];
2085 }
2086
2087 U_CAPI const char* U_EXPORT2
uloc_getISO3Country(const char * localeID)2088 uloc_getISO3Country(const char* localeID)
2089 {
2090 int16_t offset;
2091 char cntry[ULOC_LANG_CAPACITY];
2092 UErrorCode err = U_ZERO_ERROR;
2093
2094 if (localeID == NULL)
2095 {
2096 localeID = uloc_getDefault();
2097 }
2098 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
2099 if (U_FAILURE(err))
2100 return "";
2101 offset = _findIndex(COUNTRIES, cntry);
2102 if (offset < 0)
2103 return "";
2104
2105 return COUNTRIES_3[offset];
2106 }
2107
2108 U_CAPI uint32_t U_EXPORT2
uloc_getLCID(const char * localeID)2109 uloc_getLCID(const char* localeID)
2110 {
2111 UErrorCode status = U_ZERO_ERROR;
2112 char langID[ULOC_FULLNAME_CAPACITY];
2113
2114 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2115 if (U_FAILURE(status)) {
2116 return 0;
2117 }
2118
2119 if (uprv_strchr(localeID, '@')) {
2120 // uprv_convertToLCID does not support keywords other than collation.
2121 // Remove all keywords except collation.
2122 int32_t len;
2123 char collVal[ULOC_KEYWORDS_CAPACITY];
2124 char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2125
2126 len = uloc_getKeywordValue(localeID, "collation", collVal,
2127 sizeof(collVal)/sizeof(collVal[0]) - 1, &status);
2128
2129 if (U_SUCCESS(status) && len > 0) {
2130 collVal[len] = 0;
2131
2132 len = uloc_getBaseName(localeID, tmpLocaleID,
2133 sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - 1, &status);
2134
2135 if (U_SUCCESS(status)) {
2136 tmpLocaleID[len] = 0;
2137
2138 len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
2139 sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - len - 1, &status);
2140
2141 if (U_SUCCESS(status)) {
2142 tmpLocaleID[len] = 0;
2143 return uprv_convertToLCID(langID, tmpLocaleID, &status);
2144 }
2145 }
2146 }
2147
2148 // fall through - all keywords are simply ignored
2149 status = U_ZERO_ERROR;
2150 }
2151
2152 return uprv_convertToLCID(langID, localeID, &status);
2153 }
2154
2155 U_CAPI int32_t U_EXPORT2
uloc_getLocaleForLCID(uint32_t hostid,char * locale,int32_t localeCapacity,UErrorCode * status)2156 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2157 UErrorCode *status)
2158 {
2159 return uprv_convertToPosix(hostid, locale, localeCapacity, status);
2160 }
2161
2162 /* ### Default locale **************************************************/
2163
2164 U_CAPI const char* U_EXPORT2
uloc_getDefault()2165 uloc_getDefault()
2166 {
2167 return locale_get_default();
2168 }
2169
2170 U_CAPI void U_EXPORT2
uloc_setDefault(const char * newDefaultLocale,UErrorCode * err)2171 uloc_setDefault(const char* newDefaultLocale,
2172 UErrorCode* err)
2173 {
2174 if (U_FAILURE(*err))
2175 return;
2176 /* the error code isn't currently used for anything by this function*/
2177
2178 /* propagate change to C++ */
2179 locale_set_default(newDefaultLocale);
2180 }
2181
2182 /**
2183 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2184 * to an array of pointers to arrays of char. All of these pointers are owned
2185 * by ICU-- do not delete them, and do not write through them. The array is
2186 * terminated with a null pointer.
2187 */
2188 U_CAPI const char* const* U_EXPORT2
uloc_getISOLanguages()2189 uloc_getISOLanguages()
2190 {
2191 return LANGUAGES;
2192 }
2193
2194 /**
2195 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2196 * pointer to an array of pointers to arrays of char. All of these pointers are
2197 * owned by ICU-- do not delete them, and do not write through them. The array is
2198 * terminated with a null pointer.
2199 */
2200 U_CAPI const char* const* U_EXPORT2
uloc_getISOCountries()2201 uloc_getISOCountries()
2202 {
2203 return COUNTRIES;
2204 }
2205
2206
2207 /* this function to be moved into cstring.c later */
2208 static char gDecimal = 0;
2209
2210 static /* U_CAPI */
2211 double
2212 /* U_EXPORT2 */
_uloc_strtod(const char * start,char ** end)2213 _uloc_strtod(const char *start, char **end) {
2214 char *decimal;
2215 char *myEnd;
2216 char buf[30];
2217 double rv;
2218 if (!gDecimal) {
2219 char rep[5];
2220 /* For machines that decide to change the decimal on you,
2221 and try to be too smart with localization.
2222 This normally should be just a '.'. */
2223 sprintf(rep, "%+1.1f", 1.0);
2224 gDecimal = rep[2];
2225 }
2226
2227 if(gDecimal == '.') {
2228 return uprv_strtod(start, end); /* fall through to OS */
2229 } else {
2230 uprv_strncpy(buf, start, 29);
2231 buf[29]=0;
2232 decimal = uprv_strchr(buf, '.');
2233 if(decimal) {
2234 *decimal = gDecimal;
2235 } else {
2236 return uprv_strtod(start, end); /* no decimal point */
2237 }
2238 rv = uprv_strtod(buf, &myEnd);
2239 if(end) {
2240 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
2241 }
2242 return rv;
2243 }
2244 }
2245
/* One entry of an HTTP Accept-Language list as collected by
   uloc_acceptLanguageFromHTTP() and ordered by uloc_acceptLanguageCompare(). */
typedef struct {
    float q;       /* quality value; set to 1.0 when the entry has no ';' parameter */
    int32_t dummy; /* to avoid uninitialized memory copy from qsort */
    char *locale;  /* locale string allocated with uprv_strndup()/uprv_strdup(); released with uprv_free() */
} _acceptLangItem;
2251
2252 static int32_t U_CALLCONV
uloc_acceptLanguageCompare(const void *,const void * a,const void * b)2253 uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
2254 {
2255 const _acceptLangItem *aa = (const _acceptLangItem*)a;
2256 const _acceptLangItem *bb = (const _acceptLangItem*)b;
2257
2258 int32_t rc = 0;
2259 if(bb->q < aa->q) {
2260 rc = -1; /* A > B */
2261 } else if(bb->q > aa->q) {
2262 rc = 1; /* A < B */
2263 } else {
2264 rc = 0; /* A = B */
2265 }
2266
2267 if(rc==0) {
2268 rc = uprv_stricmp(aa->locale, bb->locale);
2269 }
2270
2271 #if defined(ULOC_DEBUG)
2272 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2273 aa->locale, aa->q,
2274 bb->locale, bb->q,
2275 rc);*/
2276 #endif
2277
2278 return rc;
2279 }
2280
2281 /*
2282 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2283 */
2284
2285 U_CAPI int32_t U_EXPORT2
uloc_acceptLanguageFromHTTP(char * result,int32_t resultAvailable,UAcceptResult * outResult,const char * httpAcceptLanguage,UEnumeration * availableLocales,UErrorCode * status)2286 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2287 const char *httpAcceptLanguage,
2288 UEnumeration* availableLocales,
2289 UErrorCode *status)
2290 {
2291 _acceptLangItem *j;
2292 _acceptLangItem smallBuffer[30];
2293 char **strs;
2294 char tmp[ULOC_FULLNAME_CAPACITY +1];
2295 int32_t n = 0;
2296 const char *itemEnd;
2297 const char *paramEnd;
2298 const char *s;
2299 const char *t;
2300 int32_t res;
2301 int32_t i;
2302 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2303 int32_t jSize;
2304 char *tempstr; /* Use for null pointer check */
2305
2306 j = smallBuffer;
2307 jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2308 if(U_FAILURE(*status)) {
2309 return -1;
2310 }
2311
2312 for(s=httpAcceptLanguage;s&&*s;) {
2313 while(isspace(*s)) /* eat space at the beginning */
2314 s++;
2315 itemEnd=uprv_strchr(s,',');
2316 paramEnd=uprv_strchr(s,';');
2317 if(!itemEnd) {
2318 itemEnd = httpAcceptLanguage+l; /* end of string */
2319 }
2320 if(paramEnd && paramEnd<itemEnd) {
2321 /* semicolon (;) is closer than end (,) */
2322 t = paramEnd+1;
2323 if(*t=='q') {
2324 t++;
2325 }
2326 while(isspace(*t)) {
2327 t++;
2328 }
2329 if(*t=='=') {
2330 t++;
2331 }
2332 while(isspace(*t)) {
2333 t++;
2334 }
2335 j[n].q = (float)_uloc_strtod(t,NULL);
2336 } else {
2337 /* no semicolon - it's 1.0 */
2338 j[n].q = 1.0f;
2339 paramEnd = itemEnd;
2340 }
2341 j[n].dummy=0;
2342 /* eat spaces prior to semi */
2343 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2344 ;
2345 /* Check for null pointer from uprv_strndup */
2346 tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
2347 if (tempstr == NULL) {
2348 *status = U_MEMORY_ALLOCATION_ERROR;
2349 return -1;
2350 }
2351 j[n].locale = tempstr;
2352 uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2353 if(strcmp(j[n].locale,tmp)) {
2354 uprv_free(j[n].locale);
2355 j[n].locale=uprv_strdup(tmp);
2356 }
2357 #if defined(ULOC_DEBUG)
2358 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2359 #endif
2360 n++;
2361 s = itemEnd;
2362 while(*s==',') { /* eat duplicate commas */
2363 s++;
2364 }
2365 if(n>=jSize) {
2366 if(j==smallBuffer) { /* overflowed the small buffer. */
2367 j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2)));
2368 if(j!=NULL) {
2369 uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2370 }
2371 #if defined(ULOC_DEBUG)
2372 fprintf(stderr,"malloced at size %d\n", jSize);
2373 #endif
2374 } else {
2375 j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2));
2376 #if defined(ULOC_DEBUG)
2377 fprintf(stderr,"re-alloced at size %d\n", jSize);
2378 #endif
2379 }
2380 jSize *= 2;
2381 if(j==NULL) {
2382 *status = U_MEMORY_ALLOCATION_ERROR;
2383 return -1;
2384 }
2385 }
2386 }
2387 uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2388 if(U_FAILURE(*status)) {
2389 if(j != smallBuffer) {
2390 #if defined(ULOC_DEBUG)
2391 fprintf(stderr,"freeing j %p\n", j);
2392 #endif
2393 uprv_free(j);
2394 }
2395 return -1;
2396 }
2397 strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n)));
2398 /* Check for null pointer */
2399 if (strs == NULL) {
2400 uprv_free(j); /* Free to avoid memory leak */
2401 *status = U_MEMORY_ALLOCATION_ERROR;
2402 return -1;
2403 }
2404 for(i=0;i<n;i++) {
2405 #if defined(ULOC_DEBUG)
2406 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2407 #endif
2408 strs[i]=j[i].locale;
2409 }
2410 res = uloc_acceptLanguage(result, resultAvailable, outResult,
2411 (const char**)strs, n, availableLocales, status);
2412 for(i=0;i<n;i++) {
2413 uprv_free(strs[i]);
2414 }
2415 uprv_free(strs);
2416 if(j != smallBuffer) {
2417 #if defined(ULOC_DEBUG)
2418 fprintf(stderr,"freeing j %p\n", j);
2419 #endif
2420 uprv_free(j);
2421 }
2422 return res;
2423 }
2424
2425
2426 U_CAPI int32_t U_EXPORT2
uloc_acceptLanguage(char * result,int32_t resultAvailable,UAcceptResult * outResult,const char ** acceptList,int32_t acceptListCount,UEnumeration * availableLocales,UErrorCode * status)2427 uloc_acceptLanguage(char *result, int32_t resultAvailable,
2428 UAcceptResult *outResult, const char **acceptList,
2429 int32_t acceptListCount,
2430 UEnumeration* availableLocales,
2431 UErrorCode *status)
2432 {
2433 int32_t i,j;
2434 int32_t len;
2435 int32_t maxLen=0;
2436 char tmp[ULOC_FULLNAME_CAPACITY+1];
2437 const char *l;
2438 char **fallbackList;
2439 if(U_FAILURE(*status)) {
2440 return -1;
2441 }
2442 fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
2443 if(fallbackList==NULL) {
2444 *status = U_MEMORY_ALLOCATION_ERROR;
2445 return -1;
2446 }
2447 for(i=0;i<acceptListCount;i++) {
2448 #if defined(ULOC_DEBUG)
2449 fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2450 #endif
2451 while((l=uenum_next(availableLocales, NULL, status))) {
2452 #if defined(ULOC_DEBUG)
2453 fprintf(stderr," %s\n", l);
2454 #endif
2455 len = (int32_t)uprv_strlen(l);
2456 if(!uprv_strcmp(acceptList[i], l)) {
2457 if(outResult) {
2458 *outResult = ULOC_ACCEPT_VALID;
2459 }
2460 #if defined(ULOC_DEBUG)
2461 fprintf(stderr, "MATCH! %s\n", l);
2462 #endif
2463 if(len>0) {
2464 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2465 }
2466 for(j=0;j<i;j++) {
2467 uprv_free(fallbackList[j]);
2468 }
2469 uprv_free(fallbackList);
2470 return u_terminateChars(result, resultAvailable, len, status);
2471 }
2472 if(len>maxLen) {
2473 maxLen = len;
2474 }
2475 }
2476 uenum_reset(availableLocales, status);
2477 /* save off parent info */
2478 if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2479 fallbackList[i] = uprv_strdup(tmp);
2480 } else {
2481 fallbackList[i]=0;
2482 }
2483 }
2484
2485 for(maxLen--;maxLen>0;maxLen--) {
2486 for(i=0;i<acceptListCount;i++) {
2487 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2488 #if defined(ULOC_DEBUG)
2489 fprintf(stderr,"Try: [%s]", fallbackList[i]);
2490 #endif
2491 while((l=uenum_next(availableLocales, NULL, status))) {
2492 #if defined(ULOC_DEBUG)
2493 fprintf(stderr," %s\n", l);
2494 #endif
2495 len = (int32_t)uprv_strlen(l);
2496 if(!uprv_strcmp(fallbackList[i], l)) {
2497 if(outResult) {
2498 *outResult = ULOC_ACCEPT_FALLBACK;
2499 }
2500 #if defined(ULOC_DEBUG)
2501 fprintf(stderr, "fallback MATCH! %s\n", l);
2502 #endif
2503 if(len>0) {
2504 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2505 }
2506 for(j=0;j<acceptListCount;j++) {
2507 uprv_free(fallbackList[j]);
2508 }
2509 uprv_free(fallbackList);
2510 return u_terminateChars(result, resultAvailable, len, status);
2511 }
2512 }
2513 uenum_reset(availableLocales, status);
2514
2515 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2516 uprv_free(fallbackList[i]);
2517 fallbackList[i] = uprv_strdup(tmp);
2518 } else {
2519 uprv_free(fallbackList[i]);
2520 fallbackList[i]=0;
2521 }
2522 }
2523 }
2524 if(outResult) {
2525 *outResult = ULOC_ACCEPT_FAILED;
2526 }
2527 }
2528 for(i=0;i<acceptListCount;i++) {
2529 uprv_free(fallbackList[i]);
2530 }
2531 uprv_free(fallbackList);
2532 return -1;
2533 }
2534
2535 U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleKey(const char * keyword)2536 uloc_toUnicodeLocaleKey(const char* keyword)
2537 {
2538 const char* bcpKey = ulocimp_toBcpKey(keyword);
2539 if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
2540 // unknown keyword, but syntax is fine..
2541 return keyword;
2542 }
2543 return bcpKey;
2544 }
2545
2546 U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleType(const char * keyword,const char * value)2547 uloc_toUnicodeLocaleType(const char* keyword, const char* value)
2548 {
2549 const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
2550 if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
2551 // unknown keyword, but syntax is fine..
2552 return value;
2553 }
2554 return bcpType;
2555 }
2556
/* Non-zero when c is one of the chars '0'..'9'. The argument is evaluated
   twice, so it must not have side effects. */
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
/* Non-zero when uprv_isASCIILetter() accepts c or c is a digit per
   UPRV_ISDIGIT. The argument may be evaluated up to three times. */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
2559
2560 static UBool
isWellFormedLegacyKey(const char * legacyKey)2561 isWellFormedLegacyKey(const char* legacyKey)
2562 {
2563 const char* p = legacyKey;
2564 while (*p) {
2565 if (!UPRV_ISALPHANUM(*p)) {
2566 return FALSE;
2567 }
2568 p++;
2569 }
2570 return TRUE;
2571 }
2572
2573 static UBool
isWellFormedLegacyType(const char * legacyType)2574 isWellFormedLegacyType(const char* legacyType)
2575 {
2576 const char* p = legacyType;
2577 int32_t alphaNumLen = 0;
2578 while (*p) {
2579 if (*p == '_' || *p == '/' || *p == '-') {
2580 if (alphaNumLen == 0) {
2581 return FALSE;
2582 }
2583 alphaNumLen = 0;
2584 } else if (UPRV_ISALPHANUM(*p)) {
2585 alphaNumLen++;
2586 } else {
2587 return FALSE;
2588 }
2589 p++;
2590 }
2591 return (alphaNumLen != 0);
2592 }
2593
2594 U_CAPI const char* U_EXPORT2
uloc_toLegacyKey(const char * keyword)2595 uloc_toLegacyKey(const char* keyword)
2596 {
2597 const char* legacyKey = ulocimp_toLegacyKey(keyword);
2598 if (legacyKey == NULL) {
2599 // Checks if the specified locale key is well-formed with the legacy locale syntax.
2600 //
2601 // Note:
2602 // Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
2603 // However, a key should not contain '=' obviously. For now, all existing
2604 // keys are using ASCII alphabetic letters only. We won't add any new key
2605 // that is not compatible with the BCP 47 syntax. Therefore, we assume
2606 // a valid key consist from [0-9a-zA-Z], no symbols.
2607 if (isWellFormedLegacyKey(keyword)) {
2608 return keyword;
2609 }
2610 }
2611 return legacyKey;
2612 }
2613
2614 U_CAPI const char* U_EXPORT2
uloc_toLegacyType(const char * keyword,const char * value)2615 uloc_toLegacyType(const char* keyword, const char* value)
2616 {
2617 const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
2618 if (legacyType == NULL) {
2619 // Checks if the specified locale type is well-formed with the legacy locale syntax.
2620 //
2621 // Note:
2622 // Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
2623 // However, a type should not contain '=' obviously. For now, all existing
2624 // types are using ASCII alphabetic letters with a few symbol letters. We won't
2625 // add any new type that is not compatible with the BCP 47 syntax except timezone
2626 // IDs. For now, we assume a valid type start with [0-9a-zA-Z], but may contain
2627 // '-' '_' '/' in the middle.
2628 if (isWellFormedLegacyType(value)) {
2629 return value;
2630 }
2631 }
2632 return legacyType;
2633 }
2634
2635 /*eof*/
2636