1 /*
2 **********************************************************************
3 * Copyright (C) 1997-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * File ULOC.CPP
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 04/01/97 aliu Creation.
13 * 08/21/98 stephen JDK 1.2 sync
14 * 12/08/98 rtg New Locale implementation and C API
15 * 03/15/99 damiba overhaul.
16 * 04/06/99 stephen changed setDefault() to realloc and copy
17 * 06/14/99 stephen Changed calls to ures_open for new params
18 * 07/21/99 stephen Modified setDefault() to propagate to C++
19 * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
20 * brought canonicalization code into line with spec
21 *****************************************************************************/
22
23 /*
24 POSIX's locale format, from putil.c: [no spaces]
25
26 ll [ _CC ] [ . MM ] [ @ VV]
27
28 l = lang, C = ctry, M = charmap, V = variant
29 */
30
31 #include "unicode/utypes.h"
32 #include "unicode/ustring.h"
33 #include "unicode/uloc.h"
34
35 #include "putilimp.h"
36 #include "ustr_imp.h"
37 #include "ulocimp.h"
38 #include "umutex.h"
39 #include "cstring.h"
40 #include "cmemory.h"
41 #include "locmap.h"
42 #include "uarrsort.h"
43 #include "uenumimp.h"
44 #include "uassert.h"
45
46 #include <stdio.h> /* for sprintf */
47
48 /* ### Declarations **************************************************/
49
/*
 * Locale stuff from locid.cpp.
 * Only the prototypes are visible here; presumably these set and fetch the
 * default locale ID -- confirm against locid.cpp.
 */
U_CFUNC void locale_set_default(const char *id);
U_CFUNC const char *locale_get_default(void);

/*
 * Forward declaration of locale_getKeywords(). Its parameter list is the
 * same as that of the static helper _getKeywords() in this file, minus the
 * addKeyword/addValue pair.
 */
U_CFUNC int32_t
locale_getKeywords(const char *localeID,
                   char prev,
                   char *keywords, int32_t keywordCapacity,
                   char *values, int32_t valuesCapacity, int32_t *valLen,
                   UBool valuesToo,
                   UErrorCode *status);
60
61 /* ### Data tables **************************************************/
62
/**
 * Table of language codes, both 2- and 3-letter, with preference
 * given to 2-letter codes where possible.  Includes 3-letter codes
 * that lack a 2-letter equivalent.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with LANGUAGES_3, with corresponding
 * entries matched.
 *
 * Layout: a first list terminated by a NULL entry, followed by a
 * second list and another NULL entry.  The first list is visible to
 * user code when this array is returned by API.  The second list
 * contains codes we support, but do not expose through user API.
 *
 * Notes
 *
 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
 * include the revisions up to 2001/7/27 *CWB*
 *
 * The 3-character codes are the terminology codes, as in RFC 3066.
 * This is compatible with prior ICU codes.
 *
 * "in", "iw", "ji", "jw" and "sh" have been withdrawn.  They are still
 * in the table, but now in the second list at its end, because their
 * 3-character codes are duplicates.  This avoids bad searches going
 * from 3- to 2-character codes.
 *
 * The range qaa-qtz is reserved for local use.
 */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20130531 */
static const char * const LANGUAGES[] = {
    "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af",
    "afa", "afh", "agq", "ain", "ak", "akk", "ale", "alg",
    "alt", "am", "an", "ang", "anp", "apa", "ar", "arc",
    "arn", "arp", "art", "arw", "as", "asa", "ast", "ath",
    "aus", "av", "awa", "ay", "az",
    "ba", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
    "bbj", "be", "bej", "bem", "ber", "bez", "bfd", "bg",
    "bh", "bho", "bi", "bik", "bin", "bkm", "bla", "bm",
    "bn", "bnt", "bo", "br", "bra", "brx", "bs", "bss",
    "btk", "bua", "bug", "bum", "byn", "byv",
    "ca", "cad", "cai", "car", "cau", "cay", "cch", "ce",
    "ceb", "cel", "cgg", "ch", "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "co",
    "cop", "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs",
    "csb", "cu", "cus", "cv", "cy",
    "da", "dak", "dar", "dav", "day", "de", "del", "den",
    "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
    "dv", "dyo", "dyu", "dz", "dzg",
    "ebu", "ee", "efi", "egy", "eka", "el", "elx", "en",
    "enm", "eo", "es", "et", "eu", "ewo",
    "fa", "fan", "fat", "ff", "fi", "fil", "fiu", "fj",
    "fo", "fon", "fr", "frm", "fro", "frr", "frs", "fur",
    "fy",
    "ga", "gaa", "gay", "gba", "gd", "gem", "gez", "gil",
    "gl", "gmh", "gn", "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "gu", "guz", "gv", "gwi",
    "ha", "hai", "haw", "he", "hi", "hil", "him", "hit",
    "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy",
    "hz",
    "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ijo",
    "ik", "ilo", "inc", "ine", "inh", "io", "ira", "iro",
    "is", "it", "iu",
    "ja", "jbo", "jgo", "jmc", "jpr", "jrb", "jv",
    "ka", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
    "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kg", "kha",
    "khi", "kho", "khq", "ki", "kj", "kk", "kkj", "kl",
    "kln", "km", "kmb", "kn", "ko", "kok", "kos", "kpe",
    "kr", "krc", "krl", "kro", "kru", "ks", "ksb", "ksf",
    "ksh", "ku", "kum", "kut", "kv", "kw", "ky",
    "la", "lad", "lag", "lah", "lam", "lb", "lez", "lg",
    "li", "lkt", "ln", "lo", "lol", "loz", "lt", "lu",
    "lua", "lui", "lun", "luo", "lus", "luy", "lv",
    "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
    "mde", "mdf", "mdr", "men", "mer", "mfe", "mg", "mga",
    "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
    "mkh", "ml", "mn", "mnc", "mni", "mno", "mo", "moh",
    "mos", "mr", "ms", "mt", "mua", "mul", "mun", "mus",
    "mwl", "mwr", "my", "mye", "myn", "myv",
    "na", "nah", "nai", "nap", "naq", "nb", "nd", "nds",
    "ne", "new", "ng", "nia", "nic", "niu", "nl", "nmg",
    "nn", "nnh", "no", "nog", "non", "nqo", "nr", "nso",
    "nub", "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo",
    "nzi",
    "oc", "oj", "om", "or", "os", "osa", "ota", "oto",
    "pa", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
    "phi", "phn", "pi", "pl", "pon", "pra", "pro", "ps",
    "pt",
    "qu",
    "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rof",
    "rom", "ru", "rup", "rw", "rwk",
    "sa", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
    "sat", "sba", "sbp", "sc", "scn", "sco", "sd", "se",
    "see", "seh", "sel", "sem", "ses", "sg", "sga", "sgn",
    "shi", "shn", "shu", "si", "sid", "sio", "sit",
    "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn",
    "sms", "sn", "snk", "so", "sog", "son", "sq", "sr",
    "srn", "srr", "ss", "ssa", "ssy", "st", "su", "suk",
    "sus", "sux", "sv", "sw", "swb", "swc", "syc", "syr",
    "ta", "tai", "te", "tem", "teo", "ter", "tet", "tg",
    "th", "ti", "tig", "tiv", "tk", "tkl", "tl", "tlh",
    "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv",
    "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw",
    "twq", "ty", "tyv", "tzm",
    "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
    "vai", "ve", "vi", "vo", "vot", "vun",
    "wa", "wae", "wak", "wal", "war", "was", "wen", "wo",
    "xal", "xh", "xog",
    "yao", "yap", "yav", "ybb", "yi", "yo", "ypk", "yue",
    "za", "zap", "zbl", "zen", "zgh", "zh", "znd", "zu",
    "zun", "zxx", "zza",
NULL, /* end of the first (user-visible) list */
    "in", "iw", "ji", "jw", "sh", /* obsolete language codes */
NULL /* end of the second (internal) list */
};
181
/*
 * Deprecated 2-letter language codes and their replacements.
 * The two arrays are parallel: REPLACEMENT_LANGUAGES[i] is the code listed
 * for DEPRECATED_LANGUAGES[i] (in->id, iw->he, ji->yi, jw->jv).
 * Both end with two NULL entries, mirroring the double-NULL layout of
 * LANGUAGES; keep the two arrays the same length.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};
188
/**
 * Table of 3-letter language codes.
 *
 * This is a lookup table used to convert 3-letter language codes to
 * their 2-letter equivalent, where possible.  It must be kept in sync
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
 * same language as LANGUAGES_3[i].  Commented-out lines, where present,
 * are copied from LANGUAGES to make comparing the two tables by eye
 * easier.
 *
 * Where a 3-letter language code has no 2-letter equivalent, the
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 *
 * Layout: a first list terminated by a NULL entry, followed by a
 * second list and another NULL entry.  The two lists correspond to
 * the two lists in LANGUAGES.
 */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20130531 */
static const char * const LANGUAGES_3[] = {
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr",
    "afa", "afh", "agq", "ain", "aka", "akk", "ale", "alg",
    "alt", "amh", "arg", "ang", "anp", "apa", "ara", "arc",
    "arn", "arp", "art", "arw", "asm", "asa", "ast", "ath",
    "aus", "ava", "awa", "aym", "aze",
    "bak", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
    "bbj", "bel", "bej", "bem", "ber", "bez", "bfd", "bul",
    "bih", "bho", "bis", "bik", "bin", "bkm", "bla", "bam",
    "ben", "bnt", "bod", "bre", "bra", "brx", "bos", "bss",
    "btk", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "cai", "car", "cau", "cay", "cch", "che",
    "ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "cos",
    "cop", "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces",
    "csb", "chu", "cus", "chv", "cym",
    "dan", "dak", "dar", "dav", "day", "deu", "del", "den",
    "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
    "div", "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
    "enm", "epo", "spa", "est", "eus", "ewo",
    "fas", "fan", "fat", "ful", "fin", "fil", "fiu", "fij",
    "fao", "fon", "fra", "frm", "fro", "frr", "frs", "fur",
    "fry",
    "gle", "gaa", "gay", "gba", "gla", "gem", "gez", "gil",
    "glg", "gmh", "grn", "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "guj", "guz", "glv", "gwi",
    "hau", "hai", "haw", "heb", "hin", "hil", "him", "hit",
    "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye",
    "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ijo",
    "ipk", "ilo", "inc", "ine", "inh", "ido", "ira", "iro",
    "isl", "ita", "iku",
    "jpn", "jbo", "jgo", "jmc", "jpr", "jrb", "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
    "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kon", "kha",
    "khi", "kho", "khq", "kik", "kua", "kaz", "kkj", "kal",
    "kln", "khm", "kmb", "kan", "kor", "kok", "kos", "kpe",
    "kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf",
    "ksh", "kur", "kum", "kut", "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lug",
    "lim", "lkt", "lin", "lao", "lol", "loz", "lit", "lub",
    "lua", "lui", "lun", "luo", "lus", "luy", "lav",
    "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
    "mde", "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mkh", "mal", "mon", "mnc", "mni", "mno", "mol", "moh",
    "mos", "mar", "msa", "mlt", "mua", "mul", "mun", "mus",
    "mwl", "mwr", "mya", "mye", "myn", "myv",
    "nau", "nah", "nai", "nap", "naq", "nob", "nde", "nds",
    "nep", "new", "ndo", "nia", "nic", "niu", "nld", "nmg",
    "nno", "nnh", "nor", "nog", "non", "nqo", "nbl", "nso",
    "nub", "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo",
    "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota", "oto",
    "pan", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
    "phi", "phn", "pli", "pol", "pon", "pra", "pro", "pus",
    "por",
    "que",
    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rof",
    "rom", "rus", "rup", "kin", "rwk",
    "san", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
    "sat", "sba", "sbp", "srd", "scn", "sco", "snd", "sme",
    "see", "seh", "sel", "sem", "ses", "sag", "sga", "sgn",
    "shi", "shn", "shu", "sin", "sid", "sio", "sit",
    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
    "srn", "srr", "ssw", "ssa", "ssy", "sot", "sun", "suk",
    "sus", "sux", "swe", "swa", "swb", "swc", "syc", "syr",
    "tam", "tai", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tgl", "tlh",
    "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
    "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vie", "vol", "vot", "vun",
    "wln", "wae", "wak", "wal", "war", "was", "wen", "wol",
    "xal", "xho", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "ypk", "yue",
    "zha", "zap", "zbl", "zen", "zgh", "zho", "znd", "zul",
    "zun", "zxx", "zza",
NULL, /* end of the first list; matches the first NULL in LANGUAGES */
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
NULL /* end of the second list */
};
293
/**
 * Table of 2-letter country codes.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with COUNTRIES_3, with corresponding
 * entries matched.
 *
 * Layout: a first list terminated by a NULL entry, followed by a
 * second list and another NULL entry.  The first list is visible to
 * user code when this array is returned by API.  The second list
 * contains codes we support, but do not expose through user API.
 *
 * Notes:
 *
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
 * new codes keeping the old ones for compatibility updated to include
 * 1999/12/03 revisions *CWB*
 *
 * RO(ROM) is now RO(ROU) according to
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 * ("RO" therefore appears in both lists: paired with "ROU" in the first
 * and with the old "ROM" in the second; see COUNTRIES_3.)
 */
static const char * const COUNTRIES[] = {
    "AD", "AE", "AF", "AG", "AI", "AL", "AM",
    "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ",
    "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI",
    "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
    "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
    "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
    "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
    "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
    "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
    "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
    "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
    "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
    "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
    "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
    "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
    "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
    "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK",
    "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS",
    "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA",
    "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP",
    "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG",
    "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT",
    "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA",
    "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ",
    "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV",
    "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ",
    "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
    "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
    "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
    "WS", "YE", "YT", "ZA", "ZM", "ZW",
NULL, /* end of the first (user-visible) list */
    "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
NULL /* end of the second (internal) list */
};
353
/*
 * Deprecated 2-letter country codes and their replacements.
 * The two arrays are parallel: REPLACEMENT_COUNTRIES[i] is the code listed
 * for DEPRECATED_COUNTRIES[i]; the commented-out row below repeats the
 * deprecated codes so the pairing can be checked by eye.
 * Both end with two NULL entries; keep the two arrays the same length.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
};
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes */
};
361
/**
 * Table of 3-letter country codes.
 *
 * This is a lookup table used to convert 3-letter country codes to
 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
 * For all valid i, COUNTRIES[i] must refer to the same country as
 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
 * to make comparing the two tables by eye easier.
 *
 * Layout: a first list terminated by a NULL entry, followed by a
 * second list and another NULL entry.  The two lists correspond to
 * the two lists in COUNTRIES.
 */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL, /* end of the first list; matches the first NULL in COUNTRIES */
/*  "AN",  "BU",  "CS",  "FX",  "RO",  "SU",  "TP",  "YD",  "YU",  "ZR" */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
NULL /* end of the second list */
};
441
/**
 * One row of CANONICALIZE_MAP: an input locale ID, the ID it is rewritten
 * to, and an optional keyword/value pair that accompanies the rewritten ID.
 * keyword and value are either both set or both NULL.
 */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if kw==NULL */
} CanonicalizationMap;
448
/**
 * A map to canonicalize locale IDs.  This handles a variety of
 * different semantic kinds of transformations.
 *
 * Each row gives an input ID, the canonical ID that replaces it and,
 * for some rows, a keyword/value pair carrying the information the old
 * ID encoded in its variant (for example the "_PREEURO" rows supply a
 * "currency" keyword).  The trailing comment on a row names the origin
 * of the legacy ID.  The table has no terminator entry, so its length
 * has to be taken from the array size.
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    { "",               "en_US_POSIX", NULL, NULL }, /* .NET name */
    { "c",              "en_US_POSIX", NULL, NULL }, /* POSIX name */
    { "posix",          "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
    { "art_LOJBAN",     "jbo", NULL, NULL }, /* registered name */
    { "az_AZ_CYRL",     "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
    { "az_AZ_LATN",     "az_Latn_AZ", NULL, NULL }, /* .NET name */
    { "ca_ES_PREEURO",  "ca_ES", "currency", "ESP" },
    { "de__PHONEBOOK",  "de", "collation", "phonebook" }, /* Old ICU name */
    { "de_AT_PREEURO",  "de_AT", "currency", "ATS" },
    { "de_DE_PREEURO",  "de_DE", "currency", "DEM" },
    { "de_LU_PREEURO",  "de_LU", "currency", "LUF" },
    { "el_GR_PREEURO",  "el_GR", "currency", "GRD" },
    { "en_BE_PREEURO",  "en_BE", "currency", "BEF" },
    { "en_IE_PREEURO",  "en_IE", "currency", "IEP" },
    { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
    { "es_ES_PREEURO",  "es_ES", "currency", "ESP" },
    { "eu_ES_PREEURO",  "eu_ES", "currency", "ESP" },
    { "fi_FI_PREEURO",  "fi_FI", "currency", "FIM" },
    { "fr_BE_PREEURO",  "fr_BE", "currency", "BEF" },
    { "fr_FR_PREEURO",  "fr_FR", "currency", "FRF" },
    { "fr_LU_PREEURO",  "fr_LU", "currency", "LUF" },
    { "ga_IE_PREEURO",  "ga_IE", "currency", "IEP" },
    { "gl_ES_PREEURO",  "gl_ES", "currency", "ESP" },
    { "hi__DIRECT",     "hi", "collation", "direct" }, /* Old ICU name */
    { "it_IT_PREEURO",  "it_IT", "currency", "ITL" },
    { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
    { "nb_NO_NY",       "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
    { "nl_BE_PREEURO",  "nl_BE", "currency", "BEF" },
    { "nl_NL_PREEURO",  "nl_NL", "currency", "NLG" },
    { "pt_PT_PREEURO",  "pt_PT", "currency", "PTE" },
    { "sr_SP_CYRL",     "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
    { "sr_SP_LATN",     "sr_Latn_RS", NULL, NULL }, /* .NET name */
    { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
    { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
    { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
    { "uz_UZ_CYRL",     "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
    { "uz_UZ_LATN",     "uz_Latn_UZ", NULL, NULL }, /* .NET name */
    { "zh_CHS",         "zh_Hans", NULL, NULL }, /* .NET name */
    { "zh_CHT",         "zh_Hant", NULL, NULL }, /* .NET name */
    { "zh_GAN",         "gan", NULL, NULL }, /* registered name */
    { "zh_GUOYU",       "zh", NULL, NULL }, /* registered name */
    { "zh_HAKKA",       "hak", NULL, NULL }, /* registered name */
    { "zh_MIN_NAN",     "nan", NULL, NULL }, /* registered name */
    { "zh_WUU",         "wuu", NULL, NULL }, /* registered name */
    { "zh_XIANG",       "hsn", NULL, NULL }, /* registered name */
    { "zh_YUE",         "yue", NULL, NULL }, /* registered name */
};
501
/**
 * One row of VARIANT_MAP: a variant subtag and the keyword/value pair
 * associated with it.
 */
typedef struct VariantMap {
    const char *variant;          /* input ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if kw==NULL */
} VariantMap;

/*
 * Variants that stand for a keyword setting.  Every row in this table
 * carries a non-NULL keyword and value.  The table has no terminator
 * entry, so its length has to be taken from the array size.
 */
static const VariantMap VARIANT_MAP[] = {
    { "EURO",   "currency", "EUR" },
    { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
    { "STROKE", "collation", "stroke" }  /* Solaris variant */
};
513
514 /* ### BCP47 Conversion *******************************************/
/*
 * Test whether the locale id looks like a BCP47 tag with an extension:
 * it is non-NULL, contains no '@', and its shortest subtag (as reported by
 * getShortestSubtagLength) is exactly one character long.
 *
 * The macro now inspects its own argument.  It previously ignored `id` in
 * the last test and read a variable literally named `localeID` from the
 * caller's scope, which only worked when the caller passed a variable of
 * exactly that name.
 */
#define _hasBCP47Extension(id) \
    ((id) != NULL && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
/*
 * Converts the BCP47 id to Unicode id.  On success finalID is set to buffer;
 * if uloc_forLanguageTag() returns <= 0 or reports a failure through err,
 * finalID is set to the unconverted id instead.
 *
 * NOTE: this expands to a bare if/else statement, so it must only be used
 * as a complete statement inside braces (never as the unbraced body of
 * another if).  The shape is kept as-is so existing call sites keep
 * compiling unchanged.
 */
#define _ConvertBCP47(finalID, id, buffer, length,err) \
        if (uloc_forLanguageTag((id), (buffer), (length), NULL, (err)) <= 0 || U_FAILURE(*(err))) { \
            (finalID)=(id); \
        } else { \
            (finalID)=(buffer); \
        }
524 /* Gets the size of the shortest subtag in the given localeID. */
getShortestSubtagLength(const char * localeID)525 static int32_t getShortestSubtagLength(const char *localeID) {
526 int32_t localeIDLength = uprv_strlen(localeID);
527 int32_t length = localeIDLength;
528 int32_t tmpLength = 0;
529 int32_t i;
530 UBool reset = TRUE;
531
532 for (i = 0; i < localeIDLength; i++) {
533 if (localeID[i] != '_' && localeID[i] != '-') {
534 if (reset) {
535 tmpLength = 0;
536 reset = FALSE;
537 }
538 tmpLength++;
539 } else {
540 if (tmpLength != 0 && tmpLength < length) {
541 length = tmpLength;
542 }
543 reset = TRUE;
544 }
545 }
546
547 return length;
548 }
549
550 /* ### Keywords **************************************************/
551
/* Size, including the terminating NUL, of the buffer that holds one
   canonicalized keyword name (see KeywordStruct.keyword). */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Capacity of the keyword list that _getKeywords() builds for one locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25
554
555 U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char * localeID)556 locale_getKeywordsStart(const char *localeID) {
557 const char *result = NULL;
558 if((result = uprv_strchr(localeID, '@')) != NULL) {
559 return result;
560 }
561 #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
562 else {
563 /* We do this because the @ sign is variant, and the @ sign used on one
564 EBCDIC machine won't be compiled the same way on other EBCDIC based
565 machines. */
566 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
567 const uint8_t *charToFind = ebcdicSigns;
568 while(*charToFind) {
569 if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
570 return result;
571 }
572 charToFind++;
573 }
574 }
575 #endif
576 return NULL;
577 }
578
579 /**
580 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
581 * @param keywordName incoming name to be canonicalized
582 * @param status return status (keyword too long)
583 * @return length of the keyword name
584 */
locale_canonKeywordName(char * buf,const char * keywordName,UErrorCode * status)585 static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
586 {
587 int32_t i;
588 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
589
590 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
591 /* keyword name too long for internal buffer */
592 *status = U_INTERNAL_PROGRAM_ERROR;
593 return 0;
594 }
595
596 /* normalize the keyword name */
597 for(i = 0; i < keywordNameLen; i++) {
598 buf[i] = uprv_tolower(keywordName[i]);
599 }
600 buf[i] = 0;
601
602 return keywordNameLen;
603 }
604
/*
 * One parsed keyword/value pair, as collected by _getKeywords().
 * The keyword name is stored by value; the value is only referenced, as a
 * pointer/length pair, and is not zero-terminated at valueLen.
 */
typedef struct {
    char keyword[ULOC_KEYWORD_BUFFER_LEN]; /* canonicalized, zero-terminated keyword name */
    int32_t keywordLen;                    /* length of keyword, without the terminator */
    const char *valueStart;                /* first character of the value (not owned) */
    int32_t valueLen;                      /* number of characters in the value */
} KeywordStruct;
611
612 static int32_t U_CALLCONV
compareKeywordStructs(const void *,const void * left,const void * right)613 compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
614 const char* leftString = ((const KeywordStruct *)left)->keyword;
615 const char* rightString = ((const KeywordStruct *)right)->keyword;
616 return uprv_strcmp(leftString, rightString);
617 }
618
619 /**
620 * Both addKeyword and addValue must already be in canonical form.
621 * Either both addKeyword and addValue are NULL, or neither is NULL.
622 * If they are not NULL they must be zero terminated.
623 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
624 */
625 static int32_t
_getKeywords(const char * localeID,char prev,char * keywords,int32_t keywordCapacity,char * values,int32_t valuesCapacity,int32_t * valLen,UBool valuesToo,const char * addKeyword,const char * addValue,UErrorCode * status)626 _getKeywords(const char *localeID,
627 char prev,
628 char *keywords, int32_t keywordCapacity,
629 char *values, int32_t valuesCapacity, int32_t *valLen,
630 UBool valuesToo,
631 const char* addKeyword,
632 const char* addValue,
633 UErrorCode *status)
634 {
635 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
636
637 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
638 int32_t numKeywords = 0;
639 const char* pos = localeID;
640 const char* equalSign = NULL;
641 const char* semicolon = NULL;
642 int32_t i = 0, j, n;
643 int32_t keywordsLen = 0;
644 int32_t valuesLen = 0;
645
646 if(prev == '@') { /* start of keyword definition */
647 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
648 do {
649 UBool duplicate = FALSE;
650 /* skip leading spaces */
651 while(*pos == ' ') {
652 pos++;
653 }
654 if (!*pos) { /* handle trailing "; " */
655 break;
656 }
657 if(numKeywords == maxKeywords) {
658 *status = U_INTERNAL_PROGRAM_ERROR;
659 return 0;
660 }
661 equalSign = uprv_strchr(pos, '=');
662 semicolon = uprv_strchr(pos, ';');
663 /* lack of '=' [foo@currency] is illegal */
664 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
665 if(!equalSign || (semicolon && semicolon<equalSign)) {
666 *status = U_INVALID_FORMAT_ERROR;
667 return 0;
668 }
669 /* need to normalize both keyword and keyword name */
670 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
671 /* keyword name too long for internal buffer */
672 *status = U_INTERNAL_PROGRAM_ERROR;
673 return 0;
674 }
675 for(i = 0, n = 0; i < equalSign - pos; ++i) {
676 if (pos[i] != ' ') {
677 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
678 }
679 }
680
681 /* zero-length keyword is an error. */
682 if (n == 0) {
683 *status = U_INVALID_FORMAT_ERROR;
684 return 0;
685 }
686
687 keywordList[numKeywords].keyword[n] = 0;
688 keywordList[numKeywords].keywordLen = n;
689 /* now grab the value part. First we skip the '=' */
690 equalSign++;
691 /* then we leading spaces */
692 while(*equalSign == ' ') {
693 equalSign++;
694 }
695
696 /* Premature end or zero-length value */
697 if (!equalSign || equalSign == semicolon) {
698 *status = U_INVALID_FORMAT_ERROR;
699 return 0;
700 }
701
702 keywordList[numKeywords].valueStart = equalSign;
703
704 pos = semicolon;
705 i = 0;
706 if(pos) {
707 while(*(pos - i - 1) == ' ') {
708 i++;
709 }
710 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
711 pos++;
712 } else {
713 i = (int32_t)uprv_strlen(equalSign);
714 while(i && equalSign[i-1] == ' ') {
715 i--;
716 }
717 keywordList[numKeywords].valueLen = i;
718 }
719 /* If this is a duplicate keyword, then ignore it */
720 for (j=0; j<numKeywords; ++j) {
721 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
722 duplicate = TRUE;
723 break;
724 }
725 }
726 if (!duplicate) {
727 ++numKeywords;
728 }
729 } while(pos);
730
731 /* Handle addKeyword/addValue. */
732 if (addKeyword != NULL) {
733 UBool duplicate = FALSE;
734 U_ASSERT(addValue != NULL);
735 /* Search for duplicate; if found, do nothing. Explicit keyword
736 overrides addKeyword. */
737 for (j=0; j<numKeywords; ++j) {
738 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
739 duplicate = TRUE;
740 break;
741 }
742 }
743 if (!duplicate) {
744 if (numKeywords == maxKeywords) {
745 *status = U_INTERNAL_PROGRAM_ERROR;
746 return 0;
747 }
748 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
749 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
750 keywordList[numKeywords].valueStart = addValue;
751 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
752 ++numKeywords;
753 }
754 } else {
755 U_ASSERT(addValue == NULL);
756 }
757
758 /* now we have a list of keywords */
759 /* we need to sort it */
760 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
761
762 /* Now construct the keyword part */
763 for(i = 0; i < numKeywords; i++) {
764 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
765 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
766 if(valuesToo) {
767 keywords[keywordsLen + keywordList[i].keywordLen] = '=';
768 } else {
769 keywords[keywordsLen + keywordList[i].keywordLen] = 0;
770 }
771 }
772 keywordsLen += keywordList[i].keywordLen + 1;
773 if(valuesToo) {
774 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
775 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
776 }
777 keywordsLen += keywordList[i].valueLen;
778
779 if(i < numKeywords - 1) {
780 if(keywordsLen < keywordCapacity) {
781 keywords[keywordsLen] = ';';
782 }
783 keywordsLen++;
784 }
785 }
786 if(values) {
787 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
788 uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
789 values[valuesLen + keywordList[i].valueLen] = 0;
790 }
791 valuesLen += keywordList[i].valueLen + 1;
792 }
793 }
794 if(values) {
795 values[valuesLen] = 0;
796 if(valLen) {
797 *valLen = valuesLen;
798 }
799 }
800 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
801 } else {
802 return 0;
803 }
804 }
805
806 U_CFUNC int32_t
locale_getKeywords(const char * localeID,char prev,char * keywords,int32_t keywordCapacity,char * values,int32_t valuesCapacity,int32_t * valLen,UBool valuesToo,UErrorCode * status)807 locale_getKeywords(const char *localeID,
808 char prev,
809 char *keywords, int32_t keywordCapacity,
810 char *values, int32_t valuesCapacity, int32_t *valLen,
811 UBool valuesToo,
812 UErrorCode *status) {
813 return _getKeywords(localeID, prev, keywords, keywordCapacity,
814 values, valuesCapacity, valLen, valuesToo,
815 NULL, NULL, status);
816 }
817
818 U_CAPI int32_t U_EXPORT2
uloc_getKeywordValue(const char * localeID,const char * keywordName,char * buffer,int32_t bufferCapacity,UErrorCode * status)819 uloc_getKeywordValue(const char* localeID,
820 const char* keywordName,
821 char* buffer, int32_t bufferCapacity,
822 UErrorCode* status)
823 {
824 const char* startSearchHere = NULL;
825 const char* nextSeparator = NULL;
826 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
827 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
828 int32_t i = 0;
829 int32_t result = 0;
830
831 if(status && U_SUCCESS(*status) && localeID) {
832 char tempBuffer[ULOC_FULLNAME_CAPACITY];
833 const char* tmpLocaleID;
834
835 if (_hasBCP47Extension(localeID)) {
836 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
837 } else {
838 tmpLocaleID=localeID;
839 }
840
841 startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
842 if(startSearchHere == NULL) {
843 /* no keywords, return at once */
844 return 0;
845 }
846
847 locale_canonKeywordName(keywordNameBuffer, keywordName, status);
848 if(U_FAILURE(*status)) {
849 return 0;
850 }
851
852 /* find the first keyword */
853 while(startSearchHere) {
854 startSearchHere++;
855 /* skip leading spaces (allowed?) */
856 while(*startSearchHere == ' ') {
857 startSearchHere++;
858 }
859 nextSeparator = uprv_strchr(startSearchHere, '=');
860 /* need to normalize both keyword and keyword name */
861 if(!nextSeparator) {
862 break;
863 }
864 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
865 /* keyword name too long for internal buffer */
866 *status = U_INTERNAL_PROGRAM_ERROR;
867 return 0;
868 }
869 for(i = 0; i < nextSeparator - startSearchHere; i++) {
870 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
871 }
872 /* trim trailing spaces */
873 while(startSearchHere[i-1] == ' ') {
874 i--;
875 U_ASSERT(i>=0);
876 }
877 localeKeywordNameBuffer[i] = 0;
878
879 startSearchHere = uprv_strchr(nextSeparator, ';');
880
881 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
882 nextSeparator++;
883 while(*nextSeparator == ' ') {
884 nextSeparator++;
885 }
886 /* we actually found the keyword. Copy the value */
887 if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
888 while(*(startSearchHere-1) == ' ') {
889 startSearchHere--;
890 }
891 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
892 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
893 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
894 i = (int32_t)uprv_strlen(nextSeparator);
895 while(nextSeparator[i - 1] == ' ') {
896 i--;
897 }
898 uprv_strncpy(buffer, nextSeparator, i);
899 result = u_terminateChars(buffer, bufferCapacity, i, status);
900 } else {
901 /* give a bigger buffer, please */
902 *status = U_BUFFER_OVERFLOW_ERROR;
903 if(startSearchHere) {
904 result = (int32_t)(startSearchHere - nextSeparator);
905 } else {
906 result = (int32_t)uprv_strlen(nextSeparator);
907 }
908 }
909 return result;
910 }
911 }
912 }
913 return 0;
914 }
915
916 U_CAPI int32_t U_EXPORT2
uloc_setKeywordValue(const char * keywordName,const char * keywordValue,char * buffer,int32_t bufferCapacity,UErrorCode * status)917 uloc_setKeywordValue(const char* keywordName,
918 const char* keywordValue,
919 char* buffer, int32_t bufferCapacity,
920 UErrorCode* status)
921 {
922 /* TODO: sorting. removal. */
923 int32_t keywordNameLen;
924 int32_t keywordValueLen;
925 int32_t bufLen;
926 int32_t needLen = 0;
927 int32_t foundValueLen;
928 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
929 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
930 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
931 int32_t i = 0;
932 int32_t rc;
933 char* nextSeparator = NULL;
934 char* nextEqualsign = NULL;
935 char* startSearchHere = NULL;
936 char* keywordStart = NULL;
937 char *insertHere = NULL;
938 if(U_FAILURE(*status)) {
939 return -1;
940 }
941 if(bufferCapacity>1) {
942 bufLen = (int32_t)uprv_strlen(buffer);
943 } else {
944 *status = U_ILLEGAL_ARGUMENT_ERROR;
945 return 0;
946 }
947 if(bufferCapacity<bufLen) {
948 /* The capacity is less than the length?! Is this NULL terminated? */
949 *status = U_ILLEGAL_ARGUMENT_ERROR;
950 return 0;
951 }
952 if(keywordValue && !*keywordValue) {
953 keywordValue = NULL;
954 }
955 if(keywordValue) {
956 keywordValueLen = (int32_t)uprv_strlen(keywordValue);
957 } else {
958 keywordValueLen = 0;
959 }
960 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
961 if(U_FAILURE(*status)) {
962 return 0;
963 }
964 startSearchHere = (char*)locale_getKeywordsStart(buffer);
965 if(startSearchHere == NULL || (startSearchHere[1]==0)) {
966 if(!keywordValue) { /* no keywords = nothing to remove */
967 return bufLen;
968 }
969
970 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
971 if(startSearchHere) { /* had a single @ */
972 needLen--; /* already had the @ */
973 /* startSearchHere points at the @ */
974 } else {
975 startSearchHere=buffer+bufLen;
976 }
977 if(needLen >= bufferCapacity) {
978 *status = U_BUFFER_OVERFLOW_ERROR;
979 return needLen; /* no change */
980 }
981 *startSearchHere = '@';
982 startSearchHere++;
983 uprv_strcpy(startSearchHere, keywordNameBuffer);
984 startSearchHere += keywordNameLen;
985 *startSearchHere = '=';
986 startSearchHere++;
987 uprv_strcpy(startSearchHere, keywordValue);
988 startSearchHere+=keywordValueLen;
989 return needLen;
990 } /* end shortcut - no @ */
991
992 keywordStart = startSearchHere;
993 /* search for keyword */
994 while(keywordStart) {
995 keywordStart++;
996 /* skip leading spaces (allowed?) */
997 while(*keywordStart == ' ') {
998 keywordStart++;
999 }
1000 nextEqualsign = uprv_strchr(keywordStart, '=');
1001 /* need to normalize both keyword and keyword name */
1002 if(!nextEqualsign) {
1003 break;
1004 }
1005 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
1006 /* keyword name too long for internal buffer */
1007 *status = U_INTERNAL_PROGRAM_ERROR;
1008 return 0;
1009 }
1010 for(i = 0; i < nextEqualsign - keywordStart; i++) {
1011 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
1012 }
1013 /* trim trailing spaces */
1014 while(keywordStart[i-1] == ' ') {
1015 i--;
1016 }
1017 U_ASSERT(i>=0 && i<ULOC_KEYWORD_BUFFER_LEN);
1018 localeKeywordNameBuffer[i] = 0;
1019
1020 nextSeparator = uprv_strchr(nextEqualsign, ';');
1021 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1022 if(rc == 0) {
1023 nextEqualsign++;
1024 while(*nextEqualsign == ' ') {
1025 nextEqualsign++;
1026 }
1027 /* we actually found the keyword. Change the value */
1028 if (nextSeparator) {
1029 keywordAtEnd = 0;
1030 foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
1031 } else {
1032 keywordAtEnd = 1;
1033 foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
1034 }
1035 if(keywordValue) { /* adding a value - not removing */
1036 if(foundValueLen == keywordValueLen) {
1037 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1038 return bufLen; /* no change in size */
1039 } else if(foundValueLen > keywordValueLen) {
1040 int32_t delta = foundValueLen - keywordValueLen;
1041 if(nextSeparator) { /* RH side */
1042 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1043 }
1044 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1045 bufLen -= delta;
1046 buffer[bufLen]=0;
1047 return bufLen;
1048 } else { /* FVL < KVL */
1049 int32_t delta = keywordValueLen - foundValueLen;
1050 if((bufLen+delta) >= bufferCapacity) {
1051 *status = U_BUFFER_OVERFLOW_ERROR;
1052 return bufLen+delta;
1053 }
1054 if(nextSeparator) { /* RH side */
1055 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1056 }
1057 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1058 bufLen += delta;
1059 buffer[bufLen]=0;
1060 return bufLen;
1061 }
1062 } else { /* removing a keyword */
1063 if(keywordAtEnd) {
1064 /* zero out the ';' or '@' just before startSearchhere */
1065 keywordStart[-1] = 0;
1066 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
1067 } else {
1068 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1069 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
1070 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
1071 }
1072 }
1073 } else if(rc<0){ /* end match keyword */
1074 /* could insert at this location. */
1075 insertHere = keywordStart;
1076 }
1077 keywordStart = nextSeparator;
1078 } /* end loop searching */
1079
1080 if(!keywordValue) {
1081 return bufLen; /* removal of non-extant keyword - no change */
1082 }
1083
1084 /* we know there is at least one keyword. */
1085 needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1086 if(needLen >= bufferCapacity) {
1087 *status = U_BUFFER_OVERFLOW_ERROR;
1088 return needLen; /* no change */
1089 }
1090
1091 if(insertHere) {
1092 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1093 keywordStart = insertHere;
1094 } else {
1095 keywordStart = buffer+bufLen;
1096 *keywordStart = ';';
1097 keywordStart++;
1098 }
1099 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1100 keywordStart += keywordNameLen;
1101 *keywordStart = '=';
1102 keywordStart++;
1103 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1104 keywordStart+=keywordValueLen;
1105 if(insertHere) {
1106 *keywordStart = ';';
1107 keywordStart++;
1108 }
1109 buffer[needLen]=0;
1110 return needLen;
1111 }
1112
1113 /* ### ID parsing implementation **************************************************/
1114
/* TRUE if 'a' is one of the letters x, X, i or I.
   The argument is expanded four times and is not parenthesized, so pass a
   simple expression without side effects. */
#define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
/* i.e. s[0] is a prefix letter and s[1] satisfies _isIDSeparator */
#define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant
 */
/* TRUE for NUL, '.' or '@'; the argument is expanded three times. */
#define _isTerminator(a) ((a==0)||(a=='.')||(a=='@'))
1125
_strnchr(const char * str,int32_t len,char c)1126 static char* _strnchr(const char* str, int32_t len, char c) {
1127 U_ASSERT(str != 0 && len >= 0);
1128 while (len-- != 0) {
1129 char d = *str;
1130 if (d == c) {
1131 return (char*) str;
1132 } else if (d == 0) {
1133 break;
1134 }
1135 ++str;
1136 }
1137 return NULL;
1138 }
1139
1140 /**
1141 * Lookup 'key' in the array 'list'. The array 'list' should contain
1142 * a NULL entry, followed by more entries, and a second NULL entry.
1143 *
1144 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1145 * COUNTRIES_3.
1146 */
_findIndex(const char * const * list,const char * key)1147 static int16_t _findIndex(const char* const* list, const char* key)
1148 {
1149 const char* const* anchor = list;
1150 int32_t pass = 0;
1151
1152 /* Make two passes through two NULL-terminated arrays at 'list' */
1153 while (pass++ < 2) {
1154 while (*list) {
1155 if (uprv_strcmp(key, *list) == 0) {
1156 return (int16_t)(list - anchor);
1157 }
1158 list++;
1159 }
1160 ++list; /* skip final NULL *CWB*/
1161 }
1162 return -1;
1163 }
1164
1165 /* count the length of src while copying it to dest; return strlen(src) */
1166 static inline int32_t
_copyCount(char * dest,int32_t destCapacity,const char * src)1167 _copyCount(char *dest, int32_t destCapacity, const char *src) {
1168 const char *anchor;
1169 char c;
1170
1171 anchor=src;
1172 for(;;) {
1173 if((c=*src)==0) {
1174 return (int32_t)(src-anchor);
1175 }
1176 if(destCapacity<=0) {
1177 return (int32_t)((src-anchor)+uprv_strlen(src));
1178 }
1179 ++src;
1180 *dest++=c;
1181 --destCapacity;
1182 }
1183 }
1184
1185 U_CFUNC const char*
uloc_getCurrentCountryID(const char * oldID)1186 uloc_getCurrentCountryID(const char* oldID){
1187 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1188 if (offset >= 0) {
1189 return REPLACEMENT_COUNTRIES[offset];
1190 }
1191 return oldID;
1192 }
1193 U_CFUNC const char*
uloc_getCurrentLanguageID(const char * oldID)1194 uloc_getCurrentLanguageID(const char* oldID){
1195 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1196 if (offset >= 0) {
1197 return REPLACEMENT_LANGUAGES[offset];
1198 }
1199 return oldID;
1200 }
1201 /*
1202 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1203 * avoid duplicating code to handle the earlier locale ID pieces
1204 * in the functions for the later ones by
1205 * setting the *pEnd pointer to where they stopped parsing
1206 *
1207 * TODO try to use this in Locale
1208 */
1209 U_CFUNC int32_t
ulocimp_getLanguage(const char * localeID,char * language,int32_t languageCapacity,const char ** pEnd)1210 ulocimp_getLanguage(const char *localeID,
1211 char *language, int32_t languageCapacity,
1212 const char **pEnd) {
1213 int32_t i=0;
1214 int32_t offset;
1215 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1216
1217 /* if it starts with i- or x- then copy that prefix */
1218 if(_isIDPrefix(localeID)) {
1219 if(i<languageCapacity) {
1220 language[i]=(char)uprv_tolower(*localeID);
1221 }
1222 if(i<languageCapacity) {
1223 language[i+1]='-';
1224 }
1225 i+=2;
1226 localeID+=2;
1227 }
1228
1229 /* copy the language as far as possible and count its length */
1230 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1231 if(i<languageCapacity) {
1232 language[i]=(char)uprv_tolower(*localeID);
1233 }
1234 if(i<3) {
1235 U_ASSERT(i>=0);
1236 lang[i]=(char)uprv_tolower(*localeID);
1237 }
1238 i++;
1239 localeID++;
1240 }
1241
1242 if(i==3) {
1243 /* convert 3 character code to 2 character code if possible *CWB*/
1244 offset=_findIndex(LANGUAGES_3, lang);
1245 if(offset>=0) {
1246 i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1247 }
1248 }
1249
1250 if(pEnd!=NULL) {
1251 *pEnd=localeID;
1252 }
1253 return i;
1254 }
1255
1256 U_CFUNC int32_t
ulocimp_getScript(const char * localeID,char * script,int32_t scriptCapacity,const char ** pEnd)1257 ulocimp_getScript(const char *localeID,
1258 char *script, int32_t scriptCapacity,
1259 const char **pEnd)
1260 {
1261 int32_t idLen = 0;
1262
1263 if (pEnd != NULL) {
1264 *pEnd = localeID;
1265 }
1266
1267 /* copy the second item as far as possible and count its length */
1268 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1269 && uprv_isASCIILetter(localeID[idLen])) {
1270 idLen++;
1271 }
1272
1273 /* If it's exactly 4 characters long, then it's a script and not a country. */
1274 if (idLen == 4) {
1275 int32_t i;
1276 if (pEnd != NULL) {
1277 *pEnd = localeID+idLen;
1278 }
1279 if(idLen > scriptCapacity) {
1280 idLen = scriptCapacity;
1281 }
1282 if (idLen >= 1) {
1283 script[0]=(char)uprv_toupper(*(localeID++));
1284 }
1285 for (i = 1; i < idLen; i++) {
1286 script[i]=(char)uprv_tolower(*(localeID++));
1287 }
1288 }
1289 else {
1290 idLen = 0;
1291 }
1292 return idLen;
1293 }
1294
1295 U_CFUNC int32_t
ulocimp_getCountry(const char * localeID,char * country,int32_t countryCapacity,const char ** pEnd)1296 ulocimp_getCountry(const char *localeID,
1297 char *country, int32_t countryCapacity,
1298 const char **pEnd)
1299 {
1300 int32_t idLen=0;
1301 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1302 int32_t offset;
1303
1304 /* copy the country as far as possible and count its length */
1305 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1306 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
1307 cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
1308 }
1309 idLen++;
1310 }
1311
1312 /* the country should be either length 2 or 3 */
1313 if (idLen == 2 || idLen == 3) {
1314 UBool gotCountry = FALSE;
1315 /* convert 3 character code to 2 character code if possible *CWB*/
1316 if(idLen==3) {
1317 offset=_findIndex(COUNTRIES_3, cnty);
1318 if(offset>=0) {
1319 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1320 gotCountry = TRUE;
1321 }
1322 }
1323 if (!gotCountry) {
1324 int32_t i = 0;
1325 for (i = 0; i < idLen; i++) {
1326 if (i < countryCapacity) {
1327 country[i]=(char)uprv_toupper(localeID[i]);
1328 }
1329 }
1330 }
1331 localeID+=idLen;
1332 } else {
1333 idLen = 0;
1334 }
1335
1336 if(pEnd!=NULL) {
1337 *pEnd=localeID;
1338 }
1339
1340 return idLen;
1341 }
1342
1343 /**
1344 * @param needSeparator if true, then add leading '_' if any variants
1345 * are added to 'variant'
1346 */
1347 static int32_t
_getVariantEx(const char * localeID,char prev,char * variant,int32_t variantCapacity,UBool needSeparator)1348 _getVariantEx(const char *localeID,
1349 char prev,
1350 char *variant, int32_t variantCapacity,
1351 UBool needSeparator) {
1352 int32_t i=0;
1353
1354 /* get one or more variant tags and separate them with '_' */
1355 if(_isIDSeparator(prev)) {
1356 /* get a variant string after a '-' or '_' */
1357 while(!_isTerminator(*localeID)) {
1358 if (needSeparator) {
1359 if (i<variantCapacity) {
1360 variant[i] = '_';
1361 }
1362 ++i;
1363 needSeparator = FALSE;
1364 }
1365 if(i<variantCapacity) {
1366 variant[i]=(char)uprv_toupper(*localeID);
1367 if(variant[i]=='-') {
1368 variant[i]='_';
1369 }
1370 }
1371 i++;
1372 localeID++;
1373 }
1374 }
1375
1376 /* if there is no variant tag after a '-' or '_' then look for '@' */
1377 if(i==0) {
1378 if(prev=='@') {
1379 /* keep localeID */
1380 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1381 ++localeID; /* point after the '@' */
1382 } else {
1383 return 0;
1384 }
1385 while(!_isTerminator(*localeID)) {
1386 if (needSeparator) {
1387 if (i<variantCapacity) {
1388 variant[i] = '_';
1389 }
1390 ++i;
1391 needSeparator = FALSE;
1392 }
1393 if(i<variantCapacity) {
1394 variant[i]=(char)uprv_toupper(*localeID);
1395 if(variant[i]=='-' || variant[i]==',') {
1396 variant[i]='_';
1397 }
1398 }
1399 i++;
1400 localeID++;
1401 }
1402 }
1403
1404 return i;
1405 }
1406
1407 static int32_t
_getVariant(const char * localeID,char prev,char * variant,int32_t variantCapacity)1408 _getVariant(const char *localeID,
1409 char prev,
1410 char *variant, int32_t variantCapacity) {
1411 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1412 }
1413
1414 /**
1415 * Delete ALL instances of a variant from the given list of one or
1416 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1417 * @param variants the source string of one or more variants,
1418 * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1419 * terminated; if it is, trailing zero will NOT be maintained.
1420 * @param variantsLen length of variants
1421 * @param toDelete variant to delete, without separators, e.g. "EURO"
1422 * or "PREEURO"; not zero terminated
1423 * @param toDeleteLen length of toDelete
1424 * @return number of characters deleted from variants
1425 */
1426 static int32_t
_deleteVariant(char * variants,int32_t variantsLen,const char * toDelete,int32_t toDeleteLen)1427 _deleteVariant(char* variants, int32_t variantsLen,
1428 const char* toDelete, int32_t toDeleteLen)
1429 {
1430 int32_t delta = 0; /* number of chars deleted */
1431 for (;;) {
1432 UBool flag = FALSE;
1433 if (variantsLen < toDeleteLen) {
1434 return delta;
1435 }
1436 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1437 (variantsLen == toDeleteLen ||
1438 (flag=(variants[toDeleteLen] == '_'))))
1439 {
1440 int32_t d = toDeleteLen + (flag?1:0);
1441 variantsLen -= d;
1442 delta += d;
1443 if (variantsLen > 0) {
1444 uprv_memmove(variants, variants+d, variantsLen);
1445 }
1446 } else {
1447 char* p = _strnchr(variants, variantsLen, '_');
1448 if (p == NULL) {
1449 return delta;
1450 }
1451 ++p;
1452 variantsLen -= (int32_t)(p - variants);
1453 variants = p;
1454 }
1455 }
1456 }
1457
1458 /* Keyword enumeration */
1459
/* Per-enumeration state stored in UEnumeration.context for keyword lists. */
typedef struct UKeywordsContext {
    /* Heap copy of the keyword list made by uloc_openKeywordList() and freed
       by uloc_kw_closeKeywords(). The enumeration callbacks walk it as
       consecutive NUL-terminated strings ending at an empty string. */
    char* keywords;
    /* Iteration cursor into 'keywords': the next keyword to hand out. */
    char* current;
} UKeywordsContext;
1464
1465 static void U_CALLCONV
uloc_kw_closeKeywords(UEnumeration * enumerator)1466 uloc_kw_closeKeywords(UEnumeration *enumerator) {
1467 uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1468 uprv_free(enumerator->context);
1469 uprv_free(enumerator);
1470 }
1471
1472 static int32_t U_CALLCONV
uloc_kw_countKeywords(UEnumeration * en,UErrorCode *)1473 uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1474 char *kw = ((UKeywordsContext *)en->context)->keywords;
1475 int32_t result = 0;
1476 while(*kw) {
1477 result++;
1478 kw += uprv_strlen(kw)+1;
1479 }
1480 return result;
1481 }
1482
1483 static const char* U_CALLCONV
uloc_kw_nextKeyword(UEnumeration * en,int32_t * resultLength,UErrorCode *)1484 uloc_kw_nextKeyword(UEnumeration* en,
1485 int32_t* resultLength,
1486 UErrorCode* /*status*/) {
1487 const char* result = ((UKeywordsContext *)en->context)->current;
1488 int32_t len = 0;
1489 if(*result) {
1490 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1491 ((UKeywordsContext *)en->context)->current += len+1;
1492 } else {
1493 result = NULL;
1494 }
1495 if (resultLength) {
1496 *resultLength = len;
1497 }
1498 return result;
1499 }
1500
1501 static void U_CALLCONV
uloc_kw_resetKeywords(UEnumeration * en,UErrorCode *)1502 uloc_kw_resetKeywords(UEnumeration* en,
1503 UErrorCode* /*status*/) {
1504 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1505 }
1506
/*
 * Template enumeration for keyword lists. uloc_openKeywordList() copies it
 * into each newly allocated UEnumeration and then sets that copy's context.
 * NOTE(review): the meaning of each slot below is inferred from the callback
 * names; confirm the member order against the UEnumeration declaration.
 */
static const UEnumeration gKeywordsEnum = {
    NULL, /* presumably baseContext — TODO confirm */
    NULL, /* presumably context; filled in per instance by uloc_openKeywordList() */
    uloc_kw_closeKeywords,
    uloc_kw_countKeywords,
    uenum_unextDefault,
    uloc_kw_nextKeyword,
    uloc_kw_resetKeywords
};
1516
1517 U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywordList(const char * keywordList,int32_t keywordListSize,UErrorCode * status)1518 uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1519 {
1520 UKeywordsContext *myContext = NULL;
1521 UEnumeration *result = NULL;
1522
1523 if(U_FAILURE(*status)) {
1524 return NULL;
1525 }
1526 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1527 /* Null pointer test */
1528 if (result == NULL) {
1529 *status = U_MEMORY_ALLOCATION_ERROR;
1530 return NULL;
1531 }
1532 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1533 myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
1534 if (myContext == NULL) {
1535 *status = U_MEMORY_ALLOCATION_ERROR;
1536 uprv_free(result);
1537 return NULL;
1538 }
1539 myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1540 uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1541 myContext->keywords[keywordListSize] = 0;
1542 myContext->current = myContext->keywords;
1543 result->context = myContext;
1544 return result;
1545 }
1546
1547 U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywords(const char * localeID,UErrorCode * status)1548 uloc_openKeywords(const char* localeID,
1549 UErrorCode* status)
1550 {
1551 int32_t i=0;
1552 char keywords[256];
1553 int32_t keywordsCapacity = 256;
1554 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1555 const char* tmpLocaleID;
1556
1557 if(status==NULL || U_FAILURE(*status)) {
1558 return 0;
1559 }
1560
1561 if (_hasBCP47Extension(localeID)) {
1562 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1563 } else {
1564 if (localeID==NULL) {
1565 localeID=uloc_getDefault();
1566 }
1567 tmpLocaleID=localeID;
1568 }
1569
1570 /* Skip the language */
1571 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1572 if(_isIDSeparator(*tmpLocaleID)) {
1573 const char *scriptID;
1574 /* Skip the script if available */
1575 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1576 if(scriptID != tmpLocaleID+1) {
1577 /* Found optional script */
1578 tmpLocaleID = scriptID;
1579 }
1580 /* Skip the Country */
1581 if (_isIDSeparator(*tmpLocaleID)) {
1582 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1583 if(_isIDSeparator(*tmpLocaleID)) {
1584 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
1585 }
1586 }
1587 }
1588
1589 /* keywords are located after '@' */
1590 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1591 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1592 }
1593
1594 if(i) {
1595 return uloc_openKeywordList(keywords, i, status);
1596 } else {
1597 return NULL;
1598 }
1599 }
1600
1601
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE 0x1

/* TRUE if any bit of 'mask' is set in 'options'. The arguments are not
   parenthesized in the expansion, so pass simple expressions. */
#define OPTION_SET(options, mask) ((options & mask) != 0)

/* The characters of "i-default" as a plain char array with no terminating
   NUL; _canonicalize compares against it with a length-limited compare. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
/* Number of characters in i_default. */
#define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
1610
1611 /**
1612 * Canonicalize the given localeID, to level 1 or to level 2,
1613 * depending on the options. To specify level 1, pass in options=0.
1614 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1615 *
1616 * This is the code underlying uloc_getName and uloc_canonicalize.
1617 */
1618 static int32_t
_canonicalize(const char * localeID,char * result,int32_t resultCapacity,uint32_t options,UErrorCode * err)1619 _canonicalize(const char* localeID,
1620 char* result,
1621 int32_t resultCapacity,
1622 uint32_t options,
1623 UErrorCode* err) {
1624 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1625 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1626 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1627 const char* origLocaleID;
1628 const char* tmpLocaleID;
1629 const char* keywordAssign = NULL;
1630 const char* separatorIndicator = NULL;
1631 const char* addKeyword = NULL;
1632 const char* addValue = NULL;
1633 char* name;
1634 char* variant = NULL; /* pointer into name, or NULL */
1635
1636 if (U_FAILURE(*err)) {
1637 return 0;
1638 }
1639
1640 if (_hasBCP47Extension(localeID)) {
1641 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1642 } else {
1643 if (localeID==NULL) {
1644 localeID=uloc_getDefault();
1645 }
1646 tmpLocaleID=localeID;
1647 }
1648
1649 origLocaleID=tmpLocaleID;
1650
1651 /* if we are doing a full canonicalization, then put results in
1652 localeBuffer, if necessary; otherwise send them to result. */
1653 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1654 (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
1655 name = localeBuffer;
1656 nameCapacity = (int32_t)sizeof(localeBuffer);
1657 } else {
1658 name = result;
1659 nameCapacity = resultCapacity;
1660 }
1661
1662 /* get all pieces, one after another, and separate with '_' */
1663 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
1664
1665 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1666 const char *d = uloc_getDefault();
1667
1668 len = (int32_t)uprv_strlen(d);
1669
1670 if (name != NULL) {
1671 uprv_strncpy(name, d, len);
1672 }
1673 } else if(_isIDSeparator(*tmpLocaleID)) {
1674 const char *scriptID;
1675
1676 ++fieldCount;
1677 if(len<nameCapacity) {
1678 name[len]='_';
1679 }
1680 ++len;
1681
1682 scriptSize=ulocimp_getScript(tmpLocaleID+1,
1683 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
1684 if(scriptSize > 0) {
1685 /* Found optional script */
1686 tmpLocaleID = scriptID;
1687 ++fieldCount;
1688 len+=scriptSize;
1689 if (_isIDSeparator(*tmpLocaleID)) {
1690 /* If there is something else, then we add the _ */
1691 if(len<nameCapacity) {
1692 name[len]='_';
1693 }
1694 ++len;
1695 }
1696 }
1697
1698 if (_isIDSeparator(*tmpLocaleID)) {
1699 const char *cntryID;
1700 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1701 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
1702 if (cntrySize > 0) {
1703 /* Found optional country */
1704 tmpLocaleID = cntryID;
1705 len+=cntrySize;
1706 }
1707 if(_isIDSeparator(*tmpLocaleID)) {
1708 /* If there is something else, then we add the _ if we found country before. */
1709 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
1710 ++fieldCount;
1711 if(len<nameCapacity) {
1712 name[len]='_';
1713 }
1714 ++len;
1715 }
1716
1717 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1718 (len<nameCapacity ? name+len : NULL), nameCapacity-len);
1719 if (variantSize > 0) {
1720 variant = len<nameCapacity ? name+len : NULL;
1721 len += variantSize;
1722 tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1723 }
1724 }
1725 }
1726 }
1727
1728 /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1729 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1730 UBool done = FALSE;
1731 do {
1732 char c = *tmpLocaleID;
1733 switch (c) {
1734 case 0:
1735 case '@':
1736 done = TRUE;
1737 break;
1738 default:
1739 if (len<nameCapacity) {
1740 name[len] = c;
1741 }
1742 ++len;
1743 ++tmpLocaleID;
1744 break;
1745 }
1746 } while (!done);
1747 }
1748
1749 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1750 After this, tmpLocaleID either points to '@' or is NULL */
1751 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1752 keywordAssign = uprv_strchr(tmpLocaleID, '=');
1753 separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1754 }
1755
1756 /* Copy POSIX-style variant, if any [mr@FOO] */
1757 if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1758 tmpLocaleID != NULL && keywordAssign == NULL) {
1759 for (;;) {
1760 char c = *tmpLocaleID;
1761 if (c == 0) {
1762 break;
1763 }
1764 if (len<nameCapacity) {
1765 name[len] = c;
1766 }
1767 ++len;
1768 ++tmpLocaleID;
1769 }
1770 }
1771
1772 if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1773 /* Handle @FOO variant if @ is present and not followed by = */
1774 if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1775 int32_t posixVariantSize;
1776 /* Add missing '_' if needed */
1777 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1778 do {
1779 if(len<nameCapacity) {
1780 name[len]='_';
1781 }
1782 ++len;
1783 ++fieldCount;
1784 } while(fieldCount<2);
1785 }
1786 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
1787 (UBool)(variantSize > 0));
1788 if (posixVariantSize > 0) {
1789 if (variant == NULL) {
1790 variant = name+len;
1791 }
1792 len += posixVariantSize;
1793 variantSize += posixVariantSize;
1794 }
1795 }
1796
1797 /* Handle generic variants first */
1798 if (variant) {
1799 for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
1800 const char* variantToCompare = VARIANT_MAP[j].variant;
1801 int32_t n = (int32_t)uprv_strlen(variantToCompare);
1802 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1803 len -= variantLen;
1804 if (variantLen > 0) {
1805 if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
1806 --len;
1807 }
1808 addKeyword = VARIANT_MAP[j].keyword;
1809 addValue = VARIANT_MAP[j].value;
1810 break;
1811 }
1812 }
1813 if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
1814 --len;
1815 }
1816 }
1817
1818 /* Look up the ID in the canonicalization map */
1819 for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1820 const char* id = CANONICALIZE_MAP[j].id;
1821 int32_t n = (int32_t)uprv_strlen(id);
1822 if (len == n && uprv_strncmp(name, id, n) == 0) {
1823 if (n == 0 && tmpLocaleID != NULL) {
1824 break; /* Don't remap "" if keywords present */
1825 }
1826 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1827 if (CANONICALIZE_MAP[j].keyword) {
1828 addKeyword = CANONICALIZE_MAP[j].keyword;
1829 addValue = CANONICALIZE_MAP[j].value;
1830 }
1831 break;
1832 }
1833 }
1834 }
1835
1836 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1837 if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1838 (!separatorIndicator || separatorIndicator > keywordAssign)) {
1839 if(len<nameCapacity) {
1840 name[len]='@';
1841 }
1842 ++len;
1843 ++fieldCount;
1844 len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1845 NULL, 0, NULL, TRUE, addKeyword, addValue, err);
1846 } else if (addKeyword != NULL) {
1847 U_ASSERT(addValue != NULL && len < nameCapacity);
1848 /* inelegant but works -- later make _getKeywords do this? */
1849 len += _copyCount(name+len, nameCapacity-len, "@");
1850 len += _copyCount(name+len, nameCapacity-len, addKeyword);
1851 len += _copyCount(name+len, nameCapacity-len, "=");
1852 len += _copyCount(name+len, nameCapacity-len, addValue);
1853 }
1854 }
1855
1856 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1857 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1858 }
1859
1860 return u_terminateChars(result, resultCapacity, len, err);
1861 }
1862
1863 /* ### ID parsing API **************************************************/
1864
1865 U_CAPI int32_t U_EXPORT2
uloc_getParent(const char * localeID,char * parent,int32_t parentCapacity,UErrorCode * err)1866 uloc_getParent(const char* localeID,
1867 char* parent,
1868 int32_t parentCapacity,
1869 UErrorCode* err)
1870 {
1871 const char *lastUnderscore;
1872 int32_t i;
1873
1874 if (U_FAILURE(*err))
1875 return 0;
1876
1877 if (localeID == NULL)
1878 localeID = uloc_getDefault();
1879
1880 lastUnderscore=uprv_strrchr(localeID, '_');
1881 if(lastUnderscore!=NULL) {
1882 i=(int32_t)(lastUnderscore-localeID);
1883 } else {
1884 i=0;
1885 }
1886
1887 if(i>0 && parent != localeID) {
1888 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1889 }
1890 return u_terminateChars(parent, parentCapacity, i, err);
1891 }
1892
1893 U_CAPI int32_t U_EXPORT2
uloc_getLanguage(const char * localeID,char * language,int32_t languageCapacity,UErrorCode * err)1894 uloc_getLanguage(const char* localeID,
1895 char* language,
1896 int32_t languageCapacity,
1897 UErrorCode* err)
1898 {
1899 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1900 int32_t i=0;
1901
1902 if (err==NULL || U_FAILURE(*err)) {
1903 return 0;
1904 }
1905
1906 if(localeID==NULL) {
1907 localeID=uloc_getDefault();
1908 }
1909
1910 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
1911 return u_terminateChars(language, languageCapacity, i, err);
1912 }
1913
1914 U_CAPI int32_t U_EXPORT2
uloc_getScript(const char * localeID,char * script,int32_t scriptCapacity,UErrorCode * err)1915 uloc_getScript(const char* localeID,
1916 char* script,
1917 int32_t scriptCapacity,
1918 UErrorCode* err)
1919 {
1920 int32_t i=0;
1921
1922 if(err==NULL || U_FAILURE(*err)) {
1923 return 0;
1924 }
1925
1926 if(localeID==NULL) {
1927 localeID=uloc_getDefault();
1928 }
1929
1930 /* skip the language */
1931 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1932 if(_isIDSeparator(*localeID)) {
1933 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
1934 }
1935 return u_terminateChars(script, scriptCapacity, i, err);
1936 }
1937
1938 U_CAPI int32_t U_EXPORT2
uloc_getCountry(const char * localeID,char * country,int32_t countryCapacity,UErrorCode * err)1939 uloc_getCountry(const char* localeID,
1940 char* country,
1941 int32_t countryCapacity,
1942 UErrorCode* err)
1943 {
1944 int32_t i=0;
1945
1946 if(err==NULL || U_FAILURE(*err)) {
1947 return 0;
1948 }
1949
1950 if(localeID==NULL) {
1951 localeID=uloc_getDefault();
1952 }
1953
1954 /* Skip the language */
1955 ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1956 if(_isIDSeparator(*localeID)) {
1957 const char *scriptID;
1958 /* Skip the script if available */
1959 ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
1960 if(scriptID != localeID+1) {
1961 /* Found optional script */
1962 localeID = scriptID;
1963 }
1964 if(_isIDSeparator(*localeID)) {
1965 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
1966 }
1967 }
1968 return u_terminateChars(country, countryCapacity, i, err);
1969 }
1970
1971 U_CAPI int32_t U_EXPORT2
uloc_getVariant(const char * localeID,char * variant,int32_t variantCapacity,UErrorCode * err)1972 uloc_getVariant(const char* localeID,
1973 char* variant,
1974 int32_t variantCapacity,
1975 UErrorCode* err)
1976 {
1977 char tempBuffer[ULOC_FULLNAME_CAPACITY];
1978 const char* tmpLocaleID;
1979 int32_t i=0;
1980
1981 if(err==NULL || U_FAILURE(*err)) {
1982 return 0;
1983 }
1984
1985 if (_hasBCP47Extension(localeID)) {
1986 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1987 } else {
1988 if (localeID==NULL) {
1989 localeID=uloc_getDefault();
1990 }
1991 tmpLocaleID=localeID;
1992 }
1993
1994 /* Skip the language */
1995 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1996 if(_isIDSeparator(*tmpLocaleID)) {
1997 const char *scriptID;
1998 /* Skip the script if available */
1999 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
2000 if(scriptID != tmpLocaleID+1) {
2001 /* Found optional script */
2002 tmpLocaleID = scriptID;
2003 }
2004 /* Skip the Country */
2005 if (_isIDSeparator(*tmpLocaleID)) {
2006 const char *cntryID;
2007 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
2008 if (cntryID != tmpLocaleID+1) {
2009 /* Found optional country */
2010 tmpLocaleID = cntryID;
2011 }
2012 if(_isIDSeparator(*tmpLocaleID)) {
2013 /* If there was no country ID, skip a possible extra IDSeparator */
2014 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
2015 tmpLocaleID++;
2016 }
2017 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
2018 }
2019 }
2020 }
2021
2022 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2023 /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2024 /*
2025 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2026 i=_getVariant(localeID+1, '@', variant, variantCapacity);
2027 }
2028 */
2029 return u_terminateChars(variant, variantCapacity, i, err);
2030 }
2031
2032 U_CAPI int32_t U_EXPORT2
uloc_getName(const char * localeID,char * name,int32_t nameCapacity,UErrorCode * err)2033 uloc_getName(const char* localeID,
2034 char* name,
2035 int32_t nameCapacity,
2036 UErrorCode* err)
2037 {
2038 return _canonicalize(localeID, name, nameCapacity, 0, err);
2039 }
2040
2041 U_CAPI int32_t U_EXPORT2
uloc_getBaseName(const char * localeID,char * name,int32_t nameCapacity,UErrorCode * err)2042 uloc_getBaseName(const char* localeID,
2043 char* name,
2044 int32_t nameCapacity,
2045 UErrorCode* err)
2046 {
2047 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
2048 }
2049
2050 U_CAPI int32_t U_EXPORT2
uloc_canonicalize(const char * localeID,char * name,int32_t nameCapacity,UErrorCode * err)2051 uloc_canonicalize(const char* localeID,
2052 char* name,
2053 int32_t nameCapacity,
2054 UErrorCode* err)
2055 {
2056 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
2057 }
2058
2059 U_CAPI const char* U_EXPORT2
uloc_getISO3Language(const char * localeID)2060 uloc_getISO3Language(const char* localeID)
2061 {
2062 int16_t offset;
2063 char lang[ULOC_LANG_CAPACITY];
2064 UErrorCode err = U_ZERO_ERROR;
2065
2066 if (localeID == NULL)
2067 {
2068 localeID = uloc_getDefault();
2069 }
2070 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
2071 if (U_FAILURE(err))
2072 return "";
2073 offset = _findIndex(LANGUAGES, lang);
2074 if (offset < 0)
2075 return "";
2076 return LANGUAGES_3[offset];
2077 }
2078
2079 U_CAPI const char* U_EXPORT2
uloc_getISO3Country(const char * localeID)2080 uloc_getISO3Country(const char* localeID)
2081 {
2082 int16_t offset;
2083 char cntry[ULOC_LANG_CAPACITY];
2084 UErrorCode err = U_ZERO_ERROR;
2085
2086 if (localeID == NULL)
2087 {
2088 localeID = uloc_getDefault();
2089 }
2090 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
2091 if (U_FAILURE(err))
2092 return "";
2093 offset = _findIndex(COUNTRIES, cntry);
2094 if (offset < 0)
2095 return "";
2096
2097 return COUNTRIES_3[offset];
2098 }
2099
2100 U_CAPI uint32_t U_EXPORT2
uloc_getLCID(const char * localeID)2101 uloc_getLCID(const char* localeID)
2102 {
2103 UErrorCode status = U_ZERO_ERROR;
2104 char langID[ULOC_FULLNAME_CAPACITY];
2105
2106 uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2107 if (U_FAILURE(status)) {
2108 return 0;
2109 }
2110
2111 if (uprv_strchr(localeID, '@')) {
2112 // uprv_convertToLCID does not support keywords other than collation.
2113 // Remove all keywords except collation.
2114 int32_t len;
2115 char collVal[ULOC_KEYWORDS_CAPACITY];
2116 char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2117
2118 len = uloc_getKeywordValue(localeID, "collation", collVal,
2119 sizeof(collVal)/sizeof(collVal[0]) - 1, &status);
2120
2121 if (U_SUCCESS(status) && len > 0) {
2122 collVal[len] = 0;
2123
2124 len = uloc_getBaseName(localeID, tmpLocaleID,
2125 sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - 1, &status);
2126
2127 if (U_SUCCESS(status)) {
2128 tmpLocaleID[len] = 0;
2129
2130 len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
2131 sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - len - 1, &status);
2132
2133 if (U_SUCCESS(status)) {
2134 tmpLocaleID[len] = 0;
2135 return uprv_convertToLCID(langID, tmpLocaleID, &status);
2136 }
2137 }
2138 }
2139
2140 // fall through - all keywords are simply ignored
2141 status = U_ZERO_ERROR;
2142 }
2143
2144 return uprv_convertToLCID(langID, localeID, &status);
2145 }
2146
2147 U_CAPI int32_t U_EXPORT2
uloc_getLocaleForLCID(uint32_t hostid,char * locale,int32_t localeCapacity,UErrorCode * status)2148 uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2149 UErrorCode *status)
2150 {
2151 return uprv_convertToPosix(hostid, locale, localeCapacity, status);
2152 }
2153
2154 /* ### Default locale **************************************************/
2155
2156 U_CAPI const char* U_EXPORT2
uloc_getDefault()2157 uloc_getDefault()
2158 {
2159 return locale_get_default();
2160 }
2161
2162 U_CAPI void U_EXPORT2
uloc_setDefault(const char * newDefaultLocale,UErrorCode * err)2163 uloc_setDefault(const char* newDefaultLocale,
2164 UErrorCode* err)
2165 {
2166 if (U_FAILURE(*err))
2167 return;
2168 /* the error code isn't currently used for anything by this function*/
2169
2170 /* propagate change to C++ */
2171 locale_set_default(newDefaultLocale);
2172 }
2173
2174 /**
2175 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2176 * to an array of pointers to arrays of char. All of these pointers are owned
2177 * by ICU-- do not delete them, and do not write through them. The array is
2178 * terminated with a null pointer.
2179 */
2180 U_CAPI const char* const* U_EXPORT2
uloc_getISOLanguages()2181 uloc_getISOLanguages()
2182 {
2183 return LANGUAGES;
2184 }
2185
2186 /**
2187 * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2188 * pointer to an array of pointers to arrays of char. All of these pointers are
2189 * owned by ICU-- do not delete them, and do not write through them. The array is
2190 * terminated with a null pointer.
2191 */
2192 U_CAPI const char* const* U_EXPORT2
uloc_getISOCountries()2193 uloc_getISOCountries()
2194 {
2195 return COUNTRIES;
2196 }
2197
2198
2199 /* this function to be moved into cstring.c later */
2200 static char gDecimal = 0;
2201
2202 static /* U_CAPI */
2203 double
2204 /* U_EXPORT2 */
_uloc_strtod(const char * start,char ** end)2205 _uloc_strtod(const char *start, char **end) {
2206 char *decimal;
2207 char *myEnd;
2208 char buf[30];
2209 double rv;
2210 if (!gDecimal) {
2211 char rep[5];
2212 /* For machines that decide to change the decimal on you,
2213 and try to be too smart with localization.
2214 This normally should be just a '.'. */
2215 sprintf(rep, "%+1.1f", 1.0);
2216 gDecimal = rep[2];
2217 }
2218
2219 if(gDecimal == '.') {
2220 return uprv_strtod(start, end); /* fall through to OS */
2221 } else {
2222 uprv_strncpy(buf, start, 29);
2223 buf[29]=0;
2224 decimal = uprv_strchr(buf, '.');
2225 if(decimal) {
2226 *decimal = gDecimal;
2227 } else {
2228 return uprv_strtod(start, end); /* no decimal point */
2229 }
2230 rv = uprv_strtod(buf, &myEnd);
2231 if(end) {
2232 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
2233 }
2234 return rv;
2235 }
2236 }
2237
/* One entry of a parsed Accept-Language list, as built and sorted by uloc_acceptLanguageFromHTTP(). */
typedef struct {
    float q;       /* quality value: parsed from the item's ";q=" parameter, or 1.0 when absent */
    int32_t dummy; /* to avoid uninitialized memory copy from qsort */
    char *locale;  /* heap-allocated locale ID for the item (canonicalized form) */
} _acceptLangItem;
2243
2244 static int32_t U_CALLCONV
uloc_acceptLanguageCompare(const void *,const void * a,const void * b)2245 uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
2246 {
2247 const _acceptLangItem *aa = (const _acceptLangItem*)a;
2248 const _acceptLangItem *bb = (const _acceptLangItem*)b;
2249
2250 int32_t rc = 0;
2251 if(bb->q < aa->q) {
2252 rc = -1; /* A > B */
2253 } else if(bb->q > aa->q) {
2254 rc = 1; /* A < B */
2255 } else {
2256 rc = 0; /* A = B */
2257 }
2258
2259 if(rc==0) {
2260 rc = uprv_stricmp(aa->locale, bb->locale);
2261 }
2262
2263 #if defined(ULOC_DEBUG)
2264 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2265 aa->locale, aa->q,
2266 bb->locale, bb->q,
2267 rc);*/
2268 #endif
2269
2270 return rc;
2271 }
2272
2273 /*
2274 mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2275 */
2276
2277 U_CAPI int32_t U_EXPORT2
uloc_acceptLanguageFromHTTP(char * result,int32_t resultAvailable,UAcceptResult * outResult,const char * httpAcceptLanguage,UEnumeration * availableLocales,UErrorCode * status)2278 uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2279 const char *httpAcceptLanguage,
2280 UEnumeration* availableLocales,
2281 UErrorCode *status)
2282 {
2283 _acceptLangItem *j;
2284 _acceptLangItem smallBuffer[30];
2285 char **strs;
2286 char tmp[ULOC_FULLNAME_CAPACITY +1];
2287 int32_t n = 0;
2288 const char *itemEnd;
2289 const char *paramEnd;
2290 const char *s;
2291 const char *t;
2292 int32_t res;
2293 int32_t i;
2294 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2295 int32_t jSize;
2296 char *tempstr; /* Use for null pointer check */
2297
2298 j = smallBuffer;
2299 jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2300 if(U_FAILURE(*status)) {
2301 return -1;
2302 }
2303
2304 for(s=httpAcceptLanguage;s&&*s;) {
2305 while(isspace(*s)) /* eat space at the beginning */
2306 s++;
2307 itemEnd=uprv_strchr(s,',');
2308 paramEnd=uprv_strchr(s,';');
2309 if(!itemEnd) {
2310 itemEnd = httpAcceptLanguage+l; /* end of string */
2311 }
2312 if(paramEnd && paramEnd<itemEnd) {
2313 /* semicolon (;) is closer than end (,) */
2314 t = paramEnd+1;
2315 if(*t=='q') {
2316 t++;
2317 }
2318 while(isspace(*t)) {
2319 t++;
2320 }
2321 if(*t=='=') {
2322 t++;
2323 }
2324 while(isspace(*t)) {
2325 t++;
2326 }
2327 j[n].q = (float)_uloc_strtod(t,NULL);
2328 } else {
2329 /* no semicolon - it's 1.0 */
2330 j[n].q = 1.0f;
2331 paramEnd = itemEnd;
2332 }
2333 j[n].dummy=0;
2334 /* eat spaces prior to semi */
2335 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2336 ;
2337 /* Check for null pointer from uprv_strndup */
2338 tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
2339 if (tempstr == NULL) {
2340 *status = U_MEMORY_ALLOCATION_ERROR;
2341 return -1;
2342 }
2343 j[n].locale = tempstr;
2344 uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2345 if(strcmp(j[n].locale,tmp)) {
2346 uprv_free(j[n].locale);
2347 j[n].locale=uprv_strdup(tmp);
2348 }
2349 #if defined(ULOC_DEBUG)
2350 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2351 #endif
2352 n++;
2353 s = itemEnd;
2354 while(*s==',') { /* eat duplicate commas */
2355 s++;
2356 }
2357 if(n>=jSize) {
2358 if(j==smallBuffer) { /* overflowed the small buffer. */
2359 j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2)));
2360 if(j!=NULL) {
2361 uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2362 }
2363 #if defined(ULOC_DEBUG)
2364 fprintf(stderr,"malloced at size %d\n", jSize);
2365 #endif
2366 } else {
2367 j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2));
2368 #if defined(ULOC_DEBUG)
2369 fprintf(stderr,"re-alloced at size %d\n", jSize);
2370 #endif
2371 }
2372 jSize *= 2;
2373 if(j==NULL) {
2374 *status = U_MEMORY_ALLOCATION_ERROR;
2375 return -1;
2376 }
2377 }
2378 }
2379 uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2380 if(U_FAILURE(*status)) {
2381 if(j != smallBuffer) {
2382 #if defined(ULOC_DEBUG)
2383 fprintf(stderr,"freeing j %p\n", j);
2384 #endif
2385 uprv_free(j);
2386 }
2387 return -1;
2388 }
2389 strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n)));
2390 /* Check for null pointer */
2391 if (strs == NULL) {
2392 uprv_free(j); /* Free to avoid memory leak */
2393 *status = U_MEMORY_ALLOCATION_ERROR;
2394 return -1;
2395 }
2396 for(i=0;i<n;i++) {
2397 #if defined(ULOC_DEBUG)
2398 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2399 #endif
2400 strs[i]=j[i].locale;
2401 }
2402 res = uloc_acceptLanguage(result, resultAvailable, outResult,
2403 (const char**)strs, n, availableLocales, status);
2404 for(i=0;i<n;i++) {
2405 uprv_free(strs[i]);
2406 }
2407 uprv_free(strs);
2408 if(j != smallBuffer) {
2409 #if defined(ULOC_DEBUG)
2410 fprintf(stderr,"freeing j %p\n", j);
2411 #endif
2412 uprv_free(j);
2413 }
2414 return res;
2415 }
2416
2417
2418 U_CAPI int32_t U_EXPORT2
uloc_acceptLanguage(char * result,int32_t resultAvailable,UAcceptResult * outResult,const char ** acceptList,int32_t acceptListCount,UEnumeration * availableLocales,UErrorCode * status)2419 uloc_acceptLanguage(char *result, int32_t resultAvailable,
2420 UAcceptResult *outResult, const char **acceptList,
2421 int32_t acceptListCount,
2422 UEnumeration* availableLocales,
2423 UErrorCode *status)
2424 {
2425 int32_t i,j;
2426 int32_t len;
2427 int32_t maxLen=0;
2428 char tmp[ULOC_FULLNAME_CAPACITY+1];
2429 const char *l;
2430 char **fallbackList;
2431 if(U_FAILURE(*status)) {
2432 return -1;
2433 }
2434 fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
2435 if(fallbackList==NULL) {
2436 *status = U_MEMORY_ALLOCATION_ERROR;
2437 return -1;
2438 }
2439 for(i=0;i<acceptListCount;i++) {
2440 #if defined(ULOC_DEBUG)
2441 fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2442 #endif
2443 while((l=uenum_next(availableLocales, NULL, status))) {
2444 #if defined(ULOC_DEBUG)
2445 fprintf(stderr," %s\n", l);
2446 #endif
2447 len = (int32_t)uprv_strlen(l);
2448 if(!uprv_strcmp(acceptList[i], l)) {
2449 if(outResult) {
2450 *outResult = ULOC_ACCEPT_VALID;
2451 }
2452 #if defined(ULOC_DEBUG)
2453 fprintf(stderr, "MATCH! %s\n", l);
2454 #endif
2455 if(len>0) {
2456 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2457 }
2458 for(j=0;j<i;j++) {
2459 uprv_free(fallbackList[j]);
2460 }
2461 uprv_free(fallbackList);
2462 return u_terminateChars(result, resultAvailable, len, status);
2463 }
2464 if(len>maxLen) {
2465 maxLen = len;
2466 }
2467 }
2468 uenum_reset(availableLocales, status);
2469 /* save off parent info */
2470 if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2471 fallbackList[i] = uprv_strdup(tmp);
2472 } else {
2473 fallbackList[i]=0;
2474 }
2475 }
2476
2477 for(maxLen--;maxLen>0;maxLen--) {
2478 for(i=0;i<acceptListCount;i++) {
2479 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2480 #if defined(ULOC_DEBUG)
2481 fprintf(stderr,"Try: [%s]", fallbackList[i]);
2482 #endif
2483 while((l=uenum_next(availableLocales, NULL, status))) {
2484 #if defined(ULOC_DEBUG)
2485 fprintf(stderr," %s\n", l);
2486 #endif
2487 len = (int32_t)uprv_strlen(l);
2488 if(!uprv_strcmp(fallbackList[i], l)) {
2489 if(outResult) {
2490 *outResult = ULOC_ACCEPT_FALLBACK;
2491 }
2492 #if defined(ULOC_DEBUG)
2493 fprintf(stderr, "fallback MATCH! %s\n", l);
2494 #endif
2495 if(len>0) {
2496 uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2497 }
2498 for(j=0;j<acceptListCount;j++) {
2499 uprv_free(fallbackList[j]);
2500 }
2501 uprv_free(fallbackList);
2502 return u_terminateChars(result, resultAvailable, len, status);
2503 }
2504 }
2505 uenum_reset(availableLocales, status);
2506
2507 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2508 uprv_free(fallbackList[i]);
2509 fallbackList[i] = uprv_strdup(tmp);
2510 } else {
2511 uprv_free(fallbackList[i]);
2512 fallbackList[i]=0;
2513 }
2514 }
2515 }
2516 if(outResult) {
2517 *outResult = ULOC_ACCEPT_FAILED;
2518 }
2519 }
2520 for(i=0;i<acceptListCount;i++) {
2521 uprv_free(fallbackList[i]);
2522 }
2523 uprv_free(fallbackList);
2524 return -1;
2525 }
2526
2527 U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleKey(const char * keyword)2528 uloc_toUnicodeLocaleKey(const char* keyword)
2529 {
2530 const char* bcpKey = ulocimp_toBcpKey(keyword);
2531 if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
2532 // unknown keyword, but syntax is fine..
2533 return keyword;
2534 }
2535 return bcpKey;
2536 }
2537
2538 U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleType(const char * keyword,const char * value)2539 uloc_toUnicodeLocaleType(const char* keyword, const char* value)
2540 {
2541 const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
2542 if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
2543 // unknown keyword, but syntax is fine..
2544 return value;
2545 }
2546 return bcpType;
2547 }
2548
2549 #define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
2550 #define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
2551
2552 static UBool
isWellFormedLegacyKey(const char * legacyKey)2553 isWellFormedLegacyKey(const char* legacyKey)
2554 {
2555 const char* p = legacyKey;
2556 while (*p) {
2557 if (!UPRV_ISALPHANUM(*p)) {
2558 return FALSE;
2559 }
2560 p++;
2561 }
2562 return TRUE;
2563 }
2564
2565 static UBool
isWellFormedLegacyType(const char * legacyType)2566 isWellFormedLegacyType(const char* legacyType)
2567 {
2568 const char* p = legacyType;
2569 int32_t alphaNumLen = 0;
2570 while (*p) {
2571 if (*p == '_' || *p == '/' || *p == '-') {
2572 if (alphaNumLen == 0) {
2573 return FALSE;
2574 }
2575 alphaNumLen = 0;
2576 } else if (UPRV_ISALPHANUM(*p)) {
2577 alphaNumLen++;
2578 } else {
2579 return FALSE;
2580 }
2581 p++;
2582 }
2583 return (alphaNumLen != 0);
2584 }
2585
2586 U_CAPI const char* U_EXPORT2
uloc_toLegacyKey(const char * keyword)2587 uloc_toLegacyKey(const char* keyword)
2588 {
2589 const char* legacyKey = ulocimp_toLegacyKey(keyword);
2590 if (legacyKey == NULL) {
2591 // Checks if the specified locale key is well-formed with the legacy locale syntax.
2592 //
2593 // Note:
2594 // Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
2595 // However, a key should not contain '=' obviously. For now, all existing
2596 // keys are using ASCII alphabetic letters only. We won't add any new key
2597 // that is not compatible with the BCP 47 syntax. Therefore, we assume
2598 // a valid key consist from [0-9a-zA-Z], no symbols.
2599 if (isWellFormedLegacyKey(keyword)) {
2600 return keyword;
2601 }
2602 }
2603 return legacyKey;
2604 }
2605
2606 U_CAPI const char* U_EXPORT2
uloc_toLegacyType(const char * keyword,const char * value)2607 uloc_toLegacyType(const char* keyword, const char* value)
2608 {
2609 const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
2610 if (legacyType == NULL) {
2611 // Checks if the specified locale type is well-formed with the legacy locale syntax.
2612 //
2613 // Note:
2614 // Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
2615 // However, a type should not contain '=' obviously. For now, all existing
2616 // types are using ASCII alphabetic letters with a few symbol letters. We won't
2617 // add any new type that is not compatible with the BCP 47 syntax except timezone
2618 // IDs. For now, we assume a valid type start with [0-9a-zA-Z], but may contain
2619 // '-' '_' '/' in the middle.
2620 if (isWellFormedLegacyType(value)) {
2621 return value;
2622 }
2623 }
2624 return legacyType;
2625 }
2626
2627 /*eof*/
2628