1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1996-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * Provides functionality for mapping between
10 * LCID and Posix IDs or ICU locale to codepage
11 *
12 * Note: All classes and code in this file are
13 * intended for internal use only.
14 *
15 * Methods of interest:
16 * unsigned long convertToLCID(const char*);
17 * const char* convertToPosix(unsigned long);
18 *
19 * Kathleen Wilson, 4/30/96
20 *
21 * Date Name Description
22 * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
23 * setId() method and safety check against
24 * MAX_ID_LENGTH.
25 * 04/23/99 stephen Added C wrapper for convertToPosix.
26 * 09/18/00 george Removed the memory leaks.
27 * 08/23/01 george Convert to C
28 */
29
30 #include "locmap.h"
31 #include "bytesinkutil.h"
32 #include "charstr.h"
33 #include "cstring.h"
34 #include "cmemory.h"
35 #include "ulocimp.h"
36 #include "unicode/uloc.h"
37
38 #if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
39 #include <windows.h>
40 #include <winnls.h> // LCIDToLocaleName and LocaleNameToLCID
41 #endif
42
43 /*
44 * Note:
45 * The mapping from Win32 locale ID numbers to POSIX locale strings should
46 * be the faster one.
47 *
48 * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
49 * [MS-LCID] Windows Language Code Identifier (LCID) Reference
50 */
51
52 /*
53 ////////////////////////////////////////////////
54 //
55 // Internal Classes for LCID <--> POSIX Mapping
56 //
57 /////////////////////////////////////////////////
58 */
59
/**
 * One row of an LCID <--> POSIX table: an LCID in host format paired with
 * the POSIX-style locale ID string it maps to. Both members are const, so
 * rows can only be created through static initialization.
 */
struct ILcidPosixElement {
    const uint32_t hostID;        /* LCID in host format, e.g. 0x0407 */
    const char * const posixID;   /* POSIX locale ID, e.g. "de_DE" */
};
typedef struct ILcidPosixElement ILcidPosixElement;
65
/**
 * Describes one subtable: the number of rows it holds and a pointer to the
 * first ILcidPosixElement row. Both members are const.
 */
struct ILcidPosixMap {
    const uint32_t numRegions;                          /* number of rows in regionMaps */
    const struct ILcidPosixElement * const regionMaps;  /* the rows of this subtable */
};
typedef struct ILcidPosixMap ILcidPosixMap;
71
72
73 /*
74 /////////////////////////////////////////////////
75 //
76 // Easy macros to make the LCID <--> POSIX Mapping
77 //
78 /////////////////////////////////////////////////
79 */
80
/**
 * The standard one language/one country mapping for LCID.
 * Declares the two-row table locmap_<languageID>: row 0 is the parent
 * (language-only) entry, whose LCID is derived from hostID through
 * LANGUAGE_LCID(); row 1 is the language_TERRITORY entry itself.
 * The expansion already ends with ';', so invocations are written without one.
 * @param hostID     LCID in host format such as 0x044d
 * @param languageID posix ID of just the language such as 'de'; it is also
 *                   pasted into the name of the generated array
 * @param posixID    posix ID of the language_TERRITORY such as 'de_CH'
 */
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID)           \
    static const ILcidPosixElement locmap_##languageID[] = {             \
        { LANGUAGE_LCID(hostID), #languageID }, /* parent locale */      \
        { hostID, #posixID },                                            \
    };
94
/**
 * Define a subtable by ID.
 * Expands to the declaration head of the array locmap_<id>; the invocation
 * is followed by a brace-enclosed list of {hostID, posixID} rows and a ';'.
 * @param id the POSIX ID, either a language or language_TERRITORY; it is
 *           pasted into the name of the array
 */
#define ILCID_POSIX_SUBTABLE(id) \
    static const ILcidPosixElement locmap_##id[] =
101
102
/**
 * Create the ILcidPosixMap initializer {row count, row array} for the table
 * locmap_<_posixID>. This macro supposes that the table was declared under
 * that same name, and that the first element of the table is just the
 * language.
 * @param _posixID the ID the table was declared with (the suffix of its
 *                 array name)
 */
#define ILCID_POSIX_MAP(_posixID) \
    { UPRV_LENGTHOF(locmap_##_posixID), locmap_##_posixID }
111
112 /*
113 ////////////////////////////////////////////
114 //
115 // Create the table of LCID to POSIX Mapping
116 // None of it should be dynamically created.
117 //
118 // Keep static locale variables inside the function so that
119 // it can be created properly during static init.
120 //
121 // Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier
122 // (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
123 //
124 // Microsoft is moving away from LCID in favor of locale name as of Vista. This table needs to be
125 // maintained for support of older Windows version.
126 // Update: Windows 7 (091130)
127 //
// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
129 // @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
130 // called from uloc_getLCID(), keywords other than collation are already removed. If we really need
131 // to support other keywords in this mapping data, we must update the implementation.
132 ////////////////////////////////////////////
133 */
134
135 // TODO: For Windows ideally this table would be a list of exceptions rather than a complete list as
136 // LocaleNameToLCID and LCIDToLocaleName provide 90% of these.
137
138 ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
139
ILCID_POSIX_SUBTABLE(ar)140 ILCID_POSIX_SUBTABLE(ar) {
141 {0x01, "ar"},
142 {0x3801, "ar_AE"},
143 {0x3c01, "ar_BH"},
144 {0x1401, "ar_DZ"},
145 {0x0c01, "ar_EG"},
146 {0x0801, "ar_IQ"},
147 {0x2c01, "ar_JO"},
148 {0x3401, "ar_KW"},
149 {0x3001, "ar_LB"},
150 {0x1001, "ar_LY"},
151 {0x1801, "ar_MA"},
152 {0x1801, "ar_MO"},
153 {0x2001, "ar_OM"},
154 {0x4001, "ar_QA"},
155 {0x0401, "ar_SA"},
156 {0x2801, "ar_SY"},
157 {0x1c01, "ar_TN"},
158 {0x2401, "ar_YE"}
159 };
160
161 ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
162 ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
163 ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
164
ILCID_POSIX_SUBTABLE(az)165 ILCID_POSIX_SUBTABLE(az) {
166 {0x2c, "az"},
167 {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */
168 {0x742c, "az_Cyrl"}, /* Cyrillic based */
169 {0x042c, "az_Latn_AZ"}, /* Latin based */
170 {0x782c, "az_Latn"}, /* Latin based */
171 {0x042c, "az_AZ"} /* Latin based */
172 };
173
174 ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
175 ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
176
177 /*ILCID_POSIX_SUBTABLE(ber) {
178 {0x5f, "ber"},
179 {0x045f, "ber_Arab_DZ"},
180 {0x045f, "ber_Arab"},
181 {0x085f, "ber_Latn_DZ"},
182 {0x085f, "ber_Latn"}
183 };*/
184
185 ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
186
ILCID_POSIX_SUBTABLE(bin)187 ILCID_POSIX_SUBTABLE(bin) {
188 {0x66, "bin"},
189 {0x0466, "bin_NG"}
190 };
191
ILCID_POSIX_SUBTABLE(bn)192 ILCID_POSIX_SUBTABLE(bn) {
193 {0x45, "bn"},
194 {0x0845, "bn_BD"},
195 {0x0445, "bn_IN"}
196 };
197
ILCID_POSIX_SUBTABLE(bo)198 ILCID_POSIX_SUBTABLE(bo) {
199 {0x51, "bo"},
200 {0x0851, "bo_BT"},
201 {0x0451, "bo_CN"},
202 {0x0c51, "dz_BT"}
203 };
204
205 ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
206
ILCID_POSIX_SUBTABLE(ca)207 ILCID_POSIX_SUBTABLE(ca) {
208 {0x03, "ca"},
209 {0x0403, "ca_ES"},
210 {0x0803, "ca_ES_VALENCIA"}
211 };
212
213 ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
214
ILCID_POSIX_SUBTABLE(chr)215 ILCID_POSIX_SUBTABLE(chr) {
216 {0x05c, "chr"},
217 {0x7c5c, "chr_Cher"},
218 {0x045c, "chr_Cher_US"},
219 {0x045c, "chr_US"}
220 };
221
222 // ICU has chosen different names for these.
ILCID_POSIX_SUBTABLE(ckb)223 ILCID_POSIX_SUBTABLE(ckb) {
224 {0x92, "ckb"},
225 {0x7c92, "ckb_Arab"},
226 {0x0492, "ckb_Arab_IQ"}
227 };
228
229 /* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
230 ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
231
232 ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
233 ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
234
235 // Windows doesn't know POSIX or BCP47 Unicode phonebook sort names
ILCID_POSIX_SUBTABLE(de)236 ILCID_POSIX_SUBTABLE(de) {
237 {0x07, "de"},
238 {0x0c07, "de_AT"},
239 {0x0807, "de_CH"},
240 {0x0407, "de_DE"},
241 {0x1407, "de_LI"},
242 {0x1007, "de_LU"},
243 {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/
244 {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/
245 };
246
247 ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
248 ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
249
250 // Windows uses an empty string for 'invariant'
ILCID_POSIX_SUBTABLE(en)251 ILCID_POSIX_SUBTABLE(en) {
252 {0x09, "en"},
253 {0x0c09, "en_AU"},
254 {0x2809, "en_BZ"},
255 {0x1009, "en_CA"},
256 {0x0809, "en_GB"},
257 {0x3c09, "en_HK"},
258 {0x3809, "en_ID"},
259 {0x1809, "en_IE"},
260 {0x4009, "en_IN"},
261 {0x2009, "en_JM"},
262 {0x4409, "en_MY"},
263 {0x1409, "en_NZ"},
264 {0x3409, "en_PH"},
265 {0x4809, "en_SG"},
266 {0x2C09, "en_TT"},
267 {0x0409, "en_US"},
268 {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */
269 {0x2409, "en_029"},
270 {0x1c09, "en_ZA"},
271 {0x3009, "en_ZW"},
272 {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
273 {0x0409, "en_AS"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
274 {0x0409, "en_GU"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
275 {0x0409, "en_MH"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
276 {0x0409, "en_MP"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
277 {0x0409, "en_UM"} /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
278 };
279
ILCID_POSIX_SUBTABLE(en_US_POSIX)280 ILCID_POSIX_SUBTABLE(en_US_POSIX) {
281 {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
282 };
283
284 // Windows doesn't know POSIX or BCP47 Unicode traditional sort names
ILCID_POSIX_SUBTABLE(es)285 ILCID_POSIX_SUBTABLE(es) {
286 {0x0a, "es"},
287 {0x2c0a, "es_AR"},
288 {0x400a, "es_BO"},
289 {0x340a, "es_CL"},
290 {0x240a, "es_CO"},
291 {0x140a, "es_CR"},
292 {0x5c0a, "es_CU"},
293 {0x1c0a, "es_DO"},
294 {0x300a, "es_EC"},
295 {0x0c0a, "es_ES"}, /*Modern sort.*/
296 {0x100a, "es_GT"},
297 {0x480a, "es_HN"},
298 {0x080a, "es_MX"},
299 {0x4c0a, "es_NI"},
300 {0x180a, "es_PA"},
301 {0x280a, "es_PE"},
302 {0x500a, "es_PR"},
303 {0x3c0a, "es_PY"},
304 {0x440a, "es_SV"},
305 {0x540a, "es_US"},
306 {0x380a, "es_UY"},
307 {0x200a, "es_VE"},
308 {0x580a, "es_419"},
309 {0x040a, "es_ES@collation=traditional"},
310 {0x040a, "es@collation=traditional"} // Windows will treat this as es-ES@collation=traditional
311 };
312
313 ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
314 ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
315
316 /* ISO-639 doesn't distinguish between Persian and Dari.*/
ILCID_POSIX_SUBTABLE(fa)317 ILCID_POSIX_SUBTABLE(fa) {
318 {0x29, "fa"},
319 {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */
320 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
321 };
322
323
324 /* duplicate for roundtripping */
ILCID_POSIX_SUBTABLE(fa_AF)325 ILCID_POSIX_SUBTABLE(fa_AF) {
326 {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */
327 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
328 };
329
ILCID_POSIX_SUBTABLE(ff)330 ILCID_POSIX_SUBTABLE(ff) {
331 {0x67, "ff"},
332 {0x7c67, "ff_Latn"},
333 {0x0867, "ff_Latn_SN"},
334 {0x0467, "ff_NG"}
335 };
336
337 ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
338 ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
339 ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
340
ILCID_POSIX_SUBTABLE(fr)341 ILCID_POSIX_SUBTABLE(fr) {
342 {0x0c, "fr"},
343 {0x080c, "fr_BE"},
344 {0x0c0c, "fr_CA"},
345 {0x240c, "fr_CD"},
346 {0x240c, "fr_CG"},
347 {0x100c, "fr_CH"},
348 {0x300c, "fr_CI"},
349 {0x2c0c, "fr_CM"},
350 {0x040c, "fr_FR"},
351 {0x3c0c, "fr_HT"},
352 {0x140c, "fr_LU"},
353 {0x380c, "fr_MA"},
354 {0x180c, "fr_MC"},
355 {0x340c, "fr_ML"},
356 {0x200c, "fr_RE"},
357 {0x280c, "fr_SN"},
358 {0xe40c, "fr_015"},
359 {0x1c0c, "fr_029"}
360 };
361
362 ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)
363
364 ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
365
ILCID_POSIX_SUBTABLE(ga)366 ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
367 {0x3c, "ga"},
368 {0x083c, "ga_IE"},
369 {0x043c, "gd_GB"}
370 };
371
ILCID_POSIX_SUBTABLE(gd)372 ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
373 {0x91, "gd"},
374 {0x0491, "gd_GB"}
375 };
376
377 ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
378 ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
379 ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
380 ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
381
ILCID_POSIX_SUBTABLE(ha)382 ILCID_POSIX_SUBTABLE(ha) {
383 {0x68, "ha"},
384 {0x7c68, "ha_Latn"},
385 {0x0468, "ha_Latn_NG"},
386 };
387
388 ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
389 ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
390 ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
391
392 /* This LCID is really four different locales.*/
ILCID_POSIX_SUBTABLE(hr)393 ILCID_POSIX_SUBTABLE(hr) {
394 {0x1a, "hr"},
395 {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */
396 {0x681a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */
397 {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
398 {0x781a, "bs"}, /* Bosnian */
399 {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */
400 {0x641a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */
401 {0x101a, "hr_BA"}, /* Croatian in Bosnia */
402 {0x041a, "hr_HR"}, /* Croatian*/
403 {0x2c1a, "sr_Latn_ME"},
404 {0x241a, "sr_Latn_RS"},
405 {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
406 {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
407 {0x701a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
408 {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
409 {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
410 {0x301a, "sr_Cyrl_ME"},
411 {0x281a, "sr_Cyrl_RS"},
412 {0x6c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
413 {0x7c1a, "sr"} /* In CLDR sr is sr_Cyrl. */
414 };
415
ILCID_POSIX_SUBTABLE(hsb)416 ILCID_POSIX_SUBTABLE(hsb) {
417 {0x2E, "hsb"},
418 {0x042E, "hsb_DE"},
419 {0x082E, "dsb_DE"},
420 {0x7C2E, "dsb"},
421 };
422
423 ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
424 ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
425
ILCID_POSIX_SUBTABLE(ibb)426 ILCID_POSIX_SUBTABLE(ibb) {
427 {0x69, "ibb"},
428 {0x0469, "ibb_NG"}
429 };
430
431 ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
432 ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
433 ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
434 ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
435
ILCID_POSIX_SUBTABLE(it)436 ILCID_POSIX_SUBTABLE(it) {
437 {0x10, "it"},
438 {0x0810, "it_CH"},
439 {0x0410, "it_IT"}
440 };
441
ILCID_POSIX_SUBTABLE(iu)442 ILCID_POSIX_SUBTABLE(iu) {
443 {0x5d, "iu"},
444 {0x045d, "iu_Cans_CA"},
445 {0x785d, "iu_Cans"},
446 {0x085d, "iu_Latn_CA"},
447 {0x7c5d, "iu_Latn"}
448 };
449
450 ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/
451 ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
452 ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
453 ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
454 ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
455 ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
456 ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
457
ILCID_POSIX_SUBTABLE(ko)458 ILCID_POSIX_SUBTABLE(ko) {
459 {0x12, "ko"},
460 {0x0812, "ko_KP"},
461 {0x0412, "ko_KR"}
462 };
463
464 ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
465 ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG)
466
ILCID_POSIX_SUBTABLE(ks)467 ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */
468 {0x60, "ks"},
469 {0x0460, "ks_Arab_IN"},
470 {0x0860, "ks_Deva_IN"}
471 };
472
473 ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
474
ILCID_POSIX_SUBTABLE(la)475 ILCID_POSIX_SUBTABLE(la) {
476 {0x76, "la"},
477 {0x0476, "la_001"},
478 {0x0476, "la_IT"} /*Left in for compatibility*/
479 };
480
481 ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
482 ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
483 ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
484 ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
485 ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
486 ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
487 ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
488
ILCID_POSIX_SUBTABLE(mn)489 ILCID_POSIX_SUBTABLE(mn) {
490 {0x50, "mn"},
491 {0x0450, "mn_MN"},
492 {0x7c50, "mn_Mong"},
493 {0x0850, "mn_Mong_CN"},
494 {0x0850, "mn_CN"},
495 {0x7850, "mn_Cyrl"},
496 {0x0c50, "mn_Mong_MN"}
497 };
498
499 ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
500 ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
501 ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
502
ILCID_POSIX_SUBTABLE(ms)503 ILCID_POSIX_SUBTABLE(ms) {
504 {0x3e, "ms"},
505 {0x083e, "ms_BN"}, /* Brunei Darussalam*/
506 {0x043e, "ms_MY"} /* Malaysia*/
507 };
508
509 ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
510 ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
511
ILCID_POSIX_SUBTABLE(ne)512 ILCID_POSIX_SUBTABLE(ne) {
513 {0x61, "ne"},
514 {0x0861, "ne_IN"}, /* India*/
515 {0x0461, "ne_NP"} /* Nepal*/
516 };
517
ILCID_POSIX_SUBTABLE(nl)518 ILCID_POSIX_SUBTABLE(nl) {
519 {0x13, "nl"},
520 {0x0813, "nl_BE"},
521 {0x0413, "nl_NL"}
522 };
523
524 /* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/
525 // TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here.
ILCID_POSIX_SUBTABLE(no)526 ILCID_POSIX_SUBTABLE(no) {
527 {0x14, "no"}, /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */
528 {0x7c14, "nb"}, /* really nb */
529 {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
530 {0x0414, "no_NO"}, /* really nb_NO */
531 {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
532 {0x7814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
533 {0x0814, "no_NO_NY"}/* really nn_NO */
534 };
535
536 ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the ISO-639 code */
537 ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
538
ILCID_POSIX_SUBTABLE(om)539 ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
540 {0x72, "om"},
541 {0x0472, "om_ET"},
542 {0x0472, "gaz_ET"}
543 };
544
545 /* Declared as or_IN to get around compiler errors*/
ILCID_POSIX_SUBTABLE(or_IN)546 ILCID_POSIX_SUBTABLE(or_IN) {
547 {0x48, "or"},
548 {0x0448, "or_IN"},
549 };
550
ILCID_POSIX_SUBTABLE(pa)551 ILCID_POSIX_SUBTABLE(pa) {
552 {0x46, "pa"},
553 {0x0446, "pa_IN"},
554 {0x0846, "pa_Arab_PK"},
555 {0x0846, "pa_PK"}
556 };
557
ILCID_POSIX_SUBTABLE(pap)558 ILCID_POSIX_SUBTABLE(pap) {
559 {0x79, "pap"},
560 {0x0479, "pap_029"},
561 {0x0479, "pap_AN"} /*Left in for compatibility*/
562 };
563
564 ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
565 ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
566
ILCID_POSIX_SUBTABLE(pt)567 ILCID_POSIX_SUBTABLE(pt) {
568 {0x16, "pt"},
569 {0x0416, "pt_BR"},
570 {0x0816, "pt_PT"}
571 };
572
ILCID_POSIX_SUBTABLE(qu)573 ILCID_POSIX_SUBTABLE(qu) {
574 {0x6b, "qu"},
575 {0x046b, "qu_BO"},
576 {0x086b, "qu_EC"},
577 {0x0C6b, "qu_PE"},
578 {0x046b, "quz_BO"},
579 {0x086b, "quz_EC"},
580 {0x0C6b, "quz_PE"}
581 };
582
ILCID_POSIX_SUBTABLE(quc)583 ILCID_POSIX_SUBTABLE(quc) {
584 {0x93, "quc"},
585 {0x0493, "quc_CO"},
586 /*
587 "quc_Latn_GT" is an exceptional case. Language ID of "quc"
588 is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be
589 under the group of "qut". "qut" is a retired ISO 639-3 language
590 code for West Central Quiche, and merged to "quc".
591 It looks Windows previously reserved "qut" for K'iche', but,
592 decided to use "quc" when adding a locale for K'iche' (Guatemala).
593
594 This data structure used here assumes language ID bits in
595 LCID is unique for alphabetic language code. But this is not true
596 for "quc_Latn_GT". If we don't have the data below, LCID look up
597 by alphabetic locale ID (POSIX) will fail. The same entry is found
598 under "qut" below, which is required for reverse look up.
599 */
600 {0x0486, "quc_Latn_GT"}
601 };
602
ILCID_POSIX_SUBTABLE(qut)603 ILCID_POSIX_SUBTABLE(qut) {
604 {0x86, "qut"},
605 {0x0486, "qut_GT"},
606 /*
607 See the note in "quc" above.
608 */
609 {0x0486, "quc_Latn_GT"}
610 };
611
612 ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
613
ILCID_POSIX_SUBTABLE(ro)614 ILCID_POSIX_SUBTABLE(ro) {
615 {0x18, "ro"},
616 {0x0418, "ro_RO"},
617 {0x0818, "ro_MD"}
618 };
619
620 // TODO: This is almost certainly 'wrong'. 0 in Windows is a synonym for LOCALE_USER_DEFAULT.
621 // More likely this is a similar concept to the Windows 0x7f Invariant locale ""
622 // (Except that it's not invariant in ICU)
ILCID_POSIX_SUBTABLE(root)623 ILCID_POSIX_SUBTABLE(root) {
624 {0x00, "root"}
625 };
626
ILCID_POSIX_SUBTABLE(ru)627 ILCID_POSIX_SUBTABLE(ru) {
628 {0x19, "ru"},
629 {0x0419, "ru_RU"},
630 {0x0819, "ru_MD"}
631 };
632
633 ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
634 ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
635 ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
636
ILCID_POSIX_SUBTABLE(sd)637 ILCID_POSIX_SUBTABLE(sd) {
638 {0x59, "sd"},
639 {0x0459, "sd_Deva_IN"},
640 {0x0459, "sd_IN"},
641 {0x0859, "sd_Arab_PK"},
642 {0x0859, "sd_PK"},
643 {0x7c59, "sd_Arab"}
644 };
645
ILCID_POSIX_SUBTABLE(se)646 ILCID_POSIX_SUBTABLE(se) {
647 {0x3b, "se"},
648 {0x0c3b, "se_FI"},
649 {0x043b, "se_NO"},
650 {0x083b, "se_SE"},
651 {0x783b, "sma"},
652 {0x183b, "sma_NO"},
653 {0x1c3b, "sma_SE"},
654 {0x7c3b, "smj"},
655 {0x703b, "smn"},
656 {0x743b, "sms"},
657 {0x103b, "smj_NO"},
658 {0x143b, "smj_SE"},
659 {0x243b, "smn_FI"},
660 {0x203b, "sms_FI"},
661 };
662
663 ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
664 ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
665 ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
666
ILCID_POSIX_SUBTABLE(so)667 ILCID_POSIX_SUBTABLE(so) {
668 {0x77, "so"},
669 {0x0477, "so_SO"}
670 };
671
672 ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
673 ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
674
ILCID_POSIX_SUBTABLE(sv)675 ILCID_POSIX_SUBTABLE(sv) {
676 {0x1d, "sv"},
677 {0x081d, "sv_FI"},
678 {0x041d, "sv_SE"}
679 };
680
681 ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
682 ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
683
ILCID_POSIX_SUBTABLE(ta)684 ILCID_POSIX_SUBTABLE(ta) {
685 {0x49, "ta"},
686 {0x0449, "ta_IN"},
687 {0x0849, "ta_LK"}
688 };
689
690 ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
691
692 /* Cyrillic based by default */
ILCID_POSIX_SUBTABLE(tg)693 ILCID_POSIX_SUBTABLE(tg) {
694 {0x28, "tg"},
695 {0x7c28, "tg_Cyrl"},
696 {0x0428, "tg_Cyrl_TJ"}
697 };
698
699 ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
700
ILCID_POSIX_SUBTABLE(ti)701 ILCID_POSIX_SUBTABLE(ti) {
702 {0x73, "ti"},
703 {0x0873, "ti_ER"},
704 {0x0473, "ti_ET"}
705 };
706
707 ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
708
ILCID_POSIX_SUBTABLE(tn)709 ILCID_POSIX_SUBTABLE(tn) {
710 {0x32, "tn"},
711 {0x0832, "tn_BW"},
712 {0x0432, "tn_ZA"}
713 };
714
715 ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
716 ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
717 ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
718
ILCID_POSIX_SUBTABLE(tzm)719 ILCID_POSIX_SUBTABLE(tzm) {
720 {0x5f, "tzm"},
721 {0x7c5f, "tzm_Latn"},
722 {0x085f, "tzm_Latn_DZ"},
723 {0x105f, "tzm_Tfng_MA"},
724 {0x045f, "tzm_Arab_MA"},
725 {0x045f, "tmz"}
726 };
727
ILCID_POSIX_SUBTABLE(ug)728 ILCID_POSIX_SUBTABLE(ug) {
729 {0x80, "ug"},
730 {0x0480, "ug_CN"},
731 {0x0480, "ug_Arab_CN"}
732 };
733
734 ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
735
ILCID_POSIX_SUBTABLE(ur)736 ILCID_POSIX_SUBTABLE(ur) {
737 {0x20, "ur"},
738 {0x0820, "ur_IN"},
739 {0x0420, "ur_PK"}
740 };
741
ILCID_POSIX_SUBTABLE(uz)742 ILCID_POSIX_SUBTABLE(uz) {
743 {0x43, "uz"},
744 {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */
745 {0x7843, "uz_Cyrl"}, /* Cyrillic based */
746 {0x0843, "uz_UZ"}, /* Cyrillic based */
747 {0x0443, "uz_Latn_UZ"}, /* Latin based */
748 {0x7c43, "uz_Latn"} /* Latin based */
749 };
750
ILCID_POSIX_SUBTABLE(ve)751 ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
752 {0x33, "ve"},
753 {0x0433, "ve_ZA"},
754 {0x0433, "ven_ZA"}
755 };
756
757 ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
758 ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
759 ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
760
ILCID_POSIX_SUBTABLE(yi)761 ILCID_POSIX_SUBTABLE(yi) {
762 {0x003d, "yi"},
763 {0x043d, "yi_001"}
764 };
765
766 ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
767
768 // Windows & ICU tend to different names for some of these
769 // TODO: Windows probably does not need all of these entries, but I don't know how the precedence works.
ILCID_POSIX_SUBTABLE(zh)770 ILCID_POSIX_SUBTABLE(zh) {
771 {0x0004, "zh_Hans"},
772 {0x7804, "zh"},
773 {0x0804, "zh_CN"},
774 {0x0804, "zh_Hans_CN"},
775 {0x0c04, "zh_Hant_HK"},
776 {0x0c04, "zh_HK"},
777 {0x1404, "zh_Hant_MO"},
778 {0x1404, "zh_MO"},
779 {0x1004, "zh_Hans_SG"},
780 {0x1004, "zh_SG"},
781 {0x0404, "zh_Hant_TW"},
782 {0x7c04, "zh_Hant"},
783 {0x0404, "zh_TW"},
784 {0x30404,"zh_Hant_TW"}, /* Bopomofo order */
785 {0x30404,"zh_TW"}, /* Bopomofo order */
786 {0x20004,"zh@collation=stroke"},
787 {0x20404,"zh_Hant@collation=stroke"},
788 {0x20404,"zh_Hant_TW@collation=stroke"},
789 {0x20404,"zh_TW@collation=stroke"},
790 {0x20804,"zh_Hans@collation=stroke"},
791 {0x20804,"zh_Hans_CN@collation=stroke"},
792 {0x20804,"zh_CN@collation=stroke"}
793 // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804
794 };
795
796 ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
797
798 /* This must be static and grouped by LCID. */
799 static const ILcidPosixMap gPosixIDmap[] = {
800 ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */
801 ILCID_POSIX_MAP(am), /* am Amharic 0x5e */
802 ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */
803 ILCID_POSIX_MAP(arn), /* arn Araucanian/Mapudungun 0x7a */
804 ILCID_POSIX_MAP(as), /* as Assamese 0x4d */
805 ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */
806 ILCID_POSIX_MAP(ba), /* ba Bashkir 0x6d */
807 ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */
808 /* ILCID_POSIX_MAP(ber), ber Berber/Tamazight 0x5f */
809 ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */
810 ILCID_POSIX_MAP(bin), /* bin Edo 0x66 */
811 ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */
812 ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */
813 ILCID_POSIX_MAP(br), /* br Breton 0x7e */
814 ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */
815 ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */
816 ILCID_POSIX_MAP(ckb), /* ckb Sorani (Central Kurdish) 0x92 */
817 ILCID_POSIX_MAP(co), /* co Corsican 0x83 */
818 ILCID_POSIX_MAP(cs), /* cs Czech 0x05 */
819 ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */
820 ILCID_POSIX_MAP(da), /* da Danish 0x06 */
821 ILCID_POSIX_MAP(de), /* de German 0x07 */
822 ILCID_POSIX_MAP(dv), /* dv Divehi 0x65 */
823 ILCID_POSIX_MAP(el), /* el Greek 0x08 */
824 ILCID_POSIX_MAP(en), /* en English 0x09 */
825 ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */
826 ILCID_POSIX_MAP(es), /* es Spanish 0x0a */
827 ILCID_POSIX_MAP(et), /* et Estonian 0x25 */
828 ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */
829 ILCID_POSIX_MAP(fa), /* fa Persian/Farsi 0x29 */
830 ILCID_POSIX_MAP(fa_AF), /* fa Persian/Dari 0x8c */
831 ILCID_POSIX_MAP(ff), /* ff Fula 0x67 */
832 ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */
833 ILCID_POSIX_MAP(fil), /* fil Filipino 0x64 */
834 ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */
835 ILCID_POSIX_MAP(fr), /* fr French 0x0c */
836 ILCID_POSIX_MAP(fuv), /* fuv Fulfulde - Nigeria 0x67 */
837 ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */
838 ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */
839 ILCID_POSIX_MAP(gd), /* gd Gaelic (United Kingdom) 0x91 */
840 ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */
841 ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */
842 ILCID_POSIX_MAP(gsw), /* gsw Alemanic/Alsatian/Swiss German 0x84 */
843 ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */
844 ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */
845 ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */
846 ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */
847 ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */
848 ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */
849 ILCID_POSIX_MAP(hsb), /* hsb Upper Sorbian 0x2e */
850 ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */
851 ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */
852 ILCID_POSIX_MAP(ibb), /* ibb Ibibio - Nigeria 0x69 */
853 ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */
854 ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */
855 ILCID_POSIX_MAP(ii), /* ii Sichuan Yi 0x78 */
856 ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */
857 ILCID_POSIX_MAP(it), /* it Italian 0x10 */
858 ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */
859 ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */
860 ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */
861 ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */
862 ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */
863 ILCID_POSIX_MAP(kl), /* kl Kalaallisut 0x6f */
864 ILCID_POSIX_MAP(km), /* km Khmer 0x53 */
865 ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */
866 ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */
867 ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */
868 ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */
869 ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */
870 ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */
871 ILCID_POSIX_MAP(lb), /* lb Luxembourgish 0x6e */
872 ILCID_POSIX_MAP(la), /* la Latin 0x76 */
873 ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */
874 ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */
875 ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */
876 ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */
877 ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */
878 ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */
879 ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */
880 ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */
881 ILCID_POSIX_MAP(moh), /* moh Mohawk 0x7c */
882 ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */
883 ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */
884 ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */
885 ILCID_POSIX_MAP(my), /* my Burmese 0x55 */
886 /* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
887 ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */
888 ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */
889 /* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
890 ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */
891 ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
892 ILCID_POSIX_MAP(oc), /* oc Occitan 0x82 */
893 ILCID_POSIX_MAP(om), /* om Oromo 0x72 */
894 ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */
895 ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */
896 ILCID_POSIX_MAP(pap), /* pap Papiamentu 0x79 */
897 ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */
898 ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */
899 ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */
900 ILCID_POSIX_MAP(qu), /* qu Quechua 0x6B */
901 ILCID_POSIX_MAP(quc), /* quc K'iche 0x93 */
902 ILCID_POSIX_MAP(qut), /* qut K'iche 0x86 */
903 ILCID_POSIX_MAP(rm), /* rm Raeto-Romance/Romansh 0x17 */
904 ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */
905 ILCID_POSIX_MAP(root), /* root 0x00 */
906 ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */
907 ILCID_POSIX_MAP(rw), /* rw Kinyarwanda 0x87 */
908 ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */
909 ILCID_POSIX_MAP(sah), /* sah Yakut 0x85 */
910 ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */
911 ILCID_POSIX_MAP(se), /* se Sami 0x3b */
912 /* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
913 ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */
914 ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */
915 ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */
916 ILCID_POSIX_MAP(so), /* so Somali 0x77 */
917 ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */
918 /* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
919 ILCID_POSIX_MAP(st), /* st Sutu 0x30 */
920 ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */
921 ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */
922 ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */
923 ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */
924 ILCID_POSIX_MAP(te), /* te Telugu 0x4a */
925 ILCID_POSIX_MAP(tg), /* tg Tajik 0x28 */
926 ILCID_POSIX_MAP(th), /* th Thai 0x1e */
927 ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */
928 ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */
929 ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */
930 ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */
931 ILCID_POSIX_MAP(ts), /* ts Tsonga 0x31 */
932 ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */
933 ILCID_POSIX_MAP(tzm), /* tzm Tamazight 0x5f */
934 ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */
935 ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */
936 ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */
937 ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */
938 ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */
939 ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */
940 ILCID_POSIX_MAP(wo), /* wo Wolof 0x88 */
941 ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */
942 ILCID_POSIX_MAP(yi), /* yi Yiddish 0x3d */
943 ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */
944 ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */
945 ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */
946 };
947
/* Number of entries in gPosixIDmap; used as the bound for the table scans in this file. */
static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
949
/**
 * Measures how far two NUL-terminated IDs agree, starting at their first
 * character. getHostID() uses the result to rank table entries against the
 * requested POSIX ID.
 *
 * @param id1 the first ID.
 * @param id2 the second ID.
 * @return the number of leading characters that are equal in both strings;
 *         a terminating NUL is never counted.
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t matched = 0;

    for (;;) {
        const char current = id1[matched];
        /* Stop at the end of id1 or at the first differing character. */
        if (current == 0 || current != id2[matched]) {
            break;
        }
        ++matched;
    }
    return matched;
}
966
967 /**
968 * Searches for a Windows LCID
969 *
970 * @param posixID the Posix style locale id.
971 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
972 * no equivalent Windows LCID.
973 * @return the LCID
974 */
975 static uint32_t
getHostID(const ILcidPosixMap * this_0,const char * posixID,UErrorCode * status)976 getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
977 {
978 int32_t bestIdx = 0;
979 int32_t bestIdxDiff = 0;
980 int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
981 uint32_t idx;
982
983 for (idx = 0; idx < this_0->numRegions; idx++ ) {
984 int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
985 if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
986 if (posixIDlen == sameChars) {
987 /* Exact match */
988 return this_0->regionMaps[idx].hostID;
989 }
990 bestIdxDiff = sameChars;
991 bestIdx = idx;
992 }
993 }
994 /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
995 /* We also have to make sure that sid and si and similar string subsets don't match. */
996 if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
997 && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
998 {
999 *status = U_USING_FALLBACK_WARNING;
1000 return this_0->regionMaps[bestIdx].hostID;
1001 }
1002
1003 /*no match found */
1004 *status = U_ILLEGAL_ARGUMENT_ERROR;
1005 return this_0->regionMaps->hostID;
1006 }
1007
1008 static const char*
getPosixID(const ILcidPosixMap * this_0,uint32_t hostID)1009 getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
1010 {
1011 uint32_t i;
1012 for (i = 0; i < this_0->numRegions; i++)
1013 {
1014 if (this_0->regionMaps[i].hostID == hostID)
1015 {
1016 return this_0->regionMaps[i].posixID;
1017 }
1018 }
1019
1020 /* If you get here, then no matching region was found,
1021 so return the language id with the wild card region. */
1022 return this_0->regionMaps[0].posixID;
1023 }
1024
1025 /*
1026 //////////////////////////////////////
1027 //
1028 // LCID --> POSIX
1029 //
1030 /////////////////////////////////////
1031 */
1032 #if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
/*
 * Various language tags needs to be changed:
 * quz -> qu
 * prs -> fa
 *
 * The rewrite happens in place: the two-letter code is written over the
 * start of the buffer and the remainder of the string (including its
 * terminating NUL) is shifted one position to the left.
 *
 * The shift uses uprv_memmove because source and destination overlap inside
 * the same buffer; a strcat-style copy is not defined for overlapping
 * objects.
 *
 * buffer must be a writable, NUL-terminated char array, and len is the
 * length value the caller holds for it. The macro expands to a single
 * statement and must be followed by a semicolon.
 */
#define FIX_LANGUAGE_ID_TAG(buffer, len) \
    do { \
        if ((len) >= 3) { \
            if ((buffer)[0] == 'q' && (buffer)[1] == 'u' && (buffer)[2] == 'z') { \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } else if ((buffer)[0] == 'p' && (buffer)[1] == 'r' && (buffer)[2] == 's') { \
                (buffer)[0] = 'f'; \
                (buffer)[1] = 'a'; \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } \
        } \
    } while (0)
1048
1049 #endif
1050
1051 U_CAPI int32_t
uprv_convertToPosix(uint32_t hostid,char * posixID,int32_t posixIDCapacity,UErrorCode * status)1052 uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
1053 {
1054 uint16_t langID;
1055 uint32_t localeIndex;
1056 UBool bLookup = TRUE;
1057 const char *pPosixID = NULL;
1058
1059 #if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
1060 static_assert(ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH, "Windows locale names have smaller length than ICU locale names.");
1061
1062 char locName[LOCALE_NAME_MAX_LENGTH] = {};
1063
1064 // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
1065 // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
1066 // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
1067 // use the Windows API to resolve locale ID for this specific case.
1068 if ((hostid & 0x3FF) != 0x92) {
1069 int32_t tmpLen = 0;
1070 char16_t windowsLocaleName[LOCALE_NAME_MAX_LENGTH] = {};
1071
1072 // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
1073 tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
1074 if (tmpLen > 1) {
1075 int32_t i = 0;
1076 // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort.
1077 bLookup = FALSE;
1078 for (i = 0; i < UPRV_LENGTHOF(locName); i++)
1079 {
1080 locName[i] = (char)(windowsLocaleName[i]);
1081
1082 // Windows locale name may contain sorting variant, such as "es-ES_tradnl".
1083 // In such cases, we need special mapping data found in the hardcoded table
1084 // in this source file.
1085 if (windowsLocaleName[i] == L'_')
1086 {
1087 // Keep the base locale, without variant
1088 // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.?
1089 locName[i] = '\0';
1090 tmpLen = i;
1091 bLookup = TRUE;
1092 break;
1093 }
1094 else if (windowsLocaleName[i] == L'-')
1095 {
1096 // Windows names use -, ICU uses _
1097 locName[i] = '_';
1098 }
1099 else if (windowsLocaleName[i] == L'\0')
1100 {
1101 // No point in doing more work than necessary
1102 break;
1103 }
1104 }
1105 // TODO: Need to understand this better, why isn't it an alias?
1106 FIX_LANGUAGE_ID_TAG(locName, tmpLen);
1107 pPosixID = locName;
1108 }
1109 }
1110 #endif
1111
1112 if (bLookup) {
1113 const char *pCandidate = NULL;
1114 langID = LANGUAGE_LCID(hostid);
1115
1116 for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
1117 if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
1118 pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
1119 break;
1120 }
1121 }
1122
1123 /* On Windows, when locale name has a variant, we still look up the hardcoded table.
1124 If a match in the hardcoded table is longer than the Windows locale name without
1125 variant, we use the one as the result */
1126 if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
1127 pPosixID = pCandidate;
1128 }
1129 }
1130
1131 if (pPosixID) {
1132 int32_t resLen = static_cast<int32_t>(uprv_strlen(pPosixID));
1133 int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
1134 uprv_memcpy(posixID, pPosixID, copyLen);
1135 if (resLen < posixIDCapacity) {
1136 posixID[resLen] = 0;
1137 if (*status == U_STRING_NOT_TERMINATED_WARNING) {
1138 *status = U_ZERO_ERROR;
1139 }
1140 } else if (resLen == posixIDCapacity) {
1141 *status = U_STRING_NOT_TERMINATED_WARNING;
1142 } else {
1143 *status = U_BUFFER_OVERFLOW_ERROR;
1144 }
1145 return resLen;
1146 }
1147
1148 /* no match found */
1149 *status = U_ILLEGAL_ARGUMENT_ERROR;
1150 return -1;
1151 }
1152
1153 /*
1154 //////////////////////////////////////
1155 //
1156 // POSIX --> LCID
1157 // This should only be called from uloc_getLCID.
1158 // The locale ID must be in canonical form.
1159 //
1160 /////////////////////////////////////
1161 */
1162 U_CAPI uint32_t
uprv_convertToLCIDPlatform(const char * localeID,UErrorCode * status)1163 uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
1164 {
1165 if (U_FAILURE(*status)) {
1166 return 0;
1167 }
1168
1169 // The purpose of this function is to leverage the Windows platform name->lcid
1170 // conversion functionality when available.
1171 #if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
1172 int32_t len;
1173 char baseName[ULOC_FULLNAME_CAPACITY] = {};
1174 const char * mylocaleID = localeID;
1175
1176 // Check any for keywords.
1177 if (uprv_strchr(localeID, '@'))
1178 {
1179 icu::CharString collVal;
1180 {
1181 icu::CharStringByteSink sink(&collVal);
1182 ulocimp_getKeywordValue(localeID, "collation", sink, status);
1183 }
1184 if (U_SUCCESS(*status) && !collVal.isEmpty())
1185 {
1186 // If it contains the keyword collation, return 0 so that the LCID lookup table will be used.
1187 return 0;
1188 }
1189 else
1190 {
1191 // If the locale ID contains keywords other than collation, just use the base name.
1192 len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, status);
1193
1194 if (U_SUCCESS(*status) && len > 0)
1195 {
1196 baseName[len] = 0;
1197 mylocaleID = baseName;
1198 }
1199 }
1200 }
1201
1202 char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
1203 // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
1204 (void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, status);
1205
1206 if (U_SUCCESS(*status))
1207 {
1208 // Need it to be UTF-16, not 8-bit
1209 wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
1210 int32_t i;
1211 for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++)
1212 {
1213 if (asciiBCP47Tag[i] == '\0')
1214 {
1215 break;
1216 }
1217 else
1218 {
1219 // Copy the character
1220 bcp47Tag[i] = static_cast<wchar_t>(asciiBCP47Tag[i]);
1221 }
1222 }
1223
1224 if (i < (UPRV_LENGTHOF(bcp47Tag) - 1))
1225 {
1226 // Ensure it's null terminated
1227 bcp47Tag[i] = L'\0';
1228 LCID lcid = LocaleNameToLCID(bcp47Tag, LOCALE_ALLOW_NEUTRAL_NAMES);
1229 if (lcid > 0)
1230 {
1231 // Found LCID from windows, return that one, unless its completely ambiguous
1232 // LOCALE_USER_DEFAULT and transients are OK because they will round trip
1233 // for this process.
1234 if (lcid != LOCALE_CUSTOM_UNSPECIFIED)
1235 {
1236 return lcid;
1237 }
1238 }
1239 }
1240 }
1241 #else
1242 (void) localeID; // Suppress unused variable warning.
1243 #endif
1244
1245 // Nothing found, or not implemented.
1246 return 0;
1247 }
1248
1249 U_CAPI uint32_t
uprv_convertToLCID(const char * langID,const char * posixID,UErrorCode * status)1250 uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
1251 {
1252 // This function does the table lookup when native platform name->lcid conversion isn't available,
1253 // or for locales that don't follow patterns the platform expects.
1254 uint32_t low = 0;
1255 uint32_t high = gLocaleCount;
1256 uint32_t mid;
1257 uint32_t oldmid = 0;
1258 int32_t compVal;
1259
1260 uint32_t value = 0;
1261 uint32_t fallbackValue = (uint32_t)-1;
1262 UErrorCode myStatus;
1263 uint32_t idx;
1264
1265 /* Check for incomplete id. */
1266 if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
1267 return 0;
1268 }
1269
1270 /*Binary search for the map entry for normal cases */
1271
1272 while (high > low) /*binary search*/{
1273
1274 mid = (high+low) >> 1; /*Finds median*/
1275
1276 if (mid == oldmid)
1277 break;
1278
1279 compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
1280 if (compVal < 0){
1281 high = mid;
1282 }
1283 else if (compVal > 0){
1284 low = mid;
1285 }
1286 else /*we found it*/{
1287 return getHostID(&gPosixIDmap[mid], posixID, status);
1288 }
1289 oldmid = mid;
1290 }
1291
1292 /*
1293 * Sometimes we can't do a binary search on posixID because some LCIDs
1294 * go to different locales. We hit one of those special cases.
1295 */
1296 for (idx = 0; idx < gLocaleCount; idx++ ) {
1297 myStatus = U_ZERO_ERROR;
1298 value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
1299 if (myStatus == U_ZERO_ERROR) {
1300 return value;
1301 }
1302 else if (myStatus == U_USING_FALLBACK_WARNING) {
1303 fallbackValue = value;
1304 }
1305 }
1306
1307 if (fallbackValue != (uint32_t)-1) {
1308 *status = U_USING_FALLBACK_WARNING;
1309 return fallbackValue;
1310 }
1311
1312 /* no match found */
1313 *status = U_ILLEGAL_ARGUMENT_ERROR;
1314 return 0; /* return international (root) */
1315 }
1316