1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2014-2016, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 */
9 #include <algorithm>
10 
11 #include "unicode/utypes.h"
12 #include "unicode/unistr.h"
13 #include "unicode/uobject.h"
14 
15 #include "charstr.h"
16 #include "cmemory.h"
17 #include "cstring.h"
18 #include "uassert.h"
19 #include "ucln_cmn.h"
20 #include "uhash.h"
21 #include "umutex.h"
22 #include "uresimp.h"
23 #include "uvector.h"
24 #include "udataswp.h" /* for InvChar functions */
25 
26 static UHashtable* gLocExtKeyMap = NULL;
27 static icu::UInitOnce gLocExtKeyMapInitOnce = U_INITONCE_INITIALIZER;
28 
// Bit flags marking which "special" (pattern-validated rather than
// table-listed) type syntaxes a key accepts. The flags are OR-ed together
// into LocExtKeyData::specialTypes.
enum SpecialType {
    SPECIALTYPE_NONE         = 0x0,  // key accepts no special type syntax
    SPECIALTYPE_CODEPOINTS   = 0x1,  // see isSpecialTypeCodepoints()
    SPECIALTYPE_REORDER_CODE = 0x2,  // see isSpecialTypeReorderCode()
    SPECIALTYPE_RG_KEY_VALUE = 0x4   // see isSpecialTypeRgKeyValue()
};
36 
37 struct LocExtKeyData : public icu::UMemory {
38     const char*     legacyId;
39     const char*     bcpId;
40     icu::LocalUHashtablePointer typeMap;
41     uint32_t        specialTypes;
42 };
43 
44 struct LocExtType : public icu::UMemory {
45     const char*     legacyId;
46     const char*     bcpId;
47 };
48 
49 static icu::MemoryPool<icu::CharString>* gKeyTypeStringPool = NULL;
50 static icu::MemoryPool<LocExtKeyData>* gLocExtKeyDataEntries = NULL;
51 static icu::MemoryPool<LocExtType>* gLocExtTypeEntries = NULL;
52 
53 U_CDECL_BEGIN
54 
55 static UBool U_CALLCONV
uloc_key_type_cleanup(void)56 uloc_key_type_cleanup(void) {
57     if (gLocExtKeyMap != NULL) {
58         uhash_close(gLocExtKeyMap);
59         gLocExtKeyMap = NULL;
60     }
61 
62     delete gLocExtKeyDataEntries;
63     gLocExtKeyDataEntries = NULL;
64 
65     delete gLocExtTypeEntries;
66     gLocExtTypeEntries = NULL;
67 
68     delete gKeyTypeStringPool;
69     gKeyTypeStringPool = NULL;
70 
71     gLocExtKeyMapInitOnce.reset();
72     return TRUE;
73 }
74 
75 U_CDECL_END
76 
77 
78 static void U_CALLCONV
initFromResourceBundle(UErrorCode & sts)79 initFromResourceBundle(UErrorCode& sts) {
80     U_NAMESPACE_USE
81     ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup);
82 
83     gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
84 
85     LocalUResourceBundlePointer keyTypeDataRes(ures_openDirect(NULL, "keyTypeData", &sts));
86     LocalUResourceBundlePointer keyMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "keyMap", NULL, &sts));
87     LocalUResourceBundlePointer typeMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeMap", NULL, &sts));
88 
89     if (U_FAILURE(sts)) {
90         return;
91     }
92 
93     UErrorCode tmpSts = U_ZERO_ERROR;
94     LocalUResourceBundlePointer typeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeAlias", NULL, &tmpSts));
95     tmpSts = U_ZERO_ERROR;
96     LocalUResourceBundlePointer bcpTypeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "bcpTypeAlias", NULL, &tmpSts));
97 
98     // initialize pools storing dynamically allocated objects
99     gKeyTypeStringPool = new icu::MemoryPool<icu::CharString>;
100     if (gKeyTypeStringPool == NULL) {
101         sts = U_MEMORY_ALLOCATION_ERROR;
102         return;
103     }
104     gLocExtKeyDataEntries = new icu::MemoryPool<LocExtKeyData>;
105     if (gLocExtKeyDataEntries == NULL) {
106         sts = U_MEMORY_ALLOCATION_ERROR;
107         return;
108     }
109     gLocExtTypeEntries = new icu::MemoryPool<LocExtType>;
110     if (gLocExtTypeEntries == NULL) {
111         sts = U_MEMORY_ALLOCATION_ERROR;
112         return;
113     }
114 
115     // iterate through keyMap resource
116     LocalUResourceBundlePointer keyMapEntry;
117 
118     while (ures_hasNext(keyMapRes.getAlias())) {
119         keyMapEntry.adoptInstead(ures_getNextResource(keyMapRes.getAlias(), keyMapEntry.orphan(), &sts));
120         if (U_FAILURE(sts)) {
121             break;
122         }
123         const char* legacyKeyId = ures_getKey(keyMapEntry.getAlias());
124         UnicodeString uBcpKeyId = ures_getUnicodeString(keyMapEntry.getAlias(), &sts);
125         if (U_FAILURE(sts)) {
126             break;
127         }
128 
129         // empty value indicates that BCP key is same with the legacy key.
130         const char* bcpKeyId = legacyKeyId;
131         if (!uBcpKeyId.isEmpty()) {
132             icu::CharString* bcpKeyIdBuf = gKeyTypeStringPool->create();
133             if (bcpKeyIdBuf == NULL) {
134                 sts = U_MEMORY_ALLOCATION_ERROR;
135                 break;
136             }
137             bcpKeyIdBuf->appendInvariantChars(uBcpKeyId, sts);
138             if (U_FAILURE(sts)) {
139                 break;
140             }
141             bcpKeyId = bcpKeyIdBuf->data();
142         }
143 
144         UBool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0;
145 
146         UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
147         if (U_FAILURE(sts)) {
148             break;
149         }
150         uint32_t specialTypes = SPECIALTYPE_NONE;
151 
152         LocalUResourceBundlePointer typeAliasResByKey;
153         LocalUResourceBundlePointer bcpTypeAliasResByKey;
154 
155         if (typeAliasRes.isValid()) {
156             tmpSts = U_ZERO_ERROR;
157             typeAliasResByKey.adoptInstead(ures_getByKey(typeAliasRes.getAlias(), legacyKeyId, NULL, &tmpSts));
158             if (U_FAILURE(tmpSts)) {
159                 typeAliasResByKey.orphan();
160             }
161         }
162         if (bcpTypeAliasRes.isValid()) {
163             tmpSts = U_ZERO_ERROR;
164             bcpTypeAliasResByKey.adoptInstead(ures_getByKey(bcpTypeAliasRes.getAlias(), bcpKeyId, NULL, &tmpSts));
165             if (U_FAILURE(tmpSts)) {
166                 bcpTypeAliasResByKey.orphan();
167             }
168         }
169 
170         // look up type map for the key, and walk through the mapping data
171         tmpSts = U_ZERO_ERROR;
172         LocalUResourceBundlePointer typeMapResByKey(ures_getByKey(typeMapRes.getAlias(), legacyKeyId, NULL, &tmpSts));
173         if (U_FAILURE(tmpSts)) {
174             // type map for each key must exist
175             UPRV_UNREACHABLE;
176         } else {
177             LocalUResourceBundlePointer typeMapEntry;
178 
179             while (ures_hasNext(typeMapResByKey.getAlias())) {
180                 typeMapEntry.adoptInstead(ures_getNextResource(typeMapResByKey.getAlias(), typeMapEntry.orphan(), &sts));
181                 if (U_FAILURE(sts)) {
182                     break;
183                 }
184                 const char* legacyTypeId = ures_getKey(typeMapEntry.getAlias());
185 
186                 // special types
187                 if (uprv_strcmp(legacyTypeId, "CODEPOINTS") == 0) {
188                     specialTypes |= SPECIALTYPE_CODEPOINTS;
189                     continue;
190                 }
191                 if (uprv_strcmp(legacyTypeId, "REORDER_CODE") == 0) {
192                     specialTypes |= SPECIALTYPE_REORDER_CODE;
193                     continue;
194                 }
195                 if (uprv_strcmp(legacyTypeId, "RG_KEY_VALUE") == 0) {
196                     specialTypes |= SPECIALTYPE_RG_KEY_VALUE;
197                     continue;
198                 }
199 
200                 if (isTZ) {
201                     // a timezone key uses a colon instead of a slash in the resource.
202                     // e.g. America:Los_Angeles
203                     if (uprv_strchr(legacyTypeId, ':') != NULL) {
204                         icu::CharString* legacyTypeIdBuf =
205                                 gKeyTypeStringPool->create(legacyTypeId, sts);
206                         if (legacyTypeIdBuf == NULL) {
207                             sts = U_MEMORY_ALLOCATION_ERROR;
208                             break;
209                         }
210                         if (U_FAILURE(sts)) {
211                             break;
212                         }
213                         std::replace(
214                                 legacyTypeIdBuf->data(),
215                                 legacyTypeIdBuf->data() + legacyTypeIdBuf->length(),
216                                 ':', '/');
217                         legacyTypeId = legacyTypeIdBuf->data();
218                     }
219                 }
220 
221                 UnicodeString uBcpTypeId = ures_getUnicodeString(typeMapEntry.getAlias(), &sts);
222                 if (U_FAILURE(sts)) {
223                     break;
224                 }
225 
226                 // empty value indicates that BCP type is same with the legacy type.
227                 const char* bcpTypeId = legacyTypeId;
228                 if (!uBcpTypeId.isEmpty()) {
229                     icu::CharString* bcpTypeIdBuf = gKeyTypeStringPool->create();
230                     if (bcpTypeIdBuf == NULL) {
231                         sts = U_MEMORY_ALLOCATION_ERROR;
232                         break;
233                     }
234                     bcpTypeIdBuf->appendInvariantChars(uBcpTypeId, sts);
235                     if (U_FAILURE(sts)) {
236                         break;
237                     }
238                     bcpTypeId = bcpTypeIdBuf->data();
239                 }
240 
241                 // Note: legacy type value should never be
242                 // equivalent to bcp type value of a different
243                 // type under the same key. So we use a single
244                 // map for lookup.
245                 LocExtType* t = gLocExtTypeEntries->create();
246                 if (t == NULL) {
247                     sts = U_MEMORY_ALLOCATION_ERROR;
248                     break;
249                 }
250                 t->bcpId = bcpTypeId;
251                 t->legacyId = legacyTypeId;
252 
253                 uhash_put(typeDataMap, (void*)legacyTypeId, t, &sts);
254                 if (bcpTypeId != legacyTypeId) {
255                     // different type value
256                     uhash_put(typeDataMap, (void*)bcpTypeId, t, &sts);
257                 }
258                 if (U_FAILURE(sts)) {
259                     break;
260                 }
261 
262                 // also put aliases in the map
263                 if (typeAliasResByKey.isValid()) {
264                     LocalUResourceBundlePointer typeAliasDataEntry;
265 
266                     ures_resetIterator(typeAliasResByKey.getAlias());
267                     while (ures_hasNext(typeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
268                         int32_t toLen;
269                         typeAliasDataEntry.adoptInstead(ures_getNextResource(typeAliasResByKey.getAlias(), typeAliasDataEntry.orphan(), &sts));
270                         const UChar* to = ures_getString(typeAliasDataEntry.getAlias(), &toLen, &sts);
271                         if (U_FAILURE(sts)) {
272                             break;
273                         }
274                         // check if this is an alias of canoncal legacy type
275                         if (uprv_compareInvWithUChar(NULL, legacyTypeId, -1, to, toLen) == 0) {
276                             const char* from = ures_getKey(typeAliasDataEntry.getAlias());
277                             if (isTZ) {
278                                 // replace colon with slash if necessary
279                                 if (uprv_strchr(from, ':') != NULL) {
280                                     icu::CharString* fromBuf =
281                                             gKeyTypeStringPool->create(from, sts);
282                                     if (fromBuf == NULL) {
283                                         sts = U_MEMORY_ALLOCATION_ERROR;
284                                         break;
285                                     }
286                                     if (U_FAILURE(sts)) {
287                                         break;
288                                     }
289                                     std::replace(
290                                             fromBuf->data(),
291                                             fromBuf->data() + fromBuf->length(),
292                                             ':', '/');
293                                     from = fromBuf->data();
294                                 }
295                             }
296                             uhash_put(typeDataMap, (void*)from, t, &sts);
297                         }
298                     }
299                     if (U_FAILURE(sts)) {
300                         break;
301                     }
302                 }
303 
304                 if (bcpTypeAliasResByKey.isValid()) {
305                     LocalUResourceBundlePointer bcpTypeAliasDataEntry;
306 
307                     ures_resetIterator(bcpTypeAliasResByKey.getAlias());
308                     while (ures_hasNext(bcpTypeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
309                         int32_t toLen;
310                         bcpTypeAliasDataEntry.adoptInstead(ures_getNextResource(bcpTypeAliasResByKey.getAlias(), bcpTypeAliasDataEntry.orphan(), &sts));
311                         const UChar* to = ures_getString(bcpTypeAliasDataEntry.getAlias(), &toLen, &sts);
312                         if (U_FAILURE(sts)) {
313                             break;
314                         }
315                         // check if this is an alias of bcp type
316                         if (uprv_compareInvWithUChar(NULL, bcpTypeId, -1, to, toLen) == 0) {
317                             const char* from = ures_getKey(bcpTypeAliasDataEntry.getAlias());
318                             uhash_put(typeDataMap, (void*)from, t, &sts);
319                         }
320                     }
321                     if (U_FAILURE(sts)) {
322                         break;
323                     }
324                 }
325             }
326         }
327         if (U_FAILURE(sts)) {
328             break;
329         }
330 
331         LocExtKeyData* keyData = gLocExtKeyDataEntries->create();
332         if (keyData == NULL) {
333             sts = U_MEMORY_ALLOCATION_ERROR;
334             break;
335         }
336         keyData->bcpId = bcpKeyId;
337         keyData->legacyId = legacyKeyId;
338         keyData->specialTypes = specialTypes;
339         keyData->typeMap.adoptInstead(typeDataMap);
340 
341         uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts);
342         if (legacyKeyId != bcpKeyId) {
343             // different key value
344             uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts);
345         }
346         if (U_FAILURE(sts)) {
347             break;
348         }
349     }
350 }
351 
352 static UBool
init()353 init() {
354     UErrorCode sts = U_ZERO_ERROR;
355     umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, sts);
356     if (U_FAILURE(sts)) {
357         return FALSE;
358     }
359     return TRUE;
360 }
361 
362 static UBool
isSpecialTypeCodepoints(const char * val)363 isSpecialTypeCodepoints(const char* val) {
364     int32_t subtagLen = 0;
365     const char* p = val;
366     while (*p) {
367         if (*p == '-') {
368             if (subtagLen < 4 || subtagLen > 6) {
369                 return FALSE;
370             }
371             subtagLen = 0;
372         } else if ((*p >= '0' && *p <= '9') ||
373                     (*p >= 'A' && *p <= 'F') || // A-F/a-f are contiguous
374                     (*p >= 'a' && *p <= 'f')) { // also in EBCDIC
375             subtagLen++;
376         } else {
377             return FALSE;
378         }
379         p++;
380     }
381     return (subtagLen >= 4 && subtagLen <= 6);
382 }
383 
384 static UBool
isSpecialTypeReorderCode(const char * val)385 isSpecialTypeReorderCode(const char* val) {
386     int32_t subtagLen = 0;
387     const char* p = val;
388     while (*p) {
389         if (*p == '-') {
390             if (subtagLen < 3 || subtagLen > 8) {
391                 return FALSE;
392             }
393             subtagLen = 0;
394         } else if (uprv_isASCIILetter(*p)) {
395             subtagLen++;
396         } else {
397             return FALSE;
398         }
399         p++;
400     }
401     return (subtagLen >=3 && subtagLen <=8);
402 }
403 
404 static UBool
isSpecialTypeRgKeyValue(const char * val)405 isSpecialTypeRgKeyValue(const char* val) {
406     int32_t subtagLen = 0;
407     const char* p = val;
408     while (*p) {
409         if ( (subtagLen < 2 && uprv_isASCIILetter(*p)) ||
410                     (subtagLen >= 2 && (*p == 'Z' || *p == 'z')) ) {
411             subtagLen++;
412         } else {
413             return FALSE;
414         }
415         p++;
416     }
417     return (subtagLen == 6);
418 }
419 
420 U_CFUNC const char*
ulocimp_toBcpKey(const char * key)421 ulocimp_toBcpKey(const char* key) {
422     if (!init()) {
423         return NULL;
424     }
425 
426     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
427     if (keyData != NULL) {
428         return keyData->bcpId;
429     }
430     return NULL;
431 }
432 
433 U_CFUNC const char*
ulocimp_toLegacyKey(const char * key)434 ulocimp_toLegacyKey(const char* key) {
435     if (!init()) {
436         return NULL;
437     }
438 
439     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
440     if (keyData != NULL) {
441         return keyData->legacyId;
442     }
443     return NULL;
444 }
445 
446 U_CFUNC const char*
ulocimp_toBcpType(const char * key,const char * type,UBool * isKnownKey,UBool * isSpecialType)447 ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
448     if (isKnownKey != NULL) {
449         *isKnownKey = FALSE;
450     }
451     if (isSpecialType != NULL) {
452         *isSpecialType = FALSE;
453     }
454 
455     if (!init()) {
456         return NULL;
457     }
458 
459     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
460     if (keyData != NULL) {
461         if (isKnownKey != NULL) {
462             *isKnownKey = TRUE;
463         }
464         LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap.getAlias(), type);
465         if (t != NULL) {
466             return t->bcpId;
467         }
468         if (keyData->specialTypes != SPECIALTYPE_NONE) {
469             UBool matched = FALSE;
470             if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
471                 matched = isSpecialTypeCodepoints(type);
472             }
473             if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
474                 matched = isSpecialTypeReorderCode(type);
475             }
476             if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) {
477                 matched = isSpecialTypeRgKeyValue(type);
478             }
479             if (matched) {
480                 if (isSpecialType != NULL) {
481                     *isSpecialType = TRUE;
482                 }
483                 return type;
484             }
485         }
486     }
487     return NULL;
488 }
489 
490 
491 U_CFUNC const char*
ulocimp_toLegacyType(const char * key,const char * type,UBool * isKnownKey,UBool * isSpecialType)492 ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
493     if (isKnownKey != NULL) {
494         *isKnownKey = FALSE;
495     }
496     if (isSpecialType != NULL) {
497         *isSpecialType = FALSE;
498     }
499 
500     if (!init()) {
501         return NULL;
502     }
503 
504     LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
505     if (keyData != NULL) {
506         if (isKnownKey != NULL) {
507             *isKnownKey = TRUE;
508         }
509         LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap.getAlias(), type);
510         if (t != NULL) {
511             return t->legacyId;
512         }
513         if (keyData->specialTypes != SPECIALTYPE_NONE) {
514             UBool matched = FALSE;
515             if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
516                 matched = isSpecialTypeCodepoints(type);
517             }
518             if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
519                 matched = isSpecialTypeReorderCode(type);
520             }
521             if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) {
522                 matched = isSpecialTypeRgKeyValue(type);
523             }
524             if (matched) {
525                 if (isSpecialType != NULL) {
526                     *isSpecialType = TRUE;
527                 }
528                 return type;
529             }
530         }
531     }
532     return NULL;
533 }
534 
535