1 /*
2 **********************************************************************
3 *   Copyright (C) 2009-2014, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 */
7 
8 #include "unicode/utypes.h"
9 #include "unicode/ures.h"
10 #include "unicode/putil.h"
11 #include "unicode/uloc.h"
12 #include "ustr_imp.h"
13 #include "cmemory.h"
14 #include "cstring.h"
15 #include "putilimp.h"
16 #include "uinvchar.h"
17 #include "ulocimp.h"
18 #include "uassert.h"
19 
/*
 * Node of a singly linked list of variant subtags.
 * _addVariantToList() appends nodes in arrival order and refuses duplicates.
 */
typedef struct VariantListEntry {
    /* Variant text. The node does not own it: _appendVariantsToLanguageTag()
       points it into a local buffer and frees only the node itself. */
    const char              *variant;
    /* Following node, or NULL at the tail of the list. */
    struct VariantListEntry *next;
} VariantListEntry;
25 
/*
 * Node of a singly linked list of attribute values.
 * _addAttributeToList() keeps the list sorted by the attribute string and
 * refuses duplicates.
 */
typedef struct AttributeListEntry {
    /* Attribute text used as the sort key of the list. */
    const char              *attribute;
    /* Following node, or NULL at the tail of the list. */
    struct AttributeListEntry *next;
} AttributeListEntry;
31 
/*
 * Node of a singly linked list of extensions (key/value pairs).
 * _addExtensionToList() keeps the list ordered by key and refuses a node whose
 * key compares equal to one already present.
 */
typedef struct ExtensionListEntry {
    /* Extension key; the ordering criterion of the list. */
    const char                  *key;
    /* Value associated with the key. */
    const char                  *value;
    /* Following node, or NULL at the tail of the list. */
    struct ExtensionListEntry   *next;
} ExtensionListEntry;
38 
/* Number of extlang slots stored in a ULanguageTag. */
#define MAXEXTLANG 3
/*
 * Parsed representation of a language tag.
 * _initializeULanguageTag() sets the pointer members to NULL (buf, extlang[],
 * variants, extensions) and the string members to the shared EMPTY string
 * (language, script, region, privateuse, grandfathered).
 */
typedef struct ULanguageTag {
    char                *buf;   /* holding parsed subtags */
    const char          *language;              /* language subtag, EMPTY when unset */
    const char          *extlang[MAXEXTLANG];   /* extlang subtags, NULL for unused slots */
    const char          *script;                /* script subtag, EMPTY when unset */
    const char          *region;                /* region subtag, EMPTY when unset */
    VariantListEntry    *variants;              /* head of the variant list, NULL when empty */
    ExtensionListEntry  *extensions;            /* head of the extension list, NULL when empty */
    const char          *privateuse;            /* private use part, EMPTY when unset */
    const char          *grandfathered;         /* grandfathered tag, EMPTY when unset */
} ULanguageTag;
51 
/* NOTE(review): MINLEN is not referenced in the visible part of this file;
   presumably the minimum accepted tag length - confirm against its users. */
#define MINLEN 2
/* Separator between subtags of a language tag. */
#define SEP '-'
/* Single-letter key introducing the private use part; never a valid
   extension singleton (see _isExtensionSingleton). */
#define PRIVATEUSE 'x'
/* Singleton used as the ordering reference for multi-letter keys in
   _addExtensionToList(). */
#define LDMLEXT 'u'

/* Separator accepted, next to SEP, between variants of a locale ID
   (see _appendVariantsToLanguageTag). */
#define LOCALE_SEP '_'
/* NOTE(review): the three macros below are not used in the visible part of
   this file; the names suggest the '@', ';' and '=' delimiters of the
   keyword section of a locale ID - confirm against their users. */
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/* Character classification helpers. ISNUMERIC evaluates its argument twice,
   so do not pass an expression with side effects. */
#define ISALPHA(c) uprv_isASCIILetter(c)
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')
64 
/* Shared empty string used as the "unset" value of ULanguageTag members. */
static const char EMPTY[] = "";
/* Language subtag written when a locale has no usable language code. */
static const char LANG_UND[] = "und";
/* NOTE(review): PRIVATEUSE_KEY, _POSIX, POSIX_KEY, PRIVUSE_VARIANT_PREFIX and
   LOCALE_TYPE_YES are not referenced in the visible part of this file;
   their exact roles should be confirmed against their users. */
static const char PRIVATEUSE_KEY[] = "x";
static const char _POSIX[] = "_POSIX";
static const char POSIX_KEY[] = "va";
/* Lower-cased variant that _appendVariantsToLanguageTag() reports through
   hadPosix instead of emitting as a variant subtag. */
static const char POSIX_VALUE[] = "posix";
/* Keyword name whose value carries the Unicode locale attributes
   (see _appendKeywordsToLanguageTag). */
static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";
static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[] = "yes";

/* Length of LANG_UND without its terminating NUL. */
#define LANG_UND_LEN 3
76 
/*
 * Flat table of string pairs: entry 2*i is a grandfathered tag and entry
 * 2*i+1 is its preferred replacement. The table ends with a NULL/NULL pair.
 */
static const char* const GRANDFATHERED[] = {
/*  grandfathered   preferred */
    "art-lojban",   "jbo",
    "cel-gaulish",  "xtg-x-cel-gaulish",
    "en-GB-oed",    "en-GB-x-oed",
    "i-ami",        "ami",
    "i-bnn",        "bnn",
    "i-default",    "en-x-i-default",
    "i-enochian",   "und-x-i-enochian",
    "i-hak",        "hak",
    "i-klingon",    "tlh",
    "i-lux",        "lb",
    "i-mingo",      "see-x-i-mingo",
    "i-navajo",     "nv",
    "i-pwn",        "pwn",
    "i-tao",        "tao",
    "i-tay",        "tay",
    "i-tsu",        "tsu",
    "no-bok",       "nb",
    "no-nyn",       "nn",
    "sgn-be-fr",    "sfb",
    "sgn-be-nl",    "vgt",
    "sgn-ch-de",    "sgg",
    "zh-guoyu",     "cmn",
    "zh-hakka",     "hak",
    "zh-min",       "nan-x-zh-min",
    "zh-min-nan",   "nan",
    "zh-xiang",     "hsn",
    NULL,           NULL
};
107 
/*
 * Flat table of string pairs: entry 2*i is a deprecated language code and
 * entry 2*i+1 is the code that replaces it. There is no terminator; users
 * iterate with UPRV_LENGTHOF (see _appendLanguageToLanguageTag).
 */
static const char DEPRECATEDLANGS[][4] = {
/*  deprecated  new */
    "iw",       "he",
    "ji",       "yi",
    "in",       "id"
};
114 
115 /*
116 * -------------------------------------------------
117 *
118 * These ultag_ functions may be exposed as APIs later
119 *
120 * -------------------------------------------------
121 */
122 
/*
 * Forward declarations of the file-local ULanguageTag parser and accessors.
 * The declarations wrapped in "#if 0" are compiled out.
 */
static ULanguageTag*
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);

static void
ultag_close(ULanguageTag* langtag);

static const char*
ultag_getLanguage(const ULanguageTag* langtag);

#if 0
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag);
#endif

static const char*
ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getExtlangSize(const ULanguageTag* langtag);

static const char*
ultag_getScript(const ULanguageTag* langtag);

static const char*
ultag_getRegion(const ULanguageTag* langtag);

static const char*
ultag_getVariant(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getVariantsSize(const ULanguageTag* langtag);

static const char*
ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);

static const char*
ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);

static int32_t
ultag_getExtensionsSize(const ULanguageTag* langtag);

static const char*
ultag_getPrivateUse(const ULanguageTag* langtag);

#if 0
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag);
#endif
171 
172 /*
173 * -------------------------------------------------
174 *
175 * Language subtag syntax validation functions
176 *
177 * -------------------------------------------------
178 */
179 
180 static UBool
_isAlphaString(const char * s,int32_t len)181 _isAlphaString(const char* s, int32_t len) {
182     int32_t i;
183     for (i = 0; i < len; i++) {
184         if (!ISALPHA(*(s + i))) {
185             return FALSE;
186         }
187     }
188     return TRUE;
189 }
190 
191 static UBool
_isNumericString(const char * s,int32_t len)192 _isNumericString(const char* s, int32_t len) {
193     int32_t i;
194     for (i = 0; i < len; i++) {
195         if (!ISNUMERIC(*(s + i))) {
196             return FALSE;
197         }
198     }
199     return TRUE;
200 }
201 
202 static UBool
_isAlphaNumericString(const char * s,int32_t len)203 _isAlphaNumericString(const char* s, int32_t len) {
204     int32_t i;
205     for (i = 0; i < len; i++) {
206         if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
207             return FALSE;
208         }
209     }
210     return TRUE;
211 }
212 
213 static UBool
_isLanguageSubtag(const char * s,int32_t len)214 _isLanguageSubtag(const char* s, int32_t len) {
215     /*
216      * language      = 2*3ALPHA            ; shortest ISO 639 code
217      *                 ["-" extlang]       ; sometimes followed by
218      *                                     ;   extended language subtags
219      *               / 4ALPHA              ; or reserved for future use
220      *               / 5*8ALPHA            ; or registered language subtag
221      */
222     if (len < 0) {
223         len = (int32_t)uprv_strlen(s);
224     }
225     if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
226         return TRUE;
227     }
228     return FALSE;
229 }
230 
231 static UBool
_isExtlangSubtag(const char * s,int32_t len)232 _isExtlangSubtag(const char* s, int32_t len) {
233     /*
234      * extlang       = 3ALPHA              ; selected ISO 639 codes
235      *                 *2("-" 3ALPHA)      ; permanently reserved
236      */
237     if (len < 0) {
238         len = (int32_t)uprv_strlen(s);
239     }
240     if (len == 3 && _isAlphaString(s, len)) {
241         return TRUE;
242     }
243     return FALSE;
244 }
245 
246 static UBool
_isScriptSubtag(const char * s,int32_t len)247 _isScriptSubtag(const char* s, int32_t len) {
248     /*
249      * script        = 4ALPHA              ; ISO 15924 code
250      */
251     if (len < 0) {
252         len = (int32_t)uprv_strlen(s);
253     }
254     if (len == 4 && _isAlphaString(s, len)) {
255         return TRUE;
256     }
257     return FALSE;
258 }
259 
260 static UBool
_isRegionSubtag(const char * s,int32_t len)261 _isRegionSubtag(const char* s, int32_t len) {
262     /*
263      * region        = 2ALPHA              ; ISO 3166-1 code
264      *               / 3DIGIT              ; UN M.49 code
265      */
266     if (len < 0) {
267         len = (int32_t)uprv_strlen(s);
268     }
269     if (len == 2 && _isAlphaString(s, len)) {
270         return TRUE;
271     }
272     if (len == 3 && _isNumericString(s, len)) {
273         return TRUE;
274     }
275     return FALSE;
276 }
277 
278 static UBool
_isVariantSubtag(const char * s,int32_t len)279 _isVariantSubtag(const char* s, int32_t len) {
280     /*
281      * variant       = 5*8alphanum         ; registered variants
282      *               / (DIGIT 3alphanum)
283      */
284     if (len < 0) {
285         len = (int32_t)uprv_strlen(s);
286     }
287     if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
288         return TRUE;
289     }
290     if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
291         return TRUE;
292     }
293     return FALSE;
294 }
295 
296 static UBool
_isPrivateuseVariantSubtag(const char * s,int32_t len)297 _isPrivateuseVariantSubtag(const char* s, int32_t len) {
298     /*
299      * variant       = 1*8alphanum         ; registered variants
300      *               / (DIGIT 3alphanum)
301      */
302     if (len < 0) {
303         len = (int32_t)uprv_strlen(s);
304     }
305     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
306         return TRUE;
307     }
308     return FALSE;
309 }
310 
311 static UBool
_isExtensionSingleton(const char * s,int32_t len)312 _isExtensionSingleton(const char* s, int32_t len) {
313     /*
314      * extension     = singleton 1*("-" (2*8alphanum))
315      */
316     if (len < 0) {
317         len = (int32_t)uprv_strlen(s);
318     }
319     if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
320         return TRUE;
321     }
322     return FALSE;
323 }
324 
325 static UBool
_isExtensionSubtag(const char * s,int32_t len)326 _isExtensionSubtag(const char* s, int32_t len) {
327     /*
328      * extension     = singleton 1*("-" (2*8alphanum))
329      */
330     if (len < 0) {
331         len = (int32_t)uprv_strlen(s);
332     }
333     if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
334         return TRUE;
335     }
336     return FALSE;
337 }
338 
339 static UBool
_isExtensionSubtags(const char * s,int32_t len)340 _isExtensionSubtags(const char* s, int32_t len) {
341     const char *p = s;
342     const char *pSubtag = NULL;
343 
344     if (len < 0) {
345         len = (int32_t)uprv_strlen(s);
346     }
347 
348     while ((p - s) < len) {
349         if (*p == SEP) {
350             if (pSubtag == NULL) {
351                 return FALSE;
352             }
353             if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
354                 return FALSE;
355             }
356             pSubtag = NULL;
357         } else if (pSubtag == NULL) {
358             pSubtag = p;
359         }
360         p++;
361     }
362     if (pSubtag == NULL) {
363         return FALSE;
364     }
365     return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
366 }
367 
368 static UBool
_isPrivateuseValueSubtag(const char * s,int32_t len)369 _isPrivateuseValueSubtag(const char* s, int32_t len) {
370     /*
371      * privateuse    = "x" 1*("-" (1*8alphanum))
372      */
373     if (len < 0) {
374         len = (int32_t)uprv_strlen(s);
375     }
376     if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
377         return TRUE;
378     }
379     return FALSE;
380 }
381 
382 static UBool
_isPrivateuseValueSubtags(const char * s,int32_t len)383 _isPrivateuseValueSubtags(const char* s, int32_t len) {
384     const char *p = s;
385     const char *pSubtag = NULL;
386 
387     if (len < 0) {
388         len = (int32_t)uprv_strlen(s);
389     }
390 
391     while ((p - s) < len) {
392         if (*p == SEP) {
393             if (pSubtag == NULL) {
394                 return FALSE;
395             }
396             if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
397                 return FALSE;
398             }
399             pSubtag = NULL;
400         } else if (pSubtag == NULL) {
401             pSubtag = p;
402         }
403         p++;
404     }
405     if (pSubtag == NULL) {
406         return FALSE;
407     }
408     return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
409 }
410 
411 U_CFUNC UBool
ultag_isUnicodeLocaleKey(const char * s,int32_t len)412 ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
413     if (len < 0) {
414         len = (int32_t)uprv_strlen(s);
415     }
416     if (len == 2 && _isAlphaNumericString(s, len)) {
417         return TRUE;
418     }
419     return FALSE;
420 }
421 
422 U_CFUNC UBool
ultag_isUnicodeLocaleType(const char * s,int32_t len)423 ultag_isUnicodeLocaleType(const char*s, int32_t len) {
424     const char* p;
425     int32_t subtagLen = 0;
426 
427     if (len < 0) {
428         len = (int32_t)uprv_strlen(s);
429     }
430 
431     for (p = s; len > 0; p++, len--) {
432         if (*p == SEP) {
433             if (subtagLen < 3) {
434                 return FALSE;
435             }
436             subtagLen = 0;
437         } else if (ISALPHA(*p) || ISNUMERIC(*p)) {
438             subtagLen++;
439             if (subtagLen > 8) {
440                 return FALSE;
441             }
442         } else {
443             return FALSE;
444         }
445     }
446 
447     return (subtagLen >= 3);
448 }
449 /*
450 * -------------------------------------------------
451 *
452 * Helper functions
453 *
454 * -------------------------------------------------
455 */
456 
457 static UBool
_addVariantToList(VariantListEntry ** first,VariantListEntry * var)458 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
459     UBool bAdded = TRUE;
460 
461     if (*first == NULL) {
462         var->next = NULL;
463         *first = var;
464     } else {
465         VariantListEntry *prev, *cur;
466         int32_t cmp;
467 
468         /* variants order should be preserved */
469         prev = NULL;
470         cur = *first;
471         while (TRUE) {
472             if (cur == NULL) {
473                 prev->next = var;
474                 var->next = NULL;
475                 break;
476             }
477 
478             /* Checking for duplicate variant */
479             cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
480             if (cmp == 0) {
481                 /* duplicated variant */
482                 bAdded = FALSE;
483                 break;
484             }
485             prev = cur;
486             cur = cur->next;
487         }
488     }
489 
490     return bAdded;
491 }
492 
493 static UBool
_addAttributeToList(AttributeListEntry ** first,AttributeListEntry * attr)494 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
495     UBool bAdded = TRUE;
496 
497     if (*first == NULL) {
498         attr->next = NULL;
499         *first = attr;
500     } else {
501         AttributeListEntry *prev, *cur;
502         int32_t cmp;
503 
504         /* reorder variants in alphabetical order */
505         prev = NULL;
506         cur = *first;
507         while (TRUE) {
508             if (cur == NULL) {
509                 prev->next = attr;
510                 attr->next = NULL;
511                 break;
512             }
513             cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
514             if (cmp < 0) {
515                 if (prev == NULL) {
516                     *first = attr;
517                 } else {
518                     prev->next = attr;
519                 }
520                 attr->next = cur;
521                 break;
522             }
523             if (cmp == 0) {
524                 /* duplicated variant */
525                 bAdded = FALSE;
526                 break;
527             }
528             prev = cur;
529             cur = cur->next;
530         }
531     }
532 
533     return bAdded;
534 }
535 
536 
537 static UBool
_addExtensionToList(ExtensionListEntry ** first,ExtensionListEntry * ext,UBool localeToBCP)538 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
539     UBool bAdded = TRUE;
540 
541     if (*first == NULL) {
542         ext->next = NULL;
543         *first = ext;
544     } else {
545         ExtensionListEntry *prev, *cur;
546         int32_t cmp;
547 
548         /* reorder variants in alphabetical order */
549         prev = NULL;
550         cur = *first;
551         while (TRUE) {
552             if (cur == NULL) {
553                 prev->next = ext;
554                 ext->next = NULL;
555                 break;
556             }
557             if (localeToBCP) {
558                 /* special handling for locale to bcp conversion */
559                 int32_t len, curlen;
560 
561                 len = (int32_t)uprv_strlen(ext->key);
562                 curlen = (int32_t)uprv_strlen(cur->key);
563 
564                 if (len == 1 && curlen == 1) {
565                     if (*(ext->key) == *(cur->key)) {
566                         cmp = 0;
567                     } else if (*(ext->key) == PRIVATEUSE) {
568                         cmp = 1;
569                     } else if (*(cur->key) == PRIVATEUSE) {
570                         cmp = -1;
571                     } else {
572                         cmp = *(ext->key) - *(cur->key);
573                     }
574                 } else if (len == 1) {
575                     cmp = *(ext->key) - LDMLEXT;
576                 } else if (curlen == 1) {
577                     cmp = LDMLEXT - *(cur->key);
578                 } else {
579                     cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
580                 }
581             } else {
582                 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
583             }
584             if (cmp < 0) {
585                 if (prev == NULL) {
586                     *first = ext;
587                 } else {
588                     prev->next = ext;
589                 }
590                 ext->next = cur;
591                 break;
592             }
593             if (cmp == 0) {
594                 /* duplicated extension key */
595                 bAdded = FALSE;
596                 break;
597             }
598             prev = cur;
599             cur = cur->next;
600         }
601     }
602 
603     return bAdded;
604 }
605 
606 static void
_initializeULanguageTag(ULanguageTag * langtag)607 _initializeULanguageTag(ULanguageTag* langtag) {
608     int32_t i;
609 
610     langtag->buf = NULL;
611 
612     langtag->language = EMPTY;
613     for (i = 0; i < MAXEXTLANG; i++) {
614         langtag->extlang[i] = NULL;
615     }
616 
617     langtag->script = EMPTY;
618     langtag->region = EMPTY;
619 
620     langtag->variants = NULL;
621     langtag->extensions = NULL;
622 
623     langtag->grandfathered = EMPTY;
624     langtag->privateuse = EMPTY;
625 }
626 
627 static int32_t
_appendLanguageToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)628 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
629     char buf[ULOC_LANG_CAPACITY];
630     UErrorCode tmpStatus = U_ZERO_ERROR;
631     int32_t len, i;
632     int32_t reslen = 0;
633 
634     if (U_FAILURE(*status)) {
635         return 0;
636     }
637 
638     len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
639     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
640         if (strict) {
641             *status = U_ILLEGAL_ARGUMENT_ERROR;
642             return 0;
643         }
644         len = 0;
645     }
646 
647     /* Note: returned language code is in lower case letters */
648 
649     if (len == 0) {
650         if (reslen < capacity) {
651             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
652         }
653         reslen += LANG_UND_LEN;
654     } else if (!_isLanguageSubtag(buf, len)) {
655             /* invalid language code */
656         if (strict) {
657             *status = U_ILLEGAL_ARGUMENT_ERROR;
658             return 0;
659         }
660         if (reslen < capacity) {
661             uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
662         }
663         reslen += LANG_UND_LEN;
664     } else {
665         /* resolve deprecated */
666         for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
667             if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
668                 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
669                 len = (int32_t)uprv_strlen(buf);
670                 break;
671             }
672         }
673         if (reslen < capacity) {
674             uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
675         }
676         reslen += len;
677     }
678     u_terminateChars(appendAt, capacity, reslen, status);
679     return reslen;
680 }
681 
682 static int32_t
_appendScriptToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)683 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
684     char buf[ULOC_SCRIPT_CAPACITY];
685     UErrorCode tmpStatus = U_ZERO_ERROR;
686     int32_t len;
687     int32_t reslen = 0;
688 
689     if (U_FAILURE(*status)) {
690         return 0;
691     }
692 
693     len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
694     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
695         if (strict) {
696             *status = U_ILLEGAL_ARGUMENT_ERROR;
697         }
698         return 0;
699     }
700 
701     if (len > 0) {
702         if (!_isScriptSubtag(buf, len)) {
703             /* invalid script code */
704             if (strict) {
705                 *status = U_ILLEGAL_ARGUMENT_ERROR;
706             }
707             return 0;
708         } else {
709             if (reslen < capacity) {
710                 *(appendAt + reslen) = SEP;
711             }
712             reslen++;
713 
714             if (reslen < capacity) {
715                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
716             }
717             reslen += len;
718         }
719     }
720     u_terminateChars(appendAt, capacity, reslen, status);
721     return reslen;
722 }
723 
724 static int32_t
_appendRegionToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)725 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
726     char buf[ULOC_COUNTRY_CAPACITY];
727     UErrorCode tmpStatus = U_ZERO_ERROR;
728     int32_t len;
729     int32_t reslen = 0;
730 
731     if (U_FAILURE(*status)) {
732         return 0;
733     }
734 
735     len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
736     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
737         if (strict) {
738             *status = U_ILLEGAL_ARGUMENT_ERROR;
739         }
740         return 0;
741     }
742 
743     if (len > 0) {
744         if (!_isRegionSubtag(buf, len)) {
745             /* invalid region code */
746             if (strict) {
747                 *status = U_ILLEGAL_ARGUMENT_ERROR;
748             }
749             return 0;
750         } else {
751             if (reslen < capacity) {
752                 *(appendAt + reslen) = SEP;
753             }
754             reslen++;
755 
756             if (reslen < capacity) {
757                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
758             }
759             reslen += len;
760         }
761     }
762     u_terminateChars(appendAt, capacity, reslen, status);
763     return reslen;
764 }
765 
766 static int32_t
_appendVariantsToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool * hadPosix,UErrorCode * status)767 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
768     char buf[ULOC_FULLNAME_CAPACITY];
769     UErrorCode tmpStatus = U_ZERO_ERROR;
770     int32_t len, i;
771     int32_t reslen = 0;
772 
773     if (U_FAILURE(*status)) {
774         return 0;
775     }
776 
777     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
778     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
779         if (strict) {
780             *status = U_ILLEGAL_ARGUMENT_ERROR;
781         }
782         return 0;
783     }
784 
785     if (len > 0) {
786         char *p, *pVar;
787         UBool bNext = TRUE;
788         VariantListEntry *var;
789         VariantListEntry *varFirst = NULL;
790 
791         pVar = NULL;
792         p = buf;
793         while (bNext) {
794             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
795                 if (*p == 0) {
796                     bNext = FALSE;
797                 } else {
798                     *p = 0; /* terminate */
799                 }
800                 if (pVar == NULL) {
801                     if (strict) {
802                         *status = U_ILLEGAL_ARGUMENT_ERROR;
803                         break;
804                     }
805                     /* ignore empty variant */
806                 } else {
807                     /* ICU uses upper case letters for variants, but
808                        the canonical format is lowercase in BCP47 */
809                     for (i = 0; *(pVar + i) != 0; i++) {
810                         *(pVar + i) = uprv_tolower(*(pVar + i));
811                     }
812 
813                     /* validate */
814                     if (_isVariantSubtag(pVar, -1)) {
815                         if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) {
816                             /* emit the variant to the list */
817                             var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
818                             if (var == NULL) {
819                                 *status = U_MEMORY_ALLOCATION_ERROR;
820                                 break;
821                             }
822                             var->variant = pVar;
823                             if (!_addVariantToList(&varFirst, var)) {
824                                 /* duplicated variant */
825                                 uprv_free(var);
826                                 if (strict) {
827                                     *status = U_ILLEGAL_ARGUMENT_ERROR;
828                                     break;
829                                 }
830                             }
831                         } else {
832                             /* Special handling for POSIX variant, need to remember that we had it and then */
833                             /* treat it like an extension later. */
834                             *hadPosix = TRUE;
835                         }
836                     } else if (strict) {
837                         *status = U_ILLEGAL_ARGUMENT_ERROR;
838                         break;
839                     } else if (_isPrivateuseValueSubtag(pVar, -1)) {
840                         /* Handle private use subtags separately */
841                         break;
842                     }
843                 }
844                 /* reset variant starting position */
845                 pVar = NULL;
846             } else if (pVar == NULL) {
847                 pVar = p;
848             }
849             p++;
850         }
851 
852         if (U_SUCCESS(*status)) {
853             if (varFirst != NULL) {
854                 int32_t varLen;
855 
856                 /* write out validated/normalized variants to the target */
857                 var = varFirst;
858                 while (var != NULL) {
859                     if (reslen < capacity) {
860                         *(appendAt + reslen) = SEP;
861                     }
862                     reslen++;
863                     varLen = (int32_t)uprv_strlen(var->variant);
864                     if (reslen < capacity) {
865                         uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
866                     }
867                     reslen += varLen;
868                     var = var->next;
869                 }
870             }
871         }
872 
873         /* clean up */
874         var = varFirst;
875         while (var != NULL) {
876             VariantListEntry *tmpVar = var->next;
877             uprv_free(var);
878             var = tmpVar;
879         }
880 
881         if (U_FAILURE(*status)) {
882             return 0;
883         }
884     }
885 
886     u_terminateChars(appendAt, capacity, reslen, status);
887     return reslen;
888 }
889 
890 static int32_t
_appendKeywordsToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool hadPosix,UErrorCode * status)891 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
892     char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
893     char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
894     int32_t attrBufLength = 0;
895     UBool isAttribute = FALSE;
896     UEnumeration *keywordEnum = NULL;
897     int32_t reslen = 0;
898 
899     keywordEnum = uloc_openKeywords(localeID, status);
900     if (U_FAILURE(*status) && !hadPosix) {
901         uenum_close(keywordEnum);
902         return 0;
903     }
904     if (keywordEnum != NULL || hadPosix) {
905         /* reorder extensions */
906         int32_t len;
907         const char *key;
908         ExtensionListEntry *firstExt = NULL;
909         ExtensionListEntry *ext;
910         AttributeListEntry *firstAttr = NULL;
911         AttributeListEntry *attr;
912         char *attrValue;
913         char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
914         char *pExtBuf = extBuf;
915         int32_t extBufCapacity = sizeof(extBuf);
916         const char *bcpKey, *bcpValue;
917         UErrorCode tmpStatus = U_ZERO_ERROR;
918         int32_t keylen;
919         UBool isBcpUExt;
920 
921         while (TRUE) {
922             isAttribute = FALSE;
923             key = uenum_next(keywordEnum, NULL, status);
924             if (key == NULL) {
925                 break;
926             }
927             len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
928             /* buf must be null-terminated */
929             if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
930                 if (strict) {
931                     *status = U_ILLEGAL_ARGUMENT_ERROR;
932                     break;
933                 }
934                 /* ignore this keyword */
935                 tmpStatus = U_ZERO_ERROR;
936                 continue;
937             }
938 
939             keylen = (int32_t)uprv_strlen(key);
940             isBcpUExt = (keylen > 1);
941 
942             /* special keyword used for representing Unicode locale attributes */
943             if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
944                 isAttribute = TRUE;
945                 if (len > 0) {
946                     int32_t i = 0;
947                     while (TRUE) {
948                         attrBufLength = 0;
949                         for (; i < len; i++) {
950                             if (buf[i] != '-') {
951                                 attrBuf[attrBufLength++] = buf[i];
952                             } else {
953                                 i++;
954                                 break;
955                             }
956                         }
957                         if (attrBufLength > 0) {
958                             attrBuf[attrBufLength] = 0;
959 
960                         } else if (i >= len){
961                             break;
962                         }
963 
964                         /* create AttributeListEntry */
965                         attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
966                         if (attr == NULL) {
967                             *status = U_MEMORY_ALLOCATION_ERROR;
968                             break;
969                         }
970                         attrValue = (char*)uprv_malloc(attrBufLength + 1);
971                         if (attrValue == NULL) {
972                             *status = U_MEMORY_ALLOCATION_ERROR;
973                             break;
974                         }
975                         uprv_strcpy(attrValue, attrBuf);
976                         attr->attribute = attrValue;
977 
978                         if (!_addAttributeToList(&firstAttr, attr)) {
979                             uprv_free(attr);
980                             uprv_free(attrValue);
981                             if (strict) {
982                                 *status = U_ILLEGAL_ARGUMENT_ERROR;
983                                 break;
984                             }
985                         }
986                     }
987                 }
988             } else if (isBcpUExt) {
989                 bcpKey = uloc_toUnicodeLocaleKey(key);
990                 if (bcpKey == NULL) {
991                     if (strict) {
992                         *status = U_ILLEGAL_ARGUMENT_ERROR;
993                         break;
994                     }
995                     continue;
996                 }
997 
998                 /* we've checked buf is null-terminated above */
999                 bcpValue = uloc_toUnicodeLocaleType(key, buf);
1000                 if (bcpValue == NULL) {
1001                     if (strict) {
1002                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1003                         break;
1004                     }
1005                     continue;
1006                 }
1007                 if (bcpValue == buf) {
1008                     /*
1009                     When uloc_toUnicodeLocaleType(key, buf) returns the
1010                     input value as is, the value is well-formed, but has
1011                     no known mapping. This implementation normalizes the
1012                     the value to lower case
1013                     */
1014                     int32_t bcpValueLen = uprv_strlen(bcpValue);
1015                     if (bcpValueLen < extBufCapacity) {
1016                         uprv_strcpy(pExtBuf, bcpValue);
1017                         T_CString_toLowerCase(pExtBuf);
1018 
1019                         bcpValue = pExtBuf;
1020 
1021                         pExtBuf += (bcpValueLen + 1);
1022                         extBufCapacity -= (bcpValueLen + 1);
1023                     } else {
1024                         if (strict) {
1025                             *status = U_ILLEGAL_ARGUMENT_ERROR;
1026                             break;
1027                         }
1028                         continue;
1029                     }
1030                 }
1031             } else {
1032                 if (*key == PRIVATEUSE) {
1033                     if (!_isPrivateuseValueSubtags(buf, len)) {
1034                         if (strict) {
1035                             *status = U_ILLEGAL_ARGUMENT_ERROR;
1036                             break;
1037                         }
1038                         continue;
1039                     }
1040                 } else {
1041                     if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
1042                         if (strict) {
1043                             *status = U_ILLEGAL_ARGUMENT_ERROR;
1044                             break;
1045                         }
1046                         continue;
1047                     }
1048                 }
1049                 bcpKey = key;
1050                 if ((len + 1) < extBufCapacity) {
1051                     uprv_memcpy(pExtBuf, buf, len);
1052                     bcpValue = pExtBuf;
1053 
1054                     pExtBuf += len;
1055 
1056                     *pExtBuf = 0;
1057                     pExtBuf++;
1058 
1059                     extBufCapacity -= (len + 1);
1060                 } else {
1061                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1062                     break;
1063                 }
1064             }
1065 
1066             if (!isAttribute) {
1067                 /* create ExtensionListEntry */
1068                 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1069                 if (ext == NULL) {
1070                     *status = U_MEMORY_ALLOCATION_ERROR;
1071                     break;
1072                 }
1073                 ext->key = bcpKey;
1074                 ext->value = bcpValue;
1075 
1076                 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1077                     uprv_free(ext);
1078                     if (strict) {
1079                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1080                         break;
1081                     }
1082                 }
1083             }
1084         }
1085 
1086         /* Special handling for POSIX variant - add the keywords for POSIX */
1087         if (hadPosix) {
1088             /* create ExtensionListEntry for POSIX */
1089             ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1090             if (ext == NULL) {
1091                 *status = U_MEMORY_ALLOCATION_ERROR;
1092                 goto cleanup;
1093             }
1094             ext->key = POSIX_KEY;
1095             ext->value = POSIX_VALUE;
1096 
1097             if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1098                 uprv_free(ext);
1099             }
1100         }
1101 
1102         if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
1103             UBool startLDMLExtension = FALSE;
1104 
1105             attr = firstAttr;
1106             ext = firstExt;
1107             do {
1108                 if (!startLDMLExtension && (ext && uprv_strlen(ext->key) > 1)) {
1109                    /* write LDML singleton extension */
1110                    if (reslen < capacity) {
1111                        *(appendAt + reslen) = SEP;
1112                    }
1113                    reslen++;
1114                    if (reslen < capacity) {
1115                        *(appendAt + reslen) = LDMLEXT;
1116                    }
1117                    reslen++;
1118 
1119                    startLDMLExtension = TRUE;
1120                 }
1121 
1122                 /* write out the sorted BCP47 attributes, extensions and private use */
1123                 if (ext && (uprv_strlen(ext->key) == 1 || attr == NULL)) {
1124                     if (reslen < capacity) {
1125                         *(appendAt + reslen) = SEP;
1126                     }
1127                     reslen++;
1128                     len = (int32_t)uprv_strlen(ext->key);
1129                     if (reslen < capacity) {
1130                         uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
1131                     }
1132                     reslen += len;
1133                     if (reslen < capacity) {
1134                         *(appendAt + reslen) = SEP;
1135                     }
1136                     reslen++;
1137                     len = (int32_t)uprv_strlen(ext->value);
1138                     if (reslen < capacity) {
1139                         uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
1140                     }
1141                     reslen += len;
1142 
1143                     ext = ext->next;
1144                 } else if (attr) {
1145                     /* write the value for the attributes */
1146                     if (reslen < capacity) {
1147                         *(appendAt + reslen) = SEP;
1148                     }
1149                     reslen++;
1150                     len = (int32_t)uprv_strlen(attr->attribute);
1151                     if (reslen < capacity) {
1152                         uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
1153                     }
1154                     reslen += len;
1155 
1156                     attr = attr->next;
1157                 }
1158             } while (attr != NULL || ext != NULL);
1159         }
1160 cleanup:
1161         /* clean up */
1162         ext = firstExt;
1163         while (ext != NULL) {
1164             ExtensionListEntry *tmpExt = ext->next;
1165             uprv_free(ext);
1166             ext = tmpExt;
1167         }
1168 
1169         attr = firstAttr;
1170         while (attr != NULL) {
1171             AttributeListEntry *tmpAttr = attr->next;
1172             char *pValue = (char *)attr->attribute;
1173             uprv_free(pValue);
1174             uprv_free(attr);
1175             attr = tmpAttr;
1176         }
1177 
1178         uenum_close(keywordEnum);
1179 
1180         if (U_FAILURE(*status)) {
1181             return 0;
1182         }
1183     }
1184 
1185     return u_terminateChars(appendAt, capacity, reslen, status);
1186 }
1187 
1188 /**
1189  * Append keywords parsed from LDML extension value
1190  * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1191  * Note: char* buf is used for storing keywords
1192  */
1193 static void
_appendLDMLExtensionAsKeywords(const char * ldmlext,ExtensionListEntry ** appendTo,char * buf,int32_t bufSize,UBool * posixVariant,UErrorCode * status)1194 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
1195     const char *pTag;   /* beginning of current subtag */
1196     const char *pKwds;  /* beginning of key-type pairs */
1197     UBool variantExists = *posixVariant;
1198 
1199     ExtensionListEntry *kwdFirst = NULL;    /* first LDML keyword */
1200     ExtensionListEntry *kwd, *nextKwd;
1201 
1202     AttributeListEntry *attrFirst = NULL;   /* first attribute */
1203     AttributeListEntry *attr, *nextAttr;
1204 
1205     int32_t len;
1206     int32_t bufIdx = 0;
1207 
1208     char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1209     int32_t attrBufIdx = 0;
1210 
1211     /* Reset the posixVariant value */
1212     *posixVariant = FALSE;
1213 
1214     pTag = ldmlext;
1215     pKwds = NULL;
1216 
1217     /* Iterate through u extension attributes */
1218     while (*pTag) {
1219         /* locate next separator char */
1220         for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1221 
1222         if (ultag_isUnicodeLocaleKey(pTag, len)) {
1223             pKwds = pTag;
1224             break;
1225         }
1226 
1227         /* add this attribute to the list */
1228         attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1229         if (attr == NULL) {
1230             *status = U_MEMORY_ALLOCATION_ERROR;
1231             goto cleanup;
1232         }
1233 
1234         if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
1235             uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
1236             attrBuf[attrBufIdx + len] = 0;
1237             attr->attribute = &attrBuf[attrBufIdx];
1238             attrBufIdx += (len + 1);
1239         } else {
1240             *status = U_ILLEGAL_ARGUMENT_ERROR;
1241             goto cleanup;
1242         }
1243 
1244         if (!_addAttributeToList(&attrFirst, attr)) {
1245             *status = U_ILLEGAL_ARGUMENT_ERROR;
1246             uprv_free(attr);
1247             goto cleanup;
1248         }
1249 
1250         /* next tag */
1251         pTag += len;
1252         if (*pTag) {
1253             /* next to the separator */
1254             pTag++;
1255         }
1256     }
1257 
1258     if (attrFirst) {
1259         /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
1260 
1261         if (attrBufIdx > bufSize) {
1262             /* attrBufIdx == <total length of attribute subtag> + 1 */
1263             *status = U_ILLEGAL_ARGUMENT_ERROR;
1264             goto cleanup;
1265         }
1266 
1267         kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1268         if (kwd == NULL) {
1269             *status = U_MEMORY_ALLOCATION_ERROR;
1270             goto cleanup;
1271         }
1272 
1273         kwd->key = LOCALE_ATTRIBUTE_KEY;
1274         kwd->value = buf;
1275 
1276         /* attribute subtags sorted in alphabetical order as type */
1277         attr = attrFirst;
1278         while (attr != NULL) {
1279             nextAttr = attr->next;
1280 
1281             /* buffer size check is done above */
1282             if (attr != attrFirst) {
1283                 *(buf + bufIdx) = SEP;
1284                 bufIdx++;
1285             }
1286 
1287             len = uprv_strlen(attr->attribute);
1288             uprv_memcpy(buf + bufIdx, attr->attribute, len);
1289             bufIdx += len;
1290 
1291             attr = nextAttr;
1292         }
1293         *(buf + bufIdx) = 0;
1294         bufIdx++;
1295 
1296         if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1297             *status = U_ILLEGAL_ARGUMENT_ERROR;
1298             uprv_free(kwd);
1299             goto cleanup;
1300         }
1301 
1302         /* once keyword entry is created, delete the attribute list */
1303         attr = attrFirst;
1304         while (attr != NULL) {
1305             nextAttr = attr->next;
1306             uprv_free(attr);
1307             attr = nextAttr;
1308         }
1309         attrFirst = NULL;
1310     }
1311 
1312     if (pKwds) {
1313         const char *pBcpKey = NULL;     /* u extenstion key subtag */
1314         const char *pBcpType = NULL;    /* beginning of u extension type subtag(s) */
1315         int32_t bcpKeyLen = 0;
1316         int32_t bcpTypeLen = 0;
1317         UBool isDone = FALSE;
1318 
1319         pTag = pKwds;
1320         /* BCP47 representation of LDML key/type pairs */
1321         while (!isDone) {
1322             const char *pNextBcpKey = NULL;
1323             int32_t nextBcpKeyLen = 0;
1324             UBool emitKeyword = FALSE;
1325 
1326             if (*pTag) {
1327                 /* locate next separator char */
1328                 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1329 
1330                 if (ultag_isUnicodeLocaleKey(pTag, len)) {
1331                     if (pBcpKey) {
1332                         emitKeyword = TRUE;
1333                         pNextBcpKey = pTag;
1334                         nextBcpKeyLen = len;
1335                     } else {
1336                         pBcpKey = pTag;
1337                         bcpKeyLen = len;
1338                     }
1339                 } else {
1340                     U_ASSERT(pBcpKey != NULL);
1341                     /* within LDML type subtags */
1342                     if (pBcpType) {
1343                         bcpTypeLen += (len + 1);
1344                     } else {
1345                         pBcpType = pTag;
1346                         bcpTypeLen = len;
1347                     }
1348                 }
1349 
1350                 /* next tag */
1351                 pTag += len;
1352                 if (*pTag) {
1353                     /* next to the separator */
1354                     pTag++;
1355                 }
1356             } else {
1357                 /* processing last one */
1358                 emitKeyword = TRUE;
1359                 isDone = TRUE;
1360             }
1361 
1362             if (emitKeyword) {
1363                 const char *pKey = NULL;    /* LDML key */
1364                 const char *pType = NULL;   /* LDML type */
1365 
1366                 char bcpKeyBuf[9];          /* BCP key length is always 2 for now */
1367 
1368                 U_ASSERT(pBcpKey != NULL);
1369 
1370                 if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
1371                     /* the BCP key is invalid */
1372                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1373                     goto cleanup;
1374                 }
1375 
1376                 uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
1377                 bcpKeyBuf[bcpKeyLen] = 0;
1378 
1379                 /* u extension key to LDML key */
1380                 pKey = uloc_toLegacyKey(bcpKeyBuf);
1381                 if (pKey == NULL) {
1382                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1383                     goto cleanup;
1384                 }
1385                 if (pKey == bcpKeyBuf) {
1386                     /*
1387                     The key returned by toLegacyKey points to the input buffer.
1388                     We normalize the result key to lower case.
1389                     */
1390                     T_CString_toLowerCase(bcpKeyBuf);
1391                     if (bufSize - bufIdx - 1 >= bcpKeyLen) {
1392                         uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen);
1393                         pKey = buf + bufIdx;
1394                         bufIdx += bcpKeyLen;
1395                         *(buf + bufIdx) = 0;
1396                         bufIdx++;
1397                     } else {
1398                         *status = U_BUFFER_OVERFLOW_ERROR;
1399                         goto cleanup;
1400                     }
1401                 }
1402 
1403                 if (pBcpType) {
1404                     char bcpTypeBuf[128];       /* practically long enough even considering multiple subtag type */
1405                     if (bcpTypeLen >= sizeof(bcpTypeBuf)) {
1406                         /* the BCP type is too long */
1407                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1408                         goto cleanup;
1409                     }
1410 
1411                     uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
1412                     bcpTypeBuf[bcpTypeLen] = 0;
1413 
1414                     /* BCP type to locale type */
1415                     pType = uloc_toLegacyType(pKey, bcpTypeBuf);
1416                     if (pType == NULL) {
1417                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1418                         goto cleanup;
1419                     }
1420                     if (pType == bcpTypeBuf) {
1421                         /*
1422                         The type returned by toLegacyType points to the input buffer.
1423                         We normalize the result type to lower case.
1424                         */
1425                         /* normalize to lower case */
1426                         T_CString_toLowerCase(bcpTypeBuf);
1427                         if (bufSize - bufIdx - 1 >= bcpTypeLen) {
1428                             uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen);
1429                             pType = buf + bufIdx;
1430                             bufIdx += bcpTypeLen;
1431                             *(buf + bufIdx) = 0;
1432                             bufIdx++;
1433                         } else {
1434                             *status = U_BUFFER_OVERFLOW_ERROR;
1435                             goto cleanup;
1436                         }
1437                     }
1438                 } else {
1439                     /* typeless - default type value is "yes" */
1440                     pType = LOCALE_TYPE_YES;
1441                 }
1442 
1443                 /* Special handling for u-va-posix, since we want to treat this as a variant,
1444                    not as a keyword */
1445                 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
1446                     *posixVariant = TRUE;
1447                 } else {
1448                     /* create an ExtensionListEntry for this keyword */
1449                     kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1450                     if (kwd == NULL) {
1451                         *status = U_MEMORY_ALLOCATION_ERROR;
1452                         goto cleanup;
1453                     }
1454 
1455                     kwd->key = pKey;
1456                     kwd->value = pType;
1457 
1458                     if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1459                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1460                         uprv_free(kwd);
1461                         goto cleanup;
1462                     }
1463                 }
1464 
1465                 pBcpKey = pNextBcpKey;
1466                 bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
1467                 pBcpType = NULL;
1468                 bcpTypeLen = 0;
1469             }
1470         }
1471     }
1472 
1473     kwd = kwdFirst;
1474     while (kwd != NULL) {
1475         nextKwd = kwd->next;
1476         _addExtensionToList(appendTo, kwd, FALSE);
1477         kwd = nextKwd;
1478     }
1479 
1480     return;
1481 
1482 cleanup:
1483     attr = attrFirst;
1484     while (attr != NULL) {
1485         nextAttr = attr->next;
1486         uprv_free(attr);
1487         attr = nextAttr;
1488     }
1489 
1490     kwd = kwdFirst;
1491     while (kwd != NULL) {
1492         nextKwd = kwd->next;
1493         uprv_free(kwd);
1494         kwd = nextKwd;
1495     }
1496 }
1497 
1498 
1499 static int32_t
_appendKeywords(ULanguageTag * langtag,char * appendAt,int32_t capacity,UErrorCode * status)1500 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
1501     int32_t reslen = 0;
1502     int32_t i, n;
1503     int32_t len;
1504     ExtensionListEntry *kwdFirst = NULL;
1505     ExtensionListEntry *kwd;
1506     const char *key, *type;
1507     char *kwdBuf = NULL;
1508     int32_t kwdBufLength = capacity;
1509     UBool posixVariant = FALSE;
1510 
1511     if (U_FAILURE(*status)) {
1512         return 0;
1513     }
1514 
1515     kwdBuf = (char*)uprv_malloc(kwdBufLength);
1516     if (kwdBuf == NULL) {
1517         *status = U_MEMORY_ALLOCATION_ERROR;
1518         return 0;
1519     }
1520 
1521     /* Determine if variants already exists */
1522     if (ultag_getVariantsSize(langtag)) {
1523         posixVariant = TRUE;
1524     }
1525 
1526     n = ultag_getExtensionsSize(langtag);
1527 
1528     /* resolve locale keywords and reordering keys */
1529     for (i = 0; i < n; i++) {
1530         key = ultag_getExtensionKey(langtag, i);
1531         type = ultag_getExtensionValue(langtag, i);
1532         if (*key == LDMLEXT) {
1533             _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
1534             if (U_FAILURE(*status)) {
1535                 break;
1536             }
1537         } else {
1538             kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1539             if (kwd == NULL) {
1540                 *status = U_MEMORY_ALLOCATION_ERROR;
1541                 break;
1542             }
1543             kwd->key = key;
1544             kwd->value = type;
1545             if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1546                 uprv_free(kwd);
1547                 *status = U_ILLEGAL_ARGUMENT_ERROR;
1548                 break;
1549             }
1550         }
1551     }
1552 
1553     if (U_SUCCESS(*status)) {
1554         type = ultag_getPrivateUse(langtag);
1555         if ((int32_t)uprv_strlen(type) > 0) {
1556             /* add private use as a keyword */
1557             kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1558             if (kwd == NULL) {
1559                 *status = U_MEMORY_ALLOCATION_ERROR;
1560             } else {
1561                 kwd->key = PRIVATEUSE_KEY;
1562                 kwd->value = type;
1563                 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1564                     uprv_free(kwd);
1565                     *status = U_ILLEGAL_ARGUMENT_ERROR;
1566                 }
1567             }
1568         }
1569     }
1570 
1571     /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1572 
1573     if (U_SUCCESS(*status) && posixVariant) {
1574         len = (int32_t) uprv_strlen(_POSIX);
1575         if (reslen < capacity) {
1576             uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
1577         }
1578         reslen += len;
1579     }
1580 
1581     if (U_SUCCESS(*status) && kwdFirst != NULL) {
1582         /* write out the sorted keywords */
1583         UBool firstValue = TRUE;
1584         kwd = kwdFirst;
1585         do {
1586             if (reslen < capacity) {
1587                 if (firstValue) {
1588                     /* '@' */
1589                     *(appendAt + reslen) = LOCALE_EXT_SEP;
1590                     firstValue = FALSE;
1591                 } else {
1592                     /* ';' */
1593                     *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
1594                 }
1595             }
1596             reslen++;
1597 
1598             /* key */
1599             len = (int32_t)uprv_strlen(kwd->key);
1600             if (reslen < capacity) {
1601                 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
1602             }
1603             reslen += len;
1604 
1605             /* '=' */
1606             if (reslen < capacity) {
1607                 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
1608             }
1609             reslen++;
1610 
1611             /* type */
1612             len = (int32_t)uprv_strlen(kwd->value);
1613             if (reslen < capacity) {
1614                 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
1615             }
1616             reslen += len;
1617 
1618             kwd = kwd->next;
1619         } while (kwd);
1620     }
1621 
1622     /* clean up */
1623     kwd = kwdFirst;
1624     while (kwd != NULL) {
1625         ExtensionListEntry *tmpKwd = kwd->next;
1626         uprv_free(kwd);
1627         kwd = tmpKwd;
1628     }
1629 
1630     uprv_free(kwdBuf);
1631 
1632     if (U_FAILURE(*status)) {
1633         return 0;
1634     }
1635 
1636     return u_terminateChars(appendAt, capacity, reslen, status);
1637 }
1638 
1639 static int32_t
_appendPrivateuseToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool hadPosix,UErrorCode * status)1640 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1641     char buf[ULOC_FULLNAME_CAPACITY];
1642     char tmpAppend[ULOC_FULLNAME_CAPACITY];
1643     UErrorCode tmpStatus = U_ZERO_ERROR;
1644     int32_t len, i;
1645     int32_t reslen = 0;
1646 
1647     if (U_FAILURE(*status)) {
1648         return 0;
1649     }
1650 
1651     len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1652     if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1653         if (strict) {
1654             *status = U_ILLEGAL_ARGUMENT_ERROR;
1655         }
1656         return 0;
1657     }
1658 
1659     if (len > 0) {
1660         char *p, *pPriv;
1661         UBool bNext = TRUE;
1662         UBool firstValue = TRUE;
1663         UBool writeValue;
1664 
1665         pPriv = NULL;
1666         p = buf;
1667         while (bNext) {
1668             writeValue = FALSE;
1669             if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1670                 if (*p == 0) {
1671                     bNext = FALSE;
1672                 } else {
1673                     *p = 0; /* terminate */
1674                 }
1675                 if (pPriv != NULL) {
1676                     /* Private use in the canonical format is lowercase in BCP47 */
1677                     for (i = 0; *(pPriv + i) != 0; i++) {
1678                         *(pPriv + i) = uprv_tolower(*(pPriv + i));
1679                     }
1680 
1681                     /* validate */
1682                     if (_isPrivateuseValueSubtag(pPriv, -1)) {
1683                         if (firstValue) {
1684                             if (!_isVariantSubtag(pPriv, -1)) {
1685                                 writeValue = TRUE;
1686                             }
1687                         } else {
1688                             writeValue = TRUE;
1689                         }
1690                     } else if (strict) {
1691                         *status = U_ILLEGAL_ARGUMENT_ERROR;
1692                         break;
1693                     } else {
1694                         break;
1695                     }
1696 
1697                     if (writeValue) {
1698                         if (reslen < capacity) {
1699                             tmpAppend[reslen++] = SEP;
1700                         }
1701 
1702                         if (firstValue) {
1703                             if (reslen < capacity) {
1704                                 tmpAppend[reslen++] = *PRIVATEUSE_KEY;
1705                             }
1706 
1707                             if (reslen < capacity) {
1708                                 tmpAppend[reslen++] = SEP;
1709                             }
1710 
1711                             len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
1712                             if (reslen < capacity) {
1713                                 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
1714                             }
1715                             reslen += len;
1716 
1717                             if (reslen < capacity) {
1718                                 tmpAppend[reslen++] = SEP;
1719                             }
1720 
1721                             firstValue = FALSE;
1722                         }
1723 
1724                         len = (int32_t)uprv_strlen(pPriv);
1725                         if (reslen < capacity) {
1726                             uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
1727                         }
1728                         reslen += len;
1729                     }
1730                 }
1731                 /* reset private use starting position */
1732                 pPriv = NULL;
1733             } else if (pPriv == NULL) {
1734                 pPriv = p;
1735             }
1736             p++;
1737         }
1738 
1739         if (U_FAILURE(*status)) {
1740             return 0;
1741         }
1742     }
1743 
1744     if (U_SUCCESS(*status)) {
1745         len = reslen;
1746         if (reslen < capacity) {
1747             uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
1748         }
1749     }
1750 
1751     u_terminateChars(appendAt, capacity, reslen, status);
1752 
1753     return reslen;
1754 }
1755 
1756 /*
1757 * -------------------------------------------------
1758 *
1759 * ultag_ functions
1760 *
1761 * -------------------------------------------------
1762 */
1763 
1764 /* Bit flags used by the parser */
1765 #define LANG 0x0001
1766 #define EXTL 0x0002
1767 #define SCRT 0x0004
1768 #define REGN 0x0008
1769 #define VART 0x0010
1770 #define EXTS 0x0020
1771 #define EXTV 0x0040
1772 #define PRIV 0x0080
1773 
1774 static ULanguageTag*
ultag_parse(const char * tag,int32_t tagLen,int32_t * parsedLen,UErrorCode * status)1775 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
1776     ULanguageTag *t;
1777     char *tagBuf;
1778     int16_t next;
1779     char *pSubtag, *pNext, *pLastGoodPosition;
1780     int32_t subtagLen;
1781     int32_t extlangIdx;
1782     ExtensionListEntry *pExtension;
1783     char *pExtValueSubtag, *pExtValueSubtagEnd;
1784     int32_t i;
1785     UBool privateuseVar = FALSE;
1786     int32_t grandfatheredLen = 0;
1787 
1788     if (parsedLen != NULL) {
1789         *parsedLen = 0;
1790     }
1791 
1792     if (U_FAILURE(*status)) {
1793         return NULL;
1794     }
1795 
1796     if (tagLen < 0) {
1797         tagLen = (int32_t)uprv_strlen(tag);
1798     }
1799 
1800     /* copy the entire string */
1801     tagBuf = (char*)uprv_malloc(tagLen + 1);
1802     if (tagBuf == NULL) {
1803         *status = U_MEMORY_ALLOCATION_ERROR;
1804         return NULL;
1805     }
1806     uprv_memcpy(tagBuf, tag, tagLen);
1807     *(tagBuf + tagLen) = 0;
1808 
1809     /* create a ULanguageTag */
1810     t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
1811     if (t == NULL) {
1812         uprv_free(tagBuf);
1813         *status = U_MEMORY_ALLOCATION_ERROR;
1814         return NULL;
1815     }
1816     _initializeULanguageTag(t);
1817     t->buf = tagBuf;
1818 
1819     if (tagLen < MINLEN) {
1820         /* the input tag is too short - return empty ULanguageTag */
1821         return t;
1822     }
1823 
1824     /* check if the tag is grandfathered */
1825     for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
1826         if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
1827             int32_t newTagLength;
1828 
1829             grandfatheredLen = tagLen;  /* back up for output parsedLen */
1830             newTagLength = uprv_strlen(GRANDFATHERED[i+1]);
1831             if (tagLen < newTagLength) {
1832                 uprv_free(tagBuf);
1833                 tagBuf = (char*)uprv_malloc(newTagLength + 1);
1834                 if (tagBuf == NULL) {
1835                     *status = U_MEMORY_ALLOCATION_ERROR;
1836                     return NULL;
1837                 }
1838                 t->buf = tagBuf;
1839                 tagLen = newTagLength;
1840             }
1841             uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
1842             break;
1843         }
1844     }
1845 
1846     /*
1847      * langtag      =   language
1848      *                  ["-" script]
1849      *                  ["-" region]
1850      *                  *("-" variant)
1851      *                  *("-" extension)
1852      *                  ["-" privateuse]
1853      */
1854 
1855     next = LANG | PRIV;
1856     pNext = pLastGoodPosition = tagBuf;
1857     extlangIdx = 0;
1858     pExtension = NULL;
1859     pExtValueSubtag = NULL;
1860     pExtValueSubtagEnd = NULL;
1861 
1862     while (pNext) {
1863         char *pSep;
1864 
1865         pSubtag = pNext;
1866 
1867         /* locate next separator char */
1868         pSep = pSubtag;
1869         while (*pSep) {
1870             if (*pSep == SEP) {
1871                 break;
1872             }
1873             pSep++;
1874         }
1875         if (*pSep == 0) {
1876             /* last subtag */
1877             pNext = NULL;
1878         } else {
1879             pNext = pSep + 1;
1880         }
1881         subtagLen = (int32_t)(pSep - pSubtag);
1882 
1883         if (next & LANG) {
1884             if (_isLanguageSubtag(pSubtag, subtagLen)) {
1885                 *pSep = 0;  /* terminate */
1886                 t->language = T_CString_toLowerCase(pSubtag);
1887 
1888                 pLastGoodPosition = pSep;
1889                 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1890                 continue;
1891             }
1892         }
1893         if (next & EXTL) {
1894             if (_isExtlangSubtag(pSubtag, subtagLen)) {
1895                 *pSep = 0;
1896                 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
1897 
1898                 pLastGoodPosition = pSep;
1899                 if (extlangIdx < 3) {
1900                     next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1901                 } else {
1902                     next = SCRT | REGN | VART | EXTS | PRIV;
1903                 }
1904                 continue;
1905             }
1906         }
1907         if (next & SCRT) {
1908             if (_isScriptSubtag(pSubtag, subtagLen)) {
1909                 char *p = pSubtag;
1910 
1911                 *pSep = 0;
1912 
1913                 /* to title case */
1914                 *p = uprv_toupper(*p);
1915                 p++;
1916                 for (; *p; p++) {
1917                     *p = uprv_tolower(*p);
1918                 }
1919 
1920                 t->script = pSubtag;
1921 
1922                 pLastGoodPosition = pSep;
1923                 next = REGN | VART | EXTS | PRIV;
1924                 continue;
1925             }
1926         }
1927         if (next & REGN) {
1928             if (_isRegionSubtag(pSubtag, subtagLen)) {
1929                 *pSep = 0;
1930                 t->region = T_CString_toUpperCase(pSubtag);
1931 
1932                 pLastGoodPosition = pSep;
1933                 next = VART | EXTS | PRIV;
1934                 continue;
1935             }
1936         }
1937         if (next & VART) {
1938             if (_isVariantSubtag(pSubtag, subtagLen) ||
1939                (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
1940                 VariantListEntry *var;
1941                 UBool isAdded;
1942 
1943                 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
1944                 if (var == NULL) {
1945                     *status = U_MEMORY_ALLOCATION_ERROR;
1946                     goto error;
1947                 }
1948                 *pSep = 0;
1949                 var->variant = T_CString_toUpperCase(pSubtag);
1950                 isAdded = _addVariantToList(&(t->variants), var);
1951                 if (!isAdded) {
1952                     /* duplicated variant entry */
1953                     uprv_free(var);
1954                     break;
1955                 }
1956                 pLastGoodPosition = pSep;
1957                 next = VART | EXTS | PRIV;
1958                 continue;
1959             }
1960         }
1961         if (next & EXTS) {
1962             if (_isExtensionSingleton(pSubtag, subtagLen)) {
1963                 if (pExtension != NULL) {
1964                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
1965                         /* the previous extension is incomplete */
1966                         uprv_free(pExtension);
1967                         pExtension = NULL;
1968                         break;
1969                     }
1970 
1971                     /* terminate the previous extension value */
1972                     *pExtValueSubtagEnd = 0;
1973                     pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
1974 
1975                     /* insert the extension to the list */
1976                     if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
1977                         pLastGoodPosition = pExtValueSubtagEnd;
1978                     } else {
1979                         /* stop parsing here */
1980                         uprv_free(pExtension);
1981                         pExtension = NULL;
1982                         break;
1983                     }
1984                 }
1985 
1986                 /* create a new extension */
1987                 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1988                 if (pExtension == NULL) {
1989                     *status = U_MEMORY_ALLOCATION_ERROR;
1990                     goto error;
1991                 }
1992                 *pSep = 0;
1993                 pExtension->key = T_CString_toLowerCase(pSubtag);
1994                 pExtension->value = NULL;   /* will be set later */
1995 
1996                 /*
1997                  * reset the start and the end location of extension value
1998                  * subtags for this extension
1999                  */
2000                 pExtValueSubtag = NULL;
2001                 pExtValueSubtagEnd = NULL;
2002 
2003                 next = EXTV;
2004                 continue;
2005             }
2006         }
2007         if (next & EXTV) {
2008             if (_isExtensionSubtag(pSubtag, subtagLen)) {
2009                 if (pExtValueSubtag == NULL) {
2010                     /* if the start postion of this extension's value is not yet,
2011                         this one is the first value subtag */
2012                     pExtValueSubtag = pSubtag;
2013                 }
2014 
2015                 /* Mark the end of this subtag */
2016                 pExtValueSubtagEnd = pSep;
2017                 next = EXTS | EXTV | PRIV;
2018 
2019                 continue;
2020             }
2021         }
2022         if (next & PRIV) {
2023             if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
2024                 char *pPrivuseVal;
2025 
2026                 if (pExtension != NULL) {
2027                     /* Process the last extension */
2028                     if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2029                         /* the previous extension is incomplete */
2030                         uprv_free(pExtension);
2031                         pExtension = NULL;
2032                         break;
2033                     } else {
2034                         /* terminate the previous extension value */
2035                         *pExtValueSubtagEnd = 0;
2036                         pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2037 
2038                         /* insert the extension to the list */
2039                         if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2040                             pLastGoodPosition = pExtValueSubtagEnd;
2041                             pExtension = NULL;
2042                         } else {
2043                         /* stop parsing here */
2044                             uprv_free(pExtension);
2045                             pExtension = NULL;
2046                             break;
2047                         }
2048                     }
2049                 }
2050 
2051                 /* The rest of part will be private use value subtags */
2052                 if (pNext == NULL) {
2053                     /* empty private use subtag */
2054                     break;
2055                 }
2056                 /* back up the private use value start position */
2057                 pPrivuseVal = pNext;
2058 
2059                 /* validate private use value subtags */
2060                 while (pNext) {
2061                     pSubtag = pNext;
2062                     pSep = pSubtag;
2063                     while (*pSep) {
2064                         if (*pSep == SEP) {
2065                             break;
2066                         }
2067                         pSep++;
2068                     }
2069                     if (*pSep == 0) {
2070                         /* last subtag */
2071                         pNext = NULL;
2072                     } else {
2073                         pNext = pSep + 1;
2074                     }
2075                     subtagLen = (int32_t)(pSep - pSubtag);
2076 
2077                     if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
2078                         *pSep = 0;
2079                         next = VART;
2080                         privateuseVar = TRUE;
2081                         break;
2082                     } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
2083                         pLastGoodPosition = pSep;
2084                     } else {
2085                         break;
2086                     }
2087                 }
2088 
2089                 if (next == VART) {
2090                     continue;
2091                 }
2092 
2093                 if (pLastGoodPosition - pPrivuseVal > 0) {
2094                     *pLastGoodPosition = 0;
2095                     t->privateuse = T_CString_toLowerCase(pPrivuseVal);
2096                 }
2097                 /* No more subtags, exiting the parse loop */
2098                 break;
2099             }
2100             break;
2101         }
2102 
2103         /* If we fell through here, it means this subtag is illegal - quit parsing */
2104         break;
2105     }
2106 
2107     if (pExtension != NULL) {
2108         /* Process the last extension */
2109         if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2110             /* the previous extension is incomplete */
2111             uprv_free(pExtension);
2112         } else {
2113             /* terminate the previous extension value */
2114             *pExtValueSubtagEnd = 0;
2115             pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2116             /* insert the extension to the list */
2117             if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2118                 pLastGoodPosition = pExtValueSubtagEnd;
2119             } else {
2120                 uprv_free(pExtension);
2121             }
2122         }
2123     }
2124 
2125     if (parsedLen != NULL) {
2126         *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
2127     }
2128 
2129     return t;
2130 
2131 error:
2132     uprv_free(t);
2133     return NULL;
2134 }
2135 
2136 static void
ultag_close(ULanguageTag * langtag)2137 ultag_close(ULanguageTag* langtag) {
2138 
2139     if (langtag == NULL) {
2140         return;
2141     }
2142 
2143     uprv_free(langtag->buf);
2144 
2145     if (langtag->variants) {
2146         VariantListEntry *curVar = langtag->variants;
2147         while (curVar) {
2148             VariantListEntry *nextVar = curVar->next;
2149             uprv_free(curVar);
2150             curVar = nextVar;
2151         }
2152     }
2153 
2154     if (langtag->extensions) {
2155         ExtensionListEntry *curExt = langtag->extensions;
2156         while (curExt) {
2157             ExtensionListEntry *nextExt = curExt->next;
2158             uprv_free(curExt);
2159             curExt = nextExt;
2160         }
2161     }
2162 
2163     uprv_free(langtag);
2164 }
2165 
2166 static const char*
ultag_getLanguage(const ULanguageTag * langtag)2167 ultag_getLanguage(const ULanguageTag* langtag) {
2168     return langtag->language;
2169 }
2170 
#if 0
/*
 * Currently compiled out.
 * Returns the language subtag, replaced by DEPRECATEDLANGS[i + 1] when the
 * language matches DEPRECATEDLANGS[i] (the table is walked in pairs).
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    int32_t i;
    for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) {
            return DEPRECATEDLANGS[i + 1];
        }
    }
    return langtag->language;
}
#endif
2183 
2184 static const char*
ultag_getExtlang(const ULanguageTag * langtag,int32_t idx)2185 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
2186     if (idx >= 0 && idx < MAXEXTLANG) {
2187         return langtag->extlang[idx];
2188     }
2189     return NULL;
2190 }
2191 
2192 static int32_t
ultag_getExtlangSize(const ULanguageTag * langtag)2193 ultag_getExtlangSize(const ULanguageTag* langtag) {
2194     int32_t size = 0;
2195     int32_t i;
2196     for (i = 0; i < MAXEXTLANG; i++) {
2197         if (langtag->extlang[i]) {
2198             size++;
2199         }
2200     }
2201     return size;
2202 }
2203 
2204 static const char*
ultag_getScript(const ULanguageTag * langtag)2205 ultag_getScript(const ULanguageTag* langtag) {
2206     return langtag->script;
2207 }
2208 
2209 static const char*
ultag_getRegion(const ULanguageTag * langtag)2210 ultag_getRegion(const ULanguageTag* langtag) {
2211     return langtag->region;
2212 }
2213 
2214 static const char*
ultag_getVariant(const ULanguageTag * langtag,int32_t idx)2215 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
2216     const char *var = NULL;
2217     VariantListEntry *cur = langtag->variants;
2218     int32_t i = 0;
2219     while (cur) {
2220         if (i == idx) {
2221             var = cur->variant;
2222             break;
2223         }
2224         cur = cur->next;
2225         i++;
2226     }
2227     return var;
2228 }
2229 
2230 static int32_t
ultag_getVariantsSize(const ULanguageTag * langtag)2231 ultag_getVariantsSize(const ULanguageTag* langtag) {
2232     int32_t size = 0;
2233     VariantListEntry *cur = langtag->variants;
2234     while (TRUE) {
2235         if (cur == NULL) {
2236             break;
2237         }
2238         size++;
2239         cur = cur->next;
2240     }
2241     return size;
2242 }
2243 
2244 static const char*
ultag_getExtensionKey(const ULanguageTag * langtag,int32_t idx)2245 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
2246     const char *key = NULL;
2247     ExtensionListEntry *cur = langtag->extensions;
2248     int32_t i = 0;
2249     while (cur) {
2250         if (i == idx) {
2251             key = cur->key;
2252             break;
2253         }
2254         cur = cur->next;
2255         i++;
2256     }
2257     return key;
2258 }
2259 
2260 static const char*
ultag_getExtensionValue(const ULanguageTag * langtag,int32_t idx)2261 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
2262     const char *val = NULL;
2263     ExtensionListEntry *cur = langtag->extensions;
2264     int32_t i = 0;
2265     while (cur) {
2266         if (i == idx) {
2267             val = cur->value;
2268             break;
2269         }
2270         cur = cur->next;
2271         i++;
2272     }
2273     return val;
2274 }
2275 
2276 static int32_t
ultag_getExtensionsSize(const ULanguageTag * langtag)2277 ultag_getExtensionsSize(const ULanguageTag* langtag) {
2278     int32_t size = 0;
2279     ExtensionListEntry *cur = langtag->extensions;
2280     while (TRUE) {
2281         if (cur == NULL) {
2282             break;
2283         }
2284         size++;
2285         cur = cur->next;
2286     }
2287     return size;
2288 }
2289 
2290 static const char*
ultag_getPrivateUse(const ULanguageTag * langtag)2291 ultag_getPrivateUse(const ULanguageTag* langtag) {
2292     return langtag->privateuse;
2293 }
2294 
#if 0
/* Currently compiled out. Returns the grandfathered field of the parsed tag. */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag) {
    return langtag->grandfathered;
}
#endif
2301 
2302 
2303 /*
2304 * -------------------------------------------------
2305 *
2306 * Locale/BCP47 conversion APIs, exposed as uloc_*
2307 *
2308 * -------------------------------------------------
2309 */
2310 U_CAPI int32_t U_EXPORT2
uloc_toLanguageTag(const char * localeID,char * langtag,int32_t langtagCapacity,UBool strict,UErrorCode * status)2311 uloc_toLanguageTag(const char* localeID,
2312                    char* langtag,
2313                    int32_t langtagCapacity,
2314                    UBool strict,
2315                    UErrorCode* status) {
2316     /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2317     char canonical[256];
2318     int32_t reslen = 0;
2319     UErrorCode tmpStatus = U_ZERO_ERROR;
2320     UBool hadPosix = FALSE;
2321     const char* pKeywordStart;
2322 
2323     /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
2324     canonical[0] = 0;
2325     if (uprv_strlen(localeID) > 0) {
2326         uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
2327         if (tmpStatus != U_ZERO_ERROR) {
2328             *status = U_ILLEGAL_ARGUMENT_ERROR;
2329             return 0;
2330         }
2331     }
2332 
2333     /* For handling special case - private use only tag */
2334     pKeywordStart = locale_getKeywordsStart(canonical);
2335     if (pKeywordStart == canonical) {
2336         UEnumeration *kwdEnum;
2337         int kwdCnt = 0;
2338         UBool done = FALSE;
2339 
2340         kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
2341         if (kwdEnum != NULL) {
2342             kwdCnt = uenum_count(kwdEnum, &tmpStatus);
2343             if (kwdCnt == 1) {
2344                 const char *key;
2345                 int32_t len = 0;
2346 
2347                 key = uenum_next(kwdEnum, &len, &tmpStatus);
2348                 if (len == 1 && *key == PRIVATEUSE) {
2349                     char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2350                     buf[0] = PRIVATEUSE;
2351                     buf[1] = SEP;
2352                     len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
2353                     if (U_SUCCESS(tmpStatus)) {
2354                         if (_isPrivateuseValueSubtags(&buf[2], len)) {
2355                             /* return private use only tag */
2356                             reslen = len + 2;
2357                             uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
2358                             u_terminateChars(langtag, langtagCapacity, reslen, status);
2359                             done = TRUE;
2360                         } else if (strict) {
2361                             *status = U_ILLEGAL_ARGUMENT_ERROR;
2362                             done = TRUE;
2363                         }
2364                         /* if not strict mode, then "und" will be returned */
2365                     } else {
2366                         *status = U_ILLEGAL_ARGUMENT_ERROR;
2367                         done = TRUE;
2368                     }
2369                 }
2370             }
2371             uenum_close(kwdEnum);
2372             if (done) {
2373                 return reslen;
2374             }
2375         }
2376     }
2377 
2378     reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
2379     reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2380     reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2381     reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
2382     reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2383     reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2384 
2385     return reslen;
2386 }
2387 
2388 
2389 U_CAPI int32_t U_EXPORT2
uloc_forLanguageTag(const char * langtag,char * localeID,int32_t localeIDCapacity,int32_t * parsedLength,UErrorCode * status)2390 uloc_forLanguageTag(const char* langtag,
2391                     char* localeID,
2392                     int32_t localeIDCapacity,
2393                     int32_t* parsedLength,
2394                     UErrorCode* status) {
2395     ULanguageTag *lt;
2396     int32_t reslen = 0;
2397     const char *subtag, *p;
2398     int32_t len;
2399     int32_t i, n;
2400     UBool noRegion = TRUE;
2401 
2402     lt = ultag_parse(langtag, -1, parsedLength, status);
2403     if (U_FAILURE(*status)) {
2404         return 0;
2405     }
2406 
2407     /* language */
2408     subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
2409     if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
2410         len = (int32_t)uprv_strlen(subtag);
2411         if (len > 0) {
2412             if (reslen < localeIDCapacity) {
2413                 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
2414             }
2415             reslen += len;
2416         }
2417     }
2418 
2419     /* script */
2420     subtag = ultag_getScript(lt);
2421     len = (int32_t)uprv_strlen(subtag);
2422     if (len > 0) {
2423         if (reslen < localeIDCapacity) {
2424             *(localeID + reslen) = LOCALE_SEP;
2425         }
2426         reslen++;
2427 
2428         /* write out the script in title case */
2429         p = subtag;
2430         while (*p) {
2431             if (reslen < localeIDCapacity) {
2432                 if (p == subtag) {
2433                     *(localeID + reslen) = uprv_toupper(*p);
2434                 } else {
2435                     *(localeID + reslen) = *p;
2436                 }
2437             }
2438             reslen++;
2439             p++;
2440         }
2441     }
2442 
2443     /* region */
2444     subtag = ultag_getRegion(lt);
2445     len = (int32_t)uprv_strlen(subtag);
2446     if (len > 0) {
2447         if (reslen < localeIDCapacity) {
2448             *(localeID + reslen) = LOCALE_SEP;
2449         }
2450         reslen++;
2451         /* write out the retion in upper case */
2452         p = subtag;
2453         while (*p) {
2454             if (reslen < localeIDCapacity) {
2455                 *(localeID + reslen) = uprv_toupper(*p);
2456             }
2457             reslen++;
2458             p++;
2459         }
2460         noRegion = FALSE;
2461     }
2462 
2463     /* variants */
2464     n = ultag_getVariantsSize(lt);
2465     if (n > 0) {
2466         if (noRegion) {
2467             if (reslen < localeIDCapacity) {
2468                 *(localeID + reslen) = LOCALE_SEP;
2469             }
2470             reslen++;
2471         }
2472 
2473         for (i = 0; i < n; i++) {
2474             subtag = ultag_getVariant(lt, i);
2475             if (reslen < localeIDCapacity) {
2476                 *(localeID + reslen) = LOCALE_SEP;
2477             }
2478             reslen++;
2479             /* write out the variant in upper case */
2480             p = subtag;
2481             while (*p) {
2482                 if (reslen < localeIDCapacity) {
2483                     *(localeID + reslen) = uprv_toupper(*p);
2484                 }
2485                 reslen++;
2486                 p++;
2487             }
2488         }
2489     }
2490 
2491     /* keywords */
2492     n = ultag_getExtensionsSize(lt);
2493     subtag = ultag_getPrivateUse(lt);
2494     if (n > 0 || uprv_strlen(subtag) > 0) {
2495         if (reslen == 0 && n > 0) {
2496             /* need a language */
2497             if (reslen < localeIDCapacity) {
2498                 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
2499             }
2500             reslen += LANG_UND_LEN;
2501         }
2502         len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
2503         reslen += len;
2504     }
2505 
2506     ultag_close(lt);
2507     return u_terminateChars(localeID, localeIDCapacity, reslen, status);
2508 }
2509 
2510 
2511