1 /*
2 **********************************************************************
3 * Copyright (C) 2009-2015, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
7
8 #include "unicode/utypes.h"
9 #include "unicode/ures.h"
10 #include "unicode/putil.h"
11 #include "unicode/uloc.h"
12 #include "ustr_imp.h"
13 #include "cmemory.h"
14 #include "cstring.h"
15 #include "putilimp.h"
16 #include "uinvchar.h"
17 #include "ulocimp.h"
18 #include "uassert.h"
19
/*
 * Node of a singly linked list of variant subtags.
 * The list helpers in this file never free `variant`; only the node
 * itself is released by the code that builds such lists.
 */
typedef struct VariantListEntry {
    const char              *variant;   /* text of one variant subtag */
    struct VariantListEntry *next;      /* following node, NULL at the tail */
} VariantListEntry;
25
/*
 * Node of a singly linked list of attribute values.
 * Whether `attribute` is heap storage or points into a caller's scratch
 * buffer depends on the function that builds the list.
 */
typedef struct AttributeListEntry {
    const char                *attribute;   /* text of one attribute value */
    struct AttributeListEntry *next;        /* following node, NULL at the tail */
} AttributeListEntry;
31
/*
 * Node of a singly linked list of extensions, each a key/value pair.
 * The list helpers in this file only link nodes; they do not copy or free
 * the key and value strings.
 */
typedef struct ExtensionListEntry {
    const char                *key;     /* extension key (singleton or keyword key) */
    const char                *value;   /* value associated with the key */
    struct ExtensionListEntry *next;    /* following node, NULL at the tail */
} ExtensionListEntry;
38
39 #define MAXEXTLANG 3
40 typedef struct ULanguageTag {
41 char *buf; /* holding parsed subtags */
42 const char *language;
43 const char *extlang[MAXEXTLANG];
44 const char *script;
45 const char *region;
46 VariantListEntry *variants;
47 ExtensionListEntry *extensions;
48 const char *privateuse;
49 const char *grandfathered;
50 } ULanguageTag;
51
/* ---- BCP 47 language tag syntax ---- */
#define MINLEN 2
#define SEP '-'             /* subtag separator */
#define PRIVATEUSE 'x'      /* private use singleton */
#define LDMLEXT 'u'         /* Unicode locale (LDML) extension singleton */

/* ---- ICU locale ID syntax ---- */
#define LOCALE_SEP '_'
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/* ---- character classification ---- */
#define ISALPHA(c) uprv_isASCIILetter(c)
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')

/* ---- shared string constants ---- */
static const char EMPTY[] = "";
static const char LANG_UND[] = "und";
#define LANG_UND_LEN 3      /* length of LANG_UND without the terminator */
static const char PRIVATEUSE_KEY[] = "x";
static const char _POSIX[] = "_POSIX";
static const char POSIX_KEY[] = "va";
static const char POSIX_VALUE[] = "posix";
static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";
static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[] = "yes";
76
/*
 * Grandfathered tags and their preferred replacements, stored as
 * consecutive pairs: even index = grandfathered tag, odd index = the
 * preferred value.  A NULL pair terminates the table.
 */
static const char* const GRANDFATHERED[] = {
    "art-lojban",   "jbo",
    "cel-gaulish",  "xtg-x-cel-gaulish",
    "en-GB-oed",    "en-GB-x-oed",
    "i-ami",        "ami",
    "i-bnn",        "bnn",
    "i-default",    "en-x-i-default",
    "i-enochian",   "und-x-i-enochian",
    "i-hak",        "hak",
    "i-klingon",    "tlh",
    "i-lux",        "lb",
    "i-mingo",      "see-x-i-mingo",
    "i-navajo",     "nv",
    "i-pwn",        "pwn",
    "i-tao",        "tao",
    "i-tay",        "tay",
    "i-tsu",        "tsu",
    "no-bok",       "nb",
    "no-nyn",       "nn",
    "sgn-be-fr",    "sfb",
    "sgn-be-nl",    "vgt",
    "sgn-ch-de",    "sgg",
    "zh-guoyu",     "cmn",
    "zh-hakka",     "hak",
    "zh-min",       "nan-x-zh-min",
    "zh-min-nan",   "nan",
    "zh-xiang",     "hsn",
    NULL,           NULL
};
107
/*
 * Deprecated language codes and their replacements, stored as
 * consecutive rows: even row = deprecated code, odd row = new code.
 * There is no terminator row; users iterate by the array length.
 */
static const char DEPRECATEDLANGS[][4] = {
    "iw",   "he",
    "ji",   "yi",
    "in",   "id"
};
114
115 /*
116 * -------------------------------------------------
117 *
118 * These ultag_ functions may be exposed as APIs later
119 *
120 * -------------------------------------------------
121 */
122
123 static ULanguageTag*
124 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
125
126 static void
127 ultag_close(ULanguageTag* langtag);
128
129 static const char*
130 ultag_getLanguage(const ULanguageTag* langtag);
131
132 #if 0
133 static const char*
134 ultag_getJDKLanguage(const ULanguageTag* langtag);
135 #endif
136
137 static const char*
138 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
139
140 static int32_t
141 ultag_getExtlangSize(const ULanguageTag* langtag);
142
143 static const char*
144 ultag_getScript(const ULanguageTag* langtag);
145
146 static const char*
147 ultag_getRegion(const ULanguageTag* langtag);
148
149 static const char*
150 ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
151
152 static int32_t
153 ultag_getVariantsSize(const ULanguageTag* langtag);
154
155 static const char*
156 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
157
158 static const char*
159 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
160
161 static int32_t
162 ultag_getExtensionsSize(const ULanguageTag* langtag);
163
164 static const char*
165 ultag_getPrivateUse(const ULanguageTag* langtag);
166
167 #if 0
168 static const char*
169 ultag_getGrandfathered(const ULanguageTag* langtag);
170 #endif
171
172 /*
173 * -------------------------------------------------
174 *
175 * Language subtag syntax validation functions
176 *
177 * -------------------------------------------------
178 */
179
180 static UBool
_isAlphaString(const char * s,int32_t len)181 _isAlphaString(const char* s, int32_t len) {
182 int32_t i;
183 for (i = 0; i < len; i++) {
184 if (!ISALPHA(*(s + i))) {
185 return FALSE;
186 }
187 }
188 return TRUE;
189 }
190
191 static UBool
_isNumericString(const char * s,int32_t len)192 _isNumericString(const char* s, int32_t len) {
193 int32_t i;
194 for (i = 0; i < len; i++) {
195 if (!ISNUMERIC(*(s + i))) {
196 return FALSE;
197 }
198 }
199 return TRUE;
200 }
201
202 static UBool
_isAlphaNumericString(const char * s,int32_t len)203 _isAlphaNumericString(const char* s, int32_t len) {
204 int32_t i;
205 for (i = 0; i < len; i++) {
206 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
207 return FALSE;
208 }
209 }
210 return TRUE;
211 }
212
213 static UBool
_isLanguageSubtag(const char * s,int32_t len)214 _isLanguageSubtag(const char* s, int32_t len) {
215 /*
216 * language = 2*3ALPHA ; shortest ISO 639 code
217 * ["-" extlang] ; sometimes followed by
218 * ; extended language subtags
219 * / 4ALPHA ; or reserved for future use
220 * / 5*8ALPHA ; or registered language subtag
221 */
222 if (len < 0) {
223 len = (int32_t)uprv_strlen(s);
224 }
225 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
226 return TRUE;
227 }
228 return FALSE;
229 }
230
231 static UBool
_isExtlangSubtag(const char * s,int32_t len)232 _isExtlangSubtag(const char* s, int32_t len) {
233 /*
234 * extlang = 3ALPHA ; selected ISO 639 codes
235 * *2("-" 3ALPHA) ; permanently reserved
236 */
237 if (len < 0) {
238 len = (int32_t)uprv_strlen(s);
239 }
240 if (len == 3 && _isAlphaString(s, len)) {
241 return TRUE;
242 }
243 return FALSE;
244 }
245
246 static UBool
_isScriptSubtag(const char * s,int32_t len)247 _isScriptSubtag(const char* s, int32_t len) {
248 /*
249 * script = 4ALPHA ; ISO 15924 code
250 */
251 if (len < 0) {
252 len = (int32_t)uprv_strlen(s);
253 }
254 if (len == 4 && _isAlphaString(s, len)) {
255 return TRUE;
256 }
257 return FALSE;
258 }
259
260 static UBool
_isRegionSubtag(const char * s,int32_t len)261 _isRegionSubtag(const char* s, int32_t len) {
262 /*
263 * region = 2ALPHA ; ISO 3166-1 code
264 * / 3DIGIT ; UN M.49 code
265 */
266 if (len < 0) {
267 len = (int32_t)uprv_strlen(s);
268 }
269 if (len == 2 && _isAlphaString(s, len)) {
270 return TRUE;
271 }
272 if (len == 3 && _isNumericString(s, len)) {
273 return TRUE;
274 }
275 return FALSE;
276 }
277
278 static UBool
_isVariantSubtag(const char * s,int32_t len)279 _isVariantSubtag(const char* s, int32_t len) {
280 /*
281 * variant = 5*8alphanum ; registered variants
282 * / (DIGIT 3alphanum)
283 */
284 if (len < 0) {
285 len = (int32_t)uprv_strlen(s);
286 }
287 if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
288 return TRUE;
289 }
290 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
291 return TRUE;
292 }
293 return FALSE;
294 }
295
296 static UBool
_isPrivateuseVariantSubtag(const char * s,int32_t len)297 _isPrivateuseVariantSubtag(const char* s, int32_t len) {
298 /*
299 * variant = 1*8alphanum ; registered variants
300 * / (DIGIT 3alphanum)
301 */
302 if (len < 0) {
303 len = (int32_t)uprv_strlen(s);
304 }
305 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
306 return TRUE;
307 }
308 return FALSE;
309 }
310
311 static UBool
_isExtensionSingleton(const char * s,int32_t len)312 _isExtensionSingleton(const char* s, int32_t len) {
313 /*
314 * extension = singleton 1*("-" (2*8alphanum))
315 */
316 if (len < 0) {
317 len = (int32_t)uprv_strlen(s);
318 }
319 if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
320 return TRUE;
321 }
322 return FALSE;
323 }
324
325 static UBool
_isExtensionSubtag(const char * s,int32_t len)326 _isExtensionSubtag(const char* s, int32_t len) {
327 /*
328 * extension = singleton 1*("-" (2*8alphanum))
329 */
330 if (len < 0) {
331 len = (int32_t)uprv_strlen(s);
332 }
333 if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
334 return TRUE;
335 }
336 return FALSE;
337 }
338
339 static UBool
_isExtensionSubtags(const char * s,int32_t len)340 _isExtensionSubtags(const char* s, int32_t len) {
341 const char *p = s;
342 const char *pSubtag = NULL;
343
344 if (len < 0) {
345 len = (int32_t)uprv_strlen(s);
346 }
347
348 while ((p - s) < len) {
349 if (*p == SEP) {
350 if (pSubtag == NULL) {
351 return FALSE;
352 }
353 if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
354 return FALSE;
355 }
356 pSubtag = NULL;
357 } else if (pSubtag == NULL) {
358 pSubtag = p;
359 }
360 p++;
361 }
362 if (pSubtag == NULL) {
363 return FALSE;
364 }
365 return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
366 }
367
368 static UBool
_isPrivateuseValueSubtag(const char * s,int32_t len)369 _isPrivateuseValueSubtag(const char* s, int32_t len) {
370 /*
371 * privateuse = "x" 1*("-" (1*8alphanum))
372 */
373 if (len < 0) {
374 len = (int32_t)uprv_strlen(s);
375 }
376 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
377 return TRUE;
378 }
379 return FALSE;
380 }
381
382 static UBool
_isPrivateuseValueSubtags(const char * s,int32_t len)383 _isPrivateuseValueSubtags(const char* s, int32_t len) {
384 const char *p = s;
385 const char *pSubtag = NULL;
386
387 if (len < 0) {
388 len = (int32_t)uprv_strlen(s);
389 }
390
391 while ((p - s) < len) {
392 if (*p == SEP) {
393 if (pSubtag == NULL) {
394 return FALSE;
395 }
396 if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
397 return FALSE;
398 }
399 pSubtag = NULL;
400 } else if (pSubtag == NULL) {
401 pSubtag = p;
402 }
403 p++;
404 }
405 if (pSubtag == NULL) {
406 return FALSE;
407 }
408 return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
409 }
410
411 U_CFUNC UBool
ultag_isUnicodeLocaleKey(const char * s,int32_t len)412 ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
413 if (len < 0) {
414 len = (int32_t)uprv_strlen(s);
415 }
416 if (len == 2 && _isAlphaNumericString(s, len)) {
417 return TRUE;
418 }
419 return FALSE;
420 }
421
422 U_CFUNC UBool
ultag_isUnicodeLocaleType(const char * s,int32_t len)423 ultag_isUnicodeLocaleType(const char*s, int32_t len) {
424 const char* p;
425 int32_t subtagLen = 0;
426
427 if (len < 0) {
428 len = (int32_t)uprv_strlen(s);
429 }
430
431 for (p = s; len > 0; p++, len--) {
432 if (*p == SEP) {
433 if (subtagLen < 3) {
434 return FALSE;
435 }
436 subtagLen = 0;
437 } else if (ISALPHA(*p) || ISNUMERIC(*p)) {
438 subtagLen++;
439 if (subtagLen > 8) {
440 return FALSE;
441 }
442 } else {
443 return FALSE;
444 }
445 }
446
447 return (subtagLen >= 3);
448 }
449 /*
450 * -------------------------------------------------
451 *
452 * Helper functions
453 *
454 * -------------------------------------------------
455 */
456
457 static UBool
_addVariantToList(VariantListEntry ** first,VariantListEntry * var)458 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
459 UBool bAdded = TRUE;
460
461 if (*first == NULL) {
462 var->next = NULL;
463 *first = var;
464 } else {
465 VariantListEntry *prev, *cur;
466 int32_t cmp;
467
468 /* variants order should be preserved */
469 prev = NULL;
470 cur = *first;
471 while (TRUE) {
472 if (cur == NULL) {
473 prev->next = var;
474 var->next = NULL;
475 break;
476 }
477
478 /* Checking for duplicate variant */
479 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
480 if (cmp == 0) {
481 /* duplicated variant */
482 bAdded = FALSE;
483 break;
484 }
485 prev = cur;
486 cur = cur->next;
487 }
488 }
489
490 return bAdded;
491 }
492
493 static UBool
_addAttributeToList(AttributeListEntry ** first,AttributeListEntry * attr)494 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
495 UBool bAdded = TRUE;
496
497 if (*first == NULL) {
498 attr->next = NULL;
499 *first = attr;
500 } else {
501 AttributeListEntry *prev, *cur;
502 int32_t cmp;
503
504 /* reorder variants in alphabetical order */
505 prev = NULL;
506 cur = *first;
507 while (TRUE) {
508 if (cur == NULL) {
509 prev->next = attr;
510 attr->next = NULL;
511 break;
512 }
513 cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
514 if (cmp < 0) {
515 if (prev == NULL) {
516 *first = attr;
517 } else {
518 prev->next = attr;
519 }
520 attr->next = cur;
521 break;
522 }
523 if (cmp == 0) {
524 /* duplicated variant */
525 bAdded = FALSE;
526 break;
527 }
528 prev = cur;
529 cur = cur->next;
530 }
531 }
532
533 return bAdded;
534 }
535
536
537 static UBool
_addExtensionToList(ExtensionListEntry ** first,ExtensionListEntry * ext,UBool localeToBCP)538 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
539 UBool bAdded = TRUE;
540
541 if (*first == NULL) {
542 ext->next = NULL;
543 *first = ext;
544 } else {
545 ExtensionListEntry *prev, *cur;
546 int32_t cmp;
547
548 /* reorder variants in alphabetical order */
549 prev = NULL;
550 cur = *first;
551 while (TRUE) {
552 if (cur == NULL) {
553 prev->next = ext;
554 ext->next = NULL;
555 break;
556 }
557 if (localeToBCP) {
558 /* special handling for locale to bcp conversion */
559 int32_t len, curlen;
560
561 len = (int32_t)uprv_strlen(ext->key);
562 curlen = (int32_t)uprv_strlen(cur->key);
563
564 if (len == 1 && curlen == 1) {
565 if (*(ext->key) == *(cur->key)) {
566 cmp = 0;
567 } else if (*(ext->key) == PRIVATEUSE) {
568 cmp = 1;
569 } else if (*(cur->key) == PRIVATEUSE) {
570 cmp = -1;
571 } else {
572 cmp = *(ext->key) - *(cur->key);
573 }
574 } else if (len == 1) {
575 cmp = *(ext->key) - LDMLEXT;
576 } else if (curlen == 1) {
577 cmp = LDMLEXT - *(cur->key);
578 } else {
579 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
580 }
581 } else {
582 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
583 }
584 if (cmp < 0) {
585 if (prev == NULL) {
586 *first = ext;
587 } else {
588 prev->next = ext;
589 }
590 ext->next = cur;
591 break;
592 }
593 if (cmp == 0) {
594 /* duplicated extension key */
595 bAdded = FALSE;
596 break;
597 }
598 prev = cur;
599 cur = cur->next;
600 }
601 }
602
603 return bAdded;
604 }
605
606 static void
_initializeULanguageTag(ULanguageTag * langtag)607 _initializeULanguageTag(ULanguageTag* langtag) {
608 int32_t i;
609
610 langtag->buf = NULL;
611
612 langtag->language = EMPTY;
613 for (i = 0; i < MAXEXTLANG; i++) {
614 langtag->extlang[i] = NULL;
615 }
616
617 langtag->script = EMPTY;
618 langtag->region = EMPTY;
619
620 langtag->variants = NULL;
621 langtag->extensions = NULL;
622
623 langtag->grandfathered = EMPTY;
624 langtag->privateuse = EMPTY;
625 }
626
627 static int32_t
_appendLanguageToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)628 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
629 char buf[ULOC_LANG_CAPACITY];
630 UErrorCode tmpStatus = U_ZERO_ERROR;
631 int32_t len, i;
632 int32_t reslen = 0;
633
634 if (U_FAILURE(*status)) {
635 return 0;
636 }
637
638 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
639 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
640 if (strict) {
641 *status = U_ILLEGAL_ARGUMENT_ERROR;
642 return 0;
643 }
644 len = 0;
645 }
646
647 /* Note: returned language code is in lower case letters */
648
649 if (len == 0) {
650 if (reslen < capacity) {
651 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
652 }
653 reslen += LANG_UND_LEN;
654 } else if (!_isLanguageSubtag(buf, len)) {
655 /* invalid language code */
656 if (strict) {
657 *status = U_ILLEGAL_ARGUMENT_ERROR;
658 return 0;
659 }
660 if (reslen < capacity) {
661 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
662 }
663 reslen += LANG_UND_LEN;
664 } else {
665 /* resolve deprecated */
666 for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
667 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
668 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
669 len = (int32_t)uprv_strlen(buf);
670 break;
671 }
672 }
673 if (reslen < capacity) {
674 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
675 }
676 reslen += len;
677 }
678 u_terminateChars(appendAt, capacity, reslen, status);
679 return reslen;
680 }
681
682 static int32_t
_appendScriptToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)683 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
684 char buf[ULOC_SCRIPT_CAPACITY];
685 UErrorCode tmpStatus = U_ZERO_ERROR;
686 int32_t len;
687 int32_t reslen = 0;
688
689 if (U_FAILURE(*status)) {
690 return 0;
691 }
692
693 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
694 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
695 if (strict) {
696 *status = U_ILLEGAL_ARGUMENT_ERROR;
697 }
698 return 0;
699 }
700
701 if (len > 0) {
702 if (!_isScriptSubtag(buf, len)) {
703 /* invalid script code */
704 if (strict) {
705 *status = U_ILLEGAL_ARGUMENT_ERROR;
706 }
707 return 0;
708 } else {
709 if (reslen < capacity) {
710 *(appendAt + reslen) = SEP;
711 }
712 reslen++;
713
714 if (reslen < capacity) {
715 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
716 }
717 reslen += len;
718 }
719 }
720 u_terminateChars(appendAt, capacity, reslen, status);
721 return reslen;
722 }
723
724 static int32_t
_appendRegionToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)725 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
726 char buf[ULOC_COUNTRY_CAPACITY];
727 UErrorCode tmpStatus = U_ZERO_ERROR;
728 int32_t len;
729 int32_t reslen = 0;
730
731 if (U_FAILURE(*status)) {
732 return 0;
733 }
734
735 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
736 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
737 if (strict) {
738 *status = U_ILLEGAL_ARGUMENT_ERROR;
739 }
740 return 0;
741 }
742
743 if (len > 0) {
744 if (!_isRegionSubtag(buf, len)) {
745 /* invalid region code */
746 if (strict) {
747 *status = U_ILLEGAL_ARGUMENT_ERROR;
748 }
749 return 0;
750 } else {
751 if (reslen < capacity) {
752 *(appendAt + reslen) = SEP;
753 }
754 reslen++;
755
756 if (reslen < capacity) {
757 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
758 }
759 reslen += len;
760 }
761 }
762 u_terminateChars(appendAt, capacity, reslen, status);
763 return reslen;
764 }
765
766 static int32_t
_appendVariantsToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool * hadPosix,UErrorCode * status)767 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
768 char buf[ULOC_FULLNAME_CAPACITY];
769 UErrorCode tmpStatus = U_ZERO_ERROR;
770 int32_t len, i;
771 int32_t reslen = 0;
772
773 if (U_FAILURE(*status)) {
774 return 0;
775 }
776
777 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
778 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
779 if (strict) {
780 *status = U_ILLEGAL_ARGUMENT_ERROR;
781 }
782 return 0;
783 }
784
785 if (len > 0) {
786 char *p, *pVar;
787 UBool bNext = TRUE;
788 VariantListEntry *var;
789 VariantListEntry *varFirst = NULL;
790
791 pVar = NULL;
792 p = buf;
793 while (bNext) {
794 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
795 if (*p == 0) {
796 bNext = FALSE;
797 } else {
798 *p = 0; /* terminate */
799 }
800 if (pVar == NULL) {
801 if (strict) {
802 *status = U_ILLEGAL_ARGUMENT_ERROR;
803 break;
804 }
805 /* ignore empty variant */
806 } else {
807 /* ICU uses upper case letters for variants, but
808 the canonical format is lowercase in BCP47 */
809 for (i = 0; *(pVar + i) != 0; i++) {
810 *(pVar + i) = uprv_tolower(*(pVar + i));
811 }
812
813 /* validate */
814 if (_isVariantSubtag(pVar, -1)) {
815 if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) {
816 /* emit the variant to the list */
817 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
818 if (var == NULL) {
819 *status = U_MEMORY_ALLOCATION_ERROR;
820 break;
821 }
822 var->variant = pVar;
823 if (!_addVariantToList(&varFirst, var)) {
824 /* duplicated variant */
825 uprv_free(var);
826 if (strict) {
827 *status = U_ILLEGAL_ARGUMENT_ERROR;
828 break;
829 }
830 }
831 } else {
832 /* Special handling for POSIX variant, need to remember that we had it and then */
833 /* treat it like an extension later. */
834 *hadPosix = TRUE;
835 }
836 } else if (strict) {
837 *status = U_ILLEGAL_ARGUMENT_ERROR;
838 break;
839 } else if (_isPrivateuseValueSubtag(pVar, -1)) {
840 /* Handle private use subtags separately */
841 break;
842 }
843 }
844 /* reset variant starting position */
845 pVar = NULL;
846 } else if (pVar == NULL) {
847 pVar = p;
848 }
849 p++;
850 }
851
852 if (U_SUCCESS(*status)) {
853 if (varFirst != NULL) {
854 int32_t varLen;
855
856 /* write out validated/normalized variants to the target */
857 var = varFirst;
858 while (var != NULL) {
859 if (reslen < capacity) {
860 *(appendAt + reslen) = SEP;
861 }
862 reslen++;
863 varLen = (int32_t)uprv_strlen(var->variant);
864 if (reslen < capacity) {
865 uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
866 }
867 reslen += varLen;
868 var = var->next;
869 }
870 }
871 }
872
873 /* clean up */
874 var = varFirst;
875 while (var != NULL) {
876 VariantListEntry *tmpVar = var->next;
877 uprv_free(var);
878 var = tmpVar;
879 }
880
881 if (U_FAILURE(*status)) {
882 return 0;
883 }
884 }
885
886 u_terminateChars(appendAt, capacity, reslen, status);
887 return reslen;
888 }
889
890 static int32_t
_appendKeywordsToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool hadPosix,UErrorCode * status)891 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
892 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
893 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
894 int32_t attrBufLength = 0;
895 UBool isAttribute = FALSE;
896 UEnumeration *keywordEnum = NULL;
897 int32_t reslen = 0;
898
899 keywordEnum = uloc_openKeywords(localeID, status);
900 if (U_FAILURE(*status) && !hadPosix) {
901 uenum_close(keywordEnum);
902 return 0;
903 }
904 if (keywordEnum != NULL || hadPosix) {
905 /* reorder extensions */
906 int32_t len;
907 const char *key;
908 ExtensionListEntry *firstExt = NULL;
909 ExtensionListEntry *ext;
910 AttributeListEntry *firstAttr = NULL;
911 AttributeListEntry *attr;
912 char *attrValue;
913 char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
914 char *pExtBuf = extBuf;
915 int32_t extBufCapacity = sizeof(extBuf);
916 const char *bcpKey, *bcpValue;
917 UErrorCode tmpStatus = U_ZERO_ERROR;
918 int32_t keylen;
919 UBool isBcpUExt;
920
921 while (TRUE) {
922 isAttribute = FALSE;
923 key = uenum_next(keywordEnum, NULL, status);
924 if (key == NULL) {
925 break;
926 }
927 len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
928 /* buf must be null-terminated */
929 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
930 if (strict) {
931 *status = U_ILLEGAL_ARGUMENT_ERROR;
932 break;
933 }
934 /* ignore this keyword */
935 tmpStatus = U_ZERO_ERROR;
936 continue;
937 }
938
939 keylen = (int32_t)uprv_strlen(key);
940 isBcpUExt = (keylen > 1);
941
942 /* special keyword used for representing Unicode locale attributes */
943 if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
944 isAttribute = TRUE;
945 if (len > 0) {
946 int32_t i = 0;
947 while (TRUE) {
948 attrBufLength = 0;
949 for (; i < len; i++) {
950 if (buf[i] != '-') {
951 attrBuf[attrBufLength++] = buf[i];
952 } else {
953 i++;
954 break;
955 }
956 }
957 if (attrBufLength > 0) {
958 attrBuf[attrBufLength] = 0;
959
960 } else if (i >= len){
961 break;
962 }
963
964 /* create AttributeListEntry */
965 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
966 if (attr == NULL) {
967 *status = U_MEMORY_ALLOCATION_ERROR;
968 break;
969 }
970 attrValue = (char*)uprv_malloc(attrBufLength + 1);
971 if (attrValue == NULL) {
972 *status = U_MEMORY_ALLOCATION_ERROR;
973 break;
974 }
975 uprv_strcpy(attrValue, attrBuf);
976 attr->attribute = attrValue;
977
978 if (!_addAttributeToList(&firstAttr, attr)) {
979 uprv_free(attr);
980 uprv_free(attrValue);
981 if (strict) {
982 *status = U_ILLEGAL_ARGUMENT_ERROR;
983 break;
984 }
985 }
986 }
987 }
988 } else if (isBcpUExt) {
989 bcpKey = uloc_toUnicodeLocaleKey(key);
990 if (bcpKey == NULL) {
991 if (strict) {
992 *status = U_ILLEGAL_ARGUMENT_ERROR;
993 break;
994 }
995 continue;
996 }
997
998 /* we've checked buf is null-terminated above */
999 bcpValue = uloc_toUnicodeLocaleType(key, buf);
1000 if (bcpValue == NULL) {
1001 if (strict) {
1002 *status = U_ILLEGAL_ARGUMENT_ERROR;
1003 break;
1004 }
1005 continue;
1006 }
1007 if (bcpValue == buf) {
1008 /*
1009 When uloc_toUnicodeLocaleType(key, buf) returns the
1010 input value as is, the value is well-formed, but has
1011 no known mapping. This implementation normalizes the
1012 the value to lower case
1013 */
1014 int32_t bcpValueLen = uprv_strlen(bcpValue);
1015 if (bcpValueLen < extBufCapacity) {
1016 uprv_strcpy(pExtBuf, bcpValue);
1017 T_CString_toLowerCase(pExtBuf);
1018
1019 bcpValue = pExtBuf;
1020
1021 pExtBuf += (bcpValueLen + 1);
1022 extBufCapacity -= (bcpValueLen + 1);
1023 } else {
1024 if (strict) {
1025 *status = U_ILLEGAL_ARGUMENT_ERROR;
1026 break;
1027 }
1028 continue;
1029 }
1030 }
1031 } else {
1032 if (*key == PRIVATEUSE) {
1033 if (!_isPrivateuseValueSubtags(buf, len)) {
1034 if (strict) {
1035 *status = U_ILLEGAL_ARGUMENT_ERROR;
1036 break;
1037 }
1038 continue;
1039 }
1040 } else {
1041 if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
1042 if (strict) {
1043 *status = U_ILLEGAL_ARGUMENT_ERROR;
1044 break;
1045 }
1046 continue;
1047 }
1048 }
1049 bcpKey = key;
1050 if ((len + 1) < extBufCapacity) {
1051 uprv_memcpy(pExtBuf, buf, len);
1052 bcpValue = pExtBuf;
1053
1054 pExtBuf += len;
1055
1056 *pExtBuf = 0;
1057 pExtBuf++;
1058
1059 extBufCapacity -= (len + 1);
1060 } else {
1061 *status = U_ILLEGAL_ARGUMENT_ERROR;
1062 break;
1063 }
1064 }
1065
1066 if (!isAttribute) {
1067 /* create ExtensionListEntry */
1068 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1069 if (ext == NULL) {
1070 *status = U_MEMORY_ALLOCATION_ERROR;
1071 break;
1072 }
1073 ext->key = bcpKey;
1074 ext->value = bcpValue;
1075
1076 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1077 uprv_free(ext);
1078 if (strict) {
1079 *status = U_ILLEGAL_ARGUMENT_ERROR;
1080 break;
1081 }
1082 }
1083 }
1084 }
1085
1086 /* Special handling for POSIX variant - add the keywords for POSIX */
1087 if (hadPosix) {
1088 /* create ExtensionListEntry for POSIX */
1089 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1090 if (ext == NULL) {
1091 *status = U_MEMORY_ALLOCATION_ERROR;
1092 goto cleanup;
1093 }
1094 ext->key = POSIX_KEY;
1095 ext->value = POSIX_VALUE;
1096
1097 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1098 uprv_free(ext);
1099 }
1100 }
1101
1102 if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
1103 UBool startLDMLExtension = FALSE;
1104
1105 attr = firstAttr;
1106 ext = firstExt;
1107 do {
1108 if (!startLDMLExtension && (ext && uprv_strlen(ext->key) > 1)) {
1109 /* write LDML singleton extension */
1110 if (reslen < capacity) {
1111 *(appendAt + reslen) = SEP;
1112 }
1113 reslen++;
1114 if (reslen < capacity) {
1115 *(appendAt + reslen) = LDMLEXT;
1116 }
1117 reslen++;
1118
1119 startLDMLExtension = TRUE;
1120 }
1121
1122 /* write out the sorted BCP47 attributes, extensions and private use */
1123 if (ext && (uprv_strlen(ext->key) == 1 || attr == NULL)) {
1124 if (reslen < capacity) {
1125 *(appendAt + reslen) = SEP;
1126 }
1127 reslen++;
1128 len = (int32_t)uprv_strlen(ext->key);
1129 if (reslen < capacity) {
1130 uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
1131 }
1132 reslen += len;
1133 if (reslen < capacity) {
1134 *(appendAt + reslen) = SEP;
1135 }
1136 reslen++;
1137 len = (int32_t)uprv_strlen(ext->value);
1138 if (reslen < capacity) {
1139 uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
1140 }
1141 reslen += len;
1142
1143 ext = ext->next;
1144 } else if (attr) {
1145 /* write the value for the attributes */
1146 if (reslen < capacity) {
1147 *(appendAt + reslen) = SEP;
1148 }
1149 reslen++;
1150 len = (int32_t)uprv_strlen(attr->attribute);
1151 if (reslen < capacity) {
1152 uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
1153 }
1154 reslen += len;
1155
1156 attr = attr->next;
1157 }
1158 } while (attr != NULL || ext != NULL);
1159 }
1160 cleanup:
1161 /* clean up */
1162 ext = firstExt;
1163 while (ext != NULL) {
1164 ExtensionListEntry *tmpExt = ext->next;
1165 uprv_free(ext);
1166 ext = tmpExt;
1167 }
1168
1169 attr = firstAttr;
1170 while (attr != NULL) {
1171 AttributeListEntry *tmpAttr = attr->next;
1172 char *pValue = (char *)attr->attribute;
1173 uprv_free(pValue);
1174 uprv_free(attr);
1175 attr = tmpAttr;
1176 }
1177
1178 uenum_close(keywordEnum);
1179
1180 if (U_FAILURE(*status)) {
1181 return 0;
1182 }
1183 }
1184
1185 return u_terminateChars(appendAt, capacity, reslen, status);
1186 }
1187
1188 /**
1189 * Append keywords parsed from LDML extension value
1190 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1191 * Note: char* buf is used for storing keywords
1192 */
1193 static void
_appendLDMLExtensionAsKeywords(const char * ldmlext,ExtensionListEntry ** appendTo,char * buf,int32_t bufSize,UBool * posixVariant,UErrorCode * status)1194 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
1195 const char *pTag; /* beginning of current subtag */
1196 const char *pKwds; /* beginning of key-type pairs */
1197 UBool variantExists = *posixVariant;
1198
1199 ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */
1200 ExtensionListEntry *kwd, *nextKwd;
1201
1202 AttributeListEntry *attrFirst = NULL; /* first attribute */
1203 AttributeListEntry *attr, *nextAttr;
1204
1205 int32_t len;
1206 int32_t bufIdx = 0;
1207
1208 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1209 int32_t attrBufIdx = 0;
1210
1211 /* Reset the posixVariant value */
1212 *posixVariant = FALSE;
1213
1214 pTag = ldmlext;
1215 pKwds = NULL;
1216
1217 /* Iterate through u extension attributes */
1218 while (*pTag) {
1219 /* locate next separator char */
1220 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1221
1222 if (ultag_isUnicodeLocaleKey(pTag, len)) {
1223 pKwds = pTag;
1224 break;
1225 }
1226
1227 /* add this attribute to the list */
1228 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1229 if (attr == NULL) {
1230 *status = U_MEMORY_ALLOCATION_ERROR;
1231 goto cleanup;
1232 }
1233
1234 if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
1235 uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
1236 attrBuf[attrBufIdx + len] = 0;
1237 attr->attribute = &attrBuf[attrBufIdx];
1238 attrBufIdx += (len + 1);
1239 } else {
1240 *status = U_ILLEGAL_ARGUMENT_ERROR;
1241 goto cleanup;
1242 }
1243
1244 if (!_addAttributeToList(&attrFirst, attr)) {
1245 *status = U_ILLEGAL_ARGUMENT_ERROR;
1246 uprv_free(attr);
1247 goto cleanup;
1248 }
1249
1250 /* next tag */
1251 pTag += len;
1252 if (*pTag) {
1253 /* next to the separator */
1254 pTag++;
1255 }
1256 }
1257
1258 if (attrFirst) {
1259 /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
1260
1261 if (attrBufIdx > bufSize) {
1262 /* attrBufIdx == <total length of attribute subtag> + 1 */
1263 *status = U_ILLEGAL_ARGUMENT_ERROR;
1264 goto cleanup;
1265 }
1266
1267 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1268 if (kwd == NULL) {
1269 *status = U_MEMORY_ALLOCATION_ERROR;
1270 goto cleanup;
1271 }
1272
1273 kwd->key = LOCALE_ATTRIBUTE_KEY;
1274 kwd->value = buf;
1275
1276 /* attribute subtags sorted in alphabetical order as type */
1277 attr = attrFirst;
1278 while (attr != NULL) {
1279 nextAttr = attr->next;
1280
1281 /* buffer size check is done above */
1282 if (attr != attrFirst) {
1283 *(buf + bufIdx) = SEP;
1284 bufIdx++;
1285 }
1286
1287 len = uprv_strlen(attr->attribute);
1288 uprv_memcpy(buf + bufIdx, attr->attribute, len);
1289 bufIdx += len;
1290
1291 attr = nextAttr;
1292 }
1293 *(buf + bufIdx) = 0;
1294 bufIdx++;
1295
1296 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1297 *status = U_ILLEGAL_ARGUMENT_ERROR;
1298 uprv_free(kwd);
1299 goto cleanup;
1300 }
1301
1302 /* once keyword entry is created, delete the attribute list */
1303 attr = attrFirst;
1304 while (attr != NULL) {
1305 nextAttr = attr->next;
1306 uprv_free(attr);
1307 attr = nextAttr;
1308 }
1309 attrFirst = NULL;
1310 }
1311
1312 if (pKwds) {
1313 const char *pBcpKey = NULL; /* u extenstion key subtag */
1314 const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */
1315 int32_t bcpKeyLen = 0;
1316 int32_t bcpTypeLen = 0;
1317 UBool isDone = FALSE;
1318
1319 pTag = pKwds;
1320 /* BCP47 representation of LDML key/type pairs */
1321 while (!isDone) {
1322 const char *pNextBcpKey = NULL;
1323 int32_t nextBcpKeyLen = 0;
1324 UBool emitKeyword = FALSE;
1325
1326 if (*pTag) {
1327 /* locate next separator char */
1328 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1329
1330 if (ultag_isUnicodeLocaleKey(pTag, len)) {
1331 if (pBcpKey) {
1332 emitKeyword = TRUE;
1333 pNextBcpKey = pTag;
1334 nextBcpKeyLen = len;
1335 } else {
1336 pBcpKey = pTag;
1337 bcpKeyLen = len;
1338 }
1339 } else {
1340 U_ASSERT(pBcpKey != NULL);
1341 /* within LDML type subtags */
1342 if (pBcpType) {
1343 bcpTypeLen += (len + 1);
1344 } else {
1345 pBcpType = pTag;
1346 bcpTypeLen = len;
1347 }
1348 }
1349
1350 /* next tag */
1351 pTag += len;
1352 if (*pTag) {
1353 /* next to the separator */
1354 pTag++;
1355 }
1356 } else {
1357 /* processing last one */
1358 emitKeyword = TRUE;
1359 isDone = TRUE;
1360 }
1361
1362 if (emitKeyword) {
1363 const char *pKey = NULL; /* LDML key */
1364 const char *pType = NULL; /* LDML type */
1365
1366 char bcpKeyBuf[9]; /* BCP key length is always 2 for now */
1367
1368 U_ASSERT(pBcpKey != NULL);
1369
1370 if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
1371 /* the BCP key is invalid */
1372 *status = U_ILLEGAL_ARGUMENT_ERROR;
1373 goto cleanup;
1374 }
1375
1376 uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
1377 bcpKeyBuf[bcpKeyLen] = 0;
1378
1379 /* u extension key to LDML key */
1380 pKey = uloc_toLegacyKey(bcpKeyBuf);
1381 if (pKey == NULL) {
1382 *status = U_ILLEGAL_ARGUMENT_ERROR;
1383 goto cleanup;
1384 }
1385 if (pKey == bcpKeyBuf) {
1386 /*
1387 The key returned by toLegacyKey points to the input buffer.
1388 We normalize the result key to lower case.
1389 */
1390 T_CString_toLowerCase(bcpKeyBuf);
1391 if (bufSize - bufIdx - 1 >= bcpKeyLen) {
1392 uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen);
1393 pKey = buf + bufIdx;
1394 bufIdx += bcpKeyLen;
1395 *(buf + bufIdx) = 0;
1396 bufIdx++;
1397 } else {
1398 *status = U_BUFFER_OVERFLOW_ERROR;
1399 goto cleanup;
1400 }
1401 }
1402
1403 if (pBcpType) {
1404 char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */
1405 if (bcpTypeLen >= sizeof(bcpTypeBuf)) {
1406 /* the BCP type is too long */
1407 *status = U_ILLEGAL_ARGUMENT_ERROR;
1408 goto cleanup;
1409 }
1410
1411 uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
1412 bcpTypeBuf[bcpTypeLen] = 0;
1413
1414 /* BCP type to locale type */
1415 pType = uloc_toLegacyType(pKey, bcpTypeBuf);
1416 if (pType == NULL) {
1417 *status = U_ILLEGAL_ARGUMENT_ERROR;
1418 goto cleanup;
1419 }
1420 if (pType == bcpTypeBuf) {
1421 /*
1422 The type returned by toLegacyType points to the input buffer.
1423 We normalize the result type to lower case.
1424 */
1425 /* normalize to lower case */
1426 T_CString_toLowerCase(bcpTypeBuf);
1427 if (bufSize - bufIdx - 1 >= bcpTypeLen) {
1428 uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen);
1429 pType = buf + bufIdx;
1430 bufIdx += bcpTypeLen;
1431 *(buf + bufIdx) = 0;
1432 bufIdx++;
1433 } else {
1434 *status = U_BUFFER_OVERFLOW_ERROR;
1435 goto cleanup;
1436 }
1437 }
1438 } else {
1439 /* typeless - default type value is "yes" */
1440 pType = LOCALE_TYPE_YES;
1441 }
1442
1443 /* Special handling for u-va-posix, since we want to treat this as a variant,
1444 not as a keyword */
1445 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
1446 *posixVariant = TRUE;
1447 } else {
1448 /* create an ExtensionListEntry for this keyword */
1449 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1450 if (kwd == NULL) {
1451 *status = U_MEMORY_ALLOCATION_ERROR;
1452 goto cleanup;
1453 }
1454
1455 kwd->key = pKey;
1456 kwd->value = pType;
1457
1458 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1459 *status = U_ILLEGAL_ARGUMENT_ERROR;
1460 uprv_free(kwd);
1461 goto cleanup;
1462 }
1463 }
1464
1465 pBcpKey = pNextBcpKey;
1466 bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
1467 pBcpType = NULL;
1468 bcpTypeLen = 0;
1469 }
1470 }
1471 }
1472
1473 kwd = kwdFirst;
1474 while (kwd != NULL) {
1475 nextKwd = kwd->next;
1476 _addExtensionToList(appendTo, kwd, FALSE);
1477 kwd = nextKwd;
1478 }
1479
1480 return;
1481
1482 cleanup:
1483 attr = attrFirst;
1484 while (attr != NULL) {
1485 nextAttr = attr->next;
1486 uprv_free(attr);
1487 attr = nextAttr;
1488 }
1489
1490 kwd = kwdFirst;
1491 while (kwd != NULL) {
1492 nextKwd = kwd->next;
1493 uprv_free(kwd);
1494 kwd = nextKwd;
1495 }
1496 }
1497
1498
1499 static int32_t
_appendKeywords(ULanguageTag * langtag,char * appendAt,int32_t capacity,UErrorCode * status)1500 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
1501 int32_t reslen = 0;
1502 int32_t i, n;
1503 int32_t len;
1504 ExtensionListEntry *kwdFirst = NULL;
1505 ExtensionListEntry *kwd;
1506 const char *key, *type;
1507 char *kwdBuf = NULL;
1508 int32_t kwdBufLength = capacity;
1509 UBool posixVariant = FALSE;
1510
1511 if (U_FAILURE(*status)) {
1512 return 0;
1513 }
1514
1515 kwdBuf = (char*)uprv_malloc(kwdBufLength);
1516 if (kwdBuf == NULL) {
1517 *status = U_MEMORY_ALLOCATION_ERROR;
1518 return 0;
1519 }
1520
1521 /* Determine if variants already exists */
1522 if (ultag_getVariantsSize(langtag)) {
1523 posixVariant = TRUE;
1524 }
1525
1526 n = ultag_getExtensionsSize(langtag);
1527
1528 /* resolve locale keywords and reordering keys */
1529 for (i = 0; i < n; i++) {
1530 key = ultag_getExtensionKey(langtag, i);
1531 type = ultag_getExtensionValue(langtag, i);
1532 if (*key == LDMLEXT) {
1533 _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
1534 if (U_FAILURE(*status)) {
1535 break;
1536 }
1537 } else {
1538 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1539 if (kwd == NULL) {
1540 *status = U_MEMORY_ALLOCATION_ERROR;
1541 break;
1542 }
1543 kwd->key = key;
1544 kwd->value = type;
1545 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1546 uprv_free(kwd);
1547 *status = U_ILLEGAL_ARGUMENT_ERROR;
1548 break;
1549 }
1550 }
1551 }
1552
1553 if (U_SUCCESS(*status)) {
1554 type = ultag_getPrivateUse(langtag);
1555 if ((int32_t)uprv_strlen(type) > 0) {
1556 /* add private use as a keyword */
1557 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1558 if (kwd == NULL) {
1559 *status = U_MEMORY_ALLOCATION_ERROR;
1560 } else {
1561 kwd->key = PRIVATEUSE_KEY;
1562 kwd->value = type;
1563 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1564 uprv_free(kwd);
1565 *status = U_ILLEGAL_ARGUMENT_ERROR;
1566 }
1567 }
1568 }
1569 }
1570
1571 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1572
1573 if (U_SUCCESS(*status) && posixVariant) {
1574 len = (int32_t) uprv_strlen(_POSIX);
1575 if (reslen < capacity) {
1576 uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
1577 }
1578 reslen += len;
1579 }
1580
1581 if (U_SUCCESS(*status) && kwdFirst != NULL) {
1582 /* write out the sorted keywords */
1583 UBool firstValue = TRUE;
1584 kwd = kwdFirst;
1585 do {
1586 if (reslen < capacity) {
1587 if (firstValue) {
1588 /* '@' */
1589 *(appendAt + reslen) = LOCALE_EXT_SEP;
1590 firstValue = FALSE;
1591 } else {
1592 /* ';' */
1593 *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
1594 }
1595 }
1596 reslen++;
1597
1598 /* key */
1599 len = (int32_t)uprv_strlen(kwd->key);
1600 if (reslen < capacity) {
1601 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
1602 }
1603 reslen += len;
1604
1605 /* '=' */
1606 if (reslen < capacity) {
1607 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
1608 }
1609 reslen++;
1610
1611 /* type */
1612 len = (int32_t)uprv_strlen(kwd->value);
1613 if (reslen < capacity) {
1614 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
1615 }
1616 reslen += len;
1617
1618 kwd = kwd->next;
1619 } while (kwd);
1620 }
1621
1622 /* clean up */
1623 kwd = kwdFirst;
1624 while (kwd != NULL) {
1625 ExtensionListEntry *tmpKwd = kwd->next;
1626 uprv_free(kwd);
1627 kwd = tmpKwd;
1628 }
1629
1630 uprv_free(kwdBuf);
1631
1632 if (U_FAILURE(*status)) {
1633 return 0;
1634 }
1635
1636 return u_terminateChars(appendAt, capacity, reslen, status);
1637 }
1638
1639 static int32_t
_appendPrivateuseToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool hadPosix,UErrorCode * status)1640 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1641 char buf[ULOC_FULLNAME_CAPACITY];
1642 char tmpAppend[ULOC_FULLNAME_CAPACITY];
1643 UErrorCode tmpStatus = U_ZERO_ERROR;
1644 int32_t len, i;
1645 int32_t reslen = 0;
1646
1647 if (U_FAILURE(*status)) {
1648 return 0;
1649 }
1650
1651 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1652 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1653 if (strict) {
1654 *status = U_ILLEGAL_ARGUMENT_ERROR;
1655 }
1656 return 0;
1657 }
1658
1659 if (len > 0) {
1660 char *p, *pPriv;
1661 UBool bNext = TRUE;
1662 UBool firstValue = TRUE;
1663 UBool writeValue;
1664
1665 pPriv = NULL;
1666 p = buf;
1667 while (bNext) {
1668 writeValue = FALSE;
1669 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1670 if (*p == 0) {
1671 bNext = FALSE;
1672 } else {
1673 *p = 0; /* terminate */
1674 }
1675 if (pPriv != NULL) {
1676 /* Private use in the canonical format is lowercase in BCP47 */
1677 for (i = 0; *(pPriv + i) != 0; i++) {
1678 *(pPriv + i) = uprv_tolower(*(pPriv + i));
1679 }
1680
1681 /* validate */
1682 if (_isPrivateuseValueSubtag(pPriv, -1)) {
1683 if (firstValue) {
1684 if (!_isVariantSubtag(pPriv, -1)) {
1685 writeValue = TRUE;
1686 }
1687 } else {
1688 writeValue = TRUE;
1689 }
1690 } else if (strict) {
1691 *status = U_ILLEGAL_ARGUMENT_ERROR;
1692 break;
1693 } else {
1694 break;
1695 }
1696
1697 if (writeValue) {
1698 if (reslen < capacity) {
1699 tmpAppend[reslen++] = SEP;
1700 }
1701
1702 if (firstValue) {
1703 if (reslen < capacity) {
1704 tmpAppend[reslen++] = *PRIVATEUSE_KEY;
1705 }
1706
1707 if (reslen < capacity) {
1708 tmpAppend[reslen++] = SEP;
1709 }
1710
1711 len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
1712 if (reslen < capacity) {
1713 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
1714 }
1715 reslen += len;
1716
1717 if (reslen < capacity) {
1718 tmpAppend[reslen++] = SEP;
1719 }
1720
1721 firstValue = FALSE;
1722 }
1723
1724 len = (int32_t)uprv_strlen(pPriv);
1725 if (reslen < capacity) {
1726 uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
1727 }
1728 reslen += len;
1729 }
1730 }
1731 /* reset private use starting position */
1732 pPriv = NULL;
1733 } else if (pPriv == NULL) {
1734 pPriv = p;
1735 }
1736 p++;
1737 }
1738
1739 if (U_FAILURE(*status)) {
1740 return 0;
1741 }
1742 }
1743
1744 if (U_SUCCESS(*status)) {
1745 len = reslen;
1746 if (reslen < capacity) {
1747 uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
1748 }
1749 }
1750
1751 u_terminateChars(appendAt, capacity, reslen, status);
1752
1753 return reslen;
1754 }
1755
1756 /*
1757 * -------------------------------------------------
1758 *
1759 * ultag_ functions
1760 *
1761 * -------------------------------------------------
1762 */
1763
/*
 * Bit flags used by the parser.
 * ultag_parse() keeps a mask of these in its `next` variable; a set bit means
 * that the corresponding kind of subtag is acceptable at the current position.
 */
#define LANG 0x0001 /* language subtag */
#define EXTL 0x0002 /* extlang subtag */
#define SCRT 0x0004 /* script subtag */
#define REGN 0x0008 /* region subtag */
#define VART 0x0010 /* variant subtag */
#define EXTS 0x0020 /* extension singleton */
#define EXTV 0x0040 /* extension value subtag */
#define PRIV 0x0080 /* private use singleton */
1773
1774 static ULanguageTag*
ultag_parse(const char * tag,int32_t tagLen,int32_t * parsedLen,UErrorCode * status)1775 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
1776 ULanguageTag *t;
1777 char *tagBuf;
1778 int16_t next;
1779 char *pSubtag, *pNext, *pLastGoodPosition;
1780 int32_t subtagLen;
1781 int32_t extlangIdx;
1782 ExtensionListEntry *pExtension;
1783 char *pExtValueSubtag, *pExtValueSubtagEnd;
1784 int32_t i;
1785 UBool privateuseVar = FALSE;
1786 int32_t grandfatheredLen = 0;
1787
1788 if (parsedLen != NULL) {
1789 *parsedLen = 0;
1790 }
1791
1792 if (U_FAILURE(*status)) {
1793 return NULL;
1794 }
1795
1796 if (tagLen < 0) {
1797 tagLen = (int32_t)uprv_strlen(tag);
1798 }
1799
1800 /* copy the entire string */
1801 tagBuf = (char*)uprv_malloc(tagLen + 1);
1802 if (tagBuf == NULL) {
1803 *status = U_MEMORY_ALLOCATION_ERROR;
1804 return NULL;
1805 }
1806 uprv_memcpy(tagBuf, tag, tagLen);
1807 *(tagBuf + tagLen) = 0;
1808
1809 /* create a ULanguageTag */
1810 t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
1811 if (t == NULL) {
1812 uprv_free(tagBuf);
1813 *status = U_MEMORY_ALLOCATION_ERROR;
1814 return NULL;
1815 }
1816 _initializeULanguageTag(t);
1817 t->buf = tagBuf;
1818
1819 if (tagLen < MINLEN) {
1820 /* the input tag is too short - return empty ULanguageTag */
1821 return t;
1822 }
1823
1824 /* check if the tag is grandfathered */
1825 for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
1826 if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
1827 int32_t newTagLength;
1828
1829 grandfatheredLen = tagLen; /* back up for output parsedLen */
1830 newTagLength = uprv_strlen(GRANDFATHERED[i+1]);
1831 if (tagLen < newTagLength) {
1832 uprv_free(tagBuf);
1833 tagBuf = (char*)uprv_malloc(newTagLength + 1);
1834 if (tagBuf == NULL) {
1835 *status = U_MEMORY_ALLOCATION_ERROR;
1836 ultag_close(t);
1837 return NULL;
1838 }
1839 t->buf = tagBuf;
1840 tagLen = newTagLength;
1841 }
1842 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
1843 break;
1844 }
1845 }
1846
1847 /*
1848 * langtag = language
1849 * ["-" script]
1850 * ["-" region]
1851 * *("-" variant)
1852 * *("-" extension)
1853 * ["-" privateuse]
1854 */
1855
1856 next = LANG | PRIV;
1857 pNext = pLastGoodPosition = tagBuf;
1858 extlangIdx = 0;
1859 pExtension = NULL;
1860 pExtValueSubtag = NULL;
1861 pExtValueSubtagEnd = NULL;
1862
1863 while (pNext) {
1864 char *pSep;
1865
1866 pSubtag = pNext;
1867
1868 /* locate next separator char */
1869 pSep = pSubtag;
1870 while (*pSep) {
1871 if (*pSep == SEP) {
1872 break;
1873 }
1874 pSep++;
1875 }
1876 if (*pSep == 0) {
1877 /* last subtag */
1878 pNext = NULL;
1879 } else {
1880 pNext = pSep + 1;
1881 }
1882 subtagLen = (int32_t)(pSep - pSubtag);
1883
1884 if (next & LANG) {
1885 if (_isLanguageSubtag(pSubtag, subtagLen)) {
1886 *pSep = 0; /* terminate */
1887 t->language = T_CString_toLowerCase(pSubtag);
1888
1889 pLastGoodPosition = pSep;
1890 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1891 continue;
1892 }
1893 }
1894 if (next & EXTL) {
1895 if (_isExtlangSubtag(pSubtag, subtagLen)) {
1896 *pSep = 0;
1897 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
1898
1899 pLastGoodPosition = pSep;
1900 if (extlangIdx < 3) {
1901 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1902 } else {
1903 next = SCRT | REGN | VART | EXTS | PRIV;
1904 }
1905 continue;
1906 }
1907 }
1908 if (next & SCRT) {
1909 if (_isScriptSubtag(pSubtag, subtagLen)) {
1910 char *p = pSubtag;
1911
1912 *pSep = 0;
1913
1914 /* to title case */
1915 *p = uprv_toupper(*p);
1916 p++;
1917 for (; *p; p++) {
1918 *p = uprv_tolower(*p);
1919 }
1920
1921 t->script = pSubtag;
1922
1923 pLastGoodPosition = pSep;
1924 next = REGN | VART | EXTS | PRIV;
1925 continue;
1926 }
1927 }
1928 if (next & REGN) {
1929 if (_isRegionSubtag(pSubtag, subtagLen)) {
1930 *pSep = 0;
1931 t->region = T_CString_toUpperCase(pSubtag);
1932
1933 pLastGoodPosition = pSep;
1934 next = VART | EXTS | PRIV;
1935 continue;
1936 }
1937 }
1938 if (next & VART) {
1939 if (_isVariantSubtag(pSubtag, subtagLen) ||
1940 (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
1941 VariantListEntry *var;
1942 UBool isAdded;
1943
1944 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
1945 if (var == NULL) {
1946 *status = U_MEMORY_ALLOCATION_ERROR;
1947 goto error;
1948 }
1949 *pSep = 0;
1950 var->variant = T_CString_toUpperCase(pSubtag);
1951 isAdded = _addVariantToList(&(t->variants), var);
1952 if (!isAdded) {
1953 /* duplicated variant entry */
1954 uprv_free(var);
1955 break;
1956 }
1957 pLastGoodPosition = pSep;
1958 next = VART | EXTS | PRIV;
1959 continue;
1960 }
1961 }
1962 if (next & EXTS) {
1963 if (_isExtensionSingleton(pSubtag, subtagLen)) {
1964 if (pExtension != NULL) {
1965 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
1966 /* the previous extension is incomplete */
1967 uprv_free(pExtension);
1968 pExtension = NULL;
1969 break;
1970 }
1971
1972 /* terminate the previous extension value */
1973 *pExtValueSubtagEnd = 0;
1974 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
1975
1976 /* insert the extension to the list */
1977 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
1978 pLastGoodPosition = pExtValueSubtagEnd;
1979 } else {
1980 /* stop parsing here */
1981 uprv_free(pExtension);
1982 pExtension = NULL;
1983 break;
1984 }
1985 }
1986
1987 /* create a new extension */
1988 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1989 if (pExtension == NULL) {
1990 *status = U_MEMORY_ALLOCATION_ERROR;
1991 goto error;
1992 }
1993 *pSep = 0;
1994 pExtension->key = T_CString_toLowerCase(pSubtag);
1995 pExtension->value = NULL; /* will be set later */
1996
1997 /*
1998 * reset the start and the end location of extension value
1999 * subtags for this extension
2000 */
2001 pExtValueSubtag = NULL;
2002 pExtValueSubtagEnd = NULL;
2003
2004 next = EXTV;
2005 continue;
2006 }
2007 }
2008 if (next & EXTV) {
2009 if (_isExtensionSubtag(pSubtag, subtagLen)) {
2010 if (pExtValueSubtag == NULL) {
2011 /* if the start postion of this extension's value is not yet,
2012 this one is the first value subtag */
2013 pExtValueSubtag = pSubtag;
2014 }
2015
2016 /* Mark the end of this subtag */
2017 pExtValueSubtagEnd = pSep;
2018 next = EXTS | EXTV | PRIV;
2019
2020 continue;
2021 }
2022 }
2023 if (next & PRIV) {
2024 if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
2025 char *pPrivuseVal;
2026
2027 if (pExtension != NULL) {
2028 /* Process the last extension */
2029 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2030 /* the previous extension is incomplete */
2031 uprv_free(pExtension);
2032 pExtension = NULL;
2033 break;
2034 } else {
2035 /* terminate the previous extension value */
2036 *pExtValueSubtagEnd = 0;
2037 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2038
2039 /* insert the extension to the list */
2040 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2041 pLastGoodPosition = pExtValueSubtagEnd;
2042 pExtension = NULL;
2043 } else {
2044 /* stop parsing here */
2045 uprv_free(pExtension);
2046 pExtension = NULL;
2047 break;
2048 }
2049 }
2050 }
2051
2052 /* The rest of part will be private use value subtags */
2053 if (pNext == NULL) {
2054 /* empty private use subtag */
2055 break;
2056 }
2057 /* back up the private use value start position */
2058 pPrivuseVal = pNext;
2059
2060 /* validate private use value subtags */
2061 while (pNext) {
2062 pSubtag = pNext;
2063 pSep = pSubtag;
2064 while (*pSep) {
2065 if (*pSep == SEP) {
2066 break;
2067 }
2068 pSep++;
2069 }
2070 if (*pSep == 0) {
2071 /* last subtag */
2072 pNext = NULL;
2073 } else {
2074 pNext = pSep + 1;
2075 }
2076 subtagLen = (int32_t)(pSep - pSubtag);
2077
2078 if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
2079 *pSep = 0;
2080 next = VART;
2081 privateuseVar = TRUE;
2082 break;
2083 } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
2084 pLastGoodPosition = pSep;
2085 } else {
2086 break;
2087 }
2088 }
2089
2090 if (next == VART) {
2091 continue;
2092 }
2093
2094 if (pLastGoodPosition - pPrivuseVal > 0) {
2095 *pLastGoodPosition = 0;
2096 t->privateuse = T_CString_toLowerCase(pPrivuseVal);
2097 }
2098 /* No more subtags, exiting the parse loop */
2099 break;
2100 }
2101 break;
2102 }
2103
2104 /* If we fell through here, it means this subtag is illegal - quit parsing */
2105 break;
2106 }
2107
2108 if (pExtension != NULL) {
2109 /* Process the last extension */
2110 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2111 /* the previous extension is incomplete */
2112 uprv_free(pExtension);
2113 } else {
2114 /* terminate the previous extension value */
2115 *pExtValueSubtagEnd = 0;
2116 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2117 /* insert the extension to the list */
2118 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2119 pLastGoodPosition = pExtValueSubtagEnd;
2120 } else {
2121 uprv_free(pExtension);
2122 }
2123 }
2124 }
2125
2126 if (parsedLen != NULL) {
2127 *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
2128 }
2129
2130 return t;
2131
2132 error:
2133 ultag_close(t);
2134 return NULL;
2135 }
2136
2137 static void
ultag_close(ULanguageTag * langtag)2138 ultag_close(ULanguageTag* langtag) {
2139
2140 if (langtag == NULL) {
2141 return;
2142 }
2143
2144 uprv_free(langtag->buf);
2145
2146 if (langtag->variants) {
2147 VariantListEntry *curVar = langtag->variants;
2148 while (curVar) {
2149 VariantListEntry *nextVar = curVar->next;
2150 uprv_free(curVar);
2151 curVar = nextVar;
2152 }
2153 }
2154
2155 if (langtag->extensions) {
2156 ExtensionListEntry *curExt = langtag->extensions;
2157 while (curExt) {
2158 ExtensionListEntry *nextExt = curExt->next;
2159 uprv_free(curExt);
2160 curExt = nextExt;
2161 }
2162 }
2163
2164 uprv_free(langtag);
2165 }
2166
2167 static const char*
ultag_getLanguage(const ULanguageTag * langtag)2168 ultag_getLanguage(const ULanguageTag* langtag) {
2169 return langtag->language;
2170 }
2171
#if 0
/*
 * Disabled code (excluded from compilation by the enclosing #if 0).
 * Looks the tag's language up in the DEPRECATEDLANGS pair table and returns
 * the paired entry on a match, otherwise the language itself.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    int32_t i;
    for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) {
            return DEPRECATEDLANGS[i + 1];
        }
    }
    return langtag->language;
}
#endif
2184
2185 static const char*
ultag_getExtlang(const ULanguageTag * langtag,int32_t idx)2186 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
2187 if (idx >= 0 && idx < MAXEXTLANG) {
2188 return langtag->extlang[idx];
2189 }
2190 return NULL;
2191 }
2192
2193 static int32_t
ultag_getExtlangSize(const ULanguageTag * langtag)2194 ultag_getExtlangSize(const ULanguageTag* langtag) {
2195 int32_t size = 0;
2196 int32_t i;
2197 for (i = 0; i < MAXEXTLANG; i++) {
2198 if (langtag->extlang[i]) {
2199 size++;
2200 }
2201 }
2202 return size;
2203 }
2204
2205 static const char*
ultag_getScript(const ULanguageTag * langtag)2206 ultag_getScript(const ULanguageTag* langtag) {
2207 return langtag->script;
2208 }
2209
2210 static const char*
ultag_getRegion(const ULanguageTag * langtag)2211 ultag_getRegion(const ULanguageTag* langtag) {
2212 return langtag->region;
2213 }
2214
2215 static const char*
ultag_getVariant(const ULanguageTag * langtag,int32_t idx)2216 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
2217 const char *var = NULL;
2218 VariantListEntry *cur = langtag->variants;
2219 int32_t i = 0;
2220 while (cur) {
2221 if (i == idx) {
2222 var = cur->variant;
2223 break;
2224 }
2225 cur = cur->next;
2226 i++;
2227 }
2228 return var;
2229 }
2230
2231 static int32_t
ultag_getVariantsSize(const ULanguageTag * langtag)2232 ultag_getVariantsSize(const ULanguageTag* langtag) {
2233 int32_t size = 0;
2234 VariantListEntry *cur = langtag->variants;
2235 while (TRUE) {
2236 if (cur == NULL) {
2237 break;
2238 }
2239 size++;
2240 cur = cur->next;
2241 }
2242 return size;
2243 }
2244
2245 static const char*
ultag_getExtensionKey(const ULanguageTag * langtag,int32_t idx)2246 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
2247 const char *key = NULL;
2248 ExtensionListEntry *cur = langtag->extensions;
2249 int32_t i = 0;
2250 while (cur) {
2251 if (i == idx) {
2252 key = cur->key;
2253 break;
2254 }
2255 cur = cur->next;
2256 i++;
2257 }
2258 return key;
2259 }
2260
2261 static const char*
ultag_getExtensionValue(const ULanguageTag * langtag,int32_t idx)2262 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
2263 const char *val = NULL;
2264 ExtensionListEntry *cur = langtag->extensions;
2265 int32_t i = 0;
2266 while (cur) {
2267 if (i == idx) {
2268 val = cur->value;
2269 break;
2270 }
2271 cur = cur->next;
2272 i++;
2273 }
2274 return val;
2275 }
2276
2277 static int32_t
ultag_getExtensionsSize(const ULanguageTag * langtag)2278 ultag_getExtensionsSize(const ULanguageTag* langtag) {
2279 int32_t size = 0;
2280 ExtensionListEntry *cur = langtag->extensions;
2281 while (TRUE) {
2282 if (cur == NULL) {
2283 break;
2284 }
2285 size++;
2286 cur = cur->next;
2287 }
2288 return size;
2289 }
2290
2291 static const char*
ultag_getPrivateUse(const ULanguageTag * langtag)2292 ultag_getPrivateUse(const ULanguageTag* langtag) {
2293 return langtag->privateuse;
2294 }
2295
#if 0
/*
 * Disabled code (excluded from compilation by the enclosing #if 0).
 * Accessor: the grandfathered field of a parsed tag.
 */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag) {
    return langtag->grandfathered;
}
#endif
2302
2303
2304 /*
2305 * -------------------------------------------------
2306 *
2307 * Locale/BCP47 conversion APIs, exposed as uloc_*
2308 *
2309 * -------------------------------------------------
2310 */
2311 U_CAPI int32_t U_EXPORT2
uloc_toLanguageTag(const char * localeID,char * langtag,int32_t langtagCapacity,UBool strict,UErrorCode * status)2312 uloc_toLanguageTag(const char* localeID,
2313 char* langtag,
2314 int32_t langtagCapacity,
2315 UBool strict,
2316 UErrorCode* status) {
2317 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2318 char canonical[256];
2319 int32_t reslen = 0;
2320 UErrorCode tmpStatus = U_ZERO_ERROR;
2321 UBool hadPosix = FALSE;
2322 const char* pKeywordStart;
2323
2324 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
2325 canonical[0] = 0;
2326 if (uprv_strlen(localeID) > 0) {
2327 uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
2328 if (tmpStatus != U_ZERO_ERROR) {
2329 *status = U_ILLEGAL_ARGUMENT_ERROR;
2330 return 0;
2331 }
2332 }
2333
2334 /* For handling special case - private use only tag */
2335 pKeywordStart = locale_getKeywordsStart(canonical);
2336 if (pKeywordStart == canonical) {
2337 UEnumeration *kwdEnum;
2338 int kwdCnt = 0;
2339 UBool done = FALSE;
2340
2341 kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
2342 if (kwdEnum != NULL) {
2343 kwdCnt = uenum_count(kwdEnum, &tmpStatus);
2344 if (kwdCnt == 1) {
2345 const char *key;
2346 int32_t len = 0;
2347
2348 key = uenum_next(kwdEnum, &len, &tmpStatus);
2349 if (len == 1 && *key == PRIVATEUSE) {
2350 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2351 buf[0] = PRIVATEUSE;
2352 buf[1] = SEP;
2353 len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
2354 if (U_SUCCESS(tmpStatus)) {
2355 if (_isPrivateuseValueSubtags(&buf[2], len)) {
2356 /* return private use only tag */
2357 reslen = len + 2;
2358 uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
2359 u_terminateChars(langtag, langtagCapacity, reslen, status);
2360 done = TRUE;
2361 } else if (strict) {
2362 *status = U_ILLEGAL_ARGUMENT_ERROR;
2363 done = TRUE;
2364 }
2365 /* if not strict mode, then "und" will be returned */
2366 } else {
2367 *status = U_ILLEGAL_ARGUMENT_ERROR;
2368 done = TRUE;
2369 }
2370 }
2371 }
2372 uenum_close(kwdEnum);
2373 if (done) {
2374 return reslen;
2375 }
2376 }
2377 }
2378
2379 reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
2380 reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2381 reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2382 reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
2383 reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2384 reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2385
2386 return reslen;
2387 }
2388
2389
2390 U_CAPI int32_t U_EXPORT2
uloc_forLanguageTag(const char * langtag,char * localeID,int32_t localeIDCapacity,int32_t * parsedLength,UErrorCode * status)2391 uloc_forLanguageTag(const char* langtag,
2392 char* localeID,
2393 int32_t localeIDCapacity,
2394 int32_t* parsedLength,
2395 UErrorCode* status) {
2396 ULanguageTag *lt;
2397 int32_t reslen = 0;
2398 const char *subtag, *p;
2399 int32_t len;
2400 int32_t i, n;
2401 UBool noRegion = TRUE;
2402
2403 lt = ultag_parse(langtag, -1, parsedLength, status);
2404 if (U_FAILURE(*status)) {
2405 return 0;
2406 }
2407
2408 /* language */
2409 subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
2410 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
2411 len = (int32_t)uprv_strlen(subtag);
2412 if (len > 0) {
2413 if (reslen < localeIDCapacity) {
2414 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
2415 }
2416 reslen += len;
2417 }
2418 }
2419
2420 /* script */
2421 subtag = ultag_getScript(lt);
2422 len = (int32_t)uprv_strlen(subtag);
2423 if (len > 0) {
2424 if (reslen < localeIDCapacity) {
2425 *(localeID + reslen) = LOCALE_SEP;
2426 }
2427 reslen++;
2428
2429 /* write out the script in title case */
2430 p = subtag;
2431 while (*p) {
2432 if (reslen < localeIDCapacity) {
2433 if (p == subtag) {
2434 *(localeID + reslen) = uprv_toupper(*p);
2435 } else {
2436 *(localeID + reslen) = *p;
2437 }
2438 }
2439 reslen++;
2440 p++;
2441 }
2442 }
2443
2444 /* region */
2445 subtag = ultag_getRegion(lt);
2446 len = (int32_t)uprv_strlen(subtag);
2447 if (len > 0) {
2448 if (reslen < localeIDCapacity) {
2449 *(localeID + reslen) = LOCALE_SEP;
2450 }
2451 reslen++;
2452 /* write out the retion in upper case */
2453 p = subtag;
2454 while (*p) {
2455 if (reslen < localeIDCapacity) {
2456 *(localeID + reslen) = uprv_toupper(*p);
2457 }
2458 reslen++;
2459 p++;
2460 }
2461 noRegion = FALSE;
2462 }
2463
2464 /* variants */
2465 n = ultag_getVariantsSize(lt);
2466 if (n > 0) {
2467 if (noRegion) {
2468 if (reslen < localeIDCapacity) {
2469 *(localeID + reslen) = LOCALE_SEP;
2470 }
2471 reslen++;
2472 }
2473
2474 for (i = 0; i < n; i++) {
2475 subtag = ultag_getVariant(lt, i);
2476 if (reslen < localeIDCapacity) {
2477 *(localeID + reslen) = LOCALE_SEP;
2478 }
2479 reslen++;
2480 /* write out the variant in upper case */
2481 p = subtag;
2482 while (*p) {
2483 if (reslen < localeIDCapacity) {
2484 *(localeID + reslen) = uprv_toupper(*p);
2485 }
2486 reslen++;
2487 p++;
2488 }
2489 }
2490 }
2491
2492 /* keywords */
2493 n = ultag_getExtensionsSize(lt);
2494 subtag = ultag_getPrivateUse(lt);
2495 if (n > 0 || uprv_strlen(subtag) > 0) {
2496 if (reslen == 0 && n > 0) {
2497 /* need a language */
2498 if (reslen < localeIDCapacity) {
2499 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
2500 }
2501 reslen += LANG_UND_LEN;
2502 }
2503 len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
2504 reslen += len;
2505 }
2506
2507 ultag_close(lt);
2508 return u_terminateChars(localeID, localeIDCapacity, reslen, status);
2509 }
2510
2511
2512