1 /*
2 **********************************************************************
3 * Copyright (C) 2009-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
7
8 #include "unicode/utypes.h"
9 #include "unicode/ures.h"
10 #include "unicode/putil.h"
11 #include "unicode/uloc.h"
12 #include "ustr_imp.h"
13 #include "cmemory.h"
14 #include "cstring.h"
15 #include "putilimp.h"
16 #include "uinvchar.h"
17 #include "ulocimp.h"
18 #include "uassert.h"
19
/* Node of a singly-linked list of variant subtags. */
typedef struct VariantListEntry {
    const char              *variant;   /* NUL-terminated variant subtag */
    struct VariantListEntry *next;      /* following node; NULL at the tail */
} VariantListEntry;
25
/* Node of a singly-linked list of attribute values. */
typedef struct AttributeListEntry {
    const char                *attribute;   /* NUL-terminated attribute text */
    struct AttributeListEntry *next;        /* following node; NULL at the tail */
} AttributeListEntry;
31
/* Node of a singly-linked list of extensions, each a key/value pair. */
typedef struct ExtensionListEntry {
    const char                *key;     /* extension key */
    const char                *value;   /* value belonging to key */
    struct ExtensionListEntry *next;    /* following node; NULL at the tail */
} ExtensionListEntry;
38
39 #define MAXEXTLANG 3
40 typedef struct ULanguageTag {
41 char *buf; /* holding parsed subtags */
42 const char *language;
43 const char *extlang[MAXEXTLANG];
44 const char *script;
45 const char *region;
46 VariantListEntry *variants;
47 ExtensionListEntry *extensions;
48 const char *privateuse;
49 const char *grandfathered;
50 } ULanguageTag;
51
#define MINLEN      2

/* Characters with a special role in a BCP 47 language tag. */
#define SEP         '-'     /* subtag separator */
#define PRIVATEUSE  'x'     /* private use singleton */
#define LDMLEXT     'u'     /* LDML (Unicode locale) extension singleton */

/* Characters with a special role in an ICU locale ID. */
#define LOCALE_SEP              '_'
#define LOCALE_EXT_SEP          '@'
#define LOCALE_KEYWORD_SEP      ';'
#define LOCALE_KEY_TYPE_SEP     '='

/* Character classification. Note that ISNUMERIC evaluates its argument twice. */
#define ISALPHA(c)      uprv_isASCIILetter(c)
#define ISNUMERIC(c)    ((c) >= '0' && (c) <= '9')

/* Fixed strings used while converting between locale IDs and language tags. */
static const char EMPTY[]                   = "";
static const char LANG_UND[]                = "und";
static const char PRIVATEUSE_KEY[]          = "x";
static const char _POSIX[]                  = "_POSIX";
static const char POSIX_KEY[]               = "va";
static const char POSIX_VALUE[]             = "posix";
static const char LOCALE_ATTRIBUTE_KEY[]    = "attribute";
static const char PRIVUSE_VARIANT_PREFIX[]  = "lvariant";
static const char LOCALE_TYPE_YES[]         = "yes";

/* Length of LANG_UND without its terminating NUL. */
#define LANG_UND_LEN 3
76
/*
 * Flat table of string pairs: the even index holds a grandfathered tag, the
 * following odd index holds its preferred replacement. A NULL pair ends
 * the table.
 */
static const char* const GRANDFATHERED[] = {
/*  grandfathered     preferred */
    "art-lojban",     "jbo",
    "cel-gaulish",    "xtg-x-cel-gaulish",
    "en-GB-oed",      "en-GB-x-oed",
    "i-ami",          "ami",
    "i-bnn",          "bnn",
    "i-default",      "en-x-i-default",
    "i-enochian",     "und-x-i-enochian",
    "i-hak",          "hak",
    "i-klingon",      "tlh",
    "i-lux",          "lb",
    "i-mingo",        "see-x-i-mingo",
    "i-navajo",       "nv",
    "i-pwn",          "pwn",
    "i-tao",          "tao",
    "i-tay",          "tay",
    "i-tsu",          "tsu",
    "no-bok",         "nb",
    "no-nyn",         "nn",
    "sgn-be-fr",      "sfb",
    "sgn-be-nl",      "vgt",
    "sgn-ch-de",      "sgg",
    "zh-guoyu",       "cmn",
    "zh-hakka",       "hak",
    "zh-min",         "nan-x-zh-min",
    "zh-min-nan",     "nan",
    "zh-xiang",       "hsn",
    NULL,             NULL
};
107
/*
 * Flat table of 4-byte strings laid out as pairs: the even index holds a
 * deprecated language code, the following odd index holds its replacement.
 */
static const char DEPRECATEDLANGS[][4] = {
/*  deprecated  new */
    "iw",       "he",
    "ji",       "yi",
    "in",       "id"
};
114
115 /*
116 * -------------------------------------------------
117 *
118 * These ultag_ functions may be exposed as APIs later
119 *
120 * -------------------------------------------------
121 */
122
123 static ULanguageTag*
124 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
125
126 static void
127 ultag_close(ULanguageTag* langtag);
128
129 static const char*
130 ultag_getLanguage(const ULanguageTag* langtag);
131
132 #if 0
133 static const char*
134 ultag_getJDKLanguage(const ULanguageTag* langtag);
135 #endif
136
137 static const char*
138 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
139
140 static int32_t
141 ultag_getExtlangSize(const ULanguageTag* langtag);
142
143 static const char*
144 ultag_getScript(const ULanguageTag* langtag);
145
146 static const char*
147 ultag_getRegion(const ULanguageTag* langtag);
148
149 static const char*
150 ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
151
152 static int32_t
153 ultag_getVariantsSize(const ULanguageTag* langtag);
154
155 static const char*
156 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
157
158 static const char*
159 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
160
161 static int32_t
162 ultag_getExtensionsSize(const ULanguageTag* langtag);
163
164 static const char*
165 ultag_getPrivateUse(const ULanguageTag* langtag);
166
167 #if 0
168 static const char*
169 ultag_getGrandfathered(const ULanguageTag* langtag);
170 #endif
171
172 /*
173 * -------------------------------------------------
174 *
175 * Language subtag syntax validation functions
176 *
177 * -------------------------------------------------
178 */
179
180 static UBool
_isAlphaString(const char * s,int32_t len)181 _isAlphaString(const char* s, int32_t len) {
182 int32_t i;
183 for (i = 0; i < len; i++) {
184 if (!ISALPHA(*(s + i))) {
185 return FALSE;
186 }
187 }
188 return TRUE;
189 }
190
191 static UBool
_isNumericString(const char * s,int32_t len)192 _isNumericString(const char* s, int32_t len) {
193 int32_t i;
194 for (i = 0; i < len; i++) {
195 if (!ISNUMERIC(*(s + i))) {
196 return FALSE;
197 }
198 }
199 return TRUE;
200 }
201
202 static UBool
_isAlphaNumericString(const char * s,int32_t len)203 _isAlphaNumericString(const char* s, int32_t len) {
204 int32_t i;
205 for (i = 0; i < len; i++) {
206 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
207 return FALSE;
208 }
209 }
210 return TRUE;
211 }
212
213 static UBool
_isLanguageSubtag(const char * s,int32_t len)214 _isLanguageSubtag(const char* s, int32_t len) {
215 /*
216 * language = 2*3ALPHA ; shortest ISO 639 code
217 * ["-" extlang] ; sometimes followed by
218 * ; extended language subtags
219 * / 4ALPHA ; or reserved for future use
220 * / 5*8ALPHA ; or registered language subtag
221 */
222 if (len < 0) {
223 len = (int32_t)uprv_strlen(s);
224 }
225 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
226 return TRUE;
227 }
228 return FALSE;
229 }
230
231 static UBool
_isExtlangSubtag(const char * s,int32_t len)232 _isExtlangSubtag(const char* s, int32_t len) {
233 /*
234 * extlang = 3ALPHA ; selected ISO 639 codes
235 * *2("-" 3ALPHA) ; permanently reserved
236 */
237 if (len < 0) {
238 len = (int32_t)uprv_strlen(s);
239 }
240 if (len == 3 && _isAlphaString(s, len)) {
241 return TRUE;
242 }
243 return FALSE;
244 }
245
246 static UBool
_isScriptSubtag(const char * s,int32_t len)247 _isScriptSubtag(const char* s, int32_t len) {
248 /*
249 * script = 4ALPHA ; ISO 15924 code
250 */
251 if (len < 0) {
252 len = (int32_t)uprv_strlen(s);
253 }
254 if (len == 4 && _isAlphaString(s, len)) {
255 return TRUE;
256 }
257 return FALSE;
258 }
259
260 static UBool
_isRegionSubtag(const char * s,int32_t len)261 _isRegionSubtag(const char* s, int32_t len) {
262 /*
263 * region = 2ALPHA ; ISO 3166-1 code
264 * / 3DIGIT ; UN M.49 code
265 */
266 if (len < 0) {
267 len = (int32_t)uprv_strlen(s);
268 }
269 if (len == 2 && _isAlphaString(s, len)) {
270 return TRUE;
271 }
272 if (len == 3 && _isNumericString(s, len)) {
273 return TRUE;
274 }
275 return FALSE;
276 }
277
278 static UBool
_isVariantSubtag(const char * s,int32_t len)279 _isVariantSubtag(const char* s, int32_t len) {
280 /*
281 * variant = 5*8alphanum ; registered variants
282 * / (DIGIT 3alphanum)
283 */
284 if (len < 0) {
285 len = (int32_t)uprv_strlen(s);
286 }
287 if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
288 return TRUE;
289 }
290 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
291 return TRUE;
292 }
293 return FALSE;
294 }
295
296 static UBool
_isPrivateuseVariantSubtag(const char * s,int32_t len)297 _isPrivateuseVariantSubtag(const char* s, int32_t len) {
298 /*
299 * variant = 1*8alphanum ; registered variants
300 * / (DIGIT 3alphanum)
301 */
302 if (len < 0) {
303 len = (int32_t)uprv_strlen(s);
304 }
305 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
306 return TRUE;
307 }
308 return FALSE;
309 }
310
311 static UBool
_isExtensionSingleton(const char * s,int32_t len)312 _isExtensionSingleton(const char* s, int32_t len) {
313 /*
314 * extension = singleton 1*("-" (2*8alphanum))
315 */
316 if (len < 0) {
317 len = (int32_t)uprv_strlen(s);
318 }
319 if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
320 return TRUE;
321 }
322 return FALSE;
323 }
324
325 static UBool
_isExtensionSubtag(const char * s,int32_t len)326 _isExtensionSubtag(const char* s, int32_t len) {
327 /*
328 * extension = singleton 1*("-" (2*8alphanum))
329 */
330 if (len < 0) {
331 len = (int32_t)uprv_strlen(s);
332 }
333 if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
334 return TRUE;
335 }
336 return FALSE;
337 }
338
339 static UBool
_isExtensionSubtags(const char * s,int32_t len)340 _isExtensionSubtags(const char* s, int32_t len) {
341 const char *p = s;
342 const char *pSubtag = NULL;
343
344 if (len < 0) {
345 len = (int32_t)uprv_strlen(s);
346 }
347
348 while ((p - s) < len) {
349 if (*p == SEP) {
350 if (pSubtag == NULL) {
351 return FALSE;
352 }
353 if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
354 return FALSE;
355 }
356 pSubtag = NULL;
357 } else if (pSubtag == NULL) {
358 pSubtag = p;
359 }
360 p++;
361 }
362 if (pSubtag == NULL) {
363 return FALSE;
364 }
365 return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
366 }
367
368 static UBool
_isPrivateuseValueSubtag(const char * s,int32_t len)369 _isPrivateuseValueSubtag(const char* s, int32_t len) {
370 /*
371 * privateuse = "x" 1*("-" (1*8alphanum))
372 */
373 if (len < 0) {
374 len = (int32_t)uprv_strlen(s);
375 }
376 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
377 return TRUE;
378 }
379 return FALSE;
380 }
381
382 static UBool
_isPrivateuseValueSubtags(const char * s,int32_t len)383 _isPrivateuseValueSubtags(const char* s, int32_t len) {
384 const char *p = s;
385 const char *pSubtag = NULL;
386
387 if (len < 0) {
388 len = (int32_t)uprv_strlen(s);
389 }
390
391 while ((p - s) < len) {
392 if (*p == SEP) {
393 if (pSubtag == NULL) {
394 return FALSE;
395 }
396 if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
397 return FALSE;
398 }
399 pSubtag = NULL;
400 } else if (pSubtag == NULL) {
401 pSubtag = p;
402 }
403 p++;
404 }
405 if (pSubtag == NULL) {
406 return FALSE;
407 }
408 return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
409 }
410
411 U_CFUNC UBool
ultag_isUnicodeLocaleKey(const char * s,int32_t len)412 ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
413 if (len < 0) {
414 len = (int32_t)uprv_strlen(s);
415 }
416 if (len == 2 && _isAlphaNumericString(s, len)) {
417 return TRUE;
418 }
419 return FALSE;
420 }
421
422 U_CFUNC UBool
ultag_isUnicodeLocaleType(const char * s,int32_t len)423 ultag_isUnicodeLocaleType(const char*s, int32_t len) {
424 const char* p;
425 int32_t subtagLen = 0;
426
427 if (len < 0) {
428 len = (int32_t)uprv_strlen(s);
429 }
430
431 for (p = s; len > 0; p++, len--) {
432 if (*p == SEP) {
433 if (subtagLen < 3) {
434 return FALSE;
435 }
436 subtagLen = 0;
437 } else if (ISALPHA(*p) || ISNUMERIC(*p)) {
438 subtagLen++;
439 if (subtagLen > 8) {
440 return FALSE;
441 }
442 } else {
443 return FALSE;
444 }
445 }
446
447 return (subtagLen >= 3);
448 }
449 /*
450 * -------------------------------------------------
451 *
452 * Helper functions
453 *
454 * -------------------------------------------------
455 */
456
457 static UBool
_addVariantToList(VariantListEntry ** first,VariantListEntry * var)458 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
459 UBool bAdded = TRUE;
460
461 if (*first == NULL) {
462 var->next = NULL;
463 *first = var;
464 } else {
465 VariantListEntry *prev, *cur;
466 int32_t cmp;
467
468 /* variants order should be preserved */
469 prev = NULL;
470 cur = *first;
471 while (TRUE) {
472 if (cur == NULL) {
473 prev->next = var;
474 var->next = NULL;
475 break;
476 }
477
478 /* Checking for duplicate variant */
479 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
480 if (cmp == 0) {
481 /* duplicated variant */
482 bAdded = FALSE;
483 break;
484 }
485 prev = cur;
486 cur = cur->next;
487 }
488 }
489
490 return bAdded;
491 }
492
493 static UBool
_addAttributeToList(AttributeListEntry ** first,AttributeListEntry * attr)494 _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
495 UBool bAdded = TRUE;
496
497 if (*first == NULL) {
498 attr->next = NULL;
499 *first = attr;
500 } else {
501 AttributeListEntry *prev, *cur;
502 int32_t cmp;
503
504 /* reorder variants in alphabetical order */
505 prev = NULL;
506 cur = *first;
507 while (TRUE) {
508 if (cur == NULL) {
509 prev->next = attr;
510 attr->next = NULL;
511 break;
512 }
513 cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
514 if (cmp < 0) {
515 if (prev == NULL) {
516 *first = attr;
517 } else {
518 prev->next = attr;
519 }
520 attr->next = cur;
521 break;
522 }
523 if (cmp == 0) {
524 /* duplicated variant */
525 bAdded = FALSE;
526 break;
527 }
528 prev = cur;
529 cur = cur->next;
530 }
531 }
532
533 return bAdded;
534 }
535
536
537 static UBool
_addExtensionToList(ExtensionListEntry ** first,ExtensionListEntry * ext,UBool localeToBCP)538 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
539 UBool bAdded = TRUE;
540
541 if (*first == NULL) {
542 ext->next = NULL;
543 *first = ext;
544 } else {
545 ExtensionListEntry *prev, *cur;
546 int32_t cmp;
547
548 /* reorder variants in alphabetical order */
549 prev = NULL;
550 cur = *first;
551 while (TRUE) {
552 if (cur == NULL) {
553 prev->next = ext;
554 ext->next = NULL;
555 break;
556 }
557 if (localeToBCP) {
558 /* special handling for locale to bcp conversion */
559 int32_t len, curlen;
560
561 len = (int32_t)uprv_strlen(ext->key);
562 curlen = (int32_t)uprv_strlen(cur->key);
563
564 if (len == 1 && curlen == 1) {
565 if (*(ext->key) == *(cur->key)) {
566 cmp = 0;
567 } else if (*(ext->key) == PRIVATEUSE) {
568 cmp = 1;
569 } else if (*(cur->key) == PRIVATEUSE) {
570 cmp = -1;
571 } else {
572 cmp = *(ext->key) - *(cur->key);
573 }
574 } else if (len == 1) {
575 cmp = *(ext->key) - LDMLEXT;
576 } else if (curlen == 1) {
577 cmp = LDMLEXT - *(cur->key);
578 } else {
579 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
580 }
581 } else {
582 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
583 }
584 if (cmp < 0) {
585 if (prev == NULL) {
586 *first = ext;
587 } else {
588 prev->next = ext;
589 }
590 ext->next = cur;
591 break;
592 }
593 if (cmp == 0) {
594 /* duplicated extension key */
595 bAdded = FALSE;
596 break;
597 }
598 prev = cur;
599 cur = cur->next;
600 }
601 }
602
603 return bAdded;
604 }
605
606 static void
_initializeULanguageTag(ULanguageTag * langtag)607 _initializeULanguageTag(ULanguageTag* langtag) {
608 int32_t i;
609
610 langtag->buf = NULL;
611
612 langtag->language = EMPTY;
613 for (i = 0; i < MAXEXTLANG; i++) {
614 langtag->extlang[i] = NULL;
615 }
616
617 langtag->script = EMPTY;
618 langtag->region = EMPTY;
619
620 langtag->variants = NULL;
621 langtag->extensions = NULL;
622
623 langtag->grandfathered = EMPTY;
624 langtag->privateuse = EMPTY;
625 }
626
627 static int32_t
_appendLanguageToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)628 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
629 char buf[ULOC_LANG_CAPACITY];
630 UErrorCode tmpStatus = U_ZERO_ERROR;
631 int32_t len, i;
632 int32_t reslen = 0;
633
634 if (U_FAILURE(*status)) {
635 return 0;
636 }
637
638 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
639 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
640 if (strict) {
641 *status = U_ILLEGAL_ARGUMENT_ERROR;
642 return 0;
643 }
644 len = 0;
645 }
646
647 /* Note: returned language code is in lower case letters */
648
649 if (len == 0) {
650 if (reslen < capacity) {
651 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
652 }
653 reslen += LANG_UND_LEN;
654 } else if (!_isLanguageSubtag(buf, len)) {
655 /* invalid language code */
656 if (strict) {
657 *status = U_ILLEGAL_ARGUMENT_ERROR;
658 return 0;
659 }
660 if (reslen < capacity) {
661 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen));
662 }
663 reslen += LANG_UND_LEN;
664 } else {
665 /* resolve deprecated */
666 for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
667 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
668 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
669 len = (int32_t)uprv_strlen(buf);
670 break;
671 }
672 }
673 if (reslen < capacity) {
674 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
675 }
676 reslen += len;
677 }
678 u_terminateChars(appendAt, capacity, reslen, status);
679 return reslen;
680 }
681
682 static int32_t
_appendScriptToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)683 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
684 char buf[ULOC_SCRIPT_CAPACITY];
685 UErrorCode tmpStatus = U_ZERO_ERROR;
686 int32_t len;
687 int32_t reslen = 0;
688
689 if (U_FAILURE(*status)) {
690 return 0;
691 }
692
693 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
694 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
695 if (strict) {
696 *status = U_ILLEGAL_ARGUMENT_ERROR;
697 }
698 return 0;
699 }
700
701 if (len > 0) {
702 if (!_isScriptSubtag(buf, len)) {
703 /* invalid script code */
704 if (strict) {
705 *status = U_ILLEGAL_ARGUMENT_ERROR;
706 }
707 return 0;
708 } else {
709 if (reslen < capacity) {
710 *(appendAt + reslen) = SEP;
711 }
712 reslen++;
713
714 if (reslen < capacity) {
715 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
716 }
717 reslen += len;
718 }
719 }
720 u_terminateChars(appendAt, capacity, reslen, status);
721 return reslen;
722 }
723
724 static int32_t
_appendRegionToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UErrorCode * status)725 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
726 char buf[ULOC_COUNTRY_CAPACITY];
727 UErrorCode tmpStatus = U_ZERO_ERROR;
728 int32_t len;
729 int32_t reslen = 0;
730
731 if (U_FAILURE(*status)) {
732 return 0;
733 }
734
735 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
736 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
737 if (strict) {
738 *status = U_ILLEGAL_ARGUMENT_ERROR;
739 }
740 return 0;
741 }
742
743 if (len > 0) {
744 if (!_isRegionSubtag(buf, len)) {
745 /* invalid region code */
746 if (strict) {
747 *status = U_ILLEGAL_ARGUMENT_ERROR;
748 }
749 return 0;
750 } else {
751 if (reslen < capacity) {
752 *(appendAt + reslen) = SEP;
753 }
754 reslen++;
755
756 if (reslen < capacity) {
757 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
758 }
759 reslen += len;
760 }
761 }
762 u_terminateChars(appendAt, capacity, reslen, status);
763 return reslen;
764 }
765
766 static int32_t
_appendVariantsToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool * hadPosix,UErrorCode * status)767 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
768 char buf[ULOC_FULLNAME_CAPACITY];
769 UErrorCode tmpStatus = U_ZERO_ERROR;
770 int32_t len, i;
771 int32_t reslen = 0;
772
773 if (U_FAILURE(*status)) {
774 return 0;
775 }
776
777 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
778 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
779 if (strict) {
780 *status = U_ILLEGAL_ARGUMENT_ERROR;
781 }
782 return 0;
783 }
784
785 if (len > 0) {
786 char *p, *pVar;
787 UBool bNext = TRUE;
788 VariantListEntry *var;
789 VariantListEntry *varFirst = NULL;
790
791 pVar = NULL;
792 p = buf;
793 while (bNext) {
794 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
795 if (*p == 0) {
796 bNext = FALSE;
797 } else {
798 *p = 0; /* terminate */
799 }
800 if (pVar == NULL) {
801 if (strict) {
802 *status = U_ILLEGAL_ARGUMENT_ERROR;
803 break;
804 }
805 /* ignore empty variant */
806 } else {
807 /* ICU uses upper case letters for variants, but
808 the canonical format is lowercase in BCP47 */
809 for (i = 0; *(pVar + i) != 0; i++) {
810 *(pVar + i) = uprv_tolower(*(pVar + i));
811 }
812
813 /* validate */
814 if (_isVariantSubtag(pVar, -1)) {
815 if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) {
816 /* emit the variant to the list */
817 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
818 if (var == NULL) {
819 *status = U_MEMORY_ALLOCATION_ERROR;
820 break;
821 }
822 var->variant = pVar;
823 if (!_addVariantToList(&varFirst, var)) {
824 /* duplicated variant */
825 uprv_free(var);
826 if (strict) {
827 *status = U_ILLEGAL_ARGUMENT_ERROR;
828 break;
829 }
830 }
831 } else {
832 /* Special handling for POSIX variant, need to remember that we had it and then */
833 /* treat it like an extension later. */
834 *hadPosix = TRUE;
835 }
836 } else if (strict) {
837 *status = U_ILLEGAL_ARGUMENT_ERROR;
838 break;
839 } else if (_isPrivateuseValueSubtag(pVar, -1)) {
840 /* Handle private use subtags separately */
841 break;
842 }
843 }
844 /* reset variant starting position */
845 pVar = NULL;
846 } else if (pVar == NULL) {
847 pVar = p;
848 }
849 p++;
850 }
851
852 if (U_SUCCESS(*status)) {
853 if (varFirst != NULL) {
854 int32_t varLen;
855
856 /* write out validated/normalized variants to the target */
857 var = varFirst;
858 while (var != NULL) {
859 if (reslen < capacity) {
860 *(appendAt + reslen) = SEP;
861 }
862 reslen++;
863 varLen = (int32_t)uprv_strlen(var->variant);
864 if (reslen < capacity) {
865 uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
866 }
867 reslen += varLen;
868 var = var->next;
869 }
870 }
871 }
872
873 /* clean up */
874 var = varFirst;
875 while (var != NULL) {
876 VariantListEntry *tmpVar = var->next;
877 uprv_free(var);
878 var = tmpVar;
879 }
880
881 if (U_FAILURE(*status)) {
882 return 0;
883 }
884 }
885
886 u_terminateChars(appendAt, capacity, reslen, status);
887 return reslen;
888 }
889
890 static int32_t
_appendKeywordsToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool hadPosix,UErrorCode * status)891 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
892 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
893 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
894 int32_t attrBufLength = 0;
895 UBool isAttribute = FALSE;
896 UEnumeration *keywordEnum = NULL;
897 int32_t reslen = 0;
898
899 keywordEnum = uloc_openKeywords(localeID, status);
900 if (U_FAILURE(*status) && !hadPosix) {
901 uenum_close(keywordEnum);
902 return 0;
903 }
904 if (keywordEnum != NULL || hadPosix) {
905 /* reorder extensions */
906 int32_t len;
907 const char *key;
908 ExtensionListEntry *firstExt = NULL;
909 ExtensionListEntry *ext;
910 AttributeListEntry *firstAttr = NULL;
911 AttributeListEntry *attr;
912 char *attrValue;
913 char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
914 char *pExtBuf = extBuf;
915 int32_t extBufCapacity = sizeof(extBuf);
916 const char *bcpKey, *bcpValue;
917 UErrorCode tmpStatus = U_ZERO_ERROR;
918 int32_t keylen;
919 UBool isBcpUExt;
920
921 while (TRUE) {
922 isAttribute = FALSE;
923 key = uenum_next(keywordEnum, NULL, status);
924 if (key == NULL) {
925 break;
926 }
927 len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
928 /* buf must be null-terminated */
929 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
930 if (strict) {
931 *status = U_ILLEGAL_ARGUMENT_ERROR;
932 break;
933 }
934 /* ignore this keyword */
935 tmpStatus = U_ZERO_ERROR;
936 continue;
937 }
938
939 keylen = (int32_t)uprv_strlen(key);
940 isBcpUExt = (keylen > 1);
941
942 /* special keyword used for representing Unicode locale attributes */
943 if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
944 isAttribute = TRUE;
945 if (len > 0) {
946 int32_t i = 0;
947 while (TRUE) {
948 attrBufLength = 0;
949 for (; i < len; i++) {
950 if (buf[i] != '-') {
951 attrBuf[attrBufLength++] = buf[i];
952 } else {
953 i++;
954 break;
955 }
956 }
957 if (attrBufLength > 0) {
958 attrBuf[attrBufLength] = 0;
959
960 } else if (i >= len){
961 break;
962 }
963
964 /* create AttributeListEntry */
965 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
966 if (attr == NULL) {
967 *status = U_MEMORY_ALLOCATION_ERROR;
968 break;
969 }
970 attrValue = (char*)uprv_malloc(attrBufLength + 1);
971 if (attrValue == NULL) {
972 *status = U_MEMORY_ALLOCATION_ERROR;
973 break;
974 }
975 uprv_strcpy(attrValue, attrBuf);
976 attr->attribute = attrValue;
977
978 if (!_addAttributeToList(&firstAttr, attr)) {
979 uprv_free(attr);
980 uprv_free(attrValue);
981 if (strict) {
982 *status = U_ILLEGAL_ARGUMENT_ERROR;
983 break;
984 }
985 }
986 }
987 }
988 } else if (isBcpUExt) {
989 bcpKey = uloc_toUnicodeLocaleKey(key);
990 if (bcpKey == NULL) {
991 if (strict) {
992 *status = U_ILLEGAL_ARGUMENT_ERROR;
993 break;
994 }
995 continue;
996 }
997
998 /* we've checked buf is null-terminated above */
999 bcpValue = uloc_toUnicodeLocaleType(key, buf);
1000 if (bcpValue == NULL) {
1001 if (strict) {
1002 *status = U_ILLEGAL_ARGUMENT_ERROR;
1003 break;
1004 }
1005 continue;
1006 }
1007 if (bcpValue == buf) {
1008 /*
1009 When uloc_toUnicodeLocaleType(key, buf) returns the
1010 input value as is, the value is well-formed, but has
1011 no known mapping. This implementation normalizes the
1012 the value to lower case
1013 */
1014 int32_t bcpValueLen = uprv_strlen(bcpValue);
1015 if (bcpValueLen < extBufCapacity) {
1016 uprv_strcpy(pExtBuf, bcpValue);
1017 T_CString_toLowerCase(pExtBuf);
1018
1019 bcpValue = pExtBuf;
1020
1021 pExtBuf += (bcpValueLen + 1);
1022 extBufCapacity -= (bcpValueLen + 1);
1023 } else {
1024 if (strict) {
1025 *status = U_ILLEGAL_ARGUMENT_ERROR;
1026 break;
1027 }
1028 continue;
1029 }
1030 }
1031 } else {
1032 if (*key == PRIVATEUSE) {
1033 if (!_isPrivateuseValueSubtags(buf, len)) {
1034 if (strict) {
1035 *status = U_ILLEGAL_ARGUMENT_ERROR;
1036 break;
1037 }
1038 continue;
1039 }
1040 } else {
1041 if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
1042 if (strict) {
1043 *status = U_ILLEGAL_ARGUMENT_ERROR;
1044 break;
1045 }
1046 continue;
1047 }
1048 }
1049 bcpKey = key;
1050 if ((len + 1) < extBufCapacity) {
1051 uprv_memcpy(pExtBuf, buf, len);
1052 bcpValue = pExtBuf;
1053
1054 pExtBuf += len;
1055
1056 *pExtBuf = 0;
1057 pExtBuf++;
1058
1059 extBufCapacity -= (len + 1);
1060 } else {
1061 *status = U_ILLEGAL_ARGUMENT_ERROR;
1062 break;
1063 }
1064 }
1065
1066 if (!isAttribute) {
1067 /* create ExtensionListEntry */
1068 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1069 if (ext == NULL) {
1070 *status = U_MEMORY_ALLOCATION_ERROR;
1071 break;
1072 }
1073 ext->key = bcpKey;
1074 ext->value = bcpValue;
1075
1076 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1077 uprv_free(ext);
1078 if (strict) {
1079 *status = U_ILLEGAL_ARGUMENT_ERROR;
1080 break;
1081 }
1082 }
1083 }
1084 }
1085
1086 /* Special handling for POSIX variant - add the keywords for POSIX */
1087 if (hadPosix) {
1088 /* create ExtensionListEntry for POSIX */
1089 ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1090 if (ext == NULL) {
1091 *status = U_MEMORY_ALLOCATION_ERROR;
1092 goto cleanup;
1093 }
1094 ext->key = POSIX_KEY;
1095 ext->value = POSIX_VALUE;
1096
1097 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1098 uprv_free(ext);
1099 }
1100 }
1101
1102 if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
1103 UBool startLDMLExtension = FALSE;
1104
1105 attr = firstAttr;
1106 ext = firstExt;
1107 do {
1108 if (!startLDMLExtension && (ext && uprv_strlen(ext->key) > 1)) {
1109 /* write LDML singleton extension */
1110 if (reslen < capacity) {
1111 *(appendAt + reslen) = SEP;
1112 }
1113 reslen++;
1114 if (reslen < capacity) {
1115 *(appendAt + reslen) = LDMLEXT;
1116 }
1117 reslen++;
1118
1119 startLDMLExtension = TRUE;
1120 }
1121
1122 /* write out the sorted BCP47 attributes, extensions and private use */
1123 if (ext && (uprv_strlen(ext->key) == 1 || attr == NULL)) {
1124 if (reslen < capacity) {
1125 *(appendAt + reslen) = SEP;
1126 }
1127 reslen++;
1128 len = (int32_t)uprv_strlen(ext->key);
1129 if (reslen < capacity) {
1130 uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
1131 }
1132 reslen += len;
1133 if (reslen < capacity) {
1134 *(appendAt + reslen) = SEP;
1135 }
1136 reslen++;
1137 len = (int32_t)uprv_strlen(ext->value);
1138 if (reslen < capacity) {
1139 uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
1140 }
1141 reslen += len;
1142
1143 ext = ext->next;
1144 } else if (attr) {
1145 /* write the value for the attributes */
1146 if (reslen < capacity) {
1147 *(appendAt + reslen) = SEP;
1148 }
1149 reslen++;
1150 len = (int32_t)uprv_strlen(attr->attribute);
1151 if (reslen < capacity) {
1152 uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
1153 }
1154 reslen += len;
1155
1156 attr = attr->next;
1157 }
1158 } while (attr != NULL || ext != NULL);
1159 }
1160 cleanup:
1161 /* clean up */
1162 ext = firstExt;
1163 while (ext != NULL) {
1164 ExtensionListEntry *tmpExt = ext->next;
1165 uprv_free(ext);
1166 ext = tmpExt;
1167 }
1168
1169 attr = firstAttr;
1170 while (attr != NULL) {
1171 AttributeListEntry *tmpAttr = attr->next;
1172 char *pValue = (char *)attr->attribute;
1173 uprv_free(pValue);
1174 uprv_free(attr);
1175 attr = tmpAttr;
1176 }
1177
1178 uenum_close(keywordEnum);
1179
1180 if (U_FAILURE(*status)) {
1181 return 0;
1182 }
1183 }
1184
1185 return u_terminateChars(appendAt, capacity, reslen, status);
1186 }
1187
1188 /**
1189 * Append keywords parsed from LDML extension value
1190 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
1191 * Note: char* buf is used for storing keywords
1192 */
1193 static void
_appendLDMLExtensionAsKeywords(const char * ldmlext,ExtensionListEntry ** appendTo,char * buf,int32_t bufSize,UBool * posixVariant,UErrorCode * status)1194 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
1195 const char *pTag; /* beginning of current subtag */
1196 const char *pKwds; /* beginning of key-type pairs */
1197 UBool variantExists = *posixVariant;
1198
1199 ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */
1200 ExtensionListEntry *kwd, *nextKwd;
1201
1202 AttributeListEntry *attrFirst = NULL; /* first attribute */
1203 AttributeListEntry *attr, *nextAttr;
1204
1205 int32_t len;
1206 int32_t bufIdx = 0;
1207
1208 char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1209 int32_t attrBufIdx = 0;
1210
1211 /* Reset the posixVariant value */
1212 *posixVariant = FALSE;
1213
1214 pTag = ldmlext;
1215 pKwds = NULL;
1216
1217 /* Iterate through u extension attributes */
1218 while (*pTag) {
1219 /* locate next separator char */
1220 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1221
1222 if (ultag_isUnicodeLocaleKey(pTag, len)) {
1223 pKwds = pTag;
1224 break;
1225 }
1226
1227 /* add this attribute to the list */
1228 attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
1229 if (attr == NULL) {
1230 *status = U_MEMORY_ALLOCATION_ERROR;
1231 goto cleanup;
1232 }
1233
1234 if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
1235 uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
1236 attrBuf[attrBufIdx + len] = 0;
1237 attr->attribute = &attrBuf[attrBufIdx];
1238 attrBufIdx += (len + 1);
1239 } else {
1240 *status = U_ILLEGAL_ARGUMENT_ERROR;
1241 goto cleanup;
1242 }
1243
1244 if (!_addAttributeToList(&attrFirst, attr)) {
1245 *status = U_ILLEGAL_ARGUMENT_ERROR;
1246 uprv_free(attr);
1247 goto cleanup;
1248 }
1249
1250 /* next tag */
1251 pTag += len;
1252 if (*pTag) {
1253 /* next to the separator */
1254 pTag++;
1255 }
1256 }
1257
1258 if (attrFirst) {
1259 /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
1260
1261 if (attrBufIdx > bufSize) {
1262 /* attrBufIdx == <total length of attribute subtag> + 1 */
1263 *status = U_ILLEGAL_ARGUMENT_ERROR;
1264 goto cleanup;
1265 }
1266
1267 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1268 if (kwd == NULL) {
1269 *status = U_MEMORY_ALLOCATION_ERROR;
1270 goto cleanup;
1271 }
1272
1273 kwd->key = LOCALE_ATTRIBUTE_KEY;
1274 kwd->value = buf;
1275
1276 /* attribute subtags sorted in alphabetical order as type */
1277 attr = attrFirst;
1278 while (attr != NULL) {
1279 nextAttr = attr->next;
1280
1281 /* buffer size check is done above */
1282 if (attr != attrFirst) {
1283 *(buf + bufIdx) = SEP;
1284 bufIdx++;
1285 }
1286
1287 len = uprv_strlen(attr->attribute);
1288 uprv_memcpy(buf + bufIdx, attr->attribute, len);
1289 bufIdx += len;
1290
1291 attr = nextAttr;
1292 }
1293 *(buf + bufIdx) = 0;
1294 bufIdx++;
1295
1296 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1297 *status = U_ILLEGAL_ARGUMENT_ERROR;
1298 uprv_free(kwd);
1299 goto cleanup;
1300 }
1301
1302 /* once keyword entry is created, delete the attribute list */
1303 attr = attrFirst;
1304 while (attr != NULL) {
1305 nextAttr = attr->next;
1306 uprv_free(attr);
1307 attr = nextAttr;
1308 }
1309 attrFirst = NULL;
1310 }
1311
1312 if (pKwds) {
1313 const char *pBcpKey = NULL; /* u extenstion key subtag */
1314 const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */
1315 int32_t bcpKeyLen = 0;
1316 int32_t bcpTypeLen = 0;
1317 UBool isDone = FALSE;
1318
1319 pTag = pKwds;
1320 /* BCP47 representation of LDML key/type pairs */
1321 while (!isDone) {
1322 const char *pNextBcpKey = NULL;
1323 int32_t nextBcpKeyLen = 0;
1324 UBool emitKeyword = FALSE;
1325
1326 if (*pTag) {
1327 /* locate next separator char */
1328 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
1329
1330 if (ultag_isUnicodeLocaleKey(pTag, len)) {
1331 if (pBcpKey) {
1332 emitKeyword = TRUE;
1333 pNextBcpKey = pTag;
1334 nextBcpKeyLen = len;
1335 } else {
1336 pBcpKey = pTag;
1337 bcpKeyLen = len;
1338 }
1339 } else {
1340 U_ASSERT(pBcpKey != NULL);
1341 /* within LDML type subtags */
1342 if (pBcpType) {
1343 bcpTypeLen += (len + 1);
1344 } else {
1345 pBcpType = pTag;
1346 bcpTypeLen = len;
1347 }
1348 }
1349
1350 /* next tag */
1351 pTag += len;
1352 if (*pTag) {
1353 /* next to the separator */
1354 pTag++;
1355 }
1356 } else {
1357 /* processing last one */
1358 emitKeyword = TRUE;
1359 isDone = TRUE;
1360 }
1361
1362 if (emitKeyword) {
1363 const char *pKey = NULL; /* LDML key */
1364 const char *pType = NULL; /* LDML type */
1365
1366 char bcpKeyBuf[9]; /* BCP key length is always 2 for now */
1367
1368 U_ASSERT(pBcpKey != NULL);
1369
1370 if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
1371 /* the BCP key is invalid */
1372 *status = U_ILLEGAL_ARGUMENT_ERROR;
1373 goto cleanup;
1374 }
1375
1376 uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
1377 bcpKeyBuf[bcpKeyLen] = 0;
1378
1379 /* u extension key to LDML key */
1380 pKey = uloc_toLegacyKey(bcpKeyBuf);
1381 if (pKey == NULL) {
1382 *status = U_ILLEGAL_ARGUMENT_ERROR;
1383 goto cleanup;
1384 }
1385 if (pKey == bcpKeyBuf) {
1386 /*
1387 The key returned by toLegacyKey points to the input buffer.
1388 We normalize the result key to lower case.
1389 */
1390 T_CString_toLowerCase(bcpKeyBuf);
1391 if (bufSize - bufIdx - 1 >= bcpKeyLen) {
1392 uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen);
1393 pKey = buf + bufIdx;
1394 bufIdx += bcpKeyLen;
1395 *(buf + bufIdx) = 0;
1396 bufIdx++;
1397 } else {
1398 *status = U_BUFFER_OVERFLOW_ERROR;
1399 goto cleanup;
1400 }
1401 }
1402
1403 if (pBcpType) {
1404 char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */
1405 if (bcpTypeLen >= sizeof(bcpTypeBuf)) {
1406 /* the BCP type is too long */
1407 *status = U_ILLEGAL_ARGUMENT_ERROR;
1408 goto cleanup;
1409 }
1410
1411 uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
1412 bcpTypeBuf[bcpTypeLen] = 0;
1413
1414 /* BCP type to locale type */
1415 pType = uloc_toLegacyType(pKey, bcpTypeBuf);
1416 if (pType == NULL) {
1417 *status = U_ILLEGAL_ARGUMENT_ERROR;
1418 goto cleanup;
1419 }
1420 if (pType == bcpTypeBuf) {
1421 /*
1422 The type returned by toLegacyType points to the input buffer.
1423 We normalize the result type to lower case.
1424 */
1425 /* normalize to lower case */
1426 T_CString_toLowerCase(bcpTypeBuf);
1427 if (bufSize - bufIdx - 1 >= bcpTypeLen) {
1428 uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen);
1429 pType = buf + bufIdx;
1430 bufIdx += bcpTypeLen;
1431 *(buf + bufIdx) = 0;
1432 bufIdx++;
1433 } else {
1434 *status = U_BUFFER_OVERFLOW_ERROR;
1435 goto cleanup;
1436 }
1437 }
1438 } else {
1439 /* typeless - default type value is "yes" */
1440 pType = LOCALE_TYPE_YES;
1441 }
1442
1443 /* Special handling for u-va-posix, since we want to treat this as a variant,
1444 not as a keyword */
1445 if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
1446 *posixVariant = TRUE;
1447 } else {
1448 /* create an ExtensionListEntry for this keyword */
1449 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1450 if (kwd == NULL) {
1451 *status = U_MEMORY_ALLOCATION_ERROR;
1452 goto cleanup;
1453 }
1454
1455 kwd->key = pKey;
1456 kwd->value = pType;
1457
1458 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1459 *status = U_ILLEGAL_ARGUMENT_ERROR;
1460 uprv_free(kwd);
1461 goto cleanup;
1462 }
1463 }
1464
1465 pBcpKey = pNextBcpKey;
1466 bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
1467 pBcpType = NULL;
1468 bcpTypeLen = 0;
1469 }
1470 }
1471 }
1472
1473 kwd = kwdFirst;
1474 while (kwd != NULL) {
1475 nextKwd = kwd->next;
1476 _addExtensionToList(appendTo, kwd, FALSE);
1477 kwd = nextKwd;
1478 }
1479
1480 return;
1481
1482 cleanup:
1483 attr = attrFirst;
1484 while (attr != NULL) {
1485 nextAttr = attr->next;
1486 uprv_free(attr);
1487 attr = nextAttr;
1488 }
1489
1490 kwd = kwdFirst;
1491 while (kwd != NULL) {
1492 nextKwd = kwd->next;
1493 uprv_free(kwd);
1494 kwd = nextKwd;
1495 }
1496 }
1497
1498
1499 static int32_t
_appendKeywords(ULanguageTag * langtag,char * appendAt,int32_t capacity,UErrorCode * status)1500 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
1501 int32_t reslen = 0;
1502 int32_t i, n;
1503 int32_t len;
1504 ExtensionListEntry *kwdFirst = NULL;
1505 ExtensionListEntry *kwd;
1506 const char *key, *type;
1507 char *kwdBuf = NULL;
1508 int32_t kwdBufLength = capacity;
1509 UBool posixVariant = FALSE;
1510
1511 if (U_FAILURE(*status)) {
1512 return 0;
1513 }
1514
1515 kwdBuf = (char*)uprv_malloc(kwdBufLength);
1516 if (kwdBuf == NULL) {
1517 *status = U_MEMORY_ALLOCATION_ERROR;
1518 return 0;
1519 }
1520
1521 /* Determine if variants already exists */
1522 if (ultag_getVariantsSize(langtag)) {
1523 posixVariant = TRUE;
1524 }
1525
1526 n = ultag_getExtensionsSize(langtag);
1527
1528 /* resolve locale keywords and reordering keys */
1529 for (i = 0; i < n; i++) {
1530 key = ultag_getExtensionKey(langtag, i);
1531 type = ultag_getExtensionValue(langtag, i);
1532 if (*key == LDMLEXT) {
1533 _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
1534 if (U_FAILURE(*status)) {
1535 break;
1536 }
1537 } else {
1538 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1539 if (kwd == NULL) {
1540 *status = U_MEMORY_ALLOCATION_ERROR;
1541 break;
1542 }
1543 kwd->key = key;
1544 kwd->value = type;
1545 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1546 uprv_free(kwd);
1547 *status = U_ILLEGAL_ARGUMENT_ERROR;
1548 break;
1549 }
1550 }
1551 }
1552
1553 if (U_SUCCESS(*status)) {
1554 type = ultag_getPrivateUse(langtag);
1555 if ((int32_t)uprv_strlen(type) > 0) {
1556 /* add private use as a keyword */
1557 kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1558 if (kwd == NULL) {
1559 *status = U_MEMORY_ALLOCATION_ERROR;
1560 } else {
1561 kwd->key = PRIVATEUSE_KEY;
1562 kwd->value = type;
1563 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1564 uprv_free(kwd);
1565 *status = U_ILLEGAL_ARGUMENT_ERROR;
1566 }
1567 }
1568 }
1569 }
1570
1571 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1572
1573 if (U_SUCCESS(*status) && posixVariant) {
1574 len = (int32_t) uprv_strlen(_POSIX);
1575 if (reslen < capacity) {
1576 uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
1577 }
1578 reslen += len;
1579 }
1580
1581 if (U_SUCCESS(*status) && kwdFirst != NULL) {
1582 /* write out the sorted keywords */
1583 UBool firstValue = TRUE;
1584 kwd = kwdFirst;
1585 do {
1586 if (reslen < capacity) {
1587 if (firstValue) {
1588 /* '@' */
1589 *(appendAt + reslen) = LOCALE_EXT_SEP;
1590 firstValue = FALSE;
1591 } else {
1592 /* ';' */
1593 *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
1594 }
1595 }
1596 reslen++;
1597
1598 /* key */
1599 len = (int32_t)uprv_strlen(kwd->key);
1600 if (reslen < capacity) {
1601 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
1602 }
1603 reslen += len;
1604
1605 /* '=' */
1606 if (reslen < capacity) {
1607 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
1608 }
1609 reslen++;
1610
1611 /* type */
1612 len = (int32_t)uprv_strlen(kwd->value);
1613 if (reslen < capacity) {
1614 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
1615 }
1616 reslen += len;
1617
1618 kwd = kwd->next;
1619 } while (kwd);
1620 }
1621
1622 /* clean up */
1623 kwd = kwdFirst;
1624 while (kwd != NULL) {
1625 ExtensionListEntry *tmpKwd = kwd->next;
1626 uprv_free(kwd);
1627 kwd = tmpKwd;
1628 }
1629
1630 uprv_free(kwdBuf);
1631
1632 if (U_FAILURE(*status)) {
1633 return 0;
1634 }
1635
1636 return u_terminateChars(appendAt, capacity, reslen, status);
1637 }
1638
1639 static int32_t
_appendPrivateuseToLanguageTag(const char * localeID,char * appendAt,int32_t capacity,UBool strict,UBool hadPosix,UErrorCode * status)1640 _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
1641 char buf[ULOC_FULLNAME_CAPACITY];
1642 char tmpAppend[ULOC_FULLNAME_CAPACITY];
1643 UErrorCode tmpStatus = U_ZERO_ERROR;
1644 int32_t len, i;
1645 int32_t reslen = 0;
1646
1647 if (U_FAILURE(*status)) {
1648 return 0;
1649 }
1650
1651 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1652 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1653 if (strict) {
1654 *status = U_ILLEGAL_ARGUMENT_ERROR;
1655 }
1656 return 0;
1657 }
1658
1659 if (len > 0) {
1660 char *p, *pPriv;
1661 UBool bNext = TRUE;
1662 UBool firstValue = TRUE;
1663 UBool writeValue;
1664
1665 pPriv = NULL;
1666 p = buf;
1667 while (bNext) {
1668 writeValue = FALSE;
1669 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1670 if (*p == 0) {
1671 bNext = FALSE;
1672 } else {
1673 *p = 0; /* terminate */
1674 }
1675 if (pPriv != NULL) {
1676 /* Private use in the canonical format is lowercase in BCP47 */
1677 for (i = 0; *(pPriv + i) != 0; i++) {
1678 *(pPriv + i) = uprv_tolower(*(pPriv + i));
1679 }
1680
1681 /* validate */
1682 if (_isPrivateuseValueSubtag(pPriv, -1)) {
1683 if (firstValue) {
1684 if (!_isVariantSubtag(pPriv, -1)) {
1685 writeValue = TRUE;
1686 }
1687 } else {
1688 writeValue = TRUE;
1689 }
1690 } else if (strict) {
1691 *status = U_ILLEGAL_ARGUMENT_ERROR;
1692 break;
1693 } else {
1694 break;
1695 }
1696
1697 if (writeValue) {
1698 if (reslen < capacity) {
1699 tmpAppend[reslen++] = SEP;
1700 }
1701
1702 if (firstValue) {
1703 if (reslen < capacity) {
1704 tmpAppend[reslen++] = *PRIVATEUSE_KEY;
1705 }
1706
1707 if (reslen < capacity) {
1708 tmpAppend[reslen++] = SEP;
1709 }
1710
1711 len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
1712 if (reslen < capacity) {
1713 uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
1714 }
1715 reslen += len;
1716
1717 if (reslen < capacity) {
1718 tmpAppend[reslen++] = SEP;
1719 }
1720
1721 firstValue = FALSE;
1722 }
1723
1724 len = (int32_t)uprv_strlen(pPriv);
1725 if (reslen < capacity) {
1726 uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
1727 }
1728 reslen += len;
1729 }
1730 }
1731 /* reset private use starting position */
1732 pPriv = NULL;
1733 } else if (pPriv == NULL) {
1734 pPriv = p;
1735 }
1736 p++;
1737 }
1738
1739 if (U_FAILURE(*status)) {
1740 return 0;
1741 }
1742 }
1743
1744 if (U_SUCCESS(*status)) {
1745 len = reslen;
1746 if (reslen < capacity) {
1747 uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
1748 }
1749 }
1750
1751 u_terminateChars(appendAt, capacity, reslen, status);
1752
1753 return reslen;
1754 }
1755
1756 /*
1757 * -------------------------------------------------
1758 *
1759 * ultag_ functions
1760 *
1761 * -------------------------------------------------
1762 */
1763
/*
 * Parser state bits. ultag_parse keeps a mask of these in "next" to record
 * which kinds of subtag are acceptable at the current position.
 */
#define LANG    (1 << 0)    /* language */
#define EXTL    (1 << 1)    /* extlang */
#define SCRT    (1 << 2)    /* script */
#define REGN    (1 << 3)    /* region */
#define VART    (1 << 4)    /* variant */
#define EXTS    (1 << 5)    /* extension singleton */
#define EXTV    (1 << 6)    /* extension value subtag */
#define PRIV    (1 << 7)    /* private use */
1773
1774 static ULanguageTag*
ultag_parse(const char * tag,int32_t tagLen,int32_t * parsedLen,UErrorCode * status)1775 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
1776 ULanguageTag *t;
1777 char *tagBuf;
1778 int16_t next;
1779 char *pSubtag, *pNext, *pLastGoodPosition;
1780 int32_t subtagLen;
1781 int32_t extlangIdx;
1782 ExtensionListEntry *pExtension;
1783 char *pExtValueSubtag, *pExtValueSubtagEnd;
1784 int32_t i;
1785 UBool privateuseVar = FALSE;
1786 int32_t grandfatheredLen = 0;
1787
1788 if (parsedLen != NULL) {
1789 *parsedLen = 0;
1790 }
1791
1792 if (U_FAILURE(*status)) {
1793 return NULL;
1794 }
1795
1796 if (tagLen < 0) {
1797 tagLen = (int32_t)uprv_strlen(tag);
1798 }
1799
1800 /* copy the entire string */
1801 tagBuf = (char*)uprv_malloc(tagLen + 1);
1802 if (tagBuf == NULL) {
1803 *status = U_MEMORY_ALLOCATION_ERROR;
1804 return NULL;
1805 }
1806 uprv_memcpy(tagBuf, tag, tagLen);
1807 *(tagBuf + tagLen) = 0;
1808
1809 /* create a ULanguageTag */
1810 t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
1811 if (t == NULL) {
1812 uprv_free(tagBuf);
1813 *status = U_MEMORY_ALLOCATION_ERROR;
1814 return NULL;
1815 }
1816 _initializeULanguageTag(t);
1817 t->buf = tagBuf;
1818
1819 if (tagLen < MINLEN) {
1820 /* the input tag is too short - return empty ULanguageTag */
1821 return t;
1822 }
1823
1824 /* check if the tag is grandfathered */
1825 for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
1826 if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
1827 int32_t newTagLength;
1828
1829 grandfatheredLen = tagLen; /* back up for output parsedLen */
1830 newTagLength = uprv_strlen(GRANDFATHERED[i+1]);
1831 if (tagLen < newTagLength) {
1832 uprv_free(tagBuf);
1833 tagBuf = (char*)uprv_malloc(newTagLength + 1);
1834 if (tagBuf == NULL) {
1835 *status = U_MEMORY_ALLOCATION_ERROR;
1836 return NULL;
1837 }
1838 t->buf = tagBuf;
1839 tagLen = newTagLength;
1840 }
1841 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
1842 break;
1843 }
1844 }
1845
1846 /*
1847 * langtag = language
1848 * ["-" script]
1849 * ["-" region]
1850 * *("-" variant)
1851 * *("-" extension)
1852 * ["-" privateuse]
1853 */
1854
1855 next = LANG | PRIV;
1856 pNext = pLastGoodPosition = tagBuf;
1857 extlangIdx = 0;
1858 pExtension = NULL;
1859 pExtValueSubtag = NULL;
1860 pExtValueSubtagEnd = NULL;
1861
1862 while (pNext) {
1863 char *pSep;
1864
1865 pSubtag = pNext;
1866
1867 /* locate next separator char */
1868 pSep = pSubtag;
1869 while (*pSep) {
1870 if (*pSep == SEP) {
1871 break;
1872 }
1873 pSep++;
1874 }
1875 if (*pSep == 0) {
1876 /* last subtag */
1877 pNext = NULL;
1878 } else {
1879 pNext = pSep + 1;
1880 }
1881 subtagLen = (int32_t)(pSep - pSubtag);
1882
1883 if (next & LANG) {
1884 if (_isLanguageSubtag(pSubtag, subtagLen)) {
1885 *pSep = 0; /* terminate */
1886 t->language = T_CString_toLowerCase(pSubtag);
1887
1888 pLastGoodPosition = pSep;
1889 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1890 continue;
1891 }
1892 }
1893 if (next & EXTL) {
1894 if (_isExtlangSubtag(pSubtag, subtagLen)) {
1895 *pSep = 0;
1896 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
1897
1898 pLastGoodPosition = pSep;
1899 if (extlangIdx < 3) {
1900 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1901 } else {
1902 next = SCRT | REGN | VART | EXTS | PRIV;
1903 }
1904 continue;
1905 }
1906 }
1907 if (next & SCRT) {
1908 if (_isScriptSubtag(pSubtag, subtagLen)) {
1909 char *p = pSubtag;
1910
1911 *pSep = 0;
1912
1913 /* to title case */
1914 *p = uprv_toupper(*p);
1915 p++;
1916 for (; *p; p++) {
1917 *p = uprv_tolower(*p);
1918 }
1919
1920 t->script = pSubtag;
1921
1922 pLastGoodPosition = pSep;
1923 next = REGN | VART | EXTS | PRIV;
1924 continue;
1925 }
1926 }
1927 if (next & REGN) {
1928 if (_isRegionSubtag(pSubtag, subtagLen)) {
1929 *pSep = 0;
1930 t->region = T_CString_toUpperCase(pSubtag);
1931
1932 pLastGoodPosition = pSep;
1933 next = VART | EXTS | PRIV;
1934 continue;
1935 }
1936 }
1937 if (next & VART) {
1938 if (_isVariantSubtag(pSubtag, subtagLen) ||
1939 (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
1940 VariantListEntry *var;
1941 UBool isAdded;
1942
1943 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
1944 if (var == NULL) {
1945 *status = U_MEMORY_ALLOCATION_ERROR;
1946 goto error;
1947 }
1948 *pSep = 0;
1949 var->variant = T_CString_toUpperCase(pSubtag);
1950 isAdded = _addVariantToList(&(t->variants), var);
1951 if (!isAdded) {
1952 /* duplicated variant entry */
1953 uprv_free(var);
1954 break;
1955 }
1956 pLastGoodPosition = pSep;
1957 next = VART | EXTS | PRIV;
1958 continue;
1959 }
1960 }
1961 if (next & EXTS) {
1962 if (_isExtensionSingleton(pSubtag, subtagLen)) {
1963 if (pExtension != NULL) {
1964 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
1965 /* the previous extension is incomplete */
1966 uprv_free(pExtension);
1967 pExtension = NULL;
1968 break;
1969 }
1970
1971 /* terminate the previous extension value */
1972 *pExtValueSubtagEnd = 0;
1973 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
1974
1975 /* insert the extension to the list */
1976 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
1977 pLastGoodPosition = pExtValueSubtagEnd;
1978 } else {
1979 /* stop parsing here */
1980 uprv_free(pExtension);
1981 pExtension = NULL;
1982 break;
1983 }
1984 }
1985
1986 /* create a new extension */
1987 pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
1988 if (pExtension == NULL) {
1989 *status = U_MEMORY_ALLOCATION_ERROR;
1990 goto error;
1991 }
1992 *pSep = 0;
1993 pExtension->key = T_CString_toLowerCase(pSubtag);
1994 pExtension->value = NULL; /* will be set later */
1995
1996 /*
1997 * reset the start and the end location of extension value
1998 * subtags for this extension
1999 */
2000 pExtValueSubtag = NULL;
2001 pExtValueSubtagEnd = NULL;
2002
2003 next = EXTV;
2004 continue;
2005 }
2006 }
2007 if (next & EXTV) {
2008 if (_isExtensionSubtag(pSubtag, subtagLen)) {
2009 if (pExtValueSubtag == NULL) {
2010 /* if the start postion of this extension's value is not yet,
2011 this one is the first value subtag */
2012 pExtValueSubtag = pSubtag;
2013 }
2014
2015 /* Mark the end of this subtag */
2016 pExtValueSubtagEnd = pSep;
2017 next = EXTS | EXTV | PRIV;
2018
2019 continue;
2020 }
2021 }
2022 if (next & PRIV) {
2023 if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
2024 char *pPrivuseVal;
2025
2026 if (pExtension != NULL) {
2027 /* Process the last extension */
2028 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2029 /* the previous extension is incomplete */
2030 uprv_free(pExtension);
2031 pExtension = NULL;
2032 break;
2033 } else {
2034 /* terminate the previous extension value */
2035 *pExtValueSubtagEnd = 0;
2036 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2037
2038 /* insert the extension to the list */
2039 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2040 pLastGoodPosition = pExtValueSubtagEnd;
2041 pExtension = NULL;
2042 } else {
2043 /* stop parsing here */
2044 uprv_free(pExtension);
2045 pExtension = NULL;
2046 break;
2047 }
2048 }
2049 }
2050
2051 /* The rest of part will be private use value subtags */
2052 if (pNext == NULL) {
2053 /* empty private use subtag */
2054 break;
2055 }
2056 /* back up the private use value start position */
2057 pPrivuseVal = pNext;
2058
2059 /* validate private use value subtags */
2060 while (pNext) {
2061 pSubtag = pNext;
2062 pSep = pSubtag;
2063 while (*pSep) {
2064 if (*pSep == SEP) {
2065 break;
2066 }
2067 pSep++;
2068 }
2069 if (*pSep == 0) {
2070 /* last subtag */
2071 pNext = NULL;
2072 } else {
2073 pNext = pSep + 1;
2074 }
2075 subtagLen = (int32_t)(pSep - pSubtag);
2076
2077 if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
2078 *pSep = 0;
2079 next = VART;
2080 privateuseVar = TRUE;
2081 break;
2082 } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
2083 pLastGoodPosition = pSep;
2084 } else {
2085 break;
2086 }
2087 }
2088
2089 if (next == VART) {
2090 continue;
2091 }
2092
2093 if (pLastGoodPosition - pPrivuseVal > 0) {
2094 *pLastGoodPosition = 0;
2095 t->privateuse = T_CString_toLowerCase(pPrivuseVal);
2096 }
2097 /* No more subtags, exiting the parse loop */
2098 break;
2099 }
2100 break;
2101 }
2102
2103 /* If we fell through here, it means this subtag is illegal - quit parsing */
2104 break;
2105 }
2106
2107 if (pExtension != NULL) {
2108 /* Process the last extension */
2109 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
2110 /* the previous extension is incomplete */
2111 uprv_free(pExtension);
2112 } else {
2113 /* terminate the previous extension value */
2114 *pExtValueSubtagEnd = 0;
2115 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
2116 /* insert the extension to the list */
2117 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
2118 pLastGoodPosition = pExtValueSubtagEnd;
2119 } else {
2120 uprv_free(pExtension);
2121 }
2122 }
2123 }
2124
2125 if (parsedLen != NULL) {
2126 *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
2127 }
2128
2129 return t;
2130
2131 error:
2132 uprv_free(t);
2133 return NULL;
2134 }
2135
2136 static void
ultag_close(ULanguageTag * langtag)2137 ultag_close(ULanguageTag* langtag) {
2138
2139 if (langtag == NULL) {
2140 return;
2141 }
2142
2143 uprv_free(langtag->buf);
2144
2145 if (langtag->variants) {
2146 VariantListEntry *curVar = langtag->variants;
2147 while (curVar) {
2148 VariantListEntry *nextVar = curVar->next;
2149 uprv_free(curVar);
2150 curVar = nextVar;
2151 }
2152 }
2153
2154 if (langtag->extensions) {
2155 ExtensionListEntry *curExt = langtag->extensions;
2156 while (curExt) {
2157 ExtensionListEntry *nextExt = curExt->next;
2158 uprv_free(curExt);
2159 curExt = nextExt;
2160 }
2161 }
2162
2163 uprv_free(langtag);
2164 }
2165
2166 static const char*
ultag_getLanguage(const ULanguageTag * langtag)2167 ultag_getLanguage(const ULanguageTag* langtag) {
2168 return langtag->language;
2169 }
2170
#if 0
/*
 * Compiled out. Looks the tag's language up in the DEPRECATEDLANGS pair table
 * and returns the paired entry on a match, otherwise the language itself.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    int32_t pair = 0;
    while (DEPRECATEDLANGS[pair] != NULL) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[pair], langtag->language) == 0) {
            return DEPRECATEDLANGS[pair + 1];
        }
        pair += 2;
    }
    return langtag->language;
}
#endif
2183
2184 static const char*
ultag_getExtlang(const ULanguageTag * langtag,int32_t idx)2185 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
2186 if (idx >= 0 && idx < MAXEXTLANG) {
2187 return langtag->extlang[idx];
2188 }
2189 return NULL;
2190 }
2191
2192 static int32_t
ultag_getExtlangSize(const ULanguageTag * langtag)2193 ultag_getExtlangSize(const ULanguageTag* langtag) {
2194 int32_t size = 0;
2195 int32_t i;
2196 for (i = 0; i < MAXEXTLANG; i++) {
2197 if (langtag->extlang[i]) {
2198 size++;
2199 }
2200 }
2201 return size;
2202 }
2203
2204 static const char*
ultag_getScript(const ULanguageTag * langtag)2205 ultag_getScript(const ULanguageTag* langtag) {
2206 return langtag->script;
2207 }
2208
2209 static const char*
ultag_getRegion(const ULanguageTag * langtag)2210 ultag_getRegion(const ULanguageTag* langtag) {
2211 return langtag->region;
2212 }
2213
2214 static const char*
ultag_getVariant(const ULanguageTag * langtag,int32_t idx)2215 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
2216 const char *var = NULL;
2217 VariantListEntry *cur = langtag->variants;
2218 int32_t i = 0;
2219 while (cur) {
2220 if (i == idx) {
2221 var = cur->variant;
2222 break;
2223 }
2224 cur = cur->next;
2225 i++;
2226 }
2227 return var;
2228 }
2229
2230 static int32_t
ultag_getVariantsSize(const ULanguageTag * langtag)2231 ultag_getVariantsSize(const ULanguageTag* langtag) {
2232 int32_t size = 0;
2233 VariantListEntry *cur = langtag->variants;
2234 while (TRUE) {
2235 if (cur == NULL) {
2236 break;
2237 }
2238 size++;
2239 cur = cur->next;
2240 }
2241 return size;
2242 }
2243
2244 static const char*
ultag_getExtensionKey(const ULanguageTag * langtag,int32_t idx)2245 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
2246 const char *key = NULL;
2247 ExtensionListEntry *cur = langtag->extensions;
2248 int32_t i = 0;
2249 while (cur) {
2250 if (i == idx) {
2251 key = cur->key;
2252 break;
2253 }
2254 cur = cur->next;
2255 i++;
2256 }
2257 return key;
2258 }
2259
2260 static const char*
ultag_getExtensionValue(const ULanguageTag * langtag,int32_t idx)2261 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
2262 const char *val = NULL;
2263 ExtensionListEntry *cur = langtag->extensions;
2264 int32_t i = 0;
2265 while (cur) {
2266 if (i == idx) {
2267 val = cur->value;
2268 break;
2269 }
2270 cur = cur->next;
2271 i++;
2272 }
2273 return val;
2274 }
2275
2276 static int32_t
ultag_getExtensionsSize(const ULanguageTag * langtag)2277 ultag_getExtensionsSize(const ULanguageTag* langtag) {
2278 int32_t size = 0;
2279 ExtensionListEntry *cur = langtag->extensions;
2280 while (TRUE) {
2281 if (cur == NULL) {
2282 break;
2283 }
2284 size++;
2285 cur = cur->next;
2286 }
2287 return size;
2288 }
2289
2290 static const char*
ultag_getPrivateUse(const ULanguageTag * langtag)2291 ultag_getPrivateUse(const ULanguageTag* langtag) {
2292 return langtag->privateuse;
2293 }
2294
#if 0
/* Compiled out. Returns the grandfathered field of the parsed tag. */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag) {
    const char *grandfathered = langtag->grandfathered;
    return grandfathered;
}
#endif
2301
2302
2303 /*
2304 * -------------------------------------------------
2305 *
2306 * Locale/BCP47 conversion APIs, exposed as uloc_*
2307 *
2308 * -------------------------------------------------
2309 */
2310 U_CAPI int32_t U_EXPORT2
uloc_toLanguageTag(const char * localeID,char * langtag,int32_t langtagCapacity,UBool strict,UErrorCode * status)2311 uloc_toLanguageTag(const char* localeID,
2312 char* langtag,
2313 int32_t langtagCapacity,
2314 UBool strict,
2315 UErrorCode* status) {
2316 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2317 char canonical[256];
2318 int32_t reslen = 0;
2319 UErrorCode tmpStatus = U_ZERO_ERROR;
2320 UBool hadPosix = FALSE;
2321 const char* pKeywordStart;
2322
2323 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
2324 canonical[0] = 0;
2325 if (uprv_strlen(localeID) > 0) {
2326 uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
2327 if (tmpStatus != U_ZERO_ERROR) {
2328 *status = U_ILLEGAL_ARGUMENT_ERROR;
2329 return 0;
2330 }
2331 }
2332
2333 /* For handling special case - private use only tag */
2334 pKeywordStart = locale_getKeywordsStart(canonical);
2335 if (pKeywordStart == canonical) {
2336 UEnumeration *kwdEnum;
2337 int kwdCnt = 0;
2338 UBool done = FALSE;
2339
2340 kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
2341 if (kwdEnum != NULL) {
2342 kwdCnt = uenum_count(kwdEnum, &tmpStatus);
2343 if (kwdCnt == 1) {
2344 const char *key;
2345 int32_t len = 0;
2346
2347 key = uenum_next(kwdEnum, &len, &tmpStatus);
2348 if (len == 1 && *key == PRIVATEUSE) {
2349 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
2350 buf[0] = PRIVATEUSE;
2351 buf[1] = SEP;
2352 len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
2353 if (U_SUCCESS(tmpStatus)) {
2354 if (_isPrivateuseValueSubtags(&buf[2], len)) {
2355 /* return private use only tag */
2356 reslen = len + 2;
2357 uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
2358 u_terminateChars(langtag, langtagCapacity, reslen, status);
2359 done = TRUE;
2360 } else if (strict) {
2361 *status = U_ILLEGAL_ARGUMENT_ERROR;
2362 done = TRUE;
2363 }
2364 /* if not strict mode, then "und" will be returned */
2365 } else {
2366 *status = U_ILLEGAL_ARGUMENT_ERROR;
2367 done = TRUE;
2368 }
2369 }
2370 }
2371 uenum_close(kwdEnum);
2372 if (done) {
2373 return reslen;
2374 }
2375 }
2376 }
2377
2378 reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
2379 reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2380 reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
2381 reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
2382 reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2383 reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
2384
2385 return reslen;
2386 }
2387
2388
2389 U_CAPI int32_t U_EXPORT2
uloc_forLanguageTag(const char * langtag,char * localeID,int32_t localeIDCapacity,int32_t * parsedLength,UErrorCode * status)2390 uloc_forLanguageTag(const char* langtag,
2391 char* localeID,
2392 int32_t localeIDCapacity,
2393 int32_t* parsedLength,
2394 UErrorCode* status) {
2395 ULanguageTag *lt;
2396 int32_t reslen = 0;
2397 const char *subtag, *p;
2398 int32_t len;
2399 int32_t i, n;
2400 UBool noRegion = TRUE;
2401
2402 lt = ultag_parse(langtag, -1, parsedLength, status);
2403 if (U_FAILURE(*status)) {
2404 return 0;
2405 }
2406
2407 /* language */
2408 subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt);
2409 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
2410 len = (int32_t)uprv_strlen(subtag);
2411 if (len > 0) {
2412 if (reslen < localeIDCapacity) {
2413 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
2414 }
2415 reslen += len;
2416 }
2417 }
2418
2419 /* script */
2420 subtag = ultag_getScript(lt);
2421 len = (int32_t)uprv_strlen(subtag);
2422 if (len > 0) {
2423 if (reslen < localeIDCapacity) {
2424 *(localeID + reslen) = LOCALE_SEP;
2425 }
2426 reslen++;
2427
2428 /* write out the script in title case */
2429 p = subtag;
2430 while (*p) {
2431 if (reslen < localeIDCapacity) {
2432 if (p == subtag) {
2433 *(localeID + reslen) = uprv_toupper(*p);
2434 } else {
2435 *(localeID + reslen) = *p;
2436 }
2437 }
2438 reslen++;
2439 p++;
2440 }
2441 }
2442
2443 /* region */
2444 subtag = ultag_getRegion(lt);
2445 len = (int32_t)uprv_strlen(subtag);
2446 if (len > 0) {
2447 if (reslen < localeIDCapacity) {
2448 *(localeID + reslen) = LOCALE_SEP;
2449 }
2450 reslen++;
2451 /* write out the retion in upper case */
2452 p = subtag;
2453 while (*p) {
2454 if (reslen < localeIDCapacity) {
2455 *(localeID + reslen) = uprv_toupper(*p);
2456 }
2457 reslen++;
2458 p++;
2459 }
2460 noRegion = FALSE;
2461 }
2462
2463 /* variants */
2464 n = ultag_getVariantsSize(lt);
2465 if (n > 0) {
2466 if (noRegion) {
2467 if (reslen < localeIDCapacity) {
2468 *(localeID + reslen) = LOCALE_SEP;
2469 }
2470 reslen++;
2471 }
2472
2473 for (i = 0; i < n; i++) {
2474 subtag = ultag_getVariant(lt, i);
2475 if (reslen < localeIDCapacity) {
2476 *(localeID + reslen) = LOCALE_SEP;
2477 }
2478 reslen++;
2479 /* write out the variant in upper case */
2480 p = subtag;
2481 while (*p) {
2482 if (reslen < localeIDCapacity) {
2483 *(localeID + reslen) = uprv_toupper(*p);
2484 }
2485 reslen++;
2486 p++;
2487 }
2488 }
2489 }
2490
2491 /* keywords */
2492 n = ultag_getExtensionsSize(lt);
2493 subtag = ultag_getPrivateUse(lt);
2494 if (n > 0 || uprv_strlen(subtag) > 0) {
2495 if (reslen == 0 && n > 0) {
2496 /* need a language */
2497 if (reslen < localeIDCapacity) {
2498 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
2499 }
2500 reslen += LANG_UND_LEN;
2501 }
2502 len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
2503 reslen += len;
2504 }
2505
2506 ultag_close(lt);
2507 return u_terminateChars(localeID, localeIDCapacity, reslen, status);
2508 }
2509
2510
2511