1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 1997-2014, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 *
9 * File USCRIPT.C
10 *
11 * Modification History:
12 *
13 *   Date        Name        Description
14 *   07/06/2001    Ram         Creation.
15 ******************************************************************************
16 */
17 
18 #include "unicode/uchar.h"
19 #include "unicode/uscript.h"
20 #include "unicode/uloc.h"
21 #include "cmemory.h"
22 #include "cstring.h"
23 
24 static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
25 static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
26 static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
27 
28 static int32_t
setCodes(const UScriptCode * src,int32_t length,UScriptCode * dest,int32_t capacity,UErrorCode * err)29 setCodes(const UScriptCode *src, int32_t length,
30          UScriptCode *dest, int32_t capacity, UErrorCode *err) {
31     int32_t i;
32     if(U_FAILURE(*err)) { return 0; }
33     if(length > capacity) {
34         *err = U_BUFFER_OVERFLOW_ERROR;
35         return length;
36     }
37     for(i = 0; i < length; ++i) {
38         dest[i] = src[i];
39     }
40     return length;
41 }
42 
43 static int32_t
setOneCode(UScriptCode script,UScriptCode * scripts,int32_t capacity,UErrorCode * err)44 setOneCode(UScriptCode script, UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
45     if(U_FAILURE(*err)) { return 0; }
46     if(1 > capacity) {
47         *err = U_BUFFER_OVERFLOW_ERROR;
48         return 1;
49     }
50     scripts[0] = script;
51     return 1;
52 }
53 
54 static int32_t
getCodesFromLocale(const char * locale,UScriptCode * scripts,int32_t capacity,UErrorCode * err)55 getCodesFromLocale(const char *locale,
56                    UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
57     UErrorCode internalErrorCode = U_ZERO_ERROR;
58     char lang[8];
59     char script[8];
60     int32_t scriptLength;
61     if(U_FAILURE(*err)) { return 0; }
62     // Multi-script languages, equivalent to the LocaleScript data
63     // that we used to load from locale resource bundles.
64     /*length = */ uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &internalErrorCode);
65     if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) {
66         return 0;
67     }
68     if(0 == uprv_strcmp(lang, "ja")) {
69         return setCodes(JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, capacity, err);
70     }
71     if(0 == uprv_strcmp(lang, "ko")) {
72         return setCodes(KOREAN, UPRV_LENGTHOF(KOREAN), scripts, capacity, err);
73     }
74     scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &internalErrorCode);
75     if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) {
76         return 0;
77     }
78     if(0 == uprv_strcmp(lang, "zh") && 0 == uprv_strcmp(script, "Hant")) {
79         return setCodes(HAN_BOPO, UPRV_LENGTHOF(HAN_BOPO), scripts, capacity, err);
80     }
81     // Explicit script code.
82     if(scriptLength != 0) {
83         UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
84         if(scriptCode != USCRIPT_INVALID_CODE) {
85             if(scriptCode == USCRIPT_SIMPLIFIED_HAN || scriptCode == USCRIPT_TRADITIONAL_HAN) {
86                 scriptCode = USCRIPT_HAN;
87             }
88             return setOneCode(scriptCode, scripts, capacity, err);
89         }
90     }
91     return 0;
92 }
93 
94 /* TODO: this is a bad API and should be deprecated, ticket #11141 */
95 U_CAPI int32_t  U_EXPORT2
uscript_getCode(const char * nameOrAbbrOrLocale,UScriptCode * fillIn,int32_t capacity,UErrorCode * err)96 uscript_getCode(const char* nameOrAbbrOrLocale,
97                 UScriptCode* fillIn,
98                 int32_t capacity,
99                 UErrorCode* err){
100     UBool triedCode;
101     char likely[ULOC_FULLNAME_CAPACITY];
102     UErrorCode internalErrorCode;
103     int32_t length;
104 
105     if(U_FAILURE(*err)) {
106         return 0;
107     }
108     if(nameOrAbbrOrLocale==NULL ||
109             (fillIn == NULL ? capacity != 0 : capacity < 0)) {
110         *err = U_ILLEGAL_ARGUMENT_ERROR;
111         return 0;
112     }
113 
114     triedCode = FALSE;
115     if(uprv_strchr(nameOrAbbrOrLocale, '-')==NULL && uprv_strchr(nameOrAbbrOrLocale, '_')==NULL ){
116         /* try long and abbreviated script names first */
117         UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
118         if(code!=USCRIPT_INVALID_CODE) {
119             return setOneCode(code, fillIn, capacity, err);
120         }
121         triedCode = TRUE;
122     }
123     internalErrorCode = U_ZERO_ERROR;
124     length = getCodesFromLocale(nameOrAbbrOrLocale, fillIn, capacity, err);
125     if(U_FAILURE(*err) || length != 0) {
126         return length;
127     }
128     (void)uloc_addLikelySubtags(nameOrAbbrOrLocale,
129                                 likely, UPRV_LENGTHOF(likely), &internalErrorCode);
130     if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) {
131         length = getCodesFromLocale(likely, fillIn, capacity, err);
132         if(U_FAILURE(*err) || length != 0) {
133             return length;
134         }
135     }
136     if(!triedCode) {
137         /* still not found .. try long and abbreviated script names again */
138         UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
139         if(code!=USCRIPT_INVALID_CODE) {
140             return setOneCode(code, fillIn, capacity, err);
141         }
142     }
143     return 0;
144 }
145