1 /*
2 **********************************************************************
3 * Copyright (c) 2002-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Author: Alan Liu
7 * Created: October 30 2002
8 * Since: ICU 2.4
9 * 2010nov19 Markus Scherer Rewrite for formatVersion 2.
10 **********************************************************************
11 */
12 #include "propname.h"
13 #include "unicode/uchar.h"
14 #include "unicode/udata.h"
15 #include "unicode/uscript.h"
16 #include "umutex.h"
17 #include "cmemory.h"
18 #include "cstring.h"
19 #include "uarrsort.h"
20 #include "uinvchar.h"
21
22 #define INCLUDED_FROM_PROPNAME_CPP
23 #include "propname_data.h"
24
25 U_CDECL_BEGIN
26
27 /**
28 * Get the next non-ignorable ASCII character from a property name
29 * and lowercases it.
30 * @return ((advance count for the name)<<8)|character
31 */
32 static inline int32_t
getASCIIPropertyNameChar(const char * name)33 getASCIIPropertyNameChar(const char *name) {
34 int32_t i;
35 char c;
36
37 /* Ignore delimiters '-', '_', and ASCII White_Space */
38 for(i=0;
39 (c=name[i++])==0x2d || c==0x5f ||
40 c==0x20 || (0x09<=c && c<=0x0d);
41 ) {}
42
43 if(c!=0) {
44 return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
45 } else {
46 return i<<8;
47 }
48 }
49
50 /**
51 * Get the next non-ignorable EBCDIC character from a property name
52 * and lowercases it.
53 * @return ((advance count for the name)<<8)|character
54 */
55 static inline int32_t
getEBCDICPropertyNameChar(const char * name)56 getEBCDICPropertyNameChar(const char *name) {
57 int32_t i;
58 char c;
59
60 /* Ignore delimiters '-', '_', and EBCDIC White_Space */
61 for(i=0;
62 (c=name[i++])==0x60 || c==0x6d ||
63 c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
64 ) {}
65
66 if(c!=0) {
67 return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
68 } else {
69 return i<<8;
70 }
71 }
72
73 /**
74 * Unicode property names and property value names are compared "loosely".
75 *
76 * UCD.html 4.0.1 says:
77 * For all property names, property value names, and for property values for
78 * Enumerated, Binary, or Catalog properties, use the following
79 * loose matching rule:
80 *
81 * LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
82 *
83 * This function does just that, for (char *) name strings.
84 * It is almost identical to ucnv_compareNames() but also ignores
85 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
86 *
87 * @internal
88 */
89
90 U_CAPI int32_t U_EXPORT2
uprv_compareASCIIPropertyNames(const char * name1,const char * name2)91 uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
92 int32_t rc, r1, r2;
93
94 for(;;) {
95 r1=getASCIIPropertyNameChar(name1);
96 r2=getASCIIPropertyNameChar(name2);
97
98 /* If we reach the ends of both strings then they match */
99 if(((r1|r2)&0xff)==0) {
100 return 0;
101 }
102
103 /* Compare the lowercased characters */
104 if(r1!=r2) {
105 rc=(r1&0xff)-(r2&0xff);
106 if(rc!=0) {
107 return rc;
108 }
109 }
110
111 name1+=r1>>8;
112 name2+=r2>>8;
113 }
114 }
115
116 U_CAPI int32_t U_EXPORT2
uprv_compareEBCDICPropertyNames(const char * name1,const char * name2)117 uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
118 int32_t rc, r1, r2;
119
120 for(;;) {
121 r1=getEBCDICPropertyNameChar(name1);
122 r2=getEBCDICPropertyNameChar(name2);
123
124 /* If we reach the ends of both strings then they match */
125 if(((r1|r2)&0xff)==0) {
126 return 0;
127 }
128
129 /* Compare the lowercased characters */
130 if(r1!=r2) {
131 rc=(r1&0xff)-(r2&0xff);
132 if(rc!=0) {
133 return rc;
134 }
135 }
136
137 name1+=r1>>8;
138 name2+=r2>>8;
139 }
140 }
141
142 U_CDECL_END
143
144 U_NAMESPACE_BEGIN
145
findProperty(int32_t property)146 int32_t PropNameData::findProperty(int32_t property) {
147 int32_t i=1; // valueMaps index, initially after numRanges
148 for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
149 // Read and skip the start and limit of this range.
150 int32_t start=valueMaps[i];
151 int32_t limit=valueMaps[i+1];
152 i+=2;
153 if(property<start) {
154 break;
155 }
156 if(property<limit) {
157 return i+(property-start)*2;
158 }
159 i+=(limit-start)*2; // Skip all entries for this range.
160 }
161 return 0;
162 }
163
findPropertyValueNameGroup(int32_t valueMapIndex,int32_t value)164 int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
165 if(valueMapIndex==0) {
166 return 0; // The property does not have named values.
167 }
168 ++valueMapIndex; // Skip the BytesTrie offset.
169 int32_t numRanges=valueMaps[valueMapIndex++];
170 if(numRanges<0x10) {
171 // Ranges of values.
172 for(; numRanges>0; --numRanges) {
173 // Read and skip the start and limit of this range.
174 int32_t start=valueMaps[valueMapIndex];
175 int32_t limit=valueMaps[valueMapIndex+1];
176 valueMapIndex+=2;
177 if(value<start) {
178 break;
179 }
180 if(value<limit) {
181 return valueMaps[valueMapIndex+value-start];
182 }
183 valueMapIndex+=limit-start; // Skip all entries for this range.
184 }
185 } else {
186 // List of values.
187 int32_t valuesStart=valueMapIndex;
188 int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
189 do {
190 int32_t v=valueMaps[valueMapIndex];
191 if(value<v) {
192 break;
193 }
194 if(value==v) {
195 return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
196 }
197 } while(++valueMapIndex<nameGroupOffsetsStart);
198 }
199 return 0;
200 }
201
getName(const char * nameGroup,int32_t nameIndex)202 const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
203 int32_t numNames=*nameGroup++;
204 if(nameIndex<0 || numNames<=nameIndex) {
205 return NULL;
206 }
207 // Skip nameIndex names.
208 for(; nameIndex>0; --nameIndex) {
209 nameGroup=uprv_strchr(nameGroup, 0)+1;
210 }
211 if(*nameGroup==0) {
212 return NULL; // no name (Property[Value]Aliases.txt has "n/a")
213 }
214 return nameGroup;
215 }
216
containsName(BytesTrie & trie,const char * name)217 UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
218 if(name==NULL) {
219 return FALSE;
220 }
221 UStringTrieResult result=USTRINGTRIE_NO_VALUE;
222 char c;
223 while((c=*name++)!=0) {
224 c=uprv_invCharToLowercaseAscii(c);
225 // Ignore delimiters '-', '_', and ASCII White_Space.
226 if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
227 continue;
228 }
229 if(!USTRINGTRIE_HAS_NEXT(result)) {
230 return FALSE;
231 }
232 result=trie.next((uint8_t)c);
233 }
234 return USTRINGTRIE_HAS_VALUE(result);
235 }
236
getPropertyName(int32_t property,int32_t nameChoice)237 const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
238 int32_t valueMapIndex=findProperty(property);
239 if(valueMapIndex==0) {
240 return NULL; // Not a known property.
241 }
242 return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);
243 }
244
getPropertyValueName(int32_t property,int32_t value,int32_t nameChoice)245 const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
246 int32_t valueMapIndex=findProperty(property);
247 if(valueMapIndex==0) {
248 return NULL; // Not a known property.
249 }
250 int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
251 if(nameGroupOffset==0) {
252 return NULL;
253 }
254 return getName(nameGroups+nameGroupOffset, nameChoice);
255 }
256
getPropertyOrValueEnum(int32_t bytesTrieOffset,const char * alias)257 int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
258 BytesTrie trie(bytesTries+bytesTrieOffset);
259 if(containsName(trie, alias)) {
260 return trie.getValue();
261 } else {
262 return UCHAR_INVALID_CODE;
263 }
264 }
265
getPropertyEnum(const char * alias)266 int32_t PropNameData::getPropertyEnum(const char *alias) {
267 return getPropertyOrValueEnum(0, alias);
268 }
269
getPropertyValueEnum(int32_t property,const char * alias)270 int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
271 int32_t valueMapIndex=findProperty(property);
272 if(valueMapIndex==0) {
273 return UCHAR_INVALID_CODE; // Not a known property.
274 }
275 valueMapIndex=valueMaps[valueMapIndex+1];
276 if(valueMapIndex==0) {
277 return UCHAR_INVALID_CODE; // The property does not have named values.
278 }
279 // valueMapIndex is the start of the property's valueMap,
280 // where the first word is the BytesTrie offset.
281 return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
282 }
283 U_NAMESPACE_END
284
285 //----------------------------------------------------------------------
286 // Public API implementation
287
288 U_CAPI const char* U_EXPORT2
u_getPropertyName(UProperty property,UPropertyNameChoice nameChoice)289 u_getPropertyName(UProperty property,
290 UPropertyNameChoice nameChoice) {
291 U_NAMESPACE_USE
292 return PropNameData::getPropertyName(property, nameChoice);
293 }
294
295 U_CAPI UProperty U_EXPORT2
u_getPropertyEnum(const char * alias)296 u_getPropertyEnum(const char* alias) {
297 U_NAMESPACE_USE
298 return (UProperty)PropNameData::getPropertyEnum(alias);
299 }
300
301 U_CAPI const char* U_EXPORT2
u_getPropertyValueName(UProperty property,int32_t value,UPropertyNameChoice nameChoice)302 u_getPropertyValueName(UProperty property,
303 int32_t value,
304 UPropertyNameChoice nameChoice) {
305 U_NAMESPACE_USE
306 return PropNameData::getPropertyValueName(property, value, nameChoice);
307 }
308
309 U_CAPI int32_t U_EXPORT2
u_getPropertyValueEnum(UProperty property,const char * alias)310 u_getPropertyValueEnum(UProperty property,
311 const char* alias) {
312 U_NAMESPACE_USE
313 return PropNameData::getPropertyValueEnum(property, alias);
314 }
315
316 U_CAPI const char* U_EXPORT2
uscript_getName(UScriptCode scriptCode)317 uscript_getName(UScriptCode scriptCode){
318 return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
319 U_LONG_PROPERTY_NAME);
320 }
321
322 U_CAPI const char* U_EXPORT2
uscript_getShortName(UScriptCode scriptCode)323 uscript_getShortName(UScriptCode scriptCode){
324 return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
325 U_SHORT_PROPERTY_NAME);
326 }
327