1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2003-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: usprep.cpp
11 * encoding: US-ASCII
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2003jul2
16 * created by: Ram Viswanadha
17 */
18
19 #include "unicode/utypes.h"
20
21 #if !UCONFIG_NO_IDNA
22
23 #include "unicode/usprep.h"
24
25 #include "unicode/normalizer2.h"
26 #include "unicode/ustring.h"
27 #include "unicode/uchar.h"
28 #include "unicode/uversion.h"
29 #include "umutex.h"
30 #include "cmemory.h"
31 #include "sprpimpl.h"
32 #include "ustr_imp.h"
33 #include "uhash.h"
34 #include "cstring.h"
35 #include "udataswp.h"
36 #include "ucln_cmn.h"
37 #include "ubidi_props.h"
38 #include "uprops.h"
39
40 U_NAMESPACE_USE
41
42 U_CDECL_BEGIN
43
/*
 * Static cache for already opened StringPrep profiles.
 * Keys are UStringPrepKey*, values are UStringPrepProfile*. The table is
 * created by createCache() and every access in this file that touches its
 * contents happens with usprepMutex held.
 */
static UHashtable *SHARED_DATA_HASHTABLE = NULL;
/* One-time initialization guard for the cache; reset again by usprep_cleanup(). */
static icu::UInitOnce gSharedDataInitOnce;

/* Serializes access to the cache and to the reference counts of cached profiles. */
static UMutex usprepMutex = U_MUTEX_INITIALIZER;

/* format version of spp file */
//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };

/*
 * The Unicode version of the sprep data.
 * Written by isSPrepAcceptable() for the most recently accepted data item and
 * read by loadData() for its version comparison.
 */
static UVersionInfo dataVersion={ 0, 0, 0, 0 };
57
/*
 * Profile names must be aligned to UStringPrepProfileType:
 * usprep_openByType() casts the enum value to an index into this table,
 * so the order of the entries has to follow the order of the enum constants.
 * Several profile types intentionally share the same data name.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
75
76 static UBool U_CALLCONV
isSPrepAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)77 isSPrepAcceptable(void * /* context */,
78 const char * /* type */,
79 const char * /* name */,
80 const UDataInfo *pInfo) {
81 if(
82 pInfo->size>=20 &&
83 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
84 pInfo->charsetFamily==U_CHARSET_FAMILY &&
85 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
86 pInfo->dataFormat[1]==0x50 &&
87 pInfo->dataFormat[2]==0x52 &&
88 pInfo->dataFormat[3]==0x50 &&
89 pInfo->formatVersion[0]==3 &&
90 pInfo->formatVersion[2]==UTRIE_SHIFT &&
91 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
92 ) {
93 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
94 uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
95 return TRUE;
96 } else {
97 return FALSE;
98 }
99 }
100
101 static int32_t U_CALLCONV
getSPrepFoldingOffset(uint32_t data)102 getSPrepFoldingOffset(uint32_t data) {
103
104 return (int32_t)data;
105
106 }
107
108 /* hashes an entry */
109 static int32_t U_CALLCONV
hashEntry(const UHashTok parm)110 hashEntry(const UHashTok parm) {
111 UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
112 UHashTok namekey, pathkey;
113 namekey.pointer = b->name;
114 pathkey.pointer = b->path;
115 return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
116 }
117
118 /* compares two entries */
119 static UBool U_CALLCONV
compareEntries(const UHashTok p1,const UHashTok p2)120 compareEntries(const UHashTok p1, const UHashTok p2) {
121 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
122 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
123 UHashTok name1, name2, path1, path2;
124 name1.pointer = b1->name;
125 name2.pointer = b2->name;
126 path1.pointer = b1->path;
127 path2.pointer = b2->path;
128 return ((UBool)(uhash_compareChars(name1, name2) &
129 uhash_compareChars(path1, path2)));
130 }
131
132 static void
usprep_unload(UStringPrepProfile * data)133 usprep_unload(UStringPrepProfile* data){
134 udata_close(data->sprepData);
135 }
136
137 static int32_t
usprep_internal_flushCache(UBool noRefCount)138 usprep_internal_flushCache(UBool noRefCount){
139 UStringPrepProfile *profile = NULL;
140 UStringPrepKey *key = NULL;
141 int32_t pos = UHASH_FIRST;
142 int32_t deletedNum = 0;
143 const UHashElement *e;
144
145 /*
146 * if shared data hasn't even been lazy evaluated yet
147 * return 0
148 */
149 umtx_lock(&usprepMutex);
150 if (SHARED_DATA_HASHTABLE == NULL) {
151 umtx_unlock(&usprepMutex);
152 return 0;
153 }
154
155 /*creates an enumeration to iterate through every element in the table */
156 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
157 {
158 profile = (UStringPrepProfile *) e->value.pointer;
159 key = (UStringPrepKey *) e->key.pointer;
160
161 if ((noRefCount== FALSE && profile->refCount == 0) ||
162 noRefCount== TRUE) {
163 deletedNum++;
164 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
165
166 /* unload the data */
167 usprep_unload(profile);
168
169 if(key->name != NULL) {
170 uprv_free(key->name);
171 key->name=NULL;
172 }
173 if(key->path != NULL) {
174 uprv_free(key->path);
175 key->path=NULL;
176 }
177 uprv_free(profile);
178 uprv_free(key);
179 }
180
181 }
182 umtx_unlock(&usprepMutex);
183
184 return deletedNum;
185 }
186
187 /* Works just like ucnv_flushCache()
188 static int32_t
189 usprep_flushCache(){
190 return usprep_internal_flushCache(FALSE);
191 }
192 */
193
usprep_cleanup(void)194 static UBool U_CALLCONV usprep_cleanup(void){
195 if (SHARED_DATA_HASHTABLE != NULL) {
196 usprep_internal_flushCache(TRUE);
197 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
198 uhash_close(SHARED_DATA_HASHTABLE);
199 SHARED_DATA_HASHTABLE = NULL;
200 }
201 }
202 gSharedDataInitOnce.reset();
203 return (SHARED_DATA_HASHTABLE == NULL);
204 }
205 U_CDECL_END
206
207
208 /** Initializes the cache for resources */
209 static void U_CALLCONV
createCache(UErrorCode & status)210 createCache(UErrorCode &status) {
211 SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
212 if (U_FAILURE(status)) {
213 SHARED_DATA_HASHTABLE = NULL;
214 }
215 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
216 }
217
218 static void
initCache(UErrorCode * status)219 initCache(UErrorCode *status) {
220 umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
221 }
222
223 static UBool U_CALLCONV
loadData(UStringPrepProfile * profile,const char * path,const char * name,const char * type,UErrorCode * errorCode)224 loadData(UStringPrepProfile* profile,
225 const char* path,
226 const char* name,
227 const char* type,
228 UErrorCode* errorCode) {
229 /* load Unicode SPREP data from file */
230 UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
231 UDataMemory *dataMemory;
232 const int32_t *p=NULL;
233 const uint8_t *pb;
234 UVersionInfo normUnicodeVersion;
235 int32_t normUniVer, sprepUniVer, normCorrVer;
236
237 if(errorCode==NULL || U_FAILURE(*errorCode)) {
238 return 0;
239 }
240
241 /* open the data outside the mutex block */
242 //TODO: change the path
243 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
244 if(U_FAILURE(*errorCode)) {
245 return FALSE;
246 }
247
248 p=(const int32_t *)udata_getMemory(dataMemory);
249 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
250 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
251 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
252
253
254 if(U_FAILURE(*errorCode)) {
255 udata_close(dataMemory);
256 return FALSE;
257 }
258
259 /* in the mutex block, set the data for this process */
260 umtx_lock(&usprepMutex);
261 if(profile->sprepData==NULL) {
262 profile->sprepData=dataMemory;
263 dataMemory=NULL;
264 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
265 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
266 } else {
267 p=(const int32_t *)udata_getMemory(profile->sprepData);
268 }
269 umtx_unlock(&usprepMutex);
270 /* initialize some variables */
271 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
272
273 u_getUnicodeVersion(normUnicodeVersion);
274 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
275 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
276 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
277 (dataVersion[2] << 8 ) + (dataVersion[3]);
278 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
279
280 if(U_FAILURE(*errorCode)){
281 udata_close(dataMemory);
282 return FALSE;
283 }
284 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
285 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
286 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
287 ){
288 *errorCode = U_INVALID_FORMAT_ERROR;
289 udata_close(dataMemory);
290 return FALSE;
291 }
292 profile->isDataLoaded = TRUE;
293
294 /* if a different thread set it first, then close the extra data */
295 if(dataMemory!=NULL) {
296 udata_close(dataMemory); /* NULL if it was set correctly */
297 }
298
299
300 return profile->isDataLoaded;
301 }
302
303 static UStringPrepProfile*
usprep_getProfile(const char * path,const char * name,UErrorCode * status)304 usprep_getProfile(const char* path,
305 const char* name,
306 UErrorCode *status){
307
308 UStringPrepProfile* profile = NULL;
309
310 initCache(status);
311
312 if(U_FAILURE(*status)){
313 return NULL;
314 }
315
316 UStringPrepKey stackKey;
317 /*
318 * const is cast way to save malloc, strcpy and free calls
319 * we use the passed in pointers for fetching the data from the
320 * hash table which is safe
321 */
322 stackKey.name = (char*) name;
323 stackKey.path = (char*) path;
324
325 /* fetch the data from the cache */
326 umtx_lock(&usprepMutex);
327 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
328 if(profile != NULL) {
329 profile->refCount++;
330 }
331 umtx_unlock(&usprepMutex);
332
333 if(profile == NULL) {
334 /* else load the data and put the data in the cache */
335 LocalMemory<UStringPrepProfile> newProfile;
336 if(newProfile.allocateInsteadAndReset() == NULL) {
337 *status = U_MEMORY_ALLOCATION_ERROR;
338 return NULL;
339 }
340
341 /* load the data */
342 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
343 return NULL;
344 }
345
346 /* get the options */
347 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
348 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
349
350 if(newProfile->checkBiDi) {
351 newProfile->bdp = ubidi_getSingleton();
352 }
353
354 LocalMemory<UStringPrepKey> key;
355 LocalMemory<char> keyName;
356 LocalMemory<char> keyPath;
357 if( key.allocateInsteadAndReset() == NULL ||
358 keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
359 (path != NULL &&
360 keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
361 ) {
362 *status = U_MEMORY_ALLOCATION_ERROR;
363 usprep_unload(newProfile.getAlias());
364 return NULL;
365 }
366
367 umtx_lock(&usprepMutex);
368 // If another thread already inserted the same key/value, refcount and cleanup our thread data
369 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
370 if(profile != NULL) {
371 profile->refCount++;
372 usprep_unload(newProfile.getAlias());
373 }
374 else {
375 /* initialize the key members */
376 key->name = keyName.orphan();
377 uprv_strcpy(key->name, name);
378 if(path != NULL){
379 key->path = keyPath.orphan();
380 uprv_strcpy(key->path, path);
381 }
382 profile = newProfile.orphan();
383
384 /* add the data object to the cache */
385 profile->refCount = 1;
386 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
387 }
388 umtx_unlock(&usprepMutex);
389 }
390
391 return profile;
392 }
393
394 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char * path,const char * name,UErrorCode * status)395 usprep_open(const char* path,
396 const char* name,
397 UErrorCode* status){
398
399 if(status == NULL || U_FAILURE(*status)){
400 return NULL;
401 }
402
403 /* initialize the profile struct members */
404 return usprep_getProfile(path,name,status);
405 }
406
407 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_openByType(UStringPrepProfileType type,UErrorCode * status)408 usprep_openByType(UStringPrepProfileType type,
409 UErrorCode* status) {
410 if(status == NULL || U_FAILURE(*status)){
411 return NULL;
412 }
413 int32_t index = (int32_t)type;
414 if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) {
415 *status = U_ILLEGAL_ARGUMENT_ERROR;
416 return NULL;
417 }
418 return usprep_open(NULL, PROFILE_NAMES[index], status);
419 }
420
421 U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile * profile)422 usprep_close(UStringPrepProfile* profile){
423 if(profile==NULL){
424 return;
425 }
426
427 umtx_lock(&usprepMutex);
428 /* decrement the ref count*/
429 if(profile->refCount > 0){
430 profile->refCount--;
431 }
432 umtx_unlock(&usprepMutex);
433
434 }
435
436 U_CFUNC void
uprv_syntaxError(const UChar * rules,int32_t pos,int32_t rulesLen,UParseError * parseError)437 uprv_syntaxError(const UChar* rules,
438 int32_t pos,
439 int32_t rulesLen,
440 UParseError* parseError){
441 if(parseError == NULL){
442 return;
443 }
444 parseError->offset = pos;
445 parseError->line = 0 ; // we are not using line numbers
446
447 // for pre-context
448 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
449 int32_t limit = pos;
450
451 u_memcpy(parseError->preContext,rules+start,limit-start);
452 //null terminate the buffer
453 parseError->preContext[limit-start] = 0;
454
455 // for post-context; include error rules[pos]
456 start = pos;
457 limit = start + (U_PARSE_CONTEXT_LEN-1);
458 if (limit > rulesLen) {
459 limit = rulesLen;
460 }
461 if (start < rulesLen) {
462 u_memcpy(parseError->postContext,rules+start,limit-start);
463 }
464 //null terminate the buffer
465 parseError->postContext[limit-start]= 0;
466 }
467
468
469 static inline UStringPrepType
getValues(uint16_t trieWord,int16_t & value,UBool & isIndex)470 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
471
472 UStringPrepType type;
473 if(trieWord == 0){
474 /*
475 * Initial value stored in the mapping table
476 * just return USPREP_TYPE_LIMIT .. so that
477 * the source codepoint is copied to the destination
478 */
479 type = USPREP_TYPE_LIMIT;
480 isIndex =FALSE;
481 value = 0;
482 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
483 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
484 isIndex =FALSE;
485 value = 0;
486 }else{
487 /* get the type */
488 type = USPREP_MAP;
489 /* ascertain if the value is index or delta */
490 if(trieWord & 0x02){
491 isIndex = TRUE;
492 value = trieWord >> 2; //mask off the lower 2 bits and shift
493 }else{
494 isIndex = FALSE;
495 value = (int16_t)trieWord;
496 value = (value >> 2);
497 }
498
499 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
500 type = USPREP_DELETE;
501 isIndex =FALSE;
502 value = 0;
503 }
504 }
505 return type;
506 }
507
508 // TODO: change to writing to UnicodeString not UChar *
509 static int32_t
usprep_map(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)510 usprep_map( const UStringPrepProfile* profile,
511 const UChar* src, int32_t srcLength,
512 UChar* dest, int32_t destCapacity,
513 int32_t options,
514 UParseError* parseError,
515 UErrorCode* status ){
516
517 uint16_t result;
518 int32_t destIndex=0;
519 int32_t srcIndex;
520 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
521 UStringPrepType type;
522 int16_t value;
523 UBool isIndex;
524 const int32_t* indexes = profile->indexes;
525
526 // no error checking the caller check for error and arguments
527 // no string length check the caller finds out the string length
528
529 for(srcIndex=0;srcIndex<srcLength;){
530 UChar32 ch;
531
532 U16_NEXT(src,srcIndex,srcLength,ch);
533
534 result=0;
535
536 UTRIE_GET16(&profile->sprepTrie,ch,result);
537
538 type = getValues(result, value, isIndex);
539
540 // check if the source codepoint is unassigned
541 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
542
543 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
544 *status = U_STRINGPREP_UNASSIGNED_ERROR;
545 return 0;
546
547 }else if(type == USPREP_MAP){
548
549 int32_t index, length;
550
551 if(isIndex){
552 index = value;
553 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
554 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
555 length = 1;
556 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
557 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
558 length = 2;
559 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
560 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
561 length = 3;
562 }else{
563 length = profile->mappingData[index++];
564
565 }
566
567 /* copy mapping to destination */
568 for(int32_t i=0; i< length; i++){
569 if(destIndex < destCapacity ){
570 dest[destIndex] = profile->mappingData[index+i];
571 }
572 destIndex++; /* for pre-flighting */
573 }
574 continue;
575 }else{
576 // subtract the delta to arrive at the code point
577 ch -= value;
578 }
579
580 }else if(type==USPREP_DELETE){
581 // just consume the codepoint and contine
582 continue;
583 }
584 //copy the code point into destination
585 if(ch <= 0xFFFF){
586 if(destIndex < destCapacity ){
587 dest[destIndex] = (UChar)ch;
588 }
589 destIndex++;
590 }else{
591 if(destIndex+1 < destCapacity ){
592 dest[destIndex] = U16_LEAD(ch);
593 dest[destIndex+1] = U16_TRAIL(ch);
594 }
595 destIndex +=2;
596 }
597
598 }
599
600 return u_terminateUChars(dest, destCapacity, destIndex, status);
601 }
602
603 /*
604 1) Map -- For each character in the input, check if it has a mapping
605 and, if so, replace it with its mapping.
606
607 2) Normalize -- Possibly normalize the result of step 1 using Unicode
608 normalization.
609
610 3) Prohibit -- Check for any characters that are not allowed in the
611 output. If any are found, return an error.
612
613 4) Check bidi -- Possibly check for right-to-left characters, and if
614 any are found, make sure that the whole string satisfies the
615 requirements for bidirectional strings. If the string does not
616 satisfy the requirements for bidirectional strings, return an
617 error.
618 [Unicode3.2] defines several bidirectional categories; each character
619 has one bidirectional category assigned to it. For the purposes of
620 the requirements below, an "RandALCat character" is a character that
621 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
   is a character that has Unicode bidirectional category "L". Note
   that there are many characters which fall in neither of the above
626 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
627 this because they have bidirectional category "EN".
628
629 In any profile that specifies bidirectional character handling, all
630 three of the following requirements MUST be met:
631
632 1) The characters in section 5.8 MUST be prohibited.
633
634 2) If a string contains any RandALCat character, the string MUST NOT
635 contain any LCat character.
636
637 3) If a string contains any RandALCat character, a RandALCat
638 character MUST be the first character of the string, and a
639 RandALCat character MUST be the last character of the string.
640 */
641 U_CAPI int32_t U_EXPORT2
usprep_prepare(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)642 usprep_prepare( const UStringPrepProfile* profile,
643 const UChar* src, int32_t srcLength,
644 UChar* dest, int32_t destCapacity,
645 int32_t options,
646 UParseError* parseError,
647 UErrorCode* status ){
648
649 // check error status
650 if(U_FAILURE(*status)){
651 return 0;
652 }
653
654 //check arguments
655 if(profile==NULL ||
656 (src==NULL ? srcLength!=0 : srcLength<-1) ||
657 (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
658 *status=U_ILLEGAL_ARGUMENT_ERROR;
659 return 0;
660 }
661
662 //get the string length
663 if(srcLength < 0){
664 srcLength = u_strlen(src);
665 }
666 // map
667 UnicodeString s1;
668 UChar *b1 = s1.getBuffer(srcLength);
669 if(b1==NULL){
670 *status = U_MEMORY_ALLOCATION_ERROR;
671 return 0;
672 }
673 int32_t b1Len = usprep_map(profile, src, srcLength,
674 b1, s1.getCapacity(), options, parseError, status);
675 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
676
677 if(*status == U_BUFFER_OVERFLOW_ERROR){
678 // redo processing of string
679 /* we do not have enough room so grow the buffer*/
680 b1 = s1.getBuffer(b1Len);
681 if(b1==NULL){
682 *status = U_MEMORY_ALLOCATION_ERROR;
683 return 0;
684 }
685
686 *status = U_ZERO_ERROR; // reset error
687 b1Len = usprep_map(profile, src, srcLength,
688 b1, s1.getCapacity(), options, parseError, status);
689 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
690 }
691 if(U_FAILURE(*status)){
692 return 0;
693 }
694
695 // normalize
696 UnicodeString s2;
697 if(profile->doNFKC){
698 const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
699 FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
700 if(U_FAILURE(*status)){
701 return 0;
702 }
703 fn2.normalize(s1, s2, *status);
704 }else{
705 s2.fastCopyFrom(s1);
706 }
707 if(U_FAILURE(*status)){
708 return 0;
709 }
710
711 // Prohibit and checkBiDi in one pass
712 const UChar *b2 = s2.getBuffer();
713 int32_t b2Len = s2.length();
714 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
715 UBool leftToRight=FALSE, rightToLeft=FALSE;
716 int32_t rtlPos =-1, ltrPos =-1;
717
718 for(int32_t b2Index=0; b2Index<b2Len;){
719 UChar32 ch = 0;
720 U16_NEXT(b2, b2Index, b2Len, ch);
721
722 uint16_t result;
723 UTRIE_GET16(&profile->sprepTrie,ch,result);
724
725 int16_t value;
726 UBool isIndex;
727 UStringPrepType type = getValues(result, value, isIndex);
728
729 if( type == USPREP_PROHIBITED ||
730 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
731 ){
732 *status = U_STRINGPREP_PROHIBITED_ERROR;
733 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
734 return 0;
735 }
736
737 if(profile->checkBiDi) {
738 direction = ubidi_getClass(profile->bdp, ch);
739 if(firstCharDir == U_CHAR_DIRECTION_COUNT){
740 firstCharDir = direction;
741 }
742 if(direction == U_LEFT_TO_RIGHT){
743 leftToRight = TRUE;
744 ltrPos = b2Index-1;
745 }
746 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
747 rightToLeft = TRUE;
748 rtlPos = b2Index-1;
749 }
750 }
751 }
752 if(profile->checkBiDi == TRUE){
753 // satisfy 2
754 if( leftToRight == TRUE && rightToLeft == TRUE){
755 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
756 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
757 return 0;
758 }
759
760 //satisfy 3
761 if( rightToLeft == TRUE &&
762 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
763 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
764 ){
765 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
766 uprv_syntaxError(b2, rtlPos, b2Len, parseError);
767 return FALSE;
768 }
769 }
770 return s2.extract(dest, destCapacity, *status);
771 }
772
773
774 /* data swapping ------------------------------------------------------------ */
775
776 U_CAPI int32_t U_EXPORT2
usprep_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)777 usprep_swap(const UDataSwapper *ds,
778 const void *inData, int32_t length, void *outData,
779 UErrorCode *pErrorCode) {
780 const UDataInfo *pInfo;
781 int32_t headerSize;
782
783 const uint8_t *inBytes;
784 uint8_t *outBytes;
785
786 const int32_t *inIndexes;
787 int32_t indexes[16];
788
789 int32_t i, offset, count, size;
790
791 /* udata_swapDataHeader checks the arguments */
792 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
793 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
794 return 0;
795 }
796
797 /* check data format and format version */
798 pInfo=(const UDataInfo *)((const char *)inData+4);
799 if(!(
800 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
801 pInfo->dataFormat[1]==0x50 &&
802 pInfo->dataFormat[2]==0x52 &&
803 pInfo->dataFormat[3]==0x50 &&
804 pInfo->formatVersion[0]==3
805 )) {
806 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
807 pInfo->dataFormat[0], pInfo->dataFormat[1],
808 pInfo->dataFormat[2], pInfo->dataFormat[3],
809 pInfo->formatVersion[0]);
810 *pErrorCode=U_UNSUPPORTED_ERROR;
811 return 0;
812 }
813
814 inBytes=(const uint8_t *)inData+headerSize;
815 outBytes=(uint8_t *)outData+headerSize;
816
817 inIndexes=(const int32_t *)inBytes;
818
819 if(length>=0) {
820 length-=headerSize;
821 if(length<16*4) {
822 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
823 length);
824 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
825 return 0;
826 }
827 }
828
829 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
830 for(i=0; i<16; ++i) {
831 indexes[i]=udata_readInt32(ds, inIndexes[i]);
832 }
833
834 /* calculate the total length of the data */
835 size=
836 16*4+ /* size of indexes[] */
837 indexes[_SPREP_INDEX_TRIE_SIZE]+
838 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
839
840 if(length>=0) {
841 if(length<size) {
842 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
843 length);
844 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
845 return 0;
846 }
847
848 /* copy the data for inaccessible bytes */
849 if(inBytes!=outBytes) {
850 uprv_memcpy(outBytes, inBytes, size);
851 }
852
853 offset=0;
854
855 /* swap the int32_t indexes[] */
856 count=16*4;
857 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
858 offset+=count;
859
860 /* swap the UTrie */
861 count=indexes[_SPREP_INDEX_TRIE_SIZE];
862 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
863 offset+=count;
864
865 /* swap the uint16_t mappingTable[] */
866 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
867 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
868 //offset+=count;
869 }
870
871 return headerSize+size;
872 }
873
874 #endif /* #if !UCONFIG_NO_IDNA */
875