1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: usprep.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003jul2
14 * created by: Ram Viswanadha
15 */
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_IDNA
20
21 #include "unicode/usprep.h"
22
23 #include "unicode/normalizer2.h"
24 #include "unicode/ustring.h"
25 #include "unicode/uchar.h"
26 #include "unicode/uversion.h"
27 #include "umutex.h"
28 #include "cmemory.h"
29 #include "sprpimpl.h"
30 #include "ustr_imp.h"
31 #include "uhash.h"
32 #include "cstring.h"
33 #include "udataswp.h"
34 #include "ucln_cmn.h"
35 #include "ubidi_props.h"
36 #include "uprops.h"
37
38 U_NAMESPACE_USE
39
40 U_CDECL_BEGIN
41
42 /*
43 Static cache for already opened StringPrep profiles
44 */
45 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
46 static icu::UInitOnce gSharedDataInitOnce;
47
48 static UMutex usprepMutex = U_MUTEX_INITIALIZER;
49
50 /* format version of spp file */
51 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
52
53 /* the Unicode version of the sprep data */
54 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
55
56 /* Profile names must be aligned to UStringPrepProfileType */
57 static const char * const PROFILE_NAMES[] = {
58 "rfc3491", /* USPREP_RFC3491_NAMEPREP */
59 "rfc3530cs", /* USPREP_RFC3530_NFS4_CS_PREP */
60 "rfc3530csci", /* USPREP_RFC3530_NFS4_CS_PREP_CI */
61 "rfc3491", /* USPREP_RFC3530_NSF4_CIS_PREP */
62 "rfc3530mixp", /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */
63 "rfc3491", /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */
64 "rfc3722", /* USPREP_RFC3722_ISCSI */
65 "rfc3920node", /* USPREP_RFC3920_NODEPREP */
66 "rfc3920res", /* USPREP_RFC3920_RESOURCEPREP */
67 "rfc4011", /* USPREP_RFC4011_MIB */
68 "rfc4013", /* USPREP_RFC4013_SASLPREP */
69 "rfc4505", /* USPREP_RFC4505_TRACE */
70 "rfc4518", /* USPREP_RFC4518_LDAP */
71 "rfc4518ci", /* USPREP_RFC4518_LDAP_CI */
72 };
73
74 static UBool U_CALLCONV
isSPrepAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)75 isSPrepAcceptable(void * /* context */,
76 const char * /* type */,
77 const char * /* name */,
78 const UDataInfo *pInfo) {
79 if(
80 pInfo->size>=20 &&
81 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
82 pInfo->charsetFamily==U_CHARSET_FAMILY &&
83 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
84 pInfo->dataFormat[1]==0x50 &&
85 pInfo->dataFormat[2]==0x52 &&
86 pInfo->dataFormat[3]==0x50 &&
87 pInfo->formatVersion[0]==3 &&
88 pInfo->formatVersion[2]==UTRIE_SHIFT &&
89 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
90 ) {
91 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
92 uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
93 return TRUE;
94 } else {
95 return FALSE;
96 }
97 }
98
99 static int32_t U_CALLCONV
getSPrepFoldingOffset(uint32_t data)100 getSPrepFoldingOffset(uint32_t data) {
101
102 return (int32_t)data;
103
104 }
105
106 /* hashes an entry */
107 static int32_t U_CALLCONV
hashEntry(const UHashTok parm)108 hashEntry(const UHashTok parm) {
109 UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
110 UHashTok namekey, pathkey;
111 namekey.pointer = b->name;
112 pathkey.pointer = b->path;
113 return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
114 }
115
116 /* compares two entries */
117 static UBool U_CALLCONV
compareEntries(const UHashTok p1,const UHashTok p2)118 compareEntries(const UHashTok p1, const UHashTok p2) {
119 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
120 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
121 UHashTok name1, name2, path1, path2;
122 name1.pointer = b1->name;
123 name2.pointer = b2->name;
124 path1.pointer = b1->path;
125 path2.pointer = b2->path;
126 return ((UBool)(uhash_compareChars(name1, name2) &
127 uhash_compareChars(path1, path2)));
128 }
129
130 static void
usprep_unload(UStringPrepProfile * data)131 usprep_unload(UStringPrepProfile* data){
132 udata_close(data->sprepData);
133 }
134
135 static int32_t
usprep_internal_flushCache(UBool noRefCount)136 usprep_internal_flushCache(UBool noRefCount){
137 UStringPrepProfile *profile = NULL;
138 UStringPrepKey *key = NULL;
139 int32_t pos = UHASH_FIRST;
140 int32_t deletedNum = 0;
141 const UHashElement *e;
142
143 /*
144 * if shared data hasn't even been lazy evaluated yet
145 * return 0
146 */
147 umtx_lock(&usprepMutex);
148 if (SHARED_DATA_HASHTABLE == NULL) {
149 umtx_unlock(&usprepMutex);
150 return 0;
151 }
152
153 /*creates an enumeration to iterate through every element in the table */
154 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
155 {
156 profile = (UStringPrepProfile *) e->value.pointer;
157 key = (UStringPrepKey *) e->key.pointer;
158
159 if ((noRefCount== FALSE && profile->refCount == 0) ||
160 noRefCount== TRUE) {
161 deletedNum++;
162 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
163
164 /* unload the data */
165 usprep_unload(profile);
166
167 if(key->name != NULL) {
168 uprv_free(key->name);
169 key->name=NULL;
170 }
171 if(key->path != NULL) {
172 uprv_free(key->path);
173 key->path=NULL;
174 }
175 uprv_free(profile);
176 uprv_free(key);
177 }
178
179 }
180 umtx_unlock(&usprepMutex);
181
182 return deletedNum;
183 }
184
185 /* Works just like ucnv_flushCache()
186 static int32_t
187 usprep_flushCache(){
188 return usprep_internal_flushCache(FALSE);
189 }
190 */
191
usprep_cleanup(void)192 static UBool U_CALLCONV usprep_cleanup(void){
193 if (SHARED_DATA_HASHTABLE != NULL) {
194 usprep_internal_flushCache(TRUE);
195 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
196 uhash_close(SHARED_DATA_HASHTABLE);
197 SHARED_DATA_HASHTABLE = NULL;
198 }
199 }
200 gSharedDataInitOnce.reset();
201 return (SHARED_DATA_HASHTABLE == NULL);
202 }
203 U_CDECL_END
204
205
206 /** Initializes the cache for resources */
207 static void U_CALLCONV
createCache(UErrorCode & status)208 createCache(UErrorCode &status) {
209 SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
210 if (U_FAILURE(status)) {
211 SHARED_DATA_HASHTABLE = NULL;
212 }
213 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
214 }
215
216 static void
initCache(UErrorCode * status)217 initCache(UErrorCode *status) {
218 umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
219 }
220
221 static UBool U_CALLCONV
loadData(UStringPrepProfile * profile,const char * path,const char * name,const char * type,UErrorCode * errorCode)222 loadData(UStringPrepProfile* profile,
223 const char* path,
224 const char* name,
225 const char* type,
226 UErrorCode* errorCode) {
227 /* load Unicode SPREP data from file */
228 UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
229 UDataMemory *dataMemory;
230 const int32_t *p=NULL;
231 const uint8_t *pb;
232 UVersionInfo normUnicodeVersion;
233 int32_t normUniVer, sprepUniVer, normCorrVer;
234
235 if(errorCode==NULL || U_FAILURE(*errorCode)) {
236 return 0;
237 }
238
239 /* open the data outside the mutex block */
240 //TODO: change the path
241 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
242 if(U_FAILURE(*errorCode)) {
243 return FALSE;
244 }
245
246 p=(const int32_t *)udata_getMemory(dataMemory);
247 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
248 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
249 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
250
251
252 if(U_FAILURE(*errorCode)) {
253 udata_close(dataMemory);
254 return FALSE;
255 }
256
257 /* in the mutex block, set the data for this process */
258 umtx_lock(&usprepMutex);
259 if(profile->sprepData==NULL) {
260 profile->sprepData=dataMemory;
261 dataMemory=NULL;
262 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
263 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
264 } else {
265 p=(const int32_t *)udata_getMemory(profile->sprepData);
266 }
267 umtx_unlock(&usprepMutex);
268 /* initialize some variables */
269 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
270
271 u_getUnicodeVersion(normUnicodeVersion);
272 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
273 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
274 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
275 (dataVersion[2] << 8 ) + (dataVersion[3]);
276 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
277
278 if(U_FAILURE(*errorCode)){
279 udata_close(dataMemory);
280 return FALSE;
281 }
282 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
283 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
284 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
285 ){
286 *errorCode = U_INVALID_FORMAT_ERROR;
287 udata_close(dataMemory);
288 return FALSE;
289 }
290 profile->isDataLoaded = TRUE;
291
292 /* if a different thread set it first, then close the extra data */
293 if(dataMemory!=NULL) {
294 udata_close(dataMemory); /* NULL if it was set correctly */
295 }
296
297
298 return profile->isDataLoaded;
299 }
300
301 static UStringPrepProfile*
usprep_getProfile(const char * path,const char * name,UErrorCode * status)302 usprep_getProfile(const char* path,
303 const char* name,
304 UErrorCode *status){
305
306 UStringPrepProfile* profile = NULL;
307
308 initCache(status);
309
310 if(U_FAILURE(*status)){
311 return NULL;
312 }
313
314 UStringPrepKey stackKey;
315 /*
316 * const is cast way to save malloc, strcpy and free calls
317 * we use the passed in pointers for fetching the data from the
318 * hash table which is safe
319 */
320 stackKey.name = (char*) name;
321 stackKey.path = (char*) path;
322
323 /* fetch the data from the cache */
324 umtx_lock(&usprepMutex);
325 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
326 if(profile != NULL) {
327 profile->refCount++;
328 }
329 umtx_unlock(&usprepMutex);
330
331 if(profile == NULL) {
332 /* else load the data and put the data in the cache */
333 LocalMemory<UStringPrepProfile> newProfile;
334 if(newProfile.allocateInsteadAndReset() == NULL) {
335 *status = U_MEMORY_ALLOCATION_ERROR;
336 return NULL;
337 }
338
339 /* load the data */
340 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
341 return NULL;
342 }
343
344 /* get the options */
345 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
346 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
347
348 if(newProfile->checkBiDi) {
349 newProfile->bdp = ubidi_getSingleton();
350 }
351
352 LocalMemory<UStringPrepKey> key;
353 LocalMemory<char> keyName;
354 LocalMemory<char> keyPath;
355 if( key.allocateInsteadAndReset() == NULL ||
356 keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
357 (path != NULL &&
358 keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
359 ) {
360 *status = U_MEMORY_ALLOCATION_ERROR;
361 usprep_unload(newProfile.getAlias());
362 return NULL;
363 }
364
365 umtx_lock(&usprepMutex);
366 // If another thread already inserted the same key/value, refcount and cleanup our thread data
367 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
368 if(profile != NULL) {
369 profile->refCount++;
370 usprep_unload(newProfile.getAlias());
371 }
372 else {
373 /* initialize the key members */
374 key->name = keyName.orphan();
375 uprv_strcpy(key->name, name);
376 if(path != NULL){
377 key->path = keyPath.orphan();
378 uprv_strcpy(key->path, path);
379 }
380 profile = newProfile.orphan();
381
382 /* add the data object to the cache */
383 profile->refCount = 1;
384 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
385 }
386 umtx_unlock(&usprepMutex);
387 }
388
389 return profile;
390 }
391
392 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char * path,const char * name,UErrorCode * status)393 usprep_open(const char* path,
394 const char* name,
395 UErrorCode* status){
396
397 if(status == NULL || U_FAILURE(*status)){
398 return NULL;
399 }
400
401 /* initialize the profile struct members */
402 return usprep_getProfile(path,name,status);
403 }
404
405 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_openByType(UStringPrepProfileType type,UErrorCode * status)406 usprep_openByType(UStringPrepProfileType type,
407 UErrorCode* status) {
408 if(status == NULL || U_FAILURE(*status)){
409 return NULL;
410 }
411 int32_t index = (int32_t)type;
412 if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) {
413 *status = U_ILLEGAL_ARGUMENT_ERROR;
414 return NULL;
415 }
416 return usprep_open(NULL, PROFILE_NAMES[index], status);
417 }
418
419 U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile * profile)420 usprep_close(UStringPrepProfile* profile){
421 if(profile==NULL){
422 return;
423 }
424
425 umtx_lock(&usprepMutex);
426 /* decrement the ref count*/
427 if(profile->refCount > 0){
428 profile->refCount--;
429 }
430 umtx_unlock(&usprepMutex);
431
432 }
433
434 U_CFUNC void
uprv_syntaxError(const UChar * rules,int32_t pos,int32_t rulesLen,UParseError * parseError)435 uprv_syntaxError(const UChar* rules,
436 int32_t pos,
437 int32_t rulesLen,
438 UParseError* parseError){
439 if(parseError == NULL){
440 return;
441 }
442 parseError->offset = pos;
443 parseError->line = 0 ; // we are not using line numbers
444
445 // for pre-context
446 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
447 int32_t limit = pos;
448
449 u_memcpy(parseError->preContext,rules+start,limit-start);
450 //null terminate the buffer
451 parseError->preContext[limit-start] = 0;
452
453 // for post-context; include error rules[pos]
454 start = pos;
455 limit = start + (U_PARSE_CONTEXT_LEN-1);
456 if (limit > rulesLen) {
457 limit = rulesLen;
458 }
459 if (start < rulesLen) {
460 u_memcpy(parseError->postContext,rules+start,limit-start);
461 }
462 //null terminate the buffer
463 parseError->postContext[limit-start]= 0;
464 }
465
466
467 static inline UStringPrepType
getValues(uint16_t trieWord,int16_t & value,UBool & isIndex)468 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
469
470 UStringPrepType type;
471 if(trieWord == 0){
472 /*
473 * Initial value stored in the mapping table
474 * just return USPREP_TYPE_LIMIT .. so that
475 * the source codepoint is copied to the destination
476 */
477 type = USPREP_TYPE_LIMIT;
478 isIndex =FALSE;
479 value = 0;
480 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
481 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
482 isIndex =FALSE;
483 value = 0;
484 }else{
485 /* get the type */
486 type = USPREP_MAP;
487 /* ascertain if the value is index or delta */
488 if(trieWord & 0x02){
489 isIndex = TRUE;
490 value = trieWord >> 2; //mask off the lower 2 bits and shift
491 }else{
492 isIndex = FALSE;
493 value = (int16_t)trieWord;
494 value = (value >> 2);
495 }
496
497 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
498 type = USPREP_DELETE;
499 isIndex =FALSE;
500 value = 0;
501 }
502 }
503 return type;
504 }
505
506 // TODO: change to writing to UnicodeString not UChar *
507 static int32_t
usprep_map(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)508 usprep_map( const UStringPrepProfile* profile,
509 const UChar* src, int32_t srcLength,
510 UChar* dest, int32_t destCapacity,
511 int32_t options,
512 UParseError* parseError,
513 UErrorCode* status ){
514
515 uint16_t result;
516 int32_t destIndex=0;
517 int32_t srcIndex;
518 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
519 UStringPrepType type;
520 int16_t value;
521 UBool isIndex;
522 const int32_t* indexes = profile->indexes;
523
524 // no error checking the caller check for error and arguments
525 // no string length check the caller finds out the string length
526
527 for(srcIndex=0;srcIndex<srcLength;){
528 UChar32 ch;
529
530 U16_NEXT(src,srcIndex,srcLength,ch);
531
532 result=0;
533
534 UTRIE_GET16(&profile->sprepTrie,ch,result);
535
536 type = getValues(result, value, isIndex);
537
538 // check if the source codepoint is unassigned
539 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
540
541 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
542 *status = U_STRINGPREP_UNASSIGNED_ERROR;
543 return 0;
544
545 }else if(type == USPREP_MAP){
546
547 int32_t index, length;
548
549 if(isIndex){
550 index = value;
551 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
552 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
553 length = 1;
554 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
555 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
556 length = 2;
557 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
558 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
559 length = 3;
560 }else{
561 length = profile->mappingData[index++];
562
563 }
564
565 /* copy mapping to destination */
566 for(int32_t i=0; i< length; i++){
567 if(destIndex < destCapacity ){
568 dest[destIndex] = profile->mappingData[index+i];
569 }
570 destIndex++; /* for pre-flighting */
571 }
572 continue;
573 }else{
574 // subtract the delta to arrive at the code point
575 ch -= value;
576 }
577
578 }else if(type==USPREP_DELETE){
579 // just consume the codepoint and contine
580 continue;
581 }
582 //copy the code point into destination
583 if(ch <= 0xFFFF){
584 if(destIndex < destCapacity ){
585 dest[destIndex] = (UChar)ch;
586 }
587 destIndex++;
588 }else{
589 if(destIndex+1 < destCapacity ){
590 dest[destIndex] = U16_LEAD(ch);
591 dest[destIndex+1] = U16_TRAIL(ch);
592 }
593 destIndex +=2;
594 }
595
596 }
597
598 return u_terminateUChars(dest, destCapacity, destIndex, status);
599 }
600
601 /*
602 1) Map -- For each character in the input, check if it has a mapping
603 and, if so, replace it with its mapping.
604
605 2) Normalize -- Possibly normalize the result of step 1 using Unicode
606 normalization.
607
608 3) Prohibit -- Check for any characters that are not allowed in the
609 output. If any are found, return an error.
610
611 4) Check bidi -- Possibly check for right-to-left characters, and if
612 any are found, make sure that the whole string satisfies the
613 requirements for bidirectional strings. If the string does not
614 satisfy the requirements for bidirectional strings, return an
615 error.
616 [Unicode3.2] defines several bidirectional categories; each character
617 has one bidirectional category assigned to it. For the purposes of
618 the requirements below, an "RandALCat character" is a character that
619 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
     is a character that has Unicode bidirectional category "L".  Note
     that there are many characters which fall in neither of the above
     definitions; Latin digits (<U+0030> through <U+0039>) are examples of
     this because they have bidirectional category "EN".
626
627 In any profile that specifies bidirectional character handling, all
628 three of the following requirements MUST be met:
629
630 1) The characters in section 5.8 MUST be prohibited.
631
632 2) If a string contains any RandALCat character, the string MUST NOT
633 contain any LCat character.
634
635 3) If a string contains any RandALCat character, a RandALCat
636 character MUST be the first character of the string, and a
637 RandALCat character MUST be the last character of the string.
638 */
639 U_CAPI int32_t U_EXPORT2
usprep_prepare(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)640 usprep_prepare( const UStringPrepProfile* profile,
641 const UChar* src, int32_t srcLength,
642 UChar* dest, int32_t destCapacity,
643 int32_t options,
644 UParseError* parseError,
645 UErrorCode* status ){
646
647 // check error status
648 if(U_FAILURE(*status)){
649 return 0;
650 }
651
652 //check arguments
653 if(profile==NULL ||
654 (src==NULL ? srcLength!=0 : srcLength<-1) ||
655 (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
656 *status=U_ILLEGAL_ARGUMENT_ERROR;
657 return 0;
658 }
659
660 //get the string length
661 if(srcLength < 0){
662 srcLength = u_strlen(src);
663 }
664 // map
665 UnicodeString s1;
666 UChar *b1 = s1.getBuffer(srcLength);
667 if(b1==NULL){
668 *status = U_MEMORY_ALLOCATION_ERROR;
669 return 0;
670 }
671 int32_t b1Len = usprep_map(profile, src, srcLength,
672 b1, s1.getCapacity(), options, parseError, status);
673 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
674
675 if(*status == U_BUFFER_OVERFLOW_ERROR){
676 // redo processing of string
677 /* we do not have enough room so grow the buffer*/
678 b1 = s1.getBuffer(b1Len);
679 if(b1==NULL){
680 *status = U_MEMORY_ALLOCATION_ERROR;
681 return 0;
682 }
683
684 *status = U_ZERO_ERROR; // reset error
685 b1Len = usprep_map(profile, src, srcLength,
686 b1, s1.getCapacity(), options, parseError, status);
687 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
688 }
689 if(U_FAILURE(*status)){
690 return 0;
691 }
692
693 // normalize
694 UnicodeString s2;
695 if(profile->doNFKC){
696 const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
697 FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
698 if(U_FAILURE(*status)){
699 return 0;
700 }
701 fn2.normalize(s1, s2, *status);
702 }else{
703 s2.fastCopyFrom(s1);
704 }
705 if(U_FAILURE(*status)){
706 return 0;
707 }
708
709 // Prohibit and checkBiDi in one pass
710 const UChar *b2 = s2.getBuffer();
711 int32_t b2Len = s2.length();
712 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
713 UBool leftToRight=FALSE, rightToLeft=FALSE;
714 int32_t rtlPos =-1, ltrPos =-1;
715
716 for(int32_t b2Index=0; b2Index<b2Len;){
717 UChar32 ch = 0;
718 U16_NEXT(b2, b2Index, b2Len, ch);
719
720 uint16_t result;
721 UTRIE_GET16(&profile->sprepTrie,ch,result);
722
723 int16_t value;
724 UBool isIndex;
725 UStringPrepType type = getValues(result, value, isIndex);
726
727 if( type == USPREP_PROHIBITED ||
728 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
729 ){
730 *status = U_STRINGPREP_PROHIBITED_ERROR;
731 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
732 return 0;
733 }
734
735 if(profile->checkBiDi) {
736 direction = ubidi_getClass(profile->bdp, ch);
737 if(firstCharDir == U_CHAR_DIRECTION_COUNT){
738 firstCharDir = direction;
739 }
740 if(direction == U_LEFT_TO_RIGHT){
741 leftToRight = TRUE;
742 ltrPos = b2Index-1;
743 }
744 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
745 rightToLeft = TRUE;
746 rtlPos = b2Index-1;
747 }
748 }
749 }
750 if(profile->checkBiDi == TRUE){
751 // satisfy 2
752 if( leftToRight == TRUE && rightToLeft == TRUE){
753 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
754 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
755 return 0;
756 }
757
758 //satisfy 3
759 if( rightToLeft == TRUE &&
760 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
761 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
762 ){
763 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
764 uprv_syntaxError(b2, rtlPos, b2Len, parseError);
765 return FALSE;
766 }
767 }
768 return s2.extract(dest, destCapacity, *status);
769 }
770
771
772 /* data swapping ------------------------------------------------------------ */
773
774 U_CAPI int32_t U_EXPORT2
usprep_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)775 usprep_swap(const UDataSwapper *ds,
776 const void *inData, int32_t length, void *outData,
777 UErrorCode *pErrorCode) {
778 const UDataInfo *pInfo;
779 int32_t headerSize;
780
781 const uint8_t *inBytes;
782 uint8_t *outBytes;
783
784 const int32_t *inIndexes;
785 int32_t indexes[16];
786
787 int32_t i, offset, count, size;
788
789 /* udata_swapDataHeader checks the arguments */
790 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
791 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
792 return 0;
793 }
794
795 /* check data format and format version */
796 pInfo=(const UDataInfo *)((const char *)inData+4);
797 if(!(
798 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
799 pInfo->dataFormat[1]==0x50 &&
800 pInfo->dataFormat[2]==0x52 &&
801 pInfo->dataFormat[3]==0x50 &&
802 pInfo->formatVersion[0]==3
803 )) {
804 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
805 pInfo->dataFormat[0], pInfo->dataFormat[1],
806 pInfo->dataFormat[2], pInfo->dataFormat[3],
807 pInfo->formatVersion[0]);
808 *pErrorCode=U_UNSUPPORTED_ERROR;
809 return 0;
810 }
811
812 inBytes=(const uint8_t *)inData+headerSize;
813 outBytes=(uint8_t *)outData+headerSize;
814
815 inIndexes=(const int32_t *)inBytes;
816
817 if(length>=0) {
818 length-=headerSize;
819 if(length<16*4) {
820 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
821 length);
822 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
823 return 0;
824 }
825 }
826
827 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
828 for(i=0; i<16; ++i) {
829 indexes[i]=udata_readInt32(ds, inIndexes[i]);
830 }
831
832 /* calculate the total length of the data */
833 size=
834 16*4+ /* size of indexes[] */
835 indexes[_SPREP_INDEX_TRIE_SIZE]+
836 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
837
838 if(length>=0) {
839 if(length<size) {
840 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
841 length);
842 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
843 return 0;
844 }
845
846 /* copy the data for inaccessible bytes */
847 if(inBytes!=outBytes) {
848 uprv_memcpy(outBytes, inBytes, size);
849 }
850
851 offset=0;
852
853 /* swap the int32_t indexes[] */
854 count=16*4;
855 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
856 offset+=count;
857
858 /* swap the UTrie */
859 count=indexes[_SPREP_INDEX_TRIE_SIZE];
860 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
861 offset+=count;
862
863 /* swap the uint16_t mappingTable[] */
864 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
865 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
866 //offset+=count;
867 }
868
869 return headerSize+size;
870 }
871
872 #endif /* #if !UCONFIG_NO_IDNA */
873