1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  *
6  *   Copyright (C) 2003-2016, International Business Machines
7  *   Corporation and others.  All Rights Reserved.
8  *
9  *******************************************************************************
10  *   file name:  usprep.cpp
11  *   encoding:   US-ASCII
12  *   tab size:   8 (not used)
13  *   indentation:4
14  *
15  *   created on: 2003jul2
16  *   created by: Ram Viswanadha
17  */
18 
19 #include "unicode/utypes.h"
20 
21 #if !UCONFIG_NO_IDNA
22 
23 #include "unicode/usprep.h"
24 
25 #include "unicode/normalizer2.h"
26 #include "unicode/ustring.h"
27 #include "unicode/uchar.h"
28 #include "unicode/uversion.h"
29 #include "umutex.h"
30 #include "cmemory.h"
31 #include "sprpimpl.h"
32 #include "ustr_imp.h"
33 #include "uhash.h"
34 #include "cstring.h"
35 #include "udataswp.h"
36 #include "ucln_cmn.h"
37 #include "ubidi_props.h"
38 #include "uprops.h"
39 
40 U_NAMESPACE_USE
41 
42 U_CDECL_BEGIN
43 
/*
 * Static cache for already opened StringPrep profiles.
 * usprep_getProfile() inserts UStringPrepKey -> UStringPrepProfile entries;
 * usprep_internal_flushCache() removes them.
 */
static UHashtable *SHARED_DATA_HASHTABLE = NULL;
/* one-time initialization guard: initCache() runs createCache() through it */
static icu::UInitOnce gSharedDataInitOnce;

/* locked around cache lookups/updates and profile reference-count changes */
static UMutex usprepMutex = U_MUTEX_INITIALIZER;

/* format version of spp file */
//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };

/*
 * the Unicode version of the sprep data;
 * written by isSPrepAcceptable() and read by loadData()
 */
static UVersionInfo dataVersion={ 0, 0, 0, 0 };
57 
/*
 * Profile names must be aligned to UStringPrepProfileType:
 * usprep_openByType() uses the enum value directly as an index into this table.
 * Several profile types intentionally share the "rfc3491" data name.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
75 
76 static UBool U_CALLCONV
isSPrepAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)77 isSPrepAcceptable(void * /* context */,
78              const char * /* type */,
79              const char * /* name */,
80              const UDataInfo *pInfo) {
81     if(
82         pInfo->size>=20 &&
83         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
84         pInfo->charsetFamily==U_CHARSET_FAMILY &&
85         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
86         pInfo->dataFormat[1]==0x50 &&
87         pInfo->dataFormat[2]==0x52 &&
88         pInfo->dataFormat[3]==0x50 &&
89         pInfo->formatVersion[0]==3 &&
90         pInfo->formatVersion[2]==UTRIE_SHIFT &&
91         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
92     ) {
93         //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
94         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
95         return TRUE;
96     } else {
97         return FALSE;
98     }
99 }
100 
101 static int32_t U_CALLCONV
getSPrepFoldingOffset(uint32_t data)102 getSPrepFoldingOffset(uint32_t data) {
103 
104     return (int32_t)data;
105 
106 }
107 
108 /* hashes an entry  */
109 static int32_t U_CALLCONV
hashEntry(const UHashTok parm)110 hashEntry(const UHashTok parm) {
111     UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
112     UHashTok namekey, pathkey;
113     namekey.pointer = b->name;
114     pathkey.pointer = b->path;
115     return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
116 }
117 
118 /* compares two entries */
119 static UBool U_CALLCONV
compareEntries(const UHashTok p1,const UHashTok p2)120 compareEntries(const UHashTok p1, const UHashTok p2) {
121     UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
122     UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
123     UHashTok name1, name2, path1, path2;
124     name1.pointer = b1->name;
125     name2.pointer = b2->name;
126     path1.pointer = b1->path;
127     path2.pointer = b2->path;
128     return ((UBool)(uhash_compareChars(name1, name2) &
129         uhash_compareChars(path1, path2)));
130 }
131 
132 static void
usprep_unload(UStringPrepProfile * data)133 usprep_unload(UStringPrepProfile* data){
134     udata_close(data->sprepData);
135 }
136 
137 static int32_t
usprep_internal_flushCache(UBool noRefCount)138 usprep_internal_flushCache(UBool noRefCount){
139     UStringPrepProfile *profile = NULL;
140     UStringPrepKey  *key  = NULL;
141     int32_t pos = UHASH_FIRST;
142     int32_t deletedNum = 0;
143     const UHashElement *e;
144 
145     /*
146      * if shared data hasn't even been lazy evaluated yet
147      * return 0
148      */
149     umtx_lock(&usprepMutex);
150     if (SHARED_DATA_HASHTABLE == NULL) {
151         umtx_unlock(&usprepMutex);
152         return 0;
153     }
154 
155     /*creates an enumeration to iterate through every element in the table */
156     while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
157     {
158         profile = (UStringPrepProfile *) e->value.pointer;
159         key  = (UStringPrepKey *) e->key.pointer;
160 
161         if ((noRefCount== FALSE && profile->refCount == 0) ||
162              noRefCount== TRUE) {
163             deletedNum++;
164             uhash_removeElement(SHARED_DATA_HASHTABLE, e);
165 
166             /* unload the data */
167             usprep_unload(profile);
168 
169             if(key->name != NULL) {
170                 uprv_free(key->name);
171                 key->name=NULL;
172             }
173             if(key->path != NULL) {
174                 uprv_free(key->path);
175                 key->path=NULL;
176             }
177             uprv_free(profile);
178             uprv_free(key);
179         }
180 
181     }
182     umtx_unlock(&usprepMutex);
183 
184     return deletedNum;
185 }
186 
187 /* Works just like ucnv_flushCache()
188 static int32_t
189 usprep_flushCache(){
190     return usprep_internal_flushCache(FALSE);
191 }
192 */
193 
usprep_cleanup(void)194 static UBool U_CALLCONV usprep_cleanup(void){
195     if (SHARED_DATA_HASHTABLE != NULL) {
196         usprep_internal_flushCache(TRUE);
197         if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
198             uhash_close(SHARED_DATA_HASHTABLE);
199             SHARED_DATA_HASHTABLE = NULL;
200         }
201     }
202     gSharedDataInitOnce.reset();
203     return (SHARED_DATA_HASHTABLE == NULL);
204 }
205 U_CDECL_END
206 
207 
208 /** Initializes the cache for resources */
209 static void U_CALLCONV
createCache(UErrorCode & status)210 createCache(UErrorCode &status) {
211     SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
212     if (U_FAILURE(status)) {
213         SHARED_DATA_HASHTABLE = NULL;
214     }
215     ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
216 }
217 
218 static void
initCache(UErrorCode * status)219 initCache(UErrorCode *status) {
220     umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
221 }
222 
223 static UBool U_CALLCONV
loadData(UStringPrepProfile * profile,const char * path,const char * name,const char * type,UErrorCode * errorCode)224 loadData(UStringPrepProfile* profile,
225          const char* path,
226          const char* name,
227          const char* type,
228          UErrorCode* errorCode) {
229     /* load Unicode SPREP data from file */
230     UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
231     UDataMemory *dataMemory;
232     const int32_t *p=NULL;
233     const uint8_t *pb;
234     UVersionInfo normUnicodeVersion;
235     int32_t normUniVer, sprepUniVer, normCorrVer;
236 
237     if(errorCode==NULL || U_FAILURE(*errorCode)) {
238         return 0;
239     }
240 
241     /* open the data outside the mutex block */
242     //TODO: change the path
243     dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
244     if(U_FAILURE(*errorCode)) {
245         return FALSE;
246     }
247 
248     p=(const int32_t *)udata_getMemory(dataMemory);
249     pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
250     utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
251     _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
252 
253 
254     if(U_FAILURE(*errorCode)) {
255         udata_close(dataMemory);
256         return FALSE;
257     }
258 
259     /* in the mutex block, set the data for this process */
260     umtx_lock(&usprepMutex);
261     if(profile->sprepData==NULL) {
262         profile->sprepData=dataMemory;
263         dataMemory=NULL;
264         uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
265         uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
266     } else {
267         p=(const int32_t *)udata_getMemory(profile->sprepData);
268     }
269     umtx_unlock(&usprepMutex);
270     /* initialize some variables */
271     profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
272 
273     u_getUnicodeVersion(normUnicodeVersion);
274     normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
275                  (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
276     sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
277                   (dataVersion[2] << 8 ) + (dataVersion[3]);
278     normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
279 
280     if(U_FAILURE(*errorCode)){
281         udata_close(dataMemory);
282         return FALSE;
283     }
284     if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
285         normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
286         ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
287       ){
288         *errorCode = U_INVALID_FORMAT_ERROR;
289         udata_close(dataMemory);
290         return FALSE;
291     }
292     profile->isDataLoaded = TRUE;
293 
294     /* if a different thread set it first, then close the extra data */
295     if(dataMemory!=NULL) {
296         udata_close(dataMemory); /* NULL if it was set correctly */
297     }
298 
299 
300     return profile->isDataLoaded;
301 }
302 
303 static UStringPrepProfile*
usprep_getProfile(const char * path,const char * name,UErrorCode * status)304 usprep_getProfile(const char* path,
305                   const char* name,
306                   UErrorCode *status){
307 
308     UStringPrepProfile* profile = NULL;
309 
310     initCache(status);
311 
312     if(U_FAILURE(*status)){
313         return NULL;
314     }
315 
316     UStringPrepKey stackKey;
317     /*
318      * const is cast way to save malloc, strcpy and free calls
319      * we use the passed in pointers for fetching the data from the
320      * hash table which is safe
321      */
322     stackKey.name = (char*) name;
323     stackKey.path = (char*) path;
324 
325     /* fetch the data from the cache */
326     umtx_lock(&usprepMutex);
327     profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
328     if(profile != NULL) {
329         profile->refCount++;
330     }
331     umtx_unlock(&usprepMutex);
332 
333     if(profile == NULL) {
334         /* else load the data and put the data in the cache */
335         LocalMemory<UStringPrepProfile> newProfile;
336         if(newProfile.allocateInsteadAndReset() == NULL) {
337             *status = U_MEMORY_ALLOCATION_ERROR;
338             return NULL;
339         }
340 
341         /* load the data */
342         if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
343             return NULL;
344         }
345 
346         /* get the options */
347         newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
348         newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
349 
350         if(newProfile->checkBiDi) {
351             newProfile->bdp = ubidi_getSingleton();
352         }
353 
354         LocalMemory<UStringPrepKey> key;
355         LocalMemory<char> keyName;
356         LocalMemory<char> keyPath;
357         if( key.allocateInsteadAndReset() == NULL ||
358             keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
359             (path != NULL &&
360              keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
361          ) {
362             *status = U_MEMORY_ALLOCATION_ERROR;
363             usprep_unload(newProfile.getAlias());
364             return NULL;
365         }
366 
367         umtx_lock(&usprepMutex);
368         // If another thread already inserted the same key/value, refcount and cleanup our thread data
369         profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
370         if(profile != NULL) {
371             profile->refCount++;
372             usprep_unload(newProfile.getAlias());
373         }
374         else {
375             /* initialize the key members */
376             key->name = keyName.orphan();
377             uprv_strcpy(key->name, name);
378             if(path != NULL){
379                 key->path = keyPath.orphan();
380                 uprv_strcpy(key->path, path);
381             }
382             profile = newProfile.orphan();
383 
384             /* add the data object to the cache */
385             profile->refCount = 1;
386             uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
387         }
388         umtx_unlock(&usprepMutex);
389     }
390 
391     return profile;
392 }
393 
394 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char * path,const char * name,UErrorCode * status)395 usprep_open(const char* path,
396             const char* name,
397             UErrorCode* status){
398 
399     if(status == NULL || U_FAILURE(*status)){
400         return NULL;
401     }
402 
403     /* initialize the profile struct members */
404     return usprep_getProfile(path,name,status);
405 }
406 
407 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_openByType(UStringPrepProfileType type,UErrorCode * status)408 usprep_openByType(UStringPrepProfileType type,
409 				  UErrorCode* status) {
410     if(status == NULL || U_FAILURE(*status)){
411         return NULL;
412     }
413     int32_t index = (int32_t)type;
414     if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) {
415         *status = U_ILLEGAL_ARGUMENT_ERROR;
416         return NULL;
417     }
418     return usprep_open(NULL, PROFILE_NAMES[index], status);
419 }
420 
421 U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile * profile)422 usprep_close(UStringPrepProfile* profile){
423     if(profile==NULL){
424         return;
425     }
426 
427     umtx_lock(&usprepMutex);
428     /* decrement the ref count*/
429     if(profile->refCount > 0){
430         profile->refCount--;
431     }
432     umtx_unlock(&usprepMutex);
433 
434 }
435 
436 U_CFUNC void
uprv_syntaxError(const UChar * rules,int32_t pos,int32_t rulesLen,UParseError * parseError)437 uprv_syntaxError(const UChar* rules,
438                  int32_t pos,
439                  int32_t rulesLen,
440                  UParseError* parseError){
441     if(parseError == NULL){
442         return;
443     }
444     parseError->offset = pos;
445     parseError->line = 0 ; // we are not using line numbers
446 
447     // for pre-context
448     int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
449     int32_t limit = pos;
450 
451     u_memcpy(parseError->preContext,rules+start,limit-start);
452     //null terminate the buffer
453     parseError->preContext[limit-start] = 0;
454 
455     // for post-context; include error rules[pos]
456     start = pos;
457     limit = start + (U_PARSE_CONTEXT_LEN-1);
458     if (limit > rulesLen) {
459         limit = rulesLen;
460     }
461     if (start < rulesLen) {
462         u_memcpy(parseError->postContext,rules+start,limit-start);
463     }
464     //null terminate the buffer
465     parseError->postContext[limit-start]= 0;
466 }
467 
468 
469 static inline UStringPrepType
getValues(uint16_t trieWord,int16_t & value,UBool & isIndex)470 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
471 
472     UStringPrepType type;
473     if(trieWord == 0){
474         /*
475          * Initial value stored in the mapping table
476          * just return USPREP_TYPE_LIMIT .. so that
477          * the source codepoint is copied to the destination
478          */
479         type = USPREP_TYPE_LIMIT;
480         isIndex =FALSE;
481         value = 0;
482     }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
483         type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
484         isIndex =FALSE;
485         value = 0;
486     }else{
487         /* get the type */
488         type = USPREP_MAP;
489         /* ascertain if the value is index or delta */
490         if(trieWord & 0x02){
491             isIndex = TRUE;
492             value = trieWord  >> 2; //mask off the lower 2 bits and shift
493         }else{
494             isIndex = FALSE;
495             value = (int16_t)trieWord;
496             value =  (value >> 2);
497         }
498 
499         if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
500             type = USPREP_DELETE;
501             isIndex =FALSE;
502             value = 0;
503         }
504     }
505     return type;
506 }
507 
508 // TODO: change to writing to UnicodeString not UChar *
509 static int32_t
usprep_map(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)510 usprep_map(  const UStringPrepProfile* profile,
511              const UChar* src, int32_t srcLength,
512              UChar* dest, int32_t destCapacity,
513              int32_t options,
514              UParseError* parseError,
515              UErrorCode* status ){
516 
517     uint16_t result;
518     int32_t destIndex=0;
519     int32_t srcIndex;
520     UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
521     UStringPrepType type;
522     int16_t value;
523     UBool isIndex;
524     const int32_t* indexes = profile->indexes;
525 
526     // no error checking the caller check for error and arguments
527     // no string length check the caller finds out the string length
528 
529     for(srcIndex=0;srcIndex<srcLength;){
530         UChar32 ch;
531 
532         U16_NEXT(src,srcIndex,srcLength,ch);
533 
534         result=0;
535 
536         UTRIE_GET16(&profile->sprepTrie,ch,result);
537 
538         type = getValues(result, value, isIndex);
539 
540         // check if the source codepoint is unassigned
541         if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
542 
543             uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
544             *status = U_STRINGPREP_UNASSIGNED_ERROR;
545             return 0;
546 
547         }else if(type == USPREP_MAP){
548 
549             int32_t index, length;
550 
551             if(isIndex){
552                 index = value;
553                 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
554                          index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
555                     length = 1;
556                 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
557                          index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
558                     length = 2;
559                 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
560                          index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
561                     length = 3;
562                 }else{
563                     length = profile->mappingData[index++];
564 
565                 }
566 
567                 /* copy mapping to destination */
568                 for(int32_t i=0; i< length; i++){
569                     if(destIndex < destCapacity  ){
570                         dest[destIndex] = profile->mappingData[index+i];
571                     }
572                     destIndex++; /* for pre-flighting */
573                 }
574                 continue;
575             }else{
576                 // subtract the delta to arrive at the code point
577                 ch -= value;
578             }
579 
580         }else if(type==USPREP_DELETE){
581              // just consume the codepoint and contine
582             continue;
583         }
584         //copy the code point into destination
585         if(ch <= 0xFFFF){
586             if(destIndex < destCapacity ){
587                 dest[destIndex] = (UChar)ch;
588             }
589             destIndex++;
590         }else{
591             if(destIndex+1 < destCapacity ){
592                 dest[destIndex]   = U16_LEAD(ch);
593                 dest[destIndex+1] = U16_TRAIL(ch);
594             }
595             destIndex +=2;
596         }
597 
598     }
599 
600     return u_terminateUChars(dest, destCapacity, destIndex, status);
601 }
602 
603 /*
604    1) Map -- For each character in the input, check if it has a mapping
605       and, if so, replace it with its mapping.
606 
607    2) Normalize -- Possibly normalize the result of step 1 using Unicode
608       normalization.
609 
610    3) Prohibit -- Check for any characters that are not allowed in the
611       output.  If any are found, return an error.
612 
613    4) Check bidi -- Possibly check for right-to-left characters, and if
614       any are found, make sure that the whole string satisfies the
615       requirements for bidirectional strings.  If the string does not
616       satisfy the requirements for bidirectional strings, return an
617       error.
618       [Unicode3.2] defines several bidirectional categories; each character
619        has one bidirectional category assigned to it.  For the purposes of
620        the requirements below, an "RandALCat character" is a character that
621        has Unicode bidirectional categories "R" or "AL"; an "LCat character"
       is a character that has Unicode bidirectional category "L".  Note
       that there are many characters which fall in neither of the above
626        definitions; Latin digits (<U+0030> through <U+0039>) are examples of
627        this because they have bidirectional category "EN".
628 
629        In any profile that specifies bidirectional character handling, all
630        three of the following requirements MUST be met:
631 
632        1) The characters in section 5.8 MUST be prohibited.
633 
634        2) If a string contains any RandALCat character, the string MUST NOT
635           contain any LCat character.
636 
637        3) If a string contains any RandALCat character, a RandALCat
638           character MUST be the first character of the string, and a
639           RandALCat character MUST be the last character of the string.
640 */
641 U_CAPI int32_t U_EXPORT2
usprep_prepare(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)642 usprep_prepare(   const UStringPrepProfile* profile,
643                   const UChar* src, int32_t srcLength,
644                   UChar* dest, int32_t destCapacity,
645                   int32_t options,
646                   UParseError* parseError,
647                   UErrorCode* status ){
648 
649     // check error status
650     if(U_FAILURE(*status)){
651         return 0;
652     }
653 
654     //check arguments
655     if(profile==NULL ||
656             (src==NULL ? srcLength!=0 : srcLength<-1) ||
657             (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
658         *status=U_ILLEGAL_ARGUMENT_ERROR;
659         return 0;
660     }
661 
662     //get the string length
663     if(srcLength < 0){
664         srcLength = u_strlen(src);
665     }
666     // map
667     UnicodeString s1;
668     UChar *b1 = s1.getBuffer(srcLength);
669     if(b1==NULL){
670         *status = U_MEMORY_ALLOCATION_ERROR;
671         return 0;
672     }
673     int32_t b1Len = usprep_map(profile, src, srcLength,
674                                b1, s1.getCapacity(), options, parseError, status);
675     s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
676 
677     if(*status == U_BUFFER_OVERFLOW_ERROR){
678         // redo processing of string
679         /* we do not have enough room so grow the buffer*/
680         b1 = s1.getBuffer(b1Len);
681         if(b1==NULL){
682             *status = U_MEMORY_ALLOCATION_ERROR;
683             return 0;
684         }
685 
686         *status = U_ZERO_ERROR; // reset error
687         b1Len = usprep_map(profile, src, srcLength,
688                            b1, s1.getCapacity(), options, parseError, status);
689         s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
690     }
691     if(U_FAILURE(*status)){
692         return 0;
693     }
694 
695     // normalize
696     UnicodeString s2;
697     if(profile->doNFKC){
698         const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
699         FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
700         if(U_FAILURE(*status)){
701             return 0;
702         }
703         fn2.normalize(s1, s2, *status);
704     }else{
705         s2.fastCopyFrom(s1);
706     }
707     if(U_FAILURE(*status)){
708         return 0;
709     }
710 
711     // Prohibit and checkBiDi in one pass
712     const UChar *b2 = s2.getBuffer();
713     int32_t b2Len = s2.length();
714     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
715     UBool leftToRight=FALSE, rightToLeft=FALSE;
716     int32_t rtlPos =-1, ltrPos =-1;
717 
718     for(int32_t b2Index=0; b2Index<b2Len;){
719         UChar32 ch = 0;
720         U16_NEXT(b2, b2Index, b2Len, ch);
721 
722         uint16_t result;
723         UTRIE_GET16(&profile->sprepTrie,ch,result);
724 
725         int16_t value;
726         UBool isIndex;
727         UStringPrepType type = getValues(result, value, isIndex);
728 
729         if( type == USPREP_PROHIBITED ||
730             ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
731            ){
732             *status = U_STRINGPREP_PROHIBITED_ERROR;
733             uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
734             return 0;
735         }
736 
737         if(profile->checkBiDi) {
738             direction = ubidi_getClass(profile->bdp, ch);
739             if(firstCharDir == U_CHAR_DIRECTION_COUNT){
740                 firstCharDir = direction;
741             }
742             if(direction == U_LEFT_TO_RIGHT){
743                 leftToRight = TRUE;
744                 ltrPos = b2Index-1;
745             }
746             if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
747                 rightToLeft = TRUE;
748                 rtlPos = b2Index-1;
749             }
750         }
751     }
752     if(profile->checkBiDi == TRUE){
753         // satisfy 2
754         if( leftToRight == TRUE && rightToLeft == TRUE){
755             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
756             uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
757             return 0;
758         }
759 
760         //satisfy 3
761         if( rightToLeft == TRUE &&
762             !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
763               (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
764            ){
765             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
766             uprv_syntaxError(b2, rtlPos, b2Len, parseError);
767             return FALSE;
768         }
769     }
770     return s2.extract(dest, destCapacity, *status);
771 }
772 
773 
774 /* data swapping ------------------------------------------------------------ */
775 
776 U_CAPI int32_t U_EXPORT2
usprep_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)777 usprep_swap(const UDataSwapper *ds,
778             const void *inData, int32_t length, void *outData,
779             UErrorCode *pErrorCode) {
780     const UDataInfo *pInfo;
781     int32_t headerSize;
782 
783     const uint8_t *inBytes;
784     uint8_t *outBytes;
785 
786     const int32_t *inIndexes;
787     int32_t indexes[16];
788 
789     int32_t i, offset, count, size;
790 
791     /* udata_swapDataHeader checks the arguments */
792     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
793     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
794         return 0;
795     }
796 
797     /* check data format and format version */
798     pInfo=(const UDataInfo *)((const char *)inData+4);
799     if(!(
800         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
801         pInfo->dataFormat[1]==0x50 &&
802         pInfo->dataFormat[2]==0x52 &&
803         pInfo->dataFormat[3]==0x50 &&
804         pInfo->formatVersion[0]==3
805     )) {
806         udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
807                          pInfo->dataFormat[0], pInfo->dataFormat[1],
808                          pInfo->dataFormat[2], pInfo->dataFormat[3],
809                          pInfo->formatVersion[0]);
810         *pErrorCode=U_UNSUPPORTED_ERROR;
811         return 0;
812     }
813 
814     inBytes=(const uint8_t *)inData+headerSize;
815     outBytes=(uint8_t *)outData+headerSize;
816 
817     inIndexes=(const int32_t *)inBytes;
818 
819     if(length>=0) {
820         length-=headerSize;
821         if(length<16*4) {
822             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
823                              length);
824             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
825             return 0;
826         }
827     }
828 
829     /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
830     for(i=0; i<16; ++i) {
831         indexes[i]=udata_readInt32(ds, inIndexes[i]);
832     }
833 
834     /* calculate the total length of the data */
835     size=
836         16*4+ /* size of indexes[] */
837         indexes[_SPREP_INDEX_TRIE_SIZE]+
838         indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
839 
840     if(length>=0) {
841         if(length<size) {
842             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
843                              length);
844             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
845             return 0;
846         }
847 
848         /* copy the data for inaccessible bytes */
849         if(inBytes!=outBytes) {
850             uprv_memcpy(outBytes, inBytes, size);
851         }
852 
853         offset=0;
854 
855         /* swap the int32_t indexes[] */
856         count=16*4;
857         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
858         offset+=count;
859 
860         /* swap the UTrie */
861         count=indexes[_SPREP_INDEX_TRIE_SIZE];
862         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
863         offset+=count;
864 
865         /* swap the uint16_t mappingTable[] */
866         count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
867         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
868         //offset+=count;
869     }
870 
871     return headerSize+size;
872 }
873 
874 #endif /* #if !UCONFIG_NO_IDNA */
875