1 /*
2  *******************************************************************************
3  *
4  *   Copyright (C) 2003-2014, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *
7  *******************************************************************************
8  *   file name:  usprep.cpp
9  *   encoding:   US-ASCII
10  *   tab size:   8 (not used)
11  *   indentation:4
12  *
13  *   created on: 2003jul2
14  *   created by: Ram Viswanadha
15  */
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_IDNA
20 
21 #include "unicode/usprep.h"
22 
23 #include "unicode/normalizer2.h"
24 #include "unicode/ustring.h"
25 #include "unicode/uchar.h"
26 #include "unicode/uversion.h"
27 #include "umutex.h"
28 #include "cmemory.h"
29 #include "sprpimpl.h"
30 #include "ustr_imp.h"
31 #include "uhash.h"
32 #include "cstring.h"
33 #include "udataswp.h"
34 #include "ucln_cmn.h"
35 #include "ubidi_props.h"
36 #include "uprops.h"
37 
38 U_NAMESPACE_USE
39 
40 U_CDECL_BEGIN
41 
/*
 * Static cache for already opened StringPrep profiles.
 * usprep_getProfile() stores UStringPrepProfile values in it, keyed by a
 * UStringPrepKey (profile name plus data path).
 */
static UHashtable *SHARED_DATA_HASHTABLE = NULL;
/* one-time initialization guard for SHARED_DATA_HASHTABLE; used by initCache() */
static icu::UInitOnce gSharedDataInitOnce;

/* taken around accesses to SHARED_DATA_HASHTABLE and to the profiles' refCount */
static UMutex usprepMutex = U_MUTEX_INITIALIZER;

/* format version of spp file */
//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };

/* the Unicode version of the sprep data; filled in by isSPrepAcceptable() and read by loadData() */
static UVersionInfo dataVersion={ 0, 0, 0, 0 };
55 
/*
 * Data file base names, indexed by UStringPrepProfileType (see usprep_openByType()).
 * Profile names must be aligned to UStringPrepProfileType.
 * Several profile types intentionally share the "rfc3491" data.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
73 
74 static UBool U_CALLCONV
isSPrepAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)75 isSPrepAcceptable(void * /* context */,
76              const char * /* type */,
77              const char * /* name */,
78              const UDataInfo *pInfo) {
79     if(
80         pInfo->size>=20 &&
81         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
82         pInfo->charsetFamily==U_CHARSET_FAMILY &&
83         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
84         pInfo->dataFormat[1]==0x50 &&
85         pInfo->dataFormat[2]==0x52 &&
86         pInfo->dataFormat[3]==0x50 &&
87         pInfo->formatVersion[0]==3 &&
88         pInfo->formatVersion[2]==UTRIE_SHIFT &&
89         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
90     ) {
91         //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
92         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
93         return TRUE;
94     } else {
95         return FALSE;
96     }
97 }
98 
99 static int32_t U_CALLCONV
getSPrepFoldingOffset(uint32_t data)100 getSPrepFoldingOffset(uint32_t data) {
101 
102     return (int32_t)data;
103 
104 }
105 
106 /* hashes an entry  */
107 static int32_t U_CALLCONV
hashEntry(const UHashTok parm)108 hashEntry(const UHashTok parm) {
109     UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
110     UHashTok namekey, pathkey;
111     namekey.pointer = b->name;
112     pathkey.pointer = b->path;
113     return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
114 }
115 
116 /* compares two entries */
117 static UBool U_CALLCONV
compareEntries(const UHashTok p1,const UHashTok p2)118 compareEntries(const UHashTok p1, const UHashTok p2) {
119     UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
120     UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
121     UHashTok name1, name2, path1, path2;
122     name1.pointer = b1->name;
123     name2.pointer = b2->name;
124     path1.pointer = b1->path;
125     path2.pointer = b2->path;
126     return ((UBool)(uhash_compareChars(name1, name2) &
127         uhash_compareChars(path1, path2)));
128 }
129 
130 static void
usprep_unload(UStringPrepProfile * data)131 usprep_unload(UStringPrepProfile* data){
132     udata_close(data->sprepData);
133 }
134 
135 static int32_t
usprep_internal_flushCache(UBool noRefCount)136 usprep_internal_flushCache(UBool noRefCount){
137     UStringPrepProfile *profile = NULL;
138     UStringPrepKey  *key  = NULL;
139     int32_t pos = UHASH_FIRST;
140     int32_t deletedNum = 0;
141     const UHashElement *e;
142 
143     /*
144      * if shared data hasn't even been lazy evaluated yet
145      * return 0
146      */
147     umtx_lock(&usprepMutex);
148     if (SHARED_DATA_HASHTABLE == NULL) {
149         umtx_unlock(&usprepMutex);
150         return 0;
151     }
152 
153     /*creates an enumeration to iterate through every element in the table */
154     while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
155     {
156         profile = (UStringPrepProfile *) e->value.pointer;
157         key  = (UStringPrepKey *) e->key.pointer;
158 
159         if ((noRefCount== FALSE && profile->refCount == 0) ||
160              noRefCount== TRUE) {
161             deletedNum++;
162             uhash_removeElement(SHARED_DATA_HASHTABLE, e);
163 
164             /* unload the data */
165             usprep_unload(profile);
166 
167             if(key->name != NULL) {
168                 uprv_free(key->name);
169                 key->name=NULL;
170             }
171             if(key->path != NULL) {
172                 uprv_free(key->path);
173                 key->path=NULL;
174             }
175             uprv_free(profile);
176             uprv_free(key);
177         }
178 
179     }
180     umtx_unlock(&usprepMutex);
181 
182     return deletedNum;
183 }
184 
185 /* Works just like ucnv_flushCache()
186 static int32_t
187 usprep_flushCache(){
188     return usprep_internal_flushCache(FALSE);
189 }
190 */
191 
usprep_cleanup(void)192 static UBool U_CALLCONV usprep_cleanup(void){
193     if (SHARED_DATA_HASHTABLE != NULL) {
194         usprep_internal_flushCache(TRUE);
195         if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
196             uhash_close(SHARED_DATA_HASHTABLE);
197             SHARED_DATA_HASHTABLE = NULL;
198         }
199     }
200     gSharedDataInitOnce.reset();
201     return (SHARED_DATA_HASHTABLE == NULL);
202 }
203 U_CDECL_END
204 
205 
206 /** Initializes the cache for resources */
207 static void U_CALLCONV
createCache(UErrorCode & status)208 createCache(UErrorCode &status) {
209     SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
210     if (U_FAILURE(status)) {
211         SHARED_DATA_HASHTABLE = NULL;
212     }
213     ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
214 }
215 
216 static void
initCache(UErrorCode * status)217 initCache(UErrorCode *status) {
218     umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
219 }
220 
221 static UBool U_CALLCONV
loadData(UStringPrepProfile * profile,const char * path,const char * name,const char * type,UErrorCode * errorCode)222 loadData(UStringPrepProfile* profile,
223          const char* path,
224          const char* name,
225          const char* type,
226          UErrorCode* errorCode) {
227     /* load Unicode SPREP data from file */
228     UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
229     UDataMemory *dataMemory;
230     const int32_t *p=NULL;
231     const uint8_t *pb;
232     UVersionInfo normUnicodeVersion;
233     int32_t normUniVer, sprepUniVer, normCorrVer;
234 
235     if(errorCode==NULL || U_FAILURE(*errorCode)) {
236         return 0;
237     }
238 
239     /* open the data outside the mutex block */
240     //TODO: change the path
241     dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
242     if(U_FAILURE(*errorCode)) {
243         return FALSE;
244     }
245 
246     p=(const int32_t *)udata_getMemory(dataMemory);
247     pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
248     utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
249     _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
250 
251 
252     if(U_FAILURE(*errorCode)) {
253         udata_close(dataMemory);
254         return FALSE;
255     }
256 
257     /* in the mutex block, set the data for this process */
258     umtx_lock(&usprepMutex);
259     if(profile->sprepData==NULL) {
260         profile->sprepData=dataMemory;
261         dataMemory=NULL;
262         uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
263         uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
264     } else {
265         p=(const int32_t *)udata_getMemory(profile->sprepData);
266     }
267     umtx_unlock(&usprepMutex);
268     /* initialize some variables */
269     profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
270 
271     u_getUnicodeVersion(normUnicodeVersion);
272     normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
273                  (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
274     sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
275                   (dataVersion[2] << 8 ) + (dataVersion[3]);
276     normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
277 
278     if(U_FAILURE(*errorCode)){
279         udata_close(dataMemory);
280         return FALSE;
281     }
282     if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
283         normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
284         ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
285       ){
286         *errorCode = U_INVALID_FORMAT_ERROR;
287         udata_close(dataMemory);
288         return FALSE;
289     }
290     profile->isDataLoaded = TRUE;
291 
292     /* if a different thread set it first, then close the extra data */
293     if(dataMemory!=NULL) {
294         udata_close(dataMemory); /* NULL if it was set correctly */
295     }
296 
297 
298     return profile->isDataLoaded;
299 }
300 
301 static UStringPrepProfile*
usprep_getProfile(const char * path,const char * name,UErrorCode * status)302 usprep_getProfile(const char* path,
303                   const char* name,
304                   UErrorCode *status){
305 
306     UStringPrepProfile* profile = NULL;
307 
308     initCache(status);
309 
310     if(U_FAILURE(*status)){
311         return NULL;
312     }
313 
314     UStringPrepKey stackKey;
315     /*
316      * const is cast way to save malloc, strcpy and free calls
317      * we use the passed in pointers for fetching the data from the
318      * hash table which is safe
319      */
320     stackKey.name = (char*) name;
321     stackKey.path = (char*) path;
322 
323     /* fetch the data from the cache */
324     umtx_lock(&usprepMutex);
325     profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
326     if(profile != NULL) {
327         profile->refCount++;
328     }
329     umtx_unlock(&usprepMutex);
330 
331     if(profile == NULL) {
332         /* else load the data and put the data in the cache */
333         LocalMemory<UStringPrepProfile> newProfile;
334         if(newProfile.allocateInsteadAndReset() == NULL) {
335             *status = U_MEMORY_ALLOCATION_ERROR;
336             return NULL;
337         }
338 
339         /* load the data */
340         if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
341             return NULL;
342         }
343 
344         /* get the options */
345         newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
346         newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
347 
348         if(newProfile->checkBiDi) {
349             newProfile->bdp = ubidi_getSingleton();
350         }
351 
352         LocalMemory<UStringPrepKey> key;
353         LocalMemory<char> keyName;
354         LocalMemory<char> keyPath;
355         if( key.allocateInsteadAndReset() == NULL ||
356             keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
357             (path != NULL &&
358              keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
359          ) {
360             *status = U_MEMORY_ALLOCATION_ERROR;
361             usprep_unload(newProfile.getAlias());
362             return NULL;
363         }
364 
365         umtx_lock(&usprepMutex);
366         // If another thread already inserted the same key/value, refcount and cleanup our thread data
367         profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
368         if(profile != NULL) {
369             profile->refCount++;
370             usprep_unload(newProfile.getAlias());
371         }
372         else {
373             /* initialize the key members */
374             key->name = keyName.orphan();
375             uprv_strcpy(key->name, name);
376             if(path != NULL){
377                 key->path = keyPath.orphan();
378                 uprv_strcpy(key->path, path);
379             }
380             profile = newProfile.orphan();
381 
382             /* add the data object to the cache */
383             profile->refCount = 1;
384             uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
385         }
386         umtx_unlock(&usprepMutex);
387     }
388 
389     return profile;
390 }
391 
392 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char * path,const char * name,UErrorCode * status)393 usprep_open(const char* path,
394             const char* name,
395             UErrorCode* status){
396 
397     if(status == NULL || U_FAILURE(*status)){
398         return NULL;
399     }
400 
401     /* initialize the profile struct members */
402     return usprep_getProfile(path,name,status);
403 }
404 
405 U_CAPI UStringPrepProfile* U_EXPORT2
usprep_openByType(UStringPrepProfileType type,UErrorCode * status)406 usprep_openByType(UStringPrepProfileType type,
407 				  UErrorCode* status) {
408     if(status == NULL || U_FAILURE(*status)){
409         return NULL;
410     }
411     int32_t index = (int32_t)type;
412     if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) {
413         *status = U_ILLEGAL_ARGUMENT_ERROR;
414         return NULL;
415     }
416     return usprep_open(NULL, PROFILE_NAMES[index], status);
417 }
418 
419 U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile * profile)420 usprep_close(UStringPrepProfile* profile){
421     if(profile==NULL){
422         return;
423     }
424 
425     umtx_lock(&usprepMutex);
426     /* decrement the ref count*/
427     if(profile->refCount > 0){
428         profile->refCount--;
429     }
430     umtx_unlock(&usprepMutex);
431 
432 }
433 
434 U_CFUNC void
uprv_syntaxError(const UChar * rules,int32_t pos,int32_t rulesLen,UParseError * parseError)435 uprv_syntaxError(const UChar* rules,
436                  int32_t pos,
437                  int32_t rulesLen,
438                  UParseError* parseError){
439     if(parseError == NULL){
440         return;
441     }
442     parseError->offset = pos;
443     parseError->line = 0 ; // we are not using line numbers
444 
445     // for pre-context
446     int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
447     int32_t limit = pos;
448 
449     u_memcpy(parseError->preContext,rules+start,limit-start);
450     //null terminate the buffer
451     parseError->preContext[limit-start] = 0;
452 
453     // for post-context; include error rules[pos]
454     start = pos;
455     limit = start + (U_PARSE_CONTEXT_LEN-1);
456     if (limit > rulesLen) {
457         limit = rulesLen;
458     }
459     if (start < rulesLen) {
460         u_memcpy(parseError->postContext,rules+start,limit-start);
461     }
462     //null terminate the buffer
463     parseError->postContext[limit-start]= 0;
464 }
465 
466 
467 static inline UStringPrepType
getValues(uint16_t trieWord,int16_t & value,UBool & isIndex)468 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
469 
470     UStringPrepType type;
471     if(trieWord == 0){
472         /*
473          * Initial value stored in the mapping table
474          * just return USPREP_TYPE_LIMIT .. so that
475          * the source codepoint is copied to the destination
476          */
477         type = USPREP_TYPE_LIMIT;
478         isIndex =FALSE;
479         value = 0;
480     }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
481         type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
482         isIndex =FALSE;
483         value = 0;
484     }else{
485         /* get the type */
486         type = USPREP_MAP;
487         /* ascertain if the value is index or delta */
488         if(trieWord & 0x02){
489             isIndex = TRUE;
490             value = trieWord  >> 2; //mask off the lower 2 bits and shift
491         }else{
492             isIndex = FALSE;
493             value = (int16_t)trieWord;
494             value =  (value >> 2);
495         }
496 
497         if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
498             type = USPREP_DELETE;
499             isIndex =FALSE;
500             value = 0;
501         }
502     }
503     return type;
504 }
505 
506 // TODO: change to writing to UnicodeString not UChar *
507 static int32_t
usprep_map(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)508 usprep_map(  const UStringPrepProfile* profile,
509              const UChar* src, int32_t srcLength,
510              UChar* dest, int32_t destCapacity,
511              int32_t options,
512              UParseError* parseError,
513              UErrorCode* status ){
514 
515     uint16_t result;
516     int32_t destIndex=0;
517     int32_t srcIndex;
518     UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
519     UStringPrepType type;
520     int16_t value;
521     UBool isIndex;
522     const int32_t* indexes = profile->indexes;
523 
524     // no error checking the caller check for error and arguments
525     // no string length check the caller finds out the string length
526 
527     for(srcIndex=0;srcIndex<srcLength;){
528         UChar32 ch;
529 
530         U16_NEXT(src,srcIndex,srcLength,ch);
531 
532         result=0;
533 
534         UTRIE_GET16(&profile->sprepTrie,ch,result);
535 
536         type = getValues(result, value, isIndex);
537 
538         // check if the source codepoint is unassigned
539         if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
540 
541             uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
542             *status = U_STRINGPREP_UNASSIGNED_ERROR;
543             return 0;
544 
545         }else if(type == USPREP_MAP){
546 
547             int32_t index, length;
548 
549             if(isIndex){
550                 index = value;
551                 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
552                          index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
553                     length = 1;
554                 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
555                          index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
556                     length = 2;
557                 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
558                          index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
559                     length = 3;
560                 }else{
561                     length = profile->mappingData[index++];
562 
563                 }
564 
565                 /* copy mapping to destination */
566                 for(int32_t i=0; i< length; i++){
567                     if(destIndex < destCapacity  ){
568                         dest[destIndex] = profile->mappingData[index+i];
569                     }
570                     destIndex++; /* for pre-flighting */
571                 }
572                 continue;
573             }else{
574                 // subtract the delta to arrive at the code point
575                 ch -= value;
576             }
577 
578         }else if(type==USPREP_DELETE){
579              // just consume the codepoint and contine
580             continue;
581         }
582         //copy the code point into destination
583         if(ch <= 0xFFFF){
584             if(destIndex < destCapacity ){
585                 dest[destIndex] = (UChar)ch;
586             }
587             destIndex++;
588         }else{
589             if(destIndex+1 < destCapacity ){
590                 dest[destIndex]   = U16_LEAD(ch);
591                 dest[destIndex+1] = U16_TRAIL(ch);
592             }
593             destIndex +=2;
594         }
595 
596     }
597 
598     return u_terminateUChars(dest, destCapacity, destIndex, status);
599 }
600 
601 /*
602    1) Map -- For each character in the input, check if it has a mapping
603       and, if so, replace it with its mapping.
604 
605    2) Normalize -- Possibly normalize the result of step 1 using Unicode
606       normalization.
607 
608    3) Prohibit -- Check for any characters that are not allowed in the
609       output.  If any are found, return an error.
610 
611    4) Check bidi -- Possibly check for right-to-left characters, and if
612       any are found, make sure that the whole string satisfies the
613       requirements for bidirectional strings.  If the string does not
614       satisfy the requirements for bidirectional strings, return an
615       error.
616       [Unicode3.2] defines several bidirectional categories; each character
617        has one bidirectional category assigned to it.  For the purposes of
618        the requirements below, an "RandALCat character" is a character that
619        has Unicode bidirectional categories "R" or "AL"; an "LCat character"
       is a character that has Unicode bidirectional category "L".  Note
       that there are many characters which fall in neither of the above
624        definitions; Latin digits (<U+0030> through <U+0039>) are examples of
625        this because they have bidirectional category "EN".
626 
627        In any profile that specifies bidirectional character handling, all
628        three of the following requirements MUST be met:
629 
630        1) The characters in section 5.8 MUST be prohibited.
631 
632        2) If a string contains any RandALCat character, the string MUST NOT
633           contain any LCat character.
634 
635        3) If a string contains any RandALCat character, a RandALCat
636           character MUST be the first character of the string, and a
637           RandALCat character MUST be the last character of the string.
638 */
639 U_CAPI int32_t U_EXPORT2
usprep_prepare(const UStringPrepProfile * profile,const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)640 usprep_prepare(   const UStringPrepProfile* profile,
641                   const UChar* src, int32_t srcLength,
642                   UChar* dest, int32_t destCapacity,
643                   int32_t options,
644                   UParseError* parseError,
645                   UErrorCode* status ){
646 
647     // check error status
648     if(U_FAILURE(*status)){
649         return 0;
650     }
651 
652     //check arguments
653     if(profile==NULL ||
654             (src==NULL ? srcLength!=0 : srcLength<-1) ||
655             (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
656         *status=U_ILLEGAL_ARGUMENT_ERROR;
657         return 0;
658     }
659 
660     //get the string length
661     if(srcLength < 0){
662         srcLength = u_strlen(src);
663     }
664     // map
665     UnicodeString s1;
666     UChar *b1 = s1.getBuffer(srcLength);
667     if(b1==NULL){
668         *status = U_MEMORY_ALLOCATION_ERROR;
669         return 0;
670     }
671     int32_t b1Len = usprep_map(profile, src, srcLength,
672                                b1, s1.getCapacity(), options, parseError, status);
673     s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
674 
675     if(*status == U_BUFFER_OVERFLOW_ERROR){
676         // redo processing of string
677         /* we do not have enough room so grow the buffer*/
678         b1 = s1.getBuffer(b1Len);
679         if(b1==NULL){
680             *status = U_MEMORY_ALLOCATION_ERROR;
681             return 0;
682         }
683 
684         *status = U_ZERO_ERROR; // reset error
685         b1Len = usprep_map(profile, src, srcLength,
686                            b1, s1.getCapacity(), options, parseError, status);
687         s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
688     }
689     if(U_FAILURE(*status)){
690         return 0;
691     }
692 
693     // normalize
694     UnicodeString s2;
695     if(profile->doNFKC){
696         const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
697         FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
698         if(U_FAILURE(*status)){
699             return 0;
700         }
701         fn2.normalize(s1, s2, *status);
702     }else{
703         s2.fastCopyFrom(s1);
704     }
705     if(U_FAILURE(*status)){
706         return 0;
707     }
708 
709     // Prohibit and checkBiDi in one pass
710     const UChar *b2 = s2.getBuffer();
711     int32_t b2Len = s2.length();
712     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
713     UBool leftToRight=FALSE, rightToLeft=FALSE;
714     int32_t rtlPos =-1, ltrPos =-1;
715 
716     for(int32_t b2Index=0; b2Index<b2Len;){
717         UChar32 ch = 0;
718         U16_NEXT(b2, b2Index, b2Len, ch);
719 
720         uint16_t result;
721         UTRIE_GET16(&profile->sprepTrie,ch,result);
722 
723         int16_t value;
724         UBool isIndex;
725         UStringPrepType type = getValues(result, value, isIndex);
726 
727         if( type == USPREP_PROHIBITED ||
728             ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
729            ){
730             *status = U_STRINGPREP_PROHIBITED_ERROR;
731             uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
732             return 0;
733         }
734 
735         if(profile->checkBiDi) {
736             direction = ubidi_getClass(profile->bdp, ch);
737             if(firstCharDir == U_CHAR_DIRECTION_COUNT){
738                 firstCharDir = direction;
739             }
740             if(direction == U_LEFT_TO_RIGHT){
741                 leftToRight = TRUE;
742                 ltrPos = b2Index-1;
743             }
744             if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
745                 rightToLeft = TRUE;
746                 rtlPos = b2Index-1;
747             }
748         }
749     }
750     if(profile->checkBiDi == TRUE){
751         // satisfy 2
752         if( leftToRight == TRUE && rightToLeft == TRUE){
753             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
754             uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
755             return 0;
756         }
757 
758         //satisfy 3
759         if( rightToLeft == TRUE &&
760             !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
761               (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
762            ){
763             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
764             uprv_syntaxError(b2, rtlPos, b2Len, parseError);
765             return FALSE;
766         }
767     }
768     return s2.extract(dest, destCapacity, *status);
769 }
770 
771 
772 /* data swapping ------------------------------------------------------------ */
773 
774 U_CAPI int32_t U_EXPORT2
usprep_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)775 usprep_swap(const UDataSwapper *ds,
776             const void *inData, int32_t length, void *outData,
777             UErrorCode *pErrorCode) {
778     const UDataInfo *pInfo;
779     int32_t headerSize;
780 
781     const uint8_t *inBytes;
782     uint8_t *outBytes;
783 
784     const int32_t *inIndexes;
785     int32_t indexes[16];
786 
787     int32_t i, offset, count, size;
788 
789     /* udata_swapDataHeader checks the arguments */
790     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
791     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
792         return 0;
793     }
794 
795     /* check data format and format version */
796     pInfo=(const UDataInfo *)((const char *)inData+4);
797     if(!(
798         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
799         pInfo->dataFormat[1]==0x50 &&
800         pInfo->dataFormat[2]==0x52 &&
801         pInfo->dataFormat[3]==0x50 &&
802         pInfo->formatVersion[0]==3
803     )) {
804         udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
805                          pInfo->dataFormat[0], pInfo->dataFormat[1],
806                          pInfo->dataFormat[2], pInfo->dataFormat[3],
807                          pInfo->formatVersion[0]);
808         *pErrorCode=U_UNSUPPORTED_ERROR;
809         return 0;
810     }
811 
812     inBytes=(const uint8_t *)inData+headerSize;
813     outBytes=(uint8_t *)outData+headerSize;
814 
815     inIndexes=(const int32_t *)inBytes;
816 
817     if(length>=0) {
818         length-=headerSize;
819         if(length<16*4) {
820             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
821                              length);
822             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
823             return 0;
824         }
825     }
826 
827     /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
828     for(i=0; i<16; ++i) {
829         indexes[i]=udata_readInt32(ds, inIndexes[i]);
830     }
831 
832     /* calculate the total length of the data */
833     size=
834         16*4+ /* size of indexes[] */
835         indexes[_SPREP_INDEX_TRIE_SIZE]+
836         indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
837 
838     if(length>=0) {
839         if(length<size) {
840             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
841                              length);
842             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
843             return 0;
844         }
845 
846         /* copy the data for inaccessible bytes */
847         if(inBytes!=outBytes) {
848             uprv_memcpy(outBytes, inBytes, size);
849         }
850 
851         offset=0;
852 
853         /* swap the int32_t indexes[] */
854         count=16*4;
855         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
856         offset+=count;
857 
858         /* swap the UTrie */
859         count=indexes[_SPREP_INDEX_TRIE_SIZE];
860         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
861         offset+=count;
862 
863         /* swap the uint16_t mappingTable[] */
864         count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
865         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
866         //offset+=count;
867     }
868 
869     return headerSize+size;
870 }
871 
872 #endif /* #if !UCONFIG_NO_IDNA */
873