1 /*
2  *******************************************************************************
3  *
4  *   Copyright (C) 2003-2014, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *
7  *******************************************************************************
8  *   file name:  uidna.cpp
9  *   encoding:   US-ASCII
10  *   tab size:   8 (not used)
11  *   indentation:4
12  *
13  *   created on: 2003feb1
14  *   created by: Ram Viswanadha
15  */
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_IDNA
20 
21 #include "unicode/uidna.h"
22 #include "unicode/ustring.h"
23 #include "unicode/usprep.h"
24 #include "punycode.h"
25 #include "ustr_imp.h"
26 #include "cmemory.h"
27 #include "uassert.h"
28 #include "sprpimpl.h"
29 
30 /* it is official IDNA ACE Prefix is "xn--" */
31 static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
32 #define ACE_PREFIX_LENGTH 4
33 
/* A converted (ASCII) label must not be longer than MAX_LABEL_LENGTH code units. */
#define MAX_LABEL_LENGTH 63
/* Capacity of the per-label stack buffers used by the conversion routines. */
#define MAX_LABEL_BUFFER_SIZE 100

/* A converted domain name must not be longer than MAX_DOMAIN_NAME_LENGTH code units. */
#define MAX_DOMAIN_NAME_LENGTH 255
/*
 * Capacity of the whole-domain stack buffers.
 * The expansion is parenthesized so that the macro behaves as a single value
 * inside larger expressions (e.g. MAX_IDN_BUFFER_SIZE * 2).
 */
#define MAX_IDN_BUFFER_SIZE   (MAX_DOMAIN_NAME_LENGTH+1)

/* ASCII code points used by the helpers in this file. */
#define LOWER_CASE_DELTA 0x0020  /* distance from 'A' to 'a' */
#define HYPHEN           0x002D  /* '-' */
#define FULL_STOP        0x002E  /* '.' */
#define CAPITAL_A        0x0041  /* 'A' */
#define CAPITAL_Z        0x005A  /* 'Z' */
47 
48 inline static UChar
toASCIILower(UChar ch)49 toASCIILower(UChar ch){
50     if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
51         return ch + LOWER_CASE_DELTA;
52     }
53     return ch;
54 }
55 
56 inline static UBool
startsWithPrefix(const UChar * src,int32_t srcLength)57 startsWithPrefix(const UChar* src , int32_t srcLength){
58     UBool startsWithPrefix = TRUE;
59 
60     if(srcLength < ACE_PREFIX_LENGTH){
61         return FALSE;
62     }
63 
64     for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){
65         if(toASCIILower(src[i]) != ACE_PREFIX[i]){
66             startsWithPrefix = FALSE;
67         }
68     }
69     return startsWithPrefix;
70 }
71 
72 
73 inline static int32_t
compareCaseInsensitiveASCII(const UChar * s1,int32_t s1Len,const UChar * s2,int32_t s2Len)74 compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
75                             const UChar* s2, int32_t s2Len){
76 
77     int32_t minLength;
78     int32_t lengthResult;
79 
80     // are we comparing different lengths?
81     if(s1Len != s2Len) {
82         if(s1Len < s2Len) {
83             minLength = s1Len;
84             lengthResult = -1;
85         } else {
86             minLength = s2Len;
87             lengthResult = 1;
88         }
89     } else {
90         // ok the lengths are equal
91         minLength = s1Len;
92         lengthResult = 0;
93     }
94 
95     UChar c1,c2;
96     int32_t rc;
97 
98     for(int32_t i =0;/* no condition */;i++) {
99 
100         /* If we reach the ends of both strings then they match */
101         if(i == minLength) {
102             return lengthResult;
103         }
104 
105         c1 = s1[i];
106         c2 = s2[i];
107 
108         /* Case-insensitive comparison */
109         if(c1!=c2) {
110             rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2);
111             if(rc!=0) {
112                 lengthResult=rc;
113                 break;
114             }
115         }
116     }
117     return lengthResult;
118 }
119 
120 
121 /**
122  * Ascertain if the given code point is a label separator as
123  * defined by the IDNA RFC
124  *
125  * @param ch The code point to be ascertained
126  * @return true if the char is a label separator
127  * @stable ICU 2.8
128  */
isLabelSeparator(UChar ch)129 static inline UBool isLabelSeparator(UChar ch){
130     switch(ch){
131         case 0x002e:
132         case 0x3002:
133         case 0xFF0E:
134         case 0xFF61:
135             return TRUE;
136         default:
137             return FALSE;
138     }
139 }
140 
141 // returns the length of the label excluding the separator
142 // if *limit == separator then the length returned does not include
143 // the separtor.
144 static inline int32_t
getNextSeparator(UChar * src,int32_t srcLength,UChar ** limit,UBool * done)145 getNextSeparator(UChar *src, int32_t srcLength,
146                  UChar **limit, UBool *done){
147     if(srcLength == -1){
148         int32_t i;
149         for(i=0 ; ;i++){
150             if(src[i] == 0){
151                 *limit = src + i; // point to null
152                 *done = TRUE;
153                 return i;
154             }
155             if(isLabelSeparator(src[i])){
156                 *limit = src + (i+1); // go past the delimiter
157                 return i;
158 
159             }
160         }
161     }else{
162         int32_t i;
163         for(i=0;i<srcLength;i++){
164             if(isLabelSeparator(src[i])){
165                 *limit = src + (i+1); // go past the delimiter
166                 return i;
167             }
168         }
169         // we have not found the delimiter
170         // if(i==srcLength)
171         *limit = src+srcLength;
172         *done = TRUE;
173 
174         return i;
175     }
176 }
isLDHChar(UChar ch)177 static inline UBool isLDHChar(UChar ch){
178     // high runner case
179     if(ch>0x007A){
180         return FALSE;
181     }
182     //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
183     if( (ch==0x002D) ||
184         (0x0030 <= ch && ch <= 0x0039) ||
185         (0x0041 <= ch && ch <= 0x005A) ||
186         (0x0061 <= ch && ch <= 0x007A)
187       ){
188         return TRUE;
189     }
190     return FALSE;
191 }
192 
193 static int32_t
_internal_toASCII(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UStringPrepProfile * nameprep,UParseError * parseError,UErrorCode * status)194 _internal_toASCII(const UChar* src, int32_t srcLength,
195                   UChar* dest, int32_t destCapacity,
196                   int32_t options,
197                   UStringPrepProfile* nameprep,
198                   UParseError* parseError,
199                   UErrorCode* status)
200 {
201 
202     // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
203     UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
204     //initialize pointers to stack buffers
205     UChar  *b1 = b1Stack, *b2 = b2Stack;
206     int32_t b1Len=0, b2Len,
207             b1Capacity = MAX_LABEL_BUFFER_SIZE,
208             b2Capacity = MAX_LABEL_BUFFER_SIZE ,
209             reqLength=0;
210 
211     int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
212     UBool* caseFlags = NULL;
213 
214     // the source contains all ascii codepoints
215     UBool srcIsASCII  = TRUE;
216     // assume the source contains all LDH codepoints
217     UBool srcIsLDH = TRUE;
218 
219     int32_t j=0;
220 
221     //get the options
222     UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
223 
224     int32_t failPos = -1;
225 
226     if(srcLength == -1){
227         srcLength = u_strlen(src);
228     }
229 
230     if(srcLength > b1Capacity){
231         b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR);
232         if(b1==NULL){
233             *status = U_MEMORY_ALLOCATION_ERROR;
234             goto CLEANUP;
235         }
236         b1Capacity = srcLength;
237     }
238 
239     // step 1
240     for( j=0;j<srcLength;j++){
241         if(src[j] > 0x7F){
242             srcIsASCII = FALSE;
243         }
244         b1[b1Len++] = src[j];
245     }
246 
247     // step 2 is performed only if the source contains non ASCII
248     if(srcIsASCII == FALSE){
249 
250         // step 2
251         b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
252 
253         if(*status == U_BUFFER_OVERFLOW_ERROR){
254             // redo processing of string
255             // we do not have enough room so grow the buffer
256             if(b1 != b1Stack){
257                 uprv_free(b1);
258             }
259             b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
260             if(b1==NULL){
261                 *status = U_MEMORY_ALLOCATION_ERROR;
262                 goto CLEANUP;
263             }
264 
265             *status = U_ZERO_ERROR; // reset error
266 
267             b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
268         }
269     }
270     // error bail out
271     if(U_FAILURE(*status)){
272         goto CLEANUP;
273     }
274     if(b1Len == 0){
275         *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
276         goto CLEANUP;
277     }
278 
279     // for step 3 & 4
280     srcIsASCII = TRUE;
281     for( j=0;j<b1Len;j++){
282         // check if output of usprep_prepare is all ASCII
283         if(b1[j] > 0x7F){
284             srcIsASCII = FALSE;
285         }else if(isLDHChar(b1[j])==FALSE){  // if the char is in ASCII range verify that it is an LDH character
286             srcIsLDH = FALSE;
287             failPos = j;
288         }
289     }
290     if(useSTD3ASCIIRules == TRUE){
291         // verify 3a and 3b
292         // 3(a) Verify the absence of non-LDH ASCII code points; that is, the
293         //  absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
294         // 3(b) Verify the absence of leading and trailing hyphen-minus; that
295         //  is, the absence of U+002D at the beginning and end of the
296         //  sequence.
297         if( srcIsLDH == FALSE /* source at this point should not contain anyLDH characters */
298             || b1[0] ==  HYPHEN || b1[b1Len-1] == HYPHEN){
299             *status = U_IDNA_STD3_ASCII_RULES_ERROR;
300 
301             /* populate the parseError struct */
302             if(srcIsLDH==FALSE){
303                 // failPos is always set the index of failure
304                 uprv_syntaxError(b1,failPos, b1Len,parseError);
305             }else if(b1[0] == HYPHEN){
306                 // fail position is 0
307                 uprv_syntaxError(b1,0,b1Len,parseError);
308             }else{
309                 // the last index in the source is always length-1
310                 uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError);
311             }
312 
313             goto CLEANUP;
314         }
315     }
316     // Step 4: if the source is ASCII then proceed to step 8
317     if(srcIsASCII){
318         if(b1Len <= destCapacity){
319             uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR);
320             reqLength = b1Len;
321         }else{
322             reqLength = b1Len;
323             goto CLEANUP;
324         }
325     }else{
326         // step 5 : verify the sequence does not begin with ACE prefix
327         if(!startsWithPrefix(b1,b1Len)){
328 
329             //step 6: encode the sequence with punycode
330 
331             // do not preserve the case flags for now!
332             // TODO: Preserve the case while implementing the RFE
333             // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
334             // uprv_memset(caseFlags,TRUE,b1Len);
335 
336             b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status);
337 
338             if(*status == U_BUFFER_OVERFLOW_ERROR){
339                 // redo processing of string
340                 /* we do not have enough room so grow the buffer*/
341                 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
342                 if(b2 == NULL){
343                     *status = U_MEMORY_ALLOCATION_ERROR;
344                     goto CLEANUP;
345                 }
346 
347                 *status = U_ZERO_ERROR; // reset error
348 
349                 b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status);
350             }
351             //error bail out
352             if(U_FAILURE(*status)){
353                 goto CLEANUP;
354             }
355             // TODO : Reconsider while implementing the case preserve RFE
356             // convert all codepoints to lower case ASCII
357             // toASCIILower(b2,b2Len);
358             reqLength = b2Len+ACE_PREFIX_LENGTH;
359 
360             if(reqLength > destCapacity){
361                 *status = U_BUFFER_OVERFLOW_ERROR;
362                 goto CLEANUP;
363             }
364             //Step 7: prepend the ACE prefix
365             uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR);
366             //Step 6: copy the contents in b2 into dest
367             uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR);
368 
369         }else{
370             *status = U_IDNA_ACE_PREFIX_ERROR;
371             //position of failure is 0
372             uprv_syntaxError(b1,0,b1Len,parseError);
373             goto CLEANUP;
374         }
375     }
376     // step 8: verify the length of label
377     if(reqLength > MAX_LABEL_LENGTH){
378         *status = U_IDNA_LABEL_TOO_LONG_ERROR;
379     }
380 
381 CLEANUP:
382     if(b1 != b1Stack){
383         uprv_free(b1);
384     }
385     if(b2 != b2Stack){
386         uprv_free(b2);
387     }
388     uprv_free(caseFlags);
389 
390     return u_terminateUChars(dest, destCapacity, reqLength, status);
391 }
392 
393 static int32_t
_internal_toUnicode(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UStringPrepProfile * nameprep,UParseError * parseError,UErrorCode * status)394 _internal_toUnicode(const UChar* src, int32_t srcLength,
395                     UChar* dest, int32_t destCapacity,
396                     int32_t options,
397                     UStringPrepProfile* nameprep,
398                     UParseError* parseError,
399                     UErrorCode* status)
400 {
401 
402     //get the options
403     //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
404     int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
405 
406     // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
407     UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
408 
409     //initialize pointers to stack buffers
410     UChar  *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
411     int32_t b1Len = 0, b2Len, b1PrimeLen, b3Len,
412             b1Capacity = MAX_LABEL_BUFFER_SIZE,
413             b2Capacity = MAX_LABEL_BUFFER_SIZE,
414             b3Capacity = MAX_LABEL_BUFFER_SIZE,
415             reqLength=0;
416 
417     UBool* caseFlags = NULL;
418 
419     UBool srcIsASCII = TRUE;
420     /*UBool srcIsLDH = TRUE;
421     int32_t failPos =0;*/
422 
423     // step 1: find out if all the codepoints in src are ASCII
424     if(srcLength==-1){
425         srcLength = 0;
426         for(;src[srcLength]!=0;){
427             if(src[srcLength]> 0x7f){
428                 srcIsASCII = FALSE;
429             }/*else if(isLDHChar(src[srcLength])==FALSE){
430                 // here we do not assemble surrogates
431                 // since we know that LDH code points
432                 // are in the ASCII range only
433                 srcIsLDH = FALSE;
434                 failPos = srcLength;
435             }*/
436             srcLength++;
437         }
438     }else if(srcLength > 0){
439         for(int32_t j=0; j<srcLength; j++){
440             if(src[j]> 0x7f){
441                 srcIsASCII = FALSE;
442             }/*else if(isLDHChar(src[j])==FALSE){
443                 // here we do not assemble surrogates
444                 // since we know that LDH code points
445                 // are in the ASCII range only
446                 srcIsLDH = FALSE;
447                 failPos = j;
448             }*/
449         }
450     }else{
451         return 0;
452     }
453 
454     if(srcIsASCII == FALSE){
455         // step 2: process the string
456         b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
457         if(*status == U_BUFFER_OVERFLOW_ERROR){
458             // redo processing of string
459             /* we do not have enough room so grow the buffer*/
460             b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
461             if(b1==NULL){
462                 *status = U_MEMORY_ALLOCATION_ERROR;
463                 goto CLEANUP;
464             }
465 
466             *status = U_ZERO_ERROR; // reset error
467 
468             b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
469         }
470         //bail out on error
471         if(U_FAILURE(*status)){
472             goto CLEANUP;
473         }
474     }else{
475 
476         //just point src to b1
477         b1 = (UChar*) src;
478         b1Len = srcLength;
479     }
480 
481     // The RFC states that
482     // <quote>
483     // ToUnicode never fails. If any step fails, then the original input
484     // is returned immediately in that step.
485     // </quote>
486 
487     //step 3: verify ACE Prefix
488     if(startsWithPrefix(b1,b1Len)){
489 
490         //step 4: Remove the ACE Prefix
491         b1Prime = b1 + ACE_PREFIX_LENGTH;
492         b1PrimeLen  = b1Len - ACE_PREFIX_LENGTH;
493 
494         //step 5: Decode using punycode
495         b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status);
496 
497         if(*status == U_BUFFER_OVERFLOW_ERROR){
498             // redo processing of string
499             /* we do not have enough room so grow the buffer*/
500             b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
501             if(b2==NULL){
502                 *status = U_MEMORY_ALLOCATION_ERROR;
503                 goto CLEANUP;
504             }
505 
506             *status = U_ZERO_ERROR; // reset error
507 
508             b2Len =  u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status);
509         }
510 
511 
512         //step 6:Apply toASCII
513         b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status);
514 
515         if(*status == U_BUFFER_OVERFLOW_ERROR){
516             // redo processing of string
517             /* we do not have enough room so grow the buffer*/
518             b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
519             if(b3==NULL){
520                 *status = U_MEMORY_ALLOCATION_ERROR;
521                 goto CLEANUP;
522             }
523 
524             *status = U_ZERO_ERROR; // reset error
525 
526             b3Len =  uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status);
527 
528         }
529         //bail out on error
530         if(U_FAILURE(*status)){
531             goto CLEANUP;
532         }
533 
534         //step 7: verify
535         if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
536             // Cause the original to be returned.
537             *status = U_IDNA_VERIFICATION_ERROR;
538             goto CLEANUP;
539         }
540 
541         //step 8: return output of step 5
542         reqLength = b2Len;
543         if(b2Len <= destCapacity) {
544             uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR);
545         }
546     }
547     else{
548         // See the start of this if statement for why this is commented out.
549         // verify that STD3 ASCII rules are satisfied
550         /*if(useSTD3ASCIIRules == TRUE){
551             if( srcIsLDH == FALSE // source contains some non-LDH characters
552                 || src[0] ==  HYPHEN || src[srcLength-1] == HYPHEN){
553                 *status = U_IDNA_STD3_ASCII_RULES_ERROR;
554 
555                 // populate the parseError struct
556                 if(srcIsLDH==FALSE){
557                     // failPos is always set the index of failure
558                     uprv_syntaxError(src,failPos, srcLength,parseError);
559                 }else if(src[0] == HYPHEN){
560                     // fail position is 0
561                     uprv_syntaxError(src,0,srcLength,parseError);
562                 }else{
563                     // the last index in the source is always length-1
564                     uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError);
565                 }
566 
567                 goto CLEANUP;
568             }
569         }*/
570         // just return the source
571         //copy the source to destination
572         if(srcLength <= destCapacity){
573             uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
574         }
575         reqLength = srcLength;
576     }
577 
578 
579 CLEANUP:
580 
581     if(b1 != b1Stack && b1!=src){
582         uprv_free(b1);
583     }
584     if(b2 != b2Stack){
585         uprv_free(b2);
586     }
587     uprv_free(caseFlags);
588 
589     // The RFC states that
590     // <quote>
591     // ToUnicode never fails. If any step fails, then the original input
592     // is returned immediately in that step.
593     // </quote>
594     // So if any step fails lets copy source to destination
595     if(U_FAILURE(*status)){
596         //copy the source to destination
597         if(dest && srcLength <= destCapacity){
598             // srcLength should have already been set earlier.
599             U_ASSERT(srcLength >= 0);
600             uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
601         }
602         reqLength = srcLength;
603         *status = U_ZERO_ERROR;
604     }
605 
606     return u_terminateUChars(dest, destCapacity, reqLength, status);
607 }
608 
609 U_CAPI int32_t U_EXPORT2
uidna_toASCII(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)610 uidna_toASCII(const UChar* src, int32_t srcLength,
611               UChar* dest, int32_t destCapacity,
612               int32_t options,
613               UParseError* parseError,
614               UErrorCode* status){
615 
616     if(status == NULL || U_FAILURE(*status)){
617         return 0;
618     }
619     if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
620         *status = U_ILLEGAL_ARGUMENT_ERROR;
621         return 0;
622     }
623 
624     UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
625 
626     if(U_FAILURE(*status)){
627         return -1;
628     }
629 
630     int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
631 
632     /* close the profile*/
633     usprep_close(nameprep);
634 
635     return retLen;
636 }
637 
638 U_CAPI int32_t U_EXPORT2
uidna_toUnicode(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)639 uidna_toUnicode(const UChar* src, int32_t srcLength,
640                 UChar* dest, int32_t destCapacity,
641                 int32_t options,
642                 UParseError* parseError,
643                 UErrorCode* status){
644 
645     if(status == NULL || U_FAILURE(*status)){
646         return 0;
647     }
648     if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
649         *status = U_ILLEGAL_ARGUMENT_ERROR;
650         return 0;
651     }
652 
653     UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
654 
655     if(U_FAILURE(*status)){
656         return -1;
657     }
658 
659     int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
660 
661     usprep_close(nameprep);
662 
663     return retLen;
664 }
665 
666 
667 U_CAPI int32_t U_EXPORT2
uidna_IDNToASCII(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)668 uidna_IDNToASCII(  const UChar *src, int32_t srcLength,
669                    UChar* dest, int32_t destCapacity,
670                    int32_t options,
671                    UParseError *parseError,
672                    UErrorCode *status){
673 
674     if(status == NULL || U_FAILURE(*status)){
675         return 0;
676     }
677     if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
678         *status = U_ILLEGAL_ARGUMENT_ERROR;
679         return 0;
680     }
681 
682     int32_t reqLength = 0;
683 
684     UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
685 
686     if(U_FAILURE(*status)){
687         return 0;
688     }
689 
690     //initialize pointers
691     UChar *delimiter = (UChar*)src;
692     UChar *labelStart = (UChar*)src;
693     UChar *currentDest = (UChar*) dest;
694     int32_t remainingLen = srcLength;
695     int32_t remainingDestCapacity = destCapacity;
696     int32_t labelLen = 0, labelReqLength = 0;
697     UBool done = FALSE;
698 
699 
700     for(;;){
701 
702         labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
703         labelReqLength = 0;
704         if(!(labelLen==0 && done)){// make sure this is not a root label separator.
705 
706             labelReqLength = _internal_toASCII( labelStart, labelLen,
707                                                 currentDest, remainingDestCapacity,
708                                                 options, nameprep,
709                                                 parseError, status);
710 
711             if(*status == U_BUFFER_OVERFLOW_ERROR){
712 
713                 *status = U_ZERO_ERROR; // reset error
714                 remainingDestCapacity = 0;
715             }
716         }
717 
718 
719         if(U_FAILURE(*status)){
720             break;
721         }
722 
723         reqLength +=labelReqLength;
724         // adjust the destination pointer
725         if(labelReqLength < remainingDestCapacity){
726             currentDest = currentDest + labelReqLength;
727             remainingDestCapacity -= labelReqLength;
728         }else{
729             // should never occur
730             remainingDestCapacity = 0;
731         }
732 
733         if(done == TRUE){
734             break;
735         }
736 
737         // add the label separator
738         if(remainingDestCapacity > 0){
739             *currentDest++ = FULL_STOP;
740             remainingDestCapacity--;
741         }
742         reqLength++;
743 
744         labelStart = delimiter;
745         if(remainingLen >0 ){
746             remainingLen = (int32_t)(srcLength - (delimiter - src));
747         }
748 
749     }
750 
751     if(reqLength > MAX_DOMAIN_NAME_LENGTH){
752         *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
753     }
754 
755     usprep_close(nameprep);
756 
757     return u_terminateUChars(dest, destCapacity, reqLength, status);
758 }
759 
760 U_CAPI int32_t U_EXPORT2
uidna_IDNToUnicode(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)761 uidna_IDNToUnicode(  const UChar* src, int32_t srcLength,
762                      UChar* dest, int32_t destCapacity,
763                      int32_t options,
764                      UParseError* parseError,
765                      UErrorCode* status){
766 
767     if(status == NULL || U_FAILURE(*status)){
768         return 0;
769     }
770     if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
771         *status = U_ILLEGAL_ARGUMENT_ERROR;
772         return 0;
773     }
774 
775     int32_t reqLength = 0;
776 
777     UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
778 
779     if(U_FAILURE(*status)){
780         return 0;
781     }
782 
783     //initialize pointers
784     UChar *delimiter = (UChar*)src;
785     UChar *labelStart = (UChar*)src;
786     UChar *currentDest = (UChar*) dest;
787     int32_t remainingLen = srcLength;
788     int32_t remainingDestCapacity = destCapacity;
789     int32_t labelLen = 0, labelReqLength = 0;
790     UBool done = FALSE;
791 
792     for(;;){
793 
794         labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
795 
796         // The RFC states that
797         // <quote>
798         // ToUnicode never fails. If any step fails, then the original input
799         // is returned immediately in that step.
800         // </quote>
801         // _internal_toUnicode will copy the label.
802         /*if(labelLen==0 && done==FALSE){
803             *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
804             break;
805         }*/
806 
807         labelReqLength = _internal_toUnicode(labelStart, labelLen,
808                                              currentDest, remainingDestCapacity,
809                                              options, nameprep,
810                                              parseError, status);
811 
812         if(*status == U_BUFFER_OVERFLOW_ERROR){
813             *status = U_ZERO_ERROR; // reset error
814             remainingDestCapacity = 0;
815         }
816 
817         if(U_FAILURE(*status)){
818             break;
819         }
820 
821         reqLength +=labelReqLength;
822         // adjust the destination pointer
823         if(labelReqLength < remainingDestCapacity){
824             currentDest = currentDest + labelReqLength;
825             remainingDestCapacity -= labelReqLength;
826         }else{
827             // should never occur
828             remainingDestCapacity = 0;
829         }
830 
831         if(done == TRUE){
832             break;
833         }
834 
835         // add the label separator
836         // Unlike the ToASCII operation we don't normalize the label separators
837         if(remainingDestCapacity > 0){
838             *currentDest++ = *(labelStart + labelLen);
839             remainingDestCapacity--;
840         }
841         reqLength++;
842 
843         labelStart = delimiter;
844         if(remainingLen >0 ){
845             remainingLen = (int32_t)(srcLength - (delimiter - src));
846         }
847 
848     }
849 
850     if(reqLength > MAX_DOMAIN_NAME_LENGTH){
851         *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
852     }
853 
854     usprep_close(nameprep);
855 
856     return u_terminateUChars(dest, destCapacity, reqLength, status);
857 }
858 
859 U_CAPI int32_t U_EXPORT2
uidna_compare(const UChar * s1,int32_t length1,const UChar * s2,int32_t length2,int32_t options,UErrorCode * status)860 uidna_compare(  const UChar *s1, int32_t length1,
861                 const UChar *s2, int32_t length2,
862                 int32_t options,
863                 UErrorCode* status){
864 
865     if(status == NULL || U_FAILURE(*status)){
866         return -1;
867     }
868 
869     UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE];
870     UChar *b1 = b1Stack, *b2 = b2Stack;
871     int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE;
872     int32_t result=-1;
873 
874     UParseError parseError;
875 
876     b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
877     if(*status == U_BUFFER_OVERFLOW_ERROR){
878         // redo processing of string
879         b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
880         if(b1==NULL){
881             *status = U_MEMORY_ALLOCATION_ERROR;
882             goto CLEANUP;
883         }
884 
885         *status = U_ZERO_ERROR; // reset error
886 
887         b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
888 
889     }
890 
891     b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status);
892     if(*status == U_BUFFER_OVERFLOW_ERROR){
893         // redo processing of string
894         b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
895         if(b2==NULL){
896             *status = U_MEMORY_ALLOCATION_ERROR;
897             goto CLEANUP;
898         }
899 
900         *status = U_ZERO_ERROR; // reset error
901 
902         b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status);
903 
904     }
905     // when toASCII is applied all label separators are replaced with FULL_STOP
906     result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);
907 
908 CLEANUP:
909     if(b1 != b1Stack){
910         uprv_free(b1);
911     }
912 
913     if(b2 != b2Stack){
914         uprv_free(b2);
915     }
916 
917     return result;
918 }
919 
920 #endif /* #if !UCONFIG_NO_IDNA */
921