1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  *
6  *   Copyright (C) 2003-2011, International Business Machines
7  *   Corporation and others.  All Rights Reserved.
8  *
9  *******************************************************************************
10  *   file name:  idnaref.cpp
11  *   encoding:   UTF-8
12  *   tab size:   8 (not used)
13  *   indentation:4
14  *
15  *   created on: 2003feb1
16  *   created by: Ram Viswanadha
17  */
18 
19 #include "unicode/utypes.h"
20 
21 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION
22 #include "idnaref.h"
23 #include "punyref.h"
24 #include "ustr_imp.h"
25 #include "cmemory.h"
26 #include "sprpimpl.h"
27 #include "nptrans.h"
28 #include "testidna.h"
29 #include "punycode.h"
30 #include "unicode/ustring.h"
31 
/* The official IDNA ACE prefix is "xn--" */
33 static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
34 #define ACE_PREFIX_LENGTH 4
35 
36 #define MAX_LABEL_LENGTH 63
37 #define HYPHEN      0x002D
38 /* The Max length of the labels should not be more than 64 */
39 #define MAX_LABEL_BUFFER_SIZE 100
40 #define MAX_IDN_BUFFER_SIZE   300
41 
42 #define CAPITAL_A        0x0041
43 #define CAPITAL_Z        0x005A
44 #define LOWER_CASE_DELTA 0x0020
45 #define FULL_STOP        0x002E
46 
47 
48 inline static UBool
startsWithPrefix(const UChar * src,int32_t srcLength)49 startsWithPrefix(const UChar* src , int32_t srcLength){
50     UBool startsWithPrefix = TRUE;
51 
52     if(srcLength < ACE_PREFIX_LENGTH){
53         return FALSE;
54     }
55 
56     for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){
57         if(u_tolower(src[i]) != ACE_PREFIX[i]){
58             startsWithPrefix = FALSE;
59         }
60     }
61     return startsWithPrefix;
62 }
63 
64 inline static UChar
toASCIILower(UChar ch)65 toASCIILower(UChar ch){
66     if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
67         return ch + LOWER_CASE_DELTA;
68     }
69     return ch;
70 }
71 
72 inline static int32_t
compareCaseInsensitiveASCII(const UChar * s1,int32_t s1Len,const UChar * s2,int32_t s2Len)73 compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
74                             const UChar* s2, int32_t s2Len){
75     if(s1Len != s2Len){
76         return (s1Len > s2Len) ? s1Len : s2Len;
77     }
78     UChar c1,c2;
79     int32_t rc;
80 
81     for(int32_t i =0;/* no condition */;i++) {
82         /* If we reach the ends of both strings then they match */
83         if(i == s1Len) {
84             return 0;
85         }
86 
87         c1 = s1[i];
88         c2 = s2[i];
89 
90         /* Case-insensitive comparison */
91         if(c1!=c2) {
92             rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2);
93             if(rc!=0) {
94                 return rc;
95             }
96         }
97     }
98 
99 }
100 
getError(enum punycode_status status)101 static UErrorCode getError(enum punycode_status status){
102     switch(status){
103     case punycode_success:
104         return U_ZERO_ERROR;
105     case punycode_bad_input:   /* Input is invalid.                         */
106         return U_INVALID_CHAR_FOUND;
107     case punycode_big_output:  /* Output would exceed the space provided.   */
108         return U_BUFFER_OVERFLOW_ERROR;
109     case punycode_overflow :    /* Input requires wider integers to process. */
110         return U_INDEX_OUTOFBOUNDS_ERROR;
111     default:
112         return U_INTERNAL_PROGRAM_ERROR;
113     }
114 }
115 
convertASCIIToUChars(const char * src,UChar * dest,int32_t length)116 static inline int32_t convertASCIIToUChars(const char* src,UChar* dest, int32_t length){
117     int i;
118     for(i=0;i<length;i++){
119         dest[i] = src[i];
120     }
121     return i;
122 }
convertUCharsToASCII(const UChar * src,char * dest,int32_t length)123 static inline int32_t convertUCharsToASCII(const UChar* src,char* dest, int32_t length){
124     int i;
125     for(i=0;i<length;i++){
126         dest[i] = (char)src[i];
127     }
128     return i;
129 }
130 // wrapper around the reference Punycode implementation
convertToPuny(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UErrorCode & status)131 static int32_t convertToPuny(const UChar* src, int32_t srcLength,
132                              UChar* dest, int32_t destCapacity,
133                              UErrorCode& status){
134     uint32_t b1Stack[MAX_LABEL_BUFFER_SIZE];
135     int32_t b1Len = 0, b1Capacity = MAX_LABEL_BUFFER_SIZE;
136     uint32_t* b1 = b1Stack;
137     char b2Stack[MAX_LABEL_BUFFER_SIZE];
138     char* b2 = b2Stack;
139     int32_t b2Len =MAX_LABEL_BUFFER_SIZE ;
140     punycode_status error;
141     unsigned char* caseFlags = NULL;
142 
143     u_strToUTF32((UChar32*)b1,b1Capacity,&b1Len,src,srcLength,&status);
144     if(status == U_BUFFER_OVERFLOW_ERROR){
145         // redo processing of string
146         /* we do not have enough room so grow the buffer*/
147         b1 =  (uint32_t*) uprv_malloc(b1Len * sizeof(uint32_t));
148         if(b1==NULL){
149             status = U_MEMORY_ALLOCATION_ERROR;
150             goto CLEANUP;
151         }
152 
153         status = U_ZERO_ERROR; // reset error
154 
155         u_strToUTF32((UChar32*)b1,b1Len,&b1Len,src,srcLength,&status);
156     }
157     if(U_FAILURE(status)){
158         goto CLEANUP;
159     }
160 
161     //caseFlags = (unsigned char*) uprv_malloc(b1Len *sizeof(unsigned char));
162 
163     error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2);
164     status = getError(error);
165 
166     if(status == U_BUFFER_OVERFLOW_ERROR){
167         /* we do not have enough room so grow the buffer*/
168         b2 = (char*) uprv_malloc( b2Len * sizeof(char));
169         if(b2==NULL){
170             status = U_MEMORY_ALLOCATION_ERROR;
171             goto CLEANUP;
172         }
173 
174         status = U_ZERO_ERROR; // reset error
175 
176         punycode_status error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2);
177         status = getError(error);
178     }
179     if(U_FAILURE(status)){
180         goto CLEANUP;
181     }
182 
183     if(b2Len < destCapacity){
184           convertASCIIToUChars(b2,dest,b2Len);
185     }else{
186         status =U_BUFFER_OVERFLOW_ERROR;
187     }
188 
189 CLEANUP:
190     if(b1Stack != b1){
191         uprv_free(b1);
192     }
193     if(b2Stack != b2){
194         uprv_free(b2);
195     }
196     uprv_free(caseFlags);
197 
198     return b2Len;
199 }
200 
convertFromPuny(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UErrorCode & status)201 static int32_t convertFromPuny(  const UChar* src, int32_t srcLength,
202                                  UChar* dest, int32_t destCapacity,
203                                  UErrorCode& status){
204     char b1Stack[MAX_LABEL_BUFFER_SIZE];
205     char* b1 = b1Stack;
206     int32_t destLen =0;
207 
208     convertUCharsToASCII(src, b1,srcLength);
209 
210     uint32_t b2Stack[MAX_LABEL_BUFFER_SIZE];
211     uint32_t* b2 = b2Stack;
212     int32_t b2Len =MAX_LABEL_BUFFER_SIZE;
213     unsigned char* caseFlags = NULL; //(unsigned char*) uprv_malloc(srcLength * sizeof(unsigned char*));
214     punycode_status error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags);
215     status = getError(error);
216     if(status == U_BUFFER_OVERFLOW_ERROR){
217         b2 =  (uint32_t*) uprv_malloc(b2Len * sizeof(uint32_t));
218         if(b2 == NULL){
219             status = U_MEMORY_ALLOCATION_ERROR;
220             goto CLEANUP;
221         }
222         error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags);
223         status = getError(error);
224     }
225 
226     if(U_FAILURE(status)){
227         goto CLEANUP;
228     }
229 
230     u_strFromUTF32(dest,destCapacity,&destLen,(UChar32*)b2,b2Len,&status);
231 
232 CLEANUP:
233     if(b1Stack != b1){
234         uprv_free(b1);
235     }
236     if(b2Stack != b2){
237         uprv_free(b2);
238     }
239     uprv_free(caseFlags);
240 
241     return destLen;
242 }
243 
244 
245 U_CFUNC int32_t U_EXPORT2
idnaref_toASCII(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)246 idnaref_toASCII(const UChar* src, int32_t srcLength,
247               UChar* dest, int32_t destCapacity,
248               int32_t options,
249               UParseError* parseError,
250               UErrorCode* status){
251 
252     if(status == NULL || U_FAILURE(*status)){
253         return 0;
254     }
255     if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
256         *status = U_ILLEGAL_ARGUMENT_ERROR;
257         return 0;
258     }
259     UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
260     //initialize pointers to stack buffers
261     UChar  *b1 = b1Stack, *b2 = b2Stack;
262     int32_t b1Len=0, b2Len=0,
263             b1Capacity = MAX_LABEL_BUFFER_SIZE,
264             b2Capacity = MAX_LABEL_BUFFER_SIZE ,
265             reqLength=0;
266 
267     //get the options
268     UBool allowUnassigned   = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
269     UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
270 
271     UBool* caseFlags = NULL;
272 
273     // assume the source contains all ascii codepoints
274     UBool srcIsASCII  = TRUE;
275     // assume the source contains all LDH codepoints
276     UBool srcIsLDH = TRUE;
277     int32_t j=0;
278 
279     if(srcLength == -1){
280         srcLength = u_strlen(src);
281     }
282 
283     // step 1
284     for( j=0;j<srcLength;j++){
285         if(src[j] > 0x7F){
286             srcIsASCII = FALSE;
287         }
288         b1[b1Len++] = src[j];
289     }
290 
291     NamePrepTransform* prep = TestIDNA::getInstance(*status);
292     if(U_FAILURE(*status)){
293         goto CLEANUP;
294     }
295 
296     // step 2 is performed only if the source contains non ASCII
297     if (!srcIsASCII) {
298         b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned,parseError,*status);
299 
300         if(*status == U_BUFFER_OVERFLOW_ERROR){
301             // redo processing of string
302             /* we do not have enough room so grow the buffer*/
303             b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
304             if(b1==NULL){
305                 *status = U_MEMORY_ALLOCATION_ERROR;
306                 goto CLEANUP;
307             }
308 
309             *status = U_ZERO_ERROR; // reset error
310 
311             b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
312         }
313         // error bail out
314         if(U_FAILURE(*status)){
315             goto CLEANUP;
316         }
317     }
318 
319     if(b1Len == 0){
320         *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
321         goto CLEANUP;
322     }
323 
324     srcIsASCII = TRUE;
325     // step 3 & 4
326     for( j=0;j<b1Len;j++){
327         if(b1[j] > 0x7F){// check if output of usprep_prepare is all ASCII
328             srcIsASCII = FALSE;
329         }else if(prep->isLDHChar(b1[j])==FALSE){  // if the char is in ASCII range verify that it is an LDH character{
330             srcIsLDH = FALSE;
331         }
332     }
333 
334     if(useSTD3ASCIIRules == TRUE){
335         // verify 3a and 3b
336         if( srcIsLDH == FALSE /* source contains some non-LDH characters */
337             || b1[0] ==  HYPHEN || b1[b1Len-1] == HYPHEN){
338             *status = U_IDNA_STD3_ASCII_RULES_ERROR;
339             goto CLEANUP;
340         }
341     }
342     if(srcIsASCII){
343         if(b1Len <= destCapacity){
344             u_memmove(dest, b1, b1Len);
345             reqLength = b1Len;
346         }else{
347             reqLength = b1Len;
348             goto CLEANUP;
349         }
350     }else{
351         // step 5 : verify the sequence does not begin with ACE prefix
352         if(!startsWithPrefix(b1,b1Len)){
353 
354             //step 6: encode the sequence with punycode
355             //caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
356 
357             b2Len = convertToPuny(b1,b1Len, b2,b2Capacity,*status);
358             //b2Len = u_strToPunycode(b2,b2Capacity,b1,b1Len, caseFlags, status);
359             if(*status == U_BUFFER_OVERFLOW_ERROR){
360                 // redo processing of string
361                 /* we do not have enough room so grow the buffer*/
362                 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
363                 if(b2 == NULL){
364                     *status = U_MEMORY_ALLOCATION_ERROR;
365                     goto CLEANUP;
366                 }
367 
368                 *status = U_ZERO_ERROR; // reset error
369 
370                 b2Len = convertToPuny(b1, b1Len, b2, b2Len, *status);
371                 //b2Len = u_strToPunycode(b2,b2Len,b1,b1Len, caseFlags, status);
372 
373             }
374             //error bail out
375             if(U_FAILURE(*status)){
376                 goto CLEANUP;
377             }
378             reqLength = b2Len+ACE_PREFIX_LENGTH;
379 
380             if(reqLength > destCapacity){
381                 *status = U_BUFFER_OVERFLOW_ERROR;
382                 goto CLEANUP;
383             }
384             //Step 7: prepend the ACE prefix
385             u_memcpy(dest, ACE_PREFIX, ACE_PREFIX_LENGTH);
386             //Step 6: copy the contents in b2 into dest
387             u_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len);
388 
389         }else{
390             *status = U_IDNA_ACE_PREFIX_ERROR;
391             goto CLEANUP;
392         }
393     }
394 
395     if(reqLength > MAX_LABEL_LENGTH){
396         *status = U_IDNA_LABEL_TOO_LONG_ERROR;
397     }
398 
399 CLEANUP:
400     if(b1 != b1Stack){
401         uprv_free(b1);
402     }
403     if(b2 != b2Stack){
404         uprv_free(b2);
405     }
406     uprv_free(caseFlags);
407 
408 //    delete prep;
409 
410     return u_terminateUChars(dest, destCapacity, reqLength, status);
411 }
412 
413 
414 U_CFUNC int32_t U_EXPORT2
idnaref_toUnicode(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)415 idnaref_toUnicode(const UChar* src, int32_t srcLength,
416                 UChar* dest, int32_t destCapacity,
417                 int32_t options,
418                 UParseError* parseError,
419                 UErrorCode* status){
420 
421     if(status == NULL || U_FAILURE(*status)){
422         return 0;
423     }
424     if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
425         *status = U_ILLEGAL_ARGUMENT_ERROR;
426         return 0;
427     }
428 
429 
430 
431     UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
432 
433     //initialize pointers to stack buffers
434     UChar  *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
435     int32_t b1Len, b2Len, b1PrimeLen, b3Len,
436             b1Capacity = MAX_LABEL_BUFFER_SIZE,
437             b2Capacity = MAX_LABEL_BUFFER_SIZE,
438             b3Capacity = MAX_LABEL_BUFFER_SIZE,
439             reqLength=0;
440 //    UParseError parseError;
441 
442     NamePrepTransform* prep = TestIDNA::getInstance(*status);
443     b1Len = 0;
444     UBool* caseFlags = NULL;
445 
446     //get the options
447     UBool allowUnassigned   = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
448     UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
449 
450     UBool srcIsASCII = TRUE;
451     UBool srcIsLDH = TRUE;
452     int32_t failPos =0;
453 
454     if(U_FAILURE(*status)){
455         goto CLEANUP;
456     }
457     // step 1: find out if all the codepoints in src are ASCII
458     if(srcLength==-1){
459         srcLength = 0;
460         for(;src[srcLength]!=0;){
461             if(src[srcLength]> 0x7f){
462                 srcIsASCII = FALSE;
463             }if(prep->isLDHChar(src[srcLength])==FALSE){
464                 // here we do not assemble surrogates
465                 // since we know that LDH code points
466                 // are in the ASCII range only
467                 srcIsLDH = FALSE;
468                 failPos = srcLength;
469             }
470             srcLength++;
471         }
472     }else{
473         for(int32_t j=0; j<srcLength; j++){
474             if(src[j]> 0x7f){
475                 srcIsASCII = FALSE;
476             }else if(prep->isLDHChar(src[j])==FALSE){
477                 // here we do not assemble surrogates
478                 // since we know that LDH code points
479                 // are in the ASCII range only
480                 srcIsLDH = FALSE;
481                 failPos = j;
482             }
483         }
484     }
485 
486     if(srcIsASCII == FALSE){
487         // step 2: process the string
488         b1Len = prep->process(src,srcLength,b1,b1Capacity,allowUnassigned, parseError, *status);
489         if(*status == U_BUFFER_OVERFLOW_ERROR){
490             // redo processing of string
491             /* we do not have enough room so grow the buffer*/
492             b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
493             if(b1==NULL){
494                 *status = U_MEMORY_ALLOCATION_ERROR;
495                 goto CLEANUP;
496             }
497 
498             *status = U_ZERO_ERROR; // reset error
499 
500             b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
501         }
502         //bail out on error
503         if(U_FAILURE(*status)){
504             goto CLEANUP;
505         }
506     }else{
507 
508         // copy everything to b1
509         if(srcLength < b1Capacity){
510             u_memmove(b1, src, srcLength);
511         }else{
512             /* we do not have enough room so grow the buffer*/
513             b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR);
514             if(b1==NULL){
515                 *status = U_MEMORY_ALLOCATION_ERROR;
516                 goto CLEANUP;
517             }
518             u_memmove(b1, src, srcLength);
519         }
520         b1Len = srcLength;
521     }
522     //step 3: verify ACE Prefix
523     if(startsWithPrefix(src,srcLength)){
524 
525         //step 4: Remove the ACE Prefix
526         b1Prime = b1 + ACE_PREFIX_LENGTH;
527         b1PrimeLen  = b1Len - ACE_PREFIX_LENGTH;
528 
529         //step 5: Decode using punycode
530         b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Capacity, *status);
531         //b2Len = u_strFromPunycode(b2, b2Capacity,b1Prime,b1PrimeLen, caseFlags, status);
532 
533         if(*status == U_BUFFER_OVERFLOW_ERROR){
534             // redo processing of string
535             /* we do not have enough room so grow the buffer*/
536             b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
537             if(b2==NULL){
538                 *status = U_MEMORY_ALLOCATION_ERROR;
539                 goto CLEANUP;
540             }
541 
542             *status = U_ZERO_ERROR; // reset error
543 
544             b2Len =  convertFromPuny(b1Prime,b1PrimeLen, b2, b2Len, *status);
545             //b2Len = u_strFromPunycode(b2, b2Len,b1Prime,b1PrimeLen,caseFlags, status);
546         }
547 
548 
549         //step 6:Apply toASCII
550         b3Len = idnaref_toASCII(b2,b2Len,b3,b3Capacity,options,parseError, status);
551 
552         if(*status == U_BUFFER_OVERFLOW_ERROR){
553             // redo processing of string
554             /* we do not have enough room so grow the buffer*/
555             b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
556             if(b3==NULL){
557                 *status = U_MEMORY_ALLOCATION_ERROR;
558                 goto CLEANUP;
559             }
560 
561             *status = U_ZERO_ERROR; // reset error
562 
563             b3Len =  idnaref_toASCII(b2,b2Len,b3,b3Len, options, parseError, status);
564 
565         }
566         //bail out on error
567         if(U_FAILURE(*status)){
568             goto CLEANUP;
569         }
570 
571         //step 7: verify
572         if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
573             *status = U_IDNA_VERIFICATION_ERROR;
574             goto CLEANUP;
575         }
576 
577         //step 8: return output of step 5
578         reqLength = b2Len;
579         if(b2Len <= destCapacity) {
580             u_memmove(dest, b2, b2Len);
581         }
582     }else{
583         // verify that STD3 ASCII rules are satisfied
584         if(useSTD3ASCIIRules == TRUE){
585             if( srcIsLDH == FALSE /* source contains some non-LDH characters */
586                 || src[0] ==  HYPHEN || src[srcLength-1] == HYPHEN){
587                 *status = U_IDNA_STD3_ASCII_RULES_ERROR;
588 
589                 /* populate the parseError struct */
590                 if(srcIsLDH==FALSE){
591                     // failPos is always set the index of failure
592                     uprv_syntaxError(src,failPos, srcLength,parseError);
593                 }else if(src[0] == HYPHEN){
594                     // fail position is 0
595                     uprv_syntaxError(src,0,srcLength,parseError);
596                 }else{
597                     // the last index in the source is always length-1
598                     uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError);
599                 }
600 
601                 goto CLEANUP;
602             }
603         }
604         //copy the source to destination
605         if(srcLength <= destCapacity){
606             u_memmove(dest, src, srcLength);
607         }
608         reqLength = srcLength;
609     }
610 
611 CLEANUP:
612 
613     if(b1 != b1Stack){
614         uprv_free(b1);
615     }
616     if(b2 != b2Stack){
617         uprv_free(b2);
618     }
619     uprv_free(caseFlags);
620 
621     // The RFC states that
622     // <quote>
623     // ToUnicode never fails. If any step fails, then the original input
624     // is returned immediately in that step.
625     // </quote>
626     // So if any step fails lets copy source to destination
627     if(U_FAILURE(*status)){
628         //copy the source to destination
629         if(dest && srcLength <= destCapacity){
630           if(srcLength == -1) {
631             u_memmove(dest, src, u_strlen(src));
632           } else {
633             u_memmove(dest, src, srcLength);
634           }
635         }
636         reqLength = srcLength;
637         *status = U_ZERO_ERROR;
638     }
639     return u_terminateUChars(dest, destCapacity, reqLength, status);
640 }
641 
642 
643 static int32_t
getNextSeparator(UChar * src,int32_t srcLength,NamePrepTransform * prep,UChar ** limit,UBool * done,UErrorCode * status)644 getNextSeparator(UChar *src,int32_t srcLength,NamePrepTransform* prep,
645                  UChar **limit,
646                  UBool *done,
647                  UErrorCode *status){
648     if(srcLength == -1){
649         int32_t i;
650         for(i=0 ; ;i++){
651             if(src[i] == 0){
652                 *limit = src + i; // point to null
653                 *done = TRUE;
654                 return i;
655             }
656             if(prep->isLabelSeparator(src[i],*status)){
657                 *limit = src + (i+1); // go past the delimiter
658                 return i;
659 
660             }
661         }
662     }else{
663         int32_t i;
664         for(i=0;i<srcLength;i++){
665             if(prep->isLabelSeparator(src[i],*status)){
666                 *limit = src + (i+1); // go past the delimiter
667                 return i;
668             }
669         }
670         // we have not found the delimiter
671         if(i==srcLength){
672             *limit = src+srcLength;
673             *done = TRUE;
674         }
675         return i;
676     }
677 }
678 
679 U_CFUNC int32_t U_EXPORT2
idnaref_IDNToASCII(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)680 idnaref_IDNToASCII(  const UChar* src, int32_t srcLength,
681                    UChar* dest, int32_t destCapacity,
682                    int32_t options,
683                    UParseError* parseError,
684                    UErrorCode* status){
685 
686     if(status == NULL || U_FAILURE(*status)){
687         return 0;
688     }
689     if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
690         *status = U_ILLEGAL_ARGUMENT_ERROR;
691         return 0;
692     }
693 
694     int32_t reqLength = 0;
695 //    UParseError parseError;
696 
697     NamePrepTransform* prep = TestIDNA::getInstance(*status);
698 
699     //initialize pointers to stack buffers
700     UChar b1Stack[MAX_LABEL_BUFFER_SIZE];
701     UChar  *b1 = b1Stack;
702     int32_t b1Len, labelLen;
703     UChar* delimiter = (UChar*)src;
704     UChar* labelStart = (UChar*)src;
705     int32_t remainingLen = srcLength;
706     int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE;
707 
708     //get the options
709 //    UBool allowUnassigned   = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
710 //    UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
711     UBool done = FALSE;
712 
713     if(U_FAILURE(*status)){
714         goto CLEANUP;
715     }
716 
717 
718     if(srcLength == -1){
719         for(;;){
720 
721             if(*delimiter == 0){
722                 break;
723             }
724 
725             labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status);
726             b1Len = 0;
727             if(!(labelLen==0 && done)){// make sure this is not a root label separator.
728 
729                 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity,
730                                         options, parseError, status);
731 
732                 if(*status == U_BUFFER_OVERFLOW_ERROR){
733                     // redo processing of string
734                     /* we do not have enough room so grow the buffer*/
735                     b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
736                     if(b1==NULL){
737                         *status = U_MEMORY_ALLOCATION_ERROR;
738                         goto CLEANUP;
739                     }
740 
741                     *status = U_ZERO_ERROR; // reset error
742 
743                     b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len,
744                                             options, parseError, status);
745 
746                 }
747             }
748 
749             if(U_FAILURE(*status)){
750                 goto CLEANUP;
751             }
752             int32_t tempLen = (reqLength + b1Len );
753             // copy to dest
754             if( tempLen< destCapacity){
755                 u_memmove(dest+reqLength, b1, b1Len);
756             }
757 
758             reqLength = tempLen;
759 
760             // add the label separator
761             if(done == FALSE){
762                 if(reqLength < destCapacity){
763                     dest[reqLength] = FULL_STOP;
764                 }
765                 reqLength++;
766             }
767 
768             labelStart = delimiter;
769         }
770     }else{
771         for(;;){
772 
773             if(delimiter == src+srcLength){
774                 break;
775             }
776 
777             labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status);
778 
779             b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity,
780                                     options,parseError, status);
781 
782             if(*status == U_BUFFER_OVERFLOW_ERROR){
783                 // redo processing of string
784                 /* we do not have enough room so grow the buffer*/
785                 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
786                 if(b1==NULL){
787                     *status = U_MEMORY_ALLOCATION_ERROR;
788                     goto CLEANUP;
789                 }
790 
791                 *status = U_ZERO_ERROR; // reset error
792 
793                 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len,
794                                         options, parseError, status);
795 
796             }
797 
798             if(U_FAILURE(*status)){
799                 goto CLEANUP;
800             }
801             int32_t tempLen = (reqLength + b1Len );
802             // copy to dest
803             if( tempLen< destCapacity){
804                 u_memmove(dest+reqLength, b1, b1Len);
805             }
806 
807             reqLength = tempLen;
808 
809             // add the label separator
810             if(done == FALSE){
811                 if(reqLength < destCapacity){
812                     dest[reqLength] = FULL_STOP;
813                 }
814                 reqLength++;
815             }
816 
817             labelStart = delimiter;
818             remainingLen = srcLength - (delimiter - src);
819         }
820     }
821 
822 
823 CLEANUP:
824 
825     if(b1 != b1Stack){
826         uprv_free(b1);
827     }
828 
829 //   delete prep;
830 
831     return u_terminateUChars(dest, destCapacity, reqLength, status);
832 }
833 
834 U_CFUNC int32_t U_EXPORT2
idnaref_IDNToUnicode(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,int32_t options,UParseError * parseError,UErrorCode * status)835 idnaref_IDNToUnicode(  const UChar* src, int32_t srcLength,
836                      UChar* dest, int32_t destCapacity,
837                      int32_t options,
838                      UParseError* parseError,
839                      UErrorCode* status){
840 
841     if(status == NULL || U_FAILURE(*status)){
842         return 0;
843     }
844     if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
845         *status = U_ILLEGAL_ARGUMENT_ERROR;
846         return 0;
847     }
848 
849     int32_t reqLength = 0;
850 
851     UBool done = FALSE;
852 
853     NamePrepTransform* prep = TestIDNA::getInstance(*status);
854 
855     //initialize pointers to stack buffers
856     UChar b1Stack[MAX_LABEL_BUFFER_SIZE];
857     UChar  *b1 = b1Stack;
858     int32_t b1Len, labelLen;
859     UChar* delimiter = (UChar*)src;
860     UChar* labelStart = (UChar*)src;
861     int32_t remainingLen = srcLength;
862     int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE;
863 
864     //get the options
865 //    UBool allowUnassigned   = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
866 //    UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
867 
868     if(U_FAILURE(*status)){
869         goto CLEANUP;
870     }
871 
872     if(srcLength == -1){
873         for(;;){
874 
875             if(*delimiter == 0){
876                 break;
877             }
878 
879             labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status);
880 
881            if(labelLen==0 && done==FALSE){
882                 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
883             }
884             b1Len = idnaref_toUnicode(labelStart, labelLen, b1, b1Capacity,
885                                       options, parseError, status);
886 
887             if(*status == U_BUFFER_OVERFLOW_ERROR){
888                 // redo processing of string
889                 /* we do not have enough room so grow the buffer*/
890                 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
891                 if(b1==NULL){
892                     *status = U_MEMORY_ALLOCATION_ERROR;
893                     goto CLEANUP;
894                 }
895 
896                 *status = U_ZERO_ERROR; // reset error
897 
898                 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len,
899                                            options, parseError, status);
900 
901             }
902 
903             if(U_FAILURE(*status)){
904                 goto CLEANUP;
905             }
906             int32_t tempLen = (reqLength + b1Len );
907             // copy to dest
908             if( tempLen< destCapacity){
909                 u_memmove(dest+reqLength, b1, b1Len);
910             }
911 
912             reqLength = tempLen;
913             // add the label separator
914             if(done == FALSE){
915                 if(reqLength < destCapacity){
916                     dest[reqLength] = FULL_STOP;
917                 }
918                 reqLength++;
919             }
920 
921             labelStart = delimiter;
922         }
923     }else{
924         for(;;){
925 
926             if(delimiter == src+srcLength){
927                 break;
928             }
929 
930             labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status);
931 
932             if(labelLen==0 && done==FALSE){
933                 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
934             }
935 
936             b1Len = idnaref_toUnicode( labelStart,labelLen, b1, b1Capacity,
937                                        options, parseError, status);
938 
939             if(*status == U_BUFFER_OVERFLOW_ERROR){
940                 // redo processing of string
941                 /* we do not have enough room so grow the buffer*/
942                 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
943                 if(b1==NULL){
944                     *status = U_MEMORY_ALLOCATION_ERROR;
945                     goto CLEANUP;
946                 }
947 
948                 *status = U_ZERO_ERROR; // reset error
949 
950                 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len,
951                                            options, parseError, status);
952 
953             }
954 
955             if(U_FAILURE(*status)){
956                 goto CLEANUP;
957             }
958             int32_t tempLen = (reqLength + b1Len );
959             // copy to dest
960             if( tempLen< destCapacity){
961                 u_memmove(dest+reqLength, b1, b1Len);
962             }
963 
964             reqLength = tempLen;
965 
966             // add the label separator
967             if(done == FALSE){
968                 if(reqLength < destCapacity){
969                     dest[reqLength] = FULL_STOP;
970                 }
971                 reqLength++;
972             }
973 
974             labelStart = delimiter;
975             remainingLen = srcLength - (delimiter - src);
976         }
977     }
978 
979 CLEANUP:
980 
981     if(b1 != b1Stack){
982         uprv_free(b1);
983     }
984 
985 //    delete prep;
986 
987     return u_terminateUChars(dest, destCapacity, reqLength, status);
988 }
989 
990 U_CFUNC int32_t U_EXPORT2
idnaref_compare(const UChar * s1,int32_t length1,const UChar * s2,int32_t length2,int32_t options,UErrorCode * status)991 idnaref_compare(  const UChar *s1, int32_t length1,
992                 const UChar *s2, int32_t length2,
993                 int32_t options,
994                 UErrorCode* status){
995 
996     if(status == NULL || U_FAILURE(*status)){
997         return -1;
998     }
999 
1000     UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE];
1001     UChar *b1 = b1Stack, *b2 = b2Stack;
1002     int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE;
1003     int32_t result = -1;
1004 
1005     UParseError parseError;
1006 
1007     b1Len = idnaref_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
1008     if(*status == U_BUFFER_OVERFLOW_ERROR){
1009         // redo processing of string
1010         /* we do not have enough room so grow the buffer*/
1011         b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
1012         if(b1==NULL){
1013             *status = U_MEMORY_ALLOCATION_ERROR;
1014             goto CLEANUP;
1015         }
1016 
1017         *status = U_ZERO_ERROR; // reset error
1018 
1019         b1Len = idnaref_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
1020 
1021     }
1022 
1023     b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Capacity,options, &parseError, status);
1024     if(*status == U_BUFFER_OVERFLOW_ERROR){
1025         // redo processing of string
1026         /* we do not have enough room so grow the buffer*/
1027         b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
1028         if(b2==NULL){
1029             *status = U_MEMORY_ALLOCATION_ERROR;
1030             goto CLEANUP;
1031         }
1032 
1033         *status = U_ZERO_ERROR; // reset error
1034 
1035         b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Len,options, &parseError, status);
1036 
1037     }
1038     // when toASCII is applied all label separators are replaced with FULL_STOP
1039     result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);
1040 
1041 CLEANUP:
1042     if(b1 != b1Stack){
1043         uprv_free(b1);
1044     }
1045 
1046     if(b2 != b2Stack){
1047         uprv_free(b2);
1048     }
1049 
1050     return result;
1051 }
#endif /* #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION */
1053