1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5 *******************************************************************************
6 * Copyright (C) 2003-2010, International Business Machines
7 * Corporation and others.  All Rights Reserved.
8 *******************************************************************************
9 */
10 package android.icu.impl;
11 
12 import android.icu.text.IDNA;
13 import android.icu.text.StringPrep;
14 import android.icu.text.StringPrepParseException;
15 import android.icu.text.UCharacterIterator;
16 
17 /**
18  * IDNA2003 implementation code, moved out of android.icu.text.IDNA.java
19  * while extending that class to support IDNA2008/UTS #46 as well.
20  * @author Ram Viswanadha
21  * @hide Only a subset of ICU is exposed in Android
22  */
23 public final class IDNA2003 {
24     /* IDNA ACE Prefix is "xn--" */
25     private static char[] ACE_PREFIX                = new char[]{ 0x0078,0x006E,0x002d,0x002d } ;
26     //private static final int ACE_PREFIX_LENGTH      = ACE_PREFIX.length;
27 
28     private static final int MAX_LABEL_LENGTH       = 63;
29     private static final int HYPHEN                 = 0x002D;
30     private static final int CAPITAL_A              = 0x0041;
31     private static final int CAPITAL_Z              = 0x005A;
32     private static final int LOWER_CASE_DELTA       = 0x0020;
33     private static final int FULL_STOP              = 0x002E;
34     private static final int MAX_DOMAIN_NAME_LENGTH = 255;
35 
36     // The NamePrep profile object
37     private static final StringPrep namePrep = StringPrep.getInstance(StringPrep.RFC3491_NAMEPREP);
38 
startsWithPrefix(StringBuffer src)39     private static boolean startsWithPrefix(StringBuffer src){
40         boolean startsWithPrefix = true;
41 
42         if(src.length() < ACE_PREFIX.length){
43             return false;
44         }
45         for(int i=0; i<ACE_PREFIX.length;i++){
46             if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){
47                 startsWithPrefix = false;
48             }
49         }
50         return startsWithPrefix;
51     }
52 
toASCIILower(char ch)53     private static char toASCIILower(char ch){
54         if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
55             return (char)(ch + LOWER_CASE_DELTA);
56         }
57         return ch;
58     }
59 
toASCIILower(CharSequence src)60     private static StringBuffer toASCIILower(CharSequence src){
61         StringBuffer dest = new StringBuffer();
62         for(int i=0; i<src.length();i++){
63             dest.append(toASCIILower(src.charAt(i)));
64         }
65         return dest;
66     }
67 
compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2)68     private static int compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2){
69         char c1,c2;
70         int rc;
71         for(int i =0;/* no condition */;i++) {
72             /* If we reach the ends of both strings then they match */
73             if(i == s1.length()) {
74                 return 0;
75             }
76 
77             c1 = s1.charAt(i);
78             c2 = s2.charAt(i);
79 
80             /* Case-insensitive comparison */
81             if(c1!=c2) {
82                 rc=toASCIILower(c1)-toASCIILower(c2);
83                 if(rc!=0) {
84                     return rc;
85                 }
86             }
87         }
88     }
89 
getSeparatorIndex(char[] src,int start, int limit)90     private static int getSeparatorIndex(char[] src,int start, int limit){
91         for(; start<limit;start++){
92             if(isLabelSeparator(src[start])){
93                 return start;
94             }
95         }
96         // we have not found the separator just return length
97         return start;
98     }
99 
100     /*
101     private static int getSeparatorIndex(UCharacterIterator iter){
102         int currentIndex = iter.getIndex();
103         int separatorIndex = 0;
104         int ch;
105         while((ch=iter.next())!= UCharacterIterator.DONE){
106             if(isLabelSeparator(ch)){
107                 separatorIndex = iter.getIndex();
108                 iter.setIndex(currentIndex);
109                 return separatorIndex;
110             }
111         }
112         // reset index
113         iter.setIndex(currentIndex);
114         // we have not found the separator just return the length
115 
116     }
117     */
118 
119 
isLDHChar(int ch)120     private static boolean isLDHChar(int ch){
121         // high runner case
122         if(ch>0x007A){
123             return false;
124         }
125         //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
126         if( (ch==0x002D) ||
127             (0x0030 <= ch && ch <= 0x0039) ||
128             (0x0041 <= ch && ch <= 0x005A) ||
129             (0x0061 <= ch && ch <= 0x007A)
130           ){
131             return true;
132         }
133         return false;
134     }
135 
136     /**
137      * Ascertain if the given code point is a label separator as
138      * defined by the IDNA RFC
139      *
140      * @param ch The code point to be ascertained
141      * @return true if the char is a label separator
142      */
isLabelSeparator(int ch)143     private static boolean isLabelSeparator(int ch){
144         switch(ch){
145             case 0x002e:
146             case 0x3002:
147             case 0xFF0E:
148             case 0xFF61:
149                 return true;
150             default:
151                 return false;
152         }
153     }
154 
convertToASCII(UCharacterIterator src, int options)155     public static StringBuffer convertToASCII(UCharacterIterator src, int options)
156             throws StringPrepParseException{
157 
158         boolean[] caseFlags = null;
159 
160         // the source contains all ascii codepoints
161         boolean srcIsASCII  = true;
162         // assume the source contains all LDH codepoints
163         boolean srcIsLDH = true;
164 
165         //get the options
166         boolean useSTD3ASCIIRules = ((options & IDNA.USE_STD3_RULES) != 0);
167         int ch;
168         // step 1
169         while((ch = src.next())!= UCharacterIterator.DONE){
170             if(ch> 0x7f){
171                 srcIsASCII = false;
172             }
173         }
174         int failPos = -1;
175         src.setToStart();
176         StringBuffer processOut = null;
177         // step 2 is performed only if the source contains non ASCII
178         if(!srcIsASCII){
179             // step 2
180             processOut = namePrep.prepare(src, options);
181         }else{
182             processOut = new StringBuffer(src.getText());
183         }
184         int poLen = processOut.length();
185 
186         if(poLen==0){
187             throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
188         }
189         StringBuffer dest = new StringBuffer();
190 
191         // reset the variable to verify if output of prepare is ASCII or not
192         srcIsASCII = true;
193 
194         // step 3 & 4
195         for(int j=0;j<poLen;j++ ){
196             ch=processOut.charAt(j);
197             if(ch > 0x7F){
198                 srcIsASCII = false;
199             }else if(isLDHChar(ch)==false){
200                 // here we do not assemble surrogates
201                 // since we know that LDH code points
202                 // are in the ASCII range only
203                 srcIsLDH = false;
204                 failPos = j;
205             }
206         }
207 
208         if(useSTD3ASCIIRules == true){
209             // verify 3a and 3b
210             if( srcIsLDH == false /* source contains some non-LDH characters */
211                 || processOut.charAt(0) ==  HYPHEN
212                 || processOut.charAt(processOut.length()-1) == HYPHEN){
213 
214                 /* populate the parseError struct */
215                 if(srcIsLDH==false){
216                      throw new StringPrepParseException( "The input does not conform to the STD 3 ASCII rules",
217                                               StringPrepParseException.STD3_ASCII_RULES_ERROR,
218                                               processOut.toString(),
219                                              (failPos>0) ? (failPos-1) : failPos);
220                 }else if(processOut.charAt(0) == HYPHEN){
221                     throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
222                                               StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),0);
223 
224                 }else{
225                      throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
226                                               StringPrepParseException.STD3_ASCII_RULES_ERROR,
227                                               processOut.toString(),
228                                               (poLen>0) ? poLen-1 : poLen);
229 
230                 }
231             }
232         }
233         if(srcIsASCII){
234             dest =  processOut;
235         }else{
236             // step 5 : verify the sequence does not begin with ACE prefix
237             if(!startsWithPrefix(processOut)){
238 
239                 //step 6: encode the sequence with punycode
240                 caseFlags = new boolean[poLen];
241 
242                 StringBuilder punyout = Punycode.encode(processOut,caseFlags);
243 
244                 // convert all codepoints to lower case ASCII
245                 StringBuffer lowerOut = toASCIILower(punyout);
246 
247                 //Step 7: prepend the ACE prefix
248                 dest.append(ACE_PREFIX,0,ACE_PREFIX.length);
249                 //Step 6: copy the contents in b2 into dest
250                 dest.append(lowerOut);
251             }else{
252 
253                 throw new StringPrepParseException("The input does not start with the ACE Prefix.",
254                                          StringPrepParseException.ACE_PREFIX_ERROR,processOut.toString(),0);
255             }
256         }
257         if(dest.length() > MAX_LABEL_LENGTH){
258             throw new StringPrepParseException("The labels in the input are too long. Length > 63.",
259                                      StringPrepParseException.LABEL_TOO_LONG_ERROR,dest.toString(),0);
260         }
261         return dest;
262     }
263 
convertIDNToASCII(String src,int options)264     public static StringBuffer convertIDNToASCII(String src,int options)
265             throws StringPrepParseException{
266 
267         char[] srcArr = src.toCharArray();
268         StringBuffer result = new StringBuffer();
269         int sepIndex=0;
270         int oldSepIndex=0;
271         for(;;){
272             sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
273             String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);
274             //make sure this is not a root label separator.
275             if(!(label.length()==0 && sepIndex==srcArr.length)){
276                 UCharacterIterator iter = UCharacterIterator.getInstance(label);
277                 result.append(convertToASCII(iter,options));
278             }
279             if(sepIndex==srcArr.length){
280                 break;
281             }
282 
283             // increment the sepIndex to skip past the separator
284             sepIndex++;
285             oldSepIndex = sepIndex;
286             result.append((char)FULL_STOP);
287         }
288         if(result.length() > MAX_DOMAIN_NAME_LENGTH){
289             throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepParseException.DOMAIN_NAME_TOO_LONG_ERROR);
290         }
291         return result;
292     }
293 
convertToUnicode(UCharacterIterator src, int options)294     public static StringBuffer convertToUnicode(UCharacterIterator src, int options)
295             throws StringPrepParseException{
296 
297         boolean[] caseFlags = null;
298 
299         // the source contains all ascii codepoints
300         boolean srcIsASCII  = true;
301         // assume the source contains all LDH codepoints
302         //boolean srcIsLDH = true;
303 
304         //get the options
305         //boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);
306 
307         //int failPos = -1;
308         int ch;
309         int saveIndex = src.getIndex();
310         // step 1: find out if all the codepoints in src are ASCII
311         while((ch=src.next())!= UCharacterIterator.DONE){
312             if(ch>0x7F){
313                 srcIsASCII = false;
314             }/*else if((srcIsLDH = isLDHChar(ch))==false){
315                 failPos = src.getIndex();
316             }*/
317         }
318         StringBuffer processOut;
319 
320         if(srcIsASCII == false){
321             try {
322                 // step 2: process the string
323                 src.setIndex(saveIndex);
324                 processOut = namePrep.prepare(src,options);
325             } catch (StringPrepParseException ex) {
326                 return new StringBuffer(src.getText());
327             }
328 
329         }else{
330             //just point to source
331             processOut = new StringBuffer(src.getText());
332         }
333         // TODO:
334         // The RFC states that
335         // <quote>
336         // ToUnicode never fails. If any step fails, then the original input
337         // is returned immediately in that step.
338         // </quote>
339 
340         //step 3: verify ACE Prefix
341         if(startsWithPrefix(processOut)){
342             StringBuffer decodeOut = null;
343 
344             //step 4: Remove the ACE Prefix
345             String temp = processOut.substring(ACE_PREFIX.length,processOut.length());
346 
347             //step 5: Decode using punycode
348             try {
349                 decodeOut = new StringBuffer(Punycode.decode(temp,caseFlags));
350             } catch (StringPrepParseException e) {
351                 decodeOut = null;
352             }
353 
354             //step 6:Apply toASCII
355             if (decodeOut != null) {
356                 StringBuffer toASCIIOut = convertToASCII(UCharacterIterator.getInstance(decodeOut), options);
357 
358                 //step 7: verify
359                 if(compareCaseInsensitiveASCII(processOut, toASCIIOut) !=0){
360 //                    throw new StringPrepParseException("The verification step prescribed by the RFC 3491 failed",
361 //                                             StringPrepParseException.VERIFICATION_ERROR);
362                     decodeOut = null;
363                 }
364             }
365 
366             //step 8: return output of step 5
367              if (decodeOut != null) {
368                  return decodeOut;
369              }
370         }
371 
372 //        }else{
373 //            // verify that STD3 ASCII rules are satisfied
374 //            if(useSTD3ASCIIRules == true){
375 //                if( srcIsLDH == false /* source contains some non-LDH characters */
376 //                    || processOut.charAt(0) ==  HYPHEN
377 //                    || processOut.charAt(processOut.length()-1) == HYPHEN){
378 //
379 //                    if(srcIsLDH==false){
380 //                        throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
381 //                                                 StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),
382 //                                                 (failPos>0) ? (failPos-1) : failPos);
383 //                    }else if(processOut.charAt(0) == HYPHEN){
384 //                        throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
385 //                                                 StringPrepParseException.STD3_ASCII_RULES_ERROR,
386 //                                                 processOut.toString(),0);
387 //
388 //                    }else{
389 //                        throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules",
390 //                                                 StringPrepParseException.STD3_ASCII_RULES_ERROR,
391 //                                                 processOut.toString(),
392 //                                                 processOut.length());
393 //
394 //                    }
395 //                }
396 //            }
397 //            // just return the source
398 //            return new StringBuffer(src.getText());
399 //        }
400 
401         return new StringBuffer(src.getText());
402     }
403 
convertIDNToUnicode(String src, int options)404     public static StringBuffer convertIDNToUnicode(String src, int options)
405             throws StringPrepParseException{
406 
407         char[] srcArr = src.toCharArray();
408         StringBuffer result = new StringBuffer();
409         int sepIndex=0;
410         int oldSepIndex=0;
411         for(;;){
412             sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length);
413             String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex);
414             if(label.length()==0 && sepIndex!=srcArr.length ){
415                 throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL);
416             }
417             UCharacterIterator iter = UCharacterIterator.getInstance(label);
418             result.append(convertToUnicode(iter,options));
419             if(sepIndex==srcArr.length){
420                 break;
421             }
422             // Unlike the ToASCII operation we don't normalize the label separators
423             result.append(srcArr[sepIndex]);
424             // increment the sepIndex to skip past the separator
425             sepIndex++;
426             oldSepIndex =sepIndex;
427         }
428         if(result.length() > MAX_DOMAIN_NAME_LENGTH){
429             throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepParseException.DOMAIN_NAME_TOO_LONG_ERROR);
430         }
431         return result;
432     }
433 
compare(String s1, String s2, int options)434     public static int compare(String s1, String s2, int options) throws StringPrepParseException{
435         StringBuffer s1Out = convertIDNToASCII(s1, options);
436         StringBuffer s2Out = convertIDNToASCII(s2, options);
437         return compareCaseInsensitiveASCII(s1Out,s2Out);
438     }
439 }
440