1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2003-2010, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 package android.icu.impl; 11 12 import android.icu.text.IDNA; 13 import android.icu.text.StringPrep; 14 import android.icu.text.StringPrepParseException; 15 import android.icu.text.UCharacterIterator; 16 17 /** 18 * IDNA2003 implementation code, moved out of android.icu.text.IDNA.java 19 * while extending that class to support IDNA2008/UTS #46 as well. 20 * @author Ram Viswanadha 21 * @hide Only a subset of ICU is exposed in Android 22 */ 23 public final class IDNA2003 { 24 /* IDNA ACE Prefix is "xn--" */ 25 private static char[] ACE_PREFIX = new char[]{ 0x0078,0x006E,0x002d,0x002d } ; 26 //private static final int ACE_PREFIX_LENGTH = ACE_PREFIX.length; 27 28 private static final int MAX_LABEL_LENGTH = 63; 29 private static final int HYPHEN = 0x002D; 30 private static final int CAPITAL_A = 0x0041; 31 private static final int CAPITAL_Z = 0x005A; 32 private static final int LOWER_CASE_DELTA = 0x0020; 33 private static final int FULL_STOP = 0x002E; 34 private static final int MAX_DOMAIN_NAME_LENGTH = 255; 35 36 // The NamePrep profile object 37 private static final StringPrep namePrep = StringPrep.getInstance(StringPrep.RFC3491_NAMEPREP); 38 startsWithPrefix(StringBuffer src)39 private static boolean startsWithPrefix(StringBuffer src){ 40 boolean startsWithPrefix = true; 41 42 if(src.length() < ACE_PREFIX.length){ 43 return false; 44 } 45 for(int i=0; i<ACE_PREFIX.length;i++){ 46 if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){ 47 startsWithPrefix = false; 48 } 49 } 50 return startsWithPrefix; 51 } 52 toASCIILower(char ch)53 private static char toASCIILower(char ch){ 54 if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ 55 return (char)(ch + LOWER_CASE_DELTA); 56 } 57 return ch; 58 } 59 toASCIILower(CharSequence src)60 private static StringBuffer toASCIILower(CharSequence src){ 61 StringBuffer dest = new StringBuffer(); 62 for(int i=0; i<src.length();i++){ 63 dest.append(toASCIILower(src.charAt(i))); 64 } 65 return dest; 66 } 67 compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2)68 private static int compareCaseInsensitiveASCII(StringBuffer s1, StringBuffer s2){ 69 char c1,c2; 70 int rc; 71 for(int i =0;/* no condition */;i++) { 72 /* If we reach the ends of both strings then they match */ 73 if(i == s1.length()) { 74 return 0; 75 } 76 77 c1 = s1.charAt(i); 78 c2 = s2.charAt(i); 79 80 /* Case-insensitive comparison */ 81 if(c1!=c2) { 82 rc=toASCIILower(c1)-toASCIILower(c2); 83 if(rc!=0) { 84 return rc; 85 } 86 } 87 } 88 } 89 getSeparatorIndex(char[] src,int start, int limit)90 private static int getSeparatorIndex(char[] src,int start, int limit){ 91 for(; start<limit;start++){ 92 if(isLabelSeparator(src[start])){ 93 return start; 94 } 95 } 96 // we have not found the separator just return length 97 return start; 98 } 99 100 /* 101 private static int getSeparatorIndex(UCharacterIterator iter){ 102 int currentIndex = iter.getIndex(); 103 int separatorIndex = 0; 104 int ch; 105 while((ch=iter.next())!= UCharacterIterator.DONE){ 106 if(isLabelSeparator(ch)){ 107 separatorIndex = iter.getIndex(); 108 iter.setIndex(currentIndex); 109 return separatorIndex; 110 } 111 } 112 // reset index 113 iter.setIndex(currentIndex); 114 // we have not found the separator just return the length 115 116 } 117 */ 118 119 isLDHChar(int ch)120 private static boolean isLDHChar(int ch){ 121 // high runner case 122 if(ch>0x007A){ 123 return false; 124 } 125 //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A] 126 if( (ch==0x002D) || 127 (0x0030 <= ch && ch <= 0x0039) || 128 (0x0041 <= ch && ch <= 0x005A) || 129 (0x0061 <= ch && ch <= 0x007A) 130 ){ 131 return true; 132 } 133 return false; 134 } 135 136 /** 137 * Ascertain if the given code point is a label separator as 138 * defined by the IDNA RFC 139 * 140 * @param ch The code point to be ascertained 141 * @return true if the char is a label separator 142 */ isLabelSeparator(int ch)143 private static boolean isLabelSeparator(int ch){ 144 switch(ch){ 145 case 0x002e: 146 case 0x3002: 147 case 0xFF0E: 148 case 0xFF61: 149 return true; 150 default: 151 return false; 152 } 153 } 154 convertToASCII(UCharacterIterator src, int options)155 public static StringBuffer convertToASCII(UCharacterIterator src, int options) 156 throws StringPrepParseException{ 157 158 boolean[] caseFlags = null; 159 160 // the source contains all ascii codepoints 161 boolean srcIsASCII = true; 162 // assume the source contains all LDH codepoints 163 boolean srcIsLDH = true; 164 165 //get the options 166 boolean useSTD3ASCIIRules = ((options & IDNA.USE_STD3_RULES) != 0); 167 int ch; 168 // step 1 169 while((ch = src.next())!= UCharacterIterator.DONE){ 170 if(ch> 0x7f){ 171 srcIsASCII = false; 172 } 173 } 174 int failPos = -1; 175 src.setToStart(); 176 StringBuffer processOut = null; 177 // step 2 is performed only if the source contains non ASCII 178 if(!srcIsASCII){ 179 // step 2 180 processOut = namePrep.prepare(src, options); 181 }else{ 182 processOut = new StringBuffer(src.getText()); 183 } 184 int poLen = processOut.length(); 185 186 if(poLen==0){ 187 throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL); 188 } 189 StringBuffer dest = new StringBuffer(); 190 191 // reset the variable to verify if output of prepare is ASCII or not 192 srcIsASCII = true; 193 194 // step 3 & 4 195 for(int j=0;j<poLen;j++ ){ 196 ch=processOut.charAt(j); 197 if(ch > 0x7F){ 198 srcIsASCII = false; 199 }else if(isLDHChar(ch)==false){ 200 // here we do not assemble surrogates 201 // since we know that LDH code points 202 // are in the ASCII range only 203 srcIsLDH = false; 204 failPos = j; 205 } 206 } 207 208 if(useSTD3ASCIIRules == true){ 209 // verify 3a and 3b 210 if( srcIsLDH == false /* source contains some non-LDH characters */ 211 || processOut.charAt(0) == HYPHEN 212 || processOut.charAt(processOut.length()-1) == HYPHEN){ 213 214 /* populate the parseError struct */ 215 if(srcIsLDH==false){ 216 throw new StringPrepParseException( "The input does not conform to the STD 3 ASCII rules", 217 StringPrepParseException.STD3_ASCII_RULES_ERROR, 218 processOut.toString(), 219 (failPos>0) ? (failPos-1) : failPos); 220 }else if(processOut.charAt(0) == HYPHEN){ 221 throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", 222 StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(),0); 223 224 }else{ 225 throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", 226 StringPrepParseException.STD3_ASCII_RULES_ERROR, 227 processOut.toString(), 228 (poLen>0) ? poLen-1 : poLen); 229 230 } 231 } 232 } 233 if(srcIsASCII){ 234 dest = processOut; 235 }else{ 236 // step 5 : verify the sequence does not begin with ACE prefix 237 if(!startsWithPrefix(processOut)){ 238 239 //step 6: encode the sequence with punycode 240 caseFlags = new boolean[poLen]; 241 242 StringBuilder punyout = Punycode.encode(processOut,caseFlags); 243 244 // convert all codepoints to lower case ASCII 245 StringBuffer lowerOut = toASCIILower(punyout); 246 247 //Step 7: prepend the ACE prefix 248 dest.append(ACE_PREFIX,0,ACE_PREFIX.length); 249 //Step 6: copy the contents in b2 into dest 250 dest.append(lowerOut); 251 }else{ 252 253 throw new StringPrepParseException("The input does not start with the ACE Prefix.", 254 StringPrepParseException.ACE_PREFIX_ERROR,processOut.toString(),0); 255 } 256 } 257 if(dest.length() > MAX_LABEL_LENGTH){ 258 throw new StringPrepParseException("The labels in the input are too long. Length > 63.", 259 StringPrepParseException.LABEL_TOO_LONG_ERROR,dest.toString(),0); 260 } 261 return dest; 262 } 263 convertIDNToASCII(String src,int options)264 public static StringBuffer convertIDNToASCII(String src,int options) 265 throws StringPrepParseException{ 266 267 char[] srcArr = src.toCharArray(); 268 StringBuffer result = new StringBuffer(); 269 int sepIndex=0; 270 int oldSepIndex=0; 271 for(;;){ 272 sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length); 273 String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex); 274 //make sure this is not a root label separator. 275 if(!(label.length()==0 && sepIndex==srcArr.length)){ 276 UCharacterIterator iter = UCharacterIterator.getInstance(label); 277 result.append(convertToASCII(iter,options)); 278 } 279 if(sepIndex==srcArr.length){ 280 break; 281 } 282 283 // increment the sepIndex to skip past the separator 284 sepIndex++; 285 oldSepIndex = sepIndex; 286 result.append((char)FULL_STOP); 287 } 288 if(result.length() > MAX_DOMAIN_NAME_LENGTH){ 289 throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepParseException.DOMAIN_NAME_TOO_LONG_ERROR); 290 } 291 return result; 292 } 293 convertToUnicode(UCharacterIterator src, int options)294 public static StringBuffer convertToUnicode(UCharacterIterator src, int options) 295 throws StringPrepParseException{ 296 297 boolean[] caseFlags = null; 298 299 // the source contains all ascii codepoints 300 boolean srcIsASCII = true; 301 // assume the source contains all LDH codepoints 302 //boolean srcIsLDH = true; 303 304 //get the options 305 //boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0); 306 307 //int failPos = -1; 308 int ch; 309 int saveIndex = src.getIndex(); 310 // step 1: find out if all the codepoints in src are ASCII 311 while((ch=src.next())!= UCharacterIterator.DONE){ 312 if(ch>0x7F){ 313 srcIsASCII = false; 314 }/*else if((srcIsLDH = isLDHChar(ch))==false){ 315 failPos = src.getIndex(); 316 }*/ 317 } 318 StringBuffer processOut; 319 320 if(srcIsASCII == false){ 321 try { 322 // step 2: process the string 323 src.setIndex(saveIndex); 324 processOut = namePrep.prepare(src,options); 325 } catch (StringPrepParseException ex) { 326 return new StringBuffer(src.getText()); 327 } 328 329 }else{ 330 //just point to source 331 processOut = new StringBuffer(src.getText()); 332 } 333 // TODO: 334 // The RFC states that 335 // <quote> 336 // ToUnicode never fails. If any step fails, then the original input 337 // is returned immediately in that step. 338 // </quote> 339 340 //step 3: verify ACE Prefix 341 if(startsWithPrefix(processOut)){ 342 StringBuffer decodeOut = null; 343 344 //step 4: Remove the ACE Prefix 345 String temp = processOut.substring(ACE_PREFIX.length,processOut.length()); 346 347 //step 5: Decode using punycode 348 try { 349 decodeOut = new StringBuffer(Punycode.decode(temp,caseFlags)); 350 } catch (StringPrepParseException e) { 351 decodeOut = null; 352 } 353 354 //step 6:Apply toASCII 355 if (decodeOut != null) { 356 StringBuffer toASCIIOut = convertToASCII(UCharacterIterator.getInstance(decodeOut), options); 357 358 //step 7: verify 359 if(compareCaseInsensitiveASCII(processOut, toASCIIOut) !=0){ 360 // throw new StringPrepParseException("The verification step prescribed by the RFC 3491 failed", 361 // StringPrepParseException.VERIFICATION_ERROR); 362 decodeOut = null; 363 } 364 } 365 366 //step 8: return output of step 5 367 if (decodeOut != null) { 368 return decodeOut; 369 } 370 } 371 372 // }else{ 373 // // verify that STD3 ASCII rules are satisfied 374 // if(useSTD3ASCIIRules == true){ 375 // if( srcIsLDH == false /* source contains some non-LDH characters */ 376 // || processOut.charAt(0) == HYPHEN 377 // || processOut.charAt(processOut.length()-1) == HYPHEN){ 378 // 379 // if(srcIsLDH==false){ 380 // throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", 381 // StringPrepParseException.STD3_ASCII_RULES_ERROR,processOut.toString(), 382 // (failPos>0) ? (failPos-1) : failPos); 383 // }else if(processOut.charAt(0) == HYPHEN){ 384 // throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", 385 // StringPrepParseException.STD3_ASCII_RULES_ERROR, 386 // processOut.toString(),0); 387 // 388 // }else{ 389 // throw new StringPrepParseException("The input does not conform to the STD 3 ASCII rules", 390 // StringPrepParseException.STD3_ASCII_RULES_ERROR, 391 // processOut.toString(), 392 // processOut.length()); 393 // 394 // } 395 // } 396 // } 397 // // just return the source 398 // return new StringBuffer(src.getText()); 399 // } 400 401 return new StringBuffer(src.getText()); 402 } 403 convertIDNToUnicode(String src, int options)404 public static StringBuffer convertIDNToUnicode(String src, int options) 405 throws StringPrepParseException{ 406 407 char[] srcArr = src.toCharArray(); 408 StringBuffer result = new StringBuffer(); 409 int sepIndex=0; 410 int oldSepIndex=0; 411 for(;;){ 412 sepIndex = getSeparatorIndex(srcArr,sepIndex,srcArr.length); 413 String label = new String(srcArr,oldSepIndex,sepIndex-oldSepIndex); 414 if(label.length()==0 && sepIndex!=srcArr.length ){ 415 throw new StringPrepParseException("Found zero length lable after NamePrep.",StringPrepParseException.ZERO_LENGTH_LABEL); 416 } 417 UCharacterIterator iter = UCharacterIterator.getInstance(label); 418 result.append(convertToUnicode(iter,options)); 419 if(sepIndex==srcArr.length){ 420 break; 421 } 422 // Unlike the ToASCII operation we don't normalize the label separators 423 result.append(srcArr[sepIndex]); 424 // increment the sepIndex to skip past the separator 425 sepIndex++; 426 oldSepIndex =sepIndex; 427 } 428 if(result.length() > MAX_DOMAIN_NAME_LENGTH){ 429 throw new StringPrepParseException("The output exceed the max allowed length.", StringPrepParseException.DOMAIN_NAME_TOO_LONG_ERROR); 430 } 431 return result; 432 } 433 compare(String s1, String s2, int options)434 public static int compare(String s1, String s2, int options) throws StringPrepParseException{ 435 StringBuffer s1Out = convertIDNToASCII(s1, options); 436 StringBuffer s2Out = convertIDNToASCII(s2, options); 437 return compareCaseInsensitiveASCII(s1Out,s2Out); 438 } 439 } 440