1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2003-2016, International Business Machines Corporation and * 7 * others. All Rights Reserved. * 8 ******************************************************************************* 9 */ 10 11 package android.icu.text; 12 13 import java.util.Collections; 14 import java.util.EnumSet; 15 import java.util.Set; 16 17 import android.icu.impl.IDNA2003; 18 import android.icu.impl.UTS46; 19 20 /** 21 * Abstract base class for IDNA processing. 22 * See http://www.unicode.org/reports/tr46/ 23 * and http://www.ietf.org/rfc/rfc3490.txt 24 * <p> 25 * The IDNA class is not intended for public subclassing. 26 * <p> 27 * The non-static methods implement UTS #46 and IDNA2008. 28 * IDNA2008 is implemented according to UTS #46, see getUTS46Instance(). 29 * <p> 30 * IDNA2003 is obsolete. The static methods implement IDNA2003. They are all deprecated. 31 * <p> 32 * IDNA2003 API Overview: 33 * <p> 34 * The static IDNA API methods implement the IDNA protocol as defined in the 35 * <a href="http://www.ietf.org/rfc/rfc3490.txt">IDNA RFC</a>. 36 * The draft defines 2 operations: ToASCII and ToUnicode. Domain labels 37 * containing non-ASCII code points are required to be processed by 38 * ToASCII operation before passing it to resolver libraries. Domain names 39 * that are obtained from resolver libraries are required to be processed by 40 * ToUnicode operation before displaying the domain name to the user. 41 * IDNA requires that implementations process input strings with 42 * <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a>, 43 * which is a profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a> , 44 * and then with <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a>. 45 * Implementations of IDNA MUST fully implement Nameprep and Punycode; 46 * neither Nameprep nor Punycode are optional. 47 * The input and output of ToASCII and ToUnicode operations are Unicode 48 * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations 49 * multiple times to an input string will yield the same result as applying the operation 50 * once. 51 * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string) 52 * ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string). 53 * 54 * @author Ram Viswanadha, Markus Scherer 55 */ 56 public abstract class IDNA { 57 /** 58 * Default options value: None of the other options are set. 59 * For use in static worker and factory methods. 60 */ 61 public static final int DEFAULT = 0; 62 /** 63 * Option to allow unassigned code points in domain names and labels. 64 * For use in static worker and factory methods. 65 * <p>This option is ignored by the UTS46 implementation. 66 * (UTS #46 disallows unassigned code points.) 67 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 68 * @hide original deprecated declaration 69 */ 70 @Deprecated 71 public static final int ALLOW_UNASSIGNED = 1; 72 /** 73 * Option to check whether the input conforms to the STD3 ASCII rules, 74 * for example the restriction of labels to LDH characters 75 * (ASCII Letters, Digits and Hyphen-Minus). 76 * For use in static worker and factory methods. 77 */ 78 public static final int USE_STD3_RULES = 2; 79 /** 80 * IDNA option to check for whether the input conforms to the BiDi rules. 81 * For use in static worker and factory methods. 82 * <p>This option is ignored by the IDNA2003 implementation. 83 * (IDNA2003 always performs a BiDi check.) 84 */ 85 public static final int CHECK_BIDI = 4; 86 /** 87 * IDNA option to check for whether the input conforms to the CONTEXTJ rules. 88 * For use in static worker and factory methods. 89 * <p>This option is ignored by the IDNA2003 implementation. 90 * (The CONTEXTJ check is new in IDNA2008.) 91 */ 92 public static final int CHECK_CONTEXTJ = 8; 93 /** 94 * IDNA option for nontransitional processing in ToASCII(). 95 * For use in static worker and factory methods. 96 * <p>By default, ToASCII() uses transitional processing. 97 * <p>This option is ignored by the IDNA2003 implementation. 98 * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.) 99 */ 100 public static final int NONTRANSITIONAL_TO_ASCII = 0x10; 101 /** 102 * IDNA option for nontransitional processing in ToUnicode(). 103 * For use in static worker and factory methods. 104 * <p>By default, ToUnicode() uses transitional processing. 105 * <p>This option is ignored by the IDNA2003 implementation. 106 * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.) 107 */ 108 public static final int NONTRANSITIONAL_TO_UNICODE = 0x20; 109 /** 110 * IDNA option to check for whether the input conforms to the CONTEXTO rules. 111 * For use in static worker and factory methods. 112 * <p>This option is ignored by the IDNA2003 implementation. 113 * (The CONTEXTO check is new in IDNA2008.) 114 * <p>This is for use by registries for IDNA2008 conformance. 115 * UTS #46 does not require the CONTEXTO check. 116 */ 117 public static final int CHECK_CONTEXTO = 0x40; 118 119 /** 120 * Returns an IDNA instance which implements UTS #46. 121 * Returns an unmodifiable instance, owned by the caller. 122 * Cache it for multiple operations, and delete it when done. 123 * The instance is thread-safe, that is, it can be used concurrently. 124 * <p> 125 * UTS #46 defines Unicode IDNA Compatibility Processing, 126 * updated to the latest version of Unicode and compatible with both 127 * IDNA2003 and IDNA2008. 128 * <p> 129 * The worker functions use transitional processing, including deviation mappings, 130 * unless NONTRANSITIONAL_TO_ASCII or NONTRANSITIONAL_TO_UNICODE 131 * is used in which case the deviation characters are passed through without change. 132 * <p> 133 * Disallowed characters are mapped to U+FFFD. 134 * <p> 135 * Operations with the UTS #46 instance do not support the 136 * ALLOW_UNASSIGNED option. 137 * <p> 138 * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). 139 * When the USE_STD3_RULES option is used, ASCII characters other than 140 * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. 141 * 142 * @param options Bit set to modify the processing and error checking. 143 * @return the UTS #46 IDNA instance, if successful 144 */ getUTS46Instance(int options)145 public static IDNA getUTS46Instance(int options) { 146 return new UTS46(options); 147 } 148 149 /** 150 * Converts a single domain name label into its ASCII form for DNS lookup. 151 * If any processing step fails, then info.hasErrors() will be true and 152 * the result might not be an ASCII string. 153 * The label might be modified according to the types of errors. 154 * Labels with severe errors will be left in (or turned into) their Unicode form. 155 * 156 * @param label Input domain name label 157 * @param dest Destination string object 158 * @param info Output container of IDNA processing details. 159 * @return dest 160 */ labelToASCII(CharSequence label, StringBuilder dest, Info info)161 public abstract StringBuilder labelToASCII(CharSequence label, StringBuilder dest, Info info); 162 163 /** 164 * Converts a single domain name label into its Unicode form for human-readable display. 165 * If any processing step fails, then info.hasErrors() will be true. 166 * The label might be modified according to the types of errors. 167 * 168 * @param label Input domain name label 169 * @param dest Destination string object 170 * @param info Output container of IDNA processing details. 171 * @return dest 172 */ labelToUnicode(CharSequence label, StringBuilder dest, Info info)173 public abstract StringBuilder labelToUnicode(CharSequence label, StringBuilder dest, Info info); 174 175 /** 176 * Converts a whole domain name into its ASCII form for DNS lookup. 177 * If any processing step fails, then info.hasErrors() will be true and 178 * the result might not be an ASCII string. 179 * The domain name might be modified according to the types of errors. 180 * Labels with severe errors will be left in (or turned into) their Unicode form. 181 * 182 * @param name Input domain name 183 * @param dest Destination string object 184 * @param info Output container of IDNA processing details. 185 * @return dest 186 */ nameToASCII(CharSequence name, StringBuilder dest, Info info)187 public abstract StringBuilder nameToASCII(CharSequence name, StringBuilder dest, Info info); 188 189 /** 190 * Converts a whole domain name into its Unicode form for human-readable display. 191 * If any processing step fails, then info.hasErrors() will be true. 192 * The domain name might be modified according to the types of errors. 193 * 194 * @param name Input domain name 195 * @param dest Destination string object 196 * @param info Output container of IDNA processing details. 197 * @return dest 198 */ nameToUnicode(CharSequence name, StringBuilder dest, Info info)199 public abstract StringBuilder nameToUnicode(CharSequence name, StringBuilder dest, Info info); 200 201 /** 202 * Output container for IDNA processing errors. 203 * The Info class is not suitable for subclassing. 204 */ 205 public static final class Info { 206 /** 207 * Constructor. 208 */ Info()209 public Info() { 210 errors=EnumSet.noneOf(Error.class); 211 labelErrors=EnumSet.noneOf(Error.class); 212 isTransDiff=false; 213 isBiDi=false; 214 isOkBiDi=true; 215 } 216 /** 217 * Were there IDNA processing errors? 218 * @return true if there were processing errors 219 */ hasErrors()220 public boolean hasErrors() { return !errors.isEmpty(); } 221 /** 222 * Returns a set indicating IDNA processing errors. 223 * @return set of processing errors (modifiable, and not null) 224 */ getErrors()225 public Set<Error> getErrors() { return errors; } 226 /** 227 * Returns true if transitional and nontransitional processing produce different results. 228 * This is the case when the input label or domain name contains 229 * one or more deviation characters outside a Punycode label (see UTS #46). 230 * <ul> 231 * <li>With nontransitional processing, such characters are 232 * copied to the destination string. 233 * <li>With transitional processing, such characters are 234 * mapped (sharp s/sigma) or removed (joiner/nonjoiner). 235 * </ul> 236 * @return true if transitional and nontransitional processing produce different results 237 */ isTransitionalDifferent()238 public boolean isTransitionalDifferent() { return isTransDiff; } 239 reset()240 private void reset() { 241 errors.clear(); 242 labelErrors.clear(); 243 isTransDiff=false; 244 isBiDi=false; 245 isOkBiDi=true; 246 } 247 248 private EnumSet<Error> errors, labelErrors; 249 private boolean isTransDiff; 250 private boolean isBiDi; 251 private boolean isOkBiDi; 252 } 253 254 // The following protected methods give IDNA subclasses access to the private IDNAInfo fields. 255 // The IDNAInfo also provides intermediate state that is publicly invisible, 256 // avoiding the allocation of another worker object. 257 /** 258 * @deprecated This API is ICU internal only. 259 * @hide original deprecated declaration 260 * @hide draft / provisional / internal are hidden on Android 261 */ 262 @Deprecated resetInfo(Info info)263 protected static void resetInfo(Info info) { 264 info.reset(); 265 } 266 /** 267 * @deprecated This API is ICU internal only. 268 * @hide original deprecated declaration 269 * @hide draft / provisional / internal are hidden on Android 270 */ 271 @Deprecated hasCertainErrors(Info info, EnumSet<Error> errors)272 protected static boolean hasCertainErrors(Info info, EnumSet<Error> errors) { 273 return !info.errors.isEmpty() && !Collections.disjoint(info.errors, errors); 274 } 275 /** 276 * @deprecated This API is ICU internal only. 277 * @hide original deprecated declaration 278 * @hide draft / provisional / internal are hidden on Android 279 */ 280 @Deprecated hasCertainLabelErrors(Info info, EnumSet<Error> errors)281 protected static boolean hasCertainLabelErrors(Info info, EnumSet<Error> errors) { 282 return !info.labelErrors.isEmpty() && !Collections.disjoint(info.labelErrors, errors); 283 } 284 /** 285 * @deprecated This API is ICU internal only. 286 * @hide original deprecated declaration 287 * @hide draft / provisional / internal are hidden on Android 288 */ 289 @Deprecated addLabelError(Info info, Error error)290 protected static void addLabelError(Info info, Error error) { 291 info.labelErrors.add(error); 292 } 293 /** 294 * @deprecated This API is ICU internal only. 295 * @hide original deprecated declaration 296 * @hide draft / provisional / internal are hidden on Android 297 */ 298 @Deprecated promoteAndResetLabelErrors(Info info)299 protected static void promoteAndResetLabelErrors(Info info) { 300 if(!info.labelErrors.isEmpty()) { 301 info.errors.addAll(info.labelErrors); 302 info.labelErrors.clear(); 303 } 304 } 305 /** 306 * @deprecated This API is ICU internal only. 307 * @hide original deprecated declaration 308 * @hide draft / provisional / internal are hidden on Android 309 */ 310 @Deprecated addError(Info info, Error error)311 protected static void addError(Info info, Error error) { 312 info.errors.add(error); 313 } 314 /** 315 * @deprecated This API is ICU internal only. 316 * @hide original deprecated declaration 317 * @hide draft / provisional / internal are hidden on Android 318 */ 319 @Deprecated setTransitionalDifferent(Info info)320 protected static void setTransitionalDifferent(Info info) { 321 info.isTransDiff=true; 322 } 323 /** 324 * @deprecated This API is ICU internal only. 325 * @hide original deprecated declaration 326 * @hide draft / provisional / internal are hidden on Android 327 */ 328 @Deprecated setBiDi(Info info)329 protected static void setBiDi(Info info) { 330 info.isBiDi=true; 331 } 332 /** 333 * @deprecated This API is ICU internal only. 334 * @hide original deprecated declaration 335 * @hide draft / provisional / internal are hidden on Android 336 */ 337 @Deprecated isBiDi(Info info)338 protected static boolean isBiDi(Info info) { 339 return info.isBiDi; 340 } 341 /** 342 * @deprecated This API is ICU internal only. 343 * @hide original deprecated declaration 344 * @hide draft / provisional / internal are hidden on Android 345 */ 346 @Deprecated setNotOkBiDi(Info info)347 protected static void setNotOkBiDi(Info info) { 348 info.isOkBiDi=false; 349 } 350 /** 351 * @deprecated This API is ICU internal only. 352 * @hide original deprecated declaration 353 * @hide draft / provisional / internal are hidden on Android 354 */ 355 @Deprecated isOkBiDi(Info info)356 protected static boolean isOkBiDi(Info info) { 357 return info.isOkBiDi; 358 } 359 360 /** 361 * IDNA error bit set values. 362 * When a domain name or label fails a processing step or does not meet the 363 * validity criteria, then one or more of these error bits are set. 364 */ 365 public static enum Error { 366 /** 367 * A non-final domain name label (or the whole domain name) is empty. 368 */ 369 EMPTY_LABEL, 370 /** 371 * A domain name label is longer than 63 bytes. 372 * (See STD13/RFC1034 3.1. Name space specifications and terminology.) 373 * This is only checked in ToASCII operations, and only if the output label is all-ASCII. 374 */ 375 LABEL_TOO_LONG, 376 /** 377 * A domain name is longer than 255 bytes in its storage form. 378 * (See STD13/RFC1034 3.1. Name space specifications and terminology.) 379 * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII. 380 */ 381 DOMAIN_NAME_TOO_LONG, 382 /** 383 * A label starts with a hyphen-minus ('-'). 384 */ 385 LEADING_HYPHEN, 386 /** 387 * A label ends with a hyphen-minus ('-'). 388 */ 389 TRAILING_HYPHEN, 390 /** 391 * A label contains hyphen-minus ('-') in the third and fourth positions. 392 */ 393 HYPHEN_3_4, 394 /** 395 * A label starts with a combining mark. 396 */ 397 LEADING_COMBINING_MARK, 398 /** 399 * A label or domain name contains disallowed characters. 400 */ 401 DISALLOWED, 402 /** 403 * A label starts with "xn--" but does not contain valid Punycode. 404 * That is, an xn-- label failed Punycode decoding. 405 */ 406 PUNYCODE, 407 /** 408 * A label contains a dot=full stop. 409 * This can occur in an input string for a single-label function. 410 */ 411 LABEL_HAS_DOT, 412 /** 413 * An ACE label does not contain a valid label string. 414 * The label was successfully ACE (Punycode) decoded but the resulting 415 * string had severe validation errors. For example, 416 * it might contain characters that are not allowed in ACE labels, 417 * or it might not be normalized. 418 */ 419 INVALID_ACE_LABEL, 420 /** 421 * A label does not meet the IDNA BiDi requirements (for right-to-left characters). 422 */ 423 BIDI, 424 /** 425 * A label does not meet the IDNA CONTEXTJ requirements. 426 */ 427 CONTEXTJ, 428 /** 429 * A label does not meet the IDNA CONTEXTO requirements for punctuation characters. 430 * Some punctuation characters "Would otherwise have been DISALLOWED" 431 * but are allowed in certain contexts. (RFC 5892) 432 */ 433 CONTEXTO_PUNCTUATION, 434 /** 435 * A label does not meet the IDNA CONTEXTO requirements for digits. 436 * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx). 437 */ 438 CONTEXTO_DIGITS 439 } 440 441 /** 442 * Sole constructor. (For invocation by subclass constructors, typically implicit.) 443 * @deprecated This API is ICU internal only. 444 * @hide original deprecated declaration 445 * @hide draft / provisional / internal are hidden on Android 446 */ 447 @Deprecated IDNA()448 protected IDNA() { 449 } 450 451 /* IDNA2003 API ------------------------------------------------------------- */ 452 453 /** 454 * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC. 455 * This operation is done on <b>single labels</b> before sending it to something that expects 456 * ASCII names. A label is an individual part of a domain name. Labels are usually 457 * separated by dots; e.g." "www.example.com" is composed of 3 labels 458 * "www","example", and "com". 459 * 460 * @param src The input string to be processed 461 * @param options A bit set of options: 462 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 463 * and do not use STD3 ASCII rules 464 * If unassigned code points are found the operation fails with 465 * StringPrepParseException. 466 * 467 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 468 * If this option is set, the unassigned code points are in the input 469 * are treated as normal Unicode code points. 470 * 471 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 472 * If this option is set and the input does not satisfy STD3 rules, 473 * the operation will fail with ParseException 474 * @return StringBuffer the converted String 475 * @throws StringPrepParseException When an error occurs for parsing a string. 476 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 477 * @hide original deprecated declaration 478 */ 479 @Deprecated convertToASCII(String src, int options)480 public static StringBuffer convertToASCII(String src, int options) 481 throws StringPrepParseException{ 482 UCharacterIterator iter = UCharacterIterator.getInstance(src); 483 return convertToASCII(iter,options); 484 } 485 486 /** 487 * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC. 488 * This operation is done on <b>single labels</b> before sending it to something that expects 489 * ASCII names. A label is an individual part of a domain name. Labels are usually 490 * separated by dots; e.g." "www.example.com" is composed of 3 labels 491 * "www","example", and "com". 492 * 493 * @param src The input string as StringBuffer to be processed 494 * @param options A bit set of options: 495 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 496 * and do not use STD3 ASCII rules 497 * If unassigned code points are found the operation fails with 498 * ParseException. 499 * 500 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 501 * If this option is set, the unassigned code points are in the input 502 * are treated as normal Unicode code points. 503 * 504 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 505 * If this option is set and the input does not satisfy STD3 rules, 506 * the operation will fail with ParseException 507 * @return StringBuffer the converted String 508 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 509 * @hide original deprecated declaration 510 */ 511 @Deprecated convertToASCII(StringBuffer src, int options)512 public static StringBuffer convertToASCII(StringBuffer src, int options) 513 throws StringPrepParseException{ 514 UCharacterIterator iter = UCharacterIterator.getInstance(src); 515 return convertToASCII(iter,options); 516 } 517 518 /** 519 * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC. 520 * This operation is done on <b>single labels</b> before sending it to something that expects 521 * ASCII names. A label is an individual part of a domain name. Labels are usually 522 * separated by dots; e.g." "www.example.com" is composed of 3 labels 523 * "www","example", and "com". 524 * 525 * @param src The input string as UCharacterIterator to be processed 526 * @param options A bit set of options: 527 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 528 * and do not use STD3 ASCII rules 529 * If unassigned code points are found the operation fails with 530 * ParseException. 531 * 532 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 533 * If this option is set, the unassigned code points are in the input 534 * are treated as normal Unicode code points. 535 * 536 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 537 * If this option is set and the input does not satisfy STD3 rules, 538 * the operation will fail with ParseException 539 * @return StringBuffer the converted String 540 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 541 * @hide original deprecated declaration 542 */ 543 @Deprecated convertToASCII(UCharacterIterator src, int options)544 public static StringBuffer convertToASCII(UCharacterIterator src, int options) 545 throws StringPrepParseException{ 546 return IDNA2003.convertToASCII(src, options); 547 } 548 549 /** 550 * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC. 551 * This operation is done on complete domain names, e.g: "www.example.com". 552 * It is important to note that this operation can fail. If it fails, then the input 553 * domain name cannot be used as an Internationalized Domain Name and the application 554 * should have methods defined to deal with the failure. 555 * 556 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 557 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 558 * and then convert. This function does not offer that level of granularity. The options once 559 * set will apply to all labels in the domain name 560 * 561 * @param src The input string as UCharacterIterator to be processed 562 * @param options A bit set of options: 563 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 564 * and do not use STD3 ASCII rules 565 * If unassigned code points are found the operation fails with 566 * ParseException. 567 * 568 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 569 * If this option is set, the unassigned code points are in the input 570 * are treated as normal Unicode code points. 571 * 572 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 573 * If this option is set and the input does not satisfy STD3 rules, 574 * the operation will fail with ParseException 575 * @return StringBuffer the converted String 576 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 577 * @hide original deprecated declaration 578 */ 579 @Deprecated convertIDNToASCII(UCharacterIterator src, int options)580 public static StringBuffer convertIDNToASCII(UCharacterIterator src, int options) 581 throws StringPrepParseException{ 582 return convertIDNToASCII(src.getText(), options); 583 } 584 585 /** 586 * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC. 587 * This operation is done on complete domain names, e.g: "www.example.com". 588 * It is important to note that this operation can fail. If it fails, then the input 589 * domain name cannot be used as an Internationalized Domain Name and the application 590 * should have methods defined to deal with the failure. 591 * 592 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 593 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 594 * and then convert. This function does not offer that level of granularity. The options once 595 * set will apply to all labels in the domain name 596 * 597 * @param src The input string as a StringBuffer to be processed 598 * @param options A bit set of options: 599 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 600 * and do not use STD3 ASCII rules 601 * If unassigned code points are found the operation fails with 602 * ParseException. 603 * 604 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 605 * If this option is set, the unassigned code points are in the input 606 * are treated as normal Unicode code points. 607 * 608 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 609 * If this option is set and the input does not satisfy STD3 rules, 610 * the operation will fail with ParseException 611 * @return StringBuffer the converted String 612 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 613 * @hide original deprecated declaration 614 */ 615 @Deprecated convertIDNToASCII(StringBuffer src, int options)616 public static StringBuffer convertIDNToASCII(StringBuffer src, int options) 617 throws StringPrepParseException{ 618 return convertIDNToASCII(src.toString(), options); 619 } 620 621 /** 622 * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC. 623 * This operation is done on complete domain names, e.g: "www.example.com". 624 * It is important to note that this operation can fail. If it fails, then the input 625 * domain name cannot be used as an Internationalized Domain Name and the application 626 * should have methods defined to deal with the failure. 627 * 628 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 629 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 630 * and then convert. This function does not offer that level of granularity. The options once 631 * set will apply to all labels in the domain name 632 * 633 * @param src The input string to be processed 634 * @param options A bit set of options: 635 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 636 * and do not use STD3 ASCII rules 637 * If unassigned code points are found the operation fails with 638 * ParseException. 639 * 640 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 641 * If this option is set, the unassigned code points are in the input 642 * are treated as normal Unicode code points. 643 * 644 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 645 * If this option is set and the input does not satisfy STD3 rules, 646 * the operation will fail with ParseException 647 * @return StringBuffer the converted String 648 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 649 * @hide original deprecated declaration 650 */ 651 @Deprecated convertIDNToASCII(String src,int options)652 public static StringBuffer convertIDNToASCII(String src,int options) 653 throws StringPrepParseException{ 654 return IDNA2003.convertIDNToASCII(src, options); 655 } 656 657 658 /** 659 * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC. 660 * This operation is done on <b>single labels</b> before sending it to something that expects 661 * Unicode names. A label is an individual part of a domain name. Labels are usually 662 * separated by dots; for e.g." "www.example.com" is composed of 3 labels 663 * "www","example", and "com". 664 * 665 * @param src The input string to be processed 666 * @param options A bit set of options: 667 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 668 * and do not use STD3 ASCII rules 669 * If unassigned code points are found the operation fails with 670 * ParseException. 671 * 672 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 673 * If this option is set, the unassigned code points are in the input 674 * are treated as normal Unicode code points. 675 * 676 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 677 * If this option is set and the input does not satisfy STD3 rules, 678 * the operation will fail with ParseException 679 * @return StringBuffer the converted String 680 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 681 * @hide original deprecated declaration 682 */ 683 @Deprecated convertToUnicode(String src, int options)684 public static StringBuffer convertToUnicode(String src, int options) 685 throws StringPrepParseException{ 686 UCharacterIterator iter = UCharacterIterator.getInstance(src); 687 return convertToUnicode(iter,options); 688 } 689 690 /** 691 * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC. 692 * This operation is done on <b>single labels</b> before sending it to something that expects 693 * Unicode names. A label is an individual part of a domain name. Labels are usually 694 * separated by dots; for e.g." "www.example.com" is composed of 3 labels 695 * "www","example", and "com". 696 * 697 * @param src The input string as StringBuffer to be processed 698 * @param options A bit set of options: 699 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 700 * and do not use STD3 ASCII rules 701 * If unassigned code points are found the operation fails with 702 * ParseException. 703 * 704 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 705 * If this option is set, the unassigned code points are in the input 706 * are treated as normal Unicode code points. 707 * 708 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 709 * If this option is set and the input does not satisfy STD3 rules, 710 * the operation will fail with ParseException 711 * @return StringBuffer the converted String 712 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 713 * @hide original deprecated declaration 714 */ 715 @Deprecated convertToUnicode(StringBuffer src, int options)716 public static StringBuffer convertToUnicode(StringBuffer src, int options) 717 throws StringPrepParseException{ 718 UCharacterIterator iter = UCharacterIterator.getInstance(src); 719 return convertToUnicode(iter,options); 720 } 721 722 /** 723 * IDNA2003: Function that implements the ToUnicode operation as defined in the IDNA RFC. 724 * This operation is done on <b>single labels</b> before sending it to something that expects 725 * Unicode names. A label is an individual part of a domain name. Labels are usually 726 * separated by dots; for e.g." "www.example.com" is composed of 3 labels 727 * "www","example", and "com". 728 * 729 * @param src The input string as UCharacterIterator to be processed 730 * @param options A bit set of options: 731 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 732 * and do not use STD3 ASCII rules 733 * If unassigned code points are found the operation fails with 734 * ParseException. 735 * 736 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 737 * If this option is set, the unassigned code points are in the input 738 * are treated as normal Unicode code points. 739 * 740 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 741 * If this option is set and the input does not satisfy STD3 rules, 742 * the operation will fail with ParseException 743 * @return StringBuffer the converted String 744 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 745 * @hide original deprecated declaration 746 */ 747 @Deprecated convertToUnicode(UCharacterIterator src, int options)748 public static StringBuffer convertToUnicode(UCharacterIterator src, int options) 749 throws StringPrepParseException{ 750 return IDNA2003.convertToUnicode(src, options); 751 } 752 753 /** 754 * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC. 755 * This operation is done on complete domain names, e.g: "www.example.com". 756 * 757 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 758 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 759 * and then convert. This function does not offer that level of granularity. The options once 760 * set will apply to all labels in the domain name 761 * 762 * @param src The input string as UCharacterIterator to be processed 763 * @param options A bit set of options: 764 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 765 * and do not use STD3 ASCII rules 766 * If unassigned code points are found the operation fails with 767 * ParseException. 768 * 769 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 770 * If this option is set, the unassigned code points are in the input 771 * are treated as normal Unicode code points. 772 * 773 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 774 * If this option is set and the input does not satisfy STD3 rules, 775 * the operation will fail with ParseException 776 * @return StringBuffer the converted String 777 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 778 * @hide original deprecated declaration 779 */ 780 @Deprecated convertIDNToUnicode(UCharacterIterator src, int options)781 public static StringBuffer convertIDNToUnicode(UCharacterIterator src, int options) 782 throws StringPrepParseException{ 783 return convertIDNToUnicode(src.getText(), options); 784 } 785 786 /** 787 * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC. 788 * This operation is done on complete domain names, e.g: "www.example.com". 789 * 790 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 791 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 792 * and then convert. This function does not offer that level of granularity. The options once 793 * set will apply to all labels in the domain name 794 * 795 * @param src The input string as StringBuffer to be processed 796 * @param options A bit set of options: 797 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 798 * and do not use STD3 ASCII rules 799 * If unassigned code points are found the operation fails with 800 * ParseException. 801 * 802 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 803 * If this option is set, the unassigned code points are in the input 804 * are treated as normal Unicode code points. 805 * 806 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 807 * If this option is set and the input does not satisfy STD3 rules, 808 * the operation will fail with ParseException 809 * @return StringBuffer the converted String 810 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 811 * @hide original deprecated declaration 812 */ 813 @Deprecated convertIDNToUnicode(StringBuffer src, int options)814 public static StringBuffer convertIDNToUnicode(StringBuffer src, int options) 815 throws StringPrepParseException{ 816 return convertIDNToUnicode(src.toString(), options); 817 } 818 819 /** 820 * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC. 821 * This operation is done on complete domain names, e.g: "www.example.com". 822 * 823 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 824 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 825 * and then convert. This function does not offer that level of granularity. The options once 826 * set will apply to all labels in the domain name 827 * 828 * @param src The input string to be processed 829 * @param options A bit set of options: 830 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 831 * and do not use STD3 ASCII rules 832 * If unassigned code points are found the operation fails with 833 * ParseException. 834 * 835 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 836 * If this option is set, the unassigned code points are in the input 837 * are treated as normal Unicode code points. 838 * 839 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 840 * If this option is set and the input does not satisfy STD3 rules, 841 * the operation will fail with ParseException 842 * @return StringBuffer the converted String 843 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 844 * @hide original deprecated declaration 845 */ 846 @Deprecated convertIDNToUnicode(String src, int options)847 public static StringBuffer convertIDNToUnicode(String src, int options) 848 throws StringPrepParseException{ 849 return IDNA2003.convertIDNToUnicode(src, options); 850 } 851 852 /** 853 * IDNA2003: Compare two IDN strings for equivalence. 854 * This function splits the domain names into labels and compares them. 855 * According to IDN RFC, whenever two labels are compared, they are 856 * considered equal if and only if their ASCII forms (obtained by 857 * applying toASCII) match using an case-insensitive ASCII comparison. 858 * Two domain names are considered a match if and only if all labels 859 * match regardless of whether label separators match. 860 * 861 * @param s1 First IDN string as StringBuffer 862 * @param s2 Second IDN string as StringBuffer 863 * @param options A bit set of options: 864 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 865 * and do not use STD3 ASCII rules 866 * If unassigned code points are found the operation fails with 867 * ParseException. 868 * 869 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 870 * If this option is set, the unassigned code points are in the input 871 * are treated as normal Unicode code points. 872 * 873 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 874 * If this option is set and the input does not satisfy STD3 rules, 875 * the operation will fail with ParseException 876 * @return 0 if the strings are equal, > 0 if s1 > s2 and < 0 if s1 < s2 877 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 878 * @hide original deprecated declaration 879 */ 880 @Deprecated compare(StringBuffer s1, StringBuffer s2, int options)881 public static int compare(StringBuffer s1, StringBuffer s2, int options) 882 throws StringPrepParseException{ 883 if(s1==null || s2 == null){ 884 throw new IllegalArgumentException("One of the source buffers is null"); 885 } 886 return IDNA2003.compare(s1.toString(), s2.toString(), options); 887 } 888 889 /** 890 * IDNA2003: Compare two IDN strings for equivalence. 891 * This function splits the domain names into labels and compares them. 892 * According to IDN RFC, whenever two labels are compared, they are 893 * considered equal if and only if their ASCII forms (obtained by 894 * applying toASCII) match using an case-insensitive ASCII comparison. 895 * Two domain names are considered a match if and only if all labels 896 * match regardless of whether label separators match. 897 * 898 * @param s1 First IDN string 899 * @param s2 Second IDN string 900 * @param options A bit set of options: 901 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 902 * and do not use STD3 ASCII rules 903 * If unassigned code points are found the operation fails with 904 * ParseException. 905 * 906 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 907 * If this option is set, the unassigned code points are in the input 908 * are treated as normal Unicode code points. 909 * 910 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 911 * If this option is set and the input does not satisfy STD3 rules, 912 * the operation will fail with ParseException 913 * @return 0 if the strings are equal, > 0 if s1 > s2 and < 0 if s1 < s2 914 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 915 * @hide original deprecated declaration 916 */ 917 @Deprecated compare(String s1, String s2, int options)918 public static int compare(String s1, String s2, int options) throws StringPrepParseException{ 919 if(s1==null || s2 == null){ 920 throw new IllegalArgumentException("One of the source buffers is null"); 921 } 922 return IDNA2003.compare(s1, s2, options); 923 } 924 /** 925 * IDNA2003: Compare two IDN strings for equivalence. 926 * This function splits the domain names into labels and compares them. 927 * According to IDN RFC, whenever two labels are compared, they are 928 * considered equal if and only if their ASCII forms (obtained by 929 * applying toASCII) match using an case-insensitive ASCII comparison. 930 * Two domain names are considered a match if and only if all labels 931 * match regardless of whether label separators match. 932 * 933 * @param s1 First IDN string as UCharacterIterator 934 * @param s2 Second IDN string as UCharacterIterator 935 * @param options A bit set of options: 936 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 937 * and do not use STD3 ASCII rules 938 * If unassigned code points are found the operation fails with 939 * ParseException. 940 * 941 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 942 * If this option is set, the unassigned code points are in the input 943 * are treated as normal Unicode code points. 944 * 945 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 946 * If this option is set and the input does not satisfy STD3 rules, 947 * the operation will fail with ParseException 948 * @return 0 if the strings are equal, > 0 if i1 > i2 and < 0 if i1 < i2 949 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 950 * @hide original deprecated declaration 951 */ 952 @Deprecated compare(UCharacterIterator s1, UCharacterIterator s2, int options)953 public static int compare(UCharacterIterator s1, UCharacterIterator s2, int options) 954 throws StringPrepParseException{ 955 if(s1==null || s2 == null){ 956 throw new IllegalArgumentException("One of the source buffers is null"); 957 } 958 return IDNA2003.compare(s1.getText(), s2.getText(), options); 959 } 960 } 961