1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2003-2016, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.text; 11 12 import java.util.Collections; 13 import java.util.EnumSet; 14 import java.util.Set; 15 16 import com.ibm.icu.impl.IDNA2003; 17 import com.ibm.icu.impl.UTS46; 18 19 /** 20 * Abstract base class for IDNA processing. 21 * See http://www.unicode.org/reports/tr46/ 22 * and http://www.ietf.org/rfc/rfc3490.txt 23 * <p> 24 * The IDNA class is not intended for public subclassing. 25 * <p> 26 * The non-static methods implement UTS #46 and IDNA2008. 27 * IDNA2008 is implemented according to UTS #46, see getUTS46Instance(). 28 * <p> 29 * IDNA2003 is obsolete. The static methods implement IDNA2003. They are all deprecated. 30 * <p> 31 * IDNA2003 API Overview: 32 * <p> 33 * The static IDNA API methods implement the IDNA protocol as defined in the 34 * <a href="http://www.ietf.org/rfc/rfc3490.txt">IDNA RFC</a>. 35 * The draft defines 2 operations: ToASCII and ToUnicode. Domain labels 36 * containing non-ASCII code points are required to be processed by 37 * ToASCII operation before passing it to resolver libraries. Domain names 38 * that are obtained from resolver libraries are required to be processed by 39 * ToUnicode operation before displaying the domain name to the user. 40 * IDNA requires that implementations process input strings with 41 * <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a>, 42 * which is a profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a> , 43 * and then with <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a>. 44 * Implementations of IDNA MUST fully implement Nameprep and Punycode; 45 * neither Nameprep nor Punycode are optional. 46 * The input and output of ToASCII and ToUnicode operations are Unicode 47 * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations 48 * multiple times to an input string will yield the same result as applying the operation 49 * once. 50 * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string) 51 * ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string). 52 * 53 * @author Ram Viswanadha, Markus Scherer 54 * @stable ICU 2.8 55 */ 56 public abstract class IDNA { 57 /** 58 * Default options value: None of the other options are set. 59 * For use in static worker and factory methods. 60 * @stable ICU 2.8 61 */ 62 public static final int DEFAULT = 0; 63 /** 64 * Option to allow unassigned code points in domain names and labels. 65 * For use in static worker and factory methods. 66 * <p>This option is ignored by the UTS46 implementation. 67 * (UTS #46 disallows unassigned code points.) 68 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 69 */ 70 @Deprecated 71 public static final int ALLOW_UNASSIGNED = 1; 72 /** 73 * Option to check whether the input conforms to the STD3 ASCII rules, 74 * for example the restriction of labels to LDH characters 75 * (ASCII Letters, Digits and Hyphen-Minus). 76 * For use in static worker and factory methods. 77 * @stable ICU 2.8 78 */ 79 public static final int USE_STD3_RULES = 2; 80 /** 81 * IDNA option to check for whether the input conforms to the BiDi rules. 82 * For use in static worker and factory methods. 83 * <p>This option is ignored by the IDNA2003 implementation. 84 * (IDNA2003 always performs a BiDi check.) 85 * @stable ICU 4.6 86 */ 87 public static final int CHECK_BIDI = 4; 88 /** 89 * IDNA option to check for whether the input conforms to the CONTEXTJ rules. 90 * For use in static worker and factory methods. 91 * <p>This option is ignored by the IDNA2003 implementation. 92 * (The CONTEXTJ check is new in IDNA2008.) 93 * @stable ICU 4.6 94 */ 95 public static final int CHECK_CONTEXTJ = 8; 96 /** 97 * IDNA option for nontransitional processing in ToASCII(). 98 * For use in static worker and factory methods. 99 * <p>By default, ToASCII() uses transitional processing. 100 * <p>This option is ignored by the IDNA2003 implementation. 101 * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.) 102 * @stable ICU 4.6 103 */ 104 public static final int NONTRANSITIONAL_TO_ASCII = 0x10; 105 /** 106 * IDNA option for nontransitional processing in ToUnicode(). 107 * For use in static worker and factory methods. 108 * <p>By default, ToUnicode() uses transitional processing. 109 * <p>This option is ignored by the IDNA2003 implementation. 110 * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.) 111 * @stable ICU 4.6 112 */ 113 public static final int NONTRANSITIONAL_TO_UNICODE = 0x20; 114 /** 115 * IDNA option to check for whether the input conforms to the CONTEXTO rules. 116 * For use in static worker and factory methods. 117 * <p>This option is ignored by the IDNA2003 implementation. 118 * (The CONTEXTO check is new in IDNA2008.) 119 * <p>This is for use by registries for IDNA2008 conformance. 120 * UTS #46 does not require the CONTEXTO check. 121 * @stable ICU 49 122 */ 123 public static final int CHECK_CONTEXTO = 0x40; 124 125 /** 126 * Returns an IDNA instance which implements UTS #46. 127 * Returns an unmodifiable instance, owned by the caller. 128 * Cache it for multiple operations, and delete it when done. 129 * The instance is thread-safe, that is, it can be used concurrently. 130 * <p> 131 * UTS #46 defines Unicode IDNA Compatibility Processing, 132 * updated to the latest version of Unicode and compatible with both 133 * IDNA2003 and IDNA2008. 134 * <p> 135 * The worker functions use transitional processing, including deviation mappings, 136 * unless NONTRANSITIONAL_TO_ASCII or NONTRANSITIONAL_TO_UNICODE 137 * is used in which case the deviation characters are passed through without change. 138 * <p> 139 * Disallowed characters are mapped to U+FFFD. 140 * <p> 141 * Operations with the UTS #46 instance do not support the 142 * ALLOW_UNASSIGNED option. 143 * <p> 144 * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). 145 * When the USE_STD3_RULES option is used, ASCII characters other than 146 * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. 147 * 148 * @param options Bit set to modify the processing and error checking. 149 * @return the UTS #46 IDNA instance, if successful 150 * @stable ICU 4.6 151 */ getUTS46Instance(int options)152 public static IDNA getUTS46Instance(int options) { 153 return new UTS46(options); 154 } 155 156 /** 157 * Converts a single domain name label into its ASCII form for DNS lookup. 158 * If any processing step fails, then info.hasErrors() will be true and 159 * the result might not be an ASCII string. 160 * The label might be modified according to the types of errors. 161 * Labels with severe errors will be left in (or turned into) their Unicode form. 162 * 163 * @param label Input domain name label 164 * @param dest Destination string object 165 * @param info Output container of IDNA processing details. 166 * @return dest 167 * @stable ICU 4.6 168 */ labelToASCII(CharSequence label, StringBuilder dest, Info info)169 public abstract StringBuilder labelToASCII(CharSequence label, StringBuilder dest, Info info); 170 171 /** 172 * Converts a single domain name label into its Unicode form for human-readable display. 173 * If any processing step fails, then info.hasErrors() will be true. 174 * The label might be modified according to the types of errors. 175 * 176 * @param label Input domain name label 177 * @param dest Destination string object 178 * @param info Output container of IDNA processing details. 179 * @return dest 180 * @stable ICU 4.6 181 */ labelToUnicode(CharSequence label, StringBuilder dest, Info info)182 public abstract StringBuilder labelToUnicode(CharSequence label, StringBuilder dest, Info info); 183 184 /** 185 * Converts a whole domain name into its ASCII form for DNS lookup. 186 * If any processing step fails, then info.hasErrors() will be true and 187 * the result might not be an ASCII string. 188 * The domain name might be modified according to the types of errors. 189 * Labels with severe errors will be left in (or turned into) their Unicode form. 190 * 191 * @param name Input domain name 192 * @param dest Destination string object 193 * @param info Output container of IDNA processing details. 194 * @return dest 195 * @stable ICU 4.6 196 */ nameToASCII(CharSequence name, StringBuilder dest, Info info)197 public abstract StringBuilder nameToASCII(CharSequence name, StringBuilder dest, Info info); 198 199 /** 200 * Converts a whole domain name into its Unicode form for human-readable display. 201 * If any processing step fails, then info.hasErrors() will be true. 202 * The domain name might be modified according to the types of errors. 203 * 204 * @param name Input domain name 205 * @param dest Destination string object 206 * @param info Output container of IDNA processing details. 207 * @return dest 208 * @stable ICU 4.6 209 */ nameToUnicode(CharSequence name, StringBuilder dest, Info info)210 public abstract StringBuilder nameToUnicode(CharSequence name, StringBuilder dest, Info info); 211 212 /** 213 * Output container for IDNA processing errors. 214 * The Info class is not suitable for subclassing. 215 * @stable ICU 4.6 216 */ 217 public static final class Info { 218 /** 219 * Constructor. 220 * @stable ICU 4.6 221 */ Info()222 public Info() { 223 errors=EnumSet.noneOf(Error.class); 224 labelErrors=EnumSet.noneOf(Error.class); 225 isTransDiff=false; 226 isBiDi=false; 227 isOkBiDi=true; 228 } 229 /** 230 * Were there IDNA processing errors? 231 * @return true if there were processing errors 232 * @stable ICU 4.6 233 */ hasErrors()234 public boolean hasErrors() { return !errors.isEmpty(); } 235 /** 236 * Returns a set indicating IDNA processing errors. 237 * @return set of processing errors (modifiable, and not null) 238 * @stable ICU 4.6 239 */ getErrors()240 public Set<Error> getErrors() { return errors; } 241 /** 242 * Returns true if transitional and nontransitional processing produce different results. 243 * This is the case when the input label or domain name contains 244 * one or more deviation characters outside a Punycode label (see UTS #46). 245 * <ul> 246 * <li>With nontransitional processing, such characters are 247 * copied to the destination string. 248 * <li>With transitional processing, such characters are 249 * mapped (sharp s/sigma) or removed (joiner/nonjoiner). 250 * </ul> 251 * @return true if transitional and nontransitional processing produce different results 252 * @stable ICU 4.6 253 */ isTransitionalDifferent()254 public boolean isTransitionalDifferent() { return isTransDiff; } 255 reset()256 private void reset() { 257 errors.clear(); 258 labelErrors.clear(); 259 isTransDiff=false; 260 isBiDi=false; 261 isOkBiDi=true; 262 } 263 264 private EnumSet<Error> errors, labelErrors; 265 private boolean isTransDiff; 266 private boolean isBiDi; 267 private boolean isOkBiDi; 268 } 269 270 // The following protected methods give IDNA subclasses access to the private IDNAInfo fields. 271 // The IDNAInfo also provides intermediate state that is publicly invisible, 272 // avoiding the allocation of another worker object. 273 /** 274 * @internal 275 * @deprecated This API is ICU internal only. 276 */ 277 @Deprecated resetInfo(Info info)278 protected static void resetInfo(Info info) { 279 info.reset(); 280 } 281 /** 282 * @internal 283 * @deprecated This API is ICU internal only. 284 */ 285 @Deprecated hasCertainErrors(Info info, EnumSet<Error> errors)286 protected static boolean hasCertainErrors(Info info, EnumSet<Error> errors) { 287 return !info.errors.isEmpty() && !Collections.disjoint(info.errors, errors); 288 } 289 /** 290 * @internal 291 * @deprecated This API is ICU internal only. 292 */ 293 @Deprecated hasCertainLabelErrors(Info info, EnumSet<Error> errors)294 protected static boolean hasCertainLabelErrors(Info info, EnumSet<Error> errors) { 295 return !info.labelErrors.isEmpty() && !Collections.disjoint(info.labelErrors, errors); 296 } 297 /** 298 * @internal 299 * @deprecated This API is ICU internal only. 300 */ 301 @Deprecated addLabelError(Info info, Error error)302 protected static void addLabelError(Info info, Error error) { 303 info.labelErrors.add(error); 304 } 305 /** 306 * @internal 307 * @deprecated This API is ICU internal only. 308 */ 309 @Deprecated promoteAndResetLabelErrors(Info info)310 protected static void promoteAndResetLabelErrors(Info info) { 311 if(!info.labelErrors.isEmpty()) { 312 info.errors.addAll(info.labelErrors); 313 info.labelErrors.clear(); 314 } 315 } 316 /** 317 * @internal 318 * @deprecated This API is ICU internal only. 319 */ 320 @Deprecated addError(Info info, Error error)321 protected static void addError(Info info, Error error) { 322 info.errors.add(error); 323 } 324 /** 325 * @internal 326 * @deprecated This API is ICU internal only. 327 */ 328 @Deprecated setTransitionalDifferent(Info info)329 protected static void setTransitionalDifferent(Info info) { 330 info.isTransDiff=true; 331 } 332 /** 333 * @internal 334 * @deprecated This API is ICU internal only. 335 */ 336 @Deprecated setBiDi(Info info)337 protected static void setBiDi(Info info) { 338 info.isBiDi=true; 339 } 340 /** 341 * @internal 342 * @deprecated This API is ICU internal only. 343 */ 344 @Deprecated isBiDi(Info info)345 protected static boolean isBiDi(Info info) { 346 return info.isBiDi; 347 } 348 /** 349 * @internal 350 * @deprecated This API is ICU internal only. 351 */ 352 @Deprecated setNotOkBiDi(Info info)353 protected static void setNotOkBiDi(Info info) { 354 info.isOkBiDi=false; 355 } 356 /** 357 * @internal 358 * @deprecated This API is ICU internal only. 359 */ 360 @Deprecated isOkBiDi(Info info)361 protected static boolean isOkBiDi(Info info) { 362 return info.isOkBiDi; 363 } 364 365 /** 366 * IDNA error bit set values. 367 * When a domain name or label fails a processing step or does not meet the 368 * validity criteria, then one or more of these error bits are set. 369 * @stable ICU 4.6 370 */ 371 public static enum Error { 372 /** 373 * A non-final domain name label (or the whole domain name) is empty. 374 * @stable ICU 4.6 375 */ 376 EMPTY_LABEL, 377 /** 378 * A domain name label is longer than 63 bytes. 379 * (See STD13/RFC1034 3.1. Name space specifications and terminology.) 380 * This is only checked in ToASCII operations, and only if the output label is all-ASCII. 381 * @stable ICU 4.6 382 */ 383 LABEL_TOO_LONG, 384 /** 385 * A domain name is longer than 255 bytes in its storage form. 386 * (See STD13/RFC1034 3.1. Name space specifications and terminology.) 387 * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII. 388 * @stable ICU 4.6 389 */ 390 DOMAIN_NAME_TOO_LONG, 391 /** 392 * A label starts with a hyphen-minus ('-'). 393 * @stable ICU 4.6 394 */ 395 LEADING_HYPHEN, 396 /** 397 * A label ends with a hyphen-minus ('-'). 398 * @stable ICU 4.6 399 */ 400 TRAILING_HYPHEN, 401 /** 402 * A label contains hyphen-minus ('-') in the third and fourth positions. 403 * @stable ICU 4.6 404 */ 405 HYPHEN_3_4, 406 /** 407 * A label starts with a combining mark. 408 * @stable ICU 4.6 409 */ 410 LEADING_COMBINING_MARK, 411 /** 412 * A label or domain name contains disallowed characters. 413 * @stable ICU 4.6 414 */ 415 DISALLOWED, 416 /** 417 * A label starts with "xn--" but does not contain valid Punycode. 418 * That is, an xn-- label failed Punycode decoding. 419 * @stable ICU 4.6 420 */ 421 PUNYCODE, 422 /** 423 * A label contains a dot=full stop. 424 * This can occur in an input string for a single-label function. 425 * @stable ICU 4.6 426 */ 427 LABEL_HAS_DOT, 428 /** 429 * An ACE label does not contain a valid label string. 430 * The label was successfully ACE (Punycode) decoded but the resulting 431 * string had severe validation errors. For example, 432 * it might contain characters that are not allowed in ACE labels, 433 * or it might not be normalized. 434 * @stable ICU 4.6 435 */ 436 INVALID_ACE_LABEL, 437 /** 438 * A label does not meet the IDNA BiDi requirements (for right-to-left characters). 439 * @stable ICU 4.6 440 */ 441 BIDI, 442 /** 443 * A label does not meet the IDNA CONTEXTJ requirements. 444 * @stable ICU 4.6 445 */ 446 CONTEXTJ, 447 /** 448 * A label does not meet the IDNA CONTEXTO requirements for punctuation characters. 449 * Some punctuation characters "Would otherwise have been DISALLOWED" 450 * but are allowed in certain contexts. (RFC 5892) 451 * @stable ICU 49 452 */ 453 CONTEXTO_PUNCTUATION, 454 /** 455 * A label does not meet the IDNA CONTEXTO requirements for digits. 456 * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx). 457 * @stable ICU 49 458 */ 459 CONTEXTO_DIGITS 460 } 461 462 /** 463 * Sole constructor. (For invocation by subclass constructors, typically implicit.) 464 * @internal 465 * @deprecated This API is ICU internal only. 466 */ 467 @Deprecated IDNA()468 protected IDNA() { 469 } 470 471 /* IDNA2003 API ------------------------------------------------------------- */ 472 473 /** 474 * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC. 475 * This operation is done on <b>single labels</b> before sending it to something that expects 476 * ASCII names. A label is an individual part of a domain name. Labels are usually 477 * separated by dots; e.g." "www.example.com" is composed of 3 labels 478 * "www","example", and "com". 479 * 480 * @param src The input string to be processed 481 * @param options A bit set of options: 482 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 483 * and do not use STD3 ASCII rules 484 * If unassigned code points are found the operation fails with 485 * StringPrepParseException. 486 * 487 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 488 * If this option is set, the unassigned code points are in the input 489 * are treated as normal Unicode code points. 490 * 491 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 492 * If this option is set and the input does not satisfy STD3 rules, 493 * the operation will fail with ParseException 494 * @return StringBuffer the converted String 495 * @throws StringPrepParseException When an error occurs for parsing a string. 496 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 497 */ 498 @Deprecated convertToASCII(String src, int options)499 public static StringBuffer convertToASCII(String src, int options) 500 throws StringPrepParseException{ 501 UCharacterIterator iter = UCharacterIterator.getInstance(src); 502 return convertToASCII(iter,options); 503 } 504 505 /** 506 * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC. 507 * This operation is done on <b>single labels</b> before sending it to something that expects 508 * ASCII names. A label is an individual part of a domain name. Labels are usually 509 * separated by dots; e.g." "www.example.com" is composed of 3 labels 510 * "www","example", and "com". 511 * 512 * @param src The input string as StringBuffer to be processed 513 * @param options A bit set of options: 514 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 515 * and do not use STD3 ASCII rules 516 * If unassigned code points are found the operation fails with 517 * ParseException. 518 * 519 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 520 * If this option is set, the unassigned code points are in the input 521 * are treated as normal Unicode code points. 522 * 523 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 524 * If this option is set and the input does not satisfy STD3 rules, 525 * the operation will fail with ParseException 526 * @return StringBuffer the converted String 527 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 528 */ 529 @Deprecated convertToASCII(StringBuffer src, int options)530 public static StringBuffer convertToASCII(StringBuffer src, int options) 531 throws StringPrepParseException{ 532 UCharacterIterator iter = UCharacterIterator.getInstance(src); 533 return convertToASCII(iter,options); 534 } 535 536 /** 537 * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC. 538 * This operation is done on <b>single labels</b> before sending it to something that expects 539 * ASCII names. A label is an individual part of a domain name. Labels are usually 540 * separated by dots; e.g." "www.example.com" is composed of 3 labels 541 * "www","example", and "com". 542 * 543 * @param src The input string as UCharacterIterator to be processed 544 * @param options A bit set of options: 545 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 546 * and do not use STD3 ASCII rules 547 * If unassigned code points are found the operation fails with 548 * ParseException. 549 * 550 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 551 * If this option is set, the unassigned code points are in the input 552 * are treated as normal Unicode code points. 553 * 554 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 555 * If this option is set and the input does not satisfy STD3 rules, 556 * the operation will fail with ParseException 557 * @return StringBuffer the converted String 558 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 559 */ 560 @Deprecated convertToASCII(UCharacterIterator src, int options)561 public static StringBuffer convertToASCII(UCharacterIterator src, int options) 562 throws StringPrepParseException{ 563 return IDNA2003.convertToASCII(src, options); 564 } 565 566 /** 567 * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC. 568 * This operation is done on complete domain names, e.g: "www.example.com". 569 * It is important to note that this operation can fail. If it fails, then the input 570 * domain name cannot be used as an Internationalized Domain Name and the application 571 * should have methods defined to deal with the failure. 572 * 573 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 574 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 575 * and then convert. This function does not offer that level of granularity. The options once 576 * set will apply to all labels in the domain name 577 * 578 * @param src The input string as UCharacterIterator to be processed 579 * @param options A bit set of options: 580 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 581 * and do not use STD3 ASCII rules 582 * If unassigned code points are found the operation fails with 583 * ParseException. 584 * 585 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 586 * If this option is set, the unassigned code points are in the input 587 * are treated as normal Unicode code points. 588 * 589 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 590 * If this option is set and the input does not satisfy STD3 rules, 591 * the operation will fail with ParseException 592 * @return StringBuffer the converted String 593 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 594 */ 595 @Deprecated convertIDNToASCII(UCharacterIterator src, int options)596 public static StringBuffer convertIDNToASCII(UCharacterIterator src, int options) 597 throws StringPrepParseException{ 598 return convertIDNToASCII(src.getText(), options); 599 } 600 601 /** 602 * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC. 603 * This operation is done on complete domain names, e.g: "www.example.com". 604 * It is important to note that this operation can fail. If it fails, then the input 605 * domain name cannot be used as an Internationalized Domain Name and the application 606 * should have methods defined to deal with the failure. 607 * 608 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 609 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 610 * and then convert. This function does not offer that level of granularity. The options once 611 * set will apply to all labels in the domain name 612 * 613 * @param src The input string as a StringBuffer to be processed 614 * @param options A bit set of options: 615 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 616 * and do not use STD3 ASCII rules 617 * If unassigned code points are found the operation fails with 618 * ParseException. 619 * 620 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 621 * If this option is set, the unassigned code points are in the input 622 * are treated as normal Unicode code points. 623 * 624 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 625 * If this option is set and the input does not satisfy STD3 rules, 626 * the operation will fail with ParseException 627 * @return StringBuffer the converted String 628 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 629 */ 630 @Deprecated convertIDNToASCII(StringBuffer src, int options)631 public static StringBuffer convertIDNToASCII(StringBuffer src, int options) 632 throws StringPrepParseException{ 633 return convertIDNToASCII(src.toString(), options); 634 } 635 636 /** 637 * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC. 638 * This operation is done on complete domain names, e.g: "www.example.com". 639 * It is important to note that this operation can fail. If it fails, then the input 640 * domain name cannot be used as an Internationalized Domain Name and the application 641 * should have methods defined to deal with the failure. 642 * 643 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 644 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 645 * and then convert. This function does not offer that level of granularity. The options once 646 * set will apply to all labels in the domain name 647 * 648 * @param src The input string to be processed 649 * @param options A bit set of options: 650 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 651 * and do not use STD3 ASCII rules 652 * If unassigned code points are found the operation fails with 653 * ParseException. 654 * 655 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 656 * If this option is set, the unassigned code points are in the input 657 * are treated as normal Unicode code points. 658 * 659 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 660 * If this option is set and the input does not satisfy STD3 rules, 661 * the operation will fail with ParseException 662 * @return StringBuffer the converted String 663 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 664 */ 665 @Deprecated convertIDNToASCII(String src,int options)666 public static StringBuffer convertIDNToASCII(String src,int options) 667 throws StringPrepParseException{ 668 return IDNA2003.convertIDNToASCII(src, options); 669 } 670 671 672 /** 673 * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC. 674 * This operation is done on <b>single labels</b> before sending it to something that expects 675 * Unicode names. A label is an individual part of a domain name. Labels are usually 676 * separated by dots; for e.g." "www.example.com" is composed of 3 labels 677 * "www","example", and "com". 678 * 679 * @param src The input string to be processed 680 * @param options A bit set of options: 681 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 682 * and do not use STD3 ASCII rules 683 * If unassigned code points are found the operation fails with 684 * ParseException. 685 * 686 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 687 * If this option is set, the unassigned code points are in the input 688 * are treated as normal Unicode code points. 689 * 690 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 691 * If this option is set and the input does not satisfy STD3 rules, 692 * the operation will fail with ParseException 693 * @return StringBuffer the converted String 694 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 695 */ 696 @Deprecated convertToUnicode(String src, int options)697 public static StringBuffer convertToUnicode(String src, int options) 698 throws StringPrepParseException{ 699 UCharacterIterator iter = UCharacterIterator.getInstance(src); 700 return convertToUnicode(iter,options); 701 } 702 703 /** 704 * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC. 705 * This operation is done on <b>single labels</b> before sending it to something that expects 706 * Unicode names. A label is an individual part of a domain name. Labels are usually 707 * separated by dots; for e.g." "www.example.com" is composed of 3 labels 708 * "www","example", and "com". 709 * 710 * @param src The input string as StringBuffer to be processed 711 * @param options A bit set of options: 712 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 713 * and do not use STD3 ASCII rules 714 * If unassigned code points are found the operation fails with 715 * ParseException. 716 * 717 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 718 * If this option is set, the unassigned code points are in the input 719 * are treated as normal Unicode code points. 720 * 721 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 722 * If this option is set and the input does not satisfy STD3 rules, 723 * the operation will fail with ParseException 724 * @return StringBuffer the converted String 725 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 726 */ 727 @Deprecated convertToUnicode(StringBuffer src, int options)728 public static StringBuffer convertToUnicode(StringBuffer src, int options) 729 throws StringPrepParseException{ 730 UCharacterIterator iter = UCharacterIterator.getInstance(src); 731 return convertToUnicode(iter,options); 732 } 733 734 /** 735 * IDNA2003: Function that implements the ToUnicode operation as defined in the IDNA RFC. 736 * This operation is done on <b>single labels</b> before sending it to something that expects 737 * Unicode names. A label is an individual part of a domain name. Labels are usually 738 * separated by dots; for e.g." "www.example.com" is composed of 3 labels 739 * "www","example", and "com". 740 * 741 * @param src The input string as UCharacterIterator to be processed 742 * @param options A bit set of options: 743 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 744 * and do not use STD3 ASCII rules 745 * If unassigned code points are found the operation fails with 746 * ParseException. 747 * 748 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 749 * If this option is set, the unassigned code points are in the input 750 * are treated as normal Unicode code points. 751 * 752 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 753 * If this option is set and the input does not satisfy STD3 rules, 754 * the operation will fail with ParseException 755 * @return StringBuffer the converted String 756 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 757 */ 758 @Deprecated convertToUnicode(UCharacterIterator src, int options)759 public static StringBuffer convertToUnicode(UCharacterIterator src, int options) 760 throws StringPrepParseException{ 761 return IDNA2003.convertToUnicode(src, options); 762 } 763 764 /** 765 * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC. 766 * This operation is done on complete domain names, e.g: "www.example.com". 767 * 768 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 769 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 770 * and then convert. This function does not offer that level of granularity. The options once 771 * set will apply to all labels in the domain name 772 * 773 * @param src The input string as UCharacterIterator to be processed 774 * @param options A bit set of options: 775 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 776 * and do not use STD3 ASCII rules 777 * If unassigned code points are found the operation fails with 778 * ParseException. 779 * 780 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 781 * If this option is set, the unassigned code points are in the input 782 * are treated as normal Unicode code points. 783 * 784 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 785 * If this option is set and the input does not satisfy STD3 rules, 786 * the operation will fail with ParseException 787 * @return StringBuffer the converted String 788 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 789 */ 790 @Deprecated convertIDNToUnicode(UCharacterIterator src, int options)791 public static StringBuffer convertIDNToUnicode(UCharacterIterator src, int options) 792 throws StringPrepParseException{ 793 return convertIDNToUnicode(src.getText(), options); 794 } 795 796 /** 797 * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC. 798 * This operation is done on complete domain names, e.g: "www.example.com". 799 * 800 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 801 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 802 * and then convert. This function does not offer that level of granularity. The options once 803 * set will apply to all labels in the domain name 804 * 805 * @param src The input string as StringBuffer to be processed 806 * @param options A bit set of options: 807 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 808 * and do not use STD3 ASCII rules 809 * If unassigned code points are found the operation fails with 810 * ParseException. 811 * 812 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 813 * If this option is set, the unassigned code points are in the input 814 * are treated as normal Unicode code points. 815 * 816 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 817 * If this option is set and the input does not satisfy STD3 rules, 818 * the operation will fail with ParseException 819 * @return StringBuffer the converted String 820 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 821 */ 822 @Deprecated convertIDNToUnicode(StringBuffer src, int options)823 public static StringBuffer convertIDNToUnicode(StringBuffer src, int options) 824 throws StringPrepParseException{ 825 return convertIDNToUnicode(src.toString(), options); 826 } 827 828 /** 829 * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC. 830 * This operation is done on complete domain names, e.g: "www.example.com". 831 * 832 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 833 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 834 * and then convert. This function does not offer that level of granularity. The options once 835 * set will apply to all labels in the domain name 836 * 837 * @param src The input string to be processed 838 * @param options A bit set of options: 839 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 840 * and do not use STD3 ASCII rules 841 * If unassigned code points are found the operation fails with 842 * ParseException. 843 * 844 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 845 * If this option is set, the unassigned code points are in the input 846 * are treated as normal Unicode code points. 847 * 848 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 849 * If this option is set and the input does not satisfy STD3 rules, 850 * the operation will fail with ParseException 851 * @return StringBuffer the converted String 852 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 853 */ 854 @Deprecated convertIDNToUnicode(String src, int options)855 public static StringBuffer convertIDNToUnicode(String src, int options) 856 throws StringPrepParseException{ 857 return IDNA2003.convertIDNToUnicode(src, options); 858 } 859 860 /** 861 * IDNA2003: Compare two IDN strings for equivalence. 862 * This function splits the domain names into labels and compares them. 863 * According to IDN RFC, whenever two labels are compared, they are 864 * considered equal if and only if their ASCII forms (obtained by 865 * applying toASCII) match using an case-insensitive ASCII comparison. 866 * Two domain names are considered a match if and only if all labels 867 * match regardless of whether label separators match. 868 * 869 * @param s1 First IDN string as StringBuffer 870 * @param s2 Second IDN string as StringBuffer 871 * @param options A bit set of options: 872 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 873 * and do not use STD3 ASCII rules 874 * If unassigned code points are found the operation fails with 875 * ParseException. 876 * 877 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 878 * If this option is set, the unassigned code points are in the input 879 * are treated as normal Unicode code points. 880 * 881 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 882 * If this option is set and the input does not satisfy STD3 rules, 883 * the operation will fail with ParseException 884 * @return 0 if the strings are equal, > 0 if s1 > s2 and < 0 if s1 < s2 885 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 886 */ 887 @Deprecated compare(StringBuffer s1, StringBuffer s2, int options)888 public static int compare(StringBuffer s1, StringBuffer s2, int options) 889 throws StringPrepParseException{ 890 if(s1==null || s2 == null){ 891 throw new IllegalArgumentException("One of the source buffers is null"); 892 } 893 return IDNA2003.compare(s1.toString(), s2.toString(), options); 894 } 895 896 /** 897 * IDNA2003: Compare two IDN strings for equivalence. 898 * This function splits the domain names into labels and compares them. 899 * According to IDN RFC, whenever two labels are compared, they are 900 * considered equal if and only if their ASCII forms (obtained by 901 * applying toASCII) match using an case-insensitive ASCII comparison. 902 * Two domain names are considered a match if and only if all labels 903 * match regardless of whether label separators match. 904 * 905 * @param s1 First IDN string 906 * @param s2 Second IDN string 907 * @param options A bit set of options: 908 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 909 * and do not use STD3 ASCII rules 910 * If unassigned code points are found the operation fails with 911 * ParseException. 912 * 913 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 914 * If this option is set, the unassigned code points are in the input 915 * are treated as normal Unicode code points. 916 * 917 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 918 * If this option is set and the input does not satisfy STD3 rules, 919 * the operation will fail with ParseException 920 * @return 0 if the strings are equal, > 0 if s1 > s2 and < 0 if s1 < s2 921 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 922 */ 923 @Deprecated compare(String s1, String s2, int options)924 public static int compare(String s1, String s2, int options) throws StringPrepParseException{ 925 if(s1==null || s2 == null){ 926 throw new IllegalArgumentException("One of the source buffers is null"); 927 } 928 return IDNA2003.compare(s1, s2, options); 929 } 930 /** 931 * IDNA2003: Compare two IDN strings for equivalence. 932 * This function splits the domain names into labels and compares them. 933 * According to IDN RFC, whenever two labels are compared, they are 934 * considered equal if and only if their ASCII forms (obtained by 935 * applying toASCII) match using an case-insensitive ASCII comparison. 936 * Two domain names are considered a match if and only if all labels 937 * match regardless of whether label separators match. 938 * 939 * @param s1 First IDN string as UCharacterIterator 940 * @param s2 Second IDN string as UCharacterIterator 941 * @param options A bit set of options: 942 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 943 * and do not use STD3 ASCII rules 944 * If unassigned code points are found the operation fails with 945 * ParseException. 946 * 947 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 948 * If this option is set, the unassigned code points are in the input 949 * are treated as normal Unicode code points. 950 * 951 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 952 * If this option is set and the input does not satisfy STD3 rules, 953 * the operation will fail with ParseException 954 * @return 0 if the strings are equal, > 0 if i1 > i2 and < 0 if i1 < i2 955 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 956 */ 957 @Deprecated compare(UCharacterIterator s1, UCharacterIterator s2, int options)958 public static int compare(UCharacterIterator s1, UCharacterIterator s2, int options) 959 throws StringPrepParseException{ 960 if(s1==null || s2 == null){ 961 throw new IllegalArgumentException("One of the source buffers is null"); 962 } 963 return IDNA2003.compare(s1.getText(), s2.getText(), options); 964 } 965 } 966