1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  *******************************************************************************
5  * Copyright (C) 2003-2016, International Business Machines Corporation and    *
6  * others. All Rights Reserved.                                                *
7  *******************************************************************************
8  */
9 
10 package com.ibm.icu.text;
11 
12 import java.util.Collections;
13 import java.util.EnumSet;
14 import java.util.Set;
15 
16 import com.ibm.icu.impl.IDNA2003;
17 import com.ibm.icu.impl.UTS46;
18 
19 /**
20  * Abstract base class for IDNA processing.
21  * See http://www.unicode.org/reports/tr46/
22  * and http://www.ietf.org/rfc/rfc3490.txt
23  * <p>
24  * The IDNA class is not intended for public subclassing.
25  * <p>
26  * The non-static methods implement UTS #46 and IDNA2008.
27  * IDNA2008 is implemented according to UTS #46, see getUTS46Instance().
28  * <p>
29  * IDNA2003 is obsolete. The static methods implement IDNA2003. They are all deprecated.
30  * <p>
31  * IDNA2003 API Overview:
32  * <p>
33  * The static IDNA API methods implement the IDNA protocol as defined in the
34  * <a href="http://www.ietf.org/rfc/rfc3490.txt">IDNA RFC</a>.
 * The RFC defines 2 operations: ToASCII and ToUnicode. Domain labels
36  * containing non-ASCII code points are required to be processed by
37  * ToASCII operation before passing it to resolver libraries. Domain names
38  * that are obtained from resolver libraries are required to be processed by
39  * ToUnicode operation before displaying the domain name to the user.
40  * IDNA requires that implementations process input strings with
41  * <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a>,
 * which is a profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a>,
43  * and then with <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a>.
 * Implementations of IDNA MUST fully implement Nameprep and Punycode;
 * neither Nameprep nor Punycode is optional.
46  * The input and output of ToASCII and ToUnicode operations are Unicode
47  * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
48  * multiple times to an input string will yield the same result as applying the operation
49  * once.
50  * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
 * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
52  *
53  * @author Ram Viswanadha, Markus Scherer
54  * @stable ICU 2.8
55  */
56 public abstract class IDNA {
57     /**
58      * Default options value: None of the other options are set.
59      * For use in static worker and factory methods.
60      * @stable ICU 2.8
61      */
62     public static final int DEFAULT = 0;
63     /**
64      * Option to allow unassigned code points in domain names and labels.
65      * For use in static worker and factory methods.
66      * <p>This option is ignored by the UTS46 implementation.
67      * (UTS #46 disallows unassigned code points.)
68      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
69      */
70     @Deprecated
71     public static final int ALLOW_UNASSIGNED = 1;
72     /**
73      * Option to check whether the input conforms to the STD3 ASCII rules,
74      * for example the restriction of labels to LDH characters
75      * (ASCII Letters, Digits and Hyphen-Minus).
76      * For use in static worker and factory methods.
77      * @stable ICU 2.8
78      */
79     public static final int USE_STD3_RULES = 2;
80     /**
81      * IDNA option to check for whether the input conforms to the BiDi rules.
82      * For use in static worker and factory methods.
83      * <p>This option is ignored by the IDNA2003 implementation.
84      * (IDNA2003 always performs a BiDi check.)
85      * @stable ICU 4.6
86      */
87     public static final int CHECK_BIDI = 4;
88     /**
89      * IDNA option to check for whether the input conforms to the CONTEXTJ rules.
90      * For use in static worker and factory methods.
91      * <p>This option is ignored by the IDNA2003 implementation.
92      * (The CONTEXTJ check is new in IDNA2008.)
93      * @stable ICU 4.6
94      */
95     public static final int CHECK_CONTEXTJ = 8;
96     /**
97      * IDNA option for nontransitional processing in ToASCII().
98      * For use in static worker and factory methods.
99      * <p>By default, ToASCII() uses transitional processing.
100      * <p>This option is ignored by the IDNA2003 implementation.
101      * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
102      * @stable ICU 4.6
103      */
104     public static final int NONTRANSITIONAL_TO_ASCII = 0x10;
105     /**
106      * IDNA option for nontransitional processing in ToUnicode().
107      * For use in static worker and factory methods.
108      * <p>By default, ToUnicode() uses transitional processing.
109      * <p>This option is ignored by the IDNA2003 implementation.
110      * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
111      * @stable ICU 4.6
112      */
113     public static final int NONTRANSITIONAL_TO_UNICODE = 0x20;
114     /**
115      * IDNA option to check for whether the input conforms to the CONTEXTO rules.
116      * For use in static worker and factory methods.
117      * <p>This option is ignored by the IDNA2003 implementation.
118      * (The CONTEXTO check is new in IDNA2008.)
119      * <p>This is for use by registries for IDNA2008 conformance.
120      * UTS #46 does not require the CONTEXTO check.
121      * @stable ICU 49
122      */
123     public static final int CHECK_CONTEXTO = 0x40;
124 
125     /**
126      * Returns an IDNA instance which implements UTS #46.
127      * Returns an unmodifiable instance, owned by the caller.
128      * Cache it for multiple operations, and delete it when done.
129      * The instance is thread-safe, that is, it can be used concurrently.
130      * <p>
131      * UTS #46 defines Unicode IDNA Compatibility Processing,
132      * updated to the latest version of Unicode and compatible with both
133      * IDNA2003 and IDNA2008.
134      * <p>
135      * The worker functions use transitional processing, including deviation mappings,
136      * unless NONTRANSITIONAL_TO_ASCII or NONTRANSITIONAL_TO_UNICODE
137      * is used in which case the deviation characters are passed through without change.
138      * <p>
139      * Disallowed characters are mapped to U+FFFD.
140      * <p>
141      * Operations with the UTS #46 instance do not support the
142      * ALLOW_UNASSIGNED option.
143      * <p>
144      * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
145      * When the USE_STD3_RULES option is used, ASCII characters other than
146      * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
147      *
148      * @param options Bit set to modify the processing and error checking.
149      * @return the UTS #46 IDNA instance, if successful
150      * @stable ICU 4.6
151      */
getUTS46Instance(int options)152     public static IDNA getUTS46Instance(int options) {
153         return new UTS46(options);
154     }
155 
156     /**
157      * Converts a single domain name label into its ASCII form for DNS lookup.
158      * If any processing step fails, then info.hasErrors() will be true and
159      * the result might not be an ASCII string.
160      * The label might be modified according to the types of errors.
161      * Labels with severe errors will be left in (or turned into) their Unicode form.
162      *
163      * @param label Input domain name label
164      * @param dest Destination string object
165      * @param info Output container of IDNA processing details.
166      * @return dest
167      * @stable ICU 4.6
168      */
labelToASCII(CharSequence label, StringBuilder dest, Info info)169     public abstract StringBuilder labelToASCII(CharSequence label, StringBuilder dest, Info info);
170 
171     /**
172      * Converts a single domain name label into its Unicode form for human-readable display.
173      * If any processing step fails, then info.hasErrors() will be true.
174      * The label might be modified according to the types of errors.
175      *
176      * @param label Input domain name label
177      * @param dest Destination string object
178      * @param info Output container of IDNA processing details.
179      * @return dest
180      * @stable ICU 4.6
181      */
labelToUnicode(CharSequence label, StringBuilder dest, Info info)182     public abstract StringBuilder labelToUnicode(CharSequence label, StringBuilder dest, Info info);
183 
184     /**
185      * Converts a whole domain name into its ASCII form for DNS lookup.
186      * If any processing step fails, then info.hasErrors() will be true and
187      * the result might not be an ASCII string.
188      * The domain name might be modified according to the types of errors.
189      * Labels with severe errors will be left in (or turned into) their Unicode form.
190      *
191      * @param name Input domain name
192      * @param dest Destination string object
193      * @param info Output container of IDNA processing details.
194      * @return dest
195      * @stable ICU 4.6
196      */
nameToASCII(CharSequence name, StringBuilder dest, Info info)197     public abstract StringBuilder nameToASCII(CharSequence name, StringBuilder dest, Info info);
198 
199     /**
200      * Converts a whole domain name into its Unicode form for human-readable display.
201      * If any processing step fails, then info.hasErrors() will be true.
202      * The domain name might be modified according to the types of errors.
203      *
204      * @param name Input domain name
205      * @param dest Destination string object
206      * @param info Output container of IDNA processing details.
207      * @return dest
208      * @stable ICU 4.6
209      */
nameToUnicode(CharSequence name, StringBuilder dest, Info info)210     public abstract StringBuilder nameToUnicode(CharSequence name, StringBuilder dest, Info info);
211 
212     /**
213      * Output container for IDNA processing errors.
214      * The Info class is not suitable for subclassing.
215      * @stable ICU 4.6
216      */
217     public static final class Info {
218         /**
219          * Constructor.
220          * @stable ICU 4.6
221          */
Info()222         public Info() {
223             errors=EnumSet.noneOf(Error.class);
224             labelErrors=EnumSet.noneOf(Error.class);
225             isTransDiff=false;
226             isBiDi=false;
227             isOkBiDi=true;
228         }
229         /**
230          * Were there IDNA processing errors?
231          * @return true if there were processing errors
232          * @stable ICU 4.6
233          */
hasErrors()234         public boolean hasErrors() { return !errors.isEmpty(); }
235         /**
236          * Returns a set indicating IDNA processing errors.
237          * @return set of processing errors (modifiable, and not null)
238          * @stable ICU 4.6
239          */
getErrors()240         public Set<Error> getErrors() { return errors; }
241         /**
242          * Returns true if transitional and nontransitional processing produce different results.
243          * This is the case when the input label or domain name contains
244          * one or more deviation characters outside a Punycode label (see UTS #46).
245          * <ul>
246          * <li>With nontransitional processing, such characters are
247          * copied to the destination string.
248          * <li>With transitional processing, such characters are
249          * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
250          * </ul>
251          * @return true if transitional and nontransitional processing produce different results
252          * @stable ICU 4.6
253          */
isTransitionalDifferent()254         public boolean isTransitionalDifferent() { return isTransDiff; }
255 
reset()256         private void reset() {
257             errors.clear();
258             labelErrors.clear();
259             isTransDiff=false;
260             isBiDi=false;
261             isOkBiDi=true;
262         }
263 
264         private EnumSet<Error> errors, labelErrors;
265         private boolean isTransDiff;
266         private boolean isBiDi;
267         private boolean isOkBiDi;
268     }
269 
    // The following protected methods give IDNA subclasses access to the private Info fields.
    // The Info object also provides intermediate state that is publicly invisible,
    // avoiding the allocation of another worker object.
273     /**
274      * @internal
275      * @deprecated This API is ICU internal only.
276      */
277     @Deprecated
resetInfo(Info info)278     protected static void resetInfo(Info info) {
279         info.reset();
280     }
281     /**
282      * @internal
283      * @deprecated This API is ICU internal only.
284      */
285     @Deprecated
hasCertainErrors(Info info, EnumSet<Error> errors)286     protected static boolean hasCertainErrors(Info info, EnumSet<Error> errors) {
287         return !info.errors.isEmpty() && !Collections.disjoint(info.errors, errors);
288     }
289     /**
290      * @internal
291      * @deprecated This API is ICU internal only.
292      */
293     @Deprecated
hasCertainLabelErrors(Info info, EnumSet<Error> errors)294     protected static boolean hasCertainLabelErrors(Info info, EnumSet<Error> errors) {
295         return !info.labelErrors.isEmpty() && !Collections.disjoint(info.labelErrors, errors);
296     }
297     /**
298      * @internal
299      * @deprecated This API is ICU internal only.
300      */
301     @Deprecated
addLabelError(Info info, Error error)302     protected static void addLabelError(Info info, Error error) {
303         info.labelErrors.add(error);
304     }
305     /**
306      * @internal
307      * @deprecated This API is ICU internal only.
308      */
309     @Deprecated
promoteAndResetLabelErrors(Info info)310     protected static void promoteAndResetLabelErrors(Info info) {
311         if(!info.labelErrors.isEmpty()) {
312             info.errors.addAll(info.labelErrors);
313             info.labelErrors.clear();
314         }
315     }
316     /**
317      * @internal
318      * @deprecated This API is ICU internal only.
319      */
320     @Deprecated
addError(Info info, Error error)321     protected static void addError(Info info, Error error) {
322         info.errors.add(error);
323     }
324     /**
325      * @internal
326      * @deprecated This API is ICU internal only.
327      */
328     @Deprecated
setTransitionalDifferent(Info info)329     protected static void setTransitionalDifferent(Info info) {
330         info.isTransDiff=true;
331     }
332     /**
333      * @internal
334      * @deprecated This API is ICU internal only.
335      */
336     @Deprecated
setBiDi(Info info)337     protected static void setBiDi(Info info) {
338         info.isBiDi=true;
339     }
340     /**
341      * @internal
342      * @deprecated This API is ICU internal only.
343      */
344     @Deprecated
isBiDi(Info info)345     protected static boolean isBiDi(Info info) {
346         return info.isBiDi;
347     }
348     /**
349      * @internal
350      * @deprecated This API is ICU internal only.
351      */
352     @Deprecated
setNotOkBiDi(Info info)353     protected static void setNotOkBiDi(Info info) {
354         info.isOkBiDi=false;
355     }
356     /**
357      * @internal
358      * @deprecated This API is ICU internal only.
359      */
360     @Deprecated
isOkBiDi(Info info)361     protected static boolean isOkBiDi(Info info) {
362         return info.isOkBiDi;
363     }
364 
365     /**
366      * IDNA error bit set values.
367      * When a domain name or label fails a processing step or does not meet the
368      * validity criteria, then one or more of these error bits are set.
369      * @stable ICU 4.6
370      */
371     public static enum Error {
372         /**
373          * A non-final domain name label (or the whole domain name) is empty.
374          * @stable ICU 4.6
375          */
376         EMPTY_LABEL,
377         /**
378          * A domain name label is longer than 63 bytes.
379          * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
380          * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
381          * @stable ICU 4.6
382          */
383         LABEL_TOO_LONG,
384         /**
385          * A domain name is longer than 255 bytes in its storage form.
386          * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
387          * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
388          * @stable ICU 4.6
389          */
390         DOMAIN_NAME_TOO_LONG,
391         /**
392          * A label starts with a hyphen-minus ('-').
393          * @stable ICU 4.6
394          */
395         LEADING_HYPHEN,
396         /**
397          * A label ends with a hyphen-minus ('-').
398          * @stable ICU 4.6
399          */
400         TRAILING_HYPHEN,
401         /**
402          * A label contains hyphen-minus ('-') in the third and fourth positions.
403          * @stable ICU 4.6
404          */
405         HYPHEN_3_4,
406         /**
407          * A label starts with a combining mark.
408          * @stable ICU 4.6
409          */
410         LEADING_COMBINING_MARK,
411         /**
412          * A label or domain name contains disallowed characters.
413          * @stable ICU 4.6
414          */
415         DISALLOWED,
416         /**
417          * A label starts with "xn--" but does not contain valid Punycode.
418          * That is, an xn-- label failed Punycode decoding.
419          * @stable ICU 4.6
420          */
421         PUNYCODE,
422         /**
423          * A label contains a dot=full stop.
424          * This can occur in an input string for a single-label function.
425          * @stable ICU 4.6
426          */
427         LABEL_HAS_DOT,
428         /**
429          * An ACE label does not contain a valid label string.
430          * The label was successfully ACE (Punycode) decoded but the resulting
431          * string had severe validation errors. For example,
432          * it might contain characters that are not allowed in ACE labels,
433          * or it might not be normalized.
434          * @stable ICU 4.6
435          */
436         INVALID_ACE_LABEL,
437         /**
438          * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
439          * @stable ICU 4.6
440          */
441         BIDI,
442         /**
443          * A label does not meet the IDNA CONTEXTJ requirements.
444          * @stable ICU 4.6
445          */
446         CONTEXTJ,
447         /**
448          * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
449          * Some punctuation characters "Would otherwise have been DISALLOWED"
450          * but are allowed in certain contexts. (RFC 5892)
451          * @stable ICU 49
452          */
453         CONTEXTO_PUNCTUATION,
454         /**
455          * A label does not meet the IDNA CONTEXTO requirements for digits.
456          * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
457          * @stable ICU 49
458          */
459         CONTEXTO_DIGITS
460     }
461 
462     /**
463      * Sole constructor. (For invocation by subclass constructors, typically implicit.)
464      * @internal
465      * @deprecated This API is ICU internal only.
466      */
467     @Deprecated
IDNA()468     protected IDNA() {
469     }
470 
471     /* IDNA2003 API ------------------------------------------------------------- */
472 
473     /**
474      * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
475      * This operation is done on <b>single labels</b> before sending it to something that expects
476      * ASCII names. A label is an individual part of a domain name. Labels are usually
477      * separated by dots; e.g." "www.example.com" is composed of 3 labels
478      * "www","example", and "com".
479      *
480      * @param src       The input string to be processed
481      * @param options   A bit set of options:
482      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
483      *                              and do not use STD3 ASCII rules
484      *                              If unassigned code points are found the operation fails with
485      *                              StringPrepParseException.
486      *
487      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
488      *                              If this option is set, the unassigned code points are in the input
489      *                              are treated as normal Unicode code points.
490      *
491      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
492      *                              If this option is set and the input does not satisfy STD3 rules,
493      *                              the operation will fail with ParseException
494      * @return StringBuffer the converted String
495      * @throws StringPrepParseException When an error occurs for parsing a string.
496      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
497      */
498     @Deprecated
convertToASCII(String src, int options)499     public static StringBuffer convertToASCII(String src, int options)
500         throws StringPrepParseException{
501         UCharacterIterator iter = UCharacterIterator.getInstance(src);
502         return convertToASCII(iter,options);
503     }
504 
505     /**
506      * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
507      * This operation is done on <b>single labels</b> before sending it to something that expects
508      * ASCII names. A label is an individual part of a domain name. Labels are usually
509      * separated by dots; e.g." "www.example.com" is composed of 3 labels
510      * "www","example", and "com".
511      *
512      * @param src       The input string as StringBuffer to be processed
513      * @param options   A bit set of options:
514      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
515      *                              and do not use STD3 ASCII rules
516      *                              If unassigned code points are found the operation fails with
517      *                              ParseException.
518      *
519      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
520      *                              If this option is set, the unassigned code points are in the input
521      *                              are treated as normal Unicode code points.
522      *
523      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
524      *                              If this option is set and the input does not satisfy STD3 rules,
525      *                              the operation will fail with ParseException
526      * @return StringBuffer the converted String
527      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
528      */
529     @Deprecated
convertToASCII(StringBuffer src, int options)530     public static StringBuffer convertToASCII(StringBuffer src, int options)
531         throws StringPrepParseException{
532         UCharacterIterator iter = UCharacterIterator.getInstance(src);
533         return convertToASCII(iter,options);
534     }
535 
536     /**
537      * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
538      * This operation is done on <b>single labels</b> before sending it to something that expects
539      * ASCII names. A label is an individual part of a domain name. Labels are usually
540      * separated by dots; e.g." "www.example.com" is composed of 3 labels
541      * "www","example", and "com".
542      *
543      * @param src       The input string as UCharacterIterator to be processed
544      * @param options   A bit set of options:
545      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
546      *                              and do not use STD3 ASCII rules
547      *                              If unassigned code points are found the operation fails with
548      *                              ParseException.
549      *
550      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
551      *                              If this option is set, the unassigned code points are in the input
552      *                              are treated as normal Unicode code points.
553      *
554      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
555      *                              If this option is set and the input does not satisfy STD3 rules,
556      *                              the operation will fail with ParseException
557      * @return StringBuffer the converted String
558      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
559      */
560     @Deprecated
convertToASCII(UCharacterIterator src, int options)561     public static StringBuffer convertToASCII(UCharacterIterator src, int options)
562                 throws StringPrepParseException{
563         return IDNA2003.convertToASCII(src, options);
564     }
565 
566     /**
567      * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
568      * This operation is done on complete domain names, e.g: "www.example.com".
569      * It is important to note that this operation can fail. If it fails, then the input
570      * domain name cannot be used as an Internationalized Domain Name and the application
571      * should have methods defined to deal with the failure.
572      *
573      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
574      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
575      * and then convert. This function does not offer that level of granularity. The options once
576      * set will apply to all labels in the domain name
577      *
578      * @param src       The input string as UCharacterIterator to be processed
579      * @param options   A bit set of options:
580      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
581      *                              and do not use STD3 ASCII rules
582      *                              If unassigned code points are found the operation fails with
583      *                              ParseException.
584      *
585      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
586      *                              If this option is set, the unassigned code points are in the input
587      *                              are treated as normal Unicode code points.
588      *
589      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
590      *                              If this option is set and the input does not satisfy STD3 rules,
591      *                              the operation will fail with ParseException
592      * @return StringBuffer the converted String
593      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
594      */
595     @Deprecated
convertIDNToASCII(UCharacterIterator src, int options)596     public static StringBuffer convertIDNToASCII(UCharacterIterator src, int options)
597             throws StringPrepParseException{
598         return convertIDNToASCII(src.getText(), options);
599     }
600 
601     /**
602      * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
603      * This operation is done on complete domain names, e.g: "www.example.com".
604      * It is important to note that this operation can fail. If it fails, then the input
605      * domain name cannot be used as an Internationalized Domain Name and the application
606      * should have methods defined to deal with the failure.
607      *
608      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
609      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
610      * and then convert. This function does not offer that level of granularity. The options once
611      * set will apply to all labels in the domain name
612      *
613      * @param src       The input string as a StringBuffer to be processed
614      * @param options   A bit set of options:
615      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
616      *                              and do not use STD3 ASCII rules
617      *                              If unassigned code points are found the operation fails with
618      *                              ParseException.
619      *
620      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
621      *                              If this option is set, the unassigned code points are in the input
622      *                              are treated as normal Unicode code points.
623      *
624      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
625      *                              If this option is set and the input does not satisfy STD3 rules,
626      *                              the operation will fail with ParseException
627      * @return StringBuffer the converted String
628      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
629      */
630     @Deprecated
convertIDNToASCII(StringBuffer src, int options)631     public static StringBuffer convertIDNToASCII(StringBuffer src, int options)
632             throws StringPrepParseException{
633             return convertIDNToASCII(src.toString(), options);
634     }
635 
636     /**
637      * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
638      * This operation is done on complete domain names, e.g: "www.example.com".
639      * It is important to note that this operation can fail. If it fails, then the input
640      * domain name cannot be used as an Internationalized Domain Name and the application
641      * should have methods defined to deal with the failure.
642      *
643      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
644      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
645      * and then convert. This function does not offer that level of granularity. The options once
646      * set will apply to all labels in the domain name
647      *
648      * @param src       The input string to be processed
649      * @param options   A bit set of options:
650      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
651      *                              and do not use STD3 ASCII rules
652      *                              If unassigned code points are found the operation fails with
653      *                              ParseException.
654      *
655      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
656      *                              If this option is set, the unassigned code points are in the input
657      *                              are treated as normal Unicode code points.
658      *
659      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
660      *                              If this option is set and the input does not satisfy STD3 rules,
661      *                              the operation will fail with ParseException
662      * @return StringBuffer the converted String
663      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
664      */
665     @Deprecated
convertIDNToASCII(String src,int options)666     public static StringBuffer convertIDNToASCII(String src,int options)
667             throws StringPrepParseException{
668         return IDNA2003.convertIDNToASCII(src, options);
669     }
670 
671 
672     /**
673      * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
674      * This operation is done on <b>single labels</b> before sending it to something that expects
675      * Unicode names. A label is an individual part of a domain name. Labels are usually
676      * separated by dots; for e.g." "www.example.com" is composed of 3 labels
677      * "www","example", and "com".
678      *
679      * @param src       The input string to be processed
680      * @param options   A bit set of options:
681      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
682      *                              and do not use STD3 ASCII rules
683      *                              If unassigned code points are found the operation fails with
684      *                              ParseException.
685      *
686      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
687      *                              If this option is set, the unassigned code points are in the input
688      *                              are treated as normal Unicode code points.
689      *
690      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
691      *                              If this option is set and the input does not satisfy STD3 rules,
692      *                              the operation will fail with ParseException
693      * @return StringBuffer the converted String
694      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
695      */
696     @Deprecated
convertToUnicode(String src, int options)697     public static StringBuffer convertToUnicode(String src, int options)
698            throws StringPrepParseException{
699         UCharacterIterator iter = UCharacterIterator.getInstance(src);
700         return convertToUnicode(iter,options);
701     }
702 
703     /**
704      * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
705      * This operation is done on <b>single labels</b> before sending it to something that expects
706      * Unicode names. A label is an individual part of a domain name. Labels are usually
707      * separated by dots; for e.g." "www.example.com" is composed of 3 labels
708      * "www","example", and "com".
709      *
710      * @param src       The input string as StringBuffer to be processed
711      * @param options   A bit set of options:
712      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
713      *                              and do not use STD3 ASCII rules
714      *                              If unassigned code points are found the operation fails with
715      *                              ParseException.
716      *
717      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
718      *                              If this option is set, the unassigned code points are in the input
719      *                              are treated as normal Unicode code points.
720      *
721      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
722      *                              If this option is set and the input does not satisfy STD3 rules,
723      *                              the operation will fail with ParseException
724      * @return StringBuffer the converted String
725      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
726      */
727     @Deprecated
convertToUnicode(StringBuffer src, int options)728     public static StringBuffer convertToUnicode(StringBuffer src, int options)
729            throws StringPrepParseException{
730         UCharacterIterator iter = UCharacterIterator.getInstance(src);
731         return convertToUnicode(iter,options);
732     }
733 
734     /**
735      * IDNA2003: Function that implements the ToUnicode operation as defined in the IDNA RFC.
736      * This operation is done on <b>single labels</b> before sending it to something that expects
737      * Unicode names. A label is an individual part of a domain name. Labels are usually
738      * separated by dots; for e.g." "www.example.com" is composed of 3 labels
739      * "www","example", and "com".
740      *
741      * @param src       The input string as UCharacterIterator to be processed
742      * @param options   A bit set of options:
743      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
744      *                              and do not use STD3 ASCII rules
745      *                              If unassigned code points are found the operation fails with
746      *                              ParseException.
747      *
748      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
749      *                              If this option is set, the unassigned code points are in the input
750      *                              are treated as normal Unicode code points.
751      *
752      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
753      *                              If this option is set and the input does not satisfy STD3 rules,
754      *                              the operation will fail with ParseException
755      * @return StringBuffer the converted String
756      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
757      */
758     @Deprecated
convertToUnicode(UCharacterIterator src, int options)759     public static StringBuffer convertToUnicode(UCharacterIterator src, int options)
760            throws StringPrepParseException{
761         return IDNA2003.convertToUnicode(src, options);
762     }
763 
764     /**
765      * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
766      * This operation is done on complete domain names, e.g: "www.example.com".
767      *
768      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
769      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
770      * and then convert. This function does not offer that level of granularity. The options once
771      * set will apply to all labels in the domain name
772      *
773      * @param src       The input string as UCharacterIterator to be processed
774      * @param options   A bit set of options:
775      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
776      *                              and do not use STD3 ASCII rules
777      *                              If unassigned code points are found the operation fails with
778      *                              ParseException.
779      *
780      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
781      *                              If this option is set, the unassigned code points are in the input
782      *                              are treated as normal Unicode code points.
783      *
784      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
785      *                              If this option is set and the input does not satisfy STD3 rules,
786      *                              the operation will fail with ParseException
787      * @return StringBuffer the converted String
788      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
789      */
790     @Deprecated
convertIDNToUnicode(UCharacterIterator src, int options)791     public static StringBuffer convertIDNToUnicode(UCharacterIterator src, int options)
792         throws StringPrepParseException{
793         return convertIDNToUnicode(src.getText(), options);
794     }
795 
796     /**
797      * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
798      * This operation is done on complete domain names, e.g: "www.example.com".
799      *
800      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
801      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
802      * and then convert. This function does not offer that level of granularity. The options once
803      * set will apply to all labels in the domain name
804      *
805      * @param src       The input string as StringBuffer to be processed
806      * @param options   A bit set of options:
807      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
808      *                              and do not use STD3 ASCII rules
809      *                              If unassigned code points are found the operation fails with
810      *                              ParseException.
811      *
812      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
813      *                              If this option is set, the unassigned code points are in the input
814      *                              are treated as normal Unicode code points.
815      *
816      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
817      *                              If this option is set and the input does not satisfy STD3 rules,
818      *                              the operation will fail with ParseException
819      * @return StringBuffer the converted String
820      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
821      */
822     @Deprecated
convertIDNToUnicode(StringBuffer src, int options)823     public static StringBuffer convertIDNToUnicode(StringBuffer src, int options)
824         throws StringPrepParseException{
825         return convertIDNToUnicode(src.toString(), options);
826     }
827 
828     /**
829      * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
830      * This operation is done on complete domain names, e.g: "www.example.com".
831      *
832      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
833      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
834      * and then convert. This function does not offer that level of granularity. The options once
835      * set will apply to all labels in the domain name
836      *
837      * @param src       The input string to be processed
838      * @param options   A bit set of options:
839      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
840      *                              and do not use STD3 ASCII rules
841      *                              If unassigned code points are found the operation fails with
842      *                              ParseException.
843      *
844      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
845      *                              If this option is set, the unassigned code points are in the input
846      *                              are treated as normal Unicode code points.
847      *
848      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
849      *                              If this option is set and the input does not satisfy STD3 rules,
850      *                              the operation will fail with ParseException
851      * @return StringBuffer the converted String
852      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
853      */
854     @Deprecated
convertIDNToUnicode(String src, int options)855     public static StringBuffer convertIDNToUnicode(String src, int options)
856             throws StringPrepParseException{
857         return IDNA2003.convertIDNToUnicode(src, options);
858     }
859 
860     /**
861      * IDNA2003: Compare two IDN strings for equivalence.
862      * This function splits the domain names into labels and compares them.
863      * According to IDN RFC, whenever two labels are compared, they are
864      * considered equal if and only if their ASCII forms (obtained by
865      * applying toASCII) match using an case-insensitive ASCII comparison.
866      * Two domain names are considered a match if and only if all labels
867      * match regardless of whether label separators match.
868      *
869      * @param s1        First IDN string as StringBuffer
870      * @param s2        Second IDN string as StringBuffer
871      * @param options   A bit set of options:
872      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
873      *                              and do not use STD3 ASCII rules
874      *                              If unassigned code points are found the operation fails with
875      *                              ParseException.
876      *
877      *  - IDNA.ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
878      *                              If this option is set, the unassigned code points are in the input
879      *                              are treated as normal Unicode code points.
880      *
881      *  - IDNA.USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
882      *                              If this option is set and the input does not satisfy STD3 rules,
883      *                              the operation will fail with ParseException
884      * @return 0 if the strings are equal, &gt; 0 if s1 &gt; s2 and &lt; 0 if s1 &lt; s2
885      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
886      */
887     @Deprecated
compare(StringBuffer s1, StringBuffer s2, int options)888     public static int compare(StringBuffer s1, StringBuffer s2, int options)
889         throws StringPrepParseException{
890         if(s1==null || s2 == null){
891             throw new IllegalArgumentException("One of the source buffers is null");
892         }
893         return IDNA2003.compare(s1.toString(), s2.toString(), options);
894     }
895 
896     /**
897      * IDNA2003: Compare two IDN strings for equivalence.
898      * This function splits the domain names into labels and compares them.
899      * According to IDN RFC, whenever two labels are compared, they are
900      * considered equal if and only if their ASCII forms (obtained by
901      * applying toASCII) match using an case-insensitive ASCII comparison.
902      * Two domain names are considered a match if and only if all labels
903      * match regardless of whether label separators match.
904      *
905      * @param s1        First IDN string
906      * @param s2        Second IDN string
907      * @param options   A bit set of options:
908      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
909      *                              and do not use STD3 ASCII rules
910      *                              If unassigned code points are found the operation fails with
911      *                              ParseException.
912      *
913      *  - IDNA.ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
914      *                              If this option is set, the unassigned code points are in the input
915      *                              are treated as normal Unicode code points.
916      *
917      *  - IDNA.USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
918      *                              If this option is set and the input does not satisfy STD3 rules,
919      *                              the operation will fail with ParseException
920      * @return 0 if the strings are equal, &gt; 0 if s1 &gt; s2 and &lt; 0 if s1 &lt; s2
921      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
922      */
923     @Deprecated
compare(String s1, String s2, int options)924     public static int compare(String s1, String s2, int options) throws StringPrepParseException{
925         if(s1==null || s2 == null){
926             throw new IllegalArgumentException("One of the source buffers is null");
927         }
928         return IDNA2003.compare(s1, s2, options);
929     }
930     /**
931      * IDNA2003: Compare two IDN strings for equivalence.
932      * This function splits the domain names into labels and compares them.
933      * According to IDN RFC, whenever two labels are compared, they are
934      * considered equal if and only if their ASCII forms (obtained by
935      * applying toASCII) match using an case-insensitive ASCII comparison.
936      * Two domain names are considered a match if and only if all labels
937      * match regardless of whether label separators match.
938      *
939      * @param s1        First IDN string as UCharacterIterator
940      * @param s2        Second IDN string as UCharacterIterator
941      * @param options   A bit set of options:
942      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
943      *                              and do not use STD3 ASCII rules
944      *                              If unassigned code points are found the operation fails with
945      *                              ParseException.
946      *
947      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
948      *                              If this option is set, the unassigned code points are in the input
949      *                              are treated as normal Unicode code points.
950      *
951      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
952      *                              If this option is set and the input does not satisfy STD3 rules,
953      *                              the operation will fail with ParseException
954      * @return 0 if the strings are equal, &gt; 0 if i1 &gt; i2 and &lt; 0 if i1 &lt; i2
955      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
956      */
957     @Deprecated
compare(UCharacterIterator s1, UCharacterIterator s2, int options)958     public static int compare(UCharacterIterator s1, UCharacterIterator s2, int options)
959         throws StringPrepParseException{
960         if(s1==null || s2 == null){
961             throw new IllegalArgumentException("One of the source buffers is null");
962         }
963         return IDNA2003.compare(s1.getText(), s2.getText(), options);
964     }
965 }
966