1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /*
5  *******************************************************************************
6  * Copyright (C) 2003-2016, International Business Machines Corporation and    *
7  * others. All Rights Reserved.                                                *
8  *******************************************************************************
9  */
10 
11 package android.icu.text;
12 
13 import java.util.Collections;
14 import java.util.EnumSet;
15 import java.util.Set;
16 
17 import android.icu.impl.IDNA2003;
18 import android.icu.impl.UTS46;
19 
20 /**
21  * Abstract base class for IDNA processing.
22  * See http://www.unicode.org/reports/tr46/
23  * and http://www.ietf.org/rfc/rfc3490.txt
24  * <p>
25  * The IDNA class is not intended for public subclassing.
26  * <p>
27  * The non-static methods implement UTS #46 and IDNA2008.
28  * IDNA2008 is implemented according to UTS #46, see getUTS46Instance().
29  * <p>
30  * IDNA2003 is obsolete. The static methods implement IDNA2003. They are all deprecated.
31  * <p>
32  * IDNA2003 API Overview:
33  * <p>
34  * The static IDNA API methods implement the IDNA protocol as defined in the
35  * <a href="http://www.ietf.org/rfc/rfc3490.txt">IDNA RFC</a>.
36  * The draft defines 2 operations: ToASCII and ToUnicode. Domain labels
37  * containing non-ASCII code points are required to be processed by
38  * ToASCII operation before passing it to resolver libraries. Domain names
39  * that are obtained from resolver libraries are required to be processed by
40  * ToUnicode operation before displaying the domain name to the user.
41  * IDNA requires that implementations process input strings with
42  * <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a>,
 * which is a profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a>,
44  * and then with <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a>.
45  * Implementations of IDNA MUST fully implement Nameprep and Punycode;
46  * neither Nameprep nor Punycode are optional.
47  * The input and output of ToASCII and ToUnicode operations are Unicode
48  * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
49  * multiple times to an input string will yield the same result as applying the operation
50  * once.
51  * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
 * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
53  *
54  * @author Ram Viswanadha, Markus Scherer
55  */
56 public abstract class IDNA {
57     /**
58      * Default options value: None of the other options are set.
59      * For use in static worker and factory methods.
60      */
61     public static final int DEFAULT = 0;
62     /**
63      * Option to allow unassigned code points in domain names and labels.
64      * For use in static worker and factory methods.
65      * <p>This option is ignored by the UTS46 implementation.
66      * (UTS #46 disallows unassigned code points.)
67      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
68      * @hide original deprecated declaration
69      */
70     @Deprecated
71     public static final int ALLOW_UNASSIGNED = 1;
72     /**
73      * Option to check whether the input conforms to the STD3 ASCII rules,
74      * for example the restriction of labels to LDH characters
75      * (ASCII Letters, Digits and Hyphen-Minus).
76      * For use in static worker and factory methods.
77      */
78     public static final int USE_STD3_RULES = 2;
79     /**
80      * IDNA option to check for whether the input conforms to the BiDi rules.
81      * For use in static worker and factory methods.
82      * <p>This option is ignored by the IDNA2003 implementation.
83      * (IDNA2003 always performs a BiDi check.)
84      */
85     public static final int CHECK_BIDI = 4;
86     /**
87      * IDNA option to check for whether the input conforms to the CONTEXTJ rules.
88      * For use in static worker and factory methods.
89      * <p>This option is ignored by the IDNA2003 implementation.
90      * (The CONTEXTJ check is new in IDNA2008.)
91      */
92     public static final int CHECK_CONTEXTJ = 8;
93     /**
94      * IDNA option for nontransitional processing in ToASCII().
95      * For use in static worker and factory methods.
96      * <p>By default, ToASCII() uses transitional processing.
97      * <p>This option is ignored by the IDNA2003 implementation.
98      * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
99      */
100     public static final int NONTRANSITIONAL_TO_ASCII = 0x10;
101     /**
102      * IDNA option for nontransitional processing in ToUnicode().
103      * For use in static worker and factory methods.
104      * <p>By default, ToUnicode() uses transitional processing.
105      * <p>This option is ignored by the IDNA2003 implementation.
106      * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
107      */
108     public static final int NONTRANSITIONAL_TO_UNICODE = 0x20;
109     /**
110      * IDNA option to check for whether the input conforms to the CONTEXTO rules.
111      * For use in static worker and factory methods.
112      * <p>This option is ignored by the IDNA2003 implementation.
113      * (The CONTEXTO check is new in IDNA2008.)
114      * <p>This is for use by registries for IDNA2008 conformance.
115      * UTS #46 does not require the CONTEXTO check.
116      */
117     public static final int CHECK_CONTEXTO = 0x40;
118 
119     /**
120      * Returns an IDNA instance which implements UTS #46.
121      * Returns an unmodifiable instance, owned by the caller.
122      * Cache it for multiple operations, and delete it when done.
123      * The instance is thread-safe, that is, it can be used concurrently.
124      * <p>
125      * UTS #46 defines Unicode IDNA Compatibility Processing,
126      * updated to the latest version of Unicode and compatible with both
127      * IDNA2003 and IDNA2008.
128      * <p>
129      * The worker functions use transitional processing, including deviation mappings,
130      * unless NONTRANSITIONAL_TO_ASCII or NONTRANSITIONAL_TO_UNICODE
131      * is used in which case the deviation characters are passed through without change.
132      * <p>
133      * Disallowed characters are mapped to U+FFFD.
134      * <p>
135      * Operations with the UTS #46 instance do not support the
136      * ALLOW_UNASSIGNED option.
137      * <p>
138      * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
139      * When the USE_STD3_RULES option is used, ASCII characters other than
140      * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
141      *
142      * @param options Bit set to modify the processing and error checking.
143      * @return the UTS #46 IDNA instance, if successful
144      */
getUTS46Instance(int options)145     public static IDNA getUTS46Instance(int options) {
146         return new UTS46(options);
147     }
148 
    /**
     * Converts a single domain name label into its ASCII form for DNS lookup.
     * <p>
     * If any processing step fails, then {@code info.hasErrors()} will be true and
     * the result might not be an ASCII string.
     * The label might be modified according to the types of errors.
     * Labels with severe errors will be left in (or turned into) their Unicode form.
     *
     * @param label Input domain name label
     * @param dest Destination string object
     * @param info Output container of IDNA processing details.
     * @return dest
     */
    public abstract StringBuilder labelToASCII(CharSequence label, StringBuilder dest, Info info);
162 
    /**
     * Converts a single domain name label into its Unicode form for human-readable display.
     * <p>
     * If any processing step fails, then {@code info.hasErrors()} will be true.
     * The label might be modified according to the types of errors.
     *
     * @param label Input domain name label
     * @param dest Destination string object
     * @param info Output container of IDNA processing details.
     * @return dest
     */
    public abstract StringBuilder labelToUnicode(CharSequence label, StringBuilder dest, Info info);
174 
    /**
     * Converts a whole domain name into its ASCII form for DNS lookup.
     * <p>
     * If any processing step fails, then {@code info.hasErrors()} will be true and
     * the result might not be an ASCII string.
     * The domain name might be modified according to the types of errors.
     * Labels with severe errors will be left in (or turned into) their Unicode form.
     *
     * @param name Input domain name
     * @param dest Destination string object
     * @param info Output container of IDNA processing details.
     * @return dest
     */
    public abstract StringBuilder nameToASCII(CharSequence name, StringBuilder dest, Info info);
188 
    /**
     * Converts a whole domain name into its Unicode form for human-readable display.
     * <p>
     * If any processing step fails, then {@code info.hasErrors()} will be true.
     * The domain name might be modified according to the types of errors.
     *
     * @param name Input domain name
     * @param dest Destination string object
     * @param info Output container of IDNA processing details.
     * @return dest
     */
    public abstract StringBuilder nameToUnicode(CharSequence name, StringBuilder dest, Info info);
200 
201     /**
202      * Output container for IDNA processing errors.
203      * The Info class is not suitable for subclassing.
204      */
205     public static final class Info {
206         /**
207          * Constructor.
208          */
Info()209         public Info() {
210             errors=EnumSet.noneOf(Error.class);
211             labelErrors=EnumSet.noneOf(Error.class);
212             isTransDiff=false;
213             isBiDi=false;
214             isOkBiDi=true;
215         }
216         /**
217          * Were there IDNA processing errors?
218          * @return true if there were processing errors
219          */
hasErrors()220         public boolean hasErrors() { return !errors.isEmpty(); }
221         /**
222          * Returns a set indicating IDNA processing errors.
223          * @return set of processing errors (modifiable, and not null)
224          */
getErrors()225         public Set<Error> getErrors() { return errors; }
226         /**
227          * Returns true if transitional and nontransitional processing produce different results.
228          * This is the case when the input label or domain name contains
229          * one or more deviation characters outside a Punycode label (see UTS #46).
230          * <ul>
231          * <li>With nontransitional processing, such characters are
232          * copied to the destination string.
233          * <li>With transitional processing, such characters are
234          * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
235          * </ul>
236          * @return true if transitional and nontransitional processing produce different results
237          */
isTransitionalDifferent()238         public boolean isTransitionalDifferent() { return isTransDiff; }
239 
reset()240         private void reset() {
241             errors.clear();
242             labelErrors.clear();
243             isTransDiff=false;
244             isBiDi=false;
245             isOkBiDi=true;
246         }
247 
248         private EnumSet<Error> errors, labelErrors;
249         private boolean isTransDiff;
250         private boolean isBiDi;
251         private boolean isOkBiDi;
252     }
253 
    // The following protected methods give IDNA subclasses access to the private Info fields.
    // The Info object also provides intermediate state that is publicly invisible,
    // avoiding the allocation of another worker object.
257     /**
258      * @deprecated This API is ICU internal only.
259      * @hide original deprecated declaration
260      * @hide draft / provisional / internal are hidden on Android
261      */
262     @Deprecated
resetInfo(Info info)263     protected static void resetInfo(Info info) {
264         info.reset();
265     }
266     /**
267      * @deprecated This API is ICU internal only.
268      * @hide original deprecated declaration
269      * @hide draft / provisional / internal are hidden on Android
270      */
271     @Deprecated
hasCertainErrors(Info info, EnumSet<Error> errors)272     protected static boolean hasCertainErrors(Info info, EnumSet<Error> errors) {
273         return !info.errors.isEmpty() && !Collections.disjoint(info.errors, errors);
274     }
275     /**
276      * @deprecated This API is ICU internal only.
277      * @hide original deprecated declaration
278      * @hide draft / provisional / internal are hidden on Android
279      */
280     @Deprecated
hasCertainLabelErrors(Info info, EnumSet<Error> errors)281     protected static boolean hasCertainLabelErrors(Info info, EnumSet<Error> errors) {
282         return !info.labelErrors.isEmpty() && !Collections.disjoint(info.labelErrors, errors);
283     }
284     /**
285      * @deprecated This API is ICU internal only.
286      * @hide original deprecated declaration
287      * @hide draft / provisional / internal are hidden on Android
288      */
289     @Deprecated
addLabelError(Info info, Error error)290     protected static void addLabelError(Info info, Error error) {
291         info.labelErrors.add(error);
292     }
293     /**
294      * @deprecated This API is ICU internal only.
295      * @hide original deprecated declaration
296      * @hide draft / provisional / internal are hidden on Android
297      */
298     @Deprecated
promoteAndResetLabelErrors(Info info)299     protected static void promoteAndResetLabelErrors(Info info) {
300         if(!info.labelErrors.isEmpty()) {
301             info.errors.addAll(info.labelErrors);
302             info.labelErrors.clear();
303         }
304     }
305     /**
306      * @deprecated This API is ICU internal only.
307      * @hide original deprecated declaration
308      * @hide draft / provisional / internal are hidden on Android
309      */
310     @Deprecated
addError(Info info, Error error)311     protected static void addError(Info info, Error error) {
312         info.errors.add(error);
313     }
314     /**
315      * @deprecated This API is ICU internal only.
316      * @hide original deprecated declaration
317      * @hide draft / provisional / internal are hidden on Android
318      */
319     @Deprecated
setTransitionalDifferent(Info info)320     protected static void setTransitionalDifferent(Info info) {
321         info.isTransDiff=true;
322     }
323     /**
324      * @deprecated This API is ICU internal only.
325      * @hide original deprecated declaration
326      * @hide draft / provisional / internal are hidden on Android
327      */
328     @Deprecated
setBiDi(Info info)329     protected static void setBiDi(Info info) {
330         info.isBiDi=true;
331     }
332     /**
333      * @deprecated This API is ICU internal only.
334      * @hide original deprecated declaration
335      * @hide draft / provisional / internal are hidden on Android
336      */
337     @Deprecated
isBiDi(Info info)338     protected static boolean isBiDi(Info info) {
339         return info.isBiDi;
340     }
341     /**
342      * @deprecated This API is ICU internal only.
343      * @hide original deprecated declaration
344      * @hide draft / provisional / internal are hidden on Android
345      */
346     @Deprecated
setNotOkBiDi(Info info)347     protected static void setNotOkBiDi(Info info) {
348         info.isOkBiDi=false;
349     }
350     /**
351      * @deprecated This API is ICU internal only.
352      * @hide original deprecated declaration
353      * @hide draft / provisional / internal are hidden on Android
354      */
355     @Deprecated
isOkBiDi(Info info)356     protected static boolean isOkBiDi(Info info) {
357         return info.isOkBiDi;
358     }
359 
    /**
     * IDNA error values.
     * When a domain name or label fails a processing step or does not meet the
     * validity criteria, then one or more of these errors are set.
     */
    public static enum Error {
        /**
         * A non-final domain name label (or the whole domain name) is empty.
         */
        EMPTY_LABEL,
        /**
         * A domain name label is longer than 63 bytes.
         * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
         * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
         */
        LABEL_TOO_LONG,
        /**
         * A domain name is longer than 255 bytes in its storage form.
         * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
         * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
         */
        DOMAIN_NAME_TOO_LONG,
        /**
         * A label starts with a hyphen-minus ('-').
         */
        LEADING_HYPHEN,
        /**
         * A label ends with a hyphen-minus ('-').
         */
        TRAILING_HYPHEN,
        /**
         * A label contains hyphen-minus ('-') in the third and fourth positions.
         */
        HYPHEN_3_4,
        /**
         * A label starts with a combining mark.
         */
        LEADING_COMBINING_MARK,
        /**
         * A label or domain name contains disallowed characters.
         */
        DISALLOWED,
        /**
         * A label starts with "xn--" but does not contain valid Punycode.
         * That is, an xn-- label failed Punycode decoding.
         */
        PUNYCODE,
        /**
         * A label contains a dot (full stop).
         * This can occur in an input string for a single-label function.
         */
        LABEL_HAS_DOT,
        /**
         * An ACE label does not contain a valid label string.
         * The label was successfully ACE (Punycode) decoded but the resulting
         * string had severe validation errors. For example,
         * it might contain characters that are not allowed in ACE labels,
         * or it might not be normalized.
         */
        INVALID_ACE_LABEL,
        /**
         * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
         */
        BIDI,
        /**
         * A label does not meet the IDNA CONTEXTJ requirements.
         */
        CONTEXTJ,
        /**
         * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
         * Some punctuation characters "Would otherwise have been DISALLOWED"
         * but are allowed in certain contexts. (RFC 5892)
         */
        CONTEXTO_PUNCTUATION,
        /**
         * A label does not meet the IDNA CONTEXTO requirements for digits.
         * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
         */
        CONTEXTO_DIGITS
    }
440 
    /**
     * Sole constructor. (For invocation by subclass constructors, typically implicit.)
     * It performs no initialization of its own.
     * @deprecated This API is ICU internal only.
     * @hide original deprecated declaration
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    protected IDNA() {
    }
450 
451     /* IDNA2003 API ------------------------------------------------------------- */
452 
453     /**
454      * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
455      * This operation is done on <b>single labels</b> before sending it to something that expects
456      * ASCII names. A label is an individual part of a domain name. Labels are usually
457      * separated by dots; e.g." "www.example.com" is composed of 3 labels
458      * "www","example", and "com".
459      *
460      * @param src       The input string to be processed
461      * @param options   A bit set of options:
462      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
463      *                              and do not use STD3 ASCII rules
464      *                              If unassigned code points are found the operation fails with
465      *                              StringPrepParseException.
466      *
467      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
468      *                              If this option is set, the unassigned code points are in the input
469      *                              are treated as normal Unicode code points.
470      *
471      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
472      *                              If this option is set and the input does not satisfy STD3 rules,
473      *                              the operation will fail with ParseException
474      * @return StringBuffer the converted String
475      * @throws StringPrepParseException When an error occurs for parsing a string.
476      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
477      * @hide original deprecated declaration
478      */
479     @Deprecated
convertToASCII(String src, int options)480     public static StringBuffer convertToASCII(String src, int options)
481         throws StringPrepParseException{
482         UCharacterIterator iter = UCharacterIterator.getInstance(src);
483         return convertToASCII(iter,options);
484     }
485 
486     /**
487      * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
488      * This operation is done on <b>single labels</b> before sending it to something that expects
489      * ASCII names. A label is an individual part of a domain name. Labels are usually
490      * separated by dots; e.g." "www.example.com" is composed of 3 labels
491      * "www","example", and "com".
492      *
493      * @param src       The input string as StringBuffer to be processed
494      * @param options   A bit set of options:
495      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
496      *                              and do not use STD3 ASCII rules
497      *                              If unassigned code points are found the operation fails with
498      *                              ParseException.
499      *
500      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
501      *                              If this option is set, the unassigned code points are in the input
502      *                              are treated as normal Unicode code points.
503      *
504      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
505      *                              If this option is set and the input does not satisfy STD3 rules,
506      *                              the operation will fail with ParseException
507      * @return StringBuffer the converted String
508      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
509      * @hide original deprecated declaration
510      */
511     @Deprecated
convertToASCII(StringBuffer src, int options)512     public static StringBuffer convertToASCII(StringBuffer src, int options)
513         throws StringPrepParseException{
514         UCharacterIterator iter = UCharacterIterator.getInstance(src);
515         return convertToASCII(iter,options);
516     }
517 
518     /**
519      * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
520      * This operation is done on <b>single labels</b> before sending it to something that expects
521      * ASCII names. A label is an individual part of a domain name. Labels are usually
522      * separated by dots; e.g." "www.example.com" is composed of 3 labels
523      * "www","example", and "com".
524      *
525      * @param src       The input string as UCharacterIterator to be processed
526      * @param options   A bit set of options:
527      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
528      *                              and do not use STD3 ASCII rules
529      *                              If unassigned code points are found the operation fails with
530      *                              ParseException.
531      *
532      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
533      *                              If this option is set, the unassigned code points are in the input
534      *                              are treated as normal Unicode code points.
535      *
536      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
537      *                              If this option is set and the input does not satisfy STD3 rules,
538      *                              the operation will fail with ParseException
539      * @return StringBuffer the converted String
540      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
541      * @hide original deprecated declaration
542      */
543     @Deprecated
convertToASCII(UCharacterIterator src, int options)544     public static StringBuffer convertToASCII(UCharacterIterator src, int options)
545                 throws StringPrepParseException{
546         return IDNA2003.convertToASCII(src, options);
547     }
548 
549     /**
550      * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
551      * This operation is done on complete domain names, e.g: "www.example.com".
552      * It is important to note that this operation can fail. If it fails, then the input
553      * domain name cannot be used as an Internationalized Domain Name and the application
554      * should have methods defined to deal with the failure.
555      *
556      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
557      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
558      * and then convert. This function does not offer that level of granularity. The options once
559      * set will apply to all labels in the domain name
560      *
561      * @param src       The input string as UCharacterIterator to be processed
562      * @param options   A bit set of options:
563      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
564      *                              and do not use STD3 ASCII rules
565      *                              If unassigned code points are found the operation fails with
566      *                              ParseException.
567      *
568      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
569      *                              If this option is set, the unassigned code points are in the input
570      *                              are treated as normal Unicode code points.
571      *
572      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
573      *                              If this option is set and the input does not satisfy STD3 rules,
574      *                              the operation will fail with ParseException
575      * @return StringBuffer the converted String
576      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
577      * @hide original deprecated declaration
578      */
579     @Deprecated
convertIDNToASCII(UCharacterIterator src, int options)580     public static StringBuffer convertIDNToASCII(UCharacterIterator src, int options)
581             throws StringPrepParseException{
582         return convertIDNToASCII(src.getText(), options);
583     }
584 
585     /**
586      * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
587      * This operation is done on complete domain names, e.g: "www.example.com".
588      * It is important to note that this operation can fail. If it fails, then the input
589      * domain name cannot be used as an Internationalized Domain Name and the application
590      * should have methods defined to deal with the failure.
591      *
592      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
593      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
594      * and then convert. This function does not offer that level of granularity. The options once
595      * set will apply to all labels in the domain name
596      *
597      * @param src       The input string as a StringBuffer to be processed
598      * @param options   A bit set of options:
599      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
600      *                              and do not use STD3 ASCII rules
601      *                              If unassigned code points are found the operation fails with
602      *                              ParseException.
603      *
604      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
605      *                              If this option is set, the unassigned code points are in the input
606      *                              are treated as normal Unicode code points.
607      *
608      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
609      *                              If this option is set and the input does not satisfy STD3 rules,
610      *                              the operation will fail with ParseException
611      * @return StringBuffer the converted String
612      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
613      * @hide original deprecated declaration
614      */
615     @Deprecated
convertIDNToASCII(StringBuffer src, int options)616     public static StringBuffer convertIDNToASCII(StringBuffer src, int options)
617             throws StringPrepParseException{
618             return convertIDNToASCII(src.toString(), options);
619     }
620 
621     /**
622      * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
623      * This operation is done on complete domain names, e.g: "www.example.com".
624      * It is important to note that this operation can fail. If it fails, then the input
625      * domain name cannot be used as an Internationalized Domain Name and the application
626      * should have methods defined to deal with the failure.
627      *
628      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
629      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
630      * and then convert. This function does not offer that level of granularity. The options once
631      * set will apply to all labels in the domain name
632      *
633      * @param src       The input string to be processed
634      * @param options   A bit set of options:
635      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
636      *                              and do not use STD3 ASCII rules
637      *                              If unassigned code points are found the operation fails with
638      *                              ParseException.
639      *
640      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
641      *                              If this option is set, the unassigned code points are in the input
642      *                              are treated as normal Unicode code points.
643      *
644      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
645      *                              If this option is set and the input does not satisfy STD3 rules,
646      *                              the operation will fail with ParseException
647      * @return StringBuffer the converted String
648      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
649      * @hide original deprecated declaration
650      */
651     @Deprecated
convertIDNToASCII(String src,int options)652     public static StringBuffer convertIDNToASCII(String src,int options)
653             throws StringPrepParseException{
654         return IDNA2003.convertIDNToASCII(src, options);
655     }
656 
657 
658     /**
659      * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
660      * This operation is done on <b>single labels</b> before sending it to something that expects
661      * Unicode names. A label is an individual part of a domain name. Labels are usually
662      * separated by dots; for e.g." "www.example.com" is composed of 3 labels
663      * "www","example", and "com".
664      *
665      * @param src       The input string to be processed
666      * @param options   A bit set of options:
667      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
668      *                              and do not use STD3 ASCII rules
669      *                              If unassigned code points are found the operation fails with
670      *                              ParseException.
671      *
672      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
673      *                              If this option is set, the unassigned code points are in the input
674      *                              are treated as normal Unicode code points.
675      *
676      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
677      *                              If this option is set and the input does not satisfy STD3 rules,
678      *                              the operation will fail with ParseException
679      * @return StringBuffer the converted String
680      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
681      * @hide original deprecated declaration
682      */
683     @Deprecated
convertToUnicode(String src, int options)684     public static StringBuffer convertToUnicode(String src, int options)
685            throws StringPrepParseException{
686         UCharacterIterator iter = UCharacterIterator.getInstance(src);
687         return convertToUnicode(iter,options);
688     }
689 
690     /**
691      * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
692      * This operation is done on <b>single labels</b> before sending it to something that expects
693      * Unicode names. A label is an individual part of a domain name. Labels are usually
694      * separated by dots; for e.g." "www.example.com" is composed of 3 labels
695      * "www","example", and "com".
696      *
697      * @param src       The input string as StringBuffer to be processed
698      * @param options   A bit set of options:
699      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
700      *                              and do not use STD3 ASCII rules
701      *                              If unassigned code points are found the operation fails with
702      *                              ParseException.
703      *
704      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
705      *                              If this option is set, the unassigned code points are in the input
706      *                              are treated as normal Unicode code points.
707      *
708      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
709      *                              If this option is set and the input does not satisfy STD3 rules,
710      *                              the operation will fail with ParseException
711      * @return StringBuffer the converted String
712      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
713      * @hide original deprecated declaration
714      */
715     @Deprecated
convertToUnicode(StringBuffer src, int options)716     public static StringBuffer convertToUnicode(StringBuffer src, int options)
717            throws StringPrepParseException{
718         UCharacterIterator iter = UCharacterIterator.getInstance(src);
719         return convertToUnicode(iter,options);
720     }
721 
722     /**
723      * IDNA2003: Function that implements the ToUnicode operation as defined in the IDNA RFC.
724      * This operation is done on <b>single labels</b> before sending it to something that expects
725      * Unicode names. A label is an individual part of a domain name. Labels are usually
726      * separated by dots; for e.g." "www.example.com" is composed of 3 labels
727      * "www","example", and "com".
728      *
729      * @param src       The input string as UCharacterIterator to be processed
730      * @param options   A bit set of options:
731      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
732      *                              and do not use STD3 ASCII rules
733      *                              If unassigned code points are found the operation fails with
734      *                              ParseException.
735      *
736      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
737      *                              If this option is set, the unassigned code points are in the input
738      *                              are treated as normal Unicode code points.
739      *
740      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
741      *                              If this option is set and the input does not satisfy STD3 rules,
742      *                              the operation will fail with ParseException
743      * @return StringBuffer the converted String
744      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
745      * @hide original deprecated declaration
746      */
747     @Deprecated
convertToUnicode(UCharacterIterator src, int options)748     public static StringBuffer convertToUnicode(UCharacterIterator src, int options)
749            throws StringPrepParseException{
750         return IDNA2003.convertToUnicode(src, options);
751     }
752 
753     /**
754      * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
755      * This operation is done on complete domain names, e.g: "www.example.com".
756      *
757      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
758      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
759      * and then convert. This function does not offer that level of granularity. The options once
760      * set will apply to all labels in the domain name
761      *
762      * @param src       The input string as UCharacterIterator to be processed
763      * @param options   A bit set of options:
764      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
765      *                              and do not use STD3 ASCII rules
766      *                              If unassigned code points are found the operation fails with
767      *                              ParseException.
768      *
769      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
770      *                              If this option is set, the unassigned code points are in the input
771      *                              are treated as normal Unicode code points.
772      *
773      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
774      *                              If this option is set and the input does not satisfy STD3 rules,
775      *                              the operation will fail with ParseException
776      * @return StringBuffer the converted String
777      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
778      * @hide original deprecated declaration
779      */
780     @Deprecated
convertIDNToUnicode(UCharacterIterator src, int options)781     public static StringBuffer convertIDNToUnicode(UCharacterIterator src, int options)
782         throws StringPrepParseException{
783         return convertIDNToUnicode(src.getText(), options);
784     }
785 
786     /**
787      * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
788      * This operation is done on complete domain names, e.g: "www.example.com".
789      *
790      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
791      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
792      * and then convert. This function does not offer that level of granularity. The options once
793      * set will apply to all labels in the domain name
794      *
795      * @param src       The input string as StringBuffer to be processed
796      * @param options   A bit set of options:
797      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
798      *                              and do not use STD3 ASCII rules
799      *                              If unassigned code points are found the operation fails with
800      *                              ParseException.
801      *
802      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
803      *                              If this option is set, the unassigned code points are in the input
804      *                              are treated as normal Unicode code points.
805      *
806      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
807      *                              If this option is set and the input does not satisfy STD3 rules,
808      *                              the operation will fail with ParseException
809      * @return StringBuffer the converted String
810      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
811      * @hide original deprecated declaration
812      */
813     @Deprecated
convertIDNToUnicode(StringBuffer src, int options)814     public static StringBuffer convertIDNToUnicode(StringBuffer src, int options)
815         throws StringPrepParseException{
816         return convertIDNToUnicode(src.toString(), options);
817     }
818 
819     /**
820      * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
821      * This operation is done on complete domain names, e.g: "www.example.com".
822      *
823      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
824      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
825      * and then convert. This function does not offer that level of granularity. The options once
826      * set will apply to all labels in the domain name
827      *
828      * @param src       The input string to be processed
829      * @param options   A bit set of options:
830      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
831      *                              and do not use STD3 ASCII rules
832      *                              If unassigned code points are found the operation fails with
833      *                              ParseException.
834      *
835      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
836      *                              If this option is set, the unassigned code points are in the input
837      *                              are treated as normal Unicode code points.
838      *
839      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
840      *                              If this option is set and the input does not satisfy STD3 rules,
841      *                              the operation will fail with ParseException
842      * @return StringBuffer the converted String
843      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
844      * @hide original deprecated declaration
845      */
846     @Deprecated
convertIDNToUnicode(String src, int options)847     public static StringBuffer convertIDNToUnicode(String src, int options)
848             throws StringPrepParseException{
849         return IDNA2003.convertIDNToUnicode(src, options);
850     }
851 
852     /**
853      * IDNA2003: Compare two IDN strings for equivalence.
854      * This function splits the domain names into labels and compares them.
855      * According to IDN RFC, whenever two labels are compared, they are
856      * considered equal if and only if their ASCII forms (obtained by
857      * applying toASCII) match using an case-insensitive ASCII comparison.
858      * Two domain names are considered a match if and only if all labels
859      * match regardless of whether label separators match.
860      *
861      * @param s1        First IDN string as StringBuffer
862      * @param s2        Second IDN string as StringBuffer
863      * @param options   A bit set of options:
864      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
865      *                              and do not use STD3 ASCII rules
866      *                              If unassigned code points are found the operation fails with
867      *                              ParseException.
868      *
869      *  - IDNA.ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
870      *                              If this option is set, the unassigned code points are in the input
871      *                              are treated as normal Unicode code points.
872      *
873      *  - IDNA.USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
874      *                              If this option is set and the input does not satisfy STD3 rules,
875      *                              the operation will fail with ParseException
876      * @return 0 if the strings are equal, &gt; 0 if s1 &gt; s2 and &lt; 0 if s1 &lt; s2
877      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
878      * @hide original deprecated declaration
879      */
880     @Deprecated
compare(StringBuffer s1, StringBuffer s2, int options)881     public static int compare(StringBuffer s1, StringBuffer s2, int options)
882         throws StringPrepParseException{
883         if(s1==null || s2 == null){
884             throw new IllegalArgumentException("One of the source buffers is null");
885         }
886         return IDNA2003.compare(s1.toString(), s2.toString(), options);
887     }
888 
889     /**
890      * IDNA2003: Compare two IDN strings for equivalence.
891      * This function splits the domain names into labels and compares them.
892      * According to IDN RFC, whenever two labels are compared, they are
893      * considered equal if and only if their ASCII forms (obtained by
894      * applying toASCII) match using an case-insensitive ASCII comparison.
895      * Two domain names are considered a match if and only if all labels
896      * match regardless of whether label separators match.
897      *
898      * @param s1        First IDN string
899      * @param s2        Second IDN string
900      * @param options   A bit set of options:
901      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
902      *                              and do not use STD3 ASCII rules
903      *                              If unassigned code points are found the operation fails with
904      *                              ParseException.
905      *
906      *  - IDNA.ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
907      *                              If this option is set, the unassigned code points are in the input
908      *                              are treated as normal Unicode code points.
909      *
910      *  - IDNA.USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
911      *                              If this option is set and the input does not satisfy STD3 rules,
912      *                              the operation will fail with ParseException
913      * @return 0 if the strings are equal, &gt; 0 if s1 &gt; s2 and &lt; 0 if s1 &lt; s2
914      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
915      * @hide original deprecated declaration
916      */
917     @Deprecated
compare(String s1, String s2, int options)918     public static int compare(String s1, String s2, int options) throws StringPrepParseException{
919         if(s1==null || s2 == null){
920             throw new IllegalArgumentException("One of the source buffers is null");
921         }
922         return IDNA2003.compare(s1, s2, options);
923     }
924     /**
925      * IDNA2003: Compare two IDN strings for equivalence.
926      * This function splits the domain names into labels and compares them.
927      * According to IDN RFC, whenever two labels are compared, they are
928      * considered equal if and only if their ASCII forms (obtained by
929      * applying toASCII) match using an case-insensitive ASCII comparison.
930      * Two domain names are considered a match if and only if all labels
931      * match regardless of whether label separators match.
932      *
933      * @param s1        First IDN string as UCharacterIterator
934      * @param s2        Second IDN string as UCharacterIterator
935      * @param options   A bit set of options:
936      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
937      *                              and do not use STD3 ASCII rules
938      *                              If unassigned code points are found the operation fails with
939      *                              ParseException.
940      *
941      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
942      *                              If this option is set, the unassigned code points are in the input
943      *                              are treated as normal Unicode code points.
944      *
945      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
946      *                              If this option is set and the input does not satisfy STD3 rules,
947      *                              the operation will fail with ParseException
948      * @return 0 if the strings are equal, &gt; 0 if i1 &gt; i2 and &lt; 0 if i1 &lt; i2
949      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
950      * @hide original deprecated declaration
951      */
952     @Deprecated
compare(UCharacterIterator s1, UCharacterIterator s2, int options)953     public static int compare(UCharacterIterator s1, UCharacterIterator s2, int options)
954         throws StringPrepParseException{
955         if(s1==null || s2 == null){
956             throw new IllegalArgumentException("One of the source buffers is null");
957         }
958         return IDNA2003.compare(s1.getText(), s2.getText(), options);
959     }
960 }
961