1 /*
2  *******************************************************************************
3  *
4  *   Copyright (C) 2003-2014, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *
7  *******************************************************************************
8  *   file name:  uidna.h
9  *   encoding:   US-ASCII
10  *   tab size:   8 (not used)
11  *   indentation:4
12  *
13  *   created on: 2003feb1
14  *   created by: Ram Viswanadha
15  */
16 
17 #ifndef __UIDNA_H__
18 #define __UIDNA_H__
19 
20 #include "unicode/utypes.h"
21 
22 #if !UCONFIG_NO_IDNA
23 
24 #include "unicode/localpointer.h"
25 #include "unicode/parseerr.h"
26 
27 /**
28  * \file
29  * \brief C API: Internationalizing Domain Names in Applications (IDNA)
30  *
31  * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.
32  *
33  * The C API functions which do take a UIDNA * service object pointer
34  * implement UTS #46 and IDNA2008.
35  *
36  * IDNA2003 is obsolete.
37  * The C API functions which do not take a service object pointer
38  * implement IDNA2003. They are all deprecated.
39  */
40 
41 /*
42  * IDNA option bit set values; combine them with bitwise OR to form an options argument.
43  */
44 enum {
45     /**
46      * Default options value: None of the other options are set.
47      * For use in static worker and factory methods.
48      * @stable ICU 2.6
49      */
50     UIDNA_DEFAULT=0,
51 #ifndef U_HIDE_DEPRECATED_API
52     /**
53      * Option to allow unassigned code points in domain names and labels.
54      * For use in static worker and factory methods.
55      * <p>This option is ignored by the UTS #46 implementation.
56      * (UTS #46 disallows unassigned code points.)
57      * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
58      */
59     UIDNA_ALLOW_UNASSIGNED=1,
60 #endif  /* U_HIDE_DEPRECATED_API */
61     /**
62      * Option to check whether the input conforms to the STD3 ASCII rules,
63      * for example the restriction of labels to LDH characters
64      * (ASCII Letters, Digits and Hyphen-Minus).
65      * For use in static worker and factory methods.
66      * @stable ICU 2.6
67      */
68     UIDNA_USE_STD3_RULES=2,
69     /**
70      * IDNA option to check whether the input conforms to the BiDi rules.
71      * For use in static worker and factory methods.
72      * <p>This option is ignored by the IDNA2003 implementation.
73      * (IDNA2003 always performs a BiDi check.)
74      * @stable ICU 4.6
75      */
76     UIDNA_CHECK_BIDI=4,
77     /**
78      * IDNA option to check whether the input conforms to the CONTEXTJ rules.
79      * For use in static worker and factory methods.
80      * <p>This option is ignored by the IDNA2003 implementation.
81      * (The CONTEXTJ check is new in IDNA2008.)
82      * @stable ICU 4.6
83      */
84     UIDNA_CHECK_CONTEXTJ=8,
85     /**
86      * IDNA option for nontransitional processing in ToASCII().
87      * For use in static worker and factory methods.
88      * <p>By default, ToASCII() uses transitional processing.
89      * <p>This option is ignored by the IDNA2003 implementation.
90      * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
91      * @stable ICU 4.6
92      */
93     UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
94     /**
95      * IDNA option for nontransitional processing in ToUnicode().
96      * For use in static worker and factory methods.
97      * <p>By default, ToUnicode() uses transitional processing.
98      * <p>This option is ignored by the IDNA2003 implementation.
99      * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
100      * @stable ICU 4.6
101      */
102     UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
103     /**
104      * IDNA option to check whether the input conforms to the CONTEXTO rules.
105      * For use in static worker and factory methods.
106      * <p>This option is ignored by the IDNA2003 implementation.
107      * (The CONTEXTO check is new in IDNA2008.)
108      * <p>This is for use by registries for IDNA2008 conformance.
109      * UTS #46 does not require the CONTEXTO check.
110      * @stable ICU 49
111      */
112     UIDNA_CHECK_CONTEXTO=0x40
113 };
114 
115 /**
116  * Opaque C service object type for the new IDNA API; open with uidna_openUTS46(), release with uidna_close().
117  * @stable ICU 4.6
118  */
119 struct UIDNA;
120 typedef struct UIDNA UIDNA;  /**< C typedef for struct UIDNA. @stable ICU 4.6 */
121 
122 /**
123  * Returns a UIDNA instance which implements UTS #46.
124  * Returns an unmodifiable instance, owned by the caller.
125  * Cache it for multiple operations, and uidna_close() it when done.
126  * The instance is thread-safe, that is, it can be used concurrently.
127  *
128  * For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
129  *
130  * @param options Bit set to modify the processing and error checking.
131  *                See option bit set values in uidna.h.
132  * @param pErrorCode Standard ICU error code. Its input value must
133  *                  pass the U_SUCCESS() test, or else the function returns
134  *                  immediately. Check for U_FAILURE() on output or use with
135  *                  function chaining. (See User Guide for details.)
136  * @return the UTS #46 UIDNA instance, if successful; the caller owns it and must uidna_close() it
137  * @stable ICU 4.6
138  */
139 U_STABLE UIDNA * U_EXPORT2
140 uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode);
141 
142 /**
143  * Closes a UIDNA instance.
144  * @param idna UIDNA instance to be closed, as returned by uidna_openUTS46()
145  * @stable ICU 4.6
146  */
147 U_STABLE void U_EXPORT2
148 uidna_close(UIDNA *idna);
149 
150 #if U_SHOW_CPLUSPLUS_API
151 
152 U_NAMESPACE_BEGIN
153 
154 /**
155  * \class LocalUIDNAPointer
156  * "Smart pointer" class, closes a UIDNA via uidna_close().
157  * For most methods see the LocalPointerBase base class.
158  *
159  * @see LocalPointerBase
160  * @see LocalPointer
161  * @stable ICU 4.6
162  */
163 U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close);
164 
165 U_NAMESPACE_END
166 
167 #endif  /* U_SHOW_CPLUSPLUS_API */
168 
169 /**
170  * Output container for IDNA processing errors.
171  * Initialize with UIDNA_INFO_INITIALIZER:
172  * \code
173  * UIDNAInfo info = UIDNA_INFO_INITIALIZER;
174  * int32_t length = uidna_nameToASCII(..., &info, &errorCode);
175  * if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
176  * \endcode
177  * @stable ICU 4.6
178  */
179 typedef struct UIDNAInfo {
180     /** sizeof(UIDNAInfo); set by UIDNA_INFO_INITIALIZER. @stable ICU 4.6 */
181     int16_t size;
182     /**
183      * Set to TRUE if transitional and nontransitional processing produce different results.
184      * For details see C++ IDNAInfo::isTransitionalDifferent().
185      * @stable ICU 4.6
186      */
187     UBool isTransitionalDifferent;
188     UBool reservedB3;  /**< Reserved field, do not use. @internal */
189     /**
190      * Bit set indicating IDNA processing errors. 0 if no errors.
191      * See UIDNA_ERROR_... constants.
192      * @stable ICU 4.6
193      */
194     uint32_t errors;
195     int32_t reservedI2;  /**< Reserved field, do not use. @internal */
196     int32_t reservedI3;  /**< Reserved field, do not use. @internal */
197 } UIDNAInfo;
198 
199 /**
200  * Static initializer for a UIDNAInfo struct: sets size to sizeof(UIDNAInfo) and every other field to FALSE or 0.
201  * @stable ICU 4.6
202  */
203 #define UIDNA_INFO_INITIALIZER { \
204     (int16_t)sizeof(UIDNAInfo), \
205     FALSE, FALSE, \
206     0, 0, 0 }
207 
208 /**
209  * Converts a single domain name label into its ASCII form for DNS lookup.
210  * If any processing step fails, then pInfo->errors will be non-zero and
211  * the result might not be an ASCII string.
212  * The label might be modified according to the types of errors.
213  * Labels with severe errors will be left in (or turned into) their Unicode form.
214  *
215  * The UErrorCode indicates an error only in exceptional cases,
216  * such as a U_MEMORY_ALLOCATION_ERROR.
217  *
218  * @param idna UIDNA instance
219  * @param label Input domain name label
220  * @param length Label length, or -1 if NUL-terminated
221  * @param dest Destination string buffer
222  * @param capacity Destination buffer capacity
223  * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
224  * @param pErrorCode Standard ICU error code. Its input value must
225  *                  pass the U_SUCCESS() test, or else the function returns
226  *                  immediately. Check for U_FAILURE() on output or use with
227  *                  function chaining. (See User Guide for details.)
228  * @return destination string length
229  * @stable ICU 4.6
230  */
231 U_STABLE int32_t U_EXPORT2
232 uidna_labelToASCII(const UIDNA *idna,
233                    const UChar *label, int32_t length,
234                    UChar *dest, int32_t capacity,
235                    UIDNAInfo *pInfo, UErrorCode *pErrorCode);
236 
237 /**
238  * Converts a single domain name label into its Unicode form for human-readable display.
239  * If any processing step fails, then pInfo->errors will be non-zero.
240  * The label might be modified according to the types of errors.
241  *
242  * The UErrorCode indicates an error only in exceptional cases,
243  * such as a U_MEMORY_ALLOCATION_ERROR.
244  *
245  * @param idna UIDNA instance
246  * @param label Input domain name label
247  * @param length Label length, or -1 if NUL-terminated
248  * @param dest Destination string buffer
249  * @param capacity Destination buffer capacity
250  * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
251  * @param pErrorCode Standard ICU error code. Its input value must
252  *                  pass the U_SUCCESS() test, or else the function returns
253  *                  immediately. Check for U_FAILURE() on output or use with
254  *                  function chaining. (See User Guide for details.)
255  * @return destination string length
256  * @stable ICU 4.6
257  */
258 U_STABLE int32_t U_EXPORT2
259 uidna_labelToUnicode(const UIDNA *idna,
260                      const UChar *label, int32_t length,
261                      UChar *dest, int32_t capacity,
262                      UIDNAInfo *pInfo, UErrorCode *pErrorCode);
263 
264 /**
265  * Converts a whole domain name into its ASCII form for DNS lookup.
266  * If any processing step fails, then pInfo->errors will be non-zero and
267  * the result might not be an ASCII string.
268  * The domain name might be modified according to the types of errors.
269  * Labels with severe errors will be left in (or turned into) their Unicode form.
270  *
271  * The UErrorCode indicates an error only in exceptional cases,
272  * such as a U_MEMORY_ALLOCATION_ERROR.
273  *
274  * @param idna UIDNA instance
275  * @param name Input domain name
276  * @param length Domain name length, or -1 if NUL-terminated
277  * @param dest Destination string buffer
278  * @param capacity Destination buffer capacity
279  * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
280  * @param pErrorCode Standard ICU error code. Its input value must
281  *                  pass the U_SUCCESS() test, or else the function returns
282  *                  immediately. Check for U_FAILURE() on output or use with
283  *                  function chaining. (See User Guide for details.)
284  * @return destination string length
285  * @stable ICU 4.6
286  */
287 U_STABLE int32_t U_EXPORT2
288 uidna_nameToASCII(const UIDNA *idna,
289                   const UChar *name, int32_t length,
290                   UChar *dest, int32_t capacity,
291                   UIDNAInfo *pInfo, UErrorCode *pErrorCode);
292 
293 /**
294  * Converts a whole domain name into its Unicode form for human-readable display.
295  * If any processing step fails, then pInfo->errors will be non-zero.
296  * The domain name might be modified according to the types of errors.
297  *
298  * The UErrorCode indicates an error only in exceptional cases,
299  * such as a U_MEMORY_ALLOCATION_ERROR.
300  *
301  * @param idna UIDNA instance
302  * @param name Input domain name
303  * @param length Domain name length, or -1 if NUL-terminated
304  * @param dest Destination string buffer
305  * @param capacity Destination buffer capacity
306  * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
307  * @param pErrorCode Standard ICU error code. Its input value must
308  *                  pass the U_SUCCESS() test, or else the function returns
309  *                  immediately. Check for U_FAILURE() on output or use with
310  *                  function chaining. (See User Guide for details.)
311  * @return destination string length
312  * @stable ICU 4.6
313  */
314 U_STABLE int32_t U_EXPORT2
315 uidna_nameToUnicode(const UIDNA *idna,
316                     const UChar *name, int32_t length,
317                     UChar *dest, int32_t capacity,
318                     UIDNAInfo *pInfo, UErrorCode *pErrorCode);
319 
320 /* UTF-8 versions of the processing methods --------------------------------- */
321 
322 /**
323  * Converts a single domain name label into its ASCII form for DNS lookup.
324  * UTF-8 version of uidna_labelToASCII(), same behavior.
325  *
326  * @param idna UIDNA instance
327  * @param label Input domain name label
328  * @param length Label length, or -1 if NUL-terminated
329  * @param dest Destination string buffer
330  * @param capacity Destination buffer capacity
331  * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
332  * @param pErrorCode Standard ICU error code. Its input value must
333  *                  pass the U_SUCCESS() test, or else the function returns
334  *                  immediately. Check for U_FAILURE() on output or use with
335  *                  function chaining. (See User Guide for details.)
336  * @return destination string length
337  * @stable ICU 4.6
338  */
339 U_STABLE int32_t U_EXPORT2
340 uidna_labelToASCII_UTF8(const UIDNA *idna,
341                         const char *label, int32_t length,
342                         char *dest, int32_t capacity,
343                         UIDNAInfo *pInfo, UErrorCode *pErrorCode);
344 
345 /**
346  * Converts a single domain name label into its Unicode form for human-readable display.
347  * UTF-8 version of uidna_labelToUnicode(), same behavior.
348  *
349  * @param idna UIDNA instance
350  * @param label Input domain name label
351  * @param length Label length, or -1 if NUL-terminated
352  * @param dest Destination string buffer
353  * @param capacity Destination buffer capacity
354  * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
355  * @param pErrorCode Standard ICU error code. Its input value must
356  *                  pass the U_SUCCESS() test, or else the function returns
357  *                  immediately. Check for U_FAILURE() on output or use with
358  *                  function chaining. (See User Guide for details.)
359  * @return destination string length
360  * @stable ICU 4.6
361  */
362 U_STABLE int32_t U_EXPORT2
363 uidna_labelToUnicodeUTF8(const UIDNA *idna,
364                          const char *label, int32_t length,
365                          char *dest, int32_t capacity,
366                          UIDNAInfo *pInfo, UErrorCode *pErrorCode);
367 
368 /**
369  * Converts a whole domain name into its ASCII form for DNS lookup.
370  * UTF-8 version of uidna_nameToASCII(), same behavior.
371  *
372  * @param idna UIDNA instance
373  * @param name Input domain name
374  * @param length Domain name length, or -1 if NUL-terminated
375  * @param dest Destination string buffer
376  * @param capacity Destination buffer capacity
377  * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
378  * @param pErrorCode Standard ICU error code. Its input value must
379  *                  pass the U_SUCCESS() test, or else the function returns
380  *                  immediately. Check for U_FAILURE() on output or use with
381  *                  function chaining. (See User Guide for details.)
382  * @return destination string length
383  * @stable ICU 4.6
384  */
385 U_STABLE int32_t U_EXPORT2
386 uidna_nameToASCII_UTF8(const UIDNA *idna,
387                        const char *name, int32_t length,
388                        char *dest, int32_t capacity,
389                        UIDNAInfo *pInfo, UErrorCode *pErrorCode);
390 
391 /**
392  * Converts a whole domain name into its Unicode form for human-readable display.
393  * UTF-8 version of uidna_nameToUnicode(), same behavior.
394  *
395  * @param idna UIDNA instance
396  * @param name Input domain name
397  * @param length Domain name length, or -1 if NUL-terminated
398  * @param dest Destination string buffer
399  * @param capacity Destination buffer capacity
400  * @param pInfo Output container of IDNA processing details; initialize it with UIDNA_INFO_INITIALIZER.
401  * @param pErrorCode Standard ICU error code. Its input value must
402  *                  pass the U_SUCCESS() test, or else the function returns
403  *                  immediately. Check for U_FAILURE() on output or use with
404  *                  function chaining. (See User Guide for details.)
405  * @return destination string length
406  * @stable ICU 4.6
407  */
408 U_STABLE int32_t U_EXPORT2
409 uidna_nameToUnicodeUTF8(const UIDNA *idna,
410                         const char *name, int32_t length,
411                         char *dest, int32_t capacity,
412                         UIDNAInfo *pInfo, UErrorCode *pErrorCode);
413 
414 /*
415  * IDNA error bit set values.
416  * When a domain name or label fails a processing step or does not meet the
417  * validity criteria, then one or more of these error bits are set in UIDNAInfo.errors.
418  */
419 enum {
420     /**
421      * A non-final domain name label (or the whole domain name) is empty.
422      * @stable ICU 4.6
423      */
424     UIDNA_ERROR_EMPTY_LABEL=1,
425     /**
426      * A domain name label is longer than 63 bytes.
427      * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
428      * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
429      * @stable ICU 4.6
430      */
431     UIDNA_ERROR_LABEL_TOO_LONG=2,
432     /**
433      * A domain name is longer than 255 bytes in its storage form.
434      * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
435      * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
436      * @stable ICU 4.6
437      */
438     UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4,
439     /**
440      * A label starts with a hyphen-minus ('-').
441      * @stable ICU 4.6
442      */
443     UIDNA_ERROR_LEADING_HYPHEN=8,
444     /**
445      * A label ends with a hyphen-minus ('-').
446      * @stable ICU 4.6
447      */
448     UIDNA_ERROR_TRAILING_HYPHEN=0x10,
449     /**
450      * A label contains hyphen-minus ('-') in the third and fourth positions.
451      * @stable ICU 4.6
452      */
453     UIDNA_ERROR_HYPHEN_3_4=0x20,
454     /**
455      * A label starts with a combining mark.
456      * @stable ICU 4.6
457      */
458     UIDNA_ERROR_LEADING_COMBINING_MARK=0x40,
459     /**
460      * A label or domain name contains disallowed characters.
461      * @stable ICU 4.6
462      */
463     UIDNA_ERROR_DISALLOWED=0x80,
464     /**
465      * A label starts with "xn--" but does not contain valid Punycode.
466      * That is, an xn-- label failed Punycode decoding.
467      * @stable ICU 4.6
468      */
469     UIDNA_ERROR_PUNYCODE=0x100,
470     /**
471      * A label contains a dot (full stop).
472      * This can occur in an input string for a single-label function.
473      * @stable ICU 4.6
474      */
475     UIDNA_ERROR_LABEL_HAS_DOT=0x200,
476     /**
477      * An ACE label does not contain a valid label string.
478      * The label was successfully ACE (Punycode) decoded but the resulting
479      * string had severe validation errors. For example,
480      * it might contain characters that are not allowed in ACE labels,
481      * or it might not be normalized.
482      * @stable ICU 4.6
483      */
484     UIDNA_ERROR_INVALID_ACE_LABEL=0x400,
485     /**
486      * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
487      * @stable ICU 4.6
488      */
489     UIDNA_ERROR_BIDI=0x800,
490     /**
491      * A label does not meet the IDNA CONTEXTJ requirements.
492      * @stable ICU 4.6
493      */
494     UIDNA_ERROR_CONTEXTJ=0x1000,
495     /**
496      * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
497      * Some punctuation characters "Would otherwise have been DISALLOWED"
498      * but are allowed in certain contexts. (RFC 5892)
499      * @stable ICU 49
500      */
501     UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000,
502     /**
503      * A label does not meet the IDNA CONTEXTO requirements for digits.
504      * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
505      * @stable ICU 49
506      */
507     UIDNA_ERROR_CONTEXTO_DIGITS=0x4000
508 };
509 
510 #ifndef U_HIDE_DEPRECATED_API
511 
512 /* IDNA2003 API ------------------------------------------------------------- */
513 
514 /**
515  * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
516  * This operation is done on <b>single labels</b> before sending it to something that expects
517  * ASCII names. A label is an individual part of a domain name. Labels are usually
518  * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
519  *
520  * IDNA2003 API Overview:
521  *
522  * The uidna_ API implements the IDNA protocol as defined in the IDNA RFC
523  * (http://www.ietf.org/rfc/rfc3490.txt).
524  * The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels
525  * containing non-ASCII code points are processed by the
526  * ToASCII operation before passing it to resolver libraries. Domain names
527  * that are obtained from resolver libraries are processed by the
528  * ToUnicode operation before displaying the domain name to the user.
529  * IDNA requires that implementations process input strings with Nameprep
530  * (http://www.ietf.org/rfc/rfc3491.txt),
531  * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
532  * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
533  * Implementations of IDNA MUST fully implement Nameprep and Punycode;
534  * neither Nameprep nor Punycode are optional.
535  * The input and output of ToASCII and ToUnicode operations are Unicode
536  * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
537  * multiple times to an input string will yield the same result as applying the operation
538  * once.
539  * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
540  * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
541  *
542  * @param src               Input UChar array containing label in Unicode.
543  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
544  * @param dest              Output UChar array with ASCII (ACE encoded) label.
545  * @param destCapacity      Size of dest.
546  * @param options           A bit set of options:
547  *
548  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
549  *                              and do not use STD3 ASCII rules
550  *                              If unassigned code points are found the operation fails with
551  *                              U_IDNA_UNASSIGNED_ERROR error code.
552  *
553  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
554  *                              If this option is set, the unassigned code points in the input
555  *                              are treated as normal Unicode code points.
556  *
557  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
558  *                              If this option is set and the input does not satisfy STD3 rules,
559  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
560  *
561  * @param parseError        Pointer to UParseError struct to receive information on position
562  *                          of error if an error is encountered. Can be NULL.
563  * @param status            ICU in/out error code parameter.
564  *                          U_INVALID_CHAR_FOUND if src contains
565  *                          unmatched single surrogates.
566  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
567  *                          too many code points.
568  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
569  * @return The length of the result string if successful, or, in case of a buffer overflow,
570  *         the length of the full result string, which is then greater than destCapacity.
571  * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
572  */
573 U_DEPRECATED int32_t U_EXPORT2
574 uidna_toASCII(const UChar* src, int32_t srcLength,
575               UChar* dest, int32_t destCapacity,
576               int32_t options,
577               UParseError* parseError,
578               UErrorCode* status);
579 
580 
581 /**
582  * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
583  * This operation is done on <b>single labels</b> before sending it to something that expects
584  * Unicode names. A label is an individual part of a domain name. Labels are usually
585  * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
586  *
587  * @param src               Input UChar array containing ASCII (ACE encoded) label.
588  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
589  * @param dest              Output UChar array containing the converted Unicode equivalent of label.
590  * @param destCapacity      Size of dest.
591  * @param options           A bit set of options:
592  *
593  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
594  *                              and do not use STD3 ASCII rules
595  *                              If unassigned code points are found the operation fails with
596  *                              U_IDNA_UNASSIGNED_ERROR error code.
597  *
598  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
599  *                              If this option is set, the unassigned code points in the input
600  *                              are treated as normal Unicode code points. <b> Note: </b> This option is
601  *                              required on toUnicode operation because the RFC mandates
602  *                              verification of decoded ACE input by applying toASCII and comparing
603  *                              its output with source
604  *
605  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
606  *                              If this option is set and the input does not satisfy STD3 rules,
607  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
608  *
609  * @param parseError        Pointer to UParseError struct to receive information on position
610  *                          of error if an error is encountered. Can be NULL.
611  * @param status            ICU in/out error code parameter.
612  *                          U_INVALID_CHAR_FOUND if src contains
613  *                          unmatched single surrogates.
614  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
615  *                          too many code points.
616  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
617  * @return The length of the result string if successful, or, in case of a buffer overflow,
618  *         the length of the full result string, which is then greater than destCapacity.
619  * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
620  */
621 U_DEPRECATED int32_t U_EXPORT2
622 uidna_toUnicode(const UChar* src, int32_t srcLength,
623                 UChar* dest, int32_t destCapacity,
624                 int32_t options,
625                 UParseError* parseError,
626                 UErrorCode* status);
627 
628 
629 /**
630  * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
631  * This operation is done on complete domain names, e.g: "www.example.com".
632  * It is important to note that this operation can fail. If it fails, then the input
633  * domain name cannot be used as an Internationalized Domain Name and the application
634  * should have methods defined to deal with the failure.
635  *
636  * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
637  * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
638  * and then convert. This function does not offer that level of granularity. The options once
639  * set will apply to all labels in the domain name.
640  *
641  * @param src               Input UChar array containing IDN in Unicode.
642  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
643  * @param dest              Output UChar array with ASCII (ACE encoded) IDN.
644  * @param destCapacity      Size of dest.
645  * @param options           A bit set of options:
646  *
647  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
648  *                              and do not use STD3 ASCII rules
649  *                              If unassigned code points are found the operation fails with
650  *                              U_IDNA_UNASSIGNED_ERROR error code.
651  *
652  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
653  *                              If this option is set, the unassigned code points in the input
654  *                              are treated as normal Unicode code points.
655  *
656  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
657  *                              If this option is set and the input does not satisfy STD3 rules,
658  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
659  *
660  * @param parseError        Pointer to UParseError struct to receive information on position
661  *                          of error if an error is encountered. Can be NULL.
662  * @param status            ICU in/out error code parameter.
663  *                          U_INVALID_CHAR_FOUND if src contains
664  *                          unmatched single surrogates.
665  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
666  *                          too many code points.
667  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
668  * @return The length of the result string if successful, or, in case of a buffer overflow,
669  *         the length of the full result string, which is then greater than destCapacity.
670  * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
671  */
672 U_DEPRECATED int32_t U_EXPORT2
673 uidna_IDNToASCII(  const UChar* src, int32_t srcLength,
674                    UChar* dest, int32_t destCapacity,
675                    int32_t options,
676                    UParseError* parseError,
677                    UErrorCode* status);
678 
679 /**
680  * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
681  * This operation is done on complete domain names, e.g., "www.example.com".
682  *
683  * <b>Note:</b> The IDNA RFC specifies that a conformant application should divide a domain name
684  * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
685  * and then convert. This function does not offer that level of granularity. The options, once
686  * set, will apply to all labels in the domain name.
687  *
688  * @param src               Input UChar array containing IDN in ASCII (ACE encoded) form.
689  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
690  * @param dest              Output UChar array containing the Unicode equivalent of the source IDN.
691  * @param destCapacity      Size of dest.
692  * @param options           A bit set of options:
693  *
694  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
695  *                              and do not use STD3 ASCII rules.
696  *                              If unassigned code points are found, the operation fails with
697  *                              the U_UNASSIGNED_CODE_POINT_FOUND error code.
698  *
699  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
700  *                              If this option is set, unassigned code points in the input
701  *                              are treated as normal Unicode code points.
702  *
703  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
704  *                              If this option is set and the input does not satisfy STD3 rules,
705  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
706  *
707  * @param parseError        Pointer to UParseError struct to receive information on position
708  *                          of error if an error is encountered. Can be NULL.
709  * @param status            ICU in/out error code parameter.
710  *                          U_INVALID_CHAR_FOUND if src contains
711  *                          unmatched single surrogates.
712  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
713  *                          too many code points.
714  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
715  * @return The length of the result string, if successful. In case of a buffer overflow,
716  *         the returned length will be greater than destCapacity.
717  * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
718  */
719 U_DEPRECATED int32_t U_EXPORT2
720 uidna_IDNToUnicode(  const UChar* src, int32_t srcLength,
721                      UChar* dest, int32_t destCapacity,
722                      int32_t options,
723                      UParseError* parseError,
724                      UErrorCode* status);
725 
726 /**
727  * IDNA2003: Compare two IDN strings for equivalence.
728  * This function splits the domain names into labels and compares them.
729  * According to the IDN RFC, whenever two labels are compared, they are
730  * considered equal if and only if their ASCII forms (obtained by
731  * applying toASCII) match using a case-insensitive ASCII comparison.
732  * Two domain names are considered a match if and only if all labels
733  * match regardless of whether label separators match.
734  *
735  * @param s1                First source string.
736  * @param length1           Length of first source string, or -1 if NUL-terminated.
737  *
738  * @param s2                Second source string.
739  * @param length2           Length of second source string, or -1 if NUL-terminated.
740  * @param options           A bit set of options:
741  *
742  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
743  *                              and do not use STD3 ASCII rules.
744  *                              If unassigned code points are found, the operation fails with
745  *                              the U_UNASSIGNED_CODE_POINT_FOUND error code.
746  *
747  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
748  *                              If this option is set, unassigned code points in the input
749  *                              are treated as normal Unicode code points.
750  *
751  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
752  *                              If this option is set and the input does not satisfy STD3 rules,
753  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
754  *
755  * @param status            ICU error code in/out parameter.
756  *                          Must fulfill U_SUCCESS before the function call.
757  * @return <0 or 0 or >0 as usual for string comparisons.
758  * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
759  */
760 U_DEPRECATED int32_t U_EXPORT2
761 uidna_compare(  const UChar *s1, int32_t length1,
762                 const UChar *s2, int32_t length2,
763                 int32_t options,
764                 UErrorCode* status);
765 
766 #endif  /* U_HIDE_DEPRECATED_API */
767 
768 #endif /* #if !UCONFIG_NO_IDNA */
769 
770 #endif
771