1 /*
2 *******************************************************************************
3 *   Copyright (C) 2010-2012, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 *   file name:  idna.h
7 *   encoding:   US-ASCII
8 *   tab size:   8 (not used)
9 *   indentation:4
10 *
11 *   created on: 2010mar05
12 *   created by: Markus W. Scherer
13 */
14 
15 #ifndef __IDNA_H__
16 #define __IDNA_H__
17 
18 /**
19  * \file
20  * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
21  */
22 
23 #include "unicode/utypes.h"
24 
25 #if !UCONFIG_NO_IDNA
26 
27 #include "unicode/bytestream.h"
28 #include "unicode/stringpiece.h"
29 #include "unicode/uidna.h"
30 #include "unicode/unistr.h"
31 
32 U_NAMESPACE_BEGIN
33 
34 class IDNAInfo;
35 
36 /**
37  * Abstract base class for IDNA processing.
38  * See http://www.unicode.org/reports/tr46/
39  * and http://www.ietf.org/rfc/rfc3490.txt
40  *
41  * The IDNA class is not intended for public subclassing.
42  *
43  * This C++ API currently only implements UTS #46.
44  * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
45  * and IDNA2003 (functions that do not use a service object).
46  * @stable ICU 4.6
47  */
48 class U_COMMON_API IDNA : public UObject {
49 public:
50     /**
51      * Destructor.
52      * @stable ICU 4.6
53      */
54     ~IDNA();
55 
56     /**
57      * Returns an IDNA instance which implements UTS #46.
58      * Returns an unmodifiable instance, owned by the caller.
59      * Cache it for multiple operations, and delete it when done.
60      * The instance is thread-safe, that is, it can be used concurrently.
61      *
62      * UTS #46 defines Unicode IDNA Compatibility Processing,
63      * updated to the latest version of Unicode and compatible with both
64      * IDNA2003 and IDNA2008.
65      *
66      * The worker functions use transitional processing, including deviation mappings,
67      * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
68      * is used in which case the deviation characters are passed through without change.
69      *
70      * Disallowed characters are mapped to U+FFFD.
71      *
72      * For available options see the uidna.h header.
73      * Operations with the UTS #46 instance do not support the
74      * UIDNA_ALLOW_UNASSIGNED option.
75      *
76      * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
77      * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
78      * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
79      *
80      * @param options Bit set to modify the processing and error checking.
81      *                See option bit set values in uidna.h.
82      * @param errorCode Standard ICU error code. Its input value must
83      *                  pass the U_SUCCESS() test, or else the function returns
84      *                  immediately. Check for U_FAILURE() on output or use with
85      *                  function chaining. (See User Guide for details.)
86      * @return the UTS #46 IDNA instance, if successful
87      * @stable ICU 4.6
88      */
89     static IDNA *
90     createUTS46Instance(uint32_t options, UErrorCode &errorCode);
91 
92     /**
93      * Converts a single domain name label into its ASCII form for DNS lookup.
94      * If any processing step fails, then info.hasErrors() will be TRUE and
95      * the result might not be an ASCII string.
96      * The label might be modified according to the types of errors.
97      * Labels with severe errors will be left in (or turned into) their Unicode form.
98      *
99      * The UErrorCode indicates an error only in exceptional cases,
100      * such as a U_MEMORY_ALLOCATION_ERROR.
101      *
102      * @param label Input domain name label
103      * @param dest Destination string object
104      * @param info Output container of IDNA processing details.
105      * @param errorCode Standard ICU error code. Its input value must
106      *                  pass the U_SUCCESS() test, or else the function returns
107      *                  immediately. Check for U_FAILURE() on output or use with
108      *                  function chaining. (See User Guide for details.)
109      * @return dest
110      * @stable ICU 4.6
111      */
112     virtual UnicodeString &
113     labelToASCII(const UnicodeString &label, UnicodeString &dest,
114                  IDNAInfo &info, UErrorCode &errorCode) const = 0;
115 
116     /**
117      * Converts a single domain name label into its Unicode form for human-readable display.
118      * If any processing step fails, then info.hasErrors() will be TRUE.
119      * The label might be modified according to the types of errors.
120      *
121      * The UErrorCode indicates an error only in exceptional cases,
122      * such as a U_MEMORY_ALLOCATION_ERROR.
123      *
124      * @param label Input domain name label
125      * @param dest Destination string object
126      * @param info Output container of IDNA processing details.
127      * @param errorCode Standard ICU error code. Its input value must
128      *                  pass the U_SUCCESS() test, or else the function returns
129      *                  immediately. Check for U_FAILURE() on output or use with
130      *                  function chaining. (See User Guide for details.)
131      * @return dest
132      * @stable ICU 4.6
133      */
134     virtual UnicodeString &
135     labelToUnicode(const UnicodeString &label, UnicodeString &dest,
136                    IDNAInfo &info, UErrorCode &errorCode) const = 0;
137 
138     /**
139      * Converts a whole domain name into its ASCII form for DNS lookup.
140      * If any processing step fails, then info.hasErrors() will be TRUE and
141      * the result might not be an ASCII string.
142      * The domain name might be modified according to the types of errors.
143      * Labels with severe errors will be left in (or turned into) their Unicode form.
144      *
145      * The UErrorCode indicates an error only in exceptional cases,
146      * such as a U_MEMORY_ALLOCATION_ERROR.
147      *
148      * @param name Input domain name
149      * @param dest Destination string object
150      * @param info Output container of IDNA processing details.
151      * @param errorCode Standard ICU error code. Its input value must
152      *                  pass the U_SUCCESS() test, or else the function returns
153      *                  immediately. Check for U_FAILURE() on output or use with
154      *                  function chaining. (See User Guide for details.)
155      * @return dest
156      * @stable ICU 4.6
157      */
158     virtual UnicodeString &
159     nameToASCII(const UnicodeString &name, UnicodeString &dest,
160                 IDNAInfo &info, UErrorCode &errorCode) const = 0;
161 
162     /**
163      * Converts a whole domain name into its Unicode form for human-readable display.
164      * If any processing step fails, then info.hasErrors() will be TRUE.
165      * The domain name might be modified according to the types of errors.
166      *
167      * The UErrorCode indicates an error only in exceptional cases,
168      * such as a U_MEMORY_ALLOCATION_ERROR.
169      *
170      * @param name Input domain name
171      * @param dest Destination string object
172      * @param info Output container of IDNA processing details.
173      * @param errorCode Standard ICU error code. Its input value must
174      *                  pass the U_SUCCESS() test, or else the function returns
175      *                  immediately. Check for U_FAILURE() on output or use with
176      *                  function chaining. (See User Guide for details.)
177      * @return dest
178      * @stable ICU 4.6
179      */
180     virtual UnicodeString &
181     nameToUnicode(const UnicodeString &name, UnicodeString &dest,
182                   IDNAInfo &info, UErrorCode &errorCode) const = 0;
183 
184     // UTF-8 versions of the processing methods ---------------------------- ***
185 
186     /**
187      * Converts a single domain name label into its ASCII form for DNS lookup.
188      * UTF-8 version of labelToASCII(), same behavior.
189      *
190      * @param label Input domain name label
191      * @param dest Destination byte sink; Flush()ed if successful
192      * @param info Output container of IDNA processing details.
193      * @param errorCode Standard ICU error code. Its input value must
194      *                  pass the U_SUCCESS() test, or else the function returns
195      *                  immediately. Check for U_FAILURE() on output or use with
196      *                  function chaining. (See User Guide for details.)
197      * @note Returns nothing (void); the converted label is delivered through dest.
198      * @stable ICU 4.6
199      */
200     virtual void
201     labelToASCII_UTF8(const StringPiece &label, ByteSink &dest,
202                       IDNAInfo &info, UErrorCode &errorCode) const;
203 
204     /**
205      * Converts a single domain name label into its Unicode form for human-readable display.
206      * UTF-8 version of labelToUnicode(), same behavior.
207      *
208      * @param label Input domain name label
209      * @param dest Destination byte sink; Flush()ed if successful
210      * @param info Output container of IDNA processing details.
211      * @param errorCode Standard ICU error code. Its input value must
212      *                  pass the U_SUCCESS() test, or else the function returns
213      *                  immediately. Check for U_FAILURE() on output or use with
214      *                  function chaining. (See User Guide for details.)
215      * @note Returns nothing (void); the converted label is delivered through dest.
216      * @stable ICU 4.6
217      */
218     virtual void
219     labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest,
220                        IDNAInfo &info, UErrorCode &errorCode) const;
221 
222     /**
223      * Converts a whole domain name into its ASCII form for DNS lookup.
224      * UTF-8 version of nameToASCII(), same behavior.
225      *
226      * @param name Input domain name
227      * @param dest Destination byte sink; Flush()ed if successful
228      * @param info Output container of IDNA processing details.
229      * @param errorCode Standard ICU error code. Its input value must
230      *                  pass the U_SUCCESS() test, or else the function returns
231      *                  immediately. Check for U_FAILURE() on output or use with
232      *                  function chaining. (See User Guide for details.)
233      * @note Returns nothing (void); the converted name is delivered through dest.
234      * @stable ICU 4.6
235      */
236     virtual void
237     nameToASCII_UTF8(const StringPiece &name, ByteSink &dest,
238                      IDNAInfo &info, UErrorCode &errorCode) const;
239 
240     /**
241      * Converts a whole domain name into its Unicode form for human-readable display.
242      * UTF-8 version of nameToUnicode(), same behavior.
243      *
244      * @param name Input domain name
245      * @param dest Destination byte sink; Flush()ed if successful
246      * @param info Output container of IDNA processing details.
247      * @param errorCode Standard ICU error code. Its input value must
248      *                  pass the U_SUCCESS() test, or else the function returns
249      *                  immediately. Check for U_FAILURE() on output or use with
250      *                  function chaining. (See User Guide for details.)
251      * @note Returns nothing (void); the converted name is delivered through dest.
252      * @stable ICU 4.6
253      */
254     virtual void
255     nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest,
256                       IDNAInfo &info, UErrorCode &errorCode) const;
257 };
258 
259 class UTS46;
260 
261 /**
262  * Output container for IDNA processing errors.
263  * The IDNAInfo class is not suitable for subclassing.
264  * @stable ICU 4.6
265  */
266 class U_COMMON_API IDNAInfo : public UMemory {
267 public:
268     /**
269      * Constructor for stack allocation.
270      * @stable ICU 4.6
271      */
IDNAInfo()272     IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {}
273     /**
274      * Were there IDNA processing errors?
275      * @return TRUE if there were processing errors
276      * @stable ICU 4.6
277      */
hasErrors()278     UBool hasErrors() const { return errors!=0; }
279     /**
280      * Returns a bit set indicating IDNA processing errors.
281      * See UIDNA_ERROR_... constants in uidna.h.
282      * @return bit set of processing errors
283      * @stable ICU 4.6
284      */
getErrors()285     uint32_t getErrors() const { return errors; }
286     /**
287      * Returns TRUE if transitional and nontransitional processing produce different results.
288      * This is the case when the input label or domain name contains
289      * one or more deviation characters outside a Punycode label (see UTS #46).
290      * <ul>
291      * <li>With nontransitional processing, such characters are
292      * copied to the destination string.
293      * <li>With transitional processing, such characters are
294      * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
295      * </ul>
296      * @return TRUE if transitional and nontransitional processing produce different results
297      * @stable ICU 4.6
298      */
isTransitionalDifferent()299     UBool isTransitionalDifferent() const { return isTransDiff; }
300 
301 private:
302     friend class UTS46;
303 
304     IDNAInfo(const IDNAInfo &other);  // no copying
305     IDNAInfo &operator=(const IDNAInfo &other);  // no copying
306 
reset()307     void reset() {
308         errors=labelErrors=0;
309         isTransDiff=FALSE;
310         isBiDi=FALSE;
311         isOkBiDi=TRUE;
312     }
313 
314     uint32_t errors, labelErrors;
315     UBool isTransDiff;
316     UBool isBiDi;
317     UBool isOkBiDi;
318 };
319 
320 U_NAMESPACE_END
321 
322 #endif  // UCONFIG_NO_IDNA
323 #endif  // __IDNA_H__
324