// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2010-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  idna.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010mar05
*   created by: Markus W. Scherer
*/

17 #ifndef __IDNA_H__
18 #define __IDNA_H__
19 
20 /**
21  * \file
22  * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
23  */
24 
25 #include "unicode/utypes.h"
26 
27 #if !UCONFIG_NO_IDNA
28 
29 #include "unicode/bytestream.h"
30 #include "unicode/stringpiece.h"
31 #include "unicode/uidna.h"
32 #include "unicode/unistr.h"
33 
34 U_NAMESPACE_BEGIN
35 
36 class IDNAInfo;
37 
38 /**
39  * Abstract base class for IDNA processing.
40  * See http://www.unicode.org/reports/tr46/
41  * and http://www.ietf.org/rfc/rfc3490.txt
42  *
43  * The IDNA class is not intended for public subclassing.
44  *
45  * This C++ API currently only implements UTS #46.
46  * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
47  * and IDNA2003 (functions that do not use a service object).
48  * @stable ICU 4.6
49  */
50 class U_COMMON_API IDNA : public UObject {
51 public:
52     /**
53      * Destructor.
54      * @stable ICU 4.6
55      */
56     ~IDNA();
57 
58     /**
59      * Returns an IDNA instance which implements UTS #46.
60      * Returns an unmodifiable instance, owned by the caller.
61      * Cache it for multiple operations, and delete it when done.
62      * The instance is thread-safe, that is, it can be used concurrently.
63      *
64      * UTS #46 defines Unicode IDNA Compatibility Processing,
65      * updated to the latest version of Unicode and compatible with both
66      * IDNA2003 and IDNA2008.
67      *
68      * The worker functions use transitional processing, including deviation mappings,
69      * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
70      * is used in which case the deviation characters are passed through without change.
71      *
72      * Disallowed characters are mapped to U+FFFD.
73      *
74      * For available options see the uidna.h header.
75      * Operations with the UTS #46 instance do not support the
76      * UIDNA_ALLOW_UNASSIGNED option.
77      *
78      * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
79      * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
80      * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
81      *
82      * @param options Bit set to modify the processing and error checking.
83      *                See option bit set values in uidna.h.
84      * @param errorCode Standard ICU error code. Its input value must
85      *                  pass the U_SUCCESS() test, or else the function returns
86      *                  immediately. Check for U_FAILURE() on output or use with
87      *                  function chaining. (See User Guide for details.)
88      * @return the UTS #46 IDNA instance, if successful
89      * @stable ICU 4.6
90      */
91     static IDNA *
92     createUTS46Instance(uint32_t options, UErrorCode &errorCode);
93 
94     /**
95      * Converts a single domain name label into its ASCII form for DNS lookup.
96      * If any processing step fails, then info.hasErrors() will be TRUE and
97      * the result might not be an ASCII string.
98      * The label might be modified according to the types of errors.
99      * Labels with severe errors will be left in (or turned into) their Unicode form.
100      *
101      * The UErrorCode indicates an error only in exceptional cases,
102      * such as a U_MEMORY_ALLOCATION_ERROR.
103      *
104      * @param label Input domain name label
105      * @param dest Destination string object
106      * @param info Output container of IDNA processing details.
107      * @param errorCode Standard ICU error code. Its input value must
108      *                  pass the U_SUCCESS() test, or else the function returns
109      *                  immediately. Check for U_FAILURE() on output or use with
110      *                  function chaining. (See User Guide for details.)
111      * @return dest
112      * @stable ICU 4.6
113      */
114     virtual UnicodeString &
115     labelToASCII(const UnicodeString &label, UnicodeString &dest,
116                  IDNAInfo &info, UErrorCode &errorCode) const = 0;
117 
118     /**
119      * Converts a single domain name label into its Unicode form for human-readable display.
120      * If any processing step fails, then info.hasErrors() will be TRUE.
121      * The label might be modified according to the types of errors.
122      *
123      * The UErrorCode indicates an error only in exceptional cases,
124      * such as a U_MEMORY_ALLOCATION_ERROR.
125      *
126      * @param label Input domain name label
127      * @param dest Destination string object
128      * @param info Output container of IDNA processing details.
129      * @param errorCode Standard ICU error code. Its input value must
130      *                  pass the U_SUCCESS() test, or else the function returns
131      *                  immediately. Check for U_FAILURE() on output or use with
132      *                  function chaining. (See User Guide for details.)
133      * @return dest
134      * @stable ICU 4.6
135      */
136     virtual UnicodeString &
137     labelToUnicode(const UnicodeString &label, UnicodeString &dest,
138                    IDNAInfo &info, UErrorCode &errorCode) const = 0;
139 
140     /**
141      * Converts a whole domain name into its ASCII form for DNS lookup.
142      * If any processing step fails, then info.hasErrors() will be TRUE and
143      * the result might not be an ASCII string.
144      * The domain name might be modified according to the types of errors.
145      * Labels with severe errors will be left in (or turned into) their Unicode form.
146      *
147      * The UErrorCode indicates an error only in exceptional cases,
148      * such as a U_MEMORY_ALLOCATION_ERROR.
149      *
150      * @param name Input domain name
151      * @param dest Destination string object
152      * @param info Output container of IDNA processing details.
153      * @param errorCode Standard ICU error code. Its input value must
154      *                  pass the U_SUCCESS() test, or else the function returns
155      *                  immediately. Check for U_FAILURE() on output or use with
156      *                  function chaining. (See User Guide for details.)
157      * @return dest
158      * @stable ICU 4.6
159      */
160     virtual UnicodeString &
161     nameToASCII(const UnicodeString &name, UnicodeString &dest,
162                 IDNAInfo &info, UErrorCode &errorCode) const = 0;
163 
164     /**
165      * Converts a whole domain name into its Unicode form for human-readable display.
166      * If any processing step fails, then info.hasErrors() will be TRUE.
167      * The domain name might be modified according to the types of errors.
168      *
169      * The UErrorCode indicates an error only in exceptional cases,
170      * such as a U_MEMORY_ALLOCATION_ERROR.
171      *
172      * @param name Input domain name
173      * @param dest Destination string object
174      * @param info Output container of IDNA processing details.
175      * @param errorCode Standard ICU error code. Its input value must
176      *                  pass the U_SUCCESS() test, or else the function returns
177      *                  immediately. Check for U_FAILURE() on output or use with
178      *                  function chaining. (See User Guide for details.)
179      * @return dest
180      * @stable ICU 4.6
181      */
182     virtual UnicodeString &
183     nameToUnicode(const UnicodeString &name, UnicodeString &dest,
184                   IDNAInfo &info, UErrorCode &errorCode) const = 0;
185 
186     // UTF-8 versions of the processing methods ---------------------------- ***
187 
188     /**
189      * Converts a single domain name label into its ASCII form for DNS lookup.
190      * UTF-8 version of labelToASCII(), same behavior.
191      *
192      * @param label Input domain name label
193      * @param dest Destination byte sink; Flush()ed if successful
194      * @param info Output container of IDNA processing details.
195      * @param errorCode Standard ICU error code. Its input value must
196      *                  pass the U_SUCCESS() test, or else the function returns
197      *                  immediately. Check for U_FAILURE() on output or use with
198      *                  function chaining. (See User Guide for details.)
199      * @return dest
200      * @stable ICU 4.6
201      */
202     virtual void
203     labelToASCII_UTF8(StringPiece label, ByteSink &dest,
204                       IDNAInfo &info, UErrorCode &errorCode) const;
205 
206     /**
207      * Converts a single domain name label into its Unicode form for human-readable display.
208      * UTF-8 version of labelToUnicode(), same behavior.
209      *
210      * @param label Input domain name label
211      * @param dest Destination byte sink; Flush()ed if successful
212      * @param info Output container of IDNA processing details.
213      * @param errorCode Standard ICU error code. Its input value must
214      *                  pass the U_SUCCESS() test, or else the function returns
215      *                  immediately. Check for U_FAILURE() on output or use with
216      *                  function chaining. (See User Guide for details.)
217      * @return dest
218      * @stable ICU 4.6
219      */
220     virtual void
221     labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
222                        IDNAInfo &info, UErrorCode &errorCode) const;
223 
224     /**
225      * Converts a whole domain name into its ASCII form for DNS lookup.
226      * UTF-8 version of nameToASCII(), same behavior.
227      *
228      * @param name Input domain name
229      * @param dest Destination byte sink; Flush()ed if successful
230      * @param info Output container of IDNA processing details.
231      * @param errorCode Standard ICU error code. Its input value must
232      *                  pass the U_SUCCESS() test, or else the function returns
233      *                  immediately. Check for U_FAILURE() on output or use with
234      *                  function chaining. (See User Guide for details.)
235      * @return dest
236      * @stable ICU 4.6
237      */
238     virtual void
239     nameToASCII_UTF8(StringPiece name, ByteSink &dest,
240                      IDNAInfo &info, UErrorCode &errorCode) const;
241 
242     /**
243      * Converts a whole domain name into its Unicode form for human-readable display.
244      * UTF-8 version of nameToUnicode(), same behavior.
245      *
246      * @param name Input domain name
247      * @param dest Destination byte sink; Flush()ed if successful
248      * @param info Output container of IDNA processing details.
249      * @param errorCode Standard ICU error code. Its input value must
250      *                  pass the U_SUCCESS() test, or else the function returns
251      *                  immediately. Check for U_FAILURE() on output or use with
252      *                  function chaining. (See User Guide for details.)
253      * @return dest
254      * @stable ICU 4.6
255      */
256     virtual void
257     nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
258                       IDNAInfo &info, UErrorCode &errorCode) const;
259 };
260 
261 class UTS46;
262 
263 /**
264  * Output container for IDNA processing errors.
265  * The IDNAInfo class is not suitable for subclassing.
266  * @stable ICU 4.6
267  */
268 class U_COMMON_API IDNAInfo : public UMemory {
269 public:
270     /**
271      * Constructor for stack allocation.
272      * @stable ICU 4.6
273      */
IDNAInfo()274     IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {}
275     /**
276      * Were there IDNA processing errors?
277      * @return TRUE if there were processing errors
278      * @stable ICU 4.6
279      */
hasErrors()280     UBool hasErrors() const { return errors!=0; }
281     /**
282      * Returns a bit set indicating IDNA processing errors.
283      * See UIDNA_ERROR_... constants in uidna.h.
284      * @return bit set of processing errors
285      * @stable ICU 4.6
286      */
getErrors()287     uint32_t getErrors() const { return errors; }
288     /**
289      * Returns TRUE if transitional and nontransitional processing produce different results.
290      * This is the case when the input label or domain name contains
291      * one or more deviation characters outside a Punycode label (see UTS #46).
292      * <ul>
293      * <li>With nontransitional processing, such characters are
294      * copied to the destination string.
295      * <li>With transitional processing, such characters are
296      * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
297      * </ul>
298      * @return TRUE if transitional and nontransitional processing produce different results
299      * @stable ICU 4.6
300      */
isTransitionalDifferent()301     UBool isTransitionalDifferent() const { return isTransDiff; }
302 
303 private:
304     friend class UTS46;
305 
306     IDNAInfo(const IDNAInfo &other);  // no copying
307     IDNAInfo &operator=(const IDNAInfo &other);  // no copying
308 
reset()309     void reset() {
310         errors=labelErrors=0;
311         isTransDiff=FALSE;
312         isBiDi=FALSE;
313         isOkBiDi=TRUE;
314     }
315 
316     uint32_t errors, labelErrors;
317     UBool isTransDiff;
318     UBool isBiDi;
319     UBool isOkBiDi;
320 };
321 
322 U_NAMESPACE_END
323 
324 #endif  // UCONFIG_NO_IDNA
325 #endif  // __IDNA_H__
326