1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  *
6  *   Copyright (C) 2003-2011, International Business Machines
7  *   Corporation and others.  All Rights Reserved.
8  *
9  *******************************************************************************
10  *   file name:  nptrans.h
11  *   encoding:   UTF-8
12  *   tab size:   8 (not used)
13  *   indentation:4
14  *
15  *   created on: 2003feb1
16  *   created by: Ram Viswanadha
17  */
18 
19 #ifndef NPTRANS_H
20 #define NPTRANS_H
21 
22 #include "unicode/utypes.h"
23 
24 #if !UCONFIG_NO_IDNA
25 #if !UCONFIG_NO_TRANSLITERATION
26 
27 #include "unicode/uniset.h"
28 #include "unicode/ures.h"
29 #include "unicode/translit.h"
30 
31 #include "intltest.h"
32 
33 
/* Code point U+0020, the ASCII space character. */
#define ASCII_SPACE 0x0020
35 
36 class NamePrepTransform {
37 
38 private :
39     Transliterator *mapping;
40     UnicodeSet unassigned;
41     UnicodeSet prohibited;
42     UnicodeSet labelSeparatorSet;
43     UResourceBundle *bundle;
44     NamePrepTransform(UParseError& parseError, UErrorCode& status);
45 
46 
47 public :
48 
49     static NamePrepTransform* createInstance(UParseError& parseError, UErrorCode& status);
50 
51     virtual ~NamePrepTransform();
52 
53 
54     inline UBool isProhibited(UChar32 ch);
55 
56     /**
57      * ICU "poor man's RTTI", returns a UClassID for the actual class.
58      */
getDynamicClassID()59     inline UClassID getDynamicClassID() const { return getStaticClassID(); }
60 
61     /**
62      * ICU "poor man's RTTI", returns a UClassID for this class.
63      */
getStaticClassID()64     static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
65 
66     /**
67      * Map every character in input stream with mapping character
68      * in the mapping table and populate the output stream.
69      * For any individual character the mapping table may specify
70      * that that a character be mapped to nothing, mapped to one
71      * other character or to a string of other characters.
72      *
73      * @param src           Pointer to UChar buffer containing a single label
74      * @param srcLength     Number of characters in the source label
75      * @param dest          Pointer to the destination buffer to receive the output
76      * @param destCapacity  The capacity of destination array
77      * @param allowUnassigned   Unassigned values can be converted to ASCII for query operations
78      *                          If TRUE unassigned values are treated as normal Unicode code point.
79      *                          If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
80      * @param status        ICU error code in/out parameter.
81      *                      Must fulfill U_SUCCESS before the function call.
82      * @return The number of UChars in the destination buffer
83      */
84     int32_t map(const UChar* src, int32_t srcLength,
85                         UChar* dest, int32_t destCapacity,
86                         UBool allowUnassigned,
87                         UParseError* parseError,
88                         UErrorCode& status );
89 
90     /**
91      * Prepare the input stream with for use. This operation maps, normalizes(NFKC),
92      * checks for prohited and BiDi characters in the order defined by RFC 3454
93      *
94      * @param src           Pointer to UChar buffer containing a single label
95      * @param srcLength     Number of characters in the source label
96      * @param dest          Pointer to the destination buffer to receive the output
97      * @param destCapacity  The capacity of destination array
98      * @param allowUnassigned   Unassigned values can be converted to ASCII for query operations
99      *                          If TRUE unassigned values are treated as normal Unicode code point.
100      *                          If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code.
101      * @param status        ICU error code in/out parameter.
102      *                      Must fulfill U_SUCCESS before the function call.
103      * @return The number of UChars in the destination buffer
104      */
105     int32_t process(const UChar* src, int32_t srcLength,
106                             UChar* dest, int32_t destCapacity,
107                             UBool allowUnassigned,
108                             UParseError* parseError,
109                             UErrorCode& status );
110 
111     /**
112      * Ascertain if the given code point is a label separator as specified by IDNA
113      *
114      * @return TRUE is the code point is a label separator
115      */
116     UBool isLabelSeparator(UChar32 ch, UErrorCode& status);
117 
118     inline UBool isLDHChar(UChar32 ch);
119 
120 private:
121     /**
122      * The address of this static class variable serves as this class's ID
123      * for ICU "poor man's RTTI".
124      */
125     static const char fgClassID;
126 };
127 
isLDHChar(UChar32 ch)128 inline UBool NamePrepTransform::isLDHChar(UChar32 ch){
129     // high runner case
130     if(ch>0x007A){
131         return FALSE;
132     }
133     //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
134     if( (ch==0x002D) ||
135         (0x0030 <= ch && ch <= 0x0039) ||
136         (0x0041 <= ch && ch <= 0x005A) ||
137         (0x0061 <= ch && ch <= 0x007A)
138       ){
139         return TRUE;
140     }
141     return FALSE;
142 }
143 
144 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
145 #else
/*
 * Empty placeholder declared when UCONFIG_NO_IDNA is set, so the class name
 * still exists in that configuration.
 * NOTE(review): when IDNA is enabled but UCONFIG_NO_TRANSLITERATION is set,
 * neither this placeholder nor the full class is declared.
 */
class NamePrepTransform {};
148 #endif /* #if !UCONFIG_NO_IDNA */
149 
150 #endif
151 
152 /*
153  * Hey, Emacs, please set the following:
154  *
155  * Local Variables:
156  * indent-tabs-mode: nil
157  * End:
158  *
159  */
160