1 /*
2  *******************************************************************************
3  *
4  *   Copyright (C) 2003-2011, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *
7  *******************************************************************************
8  *   file name:  nptrans.h
9  *   encoding:   US-ASCII
10  *   tab size:   8 (not used)
11  *   indentation:4
12  *
13  *   created on: 2003feb1
14  *   created by: Ram Viswanadha
15  */
16 
17 #ifndef NPTRANS_H
18 #define NPTRANS_H
19 
20 #include "unicode/utypes.h"
21 
22 #if !UCONFIG_NO_IDNA
23 #if !UCONFIG_NO_TRANSLITERATION
24 
25 #include "unicode/uniset.h"
26 #include "unicode/ures.h"
27 #include "unicode/translit.h"
28 
29 #include "intltest.h"
30 
31 
/**
 * Code point of the ASCII space character (U+0020).
 * Kept as a preprocessor macro so it stays usable in #if expressions;
 * it is not referenced anywhere else in this header.
 */
#define ASCII_SPACE 0x0020
33 
34 class NamePrepTransform {
35 
36 private :
37     Transliterator *mapping;
38     UnicodeSet unassigned;
39     UnicodeSet prohibited;
40     UnicodeSet labelSeparatorSet;
41     UResourceBundle *bundle;
42     NamePrepTransform(UParseError& parseError, UErrorCode& status);
43 
44 
45 public :
46 
47     static NamePrepTransform* createInstance(UParseError& parseError, UErrorCode& status);
48 
49     virtual ~NamePrepTransform();
50 
51 
52     inline UBool isProhibited(UChar32 ch);
53 
54     /**
55      * ICU "poor man's RTTI", returns a UClassID for the actual class.
56      */
getDynamicClassID()57     inline UClassID getDynamicClassID() const { return getStaticClassID(); }
58 
59     /**
60      * ICU "poor man's RTTI", returns a UClassID for this class.
61      */
getStaticClassID()62     static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
63 
64     /**
65      * Map every character in input stream with mapping character
66      * in the mapping table and populate the output stream.
67      * For any individual character the mapping table may specify
68      * that that a character be mapped to nothing, mapped to one
69      * other character or to a string of other characters.
70      *
71      * @param src           Pointer to UChar buffer containing a single label
72      * @param srcLength     Number of characters in the source label
73      * @param dest          Pointer to the destination buffer to receive the output
74      * @param destCapacity  The capacity of destination array
75      * @param allowUnassigned   Unassigned values can be converted to ASCII for query operations
76      *                          If TRUE unassigned values are treated as normal Unicode code point.
77      *                          If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
78      * @param status        ICU error code in/out parameter.
79      *                      Must fulfill U_SUCCESS before the function call.
80      * @return The number of UChars in the destination buffer
81      */
82     int32_t map(const UChar* src, int32_t srcLength,
83                         UChar* dest, int32_t destCapacity,
84                         UBool allowUnassigned,
85                         UParseError* parseError,
86                         UErrorCode& status );
87 
88     /**
89      * Prepare the input stream with for use. This operation maps, normalizes(NFKC),
90      * checks for prohited and BiDi characters in the order defined by RFC 3454
91      *
92      * @param src           Pointer to UChar buffer containing a single label
93      * @param srcLength     Number of characters in the source label
94      * @param dest          Pointer to the destination buffer to receive the output
95      * @param destCapacity  The capacity of destination array
96      * @param allowUnassigned   Unassigned values can be converted to ASCII for query operations
97      *                          If TRUE unassigned values are treated as normal Unicode code point.
98      *                          If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code.
99      * @param status        ICU error code in/out parameter.
100      *                      Must fulfill U_SUCCESS before the function call.
101      * @return The number of UChars in the destination buffer
102      */
103     int32_t process(const UChar* src, int32_t srcLength,
104                             UChar* dest, int32_t destCapacity,
105                             UBool allowUnassigned,
106                             UParseError* parseError,
107                             UErrorCode& status );
108 
109     /**
110      * Ascertain if the given code point is a label separator as specified by IDNA
111      *
112      * @return TRUE is the code point is a label separator
113      */
114     UBool isLabelSeparator(UChar32 ch, UErrorCode& status);
115 
116     inline UBool isLDHChar(UChar32 ch);
117 
118 private:
119     /**
120      * The address of this static class variable serves as this class's ID
121      * for ICU "poor man's RTTI".
122      */
123     static const char fgClassID;
124 };
125 
isLDHChar(UChar32 ch)126 inline UBool NamePrepTransform::isLDHChar(UChar32 ch){
127     // high runner case
128     if(ch>0x007A){
129         return FALSE;
130     }
131     //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
132     if( (ch==0x002D) ||
133         (0x0030 <= ch && ch <= 0x0039) ||
134         (0x0041 <= ch && ch <= 0x005A) ||
135         (0x0061 <= ch && ch <= 0x007A)
136       ){
137         return TRUE;
138     }
139     return FALSE;
140 }
141 
142 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
143 #else
/**
 * Empty placeholder used when IDNA support is configured out
 * (UCONFIG_NO_IDNA is non-zero), so the class name still exists.
 */
class NamePrepTransform {
    // intentionally empty
};
146 #endif /* #if !UCONFIG_NO_IDNA */
147 
148 #endif
149 
150 /*
151  * Hey, Emacs, please set the following:
152  *
153  * Local Variables:
154  * indent-tabs-mode: nil
155  * End:
156  *
157  */
158