1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003-2011, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: nptrans.h
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003feb1
14 * created by: Ram Viswanadha
15 */
16
17 #ifndef NPTRANS_H
18 #define NPTRANS_H
19
20 #include "unicode/utypes.h"
21
22 #if !UCONFIG_NO_IDNA
23 #if !UCONFIG_NO_TRANSLITERATION
24
25 #include "unicode/uniset.h"
26 #include "unicode/ures.h"
27 #include "unicode/translit.h"
28
29 #include "intltest.h"
30
31
32 #define ASCII_SPACE 0x0020
33
34 class NamePrepTransform {
35
36 private :
37 Transliterator *mapping;
38 UnicodeSet unassigned;
39 UnicodeSet prohibited;
40 UnicodeSet labelSeparatorSet;
41 UResourceBundle *bundle;
42 NamePrepTransform(UParseError& parseError, UErrorCode& status);
43
44
45 public :
46
47 static NamePrepTransform* createInstance(UParseError& parseError, UErrorCode& status);
48
49 virtual ~NamePrepTransform();
50
51
52 inline UBool isProhibited(UChar32 ch);
53
54 /**
55 * ICU "poor man's RTTI", returns a UClassID for the actual class.
56 */
getDynamicClassID()57 inline UClassID getDynamicClassID() const { return getStaticClassID(); }
58
59 /**
60 * ICU "poor man's RTTI", returns a UClassID for this class.
61 */
getStaticClassID()62 static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
63
64 /**
65 * Map every character in input stream with mapping character
66 * in the mapping table and populate the output stream.
67 * For any individual character the mapping table may specify
68 * that that a character be mapped to nothing, mapped to one
69 * other character or to a string of other characters.
70 *
71 * @param src Pointer to UChar buffer containing a single label
72 * @param srcLength Number of characters in the source label
73 * @param dest Pointer to the destination buffer to receive the output
74 * @param destCapacity The capacity of destination array
75 * @param allowUnassigned Unassigned values can be converted to ASCII for query operations
76 * If TRUE unassigned values are treated as normal Unicode code point.
77 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
78 * @param status ICU error code in/out parameter.
79 * Must fulfill U_SUCCESS before the function call.
80 * @return The number of UChars in the destination buffer
81 */
82 int32_t map(const UChar* src, int32_t srcLength,
83 UChar* dest, int32_t destCapacity,
84 UBool allowUnassigned,
85 UParseError* parseError,
86 UErrorCode& status );
87
88 /**
89 * Prepare the input stream with for use. This operation maps, normalizes(NFKC),
90 * checks for prohited and BiDi characters in the order defined by RFC 3454
91 *
92 * @param src Pointer to UChar buffer containing a single label
93 * @param srcLength Number of characters in the source label
94 * @param dest Pointer to the destination buffer to receive the output
95 * @param destCapacity The capacity of destination array
96 * @param allowUnassigned Unassigned values can be converted to ASCII for query operations
97 * If TRUE unassigned values are treated as normal Unicode code point.
98 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code.
99 * @param status ICU error code in/out parameter.
100 * Must fulfill U_SUCCESS before the function call.
101 * @return The number of UChars in the destination buffer
102 */
103 int32_t process(const UChar* src, int32_t srcLength,
104 UChar* dest, int32_t destCapacity,
105 UBool allowUnassigned,
106 UParseError* parseError,
107 UErrorCode& status );
108
109 /**
110 * Ascertain if the given code point is a label separator as specified by IDNA
111 *
112 * @return TRUE is the code point is a label separator
113 */
114 UBool isLabelSeparator(UChar32 ch, UErrorCode& status);
115
116 inline UBool isLDHChar(UChar32 ch);
117
118 private:
119 /**
120 * The address of this static class variable serves as this class's ID
121 * for ICU "poor man's RTTI".
122 */
123 static const char fgClassID;
124 };
125
isLDHChar(UChar32 ch)126 inline UBool NamePrepTransform::isLDHChar(UChar32 ch){
127 // high runner case
128 if(ch>0x007A){
129 return FALSE;
130 }
131 //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
132 if( (ch==0x002D) ||
133 (0x0030 <= ch && ch <= 0x0039) ||
134 (0x0041 <= ch && ch <= 0x005A) ||
135 (0x0061 <= ch && ch <= 0x007A)
136 ){
137 return TRUE;
138 }
139 return FALSE;
140 }
141
142 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
143 #else
/**
 * Empty placeholder used when IDNA support is compiled out
 * (UCONFIG_NO_IDNA), so the class name still exists.
 */
class NamePrepTransform {};
146 #endif /* #if !UCONFIG_NO_IDNA */
147
148 #endif
149
150 /*
151 * Hey, Emacs, please set the following:
152 *
153 * Local Variables:
154 * indent-tabs-mode: nil
155 * End:
156 *
157 */
158