1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  *
6  *   Copyright (C) 2003-2014, International Business Machines
7  *   Corporation and others.  All Rights Reserved.
8  *
9  *******************************************************************************
10  *   file name:  nptrans.h
11  *   encoding:   UTF-8
12  *   tab size:   8 (not used)
13  *   indentation:4
14  *
15  *   created on: 2003feb1
16  *   created by: Ram Viswanadha
17  */
18 
19 #include "unicode/utypes.h"
20 
21 #if !UCONFIG_NO_TRANSLITERATION
22 #if !UCONFIG_NO_IDNA
23 
24 #include "nptrans.h"
25 #include "unicode/resbund.h"
26 #include "unicode/uniset.h"
27 #include "sprpimpl.h"
28 #include "cmemory.h"
29 #include "ustr_imp.h"
30 #include "intltest.h"
31 
32 #ifdef NPTRANS_DEBUG
33 #include <stdio.h>
34 #endif
35 
36 const char NamePrepTransform::fgClassID=0;
37 
38 //Factory method
createInstance(UParseError & parseError,UErrorCode & status)39 NamePrepTransform* NamePrepTransform::createInstance(UParseError& parseError, UErrorCode& status){
40     NamePrepTransform* transform = new NamePrepTransform(parseError, status);
41     if(U_FAILURE(status)){
42         delete transform;
43         return NULL;
44     }
45     return transform;
46 }
47 
48 //constructor
NamePrepTransform(UParseError & parseError,UErrorCode & status)49 NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status)
50     : mapping(nullptr), unassigned(), prohibited(), labelSeparatorSet(), bundle(nullptr) {
51 
52     LocalPointer<Transliterator> lmapping;
53     LocalUResourceBundlePointer   lbundle;
54 
55     const char* testDataName = IntlTest::loadTestData(status);
56 
57     if(U_FAILURE(status)){
58         return;
59     }
60 
61     lbundle.adoptInstead(ures_openDirect(testDataName,"idna_rules",&status));
62 
63     if(lbundle.isValid() && U_SUCCESS(status)){
64         // create the mapping transliterator
65         int32_t ruleLen = 0;
66         const UChar* ruleUChar = ures_getStringByKey(lbundle.getAlias(), "MapNFKC",&ruleLen, &status);
67         int32_t mapRuleLen = 0;
68         const UChar *mapRuleUChar = ures_getStringByKey(lbundle.getAlias(), "MapNoNormalization", &mapRuleLen, &status);
69         UnicodeString rule(mapRuleUChar, mapRuleLen);
70         rule.append(ruleUChar, ruleLen);
71 
72         lmapping.adoptInstead( Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule,
73                                                    UTRANS_FORWARD, parseError,status));
74         if(U_FAILURE(status)) {
75             return;
76         }
77 
78         //create the unassigned set
79         int32_t patternLen =0;
80         const UChar* pattern = ures_getStringByKey(lbundle.getAlias(),"UnassignedSet",&patternLen, &status);
81         unassigned.applyPattern(UnicodeString(pattern, patternLen), status);
82 
83         //create prohibited set
84         patternLen=0;
85         pattern =  ures_getStringByKey(lbundle.getAlias(),"ProhibitedSet",&patternLen, &status);
86         UnicodeString test(pattern,patternLen);
87         prohibited.applyPattern(test,status);
88 #ifdef NPTRANS_DEBUG
89         if(U_FAILURE(status)){
90             printf("Construction of Unicode set failed\n");
91         }
92 
93         if(U_SUCCESS(status)){
94             if(prohibited.contains((UChar) 0x644)){
95                 printf("The string contains 0x644 ... !!\n");
96             }
97             UnicodeString temp;
98             prohibited.toPattern(temp,TRUE);
99 
100             for(int32_t i=0;i<temp.length();i++){
101                 printf("%c", (char)temp.charAt(i));
102             }
103             printf("\n");
104         }
105 #endif
106 
107         //create label separator set
108         patternLen=0;
109         pattern =  ures_getStringByKey(lbundle.getAlias(), "LabelSeparatorSet", &patternLen, &status);
110         labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status);
111     }
112 
113     if(U_SUCCESS(status) && (lmapping.isNull())) {
114         status = U_MEMORY_ALLOCATION_ERROR;
115     }
116     if (U_FAILURE(status)) {
117         return;
118     }
119     mapping = lmapping.orphan();
120     bundle  = lbundle.orphan();
121 }
122 
123 
isProhibited(UChar32 ch)124 UBool NamePrepTransform::isProhibited(UChar32 ch){
125     return (UBool)(ch != ASCII_SPACE);
126 }
127 
~NamePrepTransform()128 NamePrepTransform::~NamePrepTransform(){
129     delete mapping;
130     mapping = NULL;
131 
132     //close the bundle
133     ures_close(bundle);
134     bundle = NULL;
135 }
136 
137 
map(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UBool allowUnassigned,UParseError *,UErrorCode & status)138 int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength,
139                         UChar* dest, int32_t destCapacity,
140                         UBool allowUnassigned,
141                         UParseError* /*parseError*/,
142                         UErrorCode& status ){
143 
144     if(U_FAILURE(status)){
145         return 0;
146     }
147     //check arguments
148     if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
149         status=U_ILLEGAL_ARGUMENT_ERROR;
150         return 0;
151     }
152 
153     UnicodeString rsource(src,srcLength);
154     // map the code points
155     // transliteration also performs NFKC
156     mapping->transliterate(rsource);
157 
158     const UChar* buffer = rsource.getBuffer();
159     int32_t bufLen = rsource.length();
160     // check if unassigned
161     if(allowUnassigned == FALSE){
162         int32_t bufIndex=0;
163         UChar32 ch =0 ;
164         for(;bufIndex<bufLen;){
165             U16_NEXT(buffer, bufIndex, bufLen, ch);
166             if(unassigned.contains(ch)){
167                 status = U_IDNA_UNASSIGNED_ERROR;
168                 return 0;
169             }
170         }
171     }
172     // check if there is enough room in the output
173     if(bufLen < destCapacity){
174         u_memcpy(dest, buffer, bufLen);
175     }
176 
177     return u_terminateUChars(dest, destCapacity, bufLen, &status);
178 }
179 
180 
181 #define MAX_BUFFER_SIZE 300
182 
process(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UBool allowUnassigned,UParseError * parseError,UErrorCode & status)183 int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength,
184                                     UChar* dest, int32_t destCapacity,
185                                     UBool allowUnassigned,
186                                     UParseError* parseError,
187                                     UErrorCode& status ){
188     // check error status
189     if(U_FAILURE(status)){
190         return 0;
191     }
192 
193     //check arguments
194     if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
195         status=U_ILLEGAL_ARGUMENT_ERROR;
196         return 0;
197     }
198 
199     UnicodeString b1String;
200     UChar *b1 = b1String.getBuffer(MAX_BUFFER_SIZE);
201     int32_t b1Len;
202 
203     int32_t b1Index = 0;
204     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
205     UBool leftToRight=FALSE, rightToLeft=FALSE;
206 
207     b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
208     b1String.releaseBuffer(b1Len);
209 
210     if(status == U_BUFFER_OVERFLOW_ERROR){
211         // redo processing of string
212         /* we do not have enough room so grow the buffer*/
213         b1 = b1String.getBuffer(b1Len);
214         status = U_ZERO_ERROR; // reset error
215         b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
216         b1String.releaseBuffer(b1Len);
217     }
218 
219     if(U_FAILURE(status)){
220         b1Len = 0;
221         goto CLEANUP;
222     }
223 
224 
225     for(; b1Index<b1Len; ){
226 
227         UChar32 ch = 0;
228 
229         U16_NEXT(b1, b1Index, b1Len, ch);
230 
231         if(prohibited.contains(ch) && ch!=0x0020){
232             status = U_IDNA_PROHIBITED_ERROR;
233             b1Len = 0;
234             goto CLEANUP;
235         }
236 
237         direction = u_charDirection(ch);
238         if(firstCharDir==U_CHAR_DIRECTION_COUNT){
239             firstCharDir = direction;
240         }
241         if(direction == U_LEFT_TO_RIGHT){
242             leftToRight = TRUE;
243         }
244         if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
245             rightToLeft = TRUE;
246         }
247     }
248 
249     // satisfy 2
250     if( leftToRight == TRUE && rightToLeft == TRUE){
251         status = U_IDNA_CHECK_BIDI_ERROR;
252         b1Len = 0;
253         goto CLEANUP;
254     }
255 
256     //satisfy 3
257     if( rightToLeft == TRUE &&
258         !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
259           (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
260        ){
261         status = U_IDNA_CHECK_BIDI_ERROR;
262         return FALSE;
263     }
264 
265     if(b1Len <= destCapacity){
266         u_memmove(dest, b1, b1Len);
267     }
268 
269 CLEANUP:
270     return u_terminateUChars(dest, destCapacity, b1Len, &status);
271 }
272 
isLabelSeparator(UChar32 ch,UErrorCode & status)273 UBool NamePrepTransform::isLabelSeparator(UChar32 ch, UErrorCode& status){
274     // check error status
275     if(U_FAILURE(status)){
276         return FALSE;
277     }
278 
279     return labelSeparatorSet.contains(ch);
280 }
281 
282 #endif /* #if !UCONFIG_NO_IDNA */
283 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
284