1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  *
6  *   Copyright (C) 2003-2014, International Business Machines
7  *   Corporation and others.  All Rights Reserved.
8  *
9  *******************************************************************************
10  *   file name:  nptrans.h
11  *   encoding:   UTF-8
12  *   tab size:   8 (not used)
13  *   indentation:4
14  *
15  *   created on: 2003feb1
16  *   created by: Ram Viswanadha
17  */
18 
19 #include "unicode/utypes.h"
20 
21 #if !UCONFIG_NO_TRANSLITERATION
22 #if !UCONFIG_NO_IDNA
23 
24 #include "nptrans.h"
25 #include "unicode/resbund.h"
26 #include "unicode/uniset.h"
27 #include "sprpimpl.h"
28 #include "cmemory.h"
29 #include "ustr_imp.h"
30 #include "intltest.h"
31 
32 #ifdef NPTRANS_DEBUG
33 #include <stdio.h>
34 #endif
35 
36 const char NamePrepTransform::fgClassID=0;
37 
38 //Factory method
createInstance(UParseError & parseError,UErrorCode & status)39 NamePrepTransform* NamePrepTransform::createInstance(UParseError& parseError, UErrorCode& status){
40     NamePrepTransform* transform = new NamePrepTransform(parseError, status);
41     if(U_FAILURE(status)){
42         delete transform;
43         return NULL;
44     }
45     return transform;
46 }
47 
48 //constructor
NamePrepTransform(UParseError & parseError,UErrorCode & status)49 NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status)
50 : unassigned(), prohibited(), labelSeparatorSet(){
51 
52     mapping = NULL;
53     bundle = NULL;
54 
55 
56     const char* testDataName = IntlTest::loadTestData(status);
57 
58     if(U_FAILURE(status)){
59         return;
60     }
61 
62     bundle = ures_openDirect(testDataName,"idna_rules",&status);
63 
64     if(bundle != NULL && U_SUCCESS(status)){
65         // create the mapping transliterator
66         int32_t ruleLen = 0;
67         const UChar* ruleUChar = ures_getStringByKey(bundle, "MapNFKC",&ruleLen, &status);
68         int32_t mapRuleLen = 0;
69         const UChar *mapRuleUChar = ures_getStringByKey(bundle, "MapNoNormalization", &mapRuleLen, &status);
70         UnicodeString rule(mapRuleUChar, mapRuleLen);
71         rule.append(ruleUChar, ruleLen);
72 
73         mapping = Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule,
74                                                    UTRANS_FORWARD, parseError,status);
75         if(U_FAILURE(status)) {
76           return;
77         }
78 
79         //create the unassigned set
80         int32_t patternLen =0;
81         const UChar* pattern = ures_getStringByKey(bundle,"UnassignedSet",&patternLen, &status);
82         unassigned.applyPattern(UnicodeString(pattern, patternLen), status);
83 
84         //create prohibited set
85         patternLen=0;
86         pattern =  ures_getStringByKey(bundle,"ProhibitedSet",&patternLen, &status);
87         UnicodeString test(pattern,patternLen);
88         prohibited.applyPattern(test,status);
89 #ifdef NPTRANS_DEBUG
90         if(U_FAILURE(status)){
91             printf("Construction of Unicode set failed\n");
92         }
93 
94         if(U_SUCCESS(status)){
95             if(prohibited.contains((UChar) 0x644)){
96                 printf("The string contains 0x644 ... !!\n");
97             }
98             UnicodeString temp;
99             prohibited.toPattern(temp,TRUE);
100 
101             for(int32_t i=0;i<temp.length();i++){
102                 printf("%c", (char)temp.charAt(i));
103             }
104             printf("\n");
105         }
106 #endif
107 
108         //create label separator set
109         patternLen=0;
110         pattern =  ures_getStringByKey(bundle,"LabelSeparatorSet",&patternLen, &status);
111         labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status);
112     }
113 
114     if(U_SUCCESS(status) &&
115         (mapping == NULL)
116       ){
117         status = U_MEMORY_ALLOCATION_ERROR;
118         delete mapping;
119         ures_close(bundle);
120         mapping = NULL;
121         bundle = NULL;
122     }
123 
124 }
125 
126 
isProhibited(UChar32 ch)127 UBool NamePrepTransform::isProhibited(UChar32 ch){
128     return (UBool)(ch != ASCII_SPACE);
129 }
130 
~NamePrepTransform()131 NamePrepTransform::~NamePrepTransform(){
132     delete mapping;
133     mapping = NULL;
134 
135     //close the bundle
136     ures_close(bundle);
137     bundle = NULL;
138 }
139 
140 
map(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UBool allowUnassigned,UParseError *,UErrorCode & status)141 int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength,
142                         UChar* dest, int32_t destCapacity,
143                         UBool allowUnassigned,
144                         UParseError* /*parseError*/,
145                         UErrorCode& status ){
146 
147     if(U_FAILURE(status)){
148         return 0;
149     }
150     //check arguments
151     if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
152         status=U_ILLEGAL_ARGUMENT_ERROR;
153         return 0;
154     }
155 
156     UnicodeString rsource(src,srcLength);
157     // map the code points
158     // transliteration also performs NFKC
159     mapping->transliterate(rsource);
160 
161     const UChar* buffer = rsource.getBuffer();
162     int32_t bufLen = rsource.length();
163     // check if unassigned
164     if(allowUnassigned == FALSE){
165         int32_t bufIndex=0;
166         UChar32 ch =0 ;
167         for(;bufIndex<bufLen;){
168             U16_NEXT(buffer, bufIndex, bufLen, ch);
169             if(unassigned.contains(ch)){
170                 status = U_IDNA_UNASSIGNED_ERROR;
171                 return 0;
172             }
173         }
174     }
175     // check if there is enough room in the output
176     if(bufLen < destCapacity){
177         u_memcpy(dest, buffer, bufLen);
178     }
179 
180     return u_terminateUChars(dest, destCapacity, bufLen, &status);
181 }
182 
183 
184 #define MAX_BUFFER_SIZE 300
185 
process(const UChar * src,int32_t srcLength,UChar * dest,int32_t destCapacity,UBool allowUnassigned,UParseError * parseError,UErrorCode & status)186 int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength,
187                                     UChar* dest, int32_t destCapacity,
188                                     UBool allowUnassigned,
189                                     UParseError* parseError,
190                                     UErrorCode& status ){
191     // check error status
192     if(U_FAILURE(status)){
193         return 0;
194     }
195 
196     //check arguments
197     if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
198         status=U_ILLEGAL_ARGUMENT_ERROR;
199         return 0;
200     }
201 
202     UnicodeString b1String;
203     UChar *b1 = b1String.getBuffer(MAX_BUFFER_SIZE);
204     int32_t b1Len;
205 
206     int32_t b1Index = 0;
207     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
208     UBool leftToRight=FALSE, rightToLeft=FALSE;
209 
210     b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
211     b1String.releaseBuffer(b1Len);
212 
213     if(status == U_BUFFER_OVERFLOW_ERROR){
214         // redo processing of string
215         /* we do not have enough room so grow the buffer*/
216         b1 = b1String.getBuffer(b1Len);
217         status = U_ZERO_ERROR; // reset error
218         b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
219         b1String.releaseBuffer(b1Len);
220     }
221 
222     if(U_FAILURE(status)){
223         b1Len = 0;
224         goto CLEANUP;
225     }
226 
227 
228     for(; b1Index<b1Len; ){
229 
230         UChar32 ch = 0;
231 
232         U16_NEXT(b1, b1Index, b1Len, ch);
233 
234         if(prohibited.contains(ch) && ch!=0x0020){
235             status = U_IDNA_PROHIBITED_ERROR;
236             b1Len = 0;
237             goto CLEANUP;
238         }
239 
240         direction = u_charDirection(ch);
241         if(firstCharDir==U_CHAR_DIRECTION_COUNT){
242             firstCharDir = direction;
243         }
244         if(direction == U_LEFT_TO_RIGHT){
245             leftToRight = TRUE;
246         }
247         if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
248             rightToLeft = TRUE;
249         }
250     }
251 
252     // satisfy 2
253     if( leftToRight == TRUE && rightToLeft == TRUE){
254         status = U_IDNA_CHECK_BIDI_ERROR;
255         b1Len = 0;
256         goto CLEANUP;
257     }
258 
259     //satisfy 3
260     if( rightToLeft == TRUE &&
261         !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
262           (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
263        ){
264         status = U_IDNA_CHECK_BIDI_ERROR;
265         return FALSE;
266     }
267 
268     if(b1Len <= destCapacity){
269         u_memmove(dest, b1, b1Len);
270     }
271 
272 CLEANUP:
273     return u_terminateUChars(dest, destCapacity, b1Len, &status);
274 }
275 
isLabelSeparator(UChar32 ch,UErrorCode & status)276 UBool NamePrepTransform::isLabelSeparator(UChar32 ch, UErrorCode& status){
277     // check error status
278     if(U_FAILURE(status)){
279         return FALSE;
280     }
281 
282     return labelSeparatorSet.contains(ch);
283 }
284 
285 #endif /* #if !UCONFIG_NO_IDNA */
286 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
287