1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2003-2013, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  testidn.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2003-02-06
14 *   created by: Ram Viswanadha
15 *
*   This test reads the sprep/rfc3491.txt file from the test data directory,
*   parses it, and compares every range and mapping it lists against the
*   data of the RFC 3491 Nameprep StringPrep profile that is loaded at
*   run time for the IDNA conversion processes.
20 */
21 
22 #include "unicode/utypes.h"
23 
24 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION
25 
26 #define USPREP_TYPE_NAMES_ARRAY
27 
28 #include "unicode/uchar.h"
29 #include "unicode/putil.h"
30 #include "cmemory.h"
31 #include "cstring.h"
32 #include "unicode/udata.h"
33 #include "unicode/utf16.h"
34 #include "unewdata.h"
35 #include "uoptions.h"
36 #include "uparse.h"
37 #include "utrie.h"
38 #include "umutex.h"
39 #include "sprpimpl.h"
40 #include "testidna.h"
41 #include "punyref.h"
42 #include <stdlib.h>
43 
/* Option-style flags; not referenced by the code in this part of the file. */
UBool beVerbose=FALSE, haveCopyright=TRUE;

/* prototypes --------------------------------------------------------------- */


/* Parses one data file and feeds every line to the comparison functions. */
static void
parseMappings(const char *filename, UBool reportError,TestIDNA& test, UErrorCode *pErrorCode);

/* Checks one code point's mapping from the data file against the profile data. */
static void
compareMapping(uint32_t codepoint, uint32_t* mapping, int32_t mapLength,
               UStringPrepType option);

/* Checks the type of every code point in start..end against the profile data. */
static void
compareFlagsForRange(uint32_t start, uint32_t end,UStringPrepType option);

/* Walks all code points and logs how many fall into each type. */
static void
testAllCodepoints(TestIDNA& test);

/*
 * Test object used by the compare functions for error reporting.
 * Set by testData() for the duration of a run.
 */
static TestIDNA* pTestIDNA =NULL;

/* Data files to check, relative to the SPREP_DIR directory. */
static const char* fileNames[] = {
                                    "rfc3491.txt"
                                 };
/*
 * Views into the profile opened by testData(): the trie, the indexes array
 * and the mapping table. They are assigned in testData() before any
 * comparison function runs.
 */
static const UTrie *idnTrie              = NULL;
static const int32_t *indexes            = NULL;
static const uint16_t *mappingData       = NULL;
/* -------------------------------------------------------------------------- */

/* file definitions */
#define DATA_TYPE "icu"

/* Sub-directory of the test data directory that holds the files in fileNames. */
#define SPREP_DIR "sprep"
76 
77 extern int
testData(TestIDNA & test)78 testData(TestIDNA& test) {
79     char *basename=NULL;
80     UErrorCode errorCode=U_ZERO_ERROR;
81     char *saveBasename =NULL;
82 
83     LocalUStringPrepProfilePointer profile(usprep_openByType(USPREP_RFC3491_NAMEPREP, &errorCode));
84     if(U_FAILURE(errorCode)){
85         test.errcheckln(errorCode, "Failed to load IDNA data file. " + UnicodeString(u_errorName(errorCode)));
86         return errorCode;
87     }
88 
89     char* filename = (char*) malloc(strlen(IntlTest::pathToDataDirectory())*1024);
90     //TODO get the srcDir dynamically
91     const char *srcDir=IntlTest::pathToDataDirectory();
92 
93     idnTrie     = &profile->sprepTrie;
94     indexes     = profile->indexes;
95     mappingData = profile->mappingData;
96 
97     //initialize
98     pTestIDNA = &test;
99 
100     /* prepare the filename beginning with the source dir */
101     if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){
102         filename[0] = 0x2E;
103         filename[1] = U_FILE_SEP_CHAR;
104         uprv_strcpy(filename+2,srcDir);
105     }else{
106         uprv_strcpy(filename, srcDir);
107     }
108     basename=filename+uprv_strlen(filename);
109     if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
110         *basename++=U_FILE_SEP_CHAR;
111     }
112 
113     /* process unassigned */
114     basename=filename+uprv_strlen(filename);
115     if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
116         *basename++=U_FILE_SEP_CHAR;
117     }
118 
119     /* first copy misc directory */
120     saveBasename = basename;
121     (void)saveBasename;    // Suppress set but not used warning.
122     uprv_strcpy(basename,SPREP_DIR);
123     basename = basename + uprv_strlen(SPREP_DIR);
124     *basename++=U_FILE_SEP_CHAR;
125 
126     /* process unassigned */
127     uprv_strcpy(basename,fileNames[0]);
128     parseMappings(filename,TRUE, test,&errorCode);
129     if(U_FAILURE(errorCode)) {
130         test.errln( "Could not open file %s for reading \n", filename);
131         return errorCode;
132     }
133 
134     testAllCodepoints(test);
135 
136     pTestIDNA = NULL;
137     free(filename);
138     return errorCode;
139 }
140 U_CDECL_BEGIN
141 
142 static void U_CALLCONV
strprepProfileLineFn(void *,char * fields[][2],int32_t fieldCount,UErrorCode * pErrorCode)143 strprepProfileLineFn(void * /*context*/,
144               char *fields[][2], int32_t fieldCount,
145               UErrorCode *pErrorCode) {
146     uint32_t mapping[40];
147     char *end, *map;
148     uint32_t code;
149     int32_t length;
150    /*UBool* mapWithNorm = (UBool*) context;*/
151     const char* typeName;
152     uint32_t rangeStart=0,rangeEnd =0;
153     const char *s;
154 
155     s = u_skipWhitespace(fields[0][0]);
156     if (*s == '@') {
157         /* a special directive introduced in 4.2 */
158         return;
159     }
160 
161     if(fieldCount != 3){
162         *pErrorCode = U_INVALID_FORMAT_ERROR;
163         return;
164     }
165 
166     typeName = fields[2][0];
167     map = fields[1][0];
168 
169     if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){
170 
171         u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
172 
173         /* store the range */
174         compareFlagsForRange(rangeStart,rangeEnd,USPREP_UNASSIGNED);
175 
176     }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){
177 
178         u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
179 
180         /* store the range */
181         compareFlagsForRange(rangeStart,rangeEnd,USPREP_PROHIBITED);
182 
183     }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){
184         /* get the character code, field 0 */
185         code=(uint32_t)uprv_strtoul(s, &end, 16);
186 
187         /* parse the mapping string */
188         length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode);
189 
190         /* store the mapping */
191         compareMapping(code,mapping, length,USPREP_MAP);
192 
193     }else{
194         *pErrorCode = U_INVALID_FORMAT_ERROR;
195     }
196 
197 }
198 
199 U_CDECL_END
200 
201 static void
parseMappings(const char * filename,UBool reportError,TestIDNA & test,UErrorCode * pErrorCode)202 parseMappings(const char *filename,UBool reportError, TestIDNA& test, UErrorCode *pErrorCode) {
203     char *fields[3][2];
204 
205     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
206         return;
207     }
208 
209     u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode);
210 
211     //fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len);
212 
213     if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) {
214         test.errln( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
215     }
216 }
217 
218 
219 static inline UStringPrepType
getValues(uint32_t result,int32_t & value,UBool & isIndex)220 getValues(uint32_t result, int32_t& value, UBool& isIndex){
221 
222     UStringPrepType type;
223 
224     if(result == 0){
225         /*
226          * Initial value stored in the mapping table
227          * just return USPREP_TYPE_LIMIT .. so that
228          * the source codepoint is copied to the destination
229          */
230         type = USPREP_TYPE_LIMIT;
231         isIndex =FALSE;
232         value = 0;
233     }else if(result >= _SPREP_TYPE_THRESHOLD){
234         type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD);
235         isIndex =FALSE;
236         value = 0;
237     }else{
238         /* get the state */
239         type = USPREP_MAP;
240         /* ascertain if the value is index or delta */
241         if(result & 0x02){
242             isIndex = TRUE;
243             value = result  >> 2; //mask off the lower 2 bits and shift
244 
245         }else{
246             isIndex = FALSE;
247             value = (int16_t)result;
248             value =  (value >> 2);
249 
250         }
251         if((result>>2) == _SPREP_MAX_INDEX_VALUE){
252             type = USPREP_DELETE;
253             isIndex =FALSE;
254             value = 0;
255         }
256     }
257     return type;
258 }
259 
260 
261 
262 static void
testAllCodepoints(TestIDNA & test)263 testAllCodepoints(TestIDNA& test){
264     /*
265     {
266         UChar str[19] = {
267                             0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
268                             0x070F,//prohibited
269                             0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74
270                         };
271         uint32_t in[19] = {0};
272         UErrorCode status = U_ZERO_ERROR;
273         int32_t inLength=0, outLength=100;
274         char output[100] = {0};
275         punycode_status error;
276         u_strToUTF32((UChar32*)in,19,&inLength,str,19,&status);
277 
278         error= punycode_encode(inLength, in, NULL, (uint32_t*)&outLength, output);
279         printf(output);
280 
281     }
282     */
283 
284     uint32_t i = 0;
285     int32_t unassigned      = 0;
286     int32_t prohibited      = 0;
287     int32_t mappedWithNorm  = 0;
288     int32_t mapped          = 0;
289     int32_t noValueInTrie   = 0;
290 
291     UStringPrepType type;
292     int32_t value;
293     UBool isIndex = FALSE;
294 
295     for(i=0;i<=0x10FFFF;i++){
296         uint32_t result = 0;
297         UTRIE_GET16(idnTrie,i, result);
298         type = getValues(result,value, isIndex);
299         if(type != USPREP_TYPE_LIMIT ){
300             if(type == USPREP_UNASSIGNED){
301                 unassigned++;
302             }
303             if(type == USPREP_PROHIBITED){
304                 prohibited++;
305             }
306             if(type == USPREP_MAP){
307                 mapped++;
308             }
309         }else{
310             noValueInTrie++;
311             if(result > 0){
312                 test.errln("The return value for 0x%06X is wrong. %i\n",i,result);
313             }
314         }
315     }
316 
317     test.logln("Number of Unassinged code points : %i \n",unassigned);
318     test.logln("Number of Prohibited code points : %i \n",prohibited);
319     test.logln("Number of Mapped code points : %i \n",mapped);
320     test.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm);
321     test.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie);
322 
323 
324 }
325 
326 static void
compareMapping(uint32_t codepoint,uint32_t * mapping,int32_t mapLength,UStringPrepType type)327 compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength,
328                UStringPrepType type){
329     uint32_t result = 0;
330     UTRIE_GET16(idnTrie,codepoint, result);
331 
332     int32_t length=0;
333     UBool isIndex;
334     UStringPrepType retType;
335     int32_t value, index=0, delta=0;
336 
337     retType = getValues(result,value,isIndex);
338 
339 
340     if(type != retType && retType != USPREP_DELETE){
341 
342         pTestIDNA->errln( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, USPREP_MAP, type);
343 
344     }
345 
346     if(isIndex){
347         index = value;
348         if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
349                  index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
350             length = 1;
351         }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
352                  index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
353             length = 2;
354         }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
355                  index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
356             length = 3;
357         }else{
358             length = mappingData[index++];
359         }
360     }else{
361         delta = value;
362         length = (retType == USPREP_DELETE)? 0 :  1;
363     }
364 
365     int32_t realLength =0;
366     /* figure out the real length */
367     for(int32_t j=0; j<mapLength; j++){
368         if(mapping[j] > 0xFFFF){
369             realLength +=2;
370         }else{
371             realLength++;
372         }
373     }
374 
375     if(realLength != length){
376         pTestIDNA->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length);
377     }
378 
379     if(isIndex){
380         for(int8_t i =0; i< mapLength; i++){
381             if(mapping[i] <= 0xFFFF){
382                 if(mappingData[index+i] != (uint16_t)mapping[i]){
383                     pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]);
384                 }
385             }else{
386                 UChar lead  = U16_LEAD(mapping[i]);
387                 UChar trail = U16_TRAIL(mapping[i]);
388                 if(mappingData[index+i] != lead ||
389                     mappingData[index+i+1] != trail){
390                     pTestIDNA->errln( "Did not get the expected result. Expected: 0x%04X 0x%04X  Got: 0x%04X 0x%04X", lead, trail, mappingData[index+i], mappingData[index+i+1]);
391                 }
392             }
393         }
394     }else{
395         if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){
396             pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta));
397         }
398     }
399 
400 }
401 
402 static void
compareFlagsForRange(uint32_t start,uint32_t end,UStringPrepType type)403 compareFlagsForRange(uint32_t start, uint32_t end,
404                      UStringPrepType type){
405 
406     uint32_t result =0 ;
407     UStringPrepType retType;
408     UBool isIndex=FALSE;
409     int32_t value=0;
410 /*
411     // supplementary code point
412     UChar __lead16=U16_LEAD(0x2323E);
413     int32_t __offset;
414 
415     // get data for lead surrogate
416     (result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16));
417     __offset=(&idnTrie)->getFoldingOffset(result);
418 
419     // get the real data from the folded lead/trail units
420     if(__offset>0) {
421         (result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff);
422     } else {
423         (result)=(uint32_t)((&idnTrie)->initialValue);
424     }
425 
426     UTRIE_GET16(&idnTrie,0x2323E, result);
427 */
428     while(start < end+1){
429         UTRIE_GET16(idnTrie,start, result);
430         retType = getValues(result,value,isIndex);
431         if(result > _SPREP_TYPE_THRESHOLD){
432             if(retType != type){
433                 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
434             }
435         }else{
436             if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){
437                 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
438             }
439         }
440 
441         start++;
442     }
443 
444 }
445 
446 
447 #endif /* #if !UCONFIG_NO_IDNA */
448 
449 /*
450  * Hey, Emacs, please set the following:
451  *
452  * Local Variables:
453  * indent-tabs-mode: nil
454  * End:
455  *
456  */
457