1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  *
6  *   Copyright (C) 2005-2014, International Business Machines
7  *   Corporation and others.  All Rights Reserved.
8  *
9  *******************************************************************************
10  *
11  *   created on: 2005jun15
12  *   created by: Raymond Yang
13  */
14 
15 #if !UCONFIG_NO_IDNA
16 
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include "unicode/utypes.h"
21 #include "unicode/ucnv.h"
22 #include "unicode/ustring.h"
23 #include "unicode/uidna.h"
24 #include "unicode/utf16.h"
25 #include "idnaconf.h"
26 
27 static const UChar C_TAG[] = {0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0}; // =====
28 static const UChar C_NAMEZONE[] = {0x6E, 0x61, 0x6D, 0x65, 0x7A, 0x6F, 0x6E, 0x65, 0}; // namezone
29 static const UChar C_NAMEBASE[] = {0x6E, 0x61, 0x6D, 0x65, 0x62, 0x61, 0x73, 0x65, 0}; // namebase
30 
31 static const UChar C_TYPE[] = {0x74, 0x79, 0x70, 0x65, 0}; // type
32 static const UChar C_TOASCII[]  =  {0x74, 0x6F, 0x61, 0x73, 0x63, 0x69, 0x69, 0};       // toascii
33 static const UChar C_TOUNICODE[] = {0x74, 0x6F, 0x75, 0x6E, 0x69, 0x63, 0x6F, 0x64, 0x65, 0}; // tounicode
34 
35 static const UChar C_PASSFAIL[] = {0x70, 0x61, 0x73, 0x73, 0x66, 0x61, 0x69, 0x6C, 0}; // passfail
36 static const UChar C_PASS[] = {0x70, 0x61, 0x73, 0x73, 0}; // pass
37 static const UChar C_FAIL[] = {0x66, 0x61, 0x69, 0x6C, 0}; // fail
38 
39 static const UChar C_DESC[] = {0x64, 0x65, 0x73, 0x63, 0}; // desc
40 static const UChar C_USESTD3ASCIIRULES[] = {0x55, 0x73, 0x65, 0x53, 0x54, 0x44,
41        0x33, 0x41, 0x53, 0x43, 0x49, 0x49, 0x52, 0x75, 0x6C, 0x65, 0x73, 0}; // UseSTD3ASCIIRules
42 
IdnaConfTest()43 IdnaConfTest::IdnaConfTest(){
44     base = NULL;
45     len = 0;
46     curOffset = 0;
47 
48     type = option = passfail = -1;
49     namebase.setToBogus();
50     namezone.setToBogus();
51 }
~IdnaConfTest()52 IdnaConfTest::~IdnaConfTest(){
53     delete [] base;
54 }
55 
56 #if !UCONFIG_NO_IDNA
57 /* this function is modified from RBBITest::ReadAndConvertFile()
58  *
59  */
ReadAndConvertFile()60 UBool IdnaConfTest::ReadAndConvertFile(){
61 
62     char * source = NULL;
63     size_t source_len;
64 
65     // read the test data file to memory
66     FILE* f    = NULL;
67     UErrorCode  status  = U_ZERO_ERROR;
68 
69     const char *path = IntlTest::getSourceTestData(status);
70     if (U_FAILURE(status)) {
71         errln("%s", u_errorName(status));
72         return FALSE;
73     }
74 
75     const char* name = "idna_conf.txt";     // test data file
76     int t = strlen(path) + strlen(name) + 1;
77     char* absolute_name = new char[t];
78     strcpy(absolute_name, path);
79     strcat(absolute_name, name);
80     f = fopen(absolute_name, "rb");
81     delete [] absolute_name;
82 
83     if (f == NULL){
84         dataerrln("fopen error on %s", name);
85         return FALSE;
86     }
87 
88     fseek( f, 0, SEEK_END);
89     if ((source_len = ftell(f)) <= 0){
90         errln("Error reading test data file.");
91         fclose(f);
92         return FALSE;
93     }
94 
95     source = new char[source_len];
96     fseek(f, 0, SEEK_SET);
97     if (fread(source, 1, source_len, f) != source_len) {
98         errln("Error reading test data file.");
99         delete [] source;
100         fclose(f);
101         return FALSE;
102     }
103     fclose(f);
104 
105     // convert the UTF-8 encoded stream to UTF-16 stream
106     UConverter* conv = ucnv_open("utf-8", &status);
107     int dest_len = ucnv_toUChars(conv,
108                                 NULL,           //  dest,
109                                 0,              //  destCapacity,
110                                 source,
111                                 source_len,
112                                 &status);
113     if (status == U_BUFFER_OVERFLOW_ERROR) {
114         // Buffer Overflow is expected from the preflight operation.
115         status = U_ZERO_ERROR;
116         UChar * dest = NULL;
117         dest = new UChar[ dest_len + 1];
118         ucnv_toUChars(conv, dest, dest_len + 1, source, source_len, &status);
119         // Do not know the "if possible" behavior of ucnv_toUChars()
120         // Do it by ourself.
121         dest[dest_len] = 0;
122         len = dest_len;
123         base = dest;
124         delete [] source;
125         ucnv_close(conv);
126         return TRUE;    // The buffer will owned by caller.
127     }
128     errln("UConverter error: %s", u_errorName(status));
129     delete [] source;
130     ucnv_close(conv);
131     return FALSE;
132 }
133 
isNewlineMark()134 int IdnaConfTest::isNewlineMark(){
135     static const UChar LF        = 0x0a;
136     static const UChar CR        = 0x0d;
137     UChar c = base[curOffset];
138     // CR LF
139     if ( c == CR && curOffset + 1 < len && base[curOffset + 1] == LF){
140         return 2;
141     }
142 
143     // CR or LF
144     if ( c == CR || c == LF) {
145         return 1;
146     }
147 
148     return 0;
149 }
150 
151 /* Read a logical line.
152  *
153  * All lines ending in a backslash (\) and immediately followed by a newline
154  * character are joined with the next line in the source file forming logical
155  * lines from the physical lines.
156  *
157  */
ReadOneLine(UnicodeString & buf)158 UBool IdnaConfTest::ReadOneLine(UnicodeString& buf){
159     if ( !(curOffset < len) ) return FALSE; // stream end
160 
161     static const UChar BACKSLASH = 0x5c;
162     buf.remove();
163     int t = 0;
164     while (curOffset < len){
165         if ((t = isNewlineMark())) {  // end of line
166             curOffset += t;
167             break;
168         }
169         UChar c = base[curOffset];
170         if (c == BACKSLASH && curOffset < len -1){  // escaped new line mark
171             if ((t = isNewlineMark())){
172                 curOffset += 1 + t;  // BACKSLAH and NewlineMark
173                 continue;
174             }
175         };
176         buf.append(c);
177         curOffset++;
178     }
179     return TRUE;
180 }
181 
182 //
183 //===============================================================
184 //
185 
186 /* Explain <xxxxx> tag to a native value
187  *
188  * Since <xxxxx> is always larger than the native value,
189  * the operation will replace the tag directly in the buffer,
190  * and, of course, will shift tail elements.
191  */
ExplainCodePointTag(UnicodeString & buf)192 void IdnaConfTest::ExplainCodePointTag(UnicodeString& buf){
193     buf.append((UChar)0);    // add a terminal NULL
194     UChar* bufBase = buf.getBuffer(buf.length());
195     UChar* p = bufBase;
196     while (*p != 0){
197         if ( *p != 0x3C){    // <
198             *bufBase++ = *p++;
199         } else {
200             p++;    // skip <
201             UChar32 cp = 0;
202             for ( ;*p != 0x3E; p++){   // >
203                 if (0x30 <= *p && *p <= 0x39){        // 0-9
204                     cp = (cp * 16) + (*p - 0x30);
205                 } else if (0x61 <= *p && *p <= 0x66){ // a-f
206                     cp = (cp * 16) + (*p - 0x61) + 10;
207                 } else if (0x41 <= *p && *p <= 0x46) {// A-F
208                     cp = (cp * 16) + (*p - 0x41) + 10;
209                 }
210                 // no else. hope everything is good.
211             }
212             p++;    // skip >
213             if (U_IS_BMP(cp)){
214                 *bufBase++ = cp;
215             } else {
216                 *bufBase++ = U16_LEAD(cp);
217                 *bufBase++ = U16_TRAIL(cp);
218             }
219         }
220     }
221     *bufBase = 0;  // close our buffer
222     buf.releaseBuffer();
223 }
224 
Call()225 void IdnaConfTest::Call(){
226     if (type == -1 || option == -1 || passfail == -1 || namebase.isBogus() || namezone.isBogus()){
227         errln("Incomplete record");
228     } else {
229         UErrorCode status = U_ZERO_ERROR;
230         UChar result[200] = {0,};   // simple life
231         const UChar *p = namebase.getTerminatedBuffer();
232         const int p_len = namebase.length();
233 
234         if (type == 0 && option == 0){
235             uidna_IDNToASCII(p, p_len, result, 200, UIDNA_USE_STD3_RULES, NULL, &status);
236         } else if (type == 0 && option == 1){
237             uidna_IDNToASCII(p, p_len, result, 200, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
238         } else if (type == 1 && option == 0){
239             uidna_IDNToUnicode(p, p_len, result, 200, UIDNA_USE_STD3_RULES, NULL, &status);
240         } else if (type == 1 && option == 1){
241             uidna_IDNToUnicode(p, p_len, result, 200, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
242         }
243         if (passfail == 0){
244             if (U_FAILURE(status)){
245                 id.append(" should pass, but failed. - ");
246                 id.append(u_errorName(status));
247                 errcheckln(status, id);
248             } else{
249                 if (namezone.compare(result, -1) == 0){
250                     // expected
251                     logln(UnicodeString("namebase: ") + prettify(namebase) + UnicodeString(" result: ") + prettify(result));
252                 } else {
253                     id.append(" no error, but result is not as expected.");
254                     errln(id);
255                 }
256             }
257         } else if (passfail == 1){
258             if (U_FAILURE(status)){
259                 // expected
260                 // TODO: Uncomment this when U_IDNA_ZERO_LENGTH_LABEL_ERROR is added to u_errorName
261                 //logln("Got the expected error: " + UnicodeString(u_errorName(status)));
262             } else{
263                 if (namebase.compare(result, -1) == 0){
264                     // garbage in -> garbage out
265                     logln(UnicodeString("ICU will not recognize malformed ACE-Prefixes or incorrect ACE-Prefixes. ") + UnicodeString("namebase: ") + prettify(namebase) + UnicodeString(" result: ") + prettify(result));
266                 } else {
267                     id.append(" should fail, but not failed. ");
268                     id.append(u_errorName(status));
269                     errln(id);
270                 }
271             }
272         }
273     }
274     type = option = passfail = -1;
275     namebase.setToBogus();
276     namezone.setToBogus();
277     id.remove();
278     return;
279 }
280 
Test(void)281 void IdnaConfTest::Test(void){
282     if (!ReadAndConvertFile())return;
283 
284     UnicodeString s;
285     UnicodeString key;
286     UnicodeString value;
287 
288     // skip everything before the first "=====" and "=====" itself
289     do {
290         if (!ReadOneLine(s)) {
291             errln("End of file prematurely found");
292             break;
293         }
294     }
295     while (s.compare(C_TAG, -1) != 0);   //"====="
296 
297     while(ReadOneLine(s)){
298         s.trim();
299         key.remove();
300         value.remove();
301         if (s.compare(C_TAG, -1) == 0){   //"====="
302             Call();
303        } else {
304             // explain      key:value
305             int p = s.indexOf((UChar)0x3A);    // :
306             key.setTo(s,0,p).trim();
307             value.setTo(s,p+1).trim();
308             if (key.compare(C_TYPE, -1) == 0){
309                 if (value.compare(C_TOASCII, -1) == 0) {
310                     type = 0;
311                 } else if (value.compare(C_TOUNICODE, -1) == 0){
312                     type = 1;
313                 }
314             } else if (key.compare(C_PASSFAIL, -1) == 0){
315                 if (value.compare(C_PASS, -1) == 0){
316                     passfail = 0;
317                 } else if (value.compare(C_FAIL, -1) == 0){
318                     passfail = 1;
319                 }
320             } else if (key.compare(C_DESC, -1) == 0){
321                 if (value.indexOf(C_USESTD3ASCIIRULES, u_strlen(C_USESTD3ASCIIRULES), 0) == -1){
322                     option = 1; // not found
323                 } else {
324                     option = 0;
325                 }
326                 id.setTo(value, 0, value.indexOf((UChar)0x20));    // space
327             } else if (key.compare(C_NAMEZONE, -1) == 0){
328                 ExplainCodePointTag(value);
329                 namezone.setTo(value);
330             } else if (key.compare(C_NAMEBASE, -1) == 0){
331                 ExplainCodePointTag(value);
332                 namebase.setTo(value);
333             }
334             // just skip other lines
335         }
336     }
337 
338     Call(); // for last record
339 }
340 #else
Test(void)341 void IdnaConfTest::Test(void)
342 {
343   // test nothing...
344 }
345 #endif
346 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)347 void IdnaConfTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/){
348     switch (index) {
349         TESTCASE(0,Test);
350         default: name = ""; break;
351     }
352 }
353 
354 #endif
355