1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 1999-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 */
11
12 #include "unicode/utypes.h"
13 #include "unicode/uclean.h"
14 #include "unicode/uchar.h"
15 #include "unicode/unistr.h"
16 #include "unicode/uscript.h"
17 #include "unicode/putil.h"
18 #include "unicode/ctest.h"
19
20 #include "layout/LETypes.h"
21 #include "layout/LEScripts.h"
22
23 #include "letsutil.h"
24 #include "letest.h"
25
26 #include "xmlreader.h"
27
28 #include "xmlparser.h"
29
30 #include <stdlib.h>
31 #include <stdio.h>
32 #include <string.h>
33
34 //U_NAMESPACE_USE
35
// U+002C COMMA: the separator the array parsers in this file search for
// when splitting a list of numbers into individual fields.
#define CH_COMMA 0x002C
37
getHexArray(const UnicodeString & numbers,int32_t & arraySize)38 static le_uint32 *getHexArray(const UnicodeString &numbers, int32_t &arraySize)
39 {
40 int32_t offset = -1;
41
42 arraySize = 1;
43 while((offset = numbers.indexOf(CH_COMMA, offset + 1)) >= 0) {
44 arraySize += 1;
45 }
46
47 le_uint32 *array = NEW_ARRAY(le_uint32, arraySize);
48 char number[16];
49 le_int32 count = 0;
50 le_int32 start = 0, end = 0;
51 le_int32 len = 0;
52
53 // trim leading whitespace
54 while(u_isUWhiteSpace(numbers[start])) {
55 start += 1;
56 }
57
58 while((end = numbers.indexOf(CH_COMMA, start)) >= 0) {
59 len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
60 number[len] = '\0';
61 start = end + 1;
62
63 sscanf(number, "%x", &array[count++]);
64
65 // trim whitespace following the comma
66 while(u_isUWhiteSpace(numbers[start])) {
67 start += 1;
68 }
69 }
70
71 // trim trailing whitespace
72 end = numbers.length();
73 while(u_isUWhiteSpace(numbers[end - 1])) {
74 end -= 1;
75 }
76
77 len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
78 number[len] = '\0';
79 sscanf(number, "%x", &array[count]);
80
81 return array;
82 }
83
getFloatArray(const UnicodeString & numbers,int32_t & arraySize)84 static float *getFloatArray(const UnicodeString &numbers, int32_t &arraySize)
85 {
86 int32_t offset = -1;
87
88 arraySize = 1;
89 while((offset = numbers.indexOf(CH_COMMA, offset + 1)) >= 0) {
90 arraySize += 1;
91 }
92
93 float *array = NEW_ARRAY(float, arraySize);
94 char number[32];
95 le_int32 count = 0;
96 le_int32 start = 0, end = 0;
97 le_int32 len = 0;
98
99 // trim leading whitespace
100 while(u_isUWhiteSpace(numbers[start])) {
101 start += 1;
102 }
103
104 while((end = numbers.indexOf(CH_COMMA, start)) >= 0) {
105 len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
106 number[len] = '\0';
107 start = end + 1;
108
109 sscanf(number, "%f", &array[count++]);
110
111 // trim whiteapce following the comma
112 while(u_isUWhiteSpace(numbers[start])) {
113 start += 1;
114 }
115 }
116
117 while(u_isUWhiteSpace(numbers[start])) {
118 start += 1;
119 }
120
121 // trim trailing whitespace
122 end = numbers.length();
123 while(u_isUWhiteSpace(numbers[end - 1])) {
124 end -= 1;
125 }
126
127 len = numbers.extract(start, end - start, number, ARRAY_SIZE(number), US_INV);
128 number[len] = '\0';
129 sscanf(number, "%f", &array[count]);
130
131 return array;
132 }
133
134 U_CDECL_BEGIN
readTestFile(const char * testFilePath,TestCaseCallback callback)135 void readTestFile(const char *testFilePath, TestCaseCallback callback)
136 {
137 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
138 UErrorCode status = U_ZERO_ERROR;
139 UXMLParser *parser = UXMLParser::createParser(status);
140 UXMLElement *root = parser->parseFile(testFilePath, status);
141
142 if (root == NULL) {
143 log_err("Could not open the test data file: %s\n", testFilePath);
144 delete parser;
145 return;
146 }
147
148 UnicodeString test_case = UNICODE_STRING_SIMPLE("test-case");
149 UnicodeString test_text = UNICODE_STRING_SIMPLE("test-text");
150 UnicodeString test_font = UNICODE_STRING_SIMPLE("test-font");
151 UnicodeString result_glyphs = UNICODE_STRING_SIMPLE("result-glyphs");
152 UnicodeString result_indices = UNICODE_STRING_SIMPLE("result-indices");
153 UnicodeString result_positions = UNICODE_STRING_SIMPLE("result-positions");
154
155 // test-case attributes
156 UnicodeString id_attr = UNICODE_STRING_SIMPLE("id");
157 UnicodeString script_attr = UNICODE_STRING_SIMPLE("script");
158 UnicodeString lang_attr = UNICODE_STRING_SIMPLE("lang");
159
160 // test-font attributes
161 UnicodeString name_attr = UNICODE_STRING_SIMPLE("name");
162 UnicodeString ver_attr = UNICODE_STRING_SIMPLE("version");
163 UnicodeString cksum_attr = UNICODE_STRING_SIMPLE("checksum");
164
165 const UXMLElement *testCase;
166 int32_t tc = 0;
167
168 while((testCase = root->nextChildElement(tc)) != NULL) {
169 if (testCase->getTagName().compare(test_case) == 0) {
170 char *id = getCString(testCase->getAttribute(id_attr));
171 char *script = getCString(testCase->getAttribute(script_attr));
172 char *lang = getCString(testCase->getAttribute(lang_attr));
173 char *fontName = NULL;
174 char *fontVer = NULL;
175 char *fontCksum = NULL;
176 const UXMLElement *element;
177 int32_t ec = 0;
178 int32_t charCount = 0;
179 // int32_t typoFlags = 3; // kerning + ligatures...
180 UScriptCode scriptCode;
181 le_int32 languageCode = -1;
182 UnicodeString text, glyphs, indices, positions;
183 int32_t glyphCount = 0, indexCount = 0, positionCount = 0;
184 TestResult expected = {0, NULL, NULL, NULL};
185
186 uscript_getCode(script, &scriptCode, 1, &status);
187 if (LE_FAILURE(status)) {
188 log_err("invalid script name: %s.\n", script);
189 goto free_c_strings;
190 }
191
192 if (lang != NULL) {
193 languageCode = getLanguageCode(lang);
194
195 if (languageCode < 0) {
196 log_err("invalid language name: %s.\n", lang);
197 goto free_c_strings;
198 }
199 }
200
201 while((element = testCase->nextChildElement(ec)) != NULL) {
202 UnicodeString tag = element->getTagName();
203
204 // TODO: make sure that each element is only used once.
205 if (tag.compare(test_font) == 0) {
206 fontName = getCString(element->getAttribute(name_attr));
207 fontVer = getCString(element->getAttribute(ver_attr));
208 fontCksum = getCString(element->getAttribute(cksum_attr));
209
210 } else if (tag.compare(test_text) == 0) {
211 text = element->getText(TRUE);
212 charCount = text.length();
213 } else if (tag.compare(result_glyphs) == 0) {
214 glyphs = element->getText(TRUE);
215 } else if (tag.compare(result_indices) == 0) {
216 indices = element->getText(TRUE);
217 } else if (tag.compare(result_positions) == 0) {
218 positions = element->getText(TRUE);
219 } else {
220 // an unknown tag...
221 char *cTag = getCString(&tag);
222
223 log_info("Test %s: unknown element with tag \"%s\"\n", id, cTag);
224 freeCString(cTag);
225 }
226 }
227
228 expected.glyphs = (LEGlyphID *) getHexArray(glyphs, glyphCount);
229 expected.indices = (le_int32 *) getHexArray(indices, indexCount);
230 expected.positions = getFloatArray(positions, positionCount);
231
232 expected.glyphCount = glyphCount;
233
234 if (glyphCount < charCount || indexCount != glyphCount || positionCount < glyphCount * 2 + 2) {
235 log_err("Test %s: inconsistent input data: charCount = %d, glyphCount = %d, indexCount = %d, positionCount = %d\n",
236 id, charCount, glyphCount, indexCount, positionCount);
237 goto free_expected;
238 };
239
240 (*callback)(id, fontName, fontVer, fontCksum, scriptCode, languageCode, text.getBuffer(), charCount, &expected);
241
242 free_expected:
243 DELETE_ARRAY(expected.positions);
244 DELETE_ARRAY(expected.indices);
245 DELETE_ARRAY(expected.glyphs);
246
247 free_c_strings:
248 freeCString(fontCksum);
249 freeCString(fontVer);
250 freeCString(fontName);
251 freeCString(lang);
252 freeCString(script);
253 freeCString(id);
254 }
255 }
256
257 delete root;
258 delete parser;
259 #endif
260 }
261 U_CDECL_END
262