// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #include "core/fxcrt/xml/cfx_xmlsyntaxparser.h"
6 
7 #include <memory>
8 
9 #include "core/fxcrt/cfx_seekablestreamproxy.h"
10 #include "core/fxcrt/fx_codepage.h"
11 #include "testing/gtest/include/gtest/gtest.h"
12 #include "testing/test_support.h"
13 
// A CDATA section inside an element is expected to come back as one CData
// token. The expected text below leaves out the "<![CDATA[" and "]]>"
// delimiters but keeps every character in between, including whitespace.
TEST(CFX_XMLSyntaxParserTest, CData) {
  static const char kXml[] =
      "<script contentType=\"application/x-javascript\">\n"
      "  <![CDATA[\n"
      "    if (a[1] < 3)\n"
      "      app.alert(\"Tclams\");\n"
      "  ]]>\n"
      "</script>";

  // Text the parser should report for the CDATA token.
  const wchar_t* const expected_cdata =
      L"\n"
      L"    if (a[1] < 3)\n"
      L"      app.alert(\"Tclams\");\n"
      L"  ";

  // Feed the raw bytes through a stream whose code page is set to UTF-8.
  auto stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
      reinterpret_cast<uint8_t*>(const_cast<char*>(kXml)), strlen(kXml));
  stream->SetCodePage(FX_CODEPAGE_UTF8);
  CFX_XMLSyntaxParser parser(stream);

  // <script ...
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  // ... contentType="application/x-javascript"
  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // ElementBreak follows the attribute; the whitespace before the CDATA
  // section is then reported as text.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n  ", parser.GetTextData());

  // The CDATA payload itself.
  ASSERT_EQ(FX_XmlSyntaxResult::CData, parser.DoSyntaxParse());
  ASSERT_EQ(expected_cdata, parser.GetTextData());

  // Newline between "]]>" and the closing tag.
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n", parser.GetTextData());

  // </script>, after which the input is exhausted.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
59 
// Same shape as the plain CDATA case, but the CDATA payload contains the
// literal text "</script>". The expected result keeps that text inside the
// CData token; only the "</script>" after "]]>" is reported as ElementClose.
TEST(CFX_XMLSyntaxParserTest, CDataWithInnerScript) {
  static const char kXml[] =
      "<script contentType=\"application/x-javascript\">\n"
      "  <![CDATA[\n"
      "    if (a[1] < 3)\n"
      "      app.alert(\"Tclams\");\n"
      "    </script>\n"
      "  ]]>\n"
      "</script>";

  // Text the parser should report for the CDATA token.
  const wchar_t* const expected_cdata =
      L"\n"
      L"    if (a[1] < 3)\n"
      L"      app.alert(\"Tclams\");\n"
      L"    </script>\n"
      L"  ";

  // Feed the raw bytes through a stream whose code page is set to UTF-8.
  auto stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
      reinterpret_cast<uint8_t*>(const_cast<char*>(kXml)), strlen(kXml));
  stream->SetCodePage(FX_CODEPAGE_UTF8);
  CFX_XMLSyntaxParser parser(stream);

  // <script ...
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  // ... contentType="application/x-javascript"
  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // ElementBreak follows the attribute; the whitespace before the CDATA
  // section is then reported as text.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n  ", parser.GetTextData());

  // The CDATA payload, embedded "</script>" included.
  ASSERT_EQ(FX_XmlSyntaxResult::CData, parser.DoSyntaxParse());
  ASSERT_EQ(expected_cdata, parser.GetTextData());

  // Newline between "]]>" and the real closing tag.
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n", parser.GetTextData());

  // </script>, after which the input is exhausted.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
107 
// The element body contains the bare sequence "<!>". The expected token
// stream has no token for it: the text before it is followed directly by the
// text after it, and the element then closes normally.
TEST(CFX_XMLSyntaxParserTest, ArrowBangArrow) {
  static const char kXml[] =
      "<script contentType=\"application/x-javascript\">\n"
      "  <!>\n"
      "</script>";

  // Feed the raw bytes through a stream whose code page is set to UTF-8.
  auto stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
      reinterpret_cast<uint8_t*>(const_cast<char*>(kXml)), strlen(kXml));
  stream->SetCodePage(FX_CODEPAGE_UTF8);
  CFX_XMLSyntaxParser parser(stream);

  // <script ...
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  // ... contentType="application/x-javascript"
  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // ElementBreak follows the attribute; the whitespace before "<!>" is then
  // reported as text.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n  ", parser.GetTextData());

  // Nothing is reported for "<!>"; the next token is the newline after it.
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n", parser.GetTextData());

  // </script>, after which the input is exhausted.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
142 
// The element body contains "<![>". After the leading text the parser is
// expected to report EndOfString straight away: no trailing text and no
// ElementClose for "</script>".
TEST(CFX_XMLSyntaxParserTest, ArrowBangBracketArrow) {
  static const char kXml[] =
      "<script contentType=\"application/x-javascript\">\n"
      "  <![>\n"
      "</script>";

  // Feed the raw bytes through a stream whose code page is set to UTF-8.
  auto stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
      reinterpret_cast<uint8_t*>(const_cast<char*>(kXml)), strlen(kXml));
  stream->SetCodePage(FX_CODEPAGE_UTF8);
  CFX_XMLSyntaxParser parser(stream);

  // <script ...
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  // ... contentType="application/x-javascript"
  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // ElementBreak follows the attribute; the whitespace before "<![>" is then
  // reported as text.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n  ", parser.GetTextData());

  // From here the parser walks to the end of the input without emitting any
  // further token.
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
172 
// The element body contains "<![CDATA>", i.e. a CDATA opener that lacks its
// second '['. After the leading text the parser is expected to report
// EndOfString straight away: no CData token and no ElementClose.
TEST(CFX_XMLSyntaxParserTest, IncompleteCData) {
  static const char kXml[] =
      "<script contentType=\"application/x-javascript\">\n"
      "  <![CDATA>\n"
      "</script>";

  // Feed the raw bytes through a stream whose code page is set to UTF-8.
  auto stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
      reinterpret_cast<uint8_t*>(const_cast<char*>(kXml)), strlen(kXml));
  stream->SetCodePage(FX_CODEPAGE_UTF8);
  CFX_XMLSyntaxParser parser(stream);

  // <script ...
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  // ... contentType="application/x-javascript"
  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // ElementBreak follows the attribute; the whitespace before "<![CDATA>" is
  // then reported as text.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n  ", parser.GetTextData());

  // From here the parser walks to the end of the input without emitting any
  // further token.
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
202 
// The element body opens a CDATA section with "<![CDATA[" but the input never
// contains "]]>". After the leading text the parser is expected to report
// EndOfString straight away: no CData token and no ElementClose.
TEST(CFX_XMLSyntaxParserTest, UnClosedCData) {
  static const char kXml[] =
      "<script contentType=\"application/x-javascript\">\n"
      "  <![CDATA[\n"
      "</script>";

  // Feed the raw bytes through a stream whose code page is set to UTF-8.
  auto stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
      reinterpret_cast<uint8_t*>(const_cast<char*>(kXml)), strlen(kXml));
  stream->SetCodePage(FX_CODEPAGE_UTF8);
  CFX_XMLSyntaxParser parser(stream);

  // <script ...
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  // ... contentType="application/x-javascript"
  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // ElementBreak follows the attribute; the whitespace before "<![CDATA[" is
  // then reported as text.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n  ", parser.GetTextData());

  // From here the parser walks to the end of the input without emitting any
  // further token.
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
232 
// A CDATA section with nothing between "<![CDATA[" and "]]>" is still
// expected to produce a CData token, whose text is the empty string.
TEST(CFX_XMLSyntaxParserTest, EmptyCData) {
  static const char kXml[] =
      "<script contentType=\"application/x-javascript\">\n"
      "  <![CDATA[]]>\n"
      "</script>";

  // Feed the raw bytes through a stream whose code page is set to UTF-8.
  auto stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
      reinterpret_cast<uint8_t*>(const_cast<char*>(kXml)), strlen(kXml));
  stream->SetCodePage(FX_CODEPAGE_UTF8);
  CFX_XMLSyntaxParser parser(stream);

  // <script ...
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  // ... contentType="application/x-javascript"
  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // ElementBreak follows the attribute; the whitespace before the CDATA
  // section is then reported as text.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n  ", parser.GetTextData());

  // The empty CDATA payload.
  ASSERT_EQ(FX_XmlSyntaxResult::CData, parser.DoSyntaxParse());
  ASSERT_EQ(L"", parser.GetTextData());

  // Newline between "]]>" and the closing tag.
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n", parser.GetTextData());

  // </script>, after which the input is exhausted.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
269 
// A well-formed comment ("<!-- A Comment -->") in the element body is
// expected to produce no token of its own: the text before it is followed
// directly by the text after it.
TEST(CFX_XMLSyntaxParserTest, Comment) {
  static const char kXml[] =
      "<script contentType=\"application/x-javascript\">\n"
      "  <!-- A Comment -->\n"
      "</script>";

  // Feed the raw bytes through a stream whose code page is set to UTF-8.
  auto stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
      reinterpret_cast<uint8_t*>(const_cast<char*>(kXml)), strlen(kXml));
  stream->SetCodePage(FX_CODEPAGE_UTF8);
  CFX_XMLSyntaxParser parser(stream);

  // <script ...
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  // ... contentType="application/x-javascript"
  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // ElementBreak follows the attribute; the whitespace before the comment is
  // then reported as text.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n  ", parser.GetTextData());

  // Nothing is reported for the comment; the next token is the newline
  // after it.
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n", parser.GetTextData());

  // </script>, after which the input is exhausted.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
303 
// The comment opener is missing a dash ("<!- A Comment -->"). The expected
// token stream is the same as for a well-formed comment: no token for the
// construct itself, then the trailing newline and a normal element close.
TEST(CFX_XMLSyntaxParserTest, IncorrectCommentStart) {
  static const char kXml[] =
      "<script contentType=\"application/x-javascript\">\n"
      "  <!- A Comment -->\n"
      "</script>";

  // Feed the raw bytes through a stream whose code page is set to UTF-8.
  auto stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
      reinterpret_cast<uint8_t*>(const_cast<char*>(kXml)), strlen(kXml));
  stream->SetCodePage(FX_CODEPAGE_UTF8);
  CFX_XMLSyntaxParser parser(stream);

  // <script ...
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  // ... contentType="application/x-javascript"
  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // ElementBreak follows the attribute; the whitespace before "<!-" is then
  // reported as text.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n  ", parser.GetTextData());

  // Nothing is reported for the malformed comment; the next token is the
  // newline after it.
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n", parser.GetTextData());

  // </script>, after which the input is exhausted.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
337 
// An empty comment ("<!---->") is expected to behave like any other
// well-formed comment: no token of its own, and parsing continues with the
// text that follows it.
TEST(CFX_XMLSyntaxParserTest, CommentEmpty) {
  static const char kXml[] =
      "<script contentType=\"application/x-javascript\">\n"
      "  <!---->\n"
      "</script>";

  // Feed the raw bytes through a stream whose code page is set to UTF-8.
  auto stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
      reinterpret_cast<uint8_t*>(const_cast<char*>(kXml)), strlen(kXml));
  stream->SetCodePage(FX_CODEPAGE_UTF8);
  CFX_XMLSyntaxParser parser(stream);

  // <script ...
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  // ... contentType="application/x-javascript"
  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // ElementBreak follows the attribute; the whitespace before the comment is
  // then reported as text.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n  ", parser.GetTextData());

  // Nothing is reported for the empty comment; the next token is the newline
  // after it.
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n", parser.GetTextData());

  // </script>, after which the input is exhausted.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
371 
// The element body contains "<!--->" (three dashes). After the leading text
// the parser is expected to report EndOfString straight away: no trailing
// text and no ElementClose for "</script>".
TEST(CFX_XMLSyntaxParserTest, CommentThreeDash) {
  static const char kXml[] =
      "<script contentType=\"application/x-javascript\">\n"
      "  <!--->\n"
      "</script>";

  // Feed the raw bytes through a stream whose code page is set to UTF-8.
  auto stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
      reinterpret_cast<uint8_t*>(const_cast<char*>(kXml)), strlen(kXml));
  stream->SetCodePage(FX_CODEPAGE_UTF8);
  CFX_XMLSyntaxParser parser(stream);

  // <script ...
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  // ... contentType="application/x-javascript"
  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // ElementBreak follows the attribute; the whitespace before "<!--->" is
  // then reported as text.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n  ", parser.GetTextData());

  // No further token is emitted before the end of the input.
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
399 
// The element body contains "<!-->" (two dashes). After the leading text the
// parser is expected to report EndOfString straight away: no trailing text
// and no ElementClose for "</script>".
TEST(CFX_XMLSyntaxParserTest, CommentTwoDash) {
  static const char kXml[] =
      "<script contentType=\"application/x-javascript\">\n"
      "  <!-->\n"
      "</script>";

  // Feed the raw bytes through a stream whose code page is set to UTF-8.
  auto stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
      reinterpret_cast<uint8_t*>(const_cast<char*>(kXml)), strlen(kXml));
  stream->SetCodePage(FX_CODEPAGE_UTF8);
  CFX_XMLSyntaxParser parser(stream);

  // <script ...
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  // ... contentType="application/x-javascript"
  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // ElementBreak follows the attribute; the whitespace before "<!-->" is then
  // reported as text.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"\n  ", parser.GetTextData());

  // No further token is emitted before the end of the input.
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
427 
// Numeric character references in element text are expected to be replaced
// by the characters they name and merged into one Text token:
//   &#66; -> 'B', &#x54; -> 'T', &#x...48; -> 'H', &#x...AB48; -> U+AB48.
// The final all-zero reference adds nothing to the expected text.
TEST(CFX_XMLSyntaxParserTest, Entities) {
  static const char kXml[] =
      "<script contentType=\"application/x-javascript\">"
      "&#66;"
      "&#x54;"
      "&#x00000000000000000048;"
      "&#x0000000000000000AB48;"
      "&#x0000000000000000000;"
      "</script>";

  // Feed the raw bytes through a stream whose code page is set to UTF-8.
  auto stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
      reinterpret_cast<uint8_t*>(const_cast<char*>(kXml)), strlen(kXml));
  stream->SetCodePage(FX_CODEPAGE_UTF8);
  CFX_XMLSyntaxParser parser(stream);

  // <script ...
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  // ... contentType="application/x-javascript"
  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // ElementBreak follows the attribute; all five references then arrive as a
  // single run of text.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"BTH\xab48", parser.GetTextData());

  // </script>, after which the input is exhausted.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
462 
// Two hexadecimal character references with very large values. The expected
// text is two spaces, matching the two references in the input.
TEST(CFX_XMLSyntaxParserTest, EntityOverflowHex) {
  static const char kXml[] =
      "<script contentType=\"application/x-javascript\">"
      "&#xaDBDFFFFF;"
      "&#xafffffffffffffffffffffffffffffffff;"
      "</script>";

  // Feed the raw bytes through a stream whose code page is set to UTF-8.
  auto stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
      reinterpret_cast<uint8_t*>(const_cast<char*>(kXml)), strlen(kXml));
  stream->SetCodePage(FX_CODEPAGE_UTF8);
  CFX_XMLSyntaxParser parser(stream);

  // <script ...
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  // ... contentType="application/x-javascript"
  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // ElementBreak follows the attribute; the two references then arrive as a
  // single run of text.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"  ", parser.GetTextData());

  // </script>, after which the input is exhausted.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
494 
// Two decimal character references with very large values. The expected text
// is two spaces, matching the two references in the input.
TEST(CFX_XMLSyntaxParserTest, EntityOverflowDecimal) {
  static const char kXml[] =
      "<script contentType=\"application/x-javascript\">"
      "&#2914910205;"
      "&#29149102052342342134521341234512351234213452315;"
      "</script>";

  // Feed the raw bytes through a stream whose code page is set to UTF-8.
  auto stream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
      reinterpret_cast<uint8_t*>(const_cast<char*>(kXml)), strlen(kXml));
  stream->SetCodePage(FX_CODEPAGE_UTF8);
  CFX_XMLSyntaxParser parser(stream);

  // <script ...
  ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());

  // ... contentType="application/x-javascript"
  ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse());
  ASSERT_EQ(L"contentType", parser.GetAttributeName());
  ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse());
  ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue());

  // ElementBreak follows the attribute; the two references then arrive as a
  // single run of text.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
  ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
  ASSERT_EQ(L"  ", parser.GetTextData());

  // </script>, after which the input is exhausted.
  ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
  ASSERT_EQ(L"script", parser.GetTagName());
  ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
}
526 
// Spot-checks CFX_XMLSyntaxParser::IsXMLNameChar() on '-' and on code points
// at or near the edges of 0x2070..0x218F and 0xFDF0..0xFFFD.
// NOTE(review): the second argument presumably means "this is the first
// character of a name" -- confirm against the declaration.
TEST(CFX_XMLSyntaxParserTest, IsXMLNameChar) {
  struct Case {
    wchar_t ch;       // Character passed as the first argument.
    bool second_arg;  // Flag passed as the second argument.
    bool expected;    // Required return value.
  };

  static const Case kCases[] = {
      // '-' is rejected when the flag is true and accepted when it is false.
      {L'-', true, false},
      {L'-', false, true},

      // Around 0x2070..0x218F.
      {0x2069, true, false},
      {0x2070, true, true},
      {0x2073, true, true},
      {0x218F, true, true},
      {0x2190, true, false},

      // Around 0xFDF0..0xFFFD.
      {0xFDEF, true, false},
      {0xFDF0, true, true},
      {0xFDF1, true, true},
      {0xFFFD, true, true},
      {0xFFFE, true, false},
  };

  // EXPECT (not ASSERT) so that every row is checked even after a failure.
  for (const Case& item : kCases) {
    EXPECT_EQ(item.expected,
              CFX_XMLSyntaxParser::IsXMLNameChar(item.ch, item.second_arg))
        << "code point " << static_cast<unsigned int>(item.ch)
        << ", second argument " << item.second_arg;
  }
}
543