1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "xfa/fxfa/fm2js/cxfa_fmlexer.h"
6 
7 #include <vector>
8 
9 #include "testing/gtest/include/gtest/gtest.h"
10 #include "third_party/base/ptr_util.h"
11 
// A default-constructed WideStringView should lex directly to TOKeof and
// leave the lexer reporting IsComplete().
TEST(CXFA_FMLexerTest, NullString) {
  WideStringView unset_view;
  CXFA_FMLexer null_lexer(unset_view);

  const CXFA_FMToken first = null_lexer.NextToken();
  EXPECT_EQ(TOKeof, first.m_type);
  EXPECT_TRUE(null_lexer.IsComplete());
}
19 
// An empty (but non-null) wide literal should lex directly to TOKeof and
// leave the lexer reporting IsComplete().
TEST(CXFA_FMLexerTest, EmptyString) {
  CXFA_FMLexer empty_lexer(L"");

  const CXFA_FMToken first = empty_lexer.NextToken();
  EXPECT_EQ(TOKeof, first.m_type);
  EXPECT_TRUE(empty_lexer.IsComplete());
}
26 
// Exercises numeric literals. A leading minus is expected as its own token,
// and integer, decimal and exponent forms are expected back as TOKnumber with
// the token text equal to the source text (no rounding is applied today; see
// the TODOs below).
TEST(CXFA_FMLexerTest, Numbers) {
  auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"-12");
  CXFA_FMToken token = lexer->NextToken();
  // TODO(dsinclair): Should this return -12 instead of two tokens?
  EXPECT_EQ(TOKminus, token.m_type);
  token = lexer->NextToken();
  // Check the token type as well as its text, so a regression that returns
  // "12" under a different token kind is caught.
  EXPECT_EQ(TOKnumber, token.m_type);
  EXPECT_EQ(L"12", token.m_string);
  token = lexer->NextToken();
  EXPECT_EQ(TOKeof, token.m_type);

  lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"1.5362");
  token = lexer->NextToken();
  EXPECT_EQ(TOKnumber, token.m_type);
  EXPECT_EQ(L"1.5362", token.m_string);

  lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"0.875");
  token = lexer->NextToken();
  EXPECT_EQ(TOKnumber, token.m_type);
  EXPECT_EQ(L"0.875", token.m_string);

  // Exponent forms, lower- and upper-case marker.
  lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"5.56e-2");
  token = lexer->NextToken();
  EXPECT_EQ(TOKnumber, token.m_type);
  EXPECT_EQ(L"5.56e-2", token.m_string);

  lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"1.234E10");
  token = lexer->NextToken();
  EXPECT_EQ(TOKnumber, token.m_type);
  EXPECT_EQ(L"1.234E10", token.m_string);

  lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"123456789.012345678");
  token = lexer->NextToken();
  EXPECT_EQ(TOKnumber, token.m_type);
  // TODO(dsinclair): This should round as per IEEE 64-bit values.
  // EXPECT_EQ(L"123456789.01234567", token.m_string);
  EXPECT_EQ(L"123456789.012345678", token.m_string);

  lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"99999999999999999");
  token = lexer->NextToken();
  EXPECT_EQ(TOKnumber, token.m_type);
  // TODO(dsinclair): This is spec'd as rounding when > 16 significant digits
  // prior to the exponent.
  // EXPECT_EQ(L"100000000000000000", token.m_string);
  EXPECT_EQ(L"99999999999999999", token.m_string);
  EXPECT_TRUE(lexer->IsComplete());
}
73 
74 // The quotes are stripped in CXFA_FMStringExpression::ToJavaScript.
TEST(CXFA_FMLexerTest,Strings)75 TEST(CXFA_FMLexerTest, Strings) {
76   auto lexer =
77       pdfium::MakeUnique<CXFA_FMLexer>(L"\"The cat jumped over the fence.\"");
78   CXFA_FMToken token = lexer->NextToken();
79   EXPECT_EQ(TOKstring, token.m_type);
80   EXPECT_EQ(L"\"The cat jumped over the fence.\"", token.m_string);
81 
82   token = lexer->NextToken();
83   EXPECT_EQ(TOKeof, token.m_type);
84 
85   lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"\"\"");
86   token = lexer->NextToken();
87   EXPECT_EQ(TOKstring, token.m_type);
88   EXPECT_EQ(L"\"\"", token.m_string);
89 
90   lexer = pdfium::MakeUnique<CXFA_FMLexer>(
91       L"\"The message reads: \"\"Warning: Insufficient Memory\"\"\"");
92   token = lexer->NextToken();
93   EXPECT_EQ(TOKstring, token.m_type);
94   EXPECT_EQ(L"\"The message reads: \"\"Warning: Insufficient Memory\"\"\"",
95             token.m_string);
96 
97   lexer = pdfium::MakeUnique<CXFA_FMLexer>(
98       L"\"\\u0047\\u006f\\u0066\\u0069\\u0073\\u0068\\u0021\\u000d\\u000a\"");
99   token = lexer->NextToken();
100   EXPECT_EQ(TOKstring, token.m_type);
101   EXPECT_EQ(
102       L"\"\\u0047\\u006f\\u0066\\u0069\\u0073\\u0068\\u0021\\u000d\\u000a\"",
103       token.m_string);
104   EXPECT_TRUE(lexer->IsComplete());
105 }
106 
107 // Note, 'this' is a keyword but is not matched by the lexer.
TEST(CXFA_FMLexerTest,OperatorsAndKeywords)108 TEST(CXFA_FMLexerTest, OperatorsAndKeywords) {
109   struct {
110     const wchar_t* op;
111     XFA_FM_TOKEN token;
112   } op[] = {{L"+", TOKplus},
113             {L"/", TOKdiv},
114             {L"-", TOKminus},
115             {L"&", TOKand},
116             {L"|", TOKor},
117             {L"*", TOKmul},
118             {L"<", TOKlt},
119             {L">", TOKgt},
120             {L"==", TOKeq},
121             {L"<>", TOKne},
122             {L"<=", TOKle},
123             {L">=", TOKge},
124             {L"and", TOKksand},
125             {L"break", TOKbreak},
126             {L"continue", TOKcontinue},
127             {L"do", TOKdo},
128             {L"downto", TOKdownto},
129             {L"else", TOKelse},
130             {L"elseif", TOKelseif},
131             {L"end", TOKend},
132             {L"endfor", TOKendfor},
133             {L"endfunc", TOKendfunc},
134             {L"endif", TOKendif},
135             {L"endwhile", TOKendwhile},
136             {L"eq", TOKkseq},
137             {L"exit", TOKexit},
138             {L"for", TOKfor},
139             {L"foreach", TOKforeach},
140             {L"func", TOKfunc},
141             {L"ge", TOKksge},
142             {L"gt", TOKksgt},
143             {L"if", TOKif},
144             {L"in", TOKin},
145             {L"infinity", TOKinfinity},
146             {L"le", TOKksle},
147             {L"lt", TOKkslt},
148             {L"nan", TOKnan},
149             {L"ne", TOKksne},
150             {L"not", TOKksnot},
151             {L"null", TOKnull},
152             {L"or", TOKksor},
153             {L"return", TOKreturn},
154             {L"step", TOKstep},
155             {L"then", TOKthen},
156             {L"throw", TOKthrow},
157             {L"upto", TOKupto},
158             {L"var", TOKvar},
159             {L"while", TOKwhile},
160 
161             // The following are defined but aren't in the spec.
162             {L"(", TOKlparen},
163             {L")", TOKrparen},
164             {L",", TOKcomma},
165             {L".", TOKdot},
166             {L"[", TOKlbracket},
167             {L"]", TOKrbracket},
168             {L"..", TOKdotdot},
169             {L".#", TOKdotscream},
170             {L".*", TOKdotstar}};
171 
172   for (size_t i = 0; i < FX_ArraySize(op); ++i) {
173     auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(op[i].op);
174     CXFA_FMToken token = lexer->NextToken();
175     EXPECT_EQ(op[i].token, token.m_type);
176     EXPECT_TRUE(lexer->IsComplete());
177   }
178 }
179 
// Covers both comment introducers used by the inputs below, "//" and ";".
// Comment-only input is expected to lex to TOKeof; a comment between two
// tokens is expected to be skipped up to the newline.
TEST(CXFA_FMLexerTest, Comments) {
  // "//" with trailing text.
  {
    CXFA_FMLexer lexer(L"// Empty.");
    const CXFA_FMToken only = lexer.NextToken();
    EXPECT_EQ(TOKeof, only.m_type);
  }

  // Bare "//".
  {
    CXFA_FMLexer lexer(L"//");
    const CXFA_FMToken only = lexer.NextToken();
    EXPECT_EQ(TOKeof, only.m_type);
  }

  // "//" comment between a number and a string on the next line.
  {
    CXFA_FMLexer lexer(L"123 // Empty.\n\"str\"");
    const CXFA_FMToken before = lexer.NextToken();
    EXPECT_EQ(TOKnumber, before.m_type);
    EXPECT_EQ(L"123", before.m_string);

    const CXFA_FMToken after = lexer.NextToken();
    EXPECT_EQ(TOKstring, after.m_type);
    EXPECT_EQ(L"\"str\"", after.m_string);

    const CXFA_FMToken trailer = lexer.NextToken();
    EXPECT_EQ(TOKeof, trailer.m_type);
  }

  // Bare ";".
  {
    CXFA_FMLexer lexer(L";");
    const CXFA_FMToken only = lexer.NextToken();
    EXPECT_EQ(TOKeof, only.m_type);
  }

  // ";" with trailing text.
  {
    CXFA_FMLexer lexer(L"; Empty.");
    const CXFA_FMToken only = lexer.NextToken();
    EXPECT_EQ(TOKeof, only.m_type);
  }

  // ";" comment between a number and a string on the next line.
  {
    CXFA_FMLexer lexer(L"123 ;Empty.\n\"str\"");
    const CXFA_FMToken before = lexer.NextToken();
    EXPECT_EQ(TOKnumber, before.m_type);
    EXPECT_EQ(L"123", before.m_string);

    const CXFA_FMToken after = lexer.NextToken();
    EXPECT_EQ(TOKstring, after.m_type);
    EXPECT_EQ(L"\"str\"", after.m_string);

    const CXFA_FMToken trailer = lexer.NextToken();
    EXPECT_EQ(TOKeof, trailer.m_type);
    EXPECT_TRUE(lexer.IsComplete());
  }
}
222 
// Every entry is expected to lex as a single TOKidentifier whose text equals
// the input, leaving the lexer reporting IsComplete().
TEST(CXFA_FMLexerTest, ValidIdentifiers) {
  const wchar_t* const kIdentifiers[] = {
      L"a", L"an_identifier", L"_ident", L"$ident", L"!ident", L"GetAddr"};

  for (const wchar_t* name : kIdentifiers) {
    CXFA_FMLexer lexer(name);
    const CXFA_FMToken lexed = lexer.NextToken();
    EXPECT_EQ(TOKidentifier, lexed.m_type);
    EXPECT_EQ(name, lexed.m_string);
    EXPECT_TRUE(lexer.IsComplete());
  }
}
234 
// Inputs that are not valid identifiers. The assertions pin where the lexer
// is expected to start producing TOKreserver, and that the last lexer does
// not report IsComplete().
TEST(CXFA_FMLexerTest, InvalidIdentifiers) {
  // Leading '#'.
  {
    CXFA_FMLexer lexer(L"#a");
    const CXFA_FMToken first = lexer.NextToken();
    EXPECT_EQ(TOKreserver, first.m_type);
  }

  // Leading digit.
  {
    CXFA_FMLexer lexer(L"1a");
    const CXFA_FMToken first = lexer.NextToken();
    EXPECT_EQ(TOKreserver, first.m_type);
  }

  // '@' in the middle: the first token is accepted, the next two are not.
  {
    CXFA_FMLexer lexer(L"an@identifier");
    const CXFA_FMToken first = lexer.NextToken();
    EXPECT_NE(TOKreserver, first.m_type);

    const CXFA_FMToken second = lexer.NextToken();
    EXPECT_EQ(TOKreserver, second.m_type);

    const CXFA_FMToken third = lexer.NextToken();
    EXPECT_EQ(TOKreserver, third.m_type);
  }

  // Trailing '@': the first token is accepted, the next is not.
  {
    CXFA_FMLexer lexer(L"_ident@");
    const CXFA_FMToken first = lexer.NextToken();
    EXPECT_NE(TOKreserver, first.m_type);

    const CXFA_FMToken second = lexer.NextToken();
    EXPECT_EQ(TOKreserver, second.m_type);
    EXPECT_FALSE(lexer.IsComplete());
  }
}
259 
// Space, tab (\t and \x9), form feed (\xc) and vertical tab (\xb) are
// expected to be skipped, both alone and between two number tokens.
TEST(CXFA_FMLexerTest, Whitespace) {
  // Whitespace-only input.
  {
    CXFA_FMLexer lexer(L" \t\xc\x9\xb");
    const CXFA_FMToken only = lexer.NextToken();
    EXPECT_EQ(TOKeof, only.m_type);
  }

  // Whitespace run separating two numbers.
  {
    CXFA_FMLexer lexer(L"123 \t\xc\x9\xb 456");
    const CXFA_FMToken left = lexer.NextToken();
    EXPECT_EQ(TOKnumber, left.m_type);
    EXPECT_EQ(L"123", left.m_string);

    const CXFA_FMToken right = lexer.NextToken();
    EXPECT_EQ(TOKnumber, right.m_type);
    EXPECT_EQ(L"456", right.m_string);

    const CXFA_FMToken trailer = lexer.NextToken();
    EXPECT_EQ(TOKeof, trailer.m_type);
    EXPECT_TRUE(lexer.IsComplete());
  }
}
278 
// Input is the five characters '-', '2', NUL, '-', '2' (explicit length 5, so
// the embedded NUL is part of the view). The test expects "-" and "2" to be
// lexed, then TOKeof at the NUL, with IsComplete() reporting false.
TEST(CXFA_FMLexerTest, NullData) {
  const WideStringView input(L"\x2d\x32\x00\x2d\x32", 5);
  CXFA_FMLexer lexer(input);

  const CXFA_FMToken sign = lexer.NextToken();
  EXPECT_EQ(TOKminus, sign.m_type);

  const CXFA_FMToken digits = lexer.NextToken();
  EXPECT_EQ(TOKnumber, digits.m_type);
  EXPECT_EQ(L"2", digits.m_string);

  const CXFA_FMToken trailer = lexer.NextToken();
  EXPECT_EQ(TOKeof, trailer.m_type);
  EXPECT_FALSE(lexer.IsComplete());
}
293