1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "xfa/fxfa/fm2js/cxfa_fmlexer.h"
6
7 #include <vector>
8
9 #include "testing/gtest/include/gtest/gtest.h"
10 #include "third_party/base/ptr_util.h"
11
// Lexing a default-constructed string view: the first token is expected to be
// end-of-input and the lexer is expected to report completion.
TEST(CXFA_FMLexerTest, NullString) {
  WideStringView unset_view;
  CXFA_FMLexer lexer(unset_view);

  EXPECT_EQ(TOKeof, lexer.NextToken().m_type);
  EXPECT_TRUE(lexer.IsComplete());
}
19
// Lexing an empty wide-string literal: the first token is expected to be
// end-of-input and the lexer is expected to report completion.
TEST(CXFA_FMLexerTest, EmptyString) {
  CXFA_FMLexer lexer(L"");

  EXPECT_EQ(TOKeof, lexer.NextToken().m_type);
  EXPECT_TRUE(lexer.IsComplete());
}
26
// Exercises numeric literals: a leading minus sign, plain decimals, exponent
// forms, and inputs with more digits than the TODOs below say should be kept.
// For every number the test checks both the token type and the token text.
TEST(CXFA_FMLexerTest, Numbers) {
  auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"-12");
  CXFA_FMToken token = lexer->NextToken();
  // TODO(dsinclair): Should this return -12 instead of two tokens?
  EXPECT_EQ(TOKminus, token.m_type);
  token = lexer->NextToken();
  // Check the type as well as the text, so that a non-number token that
  // happens to carry the text "12" cannot satisfy this case.
  EXPECT_EQ(TOKnumber, token.m_type);
  EXPECT_EQ(L"12", token.m_string);
  token = lexer->NextToken();
  EXPECT_EQ(TOKeof, token.m_type);

  lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"1.5362");
  token = lexer->NextToken();
  EXPECT_EQ(TOKnumber, token.m_type);
  EXPECT_EQ(L"1.5362", token.m_string);

  lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"0.875");
  token = lexer->NextToken();
  EXPECT_EQ(TOKnumber, token.m_type);
  EXPECT_EQ(L"0.875", token.m_string);

  // Exponent forms, with lower-case and upper-case 'e'.
  lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"5.56e-2");
  token = lexer->NextToken();
  EXPECT_EQ(TOKnumber, token.m_type);
  EXPECT_EQ(L"5.56e-2", token.m_string);

  lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"1.234E10");
  token = lexer->NextToken();
  EXPECT_EQ(TOKnumber, token.m_type);
  EXPECT_EQ(L"1.234E10", token.m_string);

  lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"123456789.012345678");
  token = lexer->NextToken();
  EXPECT_EQ(TOKnumber, token.m_type);
  // TODO(dsinclair): This should round as per IEEE 64-bit values.
  // EXPECT_EQ(L"123456789.01234567", token.m_string);
  EXPECT_EQ(L"123456789.012345678", token.m_string);

  lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"99999999999999999");
  token = lexer->NextToken();
  EXPECT_EQ(TOKnumber, token.m_type);
  // TODO(dsinclair): This is spec'd as rounding when > 16 significant digits
  // prior to the exponent.
  // EXPECT_EQ(L"100000000000000000", token.m_string);
  EXPECT_EQ(L"99999999999999999", token.m_string);
  EXPECT_TRUE(lexer->IsComplete());
}
73
74 // The quotes are stripped in CXFA_FMStringExpression::ToJavaScript.
TEST(CXFA_FMLexerTest, Strings) {
  // In every case below the expected token text still includes the
  // surrounding double quotes.
  {
    // A simple quoted sentence, followed by end-of-input.
    CXFA_FMLexer lexer(L"\"The cat jumped over the fence.\"");
    CXFA_FMToken tok = lexer.NextToken();
    EXPECT_EQ(TOKstring, tok.m_type);
    EXPECT_EQ(L"\"The cat jumped over the fence.\"", tok.m_string);

    tok = lexer.NextToken();
    EXPECT_EQ(TOKeof, tok.m_type);
  }

  {
    // Just a pair of quotes.
    CXFA_FMLexer lexer(L"\"\"");
    CXFA_FMToken tok = lexer.NextToken();
    EXPECT_EQ(TOKstring, tok.m_type);
    EXPECT_EQ(L"\"\"", tok.m_string);
  }

  {
    // Doubled quotes inside the literal are expected to stay in one token.
    CXFA_FMLexer lexer(
        L"\"The message reads: \"\"Warning: Insufficient Memory\"\"\"");
    CXFA_FMToken tok = lexer.NextToken();
    EXPECT_EQ(TOKstring, tok.m_type);
    EXPECT_EQ(L"\"The message reads: \"\"Warning: Insufficient Memory\"\"\"",
              tok.m_string);
  }

  {
    // Backslash-u sequences are expected to come back verbatim, and the
    // lexer is expected to report completion after this single token.
    CXFA_FMLexer lexer(
        L"\"\\u0047\\u006f\\u0066\\u0069\\u0073\\u0068\\u0021\\u000d\\u000a\"");
    CXFA_FMToken tok = lexer.NextToken();
    EXPECT_EQ(TOKstring, tok.m_type);
    EXPECT_EQ(
        L"\"\\u0047\\u006f\\u0066\\u0069\\u0073\\u0068\\u0021\\u000d\\u000a\"",
        tok.m_string);
    EXPECT_TRUE(lexer.IsComplete());
  }
}
106
107 // Note, 'this' is a keyword but is not matched by the lexer.
TEST(CXFA_FMLexerTest, OperatorsAndKeywords) {
  // Pairs a source text with the single token type it is expected to yield.
  struct Expectation {
    const wchar_t* text;
    XFA_FM_TOKEN type;
  };

  const Expectation kExpectations[] = {
      {L"+", TOKplus},
      {L"/", TOKdiv},
      {L"-", TOKminus},
      {L"&", TOKand},
      {L"|", TOKor},
      {L"*", TOKmul},
      {L"<", TOKlt},
      {L">", TOKgt},
      {L"==", TOKeq},
      {L"<>", TOKne},
      {L"<=", TOKle},
      {L">=", TOKge},
      {L"and", TOKksand},
      {L"break", TOKbreak},
      {L"continue", TOKcontinue},
      {L"do", TOKdo},
      {L"downto", TOKdownto},
      {L"else", TOKelse},
      {L"elseif", TOKelseif},
      {L"end", TOKend},
      {L"endfor", TOKendfor},
      {L"endfunc", TOKendfunc},
      {L"endif", TOKendif},
      {L"endwhile", TOKendwhile},
      {L"eq", TOKkseq},
      {L"exit", TOKexit},
      {L"for", TOKfor},
      {L"foreach", TOKforeach},
      {L"func", TOKfunc},
      {L"ge", TOKksge},
      {L"gt", TOKksgt},
      {L"if", TOKif},
      {L"in", TOKin},
      {L"infinity", TOKinfinity},
      {L"le", TOKksle},
      {L"lt", TOKkslt},
      {L"nan", TOKnan},
      {L"ne", TOKksne},
      {L"not", TOKksnot},
      {L"null", TOKnull},
      {L"or", TOKksor},
      {L"return", TOKreturn},
      {L"step", TOKstep},
      {L"then", TOKthen},
      {L"throw", TOKthrow},
      {L"upto", TOKupto},
      {L"var", TOKvar},
      {L"while", TOKwhile},

      // The following are defined but aren't in the spec.
      {L"(", TOKlparen},
      {L")", TOKrparen},
      {L",", TOKcomma},
      {L".", TOKdot},
      {L"[", TOKlbracket},
      {L"]", TOKrbracket},
      {L"..", TOKdotdot},
      {L".#", TOKdotscream},
      {L".*", TOKdotstar},
  };

  // Each entry gets a fresh lexer; its first token must have the listed type
  // and the lexer must then report completion.
  for (const Expectation& expectation : kExpectations) {
    auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(expectation.text);
    CXFA_FMToken first = lexer->NextToken();
    EXPECT_EQ(expectation.type, first.m_type);
    EXPECT_TRUE(lexer->IsComplete());
  }
}
179
// Covers both comment introducers used in the inputs below: "//" and ";".
TEST(CXFA_FMLexerTest, Comments) {
  // "//" comments: input that is only a comment yields end-of-input.
  {
    CXFA_FMLexer lexer(L"// Empty.");
    EXPECT_EQ(TOKeof, lexer.NextToken().m_type);
  }
  {
    CXFA_FMLexer lexer(L"//");
    EXPECT_EQ(TOKeof, lexer.NextToken().m_type);
  }
  {
    // Tokens before the comment and on the following line are still produced.
    CXFA_FMLexer lexer(L"123 // Empty.\n\"str\"");
    CXFA_FMToken tok = lexer.NextToken();
    EXPECT_EQ(TOKnumber, tok.m_type);
    EXPECT_EQ(L"123", tok.m_string);

    tok = lexer.NextToken();
    EXPECT_EQ(TOKstring, tok.m_type);
    EXPECT_EQ(L"\"str\"", tok.m_string);

    tok = lexer.NextToken();
    EXPECT_EQ(TOKeof, tok.m_type);
  }

  // ";" comments: same expectations as the "//" cases above.
  {
    CXFA_FMLexer lexer(L";");
    EXPECT_EQ(TOKeof, lexer.NextToken().m_type);
  }
  {
    CXFA_FMLexer lexer(L"; Empty.");
    EXPECT_EQ(TOKeof, lexer.NextToken().m_type);
  }
  {
    CXFA_FMLexer lexer(L"123 ;Empty.\n\"str\"");
    CXFA_FMToken tok = lexer.NextToken();
    EXPECT_EQ(TOKnumber, tok.m_type);
    EXPECT_EQ(L"123", tok.m_string);

    tok = lexer.NextToken();
    EXPECT_EQ(TOKstring, tok.m_type);
    EXPECT_EQ(L"\"str\"", tok.m_string);

    tok = lexer.NextToken();
    EXPECT_EQ(TOKeof, tok.m_type);
    EXPECT_TRUE(lexer.IsComplete());
  }
}
222
// Each listed name is expected to lex as a single identifier token whose text
// equals the input, after which the lexer is expected to report completion.
TEST(CXFA_FMLexerTest, ValidIdentifiers) {
  const wchar_t* const kNames[] = {
      L"a", L"an_identifier", L"_ident", L"$ident", L"!ident", L"GetAddr"};

  for (const wchar_t* name : kNames) {
    CXFA_FMLexer lexer(name);
    CXFA_FMToken tok = lexer.NextToken();
    EXPECT_EQ(TOKidentifier, tok.m_type);
    EXPECT_EQ(name, tok.m_string);
    EXPECT_TRUE(lexer.IsComplete());
  }
}
234
// Inputs that are not valid identifiers: the test pins at which token the
// lexer starts returning TOKreserver.
TEST(CXFA_FMLexerTest, InvalidIdentifiers) {
  {
    // The very first token is expected to be TOKreserver.
    CXFA_FMLexer lexer(L"#a");
    EXPECT_EQ(TOKreserver, lexer.NextToken().m_type);
  }
  {
    CXFA_FMLexer lexer(L"1a");
    EXPECT_EQ(TOKreserver, lexer.NextToken().m_type);
  }
  {
    // The first token is accepted; the second and third are both expected to
    // be TOKreserver.
    CXFA_FMLexer lexer(L"an@identifier");
    EXPECT_NE(TOKreserver, lexer.NextToken().m_type);
    EXPECT_EQ(TOKreserver, lexer.NextToken().m_type);
    EXPECT_EQ(TOKreserver, lexer.NextToken().m_type);
  }
  {
    // After the TOKreserver token the lexer must not report completion.
    CXFA_FMLexer lexer(L"_ident@");
    EXPECT_NE(TOKreserver, lexer.NextToken().m_type);
    EXPECT_EQ(TOKreserver, lexer.NextToken().m_type);
    EXPECT_FALSE(lexer.IsComplete());
  }
}
259
// Space, tab, 0x0c, 0x09 and 0x0b are expected to be skipped, both when they
// make up the whole input and when they separate two numbers.
TEST(CXFA_FMLexerTest, Whitespace) {
  {
    CXFA_FMLexer lexer(L" \t\xc\x9\xb");
    EXPECT_EQ(TOKeof, lexer.NextToken().m_type);
  }

  {
    CXFA_FMLexer lexer(L"123 \t\xc\x9\xb 456");
    CXFA_FMToken tok = lexer.NextToken();
    EXPECT_EQ(TOKnumber, tok.m_type);
    EXPECT_EQ(L"123", tok.m_string);

    tok = lexer.NextToken();
    EXPECT_EQ(TOKnumber, tok.m_type);
    EXPECT_EQ(L"456", tok.m_string);

    tok = lexer.NextToken();
    EXPECT_EQ(TOKeof, tok.m_type);
    EXPECT_TRUE(lexer.IsComplete());
  }
}
278
// The input is five code units long with a NUL in the middle ("-2", NUL,
// "-2"). The test expects the tokens before the NUL, then end-of-input, and
// expects the lexer NOT to report completion.
TEST(CXFA_FMLexerTest, NullData) {
  const WideStringView input_with_nul(L"\x2d\x32\x00\x2d\x32", 5);
  CXFA_FMLexer lexer(input_with_nul);

  CXFA_FMToken tok = lexer.NextToken();
  EXPECT_EQ(TOKminus, tok.m_type);

  tok = lexer.NextToken();
  EXPECT_EQ(TOKnumber, tok.m_type);
  EXPECT_EQ(L"2", tok.m_string);

  tok = lexer.NextToken();
  EXPECT_EQ(TOKeof, tok.m_type);
  EXPECT_FALSE(lexer.IsComplete());
}
293