1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "xfa/fxfa/fm2js/cxfa_fmlexer.h"
6 
7 #include <vector>
8 
9 #include "testing/gtest/include/gtest/gtest.h"
10 #include "testing/test_support.h"
11 #include "third_party/base/ptr_util.h"
12 
13 TEST(CXFA_FMLexerTest, EmptyString) {
14   CXFA_FMLexer lexer(L"");
15   std::unique_ptr<CXFA_FMToken> token = lexer.NextToken();
16   EXPECT_EQ(TOKeof, token->m_type);
17 }
18 
19 TEST(CXFA_FMLexerTest, Numbers) {
20   auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"-12");
21   std::unique_ptr<CXFA_FMToken> token = lexer->NextToken();
22   // TODO(dsinclair): Should this return -12 instead of two tokens?
23   EXPECT_EQ(TOKminus, token->m_type);
24   token = lexer->NextToken();
25   EXPECT_EQ(L"12", token->m_string);
26   token = lexer->NextToken();
27   EXPECT_EQ(TOKeof, token->m_type);
28 
29   lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"1.5362");
30   token = lexer->NextToken();
31   EXPECT_EQ(TOKnumber, token->m_type);
32   EXPECT_EQ(L"1.5362", token->m_string);
33 
34   lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"0.875");
35   token = lexer->NextToken();
36   EXPECT_EQ(TOKnumber, token->m_type);
37   EXPECT_EQ(L"0.875", token->m_string);
38 
39   lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"5.56e-2");
40   token = lexer->NextToken();
41   EXPECT_EQ(TOKnumber, token->m_type);
42   EXPECT_EQ(L"5.56e-2", token->m_string);
43 
44   lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"1.234E10");
45   token = lexer->NextToken();
46   EXPECT_EQ(TOKnumber, token->m_type);
47   EXPECT_EQ(L"1.234E10", token->m_string);
48 
49   lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"123456789.012345678");
50   token = lexer->NextToken();
51   EXPECT_EQ(TOKnumber, token->m_type);
52   // TODO(dsinclair): This should round as per IEEE 64-bit values.
53   // EXPECT_EQ(L"123456789.01234567", token->m_string);
54   EXPECT_EQ(L"123456789.012345678", token->m_string);
55 
56   lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"99999999999999999");
57   token = lexer->NextToken();
58   EXPECT_EQ(TOKnumber, token->m_type);
59   // TODO(dsinclair): This is spec'd as rounding when > 16 significant digits
60   // prior to the exponent.
61   // EXPECT_EQ(L"100000000000000000", token->m_string);
62   EXPECT_EQ(L"99999999999999999", token->m_string);
63 }
64 
65 // The quotes are stripped in CXFA_FMStringExpression::ToJavaScript.
66 TEST(CXFA_FMLexerTest, Strings) {
67   auto lexer =
68       pdfium::MakeUnique<CXFA_FMLexer>(L"\"The cat jumped over the fence.\"");
69   std::unique_ptr<CXFA_FMToken> token = lexer->NextToken();
70   EXPECT_EQ(TOKstring, token->m_type);
71   EXPECT_EQ(L"\"The cat jumped over the fence.\"", token->m_string);
72 
73   token = lexer->NextToken();
74   EXPECT_EQ(TOKeof, token->m_type);
75 
76   lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"\"\"");
77   token = lexer->NextToken();
78   EXPECT_EQ(TOKstring, token->m_type);
79   EXPECT_EQ(L"\"\"", token->m_string);
80 
81   lexer = pdfium::MakeUnique<CXFA_FMLexer>(
82       L"\"The message reads: \"\"Warning: Insufficient Memory\"\"\"");
83   token = lexer->NextToken();
84   EXPECT_EQ(TOKstring, token->m_type);
85   EXPECT_EQ(L"\"The message reads: \"\"Warning: Insufficient Memory\"\"\"",
86             token->m_string);
87 
88   lexer = pdfium::MakeUnique<CXFA_FMLexer>(
89       L"\"\\u0047\\u006f\\u0066\\u0069\\u0073\\u0068\\u0021\\u000d\\u000a\"");
90   token = lexer->NextToken();
91   EXPECT_EQ(TOKstring, token->m_type);
92   EXPECT_EQ(
93       L"\"\\u0047\\u006f\\u0066\\u0069\\u0073\\u0068\\u0021\\u000d\\u000a\"",
94       token->m_string);
95 }
96 
97 // Note, 'this' is a keyword but is not matched by the lexer.
98 TEST(CXFA_FMLexerTest, OperatorsAndKeywords) {
99   struct {
100     const wchar_t* op;
101     XFA_FM_TOKEN token;
102   } op[] = {{L"+", TOKplus},
103             {L"/", TOKdiv},
104             {L"-", TOKminus},
105             {L"&", TOKand},
106             {L"|", TOKor},
107             {L"*", TOKmul},
108             {L"<", TOKlt},
109             {L">", TOKgt},
110             {L"==", TOKeq},
111             {L"<>", TOKne},
112             {L"<=", TOKle},
113             {L">=", TOKge},
114             {L"and", TOKksand},
115             {L"break", TOKbreak},
116             {L"continue", TOKcontinue},
117             {L"do", TOKdo},
118             {L"downto", TOKdownto},
119             {L"else", TOKelse},
120             {L"elseif", TOKelseif},
121             {L"end", TOKend},
122             {L"endfor", TOKendfor},
123             {L"endfunc", TOKendfunc},
124             {L"endif", TOKendif},
125             {L"endwhile", TOKendwhile},
126             {L"eq", TOKkseq},
127             {L"exit", TOKexit},
128             {L"for", TOKfor},
129             {L"foreach", TOKforeach},
130             {L"func", TOKfunc},
131             {L"ge", TOKksge},
132             {L"gt", TOKksgt},
133             {L"if", TOKif},
134             {L"in", TOKin},
135             {L"infinity", TOKinfinity},
136             {L"le", TOKksle},
137             {L"lt", TOKkslt},
138             {L"nan", TOKnan},
139             {L"ne", TOKksne},
140             {L"not", TOKksnot},
141             {L"null", TOKnull},
142             {L"or", TOKksor},
143             {L"return", TOKreturn},
144             {L"step", TOKstep},
145             {L"then", TOKthen},
146             {L"throw", TOKthrow},
147             {L"upto", TOKupto},
148             {L"var", TOKvar},
149             {L"while", TOKwhile},
150 
151             // The following are defined but aren't in the spec.
152             {L"(", TOKlparen},
153             {L")", TOKrparen},
154             {L",", TOKcomma},
155             {L".", TOKdot},
156             {L"[", TOKlbracket},
157             {L"]", TOKrbracket},
158             {L"..", TOKdotdot},
159             {L".#", TOKdotscream},
160             {L".*", TOKdotstar}};
161 
162   for (size_t i = 0; i < FX_ArraySize(op); ++i) {
163     auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(op[i].op);
164     std::unique_ptr<CXFA_FMToken> token = lexer->NextToken();
165     EXPECT_EQ(op[i].token, token->m_type);
166   }
167 }
168 
169 TEST(CXFA_FMLexerTest, Comments) {
170   auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"// Empty.");
171   std::unique_ptr<CXFA_FMToken> token = lexer->NextToken();
172   EXPECT_EQ(TOKeof, token->m_type);
173 
174   lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"//");
175   token = lexer->NextToken();
176   EXPECT_EQ(TOKeof, token->m_type);
177 
178   lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"123 // Empty.\n\"str\"");
179   token = lexer->NextToken();
180   EXPECT_EQ(TOKnumber, token->m_type);
181   EXPECT_EQ(L"123", token->m_string);
182 
183   token = lexer->NextToken();
184   EXPECT_EQ(TOKstring, token->m_type);
185   EXPECT_EQ(L"\"str\"", token->m_string);
186 
187   token = lexer->NextToken();
188   EXPECT_EQ(TOKeof, token->m_type);
189 
190   lexer = pdfium::MakeUnique<CXFA_FMLexer>(L";");
191   token = lexer->NextToken();
192   EXPECT_EQ(TOKeof, token->m_type);
193 
194   lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"; Empty.");
195   token = lexer->NextToken();
196   EXPECT_EQ(TOKeof, token->m_type);
197 
198   lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"123 ;Empty.\n\"str\"");
199   token = lexer->NextToken();
200   EXPECT_EQ(TOKnumber, token->m_type);
201   EXPECT_EQ(L"123", token->m_string);
202 
203   token = lexer->NextToken();
204   EXPECT_EQ(TOKstring, token->m_type);
205   EXPECT_EQ(L"\"str\"", token->m_string);
206 
207   token = lexer->NextToken();
208   EXPECT_EQ(TOKeof, token->m_type);
209 }
210 
211 TEST(CXFA_FMLexerTest, ValidIdentifiers) {
212   std::vector<const wchar_t*> identifiers = {
213       L"a", L"an_identifier", L"_ident", L"$ident", L"!ident", L"GetAddr"};
214   for (const auto* ident : identifiers) {
215     auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(ident);
216     std::unique_ptr<CXFA_FMToken> token = lexer->NextToken();
217     EXPECT_EQ(TOKidentifier, token->m_type);
218     EXPECT_EQ(ident, token->m_string);
219   }
220 }
221 
222 TEST(CXFA_FMLexerTest, InvalidIdentifiers) {
223   auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"#a");
224   EXPECT_EQ(nullptr, lexer->NextToken());
225 
226   lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"1a");
227   EXPECT_EQ(nullptr, lexer->NextToken());
228 
229   lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"an@identifier");
230   EXPECT_NE(nullptr, lexer->NextToken());
231   EXPECT_EQ(nullptr, lexer->NextToken());
232   EXPECT_EQ(nullptr, lexer->NextToken());
233 
234   lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"_ident@");
235   EXPECT_NE(nullptr, lexer->NextToken());
236   EXPECT_EQ(nullptr, lexer->NextToken());
237 }
238 
239 TEST(CXFA_FMLexerTest, Whitespace) {
240   auto lexer = pdfium::MakeUnique<CXFA_FMLexer>(L" \t\xc\x9\xb");
241   std::unique_ptr<CXFA_FMToken> token = lexer->NextToken();
242   EXPECT_EQ(TOKeof, token->m_type);
243 
244   lexer = pdfium::MakeUnique<CXFA_FMLexer>(L"123 \t\xc\x9\xb 456");
245   token = lexer->NextToken();
246   EXPECT_EQ(TOKnumber, token->m_type);
247   EXPECT_EQ(L"123", token->m_string);
248 
249   token = lexer->NextToken();
250   EXPECT_EQ(TOKnumber, token->m_type);
251   EXPECT_EQ(L"456", token->m_string);
252 
253   token = lexer->NextToken();
254   EXPECT_EQ(TOKeof, token->m_type);
255 }
256