1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "xfa_fm2js.h"
8 struct XFA_FMDChar {
incXFA_FMDChar9 static const FX_WCHAR* inc(const FX_WCHAR*& p) {
10 ++p;
11 return p;
12 }
decXFA_FMDChar13 static const FX_WCHAR* dec(const FX_WCHAR*& p) {
14 --p;
15 return p;
16 }
getXFA_FMDChar17 static uint16_t get(const FX_WCHAR* p) { return *p; }
isWhiteSpaceXFA_FMDChar18 static FX_BOOL isWhiteSpace(const FX_WCHAR* p) {
19 return (*p) == 0x09 || (*p) == 0x0b || (*p) == 0x0c || (*p) == 0x20;
20 }
isLineTerminatorXFA_FMDChar21 static FX_BOOL isLineTerminator(const FX_WCHAR* p) {
22 return *p == 0x0A || *p == 0x0D;
23 }
isBinaryXFA_FMDChar24 static FX_BOOL isBinary(const FX_WCHAR* p) {
25 return (*p) >= '0' && (*p) <= '1';
26 }
isOctalXFA_FMDChar27 static FX_BOOL isOctal(const FX_WCHAR* p) {
28 return (*p) >= '0' && (*p) <= '7';
29 }
isDigitalXFA_FMDChar30 static FX_BOOL isDigital(const FX_WCHAR* p) {
31 return (*p) >= '0' && (*p) <= '9';
32 }
isHexXFA_FMDChar33 static FX_BOOL isHex(const FX_WCHAR* p) {
34 return isDigital(p) || ((*p) >= 'a' && (*p) <= 'f') ||
35 ((*p) >= 'A' && (*p) <= 'F');
36 }
isAlphaXFA_FMDChar37 static FX_BOOL isAlpha(const FX_WCHAR* p) {
38 return ((*p) <= 'z' && (*p) >= 'a') || ((*p) <= 'Z' && (*p) >= 'A');
39 }
40 static FX_BOOL isAvalid(const FX_WCHAR* p, FX_BOOL flag = 0);
41 static FX_BOOL string2number(const FX_WCHAR* s,
42 FX_DOUBLE* pValue,
43 const FX_WCHAR*& pEnd);
44 static FX_BOOL isUnicodeAlpha(uint16_t ch);
45 };
isAvalid(const FX_WCHAR * p,FX_BOOL flag)46 inline FX_BOOL XFA_FMDChar::isAvalid(const FX_WCHAR* p, FX_BOOL flag) {
47 if (*p == 0) {
48 return 1;
49 }
50 if ((*p <= 0x0A && *p >= 0x09) || *p == 0x0D ||
51 (*p <= 0xd7ff && *p >= 0x20) || (*p <= 0xfffd && *p >= 0xe000)) {
52 return 1;
53 }
54 if (!flag) {
55 if (*p == 0x0B || *p == 0x0C) {
56 return 1;
57 }
58 }
59 return 0;
60 }
string2number(const FX_WCHAR * s,FX_DOUBLE * pValue,const FX_WCHAR * & pEnd)61 inline FX_BOOL XFA_FMDChar::string2number(const FX_WCHAR* s,
62 FX_DOUBLE* pValue,
63 const FX_WCHAR*& pEnd) {
64 if (s) {
65 *pValue = wcstod((wchar_t*)s, (wchar_t**)&pEnd);
66 }
67 return 0;
68 }
isUnicodeAlpha(uint16_t ch)69 inline FX_BOOL XFA_FMDChar::isUnicodeAlpha(uint16_t ch) {
70 if (ch == 0 || ch == 0x0A || ch == 0x0D || ch == 0x09 || ch == 0x0B ||
71 ch == 0x0C || ch == 0x20 || ch == '.' || ch == ';' || ch == '"' ||
72 ch == '=' || ch == '<' || ch == '>' || ch == ',' || ch == '(' ||
73 ch == ')' || ch == ']' || ch == '[' || ch == '&' || ch == '|' ||
74 ch == '+' || ch == '-' || ch == '*' || ch == '/') {
75 return FALSE;
76 } else {
77 return TRUE;
78 }
79 }
80 static XFA_FMKeyword keyWords[] = {
81 {TOKand, 0x00000026, L"&"},
82 {TOKlparen, 0x00000028, L"("},
83 {TOKrparen, 0x00000029, L")"},
84 {TOKmul, 0x0000002a, L"*"},
85 {TOKplus, 0x0000002b, L"+"},
86 {TOKcomma, 0x0000002c, L","},
87 {TOKminus, 0x0000002d, L"-"},
88 {TOKdot, 0x0000002e, L"."},
89 {TOKdiv, 0x0000002f, L"/"},
90 {TOKlt, 0x0000003c, L"<"},
91 {TOKassign, 0x0000003d, L"="},
92 {TOKgt, 0x0000003e, L">"},
93 {TOKlbracket, 0x0000005b, L"["},
94 {TOKrbracket, 0x0000005d, L"]"},
95 {TOKor, 0x0000007c, L"|"},
96 {TOKdotscream, 0x0000ec11, L".#"},
97 {TOKdotstar, 0x0000ec18, L".*"},
98 {TOKdotdot, 0x0000ec1c, L".."},
99 {TOKle, 0x000133f9, L"<="},
100 {TOKne, 0x000133fa, L"<>"},
101 {TOKeq, 0x0001391a, L"=="},
102 {TOKge, 0x00013e3b, L">="},
103 {TOKdo, 0x00020153, L"do"},
104 {TOKkseq, 0x00020676, L"eq"},
105 {TOKksge, 0x000210ac, L"ge"},
106 {TOKksgt, 0x000210bb, L"gt"},
107 {TOKif, 0x00021aef, L"if"},
108 {TOKin, 0x00021af7, L"in"},
109 {TOKksle, 0x00022a51, L"le"},
110 {TOKkslt, 0x00022a60, L"lt"},
111 {TOKksne, 0x00023493, L"ne"},
112 {TOKksor, 0x000239c1, L"or"},
113 {TOKnull, 0x052931bb, L"null"},
114 {TOKbreak, 0x05518c25, L"break"},
115 {TOKksand, 0x09f9db33, L"and"},
116 {TOKend, 0x0a631437, L"end"},
117 {TOKeof, 0x0a63195a, L"eof"},
118 {TOKfor, 0x0a7d67a7, L"for"},
119 {TOKnan, 0x0b4f91dd, L"nan"},
120 {TOKksnot, 0x0b4fd9b1, L"not"},
121 {TOKvar, 0x0c2203e9, L"var"},
122 {TOKthen, 0x2d5738cf, L"then"},
123 {TOKelse, 0x45f65ee9, L"else"},
124 {TOKexit, 0x4731d6ba, L"exit"},
125 {TOKdownto, 0x4caadc3b, L"downto"},
126 {TOKreturn, 0x4db8bd60, L"return"},
127 {TOKinfinity, 0x5c0a010a, L"infinity"},
128 {TOKendwhile, 0x5c64bff0, L"endwhile"},
129 {TOKforeach, 0x67e31f38, L"foreach"},
130 {TOKendfunc, 0x68f984a3, L"endfunc"},
131 {TOKelseif, 0x78253218, L"elseif"},
132 {TOKwhile, 0x84229259, L"while"},
133 {TOKendfor, 0x8ab49d7e, L"endfor"},
134 {TOKthrow, 0x8db05c94, L"throw"},
135 {TOKstep, 0xa7a7887c, L"step"},
136 {TOKupto, 0xb5155328, L"upto"},
137 {TOKcontinue, 0xc0340685, L"continue"},
138 {TOKfunc, 0xcdce60ec, L"func"},
139 {TOKendif, 0xe0e8fee6, L"endif"},
140 };
141 static const FX_WORD KEYWORD_START = TOKdo;
142 static const FX_WORD KEYWORD_END = TOKendif;
XFA_FM_KeywordToString(XFA_FM_TOKEN op)143 const FX_WCHAR* XFA_FM_KeywordToString(XFA_FM_TOKEN op) {
144 return keyWords[op].m_keword;
145 }
CXFA_FMToken()146 CXFA_FMToken::CXFA_FMToken() {
147 m_type = TOKreserver;
148 m_uLinenum = 1;
149 m_pNext = 0;
150 }
CXFA_FMToken(FX_DWORD uLineNum)151 CXFA_FMToken::CXFA_FMToken(FX_DWORD uLineNum) {
152 m_type = TOKreserver;
153 m_uLinenum = uLineNum;
154 m_pNext = 0;
155 }
// Intentionally empty: the destructor does not delete m_pNext, so whoever
// holds a chain of tokens must delete each one individually.
CXFA_FMToken::~CXFA_FMToken() {}
CXFA_FMLexer(const CFX_WideStringC & wsFormCalc,CXFA_FMErrorInfo * pErrorInfo)157 CXFA_FMLexer::CXFA_FMLexer(const CFX_WideStringC& wsFormCalc,
158 CXFA_FMErrorInfo* pErrorInfo) {
159 m_pScript = wsFormCalc.GetPtr();
160 m_uLength = wsFormCalc.GetLength();
161 m_uCurrentLine = 1;
162 m_ptr = m_pScript;
163 m_pToken = 0;
164 m_pErrorInfo = pErrorInfo;
165 }
NextToken()166 CXFA_FMToken* CXFA_FMLexer::NextToken() {
167 CXFA_FMToken* t = 0;
168 if (!m_pToken) {
169 m_pToken = Scan();
170 } else {
171 if (m_pToken->m_pNext) {
172 t = m_pToken->m_pNext;
173 delete m_pToken;
174 m_pToken = t;
175 } else {
176 t = m_pToken;
177 m_pToken = Scan();
178 delete t;
179 }
180 }
181 return m_pToken;
182 }
Scan()183 CXFA_FMToken* CXFA_FMLexer::Scan() {
184 uint16_t ch = 0;
185 CXFA_FMToken* p = new CXFA_FMToken(m_uCurrentLine);
186 if (!XFA_FMDChar::isAvalid(m_ptr)) {
187 ch = XFA_FMDChar::get(m_ptr);
188 Error(FMERR_UNSUPPORTED_CHAR, ch);
189 return p;
190 }
191 int iRet = 0;
192 while (1) {
193 if (!XFA_FMDChar::isAvalid(m_ptr)) {
194 ch = XFA_FMDChar::get(m_ptr);
195 Error(FMERR_UNSUPPORTED_CHAR, ch);
196 return p;
197 }
198 ch = XFA_FMDChar::get(m_ptr);
199 switch (ch) {
200 case 0:
201 p->m_type = TOKeof;
202 return p;
203 case 0x0A:
204 ++m_uCurrentLine;
205 p->m_uLinenum = m_uCurrentLine;
206 XFA_FMDChar::inc(m_ptr);
207 break;
208 case 0x0D:
209 XFA_FMDChar::inc(m_ptr);
210 break;
211 case ';': {
212 const FX_WCHAR* pTemp = 0;
213 Comment(m_ptr, pTemp);
214 m_ptr = pTemp;
215 } break;
216 case '"': {
217 const FX_WCHAR* pTemp = 0;
218 p->m_type = TOKstring;
219 iRet = String(p, m_ptr, pTemp);
220 if (iRet) {
221 return p;
222 }
223 m_ptr = pTemp;
224 }
225 return p;
226 case '0':
227 case '1':
228 case '2':
229 case '3':
230 case '4':
231 case '5':
232 case '6':
233 case '7':
234 case '8':
235 case '9': {
236 p->m_type = TOKnumber;
237 const FX_WCHAR* pTemp = 0;
238 iRet = Number(p, m_ptr, pTemp);
239 m_ptr = pTemp;
240 if (iRet) {
241 Error(FMERR_BAD_SUFFIX_NUMBER);
242 return p;
243 }
244 }
245 return p;
246 case '=':
247 XFA_FMDChar::inc(m_ptr);
248 if (XFA_FMDChar::isAvalid(m_ptr)) {
249 ch = XFA_FMDChar::get(m_ptr);
250 if (ch == '=') {
251 p->m_type = TOKeq;
252 XFA_FMDChar::inc(m_ptr);
253 return p;
254 } else {
255 p->m_type = TOKassign;
256 return p;
257 }
258 } else {
259 ch = XFA_FMDChar::get(m_ptr);
260 Error(FMERR_UNSUPPORTED_CHAR, ch);
261 return p;
262 }
263 break;
264 case '<':
265 XFA_FMDChar::inc(m_ptr);
266 if (XFA_FMDChar::isAvalid(m_ptr)) {
267 ch = XFA_FMDChar::get(m_ptr);
268 if (ch == '=') {
269 p->m_type = TOKle;
270 XFA_FMDChar::inc(m_ptr);
271 return p;
272 } else if (ch == '>') {
273 p->m_type = TOKne;
274 XFA_FMDChar::inc(m_ptr);
275 return p;
276 } else {
277 p->m_type = TOKlt;
278 return p;
279 }
280 } else {
281 ch = XFA_FMDChar::get(m_ptr);
282 Error(FMERR_UNSUPPORTED_CHAR, ch);
283 return p;
284 }
285 break;
286 case '>':
287 XFA_FMDChar::inc(m_ptr);
288 if (XFA_FMDChar::isAvalid(m_ptr)) {
289 ch = XFA_FMDChar::get(m_ptr);
290 if (ch == '=') {
291 p->m_type = TOKge;
292 XFA_FMDChar::inc(m_ptr);
293 return p;
294 } else {
295 p->m_type = TOKgt;
296 return p;
297 }
298 } else {
299 ch = XFA_FMDChar::get(m_ptr);
300 Error(FMERR_UNSUPPORTED_CHAR, ch);
301 return p;
302 }
303 break;
304 case ',':
305 p->m_type = TOKcomma;
306 XFA_FMDChar::inc(m_ptr);
307 return p;
308 case '(':
309 p->m_type = TOKlparen;
310 XFA_FMDChar::inc(m_ptr);
311 return p;
312 case ')':
313 p->m_type = TOKrparen;
314 XFA_FMDChar::inc(m_ptr);
315 return p;
316 case '[':
317 p->m_type = TOKlbracket;
318 XFA_FMDChar::inc(m_ptr);
319 return p;
320 case ']':
321 p->m_type = TOKrbracket;
322 XFA_FMDChar::inc(m_ptr);
323 return p;
324 case '&':
325 XFA_FMDChar::inc(m_ptr);
326 p->m_type = TOKand;
327 return p;
328 case '|':
329 XFA_FMDChar::inc(m_ptr);
330 p->m_type = TOKor;
331 return p;
332 case '+':
333 XFA_FMDChar::inc(m_ptr);
334 p->m_type = TOKplus;
335 return p;
336 case '-':
337 XFA_FMDChar::inc(m_ptr);
338 p->m_type = TOKminus;
339 return p;
340 case '*':
341 XFA_FMDChar::inc(m_ptr);
342 p->m_type = TOKmul;
343 return p;
344 case '/':
345 XFA_FMDChar::inc(m_ptr);
346 if (XFA_FMDChar::isAvalid(m_ptr)) {
347 ch = XFA_FMDChar::get(m_ptr);
348 if (ch == '/') {
349 const FX_WCHAR* pTemp = 0;
350 Comment(m_ptr, pTemp);
351 m_ptr = pTemp;
352 break;
353 } else {
354 p->m_type = TOKdiv;
355 return p;
356 }
357 } else {
358 ch = XFA_FMDChar::get(m_ptr);
359 Error(FMERR_UNSUPPORTED_CHAR, ch);
360 return p;
361 }
362 break;
363 case '.':
364 XFA_FMDChar::inc(m_ptr);
365 if (XFA_FMDChar::isAvalid(m_ptr)) {
366 ch = XFA_FMDChar::get(m_ptr);
367 if (ch == '.') {
368 p->m_type = TOKdotdot;
369 XFA_FMDChar::inc(m_ptr);
370 return p;
371 } else if (ch == '*') {
372 p->m_type = TOKdotstar;
373 XFA_FMDChar::inc(m_ptr);
374 return p;
375 } else if (ch == '#') {
376 p->m_type = TOKdotscream;
377 XFA_FMDChar::inc(m_ptr);
378 return p;
379 } else if (ch <= '9' && ch >= '0') {
380 p->m_type = TOKnumber;
381 const FX_WCHAR* pTemp = 0;
382 XFA_FMDChar::dec(m_ptr);
383 iRet = Number(p, m_ptr, pTemp);
384 m_ptr = pTemp;
385 if (iRet) {
386 Error(FMERR_BAD_SUFFIX_NUMBER);
387 }
388 return p;
389 } else {
390 p->m_type = TOKdot;
391 return p;
392 }
393 } else {
394 ch = XFA_FMDChar::get(m_ptr);
395 Error(FMERR_UNSUPPORTED_CHAR, ch);
396 return p;
397 }
398 case 0x09:
399 case 0x0B:
400 case 0x0C:
401 case 0x20:
402 XFA_FMDChar::inc(m_ptr);
403 break;
404 default: {
405 const FX_WCHAR* pTemp = 0;
406 iRet = Identifiers(p, m_ptr, pTemp);
407 m_ptr = pTemp;
408 if (iRet) {
409 return p;
410 }
411 p->m_type = IsKeyword(p->m_wstring);
412 }
413 return p;
414 }
415 }
416 }
Number(CXFA_FMToken * t,const FX_WCHAR * p,const FX_WCHAR * & pEnd)417 FX_DWORD CXFA_FMLexer::Number(CXFA_FMToken* t,
418 const FX_WCHAR* p,
419 const FX_WCHAR*& pEnd) {
420 FX_DOUBLE number = 0;
421 if (XFA_FMDChar::string2number(p, &number, pEnd)) {
422 return 1;
423 }
424 if (pEnd && XFA_FMDChar::isAlpha(pEnd)) {
425 return 1;
426 }
427 t->m_wstring = CFX_WideStringC(p, (pEnd - p));
428 return 0;
429 }
String(CXFA_FMToken * t,const FX_WCHAR * p,const FX_WCHAR * & pEnd)430 FX_DWORD CXFA_FMLexer::String(CXFA_FMToken* t,
431 const FX_WCHAR* p,
432 const FX_WCHAR*& pEnd) {
433 const FX_WCHAR* pStart = p;
434 uint16_t ch = 0;
435 XFA_FMDChar::inc(p);
436 ch = XFA_FMDChar::get(p);
437 while (ch) {
438 if (!XFA_FMDChar::isAvalid(p)) {
439 ch = XFA_FMDChar::get(p);
440 pEnd = p;
441 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
442 Error(FMERR_UNSUPPORTED_CHAR, ch);
443 return 1;
444 }
445 if (ch == '"') {
446 XFA_FMDChar::inc(p);
447 if (!XFA_FMDChar::isAvalid(p)) {
448 ch = XFA_FMDChar::get(p);
449 pEnd = p;
450 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
451 Error(FMERR_UNSUPPORTED_CHAR, ch);
452 return 1;
453 }
454 ch = XFA_FMDChar::get(p);
455 if (ch == '"') {
456 goto NEXT;
457 } else {
458 break;
459 }
460 }
461 NEXT:
462 XFA_FMDChar::inc(p);
463 ch = XFA_FMDChar::get(p);
464 }
465 pEnd = p;
466 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
467 return 0;
468 }
Identifiers(CXFA_FMToken * t,const FX_WCHAR * p,const FX_WCHAR * & pEnd)469 FX_DWORD CXFA_FMLexer::Identifiers(CXFA_FMToken* t,
470 const FX_WCHAR* p,
471 const FX_WCHAR*& pEnd) {
472 const FX_WCHAR* pStart = p;
473 uint16_t ch = 0;
474 ch = XFA_FMDChar::get(p);
475 XFA_FMDChar::inc(p);
476 if (!XFA_FMDChar::isAvalid(p)) {
477 pEnd = p;
478 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
479 Error(FMERR_UNSUPPORTED_CHAR, ch);
480 return 1;
481 }
482 ch = XFA_FMDChar::get(p);
483 while (ch) {
484 if (!XFA_FMDChar::isAvalid(p)) {
485 pEnd = p;
486 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
487 Error(FMERR_UNSUPPORTED_CHAR, ch);
488 return 1;
489 }
490 ch = XFA_FMDChar::get(p);
491 if (XFA_FMDChar::isUnicodeAlpha(ch)) {
492 XFA_FMDChar::inc(p);
493 } else {
494 pEnd = p;
495 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
496 return 0;
497 }
498 }
499 pEnd = p;
500 t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
501 return 0;
502 }
Comment(const FX_WCHAR * p,const FX_WCHAR * & pEnd)503 void CXFA_FMLexer::Comment(const FX_WCHAR* p, const FX_WCHAR*& pEnd) {
504 unsigned ch = 0;
505 XFA_FMDChar::inc(p);
506 ch = XFA_FMDChar::get(p);
507 while (ch) {
508 if (ch == 0x0D) {
509 XFA_FMDChar::inc(p);
510 pEnd = p;
511 return;
512 }
513 if (ch == 0x0A) {
514 ++m_uCurrentLine;
515 XFA_FMDChar::inc(p);
516 pEnd = p;
517 return;
518 }
519 XFA_FMDChar::inc(p);
520 ch = XFA_FMDChar::get(p);
521 }
522 pEnd = p;
523 }
IsKeyword(const CFX_WideStringC & str)524 XFA_FM_TOKEN CXFA_FMLexer::IsKeyword(const CFX_WideStringC& str) {
525 int32_t iLength = str.GetLength();
526 uint32_t uHash = FX_HashCode_String_GetW(str.GetPtr(), iLength, TRUE);
527 int32_t iStart = KEYWORD_START, iEnd = KEYWORD_END;
528 int32_t iMid = (iStart + iEnd) / 2;
529 XFA_FMKeyword keyword;
530 do {
531 iMid = (iStart + iEnd) / 2;
532 keyword = keyWords[iMid];
533 if (uHash == keyword.m_uHash) {
534 return keyword.m_type;
535 } else if (uHash < keyword.m_uHash) {
536 iEnd = iMid - 1;
537 } else {
538 iStart = iMid + 1;
539 }
540 } while (iStart <= iEnd);
541 return TOKidentifier;
542 }
~CXFA_FMLexer()543 CXFA_FMLexer::~CXFA_FMLexer() {
544 m_pScript = 0;
545 m_ptr = m_pScript;
546 if (m_pToken) {
547 CXFA_FMToken* t1 = m_pToken;
548 CXFA_FMToken* t2 = t1->m_pNext;
549 while (t2) {
550 delete t1;
551 t1 = t2;
552 t2 = t2->m_pNext;
553 }
554 delete m_pToken;
555 m_pToken = 0;
556 }
557 m_pErrorInfo = 0;
558 }
Error(XFA_FM_ERRMSG msg,...)559 void CXFA_FMLexer::Error(XFA_FM_ERRMSG msg, ...) {
560 m_pErrorInfo->linenum = m_uCurrentLine;
561 const FX_WCHAR* lpMessageInfo = XFA_FM_ErrorMsg(msg);
562 va_list ap;
563 va_start(ap, msg);
564 m_pErrorInfo->message.FormatV(lpMessageInfo, ap);
565 va_end(ap);
566 }
HasError() const567 FX_BOOL CXFA_FMLexer::HasError() const {
568 if (m_pErrorInfo->message.IsEmpty()) {
569 return FALSE;
570 }
571 return TRUE;
572 }
573