1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "xfa/fxfa/fm2js/xfa_lexer.h"
8 
9 #include "core/fxcrt/fx_ext.h"
10 
11 namespace {
12 
13 struct XFA_FMDChar {
inc__anoncccfa0470111::XFA_FMDChar14   static const FX_WCHAR* inc(const FX_WCHAR*& p) {
15     ++p;
16     return p;
17   }
dec__anoncccfa0470111::XFA_FMDChar18   static const FX_WCHAR* dec(const FX_WCHAR*& p) {
19     --p;
20     return p;
21   }
get__anoncccfa0470111::XFA_FMDChar22   static uint16_t get(const FX_WCHAR* p) { return *p; }
isWhiteSpace__anoncccfa0470111::XFA_FMDChar23   static bool isWhiteSpace(const FX_WCHAR* p) {
24     return (*p) == 0x09 || (*p) == 0x0b || (*p) == 0x0c || (*p) == 0x20;
25   }
isLineTerminator__anoncccfa0470111::XFA_FMDChar26   static bool isLineTerminator(const FX_WCHAR* p) {
27     return *p == 0x0A || *p == 0x0D;
28   }
isBinary__anoncccfa0470111::XFA_FMDChar29   static bool isBinary(const FX_WCHAR* p) { return (*p) >= '0' && (*p) <= '1'; }
isOctal__anoncccfa0470111::XFA_FMDChar30   static bool isOctal(const FX_WCHAR* p) { return (*p) >= '0' && (*p) <= '7'; }
isDigital__anoncccfa0470111::XFA_FMDChar31   static bool isDigital(const FX_WCHAR* p) {
32     return (*p) >= '0' && (*p) <= '9';
33   }
isHex__anoncccfa0470111::XFA_FMDChar34   static bool isHex(const FX_WCHAR* p) {
35     return isDigital(p) || ((*p) >= 'a' && (*p) <= 'f') ||
36            ((*p) >= 'A' && (*p) <= 'F');
37   }
isAlpha__anoncccfa0470111::XFA_FMDChar38   static bool isAlpha(const FX_WCHAR* p) {
39     return ((*p) <= 'z' && (*p) >= 'a') || ((*p) <= 'Z' && (*p) >= 'A');
40   }
41   static bool isAvalid(const FX_WCHAR* p, bool flag = 0);
42   static bool string2number(const FX_WCHAR* s,
43                             FX_DOUBLE* pValue,
44                             const FX_WCHAR*& pEnd);
45   static bool isUnicodeAlpha(uint16_t ch);
46 };
47 
isAvalid(const FX_WCHAR * p,bool flag)48 inline bool XFA_FMDChar::isAvalid(const FX_WCHAR* p, bool flag) {
49   if (*p == 0) {
50     return 1;
51   }
52   if ((*p <= 0x0A && *p >= 0x09) || *p == 0x0D ||
53       (*p <= 0xd7ff && *p >= 0x20) || (*p <= 0xfffd && *p >= 0xe000)) {
54     return 1;
55   }
56   if (!flag) {
57     if (*p == 0x0B || *p == 0x0C) {
58       return 1;
59     }
60   }
61   return 0;
62 }
63 
string2number(const FX_WCHAR * s,FX_DOUBLE * pValue,const FX_WCHAR * & pEnd)64 inline bool XFA_FMDChar::string2number(const FX_WCHAR* s,
65                                        FX_DOUBLE* pValue,
66                                        const FX_WCHAR*& pEnd) {
67   if (s) {
68     *pValue = wcstod((wchar_t*)s, (wchar_t**)&pEnd);
69   }
70   return 0;
71 }
72 
isUnicodeAlpha(uint16_t ch)73 inline bool XFA_FMDChar::isUnicodeAlpha(uint16_t ch) {
74   if (ch == 0 || ch == 0x0A || ch == 0x0D || ch == 0x09 || ch == 0x0B ||
75       ch == 0x0C || ch == 0x20 || ch == '.' || ch == ';' || ch == '"' ||
76       ch == '=' || ch == '<' || ch == '>' || ch == ',' || ch == '(' ||
77       ch == ')' || ch == ']' || ch == '[' || ch == '&' || ch == '|' ||
78       ch == '+' || ch == '-' || ch == '*' || ch == '/') {
79     return false;
80   }
81   return true;
82 }
83 
// Table of operator and keyword tokens: {token type, hash, spelling}.
//
// Entries are listed in ascending hash order. IsKeyword() binary-searches the
// TOKdo..TOKendif portion by hash, so that ordering must be preserved when
// entries are added. XFA_FM_KeywordToString() indexes this table directly with
// the token value.
// NOTE(review): that indexing presumably relies on the XFA_FM_TOKEN
// enumerators being declared in the same order as these rows -- confirm
// against the enum definition.
const XFA_FMKeyword keyWords[] = {
    {TOKand, 0x00000026, L"&"},
    {TOKlparen, 0x00000028, L"("},
    {TOKrparen, 0x00000029, L")"},
    {TOKmul, 0x0000002a, L"*"},
    {TOKplus, 0x0000002b, L"+"},
    {TOKcomma, 0x0000002c, L","},
    {TOKminus, 0x0000002d, L"-"},
    {TOKdot, 0x0000002e, L"."},
    {TOKdiv, 0x0000002f, L"/"},
    {TOKlt, 0x0000003c, L"<"},
    {TOKassign, 0x0000003d, L"="},
    {TOKgt, 0x0000003e, L">"},
    {TOKlbracket, 0x0000005b, L"["},
    {TOKrbracket, 0x0000005d, L"]"},
    {TOKor, 0x0000007c, L"|"},
    {TOKdotscream, 0x0000ec11, L".#"},
    {TOKdotstar, 0x0000ec18, L".*"},
    {TOKdotdot, 0x0000ec1c, L".."},
    {TOKle, 0x000133f9, L"<="},
    {TOKne, 0x000133fa, L"<>"},
    {TOKeq, 0x0001391a, L"=="},
    {TOKge, 0x00013e3b, L">="},
    {TOKdo, 0x00020153, L"do"},
    {TOKkseq, 0x00020676, L"eq"},
    {TOKksge, 0x000210ac, L"ge"},
    {TOKksgt, 0x000210bb, L"gt"},
    {TOKif, 0x00021aef, L"if"},
    {TOKin, 0x00021af7, L"in"},
    {TOKksle, 0x00022a51, L"le"},
    {TOKkslt, 0x00022a60, L"lt"},
    {TOKksne, 0x00023493, L"ne"},
    {TOKksor, 0x000239c1, L"or"},
    {TOKnull, 0x052931bb, L"null"},
    {TOKbreak, 0x05518c25, L"break"},
    {TOKksand, 0x09f9db33, L"and"},
    {TOKend, 0x0a631437, L"end"},
    {TOKeof, 0x0a63195a, L"eof"},
    {TOKfor, 0x0a7d67a7, L"for"},
    {TOKnan, 0x0b4f91dd, L"nan"},
    {TOKksnot, 0x0b4fd9b1, L"not"},
    {TOKvar, 0x0c2203e9, L"var"},
    {TOKthen, 0x2d5738cf, L"then"},
    {TOKelse, 0x45f65ee9, L"else"},
    {TOKexit, 0x4731d6ba, L"exit"},
    {TOKdownto, 0x4caadc3b, L"downto"},
    {TOKreturn, 0x4db8bd60, L"return"},
    {TOKinfinity, 0x5c0a010a, L"infinity"},
    {TOKendwhile, 0x5c64bff0, L"endwhile"},
    {TOKforeach, 0x67e31f38, L"foreach"},
    {TOKendfunc, 0x68f984a3, L"endfunc"},
    {TOKelseif, 0x78253218, L"elseif"},
    {TOKwhile, 0x84229259, L"while"},
    {TOKendfor, 0x8ab49d7e, L"endfor"},
    {TOKthrow, 0x8db05c94, L"throw"},
    {TOKstep, 0xa7a7887c, L"step"},
    {TOKupto, 0xb5155328, L"upto"},
    {TOKcontinue, 0xc0340685, L"continue"},
    {TOKfunc, 0xcdce60ec, L"func"},
    {TOKendif, 0xe0e8fee6, L"endif"},
};
145 
// First and last tokens of the word-keyword range of keyWords[]. They bound
// the binary search in IsKeyword() and the range check in
// XFA_FM_KeywordToString().
const XFA_FM_TOKEN KEYWORD_START = TOKdo;
const XFA_FM_TOKEN KEYWORD_END = TOKendif;
148 
149 }  // namespace
150 
XFA_FM_KeywordToString(XFA_FM_TOKEN op)151 const FX_WCHAR* XFA_FM_KeywordToString(XFA_FM_TOKEN op) {
152   if (op < KEYWORD_START || op > KEYWORD_END)
153     return L"";
154   return keyWords[op].m_keyword;
155 }
156 
CXFA_FMToken()157 CXFA_FMToken::CXFA_FMToken() : m_type(TOKreserver), m_uLinenum(1) {}
158 
// Creates a token of type TOKreserver located on line |uLineNum|.
CXFA_FMToken::CXFA_FMToken(uint32_t uLineNum)
    : m_type(TOKreserver), m_uLinenum(uLineNum) {}
161 
// Creates a lexer that reads from |wsFormCalc| and records errors in
// |pErrorInfo|. Only the start pointer of the input is retained; line counting
// starts at 1.
// NOTE(review): Scan() detects end of input by reading a 0 character, so this
// presumably requires the buffer behind |wsFormCalc| to be NUL-terminated and
// to outlive the lexer -- confirm against callers.
CXFA_FMLexer::CXFA_FMLexer(const CFX_WideStringC& wsFormCalc,
                           CXFA_FMErrorInfo* pErrorInfo)
    : m_ptr(wsFormCalc.c_str()), m_uCurrentLine(1), m_pErrorInfo(pErrorInfo) {}
165 
~CXFA_FMLexer()166 CXFA_FMLexer::~CXFA_FMLexer() {}
167 
NextToken()168 CXFA_FMToken* CXFA_FMLexer::NextToken() {
169   m_pToken.reset(Scan());
170   return m_pToken.get();
171 }
172 
Scan()173 CXFA_FMToken* CXFA_FMLexer::Scan() {
174   uint16_t ch = 0;
175   CXFA_FMToken* p = new CXFA_FMToken(m_uCurrentLine);
176   if (!XFA_FMDChar::isAvalid(m_ptr)) {
177     ch = XFA_FMDChar::get(m_ptr);
178     Error(kFMErrUnsupportedChar, ch);
179     return p;
180   }
181   int iRet = 0;
182   while (1) {
183     if (!XFA_FMDChar::isAvalid(m_ptr)) {
184       ch = XFA_FMDChar::get(m_ptr);
185       Error(kFMErrUnsupportedChar, ch);
186       return p;
187     }
188     ch = XFA_FMDChar::get(m_ptr);
189     switch (ch) {
190       case 0:
191         p->m_type = TOKeof;
192         return p;
193       case 0x0A:
194         ++m_uCurrentLine;
195         p->m_uLinenum = m_uCurrentLine;
196         XFA_FMDChar::inc(m_ptr);
197         break;
198       case 0x0D:
199         XFA_FMDChar::inc(m_ptr);
200         break;
201       case ';': {
202         const FX_WCHAR* pTemp = 0;
203         Comment(m_ptr, pTemp);
204         m_ptr = pTemp;
205       } break;
206       case '"': {
207         const FX_WCHAR* pTemp = 0;
208         p->m_type = TOKstring;
209         iRet = String(p, m_ptr, pTemp);
210         m_ptr = pTemp;
211       }
212         return p;
213       case '0':
214       case '1':
215       case '2':
216       case '3':
217       case '4':
218       case '5':
219       case '6':
220       case '7':
221       case '8':
222       case '9': {
223         p->m_type = TOKnumber;
224         const FX_WCHAR* pTemp = 0;
225         iRet = Number(p, m_ptr, pTemp);
226         m_ptr = pTemp;
227         if (iRet) {
228           Error(kFMErrBadSuffixNumber);
229           return p;
230         }
231       }
232         return p;
233       case '=':
234         XFA_FMDChar::inc(m_ptr);
235         if (XFA_FMDChar::isAvalid(m_ptr)) {
236           ch = XFA_FMDChar::get(m_ptr);
237           if (ch == '=') {
238             p->m_type = TOKeq;
239             XFA_FMDChar::inc(m_ptr);
240             return p;
241           } else {
242             p->m_type = TOKassign;
243             return p;
244           }
245         } else {
246           ch = XFA_FMDChar::get(m_ptr);
247           Error(kFMErrUnsupportedChar, ch);
248           return p;
249         }
250         break;
251       case '<':
252         XFA_FMDChar::inc(m_ptr);
253         if (XFA_FMDChar::isAvalid(m_ptr)) {
254           ch = XFA_FMDChar::get(m_ptr);
255           if (ch == '=') {
256             p->m_type = TOKle;
257             XFA_FMDChar::inc(m_ptr);
258             return p;
259           } else if (ch == '>') {
260             p->m_type = TOKne;
261             XFA_FMDChar::inc(m_ptr);
262             return p;
263           } else {
264             p->m_type = TOKlt;
265             return p;
266           }
267         } else {
268           ch = XFA_FMDChar::get(m_ptr);
269           Error(kFMErrUnsupportedChar, ch);
270           return p;
271         }
272         break;
273       case '>':
274         XFA_FMDChar::inc(m_ptr);
275         if (XFA_FMDChar::isAvalid(m_ptr)) {
276           ch = XFA_FMDChar::get(m_ptr);
277           if (ch == '=') {
278             p->m_type = TOKge;
279             XFA_FMDChar::inc(m_ptr);
280             return p;
281           } else {
282             p->m_type = TOKgt;
283             return p;
284           }
285         } else {
286           ch = XFA_FMDChar::get(m_ptr);
287           Error(kFMErrUnsupportedChar, ch);
288           return p;
289         }
290         break;
291       case ',':
292         p->m_type = TOKcomma;
293         XFA_FMDChar::inc(m_ptr);
294         return p;
295       case '(':
296         p->m_type = TOKlparen;
297         XFA_FMDChar::inc(m_ptr);
298         return p;
299       case ')':
300         p->m_type = TOKrparen;
301         XFA_FMDChar::inc(m_ptr);
302         return p;
303       case '[':
304         p->m_type = TOKlbracket;
305         XFA_FMDChar::inc(m_ptr);
306         return p;
307       case ']':
308         p->m_type = TOKrbracket;
309         XFA_FMDChar::inc(m_ptr);
310         return p;
311       case '&':
312         XFA_FMDChar::inc(m_ptr);
313         p->m_type = TOKand;
314         return p;
315       case '|':
316         XFA_FMDChar::inc(m_ptr);
317         p->m_type = TOKor;
318         return p;
319       case '+':
320         XFA_FMDChar::inc(m_ptr);
321         p->m_type = TOKplus;
322         return p;
323       case '-':
324         XFA_FMDChar::inc(m_ptr);
325         p->m_type = TOKminus;
326         return p;
327       case '*':
328         XFA_FMDChar::inc(m_ptr);
329         p->m_type = TOKmul;
330         return p;
331       case '/':
332         XFA_FMDChar::inc(m_ptr);
333         if (XFA_FMDChar::isAvalid(m_ptr)) {
334           ch = XFA_FMDChar::get(m_ptr);
335           if (ch == '/') {
336             const FX_WCHAR* pTemp = 0;
337             Comment(m_ptr, pTemp);
338             m_ptr = pTemp;
339             break;
340           } else {
341             p->m_type = TOKdiv;
342             return p;
343           }
344         } else {
345           ch = XFA_FMDChar::get(m_ptr);
346           Error(kFMErrUnsupportedChar, ch);
347           return p;
348         }
349         break;
350       case '.':
351         XFA_FMDChar::inc(m_ptr);
352         if (XFA_FMDChar::isAvalid(m_ptr)) {
353           ch = XFA_FMDChar::get(m_ptr);
354           if (ch == '.') {
355             p->m_type = TOKdotdot;
356             XFA_FMDChar::inc(m_ptr);
357             return p;
358           } else if (ch == '*') {
359             p->m_type = TOKdotstar;
360             XFA_FMDChar::inc(m_ptr);
361             return p;
362           } else if (ch == '#') {
363             p->m_type = TOKdotscream;
364             XFA_FMDChar::inc(m_ptr);
365             return p;
366           } else if (ch <= '9' && ch >= '0') {
367             p->m_type = TOKnumber;
368             const FX_WCHAR* pTemp = 0;
369             XFA_FMDChar::dec(m_ptr);
370             iRet = Number(p, m_ptr, pTemp);
371             m_ptr = pTemp;
372             if (iRet) {
373               Error(kFMErrBadSuffixNumber);
374             }
375             return p;
376           } else {
377             p->m_type = TOKdot;
378             return p;
379           }
380         } else {
381           ch = XFA_FMDChar::get(m_ptr);
382           Error(kFMErrUnsupportedChar, ch);
383           return p;
384         }
385       case 0x09:
386       case 0x0B:
387       case 0x0C:
388       case 0x20:
389         XFA_FMDChar::inc(m_ptr);
390         break;
391       default: {
392         const FX_WCHAR* pTemp = 0;
393         iRet = Identifiers(p, m_ptr, pTemp);
394         m_ptr = pTemp;
395         if (iRet) {
396           return p;
397         }
398         p->m_type = IsKeyword(p->m_wstring);
399       }
400         return p;
401     }
402   }
403 }
404 
Number(CXFA_FMToken * t,const FX_WCHAR * p,const FX_WCHAR * & pEnd)405 uint32_t CXFA_FMLexer::Number(CXFA_FMToken* t,
406                               const FX_WCHAR* p,
407                               const FX_WCHAR*& pEnd) {
408   FX_DOUBLE number = 0;
409   if (XFA_FMDChar::string2number(p, &number, pEnd)) {
410     return 1;
411   }
412   if (pEnd && XFA_FMDChar::isAlpha(pEnd)) {
413     return 1;
414   }
415   t->m_wstring = CFX_WideStringC(p, (pEnd - p));
416   return 0;
417 }
418 
String(CXFA_FMToken * t,const FX_WCHAR * p,const FX_WCHAR * & pEnd)419 uint32_t CXFA_FMLexer::String(CXFA_FMToken* t,
420                               const FX_WCHAR* p,
421                               const FX_WCHAR*& pEnd) {
422   const FX_WCHAR* pStart = p;
423   uint16_t ch = 0;
424   XFA_FMDChar::inc(p);
425   ch = XFA_FMDChar::get(p);
426   while (ch) {
427     if (!XFA_FMDChar::isAvalid(p)) {
428       ch = XFA_FMDChar::get(p);
429       pEnd = p;
430       t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
431       Error(kFMErrUnsupportedChar, ch);
432       return 1;
433     }
434     if (ch == '"') {
435       XFA_FMDChar::inc(p);
436       if (!XFA_FMDChar::isAvalid(p)) {
437         ch = XFA_FMDChar::get(p);
438         pEnd = p;
439         t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
440         Error(kFMErrUnsupportedChar, ch);
441         return 1;
442       }
443       ch = XFA_FMDChar::get(p);
444       if (ch == '"') {
445         goto NEXT;
446       } else {
447         break;
448       }
449     }
450   NEXT:
451     XFA_FMDChar::inc(p);
452     ch = XFA_FMDChar::get(p);
453   }
454   pEnd = p;
455   t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
456   return 0;
457 }
458 
Identifiers(CXFA_FMToken * t,const FX_WCHAR * p,const FX_WCHAR * & pEnd)459 uint32_t CXFA_FMLexer::Identifiers(CXFA_FMToken* t,
460                                    const FX_WCHAR* p,
461                                    const FX_WCHAR*& pEnd) {
462   const FX_WCHAR* pStart = p;
463   uint16_t ch = 0;
464   ch = XFA_FMDChar::get(p);
465   XFA_FMDChar::inc(p);
466   if (!XFA_FMDChar::isAvalid(p)) {
467     pEnd = p;
468     t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
469     Error(kFMErrUnsupportedChar, ch);
470     return 1;
471   }
472   ch = XFA_FMDChar::get(p);
473   while (ch) {
474     if (!XFA_FMDChar::isAvalid(p)) {
475       pEnd = p;
476       t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
477       Error(kFMErrUnsupportedChar, ch);
478       return 1;
479     }
480     ch = XFA_FMDChar::get(p);
481     if (XFA_FMDChar::isUnicodeAlpha(ch)) {
482       XFA_FMDChar::inc(p);
483     } else {
484       pEnd = p;
485       t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
486       return 0;
487     }
488   }
489   pEnd = p;
490   t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
491   return 0;
492 }
493 
Comment(const FX_WCHAR * p,const FX_WCHAR * & pEnd)494 void CXFA_FMLexer::Comment(const FX_WCHAR* p, const FX_WCHAR*& pEnd) {
495   unsigned ch = 0;
496   XFA_FMDChar::inc(p);
497   ch = XFA_FMDChar::get(p);
498   while (ch) {
499     if (ch == 0x0D) {
500       XFA_FMDChar::inc(p);
501       pEnd = p;
502       return;
503     }
504     if (ch == 0x0A) {
505       ++m_uCurrentLine;
506       XFA_FMDChar::inc(p);
507       pEnd = p;
508       return;
509     }
510     XFA_FMDChar::inc(p);
511     ch = XFA_FMDChar::get(p);
512   }
513   pEnd = p;
514 }
515 
IsKeyword(const CFX_WideStringC & str)516 XFA_FM_TOKEN CXFA_FMLexer::IsKeyword(const CFX_WideStringC& str) {
517   uint32_t uHash = FX_HashCode_GetW(str, true);
518   int32_t iStart = KEYWORD_START;
519   int32_t iEnd = KEYWORD_END;
520   do {
521     int32_t iMid = (iStart + iEnd) / 2;
522     XFA_FMKeyword keyword = keyWords[iMid];
523     if (uHash == keyword.m_uHash)
524       return keyword.m_type;
525     if (uHash < keyword.m_uHash)
526       iEnd = iMid - 1;
527     else
528       iStart = iMid + 1;
529   } while (iStart <= iEnd);
530   return TOKidentifier;
531 }
532 
Error(const FX_WCHAR * msg,...)533 void CXFA_FMLexer::Error(const FX_WCHAR* msg, ...) {
534   m_pErrorInfo->linenum = m_uCurrentLine;
535   va_list ap;
536   va_start(ap, msg);
537   m_pErrorInfo->message.FormatV(msg, ap);
538   va_end(ap);
539 }
540 
HasError() const541 bool CXFA_FMLexer::HasError() const {
542   if (m_pErrorInfo->message.IsEmpty()) {
543     return false;
544   }
545   return true;
546 }
547