// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

7 #include "xfa_fm2js.h"
8 struct XFA_FMDChar {
incXFA_FMDChar9   static const FX_WCHAR* inc(const FX_WCHAR*& p) {
10     ++p;
11     return p;
12   }
decXFA_FMDChar13   static const FX_WCHAR* dec(const FX_WCHAR*& p) {
14     --p;
15     return p;
16   }
getXFA_FMDChar17   static uint16_t get(const FX_WCHAR* p) { return *p; }
isWhiteSpaceXFA_FMDChar18   static FX_BOOL isWhiteSpace(const FX_WCHAR* p) {
19     return (*p) == 0x09 || (*p) == 0x0b || (*p) == 0x0c || (*p) == 0x20;
20   }
isLineTerminatorXFA_FMDChar21   static FX_BOOL isLineTerminator(const FX_WCHAR* p) {
22     return *p == 0x0A || *p == 0x0D;
23   }
isBinaryXFA_FMDChar24   static FX_BOOL isBinary(const FX_WCHAR* p) {
25     return (*p) >= '0' && (*p) <= '1';
26   }
isOctalXFA_FMDChar27   static FX_BOOL isOctal(const FX_WCHAR* p) {
28     return (*p) >= '0' && (*p) <= '7';
29   }
isDigitalXFA_FMDChar30   static FX_BOOL isDigital(const FX_WCHAR* p) {
31     return (*p) >= '0' && (*p) <= '9';
32   }
isHexXFA_FMDChar33   static FX_BOOL isHex(const FX_WCHAR* p) {
34     return isDigital(p) || ((*p) >= 'a' && (*p) <= 'f') ||
35            ((*p) >= 'A' && (*p) <= 'F');
36   }
isAlphaXFA_FMDChar37   static FX_BOOL isAlpha(const FX_WCHAR* p) {
38     return ((*p) <= 'z' && (*p) >= 'a') || ((*p) <= 'Z' && (*p) >= 'A');
39   }
40   static FX_BOOL isAvalid(const FX_WCHAR* p, FX_BOOL flag = 0);
41   static FX_BOOL string2number(const FX_WCHAR* s,
42                                FX_DOUBLE* pValue,
43                                const FX_WCHAR*& pEnd);
44   static FX_BOOL isUnicodeAlpha(uint16_t ch);
45 };
isAvalid(const FX_WCHAR * p,FX_BOOL flag)46 inline FX_BOOL XFA_FMDChar::isAvalid(const FX_WCHAR* p, FX_BOOL flag) {
47   if (*p == 0) {
48     return 1;
49   }
50   if ((*p <= 0x0A && *p >= 0x09) || *p == 0x0D ||
51       (*p <= 0xd7ff && *p >= 0x20) || (*p <= 0xfffd && *p >= 0xe000)) {
52     return 1;
53   }
54   if (!flag) {
55     if (*p == 0x0B || *p == 0x0C) {
56       return 1;
57     }
58   }
59   return 0;
60 }
string2number(const FX_WCHAR * s,FX_DOUBLE * pValue,const FX_WCHAR * & pEnd)61 inline FX_BOOL XFA_FMDChar::string2number(const FX_WCHAR* s,
62                                           FX_DOUBLE* pValue,
63                                           const FX_WCHAR*& pEnd) {
64   if (s) {
65     *pValue = wcstod((wchar_t*)s, (wchar_t**)&pEnd);
66   }
67   return 0;
68 }
isUnicodeAlpha(uint16_t ch)69 inline FX_BOOL XFA_FMDChar::isUnicodeAlpha(uint16_t ch) {
70   if (ch == 0 || ch == 0x0A || ch == 0x0D || ch == 0x09 || ch == 0x0B ||
71       ch == 0x0C || ch == 0x20 || ch == '.' || ch == ';' || ch == '"' ||
72       ch == '=' || ch == '<' || ch == '>' || ch == ',' || ch == '(' ||
73       ch == ')' || ch == ']' || ch == '[' || ch == '&' || ch == '|' ||
74       ch == '+' || ch == '-' || ch == '*' || ch == '/') {
75     return FALSE;
76   } else {
77     return TRUE;
78   }
79 }
80 static XFA_FMKeyword keyWords[] = {
81     {TOKand, 0x00000026, L"&"},
82     {TOKlparen, 0x00000028, L"("},
83     {TOKrparen, 0x00000029, L")"},
84     {TOKmul, 0x0000002a, L"*"},
85     {TOKplus, 0x0000002b, L"+"},
86     {TOKcomma, 0x0000002c, L","},
87     {TOKminus, 0x0000002d, L"-"},
88     {TOKdot, 0x0000002e, L"."},
89     {TOKdiv, 0x0000002f, L"/"},
90     {TOKlt, 0x0000003c, L"<"},
91     {TOKassign, 0x0000003d, L"="},
92     {TOKgt, 0x0000003e, L">"},
93     {TOKlbracket, 0x0000005b, L"["},
94     {TOKrbracket, 0x0000005d, L"]"},
95     {TOKor, 0x0000007c, L"|"},
96     {TOKdotscream, 0x0000ec11, L".#"},
97     {TOKdotstar, 0x0000ec18, L".*"},
98     {TOKdotdot, 0x0000ec1c, L".."},
99     {TOKle, 0x000133f9, L"<="},
100     {TOKne, 0x000133fa, L"<>"},
101     {TOKeq, 0x0001391a, L"=="},
102     {TOKge, 0x00013e3b, L">="},
103     {TOKdo, 0x00020153, L"do"},
104     {TOKkseq, 0x00020676, L"eq"},
105     {TOKksge, 0x000210ac, L"ge"},
106     {TOKksgt, 0x000210bb, L"gt"},
107     {TOKif, 0x00021aef, L"if"},
108     {TOKin, 0x00021af7, L"in"},
109     {TOKksle, 0x00022a51, L"le"},
110     {TOKkslt, 0x00022a60, L"lt"},
111     {TOKksne, 0x00023493, L"ne"},
112     {TOKksor, 0x000239c1, L"or"},
113     {TOKnull, 0x052931bb, L"null"},
114     {TOKbreak, 0x05518c25, L"break"},
115     {TOKksand, 0x09f9db33, L"and"},
116     {TOKend, 0x0a631437, L"end"},
117     {TOKeof, 0x0a63195a, L"eof"},
118     {TOKfor, 0x0a7d67a7, L"for"},
119     {TOKnan, 0x0b4f91dd, L"nan"},
120     {TOKksnot, 0x0b4fd9b1, L"not"},
121     {TOKvar, 0x0c2203e9, L"var"},
122     {TOKthen, 0x2d5738cf, L"then"},
123     {TOKelse, 0x45f65ee9, L"else"},
124     {TOKexit, 0x4731d6ba, L"exit"},
125     {TOKdownto, 0x4caadc3b, L"downto"},
126     {TOKreturn, 0x4db8bd60, L"return"},
127     {TOKinfinity, 0x5c0a010a, L"infinity"},
128     {TOKendwhile, 0x5c64bff0, L"endwhile"},
129     {TOKforeach, 0x67e31f38, L"foreach"},
130     {TOKendfunc, 0x68f984a3, L"endfunc"},
131     {TOKelseif, 0x78253218, L"elseif"},
132     {TOKwhile, 0x84229259, L"while"},
133     {TOKendfor, 0x8ab49d7e, L"endfor"},
134     {TOKthrow, 0x8db05c94, L"throw"},
135     {TOKstep, 0xa7a7887c, L"step"},
136     {TOKupto, 0xb5155328, L"upto"},
137     {TOKcontinue, 0xc0340685, L"continue"},
138     {TOKfunc, 0xcdce60ec, L"func"},
139     {TOKendif, 0xe0e8fee6, L"endif"},
140 };
141 static const FX_WORD KEYWORD_START = TOKdo;
142 static const FX_WORD KEYWORD_END = TOKendif;
XFA_FM_KeywordToString(XFA_FM_TOKEN op)143 const FX_WCHAR* XFA_FM_KeywordToString(XFA_FM_TOKEN op) {
144   return keyWords[op].m_keword;
145 }
CXFA_FMToken()146 CXFA_FMToken::CXFA_FMToken() {
147   m_type = TOKreserver;
148   m_uLinenum = 1;
149   m_pNext = 0;
150 }
CXFA_FMToken(FX_DWORD uLineNum)151 CXFA_FMToken::CXFA_FMToken(FX_DWORD uLineNum) {
152   m_type = TOKreserver;
153   m_uLinenum = uLineNum;
154   m_pNext = 0;
155 }
~CXFA_FMToken()156 CXFA_FMToken::~CXFA_FMToken() {}
CXFA_FMLexer(const CFX_WideStringC & wsFormCalc,CXFA_FMErrorInfo * pErrorInfo)157 CXFA_FMLexer::CXFA_FMLexer(const CFX_WideStringC& wsFormCalc,
158                            CXFA_FMErrorInfo* pErrorInfo) {
159   m_pScript = wsFormCalc.GetPtr();
160   m_uLength = wsFormCalc.GetLength();
161   m_uCurrentLine = 1;
162   m_ptr = m_pScript;
163   m_pToken = 0;
164   m_pErrorInfo = pErrorInfo;
165 }
NextToken()166 CXFA_FMToken* CXFA_FMLexer::NextToken() {
167   CXFA_FMToken* t = 0;
168   if (!m_pToken) {
169     m_pToken = Scan();
170   } else {
171     if (m_pToken->m_pNext) {
172       t = m_pToken->m_pNext;
173       delete m_pToken;
174       m_pToken = t;
175     } else {
176       t = m_pToken;
177       m_pToken = Scan();
178       delete t;
179     }
180   }
181   return m_pToken;
182 }
Scan()183 CXFA_FMToken* CXFA_FMLexer::Scan() {
184   uint16_t ch = 0;
185   CXFA_FMToken* p = new CXFA_FMToken(m_uCurrentLine);
186   if (!XFA_FMDChar::isAvalid(m_ptr)) {
187     ch = XFA_FMDChar::get(m_ptr);
188     Error(FMERR_UNSUPPORTED_CHAR, ch);
189     return p;
190   }
191   int iRet = 0;
192   while (1) {
193     if (!XFA_FMDChar::isAvalid(m_ptr)) {
194       ch = XFA_FMDChar::get(m_ptr);
195       Error(FMERR_UNSUPPORTED_CHAR, ch);
196       return p;
197     }
198     ch = XFA_FMDChar::get(m_ptr);
199     switch (ch) {
200       case 0:
201         p->m_type = TOKeof;
202         return p;
203       case 0x0A:
204         ++m_uCurrentLine;
205         p->m_uLinenum = m_uCurrentLine;
206         XFA_FMDChar::inc(m_ptr);
207         break;
208       case 0x0D:
209         XFA_FMDChar::inc(m_ptr);
210         break;
211       case ';': {
212         const FX_WCHAR* pTemp = 0;
213         Comment(m_ptr, pTemp);
214         m_ptr = pTemp;
215       } break;
216       case '"': {
217         const FX_WCHAR* pTemp = 0;
218         p->m_type = TOKstring;
219         iRet = String(p, m_ptr, pTemp);
220         if (iRet) {
221           return p;
222         }
223         m_ptr = pTemp;
224       }
225         return p;
226       case '0':
227       case '1':
228       case '2':
229       case '3':
230       case '4':
231       case '5':
232       case '6':
233       case '7':
234       case '8':
235       case '9': {
236         p->m_type = TOKnumber;
237         const FX_WCHAR* pTemp = 0;
238         iRet = Number(p, m_ptr, pTemp);
239         m_ptr = pTemp;
240         if (iRet) {
241           Error(FMERR_BAD_SUFFIX_NUMBER);
242           return p;
243         }
244       }
245         return p;
246       case '=':
247         XFA_FMDChar::inc(m_ptr);
248         if (XFA_FMDChar::isAvalid(m_ptr)) {
249           ch = XFA_FMDChar::get(m_ptr);
250           if (ch == '=') {
251             p->m_type = TOKeq;
252             XFA_FMDChar::inc(m_ptr);
253             return p;
254           } else {
255             p->m_type = TOKassign;
256             return p;
257           }
258         } else {
259           ch = XFA_FMDChar::get(m_ptr);
260           Error(FMERR_UNSUPPORTED_CHAR, ch);
261           return p;
262         }
263         break;
264       case '<':
265         XFA_FMDChar::inc(m_ptr);
266         if (XFA_FMDChar::isAvalid(m_ptr)) {
267           ch = XFA_FMDChar::get(m_ptr);
268           if (ch == '=') {
269             p->m_type = TOKle;
270             XFA_FMDChar::inc(m_ptr);
271             return p;
272           } else if (ch == '>') {
273             p->m_type = TOKne;
274             XFA_FMDChar::inc(m_ptr);
275             return p;
276           } else {
277             p->m_type = TOKlt;
278             return p;
279           }
280         } else {
281           ch = XFA_FMDChar::get(m_ptr);
282           Error(FMERR_UNSUPPORTED_CHAR, ch);
283           return p;
284         }
285         break;
286       case '>':
287         XFA_FMDChar::inc(m_ptr);
288         if (XFA_FMDChar::isAvalid(m_ptr)) {
289           ch = XFA_FMDChar::get(m_ptr);
290           if (ch == '=') {
291             p->m_type = TOKge;
292             XFA_FMDChar::inc(m_ptr);
293             return p;
294           } else {
295             p->m_type = TOKgt;
296             return p;
297           }
298         } else {
299           ch = XFA_FMDChar::get(m_ptr);
300           Error(FMERR_UNSUPPORTED_CHAR, ch);
301           return p;
302         }
303         break;
304       case ',':
305         p->m_type = TOKcomma;
306         XFA_FMDChar::inc(m_ptr);
307         return p;
308       case '(':
309         p->m_type = TOKlparen;
310         XFA_FMDChar::inc(m_ptr);
311         return p;
312       case ')':
313         p->m_type = TOKrparen;
314         XFA_FMDChar::inc(m_ptr);
315         return p;
316       case '[':
317         p->m_type = TOKlbracket;
318         XFA_FMDChar::inc(m_ptr);
319         return p;
320       case ']':
321         p->m_type = TOKrbracket;
322         XFA_FMDChar::inc(m_ptr);
323         return p;
324       case '&':
325         XFA_FMDChar::inc(m_ptr);
326         p->m_type = TOKand;
327         return p;
328       case '|':
329         XFA_FMDChar::inc(m_ptr);
330         p->m_type = TOKor;
331         return p;
332       case '+':
333         XFA_FMDChar::inc(m_ptr);
334         p->m_type = TOKplus;
335         return p;
336       case '-':
337         XFA_FMDChar::inc(m_ptr);
338         p->m_type = TOKminus;
339         return p;
340       case '*':
341         XFA_FMDChar::inc(m_ptr);
342         p->m_type = TOKmul;
343         return p;
344       case '/':
345         XFA_FMDChar::inc(m_ptr);
346         if (XFA_FMDChar::isAvalid(m_ptr)) {
347           ch = XFA_FMDChar::get(m_ptr);
348           if (ch == '/') {
349             const FX_WCHAR* pTemp = 0;
350             Comment(m_ptr, pTemp);
351             m_ptr = pTemp;
352             break;
353           } else {
354             p->m_type = TOKdiv;
355             return p;
356           }
357         } else {
358           ch = XFA_FMDChar::get(m_ptr);
359           Error(FMERR_UNSUPPORTED_CHAR, ch);
360           return p;
361         }
362         break;
363       case '.':
364         XFA_FMDChar::inc(m_ptr);
365         if (XFA_FMDChar::isAvalid(m_ptr)) {
366           ch = XFA_FMDChar::get(m_ptr);
367           if (ch == '.') {
368             p->m_type = TOKdotdot;
369             XFA_FMDChar::inc(m_ptr);
370             return p;
371           } else if (ch == '*') {
372             p->m_type = TOKdotstar;
373             XFA_FMDChar::inc(m_ptr);
374             return p;
375           } else if (ch == '#') {
376             p->m_type = TOKdotscream;
377             XFA_FMDChar::inc(m_ptr);
378             return p;
379           } else if (ch <= '9' && ch >= '0') {
380             p->m_type = TOKnumber;
381             const FX_WCHAR* pTemp = 0;
382             XFA_FMDChar::dec(m_ptr);
383             iRet = Number(p, m_ptr, pTemp);
384             m_ptr = pTemp;
385             if (iRet) {
386               Error(FMERR_BAD_SUFFIX_NUMBER);
387             }
388             return p;
389           } else {
390             p->m_type = TOKdot;
391             return p;
392           }
393         } else {
394           ch = XFA_FMDChar::get(m_ptr);
395           Error(FMERR_UNSUPPORTED_CHAR, ch);
396           return p;
397         }
398       case 0x09:
399       case 0x0B:
400       case 0x0C:
401       case 0x20:
402         XFA_FMDChar::inc(m_ptr);
403         break;
404       default: {
405         const FX_WCHAR* pTemp = 0;
406         iRet = Identifiers(p, m_ptr, pTemp);
407         m_ptr = pTemp;
408         if (iRet) {
409           return p;
410         }
411         p->m_type = IsKeyword(p->m_wstring);
412       }
413         return p;
414     }
415   }
416 }
Number(CXFA_FMToken * t,const FX_WCHAR * p,const FX_WCHAR * & pEnd)417 FX_DWORD CXFA_FMLexer::Number(CXFA_FMToken* t,
418                               const FX_WCHAR* p,
419                               const FX_WCHAR*& pEnd) {
420   FX_DOUBLE number = 0;
421   if (XFA_FMDChar::string2number(p, &number, pEnd)) {
422     return 1;
423   }
424   if (pEnd && XFA_FMDChar::isAlpha(pEnd)) {
425     return 1;
426   }
427   t->m_wstring = CFX_WideStringC(p, (pEnd - p));
428   return 0;
429 }
String(CXFA_FMToken * t,const FX_WCHAR * p,const FX_WCHAR * & pEnd)430 FX_DWORD CXFA_FMLexer::String(CXFA_FMToken* t,
431                               const FX_WCHAR* p,
432                               const FX_WCHAR*& pEnd) {
433   const FX_WCHAR* pStart = p;
434   uint16_t ch = 0;
435   XFA_FMDChar::inc(p);
436   ch = XFA_FMDChar::get(p);
437   while (ch) {
438     if (!XFA_FMDChar::isAvalid(p)) {
439       ch = XFA_FMDChar::get(p);
440       pEnd = p;
441       t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
442       Error(FMERR_UNSUPPORTED_CHAR, ch);
443       return 1;
444     }
445     if (ch == '"') {
446       XFA_FMDChar::inc(p);
447       if (!XFA_FMDChar::isAvalid(p)) {
448         ch = XFA_FMDChar::get(p);
449         pEnd = p;
450         t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
451         Error(FMERR_UNSUPPORTED_CHAR, ch);
452         return 1;
453       }
454       ch = XFA_FMDChar::get(p);
455       if (ch == '"') {
456         goto NEXT;
457       } else {
458         break;
459       }
460     }
461   NEXT:
462     XFA_FMDChar::inc(p);
463     ch = XFA_FMDChar::get(p);
464   }
465   pEnd = p;
466   t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
467   return 0;
468 }
Identifiers(CXFA_FMToken * t,const FX_WCHAR * p,const FX_WCHAR * & pEnd)469 FX_DWORD CXFA_FMLexer::Identifiers(CXFA_FMToken* t,
470                                    const FX_WCHAR* p,
471                                    const FX_WCHAR*& pEnd) {
472   const FX_WCHAR* pStart = p;
473   uint16_t ch = 0;
474   ch = XFA_FMDChar::get(p);
475   XFA_FMDChar::inc(p);
476   if (!XFA_FMDChar::isAvalid(p)) {
477     pEnd = p;
478     t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
479     Error(FMERR_UNSUPPORTED_CHAR, ch);
480     return 1;
481   }
482   ch = XFA_FMDChar::get(p);
483   while (ch) {
484     if (!XFA_FMDChar::isAvalid(p)) {
485       pEnd = p;
486       t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
487       Error(FMERR_UNSUPPORTED_CHAR, ch);
488       return 1;
489     }
490     ch = XFA_FMDChar::get(p);
491     if (XFA_FMDChar::isUnicodeAlpha(ch)) {
492       XFA_FMDChar::inc(p);
493     } else {
494       pEnd = p;
495       t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
496       return 0;
497     }
498   }
499   pEnd = p;
500   t->m_wstring = CFX_WideStringC(pStart, (pEnd - pStart));
501   return 0;
502 }
Comment(const FX_WCHAR * p,const FX_WCHAR * & pEnd)503 void CXFA_FMLexer::Comment(const FX_WCHAR* p, const FX_WCHAR*& pEnd) {
504   unsigned ch = 0;
505   XFA_FMDChar::inc(p);
506   ch = XFA_FMDChar::get(p);
507   while (ch) {
508     if (ch == 0x0D) {
509       XFA_FMDChar::inc(p);
510       pEnd = p;
511       return;
512     }
513     if (ch == 0x0A) {
514       ++m_uCurrentLine;
515       XFA_FMDChar::inc(p);
516       pEnd = p;
517       return;
518     }
519     XFA_FMDChar::inc(p);
520     ch = XFA_FMDChar::get(p);
521   }
522   pEnd = p;
523 }
IsKeyword(const CFX_WideStringC & str)524 XFA_FM_TOKEN CXFA_FMLexer::IsKeyword(const CFX_WideStringC& str) {
525   int32_t iLength = str.GetLength();
526   uint32_t uHash = FX_HashCode_String_GetW(str.GetPtr(), iLength, TRUE);
527   int32_t iStart = KEYWORD_START, iEnd = KEYWORD_END;
528   int32_t iMid = (iStart + iEnd) / 2;
529   XFA_FMKeyword keyword;
530   do {
531     iMid = (iStart + iEnd) / 2;
532     keyword = keyWords[iMid];
533     if (uHash == keyword.m_uHash) {
534       return keyword.m_type;
535     } else if (uHash < keyword.m_uHash) {
536       iEnd = iMid - 1;
537     } else {
538       iStart = iMid + 1;
539     }
540   } while (iStart <= iEnd);
541   return TOKidentifier;
542 }
~CXFA_FMLexer()543 CXFA_FMLexer::~CXFA_FMLexer() {
544   m_pScript = 0;
545   m_ptr = m_pScript;
546   if (m_pToken) {
547     CXFA_FMToken* t1 = m_pToken;
548     CXFA_FMToken* t2 = t1->m_pNext;
549     while (t2) {
550       delete t1;
551       t1 = t2;
552       t2 = t2->m_pNext;
553     }
554     delete m_pToken;
555     m_pToken = 0;
556   }
557   m_pErrorInfo = 0;
558 }
Error(XFA_FM_ERRMSG msg,...)559 void CXFA_FMLexer::Error(XFA_FM_ERRMSG msg, ...) {
560   m_pErrorInfo->linenum = m_uCurrentLine;
561   const FX_WCHAR* lpMessageInfo = XFA_FM_ErrorMsg(msg);
562   va_list ap;
563   va_start(ap, msg);
564   m_pErrorInfo->message.FormatV(lpMessageInfo, ap);
565   va_end(ap);
566 }
HasError() const567 FX_BOOL CXFA_FMLexer::HasError() const {
568   if (m_pErrorInfo->message.IsEmpty()) {
569     return FALSE;
570   }
571   return TRUE;
572 }
573