1 /*
2  * Copyright (C) 2018 Google, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.google.escapevelocity;
17 
18 import com.google.common.base.CharMatcher;
19 import com.google.common.base.Verify;
20 import com.google.common.collect.ImmutableList;
21 import com.google.common.collect.ImmutableListMultimap;
22 import com.google.common.collect.Iterables;
23 import com.google.common.primitives.Chars;
24 import com.google.common.primitives.Ints;
25 import com.google.escapevelocity.DirectiveNode.SetNode;
26 import com.google.escapevelocity.ExpressionNode.BinaryExpressionNode;
27 import com.google.escapevelocity.ExpressionNode.NotExpressionNode;
28 import com.google.escapevelocity.ReferenceNode.IndexReferenceNode;
29 import com.google.escapevelocity.ReferenceNode.MemberReferenceNode;
30 import com.google.escapevelocity.ReferenceNode.MethodReferenceNode;
31 import com.google.escapevelocity.ReferenceNode.PlainReferenceNode;
32 import com.google.escapevelocity.TokenNode.CommentTokenNode;
33 import com.google.escapevelocity.TokenNode.ElseIfTokenNode;
34 import com.google.escapevelocity.TokenNode.ElseTokenNode;
35 import com.google.escapevelocity.TokenNode.EndTokenNode;
36 import com.google.escapevelocity.TokenNode.EofNode;
37 import com.google.escapevelocity.TokenNode.ForEachTokenNode;
38 import com.google.escapevelocity.TokenNode.IfTokenNode;
39 import com.google.escapevelocity.TokenNode.MacroDefinitionTokenNode;
40 import com.google.escapevelocity.TokenNode.NestedTokenNode;
41 import java.io.IOException;
42 import java.io.LineNumberReader;
43 import java.io.Reader;
44 
45 /**
46  * A parser that reads input from the given {@link Reader} and parses it to produce a
47  * {@link Template}.
48  *
49  * @author emcmanus@google.com (Éamonn McManus)
50  */
51 class Parser {
52   private static final int EOF = -1;
53 
54   private final LineNumberReader reader;
55   private final String resourceName;
56   private final Template.ResourceOpener resourceOpener;
57 
58   /**
59    * The invariant of this parser is that {@code c} is always the next character of interest.
60    * This means that we almost never have to "unget" a character by reading too far. For example,
61    * after we parse an integer, {@code c} will be the first character after the integer, which is
62    * exactly the state we will be in when there are no more digits.
63    *
64    * <p>Sometimes we need to read two characters ahead, and in that case we use {@link #pushback}.
65    */
66   private int c;
67 
68   /**
69    * A single character of pushback. If this is not negative, the {@link #next()} method will
70    * return it instead of reading a character.
71    */
72   private int pushback = -1;
73 
Parser(Reader reader, String resourceName, Template.ResourceOpener resourceOpener)74   Parser(Reader reader, String resourceName, Template.ResourceOpener resourceOpener)
75       throws IOException {
76     this.reader = new LineNumberReader(reader);
77     this.reader.setLineNumber(1);
78     next();
79     this.resourceName = resourceName;
80     this.resourceOpener = resourceOpener;
81   }
82 
83   /**
84    * Parse the input completely to produce a {@link Template}.
85    *
86    * <p>Parsing happens in two phases. First, we parse a sequence of "tokens", where tokens include
87    * entire references such as <pre>
88    *    ${x.foo()[23]}
89    * </pre>or entire directives such as<pre>
90    *    #set ($x = $y + $z)
91    * </pre>But tokens do not span complex constructs. For example,<pre>
92    *    #if ($x == $y) something #end
93    * </pre>is three tokens:<pre>
94    *    #if ($x == $y)
95    *    (literal text " something ")
96    *   #end
97    * </pre>
98    *
99    * <p>The second phase then takes the sequence of tokens and constructs a parse tree out of it.
100    * Some nodes in the parse tree will be unchanged from the token sequence, such as the <pre>
101    *    ${x.foo()[23]}
102    *    #set ($x = $y + $z)
103    * </pre> examples above. But a construct such as the {@code #if ... #end} mentioned above will
104    * become a single IfNode in the parse tree in the second phase.
105    *
106    * <p>The main reason for this approach is that Velocity has two kinds of lexical contexts. At the
107    * top level, there can be arbitrary literal text; references like <code>${x.foo()}</code>; and
108    * directives like {@code #if} or {@code #set}. Inside the parentheses of a directive, however,
109    * neither arbitrary text nor directives can appear, but expressions can, so we need to tokenize
110    * the inside of <pre>
111    *    #if ($x == $a + $b)
112    * </pre> as the five tokens "$x", "==", "$a", "+", "$b". Rather than having a classical
113    * parser/lexer combination, where the lexer would need to switch between these two modes, we
114    * replace the lexer with an ad-hoc parser that is the first phase described above, and we
115    * define a simple parser over the resultant tokens that is the second phase.
116    */
parse()117   Template parse() throws IOException {
118     ImmutableList<Node> tokens = parseTokens();
119     return new Reparser(tokens).reparse();
120   }
121 
parseTokens()122   private ImmutableList<Node> parseTokens() throws IOException {
123     ImmutableList.Builder<Node> tokens = ImmutableList.builder();
124     Node token;
125     do {
126       token = parseNode();
127       tokens.add(token);
128     } while (!(token instanceof EofNode));
129     return tokens.build();
130   }
131 
lineNumber()132   private int lineNumber() {
133     return reader.getLineNumber();
134   }
135 
136   /**
137    * Gets the next character from the reader and assigns it to {@code c}. If there are no more
138    * characters, sets {@code c} to {@link #EOF} if it is not already.
139    */
next()140   private void next() throws IOException {
141     if (c != EOF) {
142       if (pushback < 0) {
143         c = reader.read();
144       } else {
145         c = pushback;
146         pushback = -1;
147       }
148     }
149   }
150 
151   /**
152    * Saves the current character {@code c} to be read again, and sets {@code c} to the given
153    * {@code c1}. Suppose the text contains {@code xy} and we have just read {@code y}.
154    * So {@code c == 'y'}. Now if we execute {@code pushback('x')}, we will have
155    * {@code c == 'x'} and the next call to {@link #next()} will set {@code c == 'y'}. Subsequent
156    * calls to {@code next()} will continue reading from {@link #reader}. So the pushback
157    * essentially puts us back in the state we were in before we read {@code y}.
158    */
pushback(int c1)159   private void pushback(int c1) {
160     pushback = c;
161     c = c1;
162   }
163 
164   /**
165    * If {@code c} is a space character, keeps reading until {@code c} is a non-space character or
166    * there are no more characters.
167    */
skipSpace()168   private void skipSpace() throws IOException {
169     while (Character.isWhitespace(c)) {
170       next();
171     }
172   }
173 
174   /**
175    * Gets the next character from the reader, and if it is a space character, keeps reading until
176    * a non-space character is found.
177    */
nextNonSpace()178   private void nextNonSpace() throws IOException {
179     next();
180     skipSpace();
181   }
182 
183   /**
184    * Skips any space in the reader, and then throws an exception if the first non-space character
185    * found is not the expected one. Sets {@code c} to the first character after that expected one.
186    */
expect(char expected)187   private void expect(char expected) throws IOException {
188     skipSpace();
189     if (c == expected) {
190       next();
191     } else {
192       throw parseException("Expected " + expected);
193     }
194   }
195 
196   /**
197    * Parses a single node from the reader, as part of the first parsing phase.
198    * <pre>{@code
199    * <template> -> <empty> |
200    *               <directive> <template> |
201    *               <non-directive> <template>
202    * }</pre>
203    */
parseNode()204   private Node parseNode() throws IOException {
205     if (c == '#') {
206       next();
207       switch (c) {
208         case '#':
209           return parseLineComment();
210         case '*':
211           return parseBlockComment();
212         case '[':
213           return parseHashSquare();
214         case '{':
215           return parseDirective();
216         default:
217           if (isAsciiLetter(c)) {
218             return parseDirective();
219           } else {
220             // For consistency with Velocity, we treat # not followed by a letter or one of the
221             // characters above as a plain character, and we treat #$foo as a literal # followed by
222             // the reference $foo.
223             return parsePlainText('#');
224           }
225       }
226     }
227     if (c == EOF) {
228       return new EofNode(resourceName, lineNumber());
229     }
230     return parseNonDirective();
231   }
232 
parseHashSquare()233   private Node parseHashSquare() throws IOException {
234     // We've just seen #[ which might be the start of a #[[quoted block]]#. If the next character
235     // is not another [ then it's not a quoted block, but it *is* a literal #[ followed by whatever
236     // that next character is.
237     assert c == '[';
238     next();
239     if (c != '[') {
240       return parsePlainText(new StringBuilder("#["));
241     }
242     int startLine = lineNumber();
243     next();
244     StringBuilder sb = new StringBuilder();
245     while (true) {
246       if (c == EOF) {
247         throw new ParseException(
248             "Unterminated #[[ - did not see matching ]]#", resourceName, startLine);
249       }
250       if (c == '#') {
251         // This might be the last character of ]]# or it might just be a random #.
252         int len = sb.length();
253         if (len > 1 && sb.charAt(len - 1) == ']' && sb.charAt(len - 2) == ']') {
254           next();
255           break;
256         }
257       }
258       sb.append((char) c);
259       next();
260     }
261     String quoted = sb.substring(0, sb.length() - 2);
262     return new ConstantExpressionNode(resourceName, lineNumber(), quoted);
263   }
264 
265   /**
266    * Parses a single non-directive node from the reader.
267    * <pre>{@code
268    * <non-directive> -> <reference> |
269    *                    <text containing neither $ nor #>
270    * }</pre>
271    */
parseNonDirective()272   private Node parseNonDirective() throws IOException {
273     if (c == '$') {
274       next();
275       if (isAsciiLetter(c) || c == '{') {
276         return parseReference();
277       } else {
278         return parsePlainText('$');
279       }
280     } else {
281       int firstChar = c;
282       next();
283       return parsePlainText(firstChar);
284     }
285   }
286 
287   /**
288    * Parses a single directive token from the reader. Directives can be spelled with or without
289    * braces, for example {@code #if} or {@code #{if}}. We omit the brace spelling in the productions
290    * here: <pre>{@code
291    * <directive> -> <if-token> |
292    *                <else-token> |
293    *                <elseif-token> |
294    *                <end-token> |
295    *                <foreach-token> |
296    *                <set-token> |
297    *                <parse-token> |
298    *                <macro-token> |
299    *                <macro-call> |
300    *                <comment>
301    * }</pre>
302    */
parseDirective()303   private Node parseDirective() throws IOException {
304     String directive;
305     if (c == '{') {
306       next();
307       directive = parseId("Directive inside #{...}");
308       expect('}');
309     } else {
310       directive = parseId("Directive");
311     }
312     Node node;
313     switch (directive) {
314       case "end":
315         node = new EndTokenNode(resourceName, lineNumber());
316         break;
317       case "if":
318       case "elseif":
319         node = parseIfOrElseIf(directive);
320         break;
321       case "else":
322         node = new ElseTokenNode(resourceName, lineNumber());
323         break;
324       case "foreach":
325         node = parseForEach();
326         break;
327       case "set":
328         node = parseSet();
329         break;
330       case "parse":
331         node = parseParse();
332         break;
333       case "macro":
334         node = parseMacroDefinition();
335         break;
336       default:
337         node = parsePossibleMacroCall(directive);
338     }
339     // Velocity skips a newline after any directive.
340     // TODO(emcmanus): in fact it also skips space before the newline, which should be implemented.
341     if (c == '\n') {
342       next();
343     }
344     return node;
345   }
346 
347   /**
348    * Parses the condition following {@code #if} or {@code #elseif}.
349    * <pre>{@code
350    * <if-token> -> #if ( <condition> )
351    * <elseif-token> -> #elseif ( <condition> )
352    * }</pre>
353    *
354    * @param directive either {@code "if"} or {@code "elseif"}.
355    */
parseIfOrElseIf(String directive)356   private Node parseIfOrElseIf(String directive) throws IOException {
357     expect('(');
358     ExpressionNode condition = parseExpression();
359     expect(')');
360     return directive.equals("if") ? new IfTokenNode(condition) : new ElseIfTokenNode(condition);
361   }
362 
363   /**
364    * Parses a {@code #foreach} token from the reader. <pre>{@code
365    * <foreach-token> -> #foreach ( $<id> in <expression> )
366    * }</pre>
367    */
parseForEach()368   private Node parseForEach() throws IOException {
369     expect('(');
370     expect('$');
371     String var = parseId("For-each variable");
372     skipSpace();
373     boolean bad = false;
374     if (c != 'i') {
375       bad = true;
376     } else {
377       next();
378       if (c != 'n') {
379         bad = true;
380       }
381     }
382     if (bad) {
383       throw parseException("Expected 'in' for #foreach");
384     }
385     next();
386     ExpressionNode collection = parseExpression();
387     expect(')');
388     return new ForEachTokenNode(var, collection);
389   }
390 
391   /**
392    * Parses a {@code #set} token from the reader. <pre>{@code
393    * <set-token> -> #set ( $<id> = <expression>)
394    * }</pre>
395    */
parseSet()396   private Node parseSet() throws IOException {
397     expect('(');
398     expect('$');
399     String var = parseId("#set variable");
400     expect('=');
401     ExpressionNode expression = parseExpression();
402     expect(')');
403     return new SetNode(var, expression);
404   }
405 
406   /**
407    * Parses a {@code #parse} token from the reader. <pre>{@code
408    * <parse-token> -> #parse ( <string-literal> )
409    * }</pre>
410    *
411    * <p>The way this works is inconsistent with Velocity. In Velocity, the {@code #parse} directive
412    * is evaluated when it is encountered during template evaluation. That means that the argument
413    * can be a variable, and it also means that you can use {@code #if} to choose whether or not
414    * to do the {@code #parse}. Neither of those is true in EscapeVelocity. The contents of the
415    * {@code #parse} are integrated into the containing template pretty much as if they had been
416    * written inline. That also means that EscapeVelocity allows forward references to macros
417    * inside {@code #parse} directives, which Velocity does not.
418    */
parseParse()419   private Node parseParse() throws IOException {
420     expect('(');
421     skipSpace();
422     if (c != '"' && c != '\'') {
423       throw parseException("#parse only supported with string literal argument");
424     }
425     ExpressionNode nestedResourceNameExpression = parseStringLiteral(c, false);
426     String nestedResourceName = nestedResourceNameExpression.evaluate(null).toString();
427     expect(')');
428     try (Reader nestedReader = resourceOpener.openResource(nestedResourceName)) {
429       Parser nestedParser = new Parser(nestedReader, nestedResourceName, resourceOpener);
430       ImmutableList<Node> nestedTokens = nestedParser.parseTokens();
431       return new NestedTokenNode(nestedResourceName, nestedTokens);
432     }
433   }
434 
435   /**
436    * Parses a {@code #macro} token from the reader. <pre>{@code
437    * <macro-token> -> #macro ( <id> <macro-parameter-list> )
438    * <macro-parameter-list> -> <empty> |
439    *                           $<id> <macro-parameter-list>
440    * }</pre>
441    *
442    * <p>Macro parameters are optionally separated by commas.
443    */
parseMacroDefinition()444   private Node parseMacroDefinition() throws IOException {
445     expect('(');
446     skipSpace();
447     String name = parseId("Macro name");
448     ImmutableList.Builder<String> parameterNames = ImmutableList.builder();
449     while (true) {
450       skipSpace();
451       if (c == ')') {
452         next();
453         break;
454       }
455       if (c == ',') {
456         next();
457         skipSpace();
458       }
459       if (c != '$') {
460         throw parseException("Macro parameters should look like $name");
461       }
462       next();
463       parameterNames.add(parseId("Macro parameter name"));
464     }
465     return new MacroDefinitionTokenNode(resourceName, lineNumber(), name, parameterNames.build());
466   }
467 
468   /**
469    * Parses an identifier after {@code #} that is not one of the standard directives. The assumption
470    * is that it is a call of a macro that is defined in the template. Macro definitions are
471    * extracted from the template during the second parsing phase (and not during evaluation of the
472    * template as you might expect). This means that a macro can be called before it is defined.
473    * <pre>{@code
474    * <macro-call> -> # <id> ( <expression-list> )
475    * <expression-list> -> <empty> |
476    *                      <expression> <optional-comma> <expression-list>
477    * <optional-comma> -> <empty> | ,
478    * }</pre>
479    */
parsePossibleMacroCall(String directive)480   private Node parsePossibleMacroCall(String directive) throws IOException {
481     skipSpace();
482     if (c != '(') {
483       throw parseException("Unrecognized directive #" + directive);
484     }
485     next();
486     ImmutableList.Builder<Node> parameterNodes = ImmutableList.builder();
487     while (true) {
488       skipSpace();
489       if (c == ')') {
490         next();
491         break;
492       }
493       parameterNodes.add(parsePrimary());
494       if (c == ',') {
495         // The documentation doesn't say so, but you can apparently have an optional comma in
496         // macro calls.
497         next();
498       }
499     }
500     return new DirectiveNode.MacroCallNode(
501         resourceName, lineNumber(), directive, parameterNodes.build());
502   }
503 
504   /**
505    * Parses and discards a line comment, which is {@code ##} followed by any number of characters
506    * up to and including the next newline.
507    */
parseLineComment()508   private Node parseLineComment() throws IOException {
509     int lineNumber = lineNumber();
510     while (c != '\n' && c != EOF) {
511       next();
512     }
513     next();
514     return new CommentTokenNode(resourceName, lineNumber);
515   }
516 
517   /**
518    * Parses and discards a block comment, which is {@code #*} followed by everything up to and
519    * including the next {@code *#}.
520    */
parseBlockComment()521   private Node parseBlockComment() throws IOException {
522     assert c == '*';
523     int startLine = lineNumber();
524     int lastC = '\0';
525     next();
526     // Consistently with Velocity, we do not make it an error if a #* comment is not closed.
527     while (!(lastC == '*' && c == '#') && c != EOF) {
528       lastC = c;
529       next();
530     }
531     next(); // this may read EOF twice, which works
532     return new CommentTokenNode(resourceName, startLine);
533   }
534 
535   /**
536    * Parses plain text, which is text that contains neither {@code $} nor {@code #}. The given
537    * {@code firstChar} is the first character of the plain text, and {@link #c} is the second
538    * (if the plain text is more than one character).
539    */
parsePlainText(int firstChar)540   private Node parsePlainText(int firstChar) throws IOException {
541     StringBuilder sb = new StringBuilder();
542     sb.appendCodePoint(firstChar);
543     return parsePlainText(sb);
544   }
545 
parsePlainText(StringBuilder sb)546   private Node parsePlainText(StringBuilder sb) throws IOException {
547     literal:
548     while (true) {
549       switch (c) {
550         case EOF:
551         case '$':
552         case '#':
553           break literal;
554         default:
555           // Just some random character.
556       }
557       sb.appendCodePoint(c);
558       next();
559     }
560     return new ConstantExpressionNode(resourceName, lineNumber(), sb.toString());
561   }
562 
563   /**
564    * Parses a reference, which is everything that can start with a {@code $}. References can
565    * optionally be enclosed in braces, so {@code $x} and {@code ${x}} are the same. Braces are
566    * useful when text after the reference would otherwise be parsed as part of it. For example,
567    * {@code ${x}y} is a reference to the variable {@code $x}, followed by the plain text {@code y}.
568    * Of course {@code $xy} would be a reference to the variable {@code $xy}.
569    * <pre>{@code
570    * <reference> -> $<reference-no-brace> |
571    *                ${<reference-no-brace>}
572    * }</pre>
573    *
574    * <p>On entry to this method, {@link #c} is the character immediately after the {@code $}.
575    */
parseReference()576   private Node parseReference() throws IOException {
577     if (c == '{') {
578       next();
579       if (!isAsciiLetter(c)) {
580         return parsePlainText(new StringBuilder("${"));
581       }
582       ReferenceNode node = parseReferenceNoBrace();
583       expect('}');
584       return node;
585     } else {
586       return parseReferenceNoBrace();
587     }
588   }
589 
590   /**
591    * Same as {@link #parseReference()}, except it really must be a reference. A {@code $} in
592    * normal text doesn't start a reference if it is not followed by an identifier. But in an
593    * expression, for example in {@code #if ($x == 23)}, {@code $} must be followed by an
594    * identifier.
595    */
parseRequiredReference()596   private ReferenceNode parseRequiredReference() throws IOException {
597     if (c == '{') {
598       next();
599       ReferenceNode node = parseReferenceNoBrace();
600       expect('}');
601       return node;
602     } else {
603       return parseReferenceNoBrace();
604     }
605   }
606 
607   /**
608    * Parses a reference, in the simple form without braces.
609    * <pre>{@code
610    * <reference-no-brace> -> <id><reference-suffix>
611    * }</pre>
612    */
parseReferenceNoBrace()613   private ReferenceNode parseReferenceNoBrace() throws IOException {
614     String id = parseId("Reference");
615     ReferenceNode lhs = new PlainReferenceNode(resourceName, lineNumber(), id);
616     return parseReferenceSuffix(lhs);
617   }
618 
619   /**
620    * Parses the modifiers that can appear at the tail of a reference.
621    * <pre>{@code
622    * <reference-suffix> -> <empty> |
623    *                       <reference-member> |
624    *                       <reference-index>
625    * }</pre>
626    *
627    * @param lhs the reference node representing the first part of the reference
628    * {@code $x} in {@code $x.foo} or {@code $x.foo()}, or later {@code $x.y} in {@code $x.y.z}.
629    */
parseReferenceSuffix(ReferenceNode lhs)630   private ReferenceNode parseReferenceSuffix(ReferenceNode lhs) throws IOException {
631     switch (c) {
632       case '.':
633         return parseReferenceMember(lhs);
634       case '[':
635         return parseReferenceIndex(lhs);
636       default:
637         return lhs;
638     }
639   }
640 
641   /**
642    * Parses a reference member, which is either a property reference like {@code $x.y} or a method
643    * call like {@code $x.y($z)}.
644    * <pre>{@code
645    * <reference-member> -> .<id><reference-property-or-method><reference-suffix>
646    * <reference-property-or-method> -> <id> |
647    *                                   <id> ( <method-parameter-list> )
648    * }</pre>
649    *
650    * @param lhs the reference node representing what appears to the left of the dot, like the
651    *     {@code $x} in {@code $x.foo} or {@code $x.foo()}.
652    */
parseReferenceMember(ReferenceNode lhs)653   private ReferenceNode parseReferenceMember(ReferenceNode lhs) throws IOException {
654     assert c == '.';
655     next();
656     if (!isAsciiLetter(c)) {
657       // We've seen something like `$foo.!`, so it turns out it's not a member after all.
658       pushback('.');
659       return lhs;
660     }
661     String id = parseId("Member");
662     ReferenceNode reference;
663     if (c == '(') {
664       reference = parseReferenceMethodParams(lhs, id);
665     } else {
666       reference = new MemberReferenceNode(lhs, id);
667     }
668     return parseReferenceSuffix(reference);
669   }
670 
671   /**
672    * Parses the parameters to a method reference, like {@code $foo.bar($a, $b)}.
673    * <pre>{@code
674    * <method-parameter-list> -> <empty> |
675    *                            <non-empty-method-parameter-list>
676    * <non-empty-method-parameter-list> -> <expression> |
677    *                                      <expression> , <non-empty-method-parameter-list>
678    * }</pre>
679    *
680    * @param lhs the reference node representing what appears to the left of the dot, like the
681    *     {@code $x} in {@code $x.foo()}.
682    */
parseReferenceMethodParams(ReferenceNode lhs, String id)683   private ReferenceNode parseReferenceMethodParams(ReferenceNode lhs, String id)
684       throws IOException {
685     assert c == '(';
686     nextNonSpace();
687     ImmutableList.Builder<ExpressionNode> args = ImmutableList.builder();
688     if (c != ')') {
689       args.add(parseExpression());
690       while (c == ',') {
691         nextNonSpace();
692         args.add(parseExpression());
693       }
694       if (c != ')') {
695         throw parseException("Expected )");
696       }
697     }
698     assert c == ')';
699     next();
700     return new MethodReferenceNode(lhs, id, args.build());
701   }
702 
703   /**
704    * Parses an index suffix to a method, like {@code $x[$i]}.
705    * <pre>{@code
706    * <reference-index> -> [ <expression> ]
707    * }</pre>
708    *
709    * @param lhs the reference node representing what appears to the left of the dot, like the
710    *     {@code $x} in {@code $x[$i]}.
711    */
parseReferenceIndex(ReferenceNode lhs)712   private ReferenceNode parseReferenceIndex(ReferenceNode lhs) throws IOException {
713     assert c == '[';
714     next();
715     ExpressionNode index = parseExpression();
716     if (c != ']') {
717       throw parseException("Expected ]");
718     }
719     next();
720     ReferenceNode reference = new IndexReferenceNode(lhs, index);
721     return parseReferenceSuffix(reference);
722   }
723 
724   enum Operator {
725     /**
726      * A dummy operator with low precedence. When parsing subexpressions, we always stop when we
727      * reach an operator of lower precedence than the "current precedence". For example, when
728      * parsing {@code 1 + 2 * 3 + 4}, we'll stop parsing the subexpression {@code * 3 + 4} when
729      * we reach the {@code +} because it has lower precedence than {@code *}. This dummy operator,
730      * then, behaves like {@code +} when the minimum precedence is {@code *}. We also return it
731      * if we're looking for an operator and don't find one. If this operator is {@code ⊙}, it's as
732      * if our expressions are bracketed with it, like {@code ⊙ 1 + 2 * 3 + 4 ⊙}.
733      */
734     STOP("", 0),
735 
736     // If a one-character operator is a prefix of a two-character operator, like < and <=, then
737     // the one-character operator must come first.
738     OR("||", 1),
739     AND("&&", 2),
740     EQUAL("==", 3), NOT_EQUAL("!=", 3),
741     LESS("<", 4), LESS_OR_EQUAL("<=", 4), GREATER(">", 4), GREATER_OR_EQUAL(">=", 4),
742     PLUS("+", 5), MINUS("-", 5),
743     TIMES("*", 6), DIVIDE("/", 6), REMAINDER("%", 6);
744 
745     final String symbol;
746     final int precedence;
747 
Operator(String symbol, int precedence)748     Operator(String symbol, int precedence) {
749       this.symbol = symbol;
750       this.precedence = precedence;
751     }
752 
753     @Override
toString()754     public String toString() {
755       return symbol;
756     }
757   }
758 
759   /**
760    * Maps a code point to the operators that begin with that code point. For example, maps
761    * {@code <} to {@code LESS} and {@code LESS_OR_EQUAL}.
762    */
763   private static final ImmutableListMultimap<Integer, Operator> CODE_POINT_TO_OPERATORS;
764   static {
765     ImmutableListMultimap.Builder<Integer, Operator> builder = ImmutableListMultimap.builder();
766     for (Operator operator : Operator.values()) {
767       if (operator != Operator.STOP) {
768         builder.put((int) operator.symbol.charAt(0), operator);
769       }
770     }
771     CODE_POINT_TO_OPERATORS = builder.build();
772   }
773 
774   /**
775    * Parses an expression, which can occur within a directive like {@code #if} or {@code #set},
776    * or within a reference like {@code $x[$a + $b]} or {@code $x.m($a + $b)}.
777    * <pre>{@code
778    * <expression> -> <and-expression> |
779    *                 <expression> || <and-expression>
780    * <and-expression> -> <relational-expression> |
781    *                     <and-expression> && <relational-expression>
782    * <equality-exression> -> <relational-expression> |
783    *                         <equality-expression> <equality-op> <relational-expression>
784    * <equality-op> -> == | !=
785    * <relational-expression> -> <additive-expression> |
786    *                            <relational-expression> <relation> <additive-expression>
787    * <relation> -> < | <= | > | >=
788    * <additive-expression> -> <multiplicative-expression> |
789    *                          <additive-expression> <add-op> <multiplicative-expression>
790    * <add-op> -> + | -
791    * <multiplicative-expression> -> <unary-expression> |
792    *                                <multiplicative-expression> <mult-op> <unary-expression>
793    * <mult-op> -> * | / | %
794    * }</pre>
795    */
parseExpression()796   private ExpressionNode parseExpression() throws IOException {
797     ExpressionNode lhs = parseUnaryExpression();
798     return new OperatorParser().parse(lhs, 1);
799   }
800 
801   /**
802    * An operator-precedence parser for the binary operations we understand. It implements an
803    * <a href="http://en.wikipedia.org/wiki/Operator-precedence_parser">algorithm</a> from Wikipedia
804    * that uses recursion rather than having an explicit stack of operators and values.
805    */
806   private class OperatorParser {
807     /**
808      * The operator we have just scanned, in the same way that {@link #c} is the character we have
809      * just read. If we were not able to scan an operator, this will be {@link Operator#STOP}.
810      */
811     private Operator currentOperator;
812 
OperatorParser()813     OperatorParser() throws IOException {
814       nextOperator();
815     }
816 
817     /**
818      * Parse a subexpression whose left-hand side is {@code lhs} and where we only consider
819      * operators with precedence at least {@code minPrecedence}.
820      *
821      * @return the parsed subexpression
822      */
parse(ExpressionNode lhs, int minPrecedence)823     ExpressionNode parse(ExpressionNode lhs, int minPrecedence) throws IOException {
824       while (currentOperator.precedence >= minPrecedence) {
825         Operator operator = currentOperator;
826         ExpressionNode rhs = parseUnaryExpression();
827         nextOperator();
828         while (currentOperator.precedence > operator.precedence) {
829           rhs = parse(rhs, currentOperator.precedence);
830         }
831         lhs = new BinaryExpressionNode(lhs, operator, rhs);
832       }
833       return lhs;
834     }
835 
836     /**
837      * Updates {@link #currentOperator} to be an operator read from the input,
838      * or {@link Operator#STOP} if there is none.
839      */
nextOperator()840     private void nextOperator() throws IOException {
841       skipSpace();
842       ImmutableList<Operator> possibleOperators = CODE_POINT_TO_OPERATORS.get(c);
843       if (possibleOperators.isEmpty()) {
844         currentOperator = Operator.STOP;
845         return;
846       }
847       char firstChar = Chars.checkedCast(c);
848       next();
849       Operator operator = null;
850       for (Operator possibleOperator : possibleOperators) {
851         if (possibleOperator.symbol.length() == 1) {
852           Verify.verify(operator == null);
853           operator = possibleOperator;
854         } else if (possibleOperator.symbol.charAt(1) == c) {
855           next();
856           operator = possibleOperator;
857         }
858       }
859       if (operator == null) {
860         throw parseException(
861             "Expected " + Iterables.getOnlyElement(possibleOperators) + ", not just " + firstChar);
862       }
863       currentOperator = operator;
864     }
865   }
866 
867   /**
868    * Parses an expression not containing any operators (except inside parentheses).
869    * <pre>{@code
870    * <unary-expression> -> <primary> |
871    *                       ( <expression> ) |
872    *                       ! <unary-expression>
873    * }</pre>
874    */
parseUnaryExpression()875   private ExpressionNode parseUnaryExpression() throws IOException {
876     skipSpace();
877     ExpressionNode node;
878     if (c == '(') {
879       nextNonSpace();
880       node = parseExpression();
881       expect(')');
882       skipSpace();
883       return node;
884     } else if (c == '!') {
885       next();
886       node = new NotExpressionNode(parseUnaryExpression());
887       skipSpace();
888       return node;
889     } else {
890       return parsePrimary();
891     }
892   }
893 
894   /**
895    * Parses an expression containing only literals or references.
896    * <pre>{@code
897    * <primary> -> <reference> |
898    *              <string-literal> |
899    *              <integer-literal> |
900    *              <boolean-literal>
901    * }</pre>
902    */
parsePrimary()903   private ExpressionNode parsePrimary() throws IOException {
904     ExpressionNode node;
905     if (c == '$') {
906       next();
907       node = parseRequiredReference();
908     } else if (c == '"') {
909       node = parseStringLiteral(c, true);
910     } else if (c == '\'') {
911       node = parseStringLiteral(c, false);
912     } else if (c == '-') {
913       // Velocity does not have a negation operator. If we see '-' it must be the start of a
914       // negative integer literal.
915       next();
916       node = parseIntLiteral("-");
917     } else if (isAsciiDigit(c)) {
918       node = parseIntLiteral("");
919     } else if (isAsciiLetter(c)) {
920       node = parseBooleanLiteral();
921     } else {
922       throw parseException("Expected an expression");
923     }
924     skipSpace();
925     return node;
926   }
927 
928   /**
929    * Parses a string literal, which may contain references to be expanded. Examples are
930    * {@code "foo"} or {@code "foo${bar}baz"}.
931    * <pre>{@code
932    * <string-literal> -> <double-quote-literal> | <single-quote-literal>
933    * <double-quote-literal> -> " <double-quote-string-contents> "
934    * <double-quote-string-contents> -> <empty> |
935    *                                   <reference> <double-quote-string-contents> |
936    *                                   <character-other-than-"> <double-quote-string-contents>
937    * <single-quote-literal> -> ' <single-quote-string-contents> '
938    * <single-quote-string-contents> -> <empty> |
939    *                                   <character-other-than-'> <single-quote-string-contents>
940    * }</pre>
941    */
parseStringLiteral(int quote, boolean allowReferences)942   private ExpressionNode parseStringLiteral(int quote, boolean allowReferences)
943       throws IOException {
944     assert c == quote;
945     next();
946     ImmutableList.Builder<Node> nodes = ImmutableList.builder();
947     StringBuilder sb = new StringBuilder();
948     while (c != quote) {
949       switch (c) {
950         case '\n':
951         case EOF:
952           throw parseException("Unterminated string constant");
953         case '\\':
954           throw parseException(
955               "Escapes in string constants are not currently supported");
956         case '$':
957           if (allowReferences) {
958             if (sb.length() > 0) {
959               nodes.add(new ConstantExpressionNode(resourceName, lineNumber(), sb.toString()));
960               sb.setLength(0);
961             }
962             next();
963             nodes.add(parseReference());
964             break;
965           }
966           // fall through
967         default:
968           sb.appendCodePoint(c);
969           next();
970       }
971     }
972     next();
973     if (sb.length() > 0) {
974       nodes.add(new ConstantExpressionNode(resourceName, lineNumber(), sb.toString()));
975     }
976     return new StringLiteralNode(resourceName, lineNumber(), nodes.build());
977   }
978 
979   private static class StringLiteralNode extends ExpressionNode {
980     private final ImmutableList<Node> nodes;
981 
StringLiteralNode(String resourceName, int lineNumber, ImmutableList<Node> nodes)982     StringLiteralNode(String resourceName, int lineNumber, ImmutableList<Node> nodes) {
983       super(resourceName, lineNumber);
984       this.nodes = nodes;
985     }
986 
987     @Override
evaluate(EvaluationContext context)988     Object evaluate(EvaluationContext context) {
989       StringBuilder sb = new StringBuilder();
990       for (Node node : nodes) {
991         sb.append(node.evaluate(context));
992       }
993       return sb.toString();
994     }
995   }
996 
parseIntLiteral(String prefix)997   private ExpressionNode parseIntLiteral(String prefix) throws IOException {
998     StringBuilder sb = new StringBuilder(prefix);
999     while (isAsciiDigit(c)) {
1000       sb.appendCodePoint(c);
1001       next();
1002     }
1003     Integer value = Ints.tryParse(sb.toString());
1004     if (value == null) {
1005       throw parseException("Invalid integer: " + sb);
1006     }
1007     return new ConstantExpressionNode(resourceName, lineNumber(), value);
1008   }
1009 
1010   /**
1011    * Parses a boolean literal, either {@code true} or {@code false}.
1012    * <boolean-literal> -> true |
1013    *                      false
1014    */
parseBooleanLiteral()1015   private ExpressionNode parseBooleanLiteral() throws IOException {
1016     String s = parseId("Identifier without $");
1017     boolean value;
1018     if (s.equals("true")) {
1019       value = true;
1020     } else if (s.equals("false")) {
1021       value = false;
1022     } else {
1023       throw parseException("Identifier in expression must be preceded by $ or be true or false");
1024     }
1025     return new ConstantExpressionNode(resourceName, lineNumber(), value);
1026   }
1027 
1028   private static final CharMatcher ASCII_LETTER =
1029       CharMatcher.inRange('A', 'Z')
1030           .or(CharMatcher.inRange('a', 'z'))
1031           .precomputed();
1032 
1033   private static final CharMatcher ASCII_DIGIT =
1034       CharMatcher.inRange('0', '9')
1035           .precomputed();
1036 
1037   private static final CharMatcher ID_CHAR =
1038       ASCII_LETTER
1039           .or(ASCII_DIGIT)
1040           .or(CharMatcher.anyOf("-_"))
1041           .precomputed();
1042 
isAsciiLetter(int c)1043   private static boolean isAsciiLetter(int c) {
1044     return (char) c == c && ASCII_LETTER.matches((char) c);
1045   }
1046 
isAsciiDigit(int c)1047   private static boolean isAsciiDigit(int c) {
1048     return (char) c == c && ASCII_DIGIT.matches((char) c);
1049   }
1050 
isIdChar(int c)1051   private static boolean isIdChar(int c) {
1052     return (char) c == c && ID_CHAR.matches((char) c);
1053   }
1054 
1055   /**
1056    * Parse an identifier as specified by the
1057    * <a href="http://velocity.apache.org/engine/devel/vtl-reference-guide.html#Variables">VTL
1058    * </a>. Identifiers are ASCII: starts with a letter, then letters, digits, {@code -} and
1059    * {@code _}.
1060    */
parseId(String what)1061   private String parseId(String what) throws IOException {
1062     if (!isAsciiLetter(c)) {
1063       throw parseException(what + " should start with an ASCII letter");
1064     }
1065     StringBuilder id = new StringBuilder();
1066     while (isIdChar(c)) {
1067       id.appendCodePoint(c);
1068       next();
1069     }
1070     return id.toString();
1071   }
1072 
1073   /**
1074    * Returns an exception to be thrown describing a parse error with the given message, and
1075    * including information about where it occurred.
1076    */
parseException(String message)1077   private ParseException parseException(String message) throws IOException {
1078     StringBuilder context = new StringBuilder();
1079     if (c == EOF) {
1080       context.append("EOF");
1081     } else {
1082       int count = 0;
1083       while (c != EOF && count < 20) {
1084         context.appendCodePoint(c);
1085         next();
1086         count++;
1087       }
1088       if (c != EOF) {
1089         context.append("...");
1090       }
1091     }
1092     return new ParseException(message, resourceName, lineNumber(), context.toString());
1093   }
1094 }
1095