1 /* 2 * Copyright (C) 2018 Google, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.google.escapevelocity; 17 18 import com.google.common.base.CharMatcher; 19 import com.google.common.base.Verify; 20 import com.google.common.collect.ImmutableList; 21 import com.google.common.collect.ImmutableListMultimap; 22 import com.google.common.collect.Iterables; 23 import com.google.common.primitives.Chars; 24 import com.google.common.primitives.Ints; 25 import com.google.escapevelocity.DirectiveNode.SetNode; 26 import com.google.escapevelocity.ExpressionNode.BinaryExpressionNode; 27 import com.google.escapevelocity.ExpressionNode.NotExpressionNode; 28 import com.google.escapevelocity.ReferenceNode.IndexReferenceNode; 29 import com.google.escapevelocity.ReferenceNode.MemberReferenceNode; 30 import com.google.escapevelocity.ReferenceNode.MethodReferenceNode; 31 import com.google.escapevelocity.ReferenceNode.PlainReferenceNode; 32 import com.google.escapevelocity.TokenNode.CommentTokenNode; 33 import com.google.escapevelocity.TokenNode.ElseIfTokenNode; 34 import com.google.escapevelocity.TokenNode.ElseTokenNode; 35 import com.google.escapevelocity.TokenNode.EndTokenNode; 36 import com.google.escapevelocity.TokenNode.EofNode; 37 import com.google.escapevelocity.TokenNode.ForEachTokenNode; 38 import com.google.escapevelocity.TokenNode.IfTokenNode; 39 import com.google.escapevelocity.TokenNode.MacroDefinitionTokenNode; 40 import com.google.escapevelocity.TokenNode.NestedTokenNode; 41 import java.io.IOException; 42 import java.io.LineNumberReader; 43 import java.io.Reader; 44 45 /** 46 * A parser that reads input from the given {@link Reader} and parses it to produce a 47 * {@link Template}. 48 * 49 * @author emcmanus@google.com (Éamonn McManus) 50 */ 51 class Parser { 52 private static final int EOF = -1; 53 54 private final LineNumberReader reader; 55 private final String resourceName; 56 private final Template.ResourceOpener resourceOpener; 57 58 /** 59 * The invariant of this parser is that {@code c} is always the next character of interest. 60 * This means that we almost never have to "unget" a character by reading too far. For example, 61 * after we parse an integer, {@code c} will be the first character after the integer, which is 62 * exactly the state we will be in when there are no more digits. 63 * 64 * <p>Sometimes we need to read two characters ahead, and in that case we use {@link #pushback}. 65 */ 66 private int c; 67 68 /** 69 * A single character of pushback. If this is not negative, the {@link #next()} method will 70 * return it instead of reading a character. 71 */ 72 private int pushback = -1; 73 Parser(Reader reader, String resourceName, Template.ResourceOpener resourceOpener)74 Parser(Reader reader, String resourceName, Template.ResourceOpener resourceOpener) 75 throws IOException { 76 this.reader = new LineNumberReader(reader); 77 this.reader.setLineNumber(1); 78 next(); 79 this.resourceName = resourceName; 80 this.resourceOpener = resourceOpener; 81 } 82 83 /** 84 * Parse the input completely to produce a {@link Template}. 85 * 86 * <p>Parsing happens in two phases. First, we parse a sequence of "tokens", where tokens include 87 * entire references such as <pre> 88 * ${x.foo()[23]} 89 * </pre>or entire directives such as<pre> 90 * #set ($x = $y + $z) 91 * </pre>But tokens do not span complex constructs. For example,<pre> 92 * #if ($x == $y) something #end 93 * </pre>is three tokens:<pre> 94 * #if ($x == $y) 95 * (literal text " something ") 96 * #end 97 * </pre> 98 * 99 * <p>The second phase then takes the sequence of tokens and constructs a parse tree out of it. 100 * Some nodes in the parse tree will be unchanged from the token sequence, such as the <pre> 101 * ${x.foo()[23]} 102 * #set ($x = $y + $z) 103 * </pre> examples above. But a construct such as the {@code #if ... #end} mentioned above will 104 * become a single IfNode in the parse tree in the second phase. 105 * 106 * <p>The main reason for this approach is that Velocity has two kinds of lexical contexts. At the 107 * top level, there can be arbitrary literal text; references like <code>${x.foo()}</code>; and 108 * directives like {@code #if} or {@code #set}. Inside the parentheses of a directive, however, 109 * neither arbitrary text nor directives can appear, but expressions can, so we need to tokenize 110 * the inside of <pre> 111 * #if ($x == $a + $b) 112 * </pre> as the five tokens "$x", "==", "$a", "+", "$b". Rather than having a classical 113 * parser/lexer combination, where the lexer would need to switch between these two modes, we 114 * replace the lexer with an ad-hoc parser that is the first phase described above, and we 115 * define a simple parser over the resultant tokens that is the second phase. 116 */ parse()117 Template parse() throws IOException { 118 ImmutableList<Node> tokens = parseTokens(); 119 return new Reparser(tokens).reparse(); 120 } 121 parseTokens()122 private ImmutableList<Node> parseTokens() throws IOException { 123 ImmutableList.Builder<Node> tokens = ImmutableList.builder(); 124 Node token; 125 do { 126 token = parseNode(); 127 tokens.add(token); 128 } while (!(token instanceof EofNode)); 129 return tokens.build(); 130 } 131 lineNumber()132 private int lineNumber() { 133 return reader.getLineNumber(); 134 } 135 136 /** 137 * Gets the next character from the reader and assigns it to {@code c}. If there are no more 138 * characters, sets {@code c} to {@link #EOF} if it is not already. 139 */ next()140 private void next() throws IOException { 141 if (c != EOF) { 142 if (pushback < 0) { 143 c = reader.read(); 144 } else { 145 c = pushback; 146 pushback = -1; 147 } 148 } 149 } 150 151 /** 152 * Saves the current character {@code c} to be read again, and sets {@code c} to the given 153 * {@code c1}. Suppose the text contains {@code xy} and we have just read {@code y}. 154 * So {@code c == 'y'}. Now if we execute {@code pushback('x')}, we will have 155 * {@code c == 'x'} and the next call to {@link #next()} will set {@code c == 'y'}. Subsequent 156 * calls to {@code next()} will continue reading from {@link #reader}. So the pushback 157 * essentially puts us back in the state we were in before we read {@code y}. 158 */ pushback(int c1)159 private void pushback(int c1) { 160 pushback = c; 161 c = c1; 162 } 163 164 /** 165 * If {@code c} is a space character, keeps reading until {@code c} is a non-space character or 166 * there are no more characters. 167 */ skipSpace()168 private void skipSpace() throws IOException { 169 while (Character.isWhitespace(c)) { 170 next(); 171 } 172 } 173 174 /** 175 * Gets the next character from the reader, and if it is a space character, keeps reading until 176 * a non-space character is found. 177 */ nextNonSpace()178 private void nextNonSpace() throws IOException { 179 next(); 180 skipSpace(); 181 } 182 183 /** 184 * Skips any space in the reader, and then throws an exception if the first non-space character 185 * found is not the expected one. Sets {@code c} to the first character after that expected one. 186 */ expect(char expected)187 private void expect(char expected) throws IOException { 188 skipSpace(); 189 if (c == expected) { 190 next(); 191 } else { 192 throw parseException("Expected " + expected); 193 } 194 } 195 196 /** 197 * Parses a single node from the reader, as part of the first parsing phase. 198 * <pre>{@code 199 * <template> -> <empty> | 200 * <directive> <template> | 201 * <non-directive> <template> 202 * }</pre> 203 */ parseNode()204 private Node parseNode() throws IOException { 205 if (c == '#') { 206 next(); 207 switch (c) { 208 case '#': 209 return parseLineComment(); 210 case '*': 211 return parseBlockComment(); 212 case '[': 213 return parseHashSquare(); 214 case '{': 215 return parseDirective(); 216 default: 217 if (isAsciiLetter(c)) { 218 return parseDirective(); 219 } else { 220 // For consistency with Velocity, we treat # not followed by a letter or one of the 221 // characters above as a plain character, and we treat #$foo as a literal # followed by 222 // the reference $foo. 223 return parsePlainText('#'); 224 } 225 } 226 } 227 if (c == EOF) { 228 return new EofNode(resourceName, lineNumber()); 229 } 230 return parseNonDirective(); 231 } 232 parseHashSquare()233 private Node parseHashSquare() throws IOException { 234 // We've just seen #[ which might be the start of a #[[quoted block]]#. If the next character 235 // is not another [ then it's not a quoted block, but it *is* a literal #[ followed by whatever 236 // that next character is. 237 assert c == '['; 238 next(); 239 if (c != '[') { 240 return parsePlainText(new StringBuilder("#[")); 241 } 242 int startLine = lineNumber(); 243 next(); 244 StringBuilder sb = new StringBuilder(); 245 while (true) { 246 if (c == EOF) { 247 throw new ParseException( 248 "Unterminated #[[ - did not see matching ]]#", resourceName, startLine); 249 } 250 if (c == '#') { 251 // This might be the last character of ]]# or it might just be a random #. 252 int len = sb.length(); 253 if (len > 1 && sb.charAt(len - 1) == ']' && sb.charAt(len - 2) == ']') { 254 next(); 255 break; 256 } 257 } 258 sb.append((char) c); 259 next(); 260 } 261 String quoted = sb.substring(0, sb.length() - 2); 262 return new ConstantExpressionNode(resourceName, lineNumber(), quoted); 263 } 264 265 /** 266 * Parses a single non-directive node from the reader. 267 * <pre>{@code 268 * <non-directive> -> <reference> | 269 * <text containing neither $ nor #> 270 * }</pre> 271 */ parseNonDirective()272 private Node parseNonDirective() throws IOException { 273 if (c == '$') { 274 next(); 275 if (isAsciiLetter(c) || c == '{') { 276 return parseReference(); 277 } else { 278 return parsePlainText('$'); 279 } 280 } else { 281 int firstChar = c; 282 next(); 283 return parsePlainText(firstChar); 284 } 285 } 286 287 /** 288 * Parses a single directive token from the reader. Directives can be spelled with or without 289 * braces, for example {@code #if} or {@code #{if}}. We omit the brace spelling in the productions 290 * here: <pre>{@code 291 * <directive> -> <if-token> | 292 * <else-token> | 293 * <elseif-token> | 294 * <end-token> | 295 * <foreach-token> | 296 * <set-token> | 297 * <parse-token> | 298 * <macro-token> | 299 * <macro-call> | 300 * <comment> 301 * }</pre> 302 */ parseDirective()303 private Node parseDirective() throws IOException { 304 String directive; 305 if (c == '{') { 306 next(); 307 directive = parseId("Directive inside #{...}"); 308 expect('}'); 309 } else { 310 directive = parseId("Directive"); 311 } 312 Node node; 313 switch (directive) { 314 case "end": 315 node = new EndTokenNode(resourceName, lineNumber()); 316 break; 317 case "if": 318 case "elseif": 319 node = parseIfOrElseIf(directive); 320 break; 321 case "else": 322 node = new ElseTokenNode(resourceName, lineNumber()); 323 break; 324 case "foreach": 325 node = parseForEach(); 326 break; 327 case "set": 328 node = parseSet(); 329 break; 330 case "parse": 331 node = parseParse(); 332 break; 333 case "macro": 334 node = parseMacroDefinition(); 335 break; 336 default: 337 node = parsePossibleMacroCall(directive); 338 } 339 // Velocity skips a newline after any directive. 340 // TODO(emcmanus): in fact it also skips space before the newline, which should be implemented. 341 if (c == '\n') { 342 next(); 343 } 344 return node; 345 } 346 347 /** 348 * Parses the condition following {@code #if} or {@code #elseif}. 349 * <pre>{@code 350 * <if-token> -> #if ( <condition> ) 351 * <elseif-token> -> #elseif ( <condition> ) 352 * }</pre> 353 * 354 * @param directive either {@code "if"} or {@code "elseif"}. 355 */ parseIfOrElseIf(String directive)356 private Node parseIfOrElseIf(String directive) throws IOException { 357 expect('('); 358 ExpressionNode condition = parseExpression(); 359 expect(')'); 360 return directive.equals("if") ? new IfTokenNode(condition) : new ElseIfTokenNode(condition); 361 } 362 363 /** 364 * Parses a {@code #foreach} token from the reader. <pre>{@code 365 * <foreach-token> -> #foreach ( $<id> in <expression> ) 366 * }</pre> 367 */ parseForEach()368 private Node parseForEach() throws IOException { 369 expect('('); 370 expect('$'); 371 String var = parseId("For-each variable"); 372 skipSpace(); 373 boolean bad = false; 374 if (c != 'i') { 375 bad = true; 376 } else { 377 next(); 378 if (c != 'n') { 379 bad = true; 380 } 381 } 382 if (bad) { 383 throw parseException("Expected 'in' for #foreach"); 384 } 385 next(); 386 ExpressionNode collection = parseExpression(); 387 expect(')'); 388 return new ForEachTokenNode(var, collection); 389 } 390 391 /** 392 * Parses a {@code #set} token from the reader. <pre>{@code 393 * <set-token> -> #set ( $<id> = <expression>) 394 * }</pre> 395 */ parseSet()396 private Node parseSet() throws IOException { 397 expect('('); 398 expect('$'); 399 String var = parseId("#set variable"); 400 expect('='); 401 ExpressionNode expression = parseExpression(); 402 expect(')'); 403 return new SetNode(var, expression); 404 } 405 406 /** 407 * Parses a {@code #parse} token from the reader. <pre>{@code 408 * <parse-token> -> #parse ( <string-literal> ) 409 * }</pre> 410 * 411 * <p>The way this works is inconsistent with Velocity. In Velocity, the {@code #parse} directive 412 * is evaluated when it is encountered during template evaluation. That means that the argument 413 * can be a variable, and it also means that you can use {@code #if} to choose whether or not 414 * to do the {@code #parse}. Neither of those is true in EscapeVelocity. The contents of the 415 * {@code #parse} are integrated into the containing template pretty much as if they had been 416 * written inline. That also means that EscapeVelocity allows forward references to macros 417 * inside {@code #parse} directives, which Velocity does not. 418 */ parseParse()419 private Node parseParse() throws IOException { 420 expect('('); 421 skipSpace(); 422 if (c != '"' && c != '\'') { 423 throw parseException("#parse only supported with string literal argument"); 424 } 425 ExpressionNode nestedResourceNameExpression = parseStringLiteral(c, false); 426 String nestedResourceName = nestedResourceNameExpression.evaluate(null).toString(); 427 expect(')'); 428 try (Reader nestedReader = resourceOpener.openResource(nestedResourceName)) { 429 Parser nestedParser = new Parser(nestedReader, nestedResourceName, resourceOpener); 430 ImmutableList<Node> nestedTokens = nestedParser.parseTokens(); 431 return new NestedTokenNode(nestedResourceName, nestedTokens); 432 } 433 } 434 435 /** 436 * Parses a {@code #macro} token from the reader. <pre>{@code 437 * <macro-token> -> #macro ( <id> <macro-parameter-list> ) 438 * <macro-parameter-list> -> <empty> | 439 * $<id> <macro-parameter-list> 440 * }</pre> 441 * 442 * <p>Macro parameters are optionally separated by commas. 443 */ parseMacroDefinition()444 private Node parseMacroDefinition() throws IOException { 445 expect('('); 446 skipSpace(); 447 String name = parseId("Macro name"); 448 ImmutableList.Builder<String> parameterNames = ImmutableList.builder(); 449 while (true) { 450 skipSpace(); 451 if (c == ')') { 452 next(); 453 break; 454 } 455 if (c == ',') { 456 next(); 457 skipSpace(); 458 } 459 if (c != '$') { 460 throw parseException("Macro parameters should look like $name"); 461 } 462 next(); 463 parameterNames.add(parseId("Macro parameter name")); 464 } 465 return new MacroDefinitionTokenNode(resourceName, lineNumber(), name, parameterNames.build()); 466 } 467 468 /** 469 * Parses an identifier after {@code #} that is not one of the standard directives. The assumption 470 * is that it is a call of a macro that is defined in the template. Macro definitions are 471 * extracted from the template during the second parsing phase (and not during evaluation of the 472 * template as you might expect). This means that a macro can be called before it is defined. 473 * <pre>{@code 474 * <macro-call> -> # <id> ( <expression-list> ) 475 * <expression-list> -> <empty> | 476 * <expression> <optional-comma> <expression-list> 477 * <optional-comma> -> <empty> | , 478 * }</pre> 479 */ parsePossibleMacroCall(String directive)480 private Node parsePossibleMacroCall(String directive) throws IOException { 481 skipSpace(); 482 if (c != '(') { 483 throw parseException("Unrecognized directive #" + directive); 484 } 485 next(); 486 ImmutableList.Builder<Node> parameterNodes = ImmutableList.builder(); 487 while (true) { 488 skipSpace(); 489 if (c == ')') { 490 next(); 491 break; 492 } 493 parameterNodes.add(parsePrimary()); 494 if (c == ',') { 495 // The documentation doesn't say so, but you can apparently have an optional comma in 496 // macro calls. 497 next(); 498 } 499 } 500 return new DirectiveNode.MacroCallNode( 501 resourceName, lineNumber(), directive, parameterNodes.build()); 502 } 503 504 /** 505 * Parses and discards a line comment, which is {@code ##} followed by any number of characters 506 * up to and including the next newline. 507 */ parseLineComment()508 private Node parseLineComment() throws IOException { 509 int lineNumber = lineNumber(); 510 while (c != '\n' && c != EOF) { 511 next(); 512 } 513 next(); 514 return new CommentTokenNode(resourceName, lineNumber); 515 } 516 517 /** 518 * Parses and discards a block comment, which is {@code #*} followed by everything up to and 519 * including the next {@code *#}. 520 */ parseBlockComment()521 private Node parseBlockComment() throws IOException { 522 assert c == '*'; 523 int startLine = lineNumber(); 524 int lastC = '\0'; 525 next(); 526 // Consistently with Velocity, we do not make it an error if a #* comment is not closed. 527 while (!(lastC == '*' && c == '#') && c != EOF) { 528 lastC = c; 529 next(); 530 } 531 next(); // this may read EOF twice, which works 532 return new CommentTokenNode(resourceName, startLine); 533 } 534 535 /** 536 * Parses plain text, which is text that contains neither {@code $} nor {@code #}. The given 537 * {@code firstChar} is the first character of the plain text, and {@link #c} is the second 538 * (if the plain text is more than one character). 539 */ parsePlainText(int firstChar)540 private Node parsePlainText(int firstChar) throws IOException { 541 StringBuilder sb = new StringBuilder(); 542 sb.appendCodePoint(firstChar); 543 return parsePlainText(sb); 544 } 545 parsePlainText(StringBuilder sb)546 private Node parsePlainText(StringBuilder sb) throws IOException { 547 literal: 548 while (true) { 549 switch (c) { 550 case EOF: 551 case '$': 552 case '#': 553 break literal; 554 default: 555 // Just some random character. 556 } 557 sb.appendCodePoint(c); 558 next(); 559 } 560 return new ConstantExpressionNode(resourceName, lineNumber(), sb.toString()); 561 } 562 563 /** 564 * Parses a reference, which is everything that can start with a {@code $}. References can 565 * optionally be enclosed in braces, so {@code $x} and {@code ${x}} are the same. Braces are 566 * useful when text after the reference would otherwise be parsed as part of it. For example, 567 * {@code ${x}y} is a reference to the variable {@code $x}, followed by the plain text {@code y}. 568 * Of course {@code $xy} would be a reference to the variable {@code $xy}. 569 * <pre>{@code 570 * <reference> -> $<reference-no-brace> | 571 * ${<reference-no-brace>} 572 * }</pre> 573 * 574 * <p>On entry to this method, {@link #c} is the character immediately after the {@code $}. 575 */ parseReference()576 private Node parseReference() throws IOException { 577 if (c == '{') { 578 next(); 579 if (!isAsciiLetter(c)) { 580 return parsePlainText(new StringBuilder("${")); 581 } 582 ReferenceNode node = parseReferenceNoBrace(); 583 expect('}'); 584 return node; 585 } else { 586 return parseReferenceNoBrace(); 587 } 588 } 589 590 /** 591 * Same as {@link #parseReference()}, except it really must be a reference. A {@code $} in 592 * normal text doesn't start a reference if it is not followed by an identifier. But in an 593 * expression, for example in {@code #if ($x == 23)}, {@code $} must be followed by an 594 * identifier. 595 */ parseRequiredReference()596 private ReferenceNode parseRequiredReference() throws IOException { 597 if (c == '{') { 598 next(); 599 ReferenceNode node = parseReferenceNoBrace(); 600 expect('}'); 601 return node; 602 } else { 603 return parseReferenceNoBrace(); 604 } 605 } 606 607 /** 608 * Parses a reference, in the simple form without braces. 609 * <pre>{@code 610 * <reference-no-brace> -> <id><reference-suffix> 611 * }</pre> 612 */ parseReferenceNoBrace()613 private ReferenceNode parseReferenceNoBrace() throws IOException { 614 String id = parseId("Reference"); 615 ReferenceNode lhs = new PlainReferenceNode(resourceName, lineNumber(), id); 616 return parseReferenceSuffix(lhs); 617 } 618 619 /** 620 * Parses the modifiers that can appear at the tail of a reference. 621 * <pre>{@code 622 * <reference-suffix> -> <empty> | 623 * <reference-member> | 624 * <reference-index> 625 * }</pre> 626 * 627 * @param lhs the reference node representing the first part of the reference 628 * {@code $x} in {@code $x.foo} or {@code $x.foo()}, or later {@code $x.y} in {@code $x.y.z}. 629 */ parseReferenceSuffix(ReferenceNode lhs)630 private ReferenceNode parseReferenceSuffix(ReferenceNode lhs) throws IOException { 631 switch (c) { 632 case '.': 633 return parseReferenceMember(lhs); 634 case '[': 635 return parseReferenceIndex(lhs); 636 default: 637 return lhs; 638 } 639 } 640 641 /** 642 * Parses a reference member, which is either a property reference like {@code $x.y} or a method 643 * call like {@code $x.y($z)}. 644 * <pre>{@code 645 * <reference-member> -> .<id><reference-property-or-method><reference-suffix> 646 * <reference-property-or-method> -> <id> | 647 * <id> ( <method-parameter-list> ) 648 * }</pre> 649 * 650 * @param lhs the reference node representing what appears to the left of the dot, like the 651 * {@code $x} in {@code $x.foo} or {@code $x.foo()}. 652 */ parseReferenceMember(ReferenceNode lhs)653 private ReferenceNode parseReferenceMember(ReferenceNode lhs) throws IOException { 654 assert c == '.'; 655 next(); 656 if (!isAsciiLetter(c)) { 657 // We've seen something like `$foo.!`, so it turns out it's not a member after all. 658 pushback('.'); 659 return lhs; 660 } 661 String id = parseId("Member"); 662 ReferenceNode reference; 663 if (c == '(') { 664 reference = parseReferenceMethodParams(lhs, id); 665 } else { 666 reference = new MemberReferenceNode(lhs, id); 667 } 668 return parseReferenceSuffix(reference); 669 } 670 671 /** 672 * Parses the parameters to a method reference, like {@code $foo.bar($a, $b)}. 673 * <pre>{@code 674 * <method-parameter-list> -> <empty> | 675 * <non-empty-method-parameter-list> 676 * <non-empty-method-parameter-list> -> <expression> | 677 * <expression> , <non-empty-method-parameter-list> 678 * }</pre> 679 * 680 * @param lhs the reference node representing what appears to the left of the dot, like the 681 * {@code $x} in {@code $x.foo()}. 682 */ parseReferenceMethodParams(ReferenceNode lhs, String id)683 private ReferenceNode parseReferenceMethodParams(ReferenceNode lhs, String id) 684 throws IOException { 685 assert c == '('; 686 nextNonSpace(); 687 ImmutableList.Builder<ExpressionNode> args = ImmutableList.builder(); 688 if (c != ')') { 689 args.add(parseExpression()); 690 while (c == ',') { 691 nextNonSpace(); 692 args.add(parseExpression()); 693 } 694 if (c != ')') { 695 throw parseException("Expected )"); 696 } 697 } 698 assert c == ')'; 699 next(); 700 return new MethodReferenceNode(lhs, id, args.build()); 701 } 702 703 /** 704 * Parses an index suffix to a method, like {@code $x[$i]}. 705 * <pre>{@code 706 * <reference-index> -> [ <expression> ] 707 * }</pre> 708 * 709 * @param lhs the reference node representing what appears to the left of the dot, like the 710 * {@code $x} in {@code $x[$i]}. 711 */ parseReferenceIndex(ReferenceNode lhs)712 private ReferenceNode parseReferenceIndex(ReferenceNode lhs) throws IOException { 713 assert c == '['; 714 next(); 715 ExpressionNode index = parseExpression(); 716 if (c != ']') { 717 throw parseException("Expected ]"); 718 } 719 next(); 720 ReferenceNode reference = new IndexReferenceNode(lhs, index); 721 return parseReferenceSuffix(reference); 722 } 723 724 enum Operator { 725 /** 726 * A dummy operator with low precedence. When parsing subexpressions, we always stop when we 727 * reach an operator of lower precedence than the "current precedence". For example, when 728 * parsing {@code 1 + 2 * 3 + 4}, we'll stop parsing the subexpression {@code * 3 + 4} when 729 * we reach the {@code +} because it has lower precedence than {@code *}. This dummy operator, 730 * then, behaves like {@code +} when the minimum precedence is {@code *}. We also return it 731 * if we're looking for an operator and don't find one. If this operator is {@code ⊙}, it's as 732 * if our expressions are bracketed with it, like {@code ⊙ 1 + 2 * 3 + 4 ⊙}. 733 */ 734 STOP("", 0), 735 736 // If a one-character operator is a prefix of a two-character operator, like < and <=, then 737 // the one-character operator must come first. 738 OR("||", 1), 739 AND("&&", 2), 740 EQUAL("==", 3), NOT_EQUAL("!=", 3), 741 LESS("<", 4), LESS_OR_EQUAL("<=", 4), GREATER(">", 4), GREATER_OR_EQUAL(">=", 4), 742 PLUS("+", 5), MINUS("-", 5), 743 TIMES("*", 6), DIVIDE("/", 6), REMAINDER("%", 6); 744 745 final String symbol; 746 final int precedence; 747 Operator(String symbol, int precedence)748 Operator(String symbol, int precedence) { 749 this.symbol = symbol; 750 this.precedence = precedence; 751 } 752 753 @Override toString()754 public String toString() { 755 return symbol; 756 } 757 } 758 759 /** 760 * Maps a code point to the operators that begin with that code point. For example, maps 761 * {@code <} to {@code LESS} and {@code LESS_OR_EQUAL}. 762 */ 763 private static final ImmutableListMultimap<Integer, Operator> CODE_POINT_TO_OPERATORS; 764 static { 765 ImmutableListMultimap.Builder<Integer, Operator> builder = ImmutableListMultimap.builder(); 766 for (Operator operator : Operator.values()) { 767 if (operator != Operator.STOP) { 768 builder.put((int) operator.symbol.charAt(0), operator); 769 } 770 } 771 CODE_POINT_TO_OPERATORS = builder.build(); 772 } 773 774 /** 775 * Parses an expression, which can occur within a directive like {@code #if} or {@code #set}, 776 * or within a reference like {@code $x[$a + $b]} or {@code $x.m($a + $b)}. 777 * <pre>{@code 778 * <expression> -> <and-expression> | 779 * <expression> || <and-expression> 780 * <and-expression> -> <relational-expression> | 781 * <and-expression> && <relational-expression> 782 * <equality-exression> -> <relational-expression> | 783 * <equality-expression> <equality-op> <relational-expression> 784 * <equality-op> -> == | != 785 * <relational-expression> -> <additive-expression> | 786 * <relational-expression> <relation> <additive-expression> 787 * <relation> -> < | <= | > | >= 788 * <additive-expression> -> <multiplicative-expression> | 789 * <additive-expression> <add-op> <multiplicative-expression> 790 * <add-op> -> + | - 791 * <multiplicative-expression> -> <unary-expression> | 792 * <multiplicative-expression> <mult-op> <unary-expression> 793 * <mult-op> -> * | / | % 794 * }</pre> 795 */ parseExpression()796 private ExpressionNode parseExpression() throws IOException { 797 ExpressionNode lhs = parseUnaryExpression(); 798 return new OperatorParser().parse(lhs, 1); 799 } 800 801 /** 802 * An operator-precedence parser for the binary operations we understand. It implements an 803 * <a href="http://en.wikipedia.org/wiki/Operator-precedence_parser">algorithm</a> from Wikipedia 804 * that uses recursion rather than having an explicit stack of operators and values. 805 */ 806 private class OperatorParser { 807 /** 808 * The operator we have just scanned, in the same way that {@link #c} is the character we have 809 * just read. If we were not able to scan an operator, this will be {@link Operator#STOP}. 810 */ 811 private Operator currentOperator; 812 OperatorParser()813 OperatorParser() throws IOException { 814 nextOperator(); 815 } 816 817 /** 818 * Parse a subexpression whose left-hand side is {@code lhs} and where we only consider 819 * operators with precedence at least {@code minPrecedence}. 820 * 821 * @return the parsed subexpression 822 */ parse(ExpressionNode lhs, int minPrecedence)823 ExpressionNode parse(ExpressionNode lhs, int minPrecedence) throws IOException { 824 while (currentOperator.precedence >= minPrecedence) { 825 Operator operator = currentOperator; 826 ExpressionNode rhs = parseUnaryExpression(); 827 nextOperator(); 828 while (currentOperator.precedence > operator.precedence) { 829 rhs = parse(rhs, currentOperator.precedence); 830 } 831 lhs = new BinaryExpressionNode(lhs, operator, rhs); 832 } 833 return lhs; 834 } 835 836 /** 837 * Updates {@link #currentOperator} to be an operator read from the input, 838 * or {@link Operator#STOP} if there is none. 839 */ nextOperator()840 private void nextOperator() throws IOException { 841 skipSpace(); 842 ImmutableList<Operator> possibleOperators = CODE_POINT_TO_OPERATORS.get(c); 843 if (possibleOperators.isEmpty()) { 844 currentOperator = Operator.STOP; 845 return; 846 } 847 char firstChar = Chars.checkedCast(c); 848 next(); 849 Operator operator = null; 850 for (Operator possibleOperator : possibleOperators) { 851 if (possibleOperator.symbol.length() == 1) { 852 Verify.verify(operator == null); 853 operator = possibleOperator; 854 } else if (possibleOperator.symbol.charAt(1) == c) { 855 next(); 856 operator = possibleOperator; 857 } 858 } 859 if (operator == null) { 860 throw parseException( 861 "Expected " + Iterables.getOnlyElement(possibleOperators) + ", not just " + firstChar); 862 } 863 currentOperator = operator; 864 } 865 } 866 867 /** 868 * Parses an expression not containing any operators (except inside parentheses). 869 * <pre>{@code 870 * <unary-expression> -> <primary> | 871 * ( <expression> ) | 872 * ! <unary-expression> 873 * }</pre> 874 */ parseUnaryExpression()875 private ExpressionNode parseUnaryExpression() throws IOException { 876 skipSpace(); 877 ExpressionNode node; 878 if (c == '(') { 879 nextNonSpace(); 880 node = parseExpression(); 881 expect(')'); 882 skipSpace(); 883 return node; 884 } else if (c == '!') { 885 next(); 886 node = new NotExpressionNode(parseUnaryExpression()); 887 skipSpace(); 888 return node; 889 } else { 890 return parsePrimary(); 891 } 892 } 893 894 /** 895 * Parses an expression containing only literals or references. 896 * <pre>{@code 897 * <primary> -> <reference> | 898 * <string-literal> | 899 * <integer-literal> | 900 * <boolean-literal> 901 * }</pre> 902 */ parsePrimary()903 private ExpressionNode parsePrimary() throws IOException { 904 ExpressionNode node; 905 if (c == '$') { 906 next(); 907 node = parseRequiredReference(); 908 } else if (c == '"') { 909 node = parseStringLiteral(c, true); 910 } else if (c == '\'') { 911 node = parseStringLiteral(c, false); 912 } else if (c == '-') { 913 // Velocity does not have a negation operator. If we see '-' it must be the start of a 914 // negative integer literal. 915 next(); 916 node = parseIntLiteral("-"); 917 } else if (isAsciiDigit(c)) { 918 node = parseIntLiteral(""); 919 } else if (isAsciiLetter(c)) { 920 node = parseBooleanLiteral(); 921 } else { 922 throw parseException("Expected an expression"); 923 } 924 skipSpace(); 925 return node; 926 } 927 928 /** 929 * Parses a string literal, which may contain references to be expanded. Examples are 930 * {@code "foo"} or {@code "foo${bar}baz"}. 931 * <pre>{@code 932 * <string-literal> -> <double-quote-literal> | <single-quote-literal> 933 * <double-quote-literal> -> " <double-quote-string-contents> " 934 * <double-quote-string-contents> -> <empty> | 935 * <reference> <double-quote-string-contents> | 936 * <character-other-than-"> <double-quote-string-contents> 937 * <single-quote-literal> -> ' <single-quote-string-contents> ' 938 * <single-quote-string-contents> -> <empty> | 939 * <character-other-than-'> <single-quote-string-contents> 940 * }</pre> 941 */ parseStringLiteral(int quote, boolean allowReferences)942 private ExpressionNode parseStringLiteral(int quote, boolean allowReferences) 943 throws IOException { 944 assert c == quote; 945 next(); 946 ImmutableList.Builder<Node> nodes = ImmutableList.builder(); 947 StringBuilder sb = new StringBuilder(); 948 while (c != quote) { 949 switch (c) { 950 case '\n': 951 case EOF: 952 throw parseException("Unterminated string constant"); 953 case '\\': 954 throw parseException( 955 "Escapes in string constants are not currently supported"); 956 case '$': 957 if (allowReferences) { 958 if (sb.length() > 0) { 959 nodes.add(new ConstantExpressionNode(resourceName, lineNumber(), sb.toString())); 960 sb.setLength(0); 961 } 962 next(); 963 nodes.add(parseReference()); 964 break; 965 } 966 // fall through 967 default: 968 sb.appendCodePoint(c); 969 next(); 970 } 971 } 972 next(); 973 if (sb.length() > 0) { 974 nodes.add(new ConstantExpressionNode(resourceName, lineNumber(), sb.toString())); 975 } 976 return new StringLiteralNode(resourceName, lineNumber(), nodes.build()); 977 } 978 979 private static class StringLiteralNode extends ExpressionNode { 980 private final ImmutableList<Node> nodes; 981 StringLiteralNode(String resourceName, int lineNumber, ImmutableList<Node> nodes)982 StringLiteralNode(String resourceName, int lineNumber, ImmutableList<Node> nodes) { 983 super(resourceName, lineNumber); 984 this.nodes = nodes; 985 } 986 987 @Override evaluate(EvaluationContext context)988 Object evaluate(EvaluationContext context) { 989 StringBuilder sb = new StringBuilder(); 990 for (Node node : nodes) { 991 sb.append(node.evaluate(context)); 992 } 993 return sb.toString(); 994 } 995 } 996 parseIntLiteral(String prefix)997 private ExpressionNode parseIntLiteral(String prefix) throws IOException { 998 StringBuilder sb = new StringBuilder(prefix); 999 while (isAsciiDigit(c)) { 1000 sb.appendCodePoint(c); 1001 next(); 1002 } 1003 Integer value = Ints.tryParse(sb.toString()); 1004 if (value == null) { 1005 throw parseException("Invalid integer: " + sb); 1006 } 1007 return new ConstantExpressionNode(resourceName, lineNumber(), value); 1008 } 1009 1010 /** 1011 * Parses a boolean literal, either {@code true} or {@code false}. 1012 * <boolean-literal> -> true | 1013 * false 1014 */ parseBooleanLiteral()1015 private ExpressionNode parseBooleanLiteral() throws IOException { 1016 String s = parseId("Identifier without $"); 1017 boolean value; 1018 if (s.equals("true")) { 1019 value = true; 1020 } else if (s.equals("false")) { 1021 value = false; 1022 } else { 1023 throw parseException("Identifier in expression must be preceded by $ or be true or false"); 1024 } 1025 return new ConstantExpressionNode(resourceName, lineNumber(), value); 1026 } 1027 1028 private static final CharMatcher ASCII_LETTER = 1029 CharMatcher.inRange('A', 'Z') 1030 .or(CharMatcher.inRange('a', 'z')) 1031 .precomputed(); 1032 1033 private static final CharMatcher ASCII_DIGIT = 1034 CharMatcher.inRange('0', '9') 1035 .precomputed(); 1036 1037 private static final CharMatcher ID_CHAR = 1038 ASCII_LETTER 1039 .or(ASCII_DIGIT) 1040 .or(CharMatcher.anyOf("-_")) 1041 .precomputed(); 1042 isAsciiLetter(int c)1043 private static boolean isAsciiLetter(int c) { 1044 return (char) c == c && ASCII_LETTER.matches((char) c); 1045 } 1046 isAsciiDigit(int c)1047 private static boolean isAsciiDigit(int c) { 1048 return (char) c == c && ASCII_DIGIT.matches((char) c); 1049 } 1050 isIdChar(int c)1051 private static boolean isIdChar(int c) { 1052 return (char) c == c && ID_CHAR.matches((char) c); 1053 } 1054 1055 /** 1056 * Parse an identifier as specified by the 1057 * <a href="http://velocity.apache.org/engine/devel/vtl-reference-guide.html#Variables">VTL 1058 * </a>. Identifiers are ASCII: starts with a letter, then letters, digits, {@code -} and 1059 * {@code _}. 1060 */ parseId(String what)1061 private String parseId(String what) throws IOException { 1062 if (!isAsciiLetter(c)) { 1063 throw parseException(what + " should start with an ASCII letter"); 1064 } 1065 StringBuilder id = new StringBuilder(); 1066 while (isIdChar(c)) { 1067 id.appendCodePoint(c); 1068 next(); 1069 } 1070 return id.toString(); 1071 } 1072 1073 /** 1074 * Returns an exception to be thrown describing a parse error with the given message, and 1075 * including information about where it occurred. 1076 */ parseException(String message)1077 private ParseException parseException(String message) throws IOException { 1078 StringBuilder context = new StringBuilder(); 1079 if (c == EOF) { 1080 context.append("EOF"); 1081 } else { 1082 int count = 0; 1083 while (c != EOF && count < 20) { 1084 context.appendCodePoint(c); 1085 next(); 1086 count++; 1087 } 1088 if (c != EOF) { 1089 context.append("..."); 1090 } 1091 } 1092 return new ParseException(message, resourceName, lineNumber(), context.toString()); 1093 } 1094 } 1095