1 /*
2  * Copyright (C) 2010 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.clearsilver.jsilver.syntax;
18 
19 import com.google.clearsilver.jsilver.syntax.analysis.DepthFirstAdapter;
20 import com.google.clearsilver.jsilver.syntax.node.AAltCommand;
21 import com.google.clearsilver.jsilver.syntax.node.ACallCommand;
22 import com.google.clearsilver.jsilver.syntax.node.ADataCommand;
23 import com.google.clearsilver.jsilver.syntax.node.ADefCommand;
24 import com.google.clearsilver.jsilver.syntax.node.AEachCommand;
25 import com.google.clearsilver.jsilver.syntax.node.AEscapeCommand;
26 import com.google.clearsilver.jsilver.syntax.node.AEvarCommand;
27 import com.google.clearsilver.jsilver.syntax.node.AIfCommand;
28 import com.google.clearsilver.jsilver.syntax.node.ALoopCommand;
29 import com.google.clearsilver.jsilver.syntax.node.ALoopIncCommand;
30 import com.google.clearsilver.jsilver.syntax.node.ALoopToCommand;
31 import com.google.clearsilver.jsilver.syntax.node.ALvarCommand;
32 import com.google.clearsilver.jsilver.syntax.node.ANameCommand;
33 import com.google.clearsilver.jsilver.syntax.node.ANoopCommand;
34 import com.google.clearsilver.jsilver.syntax.node.ASetCommand;
35 import com.google.clearsilver.jsilver.syntax.node.AUvarCommand;
36 import com.google.clearsilver.jsilver.syntax.node.AVarCommand;
37 import com.google.clearsilver.jsilver.syntax.node.AWithCommand;
38 import com.google.clearsilver.jsilver.syntax.node.Start;
39 import com.google.clearsilver.jsilver.syntax.node.TData;
40 
41 import java.util.ArrayList;
42 import java.util.List;
43 import java.util.regex.Matcher;
44 import java.util.regex.Pattern;
45 
46 /**
47  * Detects sequences of commands corresponding to a line in the template containing only structural
48  * commands, comments or whitespace and rewrites the syntax tree to effectively remove any data
49  * (text) associated with that line (including the trailing whitespace).
50  * <p>
51  * A structural command is any command that never emits any output. These come in three types:
52  * <ul>
53  * <li>Commands that can contain other commands (eg, "alt", "each", "escape", "if", "loop", "with",
54  * etc...).
55  * <li>Commands that operate on the template itself (eg, "include", "autoescape", etc...).
56  * <li>Comments.
57  * </ul>
58  * <p>
59  * This makes it much easier to write human readable templates in cases where the output format is
60  * whitespace sensitive.
61  * <p>
62  * Thus the input:
63  *
64  * <pre>
65  * {@literal
66  * ----------------
67  * Value is:
68  * <?cs if:x>0 ?>
69  *   positive
70  * <?cs elif:x<0 ?>
71  *   negative
72  * <?cs else ?>
73  *   zero
74  * <?cs /if ?>.
75  * ----------------
76  * }
77  * </pre>
78  * is equivalent to:
79  *
80  * <pre>
81  * {@literal
82  * ----------------
83  * Value is:
84  * <?cs if:x>0 ?>  positive
85  * <?cs elif:x<0 ?>  negative
86  * <?cs else ?>  zero
87  * <?cs /if ?>.
88  * ----------------
89  * }
90  * </pre>
91  * but is much easier to read.
92  * <p>
93  * Where data commands become empty they are replaced with Noop commands, which effectively removes
94  * them from the tree. These can be removed (if needed) by a later optimization step but shouldn't
95  * cause any issues.
96  */
97 public class StructuralWhitespaceStripper extends DepthFirstAdapter {
98   /**
99    * A regex snippet to match sequences of inline whitespace. The easiest way to define this is as
100    * "not (non-space or newline)".
101    */
102   private static final String IWS = "[^\\S\\n]*";
103 
104   /** Pattern to match strings that consist only of inline whitespace. */
105   private static final Pattern INLINE_WHITESPACE = Pattern.compile(IWS);
106 
107   /**
108    * Pattern to match strings that start with arbitrary (inline) whitespace, followed by a newline.
109    */
110   private static final Pattern STARTS_WITH_NEWLINE = Pattern.compile("^" + IWS + "\\n");
111 
112   /**
113    * Pattern to match strings that end with a newline, followed by trailing (inline) whitespace.
114    */
115   private static final Pattern ENDS_WITH_NEWLINE = Pattern.compile("\\n" + IWS + "$");
116 
117   /**
118    * Pattern to capture the content of a string after a leading newline. Only ever used on input
119    * that previously matched STARTS_WITH_NEWLINE.
120    */
121   private static final Pattern LEADING_WHITESPACE_AND_NEWLINE =
122       Pattern.compile("^" + IWS + "\\n(.*)$", Pattern.DOTALL);
123 
124   /**
125    * Pattern to capture the content of a string before a trailing newline. Note that this may have
126    * to match text that has already had the final newline removed so we must greedily match the
127    * whitespace rather than the content.
128    */
129   private static final Pattern TRAILING_WHITESPACE =
130       Pattern.compile("^(.*?)" + IWS + "$", Pattern.DOTALL);
131 
132   /**
133    * Flag to tell us if we are in whitespace chomping mode. By default we start in this mode because
134    * the content of the first line in a template is not preceded by a newline (but should behave as
135    * if it was). Once this flag has been set to false, it remains unset until a new line is
136    * encountered.
137    * <p>
138    * Note that we only actually remove whitespace when we find the terminating condition rather than
139    * when as visit the nodes (ie, this mode can be aborted and any visited whitespace will be left
140    * untouched).
141    */
142   private boolean maybeChompWhitespace = true;
143 
144   /**
145    * Flag to tell us if the line we are processing has an inline command in it.
146    * <p>
147    * An inline command is a complex command (eg. 'if', 'loop') where both the start and end of the
148    * command exists on the same line. Non-complex commands (eg. 'var', 'name') cannot be considered
149    * inline.
150    * <p>
151    * This flag is set when we process the start of a complex command and unset when we finish
152    * processing a line. Thus if the flag is still true when we encounter the end of a complex
153    * command, it tells us that (at least one) complex command was entirely contained within the
154    * current line and that we should stop chomping whitespace for the current line.
155    * <p>
156    * This means we can detect input such as:
157    *
158    * <pre>
159    * {@literal <?cs if:x?>   <?cs /if?>}
160    * </pre>
161    * for which the trailing newline and surrounding whitespace should not be removed, as opposed to:
162    *
163    * <pre>
164    * {@literal <?cs if:x?>
165    *    something
166    *  <?cs /if?>
167    * }
168    * </pre>
169    * where the trailing newlines for both the opening and closing of the 'if' command should be
170    * removed.
171    */
172   private boolean currentLineContainsInlineComplexCommand = false;
173 
174   /**
175    * First data command we saw when we started 'chomping' whitespace (note that this can be null if
176    * we are at the beginning of a file or when we have chomped a previous data command down to
177    * nothing).
178    */
179   private ADataCommand firstChompedData = null;
180 
181   /**
182    * Intermediate whitespace-only data commands that we may need to remove.
183    * <p>
184    * This list is built up as we visit commands and is either processed when we need to remove
185    * structural whitespace or cleared if we encounter situations that prohibit whitespace removal.
186    */
187   private List<ADataCommand> whitespaceData = new ArrayList<ADataCommand>();
188 
isInlineWhitespace(String text)189   private static boolean isInlineWhitespace(String text) {
190     return INLINE_WHITESPACE.matcher(text).matches();
191   }
192 
startsWithNewline(String text)193   private static boolean startsWithNewline(String text) {
194     return STARTS_WITH_NEWLINE.matcher(text).find();
195   }
196 
endsWithNewline(String text)197   private static boolean endsWithNewline(String text) {
198     return ENDS_WITH_NEWLINE.matcher(text).find();
199   }
200 
201   /**
202    * Removes leading whitespace (including first newline) from the given string. The text must start
203    * with optional whitespace followed by a newline.
204    */
stripLeadingWhitespaceAndNewline(String text)205   private static String stripLeadingWhitespaceAndNewline(String text) {
206     Matcher matcher = LEADING_WHITESPACE_AND_NEWLINE.matcher(text);
207     if (!matcher.matches()) {
208       throw new IllegalStateException("Text '" + text + "' should have leading whitespace/newline.");
209     }
210     return matcher.group(1);
211   }
212 
213   /**
214    * Removes trailing whitespace (if present) from the given string.
215    */
stripTrailingWhitespace(String text)216   private static String stripTrailingWhitespace(String text) {
217     Matcher matcher = TRAILING_WHITESPACE.matcher(text);
218     if (!matcher.matches()) {
219       // The trailing whitespace regex should never fail to match a string.
220       throw new AssertionError("Error in regular expression");
221     }
222     return matcher.group(1);
223   }
224 
225   /**
226    * Remove whitespace (including first newline) from the start of the given data command (replacing
227    * it with a Noop command if it becomes empty). Returns a modified data command, or null if all
228    * text was removed.
229    * <p>
230    * The given command can be null at the beginning of the file or if the original data command was
231    * entirely consumed by a previous strip operation (remember that data commands can be processed
232    * twice, at both the start and end of a whitespace sequence).
233    */
stripLeadingWhitespaceAndNewline(ADataCommand data)234   private static ADataCommand stripLeadingWhitespaceAndNewline(ADataCommand data) {
235     if (data != null) {
236       String text = stripLeadingWhitespaceAndNewline(data.getData().getText());
237       if (text.isEmpty()) {
238         data.replaceBy(new ANoopCommand());
239         // Returning null just means we have chomped the whitespace to nothing.
240         data = null;
241       } else {
242         data.setData(new TData(text));
243       }
244     }
245     return data;
246   }
247 
248   /**
249    * Removes whitespace from the end of the given data command (replacing it with a Noop command if
250    * it becomes empty).
251    */
stripTrailingWhitespace(ADataCommand data)252   private static void stripTrailingWhitespace(ADataCommand data) {
253     if (data != null) {
254       String text = stripTrailingWhitespace(data.getData().getText());
255       if (text.isEmpty()) {
256         data.replaceBy(new ANoopCommand());
257       } else {
258         data.setData(new TData(text));
259       }
260     }
261   }
262 
263   /**
264    * Removes all data commands collected while chomping the current line and clears the given list.
265    */
removeWhitespace(List<ADataCommand> whitespaceData)266   private static void removeWhitespace(List<ADataCommand> whitespaceData) {
267     for (ADataCommand data : whitespaceData) {
268       data.replaceBy(new ANoopCommand());
269     }
270     whitespaceData.clear();
271   }
272 
273   @Override
caseStart(Start node)274   public void caseStart(Start node) {
275     // Process the hierarchy.
276     super.caseStart(node);
277     // We might end after processing a non-data node, so deal with any
278     // unprocessed whitespace before we exit.
279     if (maybeChompWhitespace) {
280       stripTrailingWhitespace(firstChompedData);
281       removeWhitespace(whitespaceData);
282       firstChompedData = null;
283     }
284     // Verify we have consumed (and cleared) any object references.
285     if (firstChompedData != null) {
286       throw new IllegalStateException("Unexpected first data node.");
287     }
288     if (!whitespaceData.isEmpty()) {
289       throw new IllegalStateException("Unexpected data nodes.");
290     }
291   }
292 
293   @Override
caseADataCommand(ADataCommand data)294   public void caseADataCommand(ADataCommand data) {
295     final String originalText = data.getData().getText();
296     if (maybeChompWhitespace) {
297       if (isInlineWhitespace(originalText)) {
298         // This data command is whitespace between two commands on the same
299         // line, simply chomp it and continue ("Om-nom-nom").
300         whitespaceData.add(data);
301         return;
302       }
303       if (startsWithNewline(originalText)) {
304         // This data command is at the end of a line that contains only
305         // structural commands and whitespace. We remove all whitespace
306         // associated with this line by:
307         // * Stripping whitespace from the end of the data command at the start
308         // of this line.
309         // * Removing all intermediate (whitespace only) data commands.
310         // * Stripping whitespace from the start of the current data command.
311         stripTrailingWhitespace(firstChompedData);
312         removeWhitespace(whitespaceData);
313         data = stripLeadingWhitespaceAndNewline(data);
314         currentLineContainsInlineComplexCommand = false;
315       } else {
316         // This data command contains some non-whitespace text so we must abort
317         // the chomping of this line and output it normally.
318         abortWhitespaceChompingForCurrentLine();
319       }
320     }
321     // Test to see if we should start chomping on the next line.
322     maybeChompWhitespace = endsWithNewline(originalText);
323     // Note that data can be null here if we stripped all the whitespace from
324     // it (which means that firstChompedData can be null next time around).
325     firstChompedData = maybeChompWhitespace ? data : null;
326   }
327 
328   /**
329    * Helper method to abort whitespace processing for the current line. This method is idempotent on
330    * a per line basis, and once it has been called the state is only reset at the start of the next
331    * line.
332    */
abortWhitespaceChompingForCurrentLine()333   private void abortWhitespaceChompingForCurrentLine() {
334     maybeChompWhitespace = false;
335     currentLineContainsInlineComplexCommand = false;
336     whitespaceData.clear();
337   }
338 
339   // ---- Inline commands that prohibit whitespace removal. ----
340 
341   @Override
inAAltCommand(AAltCommand node)342   public void inAAltCommand(AAltCommand node) {
343     abortWhitespaceChompingForCurrentLine();
344   }
345 
346   @Override
inACallCommand(ACallCommand node)347   public void inACallCommand(ACallCommand node) {
348     abortWhitespaceChompingForCurrentLine();
349   }
350 
351   @Override
inAEvarCommand(AEvarCommand node)352   public void inAEvarCommand(AEvarCommand node) {
353     abortWhitespaceChompingForCurrentLine();
354   }
355 
356   @Override
inALvarCommand(ALvarCommand node)357   public void inALvarCommand(ALvarCommand node) {
358     abortWhitespaceChompingForCurrentLine();
359   }
360 
361   @Override
inANameCommand(ANameCommand node)362   public void inANameCommand(ANameCommand node) {
363     abortWhitespaceChompingForCurrentLine();
364   }
365 
366   @Override
inASetCommand(ASetCommand node)367   public void inASetCommand(ASetCommand node) {
368     abortWhitespaceChompingForCurrentLine();
369   }
370 
371   @Override
inAUvarCommand(AUvarCommand node)372   public void inAUvarCommand(AUvarCommand node) {
373     abortWhitespaceChompingForCurrentLine();
374   }
375 
376   @Override
inAVarCommand(AVarCommand node)377   public void inAVarCommand(AVarCommand node) {
378     abortWhitespaceChompingForCurrentLine();
379   }
380 
381   // ---- Two part (open/close) commands that can have child commands. ----
382 
enterComplexCommand()383   public void enterComplexCommand() {
384     currentLineContainsInlineComplexCommand = true;
385   }
386 
exitComplexCommand()387   public void exitComplexCommand() {
388     if (currentLineContainsInlineComplexCommand) {
389       abortWhitespaceChompingForCurrentLine();
390     }
391   }
392 
393   @Override
caseAAltCommand(AAltCommand node)394   public void caseAAltCommand(AAltCommand node) {
395     enterComplexCommand();
396     super.caseAAltCommand(node);
397     exitComplexCommand();
398   }
399 
400   @Override
caseADefCommand(ADefCommand node)401   public void caseADefCommand(ADefCommand node) {
402     enterComplexCommand();
403     super.caseADefCommand(node);
404     exitComplexCommand();
405   }
406 
407   @Override
caseAEachCommand(AEachCommand node)408   public void caseAEachCommand(AEachCommand node) {
409     enterComplexCommand();
410     super.caseAEachCommand(node);
411     exitComplexCommand();
412   }
413 
414   @Override
caseAEscapeCommand(AEscapeCommand node)415   public void caseAEscapeCommand(AEscapeCommand node) {
416     enterComplexCommand();
417     super.caseAEscapeCommand(node);
418     exitComplexCommand();
419   }
420 
421   @Override
caseAIfCommand(AIfCommand node)422   public void caseAIfCommand(AIfCommand node) {
423     enterComplexCommand();
424     super.caseAIfCommand(node);
425     exitComplexCommand();
426   }
427 
428   @Override
caseALoopCommand(ALoopCommand node)429   public void caseALoopCommand(ALoopCommand node) {
430     enterComplexCommand();
431     super.caseALoopCommand(node);
432     exitComplexCommand();
433   }
434 
435   @Override
caseALoopIncCommand(ALoopIncCommand node)436   public void caseALoopIncCommand(ALoopIncCommand node) {
437     enterComplexCommand();
438     super.caseALoopIncCommand(node);
439     exitComplexCommand();
440   }
441 
442   @Override
caseALoopToCommand(ALoopToCommand node)443   public void caseALoopToCommand(ALoopToCommand node) {
444     enterComplexCommand();
445     super.caseALoopToCommand(node);
446     exitComplexCommand();
447   }
448 
449   @Override
caseAWithCommand(AWithCommand node)450   public void caseAWithCommand(AWithCommand node) {
451     enterComplexCommand();
452     super.caseAWithCommand(node);
453     exitComplexCommand();
454   }
455 }
456