1 /*
2  * Copyright (C) 2010 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.streamhtmlparser.util;
18 
19 import com.google.common.base.Preconditions;
20 
21 import java.util.Arrays;
22 
23 /**
24  * Implements a circular (ring) buffer of characters with specialized
25  * application logic in order to determine the context of some
26  * Javascript content that is being parsed.
27  *
28  * This is a specialized class - of no use to external code -
29  * which aims to be 100% compatible with the corresponding logic
30  * in the C-version of the HtmlParser, specifically
31  * <code>jsparser.c</code>. In particular:
32  * <ul>
33  *   <li> The API is odd, using negative indexes to access content in
34  *        the buffer. Changing the API would mean changing the test
35  *        cases and have more difficulty determining whether we are
36  *        remaining compatible with the C-version. It is left as an
37  *        exercise for once the code is very stable and proven.
38  *   <li> Repeated whitespace is folded into just one character to
39  *        use the space available efficiently.
40  *   <li> The buffer size is fixed. There is currently no need to
41  *        make it variable so we avoid the need for constructors.
42  * </ul>
43  */
44 public class JavascriptTokenBuffer {
45 
46   /**
47    * Size of the ring buffer used to lookup the last token in the javascript
48    * stream. The size is somewhat arbitrary but must be larger than
49    * the biggest token we want to lookup plus three: Two delimiters plus
50    * an empty ring buffer slot.
51    */
52   private static final int BUFFER_SIZE = 18;
53 
54   /** Storage implementing the circular buffer. */
55   private final char[] buffer;
56 
57   /** Index of the first item in our circular buffer. */
58   private int startIndex;
59 
60   /** Index of the last item in our circular buffer. */
61   private int endIndex;
62 
63   /**
64    * Constructs an empty javascript token buffer. The size is fixed,
65    * see {@link #BUFFER_SIZE}.
66    */
JavascriptTokenBuffer()67   public JavascriptTokenBuffer() {
68     buffer = new char[BUFFER_SIZE];
69     startIndex = 0;
70     endIndex = 0;
71   }
72 
73   /**
74    * Constructs a javascript token buffer that is identical to
75    * the one given. In particular, it has the same size and contents.
76    *
77    * @param aJavascriptTokenBuffer the {@code JavascriptTokenBuffer} to copy
78    */
JavascriptTokenBuffer(JavascriptTokenBuffer aJavascriptTokenBuffer)79   public JavascriptTokenBuffer(JavascriptTokenBuffer aJavascriptTokenBuffer) {
80     buffer = Arrays.copyOf(aJavascriptTokenBuffer.buffer,
81                            aJavascriptTokenBuffer.buffer.length);
82     startIndex = aJavascriptTokenBuffer.startIndex;
83     endIndex = aJavascriptTokenBuffer.endIndex;
84   }
85 
86   /**
87    * A simple wrapper over <code>appendChar</code>, it appends a string
88    * to the buffer. Sequences of whitespace and newlines
89    * are folded into one character to save space. Null strings are
90    * not allowed.
91    *
92    * @param input the {@code String} to append, cannot be {@code null}
93    */
94   // TODO: Move to testing since not used in code.
appendString(String input)95   public void appendString(String input) {
96     if (input == null) {
97       throw new NullPointerException("input == null is not allowed");
98     }
99     for (int i = 0; i < input.length(); i++) {
100       appendChar(input.charAt(i));
101     }
102   }
103 
104   /**
105    * Appends a character to the buffer. We fold sequences of whitespace and
106    * newlines into one to save space.
107    *
108    * @param input the {@code char} to append
109    */
appendChar(char input)110   public void appendChar(char input) {
111     if (HtmlUtils.isJavascriptWhitespace(input) &&
112         HtmlUtils.isJavascriptWhitespace(getChar(-1))) {
113       return;
114     }
115     buffer[endIndex] = input;
116     endIndex = (endIndex + 1) % buffer.length;
117     if (endIndex == startIndex) {
118       startIndex = (endIndex + 1) % buffer.length;
119     }
120   }
121 
122   /**
123    * Returns the last character in the buffer and removes it from the buffer
124    * or the NUL character '\0' if the buffer is empty.
125    *
126    * @return last character in the buffer or '\0' if the buffer is empty
127    */
popChar()128   public char popChar() {
129     if (startIndex == endIndex) {
130       return '\0';
131     }
132     endIndex--;
133     if (endIndex < 0) {
134       endIndex += buffer.length;
135     }
136     return buffer[endIndex];
137   }
138 
139   /**
140    * Returns the character at a given index in the buffer or nul ('\0')
141    * if the index is outside the range of the buffer. Such could happen
142    * if the buffer is not filled enough or the index is larger than the
143    * size of the buffer.
144    *
145    * <p>Position must be negative where -1 is the index of the last
146    * character in the buffer.
147    *
148    * @param position The index into the buffer
149    *
150    * @return character at the requested index
151    */
getChar(int position)152   public char getChar(int position) {
153     assert(position < 0);   // Developer error if it triggers.
154 
155     int absolutePosition = getAbsolutePosition(position);
156     if (absolutePosition < 0) {
157       return '\0';
158     }
159 
160     return buffer[absolutePosition];
161   }
162 
163   /**
164    * Sets the given {@code input} at the given {@code position} of the buffer.
165    * Returns {@code true} if we succeeded or {@code false} if we
166    * failed (i.e. the write was beyond the buffer boundary).
167    *
168    * <p>Index positions are negative where -1 is the index of the
169    * last character in the buffer.
170    *
171    * @param position The index at which to set the character
172    * @param input The character to set in the buffer
173    * @return {@code true} if we succeeded, {@code false} otherwise
174    */
175   public boolean setChar(int position, char input) {
176     assert(position < 0);   // Developer error if it triggers.
177 
178     int absolutePosition = getAbsolutePosition(position);
179     if (absolutePosition < 0) {
180       return false;
181     }
182 
183     buffer[absolutePosition] = input;
184     return true;
185   }
186 
187 
188   /**
189    * Returns the last javascript identifier/keyword in the buffer.
190    *
191    * @return the last identifier or {@code null} if none was found
192    */
193   public String getLastIdentifier() {
194     int end = -1;
195 
196     if (HtmlUtils.isJavascriptWhitespace(getChar(-1))) {
197       end--;
198     }
199     int position;
200     for (position = end; HtmlUtils.isJavascriptIdentifier(getChar(position));
201          position--) {
202     }
203     if ((position + 1) >= end) {
204       return null;
205     }
206     return slice(position + 1, end);
207   }
208 
209   /**
210    * Returns a slice of the buffer delimited by the given indices.
211    *
212    * The start and end indexes represent the start and end of the
213    * slice to copy. If the start argument extends beyond the beginning
214    * of the buffer, the slice will only contain characters
215    * starting from the beginning of the buffer.
216    *
217    * @param start The index of the first character the copy
218    * @param end the index of the last character to copy
219    *
220    * @return {@code String} between the given indices
221    */
222   public String slice(int start, int end) {
223     // Developer error if any of the asserts below fail.
224     Preconditions.checkArgument(start <= end);
225     Preconditions.checkArgument(start < 0);
226     Preconditions.checkArgument(end < 0);
227 
228     StringBuffer output = new StringBuffer();
229     for (int position = start; position <= end; position++) {
230       char c = getChar(position);
231       if (c != '\0') {
232         output.append(c);
233       }
234     }
235     return new String(output);
236   }
237 
238   /**
239    * Returns the position relative to the start of the buffer or -1
240    * if the position is past the size of the buffer.
241    *
242    * @param position the index to be translated
243    * @return the position relative to the start of the buffer
244    */
245   private int getAbsolutePosition(int position) {
246     assert (position < 0);   // Developer error if it triggers.
247     if (position <= -buffer.length) {
248       return -1;
249     }
250     int len = endIndex - startIndex;
251     if (len < 0) {
252       len += buffer.length;
253     }
254     if (position < -len) {
255       return -1;
256     }
257     int absolutePosition = (position + endIndex) % buffer.length;
258     if (absolutePosition < 0) {
259       absolutePosition += buffer.length;
260     }
261     return absolutePosition;
262   }
263 }
264