1 // Copyright (c) 2013, Mike Samuel
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions
6 // are met:
7 //
8 // Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // Neither the name of the OWASP nor the names of its contributors may
14 // be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 // POSSIBILITY OF SUCH DAMAGE.
28 
29 package org.owasp.html;
30 
31 import java.util.Arrays;
32 import java.util.EnumMap;
33 import java.util.Random;
34 import java.util.regex.Pattern;
35 
36 import org.junit.Test;
37 import org.owasp.html.CssTokens.TokenType;
38 
39 import com.google.common.collect.Maps;
40 
41 public class CssFuzzerTest extends FuzzyTestCase {
42 
43   private static final String[] TOKEN_PARTS = new String[] {
44     "'", "\"", "<!--", "-->", "/*", "*/", "***", "//", "\r", "\n",
45     "<", ">", "/", ",", ";", ":", "(", "url", "Url", ")", "[", "]", "{", "}",
46     "\\", "\\a", "\\d", "\\0", " ", "\t", "42", ".", "ex", "auto", "foo", "BAr",
47     "important", "!", "\ufeff", "\u0000", "\u00a0", "\ufffd", "\ud801\udc02",
48     "\u007f", "\u000c", "CDATA", "style"
49   };
50 
51   private static final String[] FREQUENT_TOKEN_PARTS = new String[] {
52     "*/", " ", "\t", "\r", "\n",
53   };
54 
55   private static final String[] DISALLOWED_IN_OUTPUT = {
56     "</style", "<![CDATA[", "]]>", "\r", "\n",
57   };
58 
59   final class Watcher implements Runnable {
60     String input;
61     long started;
62 
run()63     public void run() {
64       synchronized (this) {
65         try {
66           while (true) {
67             this.wait(1000 /* ms = 1s */);
68             String input = this.input;
69             if (input == null) { break; }  // Done
70             long started = this.started;
71             long now = System.nanoTime();
72             if (now - started >= 1000000000L /* ns = 1s */) {
73               System.err.println(
74                   "`" + input + "` is slow. seed=" + CssFuzzerTest.this.seed);
75             }
76           }
77         } catch (InterruptedException ex) {
78           // Done
79         }
80       }
81     }
82   }
83 
84   @Test
testUnderStress()85   public final void testUnderStress() {
86     Random r = this.rnd;
87     Watcher watcher = new Watcher();
88     Thread watcherThread = null;
89     for (int run = 0, nRuns = (1 << 16); run < nRuns; ++run) {
90       // Compose a random string from token parts.
91       StringBuilder sb = new StringBuilder();
92       int nParts = r.nextInt(64) + 16;
93       for (int j = nParts; --j >= 0;) {
94         int die = r.nextInt(32);
95         switch (die) {
96         case 0: sb.append((char) rnd.nextInt(0x80)); break;
97         case 1: sb.append((char) rnd.nextInt(0x1800)); break;
98         default:
99           String[] arr = (die & 1) != 0 ? TOKEN_PARTS : FREQUENT_TOKEN_PARTS;
100           sb.append(arr[rnd.nextInt(arr.length)]);
101           break;
102         }
103       }
104       String randomCss = sb.toString();
105 
106       synchronized (watcher) {
107         watcher.input = randomCss;
108         watcher.started = System.nanoTime();
109       }
110       if (watcherThread == null) {
111         watcherThread = new Thread(watcher);
112         watcherThread.setDaemon(true);
113         watcherThread.start();
114       }
115 
116       String msg = "seed=" + this.seed + ", css=`" + randomCss + "`";
117       CssTokens tokens = CssTokens.lex(randomCss);
118 
119       // Test idempotent
120       String renormalized = CssTokens.lex(tokens.normalizedCss).normalizedCss;
121       if (!renormalized.equals(tokens.normalizedCss)) {
122         if (!renormalized.equals(fixDigitSpaceUnit(tokens))) {
123           for (CssTokens.TokenIterator it = tokens.iterator(); it.hasNext();
124                it.advance()) {
125             System.err.println(it.token() + ":" + it.type());
126           }
127           assertEquals(
128               "not idempotent, " + msg,
129               tokens.normalizedCss,
130               renormalized);
131         }
132       }
133 
134       // Test normalized CSS does not contain HTML/XML breaking tokens.
135       for (String disallowed : DISALLOWED_IN_OUTPUT) {
136         assertFalse(
137             "contains " + disallowed + ", " + msg,
138             tokens.normalizedCss.contains(disallowed));
139       }
140 
141       // Test that tokens are roughly well-formed.
142       int nTokens = 0;
143       for (CssTokens.TokenIterator it = tokens.iterator(); it.hasNext();) {
144         CssTokens.TokenType type = it.type();
145         String token = it.next();
146         Pattern filter = TOKEN_TYPE_FILTERS.get(type);
147         if (filter != null && !filter.matcher(token).matches()) {
148           fail(type + " `" + token + "`, " + msg);
149         }
150         ++nTokens;
151       }
152 
153       // Test that walking the bracket list works.
154       int[] reverse = new int[nTokens];
155       Arrays.fill(reverse, -1);
156       for (int j = 0; j < nTokens; ++j) {
157         int partner = tokens.brackets.partner(j);
158         if (partner != -1) {
159           reverse[partner] = j;
160         }
161       }
162       for (int j = 0; j < nTokens; ++j) {
163         if (reverse[j] != -1) {
164           assertEquals(msg, reverse[reverse[j]], j);
165         }
166       }
167     }
168     synchronized (watcher) {
169       watcher.input = null;
170       watcher.notifyAll();
171     }
172   }
173 
174   private static final EnumMap<CssTokens.TokenType, Pattern> TOKEN_TYPE_FILTERS
175     = Maps.newEnumMap(CssTokens.TokenType.class);
176   static {
177     String NUMBER = "-?(?:0|[1-9][0-9]*)(?:\\.[0-9]*[1-9])?(?:e-?[1-9][0-9]*)?";
178     String IDENT_START = "[a-zA-Z_\\u0080-\udbff\udfff\\-]";
179     String IDENT_PART = "(?:" + IDENT_START + "|[0-9])";
180     String IDENT = IDENT_START + IDENT_PART + "*";
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.AT, Pattern.compile("@" + IDENT))181     TOKEN_TYPE_FILTERS.put(
182         CssTokens.TokenType.AT, Pattern.compile("@" + IDENT));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.COLON, Pattern.compile(":"))183     TOKEN_TYPE_FILTERS.put(
184         CssTokens.TokenType.COLON, Pattern.compile(":"));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.COLUMN, Pattern.compile("\\\\|\\\\|"))185     TOKEN_TYPE_FILTERS.put(
186         CssTokens.TokenType.COLUMN, Pattern.compile("\\|\\|"));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.COMMA, Pattern.compile(","))187     TOKEN_TYPE_FILTERS.put(
188         CssTokens.TokenType.COMMA, Pattern.compile(","));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.DELIM, Pattern.compile("[^\\\\w\\u0000- \\u0080-\\uffff\\\\-]"))189     TOKEN_TYPE_FILTERS.put(
190         CssTokens.TokenType.DELIM,
191         Pattern.compile("[^\\w\u0000- \u0080-\uffff\\-]"));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.DIMENSION, Pattern.compile(NUMBER + "[a-z]+"))192     TOKEN_TYPE_FILTERS.put(
193         CssTokens.TokenType.DIMENSION, Pattern.compile(NUMBER + "[a-z]+"));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.DOT_IDENT, Pattern.compile("\\\\." + IDENT))194     TOKEN_TYPE_FILTERS.put(
195         CssTokens.TokenType.DOT_IDENT, Pattern.compile("\\." + IDENT));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.FUNCTION, Pattern.compile(IDENT + "[(]"))196     TOKEN_TYPE_FILTERS.put(
197         CssTokens.TokenType.FUNCTION, Pattern.compile(IDENT + "[(]"));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.HASH_ID, Pattern.compile("#" + IDENT_PART + "+"))198     TOKEN_TYPE_FILTERS.put(
199         CssTokens.TokenType.HASH_ID, Pattern.compile("#" + IDENT_PART + "+"));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.HASH_UNRESTRICTED, Pattern.compile("#[a-fA-F0-9]+"))200     TOKEN_TYPE_FILTERS.put(
201         CssTokens.TokenType.HASH_UNRESTRICTED,
202         Pattern.compile("#[a-fA-F0-9]+"));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.IDENT, Pattern.compile(IDENT))203     TOKEN_TYPE_FILTERS.put(
204         CssTokens.TokenType.IDENT,
205         Pattern.compile(IDENT));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.LEFT_CURLY, Pattern.compile("[{]"))206     TOKEN_TYPE_FILTERS.put(
207         CssTokens.TokenType.LEFT_CURLY,
208         Pattern.compile("[{]"));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.LEFT_PAREN, Pattern.compile("[(]"))209     TOKEN_TYPE_FILTERS.put(
210         CssTokens.TokenType.LEFT_PAREN,
211         Pattern.compile("[(]"));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.LEFT_SQUARE, Pattern.compile("[\\\\[]"))212     TOKEN_TYPE_FILTERS.put(
213         CssTokens.TokenType.LEFT_SQUARE,
214         Pattern.compile("[\\[]"));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.MATCH, Pattern.compile("[~^$|*]="))215     TOKEN_TYPE_FILTERS.put(
216         CssTokens.TokenType.MATCH,
217         Pattern.compile("[~^$|*]="));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.NUMBER, Pattern.compile(NUMBER))218     TOKEN_TYPE_FILTERS.put(
219         CssTokens.TokenType.NUMBER,
220         Pattern.compile(NUMBER));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.PERCENTAGE, Pattern.compile(NUMBER + "%"))221     TOKEN_TYPE_FILTERS.put(
222         CssTokens.TokenType.PERCENTAGE,
223         Pattern.compile(NUMBER + "%"));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.RIGHT_CURLY, Pattern.compile("[}]"))224     TOKEN_TYPE_FILTERS.put(
225         CssTokens.TokenType.RIGHT_CURLY,
226         Pattern.compile("[}]"));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.RIGHT_PAREN, Pattern.compile("[)]"))227     TOKEN_TYPE_FILTERS.put(
228         CssTokens.TokenType.RIGHT_PAREN,
229         Pattern.compile("[)]"));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.RIGHT_SQUARE, Pattern.compile("[\\\\]]"))230     TOKEN_TYPE_FILTERS.put(
231         CssTokens.TokenType.RIGHT_SQUARE,
232         Pattern.compile("[\\]]"));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.SEMICOLON, Pattern.compile(";"))233     TOKEN_TYPE_FILTERS.put(
234         CssTokens.TokenType.SEMICOLON,
235         Pattern.compile(";"));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.STRING, Pattern.compile("'(?:[^'\\r\\n\\\\\\\\]|\\\\\\\\[^\\r\\n])*'"))236     TOKEN_TYPE_FILTERS.put(
237         CssTokens.TokenType.STRING,
238         Pattern.compile("'(?:[^'\r\n\\\\]|\\\\[^\r\n])*'"));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.UNICODE_RANGE, Pattern.compile("U\\\\+[0-9a-f]{1,6}(?:-[0-9a-f]{1,6}|\\\\?{0,5})?"))239     TOKEN_TYPE_FILTERS.put(
240         CssTokens.TokenType.UNICODE_RANGE,
241         Pattern.compile("U\\+[0-9a-f]{1,6}(?:-[0-9a-f]{1,6}|\\?{0,5})?"));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.URL, Pattern.compile("url\\\\('[0-9A-Za-z\\\\-_.~:/?#\\\\[\\\\]@!$&+,;=%]*'\\\\)"))242     TOKEN_TYPE_FILTERS.put(
243         CssTokens.TokenType.URL,
244         Pattern.compile("url\\('[0-9A-Za-z\\-_.~:/?#\\[\\]@!$&+,;=%]*'\\)"));
TOKEN_TYPE_FILTERS.put( CssTokens.TokenType.WHITESPACE, Pattern.compile(" "))245     TOKEN_TYPE_FILTERS.put(
246         CssTokens.TokenType.WHITESPACE,
247         Pattern.compile(" "));
248   }
249 
250   /**
251    * "1:NUMBER ex:IDENT" -> "1ex:DIMENSION" is a common source source of
252    * a-idempotency, but not one that causes problems in practice.
253    * This hack helps ignore it.
254    */
fixDigitSpaceUnit(CssTokens tokens)255   static String fixDigitSpaceUnit(CssTokens tokens) {
256     StringBuilder sb = new StringBuilder();
257     for (CssTokens.TokenIterator it = tokens.iterator(); it.hasNext();) {
258       if (it.type() != TokenType.NUMBER) {
259         sb.append(it.next());
260       } else {
261         do {
262           sb.append(it.next());
263         } while (it.hasNext() && it.type() == TokenType.NUMBER);
264         if (it.hasNext() && it.type() == TokenType.WHITESPACE) {
265           it.advance();
266           String numberFollower = null;
267           if (it.hasNext()) {
268             String token = it.token();
269             switch (it.type()) {
270               case IDENT:
271                 if (CssTokens.isWellKnownUnit(token)) {
272                   numberFollower = token;
273                   it.advance();
274                   if (it.hasNext() && it.token().startsWith(".")) {
275                     numberFollower += " ";
276                   }
277                   it.backup();
278                 }
279                 break;
280               case FUNCTION:
281                 String name = token.substring(0, token.length() - 1);
282                 if (CssTokens.isWellKnownUnit(name)) {
283                   numberFollower = token;
284                 }
285                 break;
286               case DELIM:
287                 if ("%".equals(token)) {
288                   numberFollower = token;
289                 }
290                 break;
291               default: break;
292             }
293           }
294           if (numberFollower == null) {
295             sb.append(' ');
296           } else {
297             // Drop the space and append a lower-case version of the unit.
298             sb.append(Strings.toLowerCase(numberFollower));
299             it.advance();
300           }
301         }
302       }
303     }
304     return sb.toString();
305   }
306 }
307