1 /*
2  * Copyright (C) 2008 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.common.escape;
18 
19 import com.google.common.annotations.GwtCompatible;
20 
21 import junit.framework.TestCase;
22 
23 /**
24  * Tests for {@link UnicodeEscaper}.
25  *
26  * @author David Beaumont
27  */
28 @GwtCompatible
29 public class UnicodeEscaperTest extends TestCase {
30 
31   private static final String SMALLEST_SURROGATE =
32       "" + Character.MIN_HIGH_SURROGATE + Character.MIN_LOW_SURROGATE;
33   private static final String LARGEST_SURROGATE =
34       "" + Character.MAX_HIGH_SURROGATE + Character.MAX_LOW_SURROGATE;
35 
36   private static final String TEST_STRING =
37       "\0abyz\u0080\u0100\u0800\u1000ABYZ\uffff" +
38       SMALLEST_SURROGATE + "0189" +  LARGEST_SURROGATE;
39 
40   // Escapes nothing
41   private static final UnicodeEscaper NOP_ESCAPER = new UnicodeEscaper() {
42     @Override
43     protected char[] escape(int c) {
44       return null;
45     }
46   };
47 
48   // Escapes everything except [a-zA-Z0-9]
49   private static final UnicodeEscaper SIMPLE_ESCAPER = new UnicodeEscaper() {
50     @Override
51     protected char[] escape(int cp) {
52       return ('a' <= cp && cp <= 'z') ||
53              ('A' <= cp && cp <= 'Z') ||
54              ('0' <= cp && cp <= '9') ? null :
55           ("[" + String.valueOf(cp) + "]").toCharArray();
56     }
57   };
58 
testNopEscaper()59   public void testNopEscaper() {
60     UnicodeEscaper e = NOP_ESCAPER;
61     assertEquals(TEST_STRING, escapeAsString(e, TEST_STRING));
62   }
63 
testSimpleEscaper()64   public void testSimpleEscaper() {
65     UnicodeEscaper e = SIMPLE_ESCAPER;
66     String expected =
67         "[0]abyz[128][256][2048][4096]ABYZ[65535]" +
68         "[" + Character.MIN_SUPPLEMENTARY_CODE_POINT + "]" +
69         "0189[" + Character.MAX_CODE_POINT + "]";
70     assertEquals(expected, escapeAsString(e, TEST_STRING));
71   }
72 
testGrowBuffer()73   public void testGrowBuffer() { // need to grow past an initial 1024 byte buffer
74     StringBuffer input = new StringBuffer();
75     StringBuffer expected = new StringBuffer();
76     for (int i = 256; i < 1024; i++) {
77       input.append((char) i);
78       expected.append("[" + i + "]");
79     }
80     assertEquals(expected.toString(), SIMPLE_ESCAPER.escape(input.toString()));
81   }
82 
testSurrogatePairs()83   public void testSurrogatePairs() {
84     UnicodeEscaper e = SIMPLE_ESCAPER;
85 
86     // Build up a range of surrogate pair characters to test
87     final int min = Character.MIN_SUPPLEMENTARY_CODE_POINT;
88     final int max = Character.MAX_CODE_POINT;
89     final int range = max - min;
90     final int s1 = min + (1 * range) / 4;
91     final int s2 = min + (2 * range) / 4;
92     final int s3 = min + (3 * range) / 4;
93     final char[] dst = new char[12];
94 
95     // Put surrogate pairs at odd indices so they can be split easily
96     dst[0] = 'x';
97     Character.toChars(min, dst, 1);
98     Character.toChars(s1, dst, 3);
99     Character.toChars(s2, dst, 5);
100     Character.toChars(s3, dst, 7);
101     Character.toChars(max, dst, 9);
102     dst[11] = 'x';
103     String test = new String(dst);
104 
105     // Get the expected result string
106     String expected =
107         "x[" + min + "][" + s1 + "][" + s2 + "][" + s3 + "][" + max + "]x";
108     assertEquals(expected, escapeAsString(e, test));
109   }
110 
testTrailingHighSurrogate()111   public void testTrailingHighSurrogate() {
112     String test = "abc" + Character.MIN_HIGH_SURROGATE;
113     try {
114       escapeAsString(NOP_ESCAPER, test);
115       fail("Trailing high surrogate should cause exception");
116     } catch (IllegalArgumentException expected) {
117       // Pass
118     }
119     try {
120       escapeAsString(SIMPLE_ESCAPER, test);
121       fail("Trailing high surrogate should cause exception");
122     } catch (IllegalArgumentException expected) {
123       // Pass
124     }
125   }
126 
testNullInput()127   public void testNullInput() {
128     UnicodeEscaper e = SIMPLE_ESCAPER;
129     try {
130       e.escape((String) null);
131       fail("Null string should cause exception");
132     } catch (NullPointerException expected) {
133       // Pass
134     }
135   }
136 
testBadStrings()137   public void testBadStrings() {
138     UnicodeEscaper e = SIMPLE_ESCAPER;
139     String[] BAD_STRINGS = {
140         String.valueOf(Character.MIN_LOW_SURROGATE),
141         Character.MIN_LOW_SURROGATE + "xyz",
142         "abc" + Character.MIN_LOW_SURROGATE,
143         "abc" + Character.MIN_LOW_SURROGATE + "xyz",
144         String.valueOf(Character.MAX_LOW_SURROGATE),
145         Character.MAX_LOW_SURROGATE + "xyz",
146         "abc" + Character.MAX_LOW_SURROGATE,
147         "abc" + Character.MAX_LOW_SURROGATE + "xyz",
148     };
149     for (String s : BAD_STRINGS) {
150       try {
151         escapeAsString(e, s);
152         fail("Isolated low surrogate should cause exception [" + s + "]");
153       } catch (IllegalArgumentException expected) {
154         // Pass
155       }
156     }
157   }
158 
testFalsePositivesForNextEscapedIndex()159   public void testFalsePositivesForNextEscapedIndex() {
160     UnicodeEscaper e = new UnicodeEscaper() {
161       // Canonical escaper method that only escapes lower case ASCII letters.
162       @Override
163       protected char[] escape(int cp) {
164         return ('a' <= cp && cp <= 'z') ?
165             new char[] { Character.toUpperCase((char) cp) } : null;
166       }
167       // Inefficient implementation that defines all letters as escapable.
168       @Override
169       protected int nextEscapeIndex(CharSequence csq, int index, int end) {
170         while (index < end && !Character.isLetter(csq.charAt(index))) {
171           index++;
172         }
173         return index;
174       }
175     };
176     assertEquals("\0HELLO \uD800\uDC00 WORLD!\n",
177         e.escape("\0HeLLo \uD800\uDC00 WorlD!\n"));
178   }
179 
testCodePointAt_IndexOutOfBoundsException()180   public void testCodePointAt_IndexOutOfBoundsException() {
181     try {
182       UnicodeEscaper.codePointAt("Testing...", 4, 2);
183       fail();
184     } catch (IndexOutOfBoundsException expected) {
185     }
186   }
187 
escapeAsString(Escaper e, String s)188   private String escapeAsString(Escaper e, String s) {
189     return e.escape(s);
190   }
191 }
192