1 /*
2  * Copyright (C) 2009 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.common.escape;
18 
19 import com.google.common.annotations.GwtCompatible;
20 import com.google.common.collect.ImmutableMap;
21 import com.google.common.escape.testing.EscaperAsserts;
22 
23 import junit.framework.TestCase;
24 
25 import java.io.IOException;
26 import java.util.Map;
27 
28 /**
29  * @author David Beaumont
30  */
31 @GwtCompatible
32 public class ArrayBasedUnicodeEscaperTest extends TestCase {
33   private static final Map<Character, String> NO_REPLACEMENTS =
34       ImmutableMap.of();
35   private static final Map<Character, String> SIMPLE_REPLACEMENTS =
36       ImmutableMap.of(
37           '\n', "<newline>",
38           '\t', "<tab>",
39           '&', "<and>");
40   private static final char[] NO_CHARS = new char[0];
41 
testReplacements()42   public void testReplacements() throws IOException {
43     // In reality this is not a very sensible escaper to have (if you are only
44     // escaping elements from a map you would use a ArrayBasedCharEscaper).
45     UnicodeEscaper escaper = new ArrayBasedUnicodeEscaper(SIMPLE_REPLACEMENTS,
46         Character.MIN_VALUE, Character.MAX_CODE_POINT, null) {
47           @Override protected char[] escapeUnsafe(int c) {
48             return NO_CHARS;
49           }
50     };
51     EscaperAsserts.assertBasic(escaper);
52     assertEquals("<tab>Fish <and> Chips<newline>",
53         escaper.escape("\tFish & Chips\n"));
54 
55     // Verify that everything else is left unescaped.
56     String safeChars = "\0\u0100\uD800\uDC00\uFFFF";
57     assertEquals(safeChars, escaper.escape(safeChars));
58 
59     // Ensure that Unicode escapers behave correctly wrt badly formed input.
60     String badUnicode = "\uDC00\uD800";
61     try {
62       escaper.escape(badUnicode);
63       fail("should fail for bad Unicode");
64     } catch (IllegalArgumentException e) {
65       // Pass
66     }
67   }
68 
testSafeRange()69   public void testSafeRange() throws IOException {
70     // Basic escaping of unsafe chars (wrap them in {,}'s)
71     UnicodeEscaper wrappingEscaper =
72         new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 'A', 'Z', null) {
73           @Override protected char[] escapeUnsafe(int c) {
74             return ("{" + (char) c + "}").toCharArray();
75           }
76         };
77     EscaperAsserts.assertBasic(wrappingEscaper);
78     // '[' and '@' lie either side of [A-Z].
79     assertEquals("{[}FOO{@}BAR{]}", wrappingEscaper.escape("[FOO@BAR]"));
80   }
81 
testDeleteUnsafeChars()82   public void testDeleteUnsafeChars() throws IOException {
83     UnicodeEscaper deletingEscaper =
84         new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, ' ', '~', null) {
85           @Override protected char[] escapeUnsafe(int c) {
86             return NO_CHARS;
87           }
88         };
89     EscaperAsserts.assertBasic(deletingEscaper);
90     assertEquals("Everything outside the printable ASCII range is deleted.",
91         deletingEscaper.escape("\tEverything\0 outside the\uD800\uDC00 " +
92             "printable ASCII \uFFFFrange is \u007Fdeleted.\n"));
93   }
94 
testReplacementPriority()95   public void testReplacementPriority() throws IOException {
96     UnicodeEscaper replacingEscaper =
97         new ArrayBasedUnicodeEscaper(SIMPLE_REPLACEMENTS, ' ', '~', null) {
98           private final char[] unknown = new char[] { '?' };
99           @Override protected char[] escapeUnsafe(int c) {
100             return unknown;
101           }
102         };
103     EscaperAsserts.assertBasic(replacingEscaper);
104 
105     // Replacements are applied first regardless of whether the character is in
106     // the safe range or not ('&' is a safe char while '\t' and '\n' are not).
107     assertEquals("<tab>Fish <and>? Chips?<newline>",
108         replacingEscaper.escape("\tFish &\0 Chips\r\n"));
109   }
110 
testCodePointsFromSurrogatePairs()111   public void testCodePointsFromSurrogatePairs() throws IOException {
112     UnicodeEscaper surrogateEscaper =
113         new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 0, 0x20000, null) {
114           private final char[] escaped = new char[] { 'X' };
115           @Override protected char[] escapeUnsafe(int c) {
116             return escaped;
117           }
118         };
119     EscaperAsserts.assertBasic(surrogateEscaper);
120 
121     // A surrogate pair defining a code point within the safe range.
122     String safeInput = "\uD800\uDC00";  // 0x10000
123     assertEquals(safeInput, surrogateEscaper.escape(safeInput));
124 
125     // A surrogate pair defining a code point outside the safe range (but both
126     // of the surrogate characters lie within the safe range). It is important
127     // not to accidentally treat this as a sequence of safe characters.
128     String unsafeInput = "\uDBFF\uDFFF";  // 0x10FFFF
129     assertEquals("X", surrogateEscaper.escape(unsafeInput));
130   }
131 }
132