1 /*
2  * Copyright (C) 2009 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.common.escape;
18 
19 import java.util.HashMap;
20 import java.util.Map;
21 
/**
 * A factory for Escaper instances used to escape strings for safe use in Java.
 *
 * <p>This is a subset of the source code escapers that are in the process of being open-sourced
 * as part of Guava; see <a href="https://github.com/google/guava/issues/1620">Guava issue
 * 1620</a>.
 */
// TODO(cushon): migrate to the Guava version once it is open-sourced, and delete this class.
29 public final class SourceCodeEscapers {
SourceCodeEscapers()30   private SourceCodeEscapers() {}
31 
32   // For each xxxEscaper() method, please add links to external reference pages
33   // that are considered authoritative for the behavior of that escaper.
34 
35   // From: http://en.wikipedia.org/wiki/ASCII#ASCII_printable_characters
36   private static final char PRINTABLE_ASCII_MIN = 0x20; // ' '
37   private static final char PRINTABLE_ASCII_MAX = 0x7E; // '~'
38 
39   private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();
40 
41   /**
42    * Returns an {@link Escaper} instance that escapes special characters in a string so it can
43    * safely be included in either a Java character literal or string literal. This is the preferred
44    * way to escape Java characters for use in String or character literals.
45    *
46    * <p>See: <a href= "http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#101089"
47    * >The Java Language Specification</a> for more details.
48    */
javaCharEscaper()49   public static CharEscaper javaCharEscaper() {
50     return JAVA_CHAR_ESCAPER;
51   }
52 
53   private static final CharEscaper JAVA_CHAR_ESCAPER;
54 
55   static {
56     Map<Character, String> javaMap = new HashMap<>();
57     javaMap.put('\b', "\\b");
58     javaMap.put('\f', "\\f");
59     javaMap.put('\n', "\\n");
60     javaMap.put('\r', "\\r");
61     javaMap.put('\t', "\\t");
62     javaMap.put('\"', "\\\"");
63     javaMap.put('\\', "\\\\");
64     javaMap.put('\'', "\\'");
65     JAVA_CHAR_ESCAPER = new JavaCharEscaper(javaMap);
66   }
67 
68   // This escaper does not produce octal escape sequences. See:
69   // http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#101089
70   //  "Octal escapes are provided for compatibility with C, but can express
71   //   only Unicode values \u0000 through \u00FF, so Unicode escapes are
72   //   usually preferred."
73   private static class JavaCharEscaper extends ArrayBasedCharEscaper {
JavaCharEscaper(Map<Character, String> replacements)74     JavaCharEscaper(Map<Character, String> replacements) {
75       super(replacements, PRINTABLE_ASCII_MIN, PRINTABLE_ASCII_MAX);
76     }
77 
78     @Override
escapeUnsafe(char c)79     protected char[] escapeUnsafe(char c) {
80       return asUnicodeHexEscape(c);
81     }
82   }
83 
84   // Helper for common case of escaping a single char.
asUnicodeHexEscape(char c)85   private static char[] asUnicodeHexEscape(char c) {
86     // Equivalent to String.format("\\u%04x", (int)c);
87     char[] r = new char[6];
88     r[0] = '\\';
89     r[1] = 'u';
90     r[5] = HEX_DIGITS[c & 0xF];
91     c >>>= 4;
92     r[4] = HEX_DIGITS[c & 0xF];
93     c >>>= 4;
94     r[3] = HEX_DIGITS[c & 0xF];
95     c >>>= 4;
96     r[2] = HEX_DIGITS[c & 0xF];
97     return r;
98   }
99 }
100