1 /**
2  * Copyright (c) 2008, Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.android.mail.common.base;
18 
19 /**
20  * An object that converts literal text into a format safe for inclusion in a particular context
21  * (such as an XML document). Typically (but not always), the inverse process of "unescaping" the
22  * text is performed automatically by the relevant parser.
23  *
24  * <p>For example, an XML escaper would convert the literal string {@code "Foo<Bar>"} into {@code
25  * "Foo&lt;Bar&gt;"} to prevent {@code "<Bar>"} from being confused with an XML tag. When the
26  * resulting XML document is parsed, the parser API will return this text as the original literal
27  * string {@code "Foo<Bar>"}.
28  *
29  * <p>An {@code Escaper} instance is required to be stateless, and safe when used concurrently by
30  * multiple threads.
31  *
32  * <p>The two primary implementations of this interface are {@link CharEscaper} and {@link
33  * UnicodeEscaper}. They are heavily optimized for performance and greatly simplify the task of
34  * implementing new escapers. It is strongly recommended that when implementing a new escaper you
35  * extend one of these classes. If you find that you are unable to achieve the desired behavior
36  * using either of these classes, please contact the Java libraries team for advice.
37  *
38  * <p>Several popular escapers are defined as constants in the class {@link CharEscapers}. To create
39  * your own escapers, use {@link CharEscaperBuilder}, or extend {@link CharEscaper} or {@code
40  * UnicodeEscaper}.
41  *
42  * @author dbeaumont@google.com (David Beaumont)
43  */
44 public abstract class Escaper {
45   /**
46    * Returns the escaped form of a given literal string.
47    *
48    * <p>Note that this method may treat input characters differently depending on the specific
49    * escaper implementation.
50    *
51    * <ul>
52    * <li>{@link UnicodeEscaper} handles <a href="http://en.wikipedia.org/wiki/UTF-16">UTF-16</a>
53    *     correctly, including surrogate character pairs. If the input is badly formed the escaper
54    *     should throw {@link IllegalArgumentException}.
55    * <li>{@link CharEscaper} handles Java characters independently and does not verify the input
56    *     for well formed characters. A CharEscaper should not be used in situations where input is
57    *     not guaranteed to be restricted to the Basic Multilingual Plane (BMP).
58    * </ul>
59    *
60    * @param string the literal string to be escaped
61    * @return the escaped form of {@code string}
62    * @throws NullPointerException if {@code string} is null
63    * @throws IllegalArgumentException if {@code string} contains badly formed UTF-16 or cannot be
64    *         escaped for any other reason
65    */
escape(String string)66   public abstract String escape(String string);
67 
68   /**
69    * Returns an {@code Appendable} instance which automatically escapes all text appended to it
70    * before passing the resulting text to an underlying {@code Appendable}.
71    *
72    * <p>Note that the Appendable returned by this method may treat input characters differently
73    * depending on the specific escaper implementation.
74    *
75    * <ul>
76    * <li>{@link UnicodeEscaper} handles <a href="http://en.wikipedia.org/wiki/UTF-16">UTF-16</a>
77    *     correctly, including surrogate character pairs. If the input is badly formed the escaper
78    *     should throw {@link IllegalArgumentException}.
79    * <li>{@link CharEscaper} handles Java characters independently and does not verify the input
80    *     for well formed characters. A CharEscaper should not be used in situations where input is
81    *     not guaranteed to be restricted to the Basic Multilingual Plane (BMP).
82    * </ul>
83    *
84    * <p>In all implementations the escaped Appendable should throw {@code NullPointerException} if
85    * given a {@code null} {@link CharSequence}.
86    *
87    * @param out the underlying {@code Appendable} to append escaped output to
88    * @return an {@code Appendable} which passes text to {@code out} after escaping it
89    */
escape(Appendable out)90   public abstract Appendable escape(Appendable out);
91 
92   private final Function<String, String> asFunction =
93       new Function<String, String>() {
94         public String apply(String from) {
95           return escape(from);
96         }
97       };
98 
99   /**
100    * Returns a {@link Function} that invokes {@link #escape(String)} on this escaper.
101    */
asFunction()102   public Function<String, String> asFunction() {
103     return asFunction;
104   }
105 }