1 /**
2  * Copyright (c) 2006, Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.android.mail.common.base;
18 
19 import static com.google.android.mail.common.base.Preconditions.checkNotNull;
20 
21 import java.io.IOException;
22 
23 /**
24  * An object that converts literal text into a format safe for inclusion in a particular context
25  * (such as an XML document). Typically (but not always), the inverse process of "unescaping" the
26  * text is performed automatically by the relevant parser.
27  *
28  * <p>For example, an XML escaper would convert the literal string {@code "Foo<Bar>"} into {@code
29  * "Foo&lt;Bar&gt;"} to prevent {@code "<Bar>"} from being confused with an XML tag. When the
30  * resulting XML document is parsed, the parser API will return this text as the original literal
31  * string {@code "Foo<Bar>"}.
32  *
33  * <p>A {@code CharEscaper} instance is required to be stateless, and safe when used concurrently by
34  * multiple threads.
35  *
36  * <p>Several popular escapers are defined as constants in the class {@link CharEscapers}. To create
37  * your own escapers, use {@link CharEscaperBuilder}, or extend this class and implement the {@link
38  * #escape(char)} method.
39  *
40  * @author sven@google.com (Sven Mawson)
41  */
42 public abstract class CharEscaper extends Escaper {
43   /**
44    * Returns the escaped form of a given literal string.
45    *
46    * @param string the literal string to be escaped
47    * @return the escaped form of {@code string}
48    * @throws NullPointerException if {@code string} is null
49    */
escape(String string)50   @Override public String escape(String string) {
51     checkNotNull(string);
52     // Inlineable fast-path loop which hands off to escapeSlow() only if needed
53     int length = string.length();
54     for (int index = 0; index < length; index++) {
55       if (escape(string.charAt(index)) != null) {
56         return escapeSlow(string, index);
57       }
58     }
59     return string;
60   }
61 
62   /**
63    * Returns an {@code Appendable} instance which automatically escapes all text appended to it
64    * before passing the resulting text to an underlying {@code Appendable}.
65    *
66    * <p>The methods of the returned object will propagate any exceptions thrown by the underlying
67    * {@code Appendable}, and will throw {@link NullPointerException} if asked to append {@code
68    * null}, but do not otherwise throw any exceptions.
69    *
70    * <p>The escaping behavior is identical to that of {@link #escape(String)}, so the following code
71    * is always equivalent to {@code escaper.escape(string)}: <pre>   {@code
72    *
73    *   StringBuilder sb = new StringBuilder();
74    *   escaper.escape(sb).append(string);
75    *   return sb.toString();}</pre>
76    *
77    * @param out the underlying {@code Appendable} to append escaped output to
78    * @return an {@code Appendable} which passes text to {@code out} after escaping it
79    * @throws NullPointerException if {@code out} is null.
80    */
escape(final Appendable out)81   @Override public Appendable escape(final Appendable out) {
82     checkNotNull(out);
83 
84     return new Appendable() {
85       @Override public Appendable append(CharSequence csq) throws IOException {
86         out.append(escape(csq.toString()));
87         return this;
88       }
89 
90       @Override public Appendable append(CharSequence csq, int start, int end) throws IOException {
91         out.append(escape(csq.subSequence(start, end).toString()));
92         return this;
93       }
94 
95       @Override public Appendable append(char c) throws IOException {
96         char[] escaped = escape(c);
97         if (escaped == null) {
98           out.append(c);
99         } else {
100           for (char e : escaped) {
101             out.append(e);
102           }
103         }
104         return this;
105       }
106     };
107   }
108 
109   /**
110    * Returns the escaped form of a given literal string, starting at the given index. This method is
111    * called by the {@link #escape(String)} method when it discovers that escaping is required. It is
112    * protected to allow subclasses to override the fastpath escaping function to inline their
113    * escaping test. See {@link CharEscaperBuilder} for an example usage.
114    *
115    * @param s the literal string to be escaped
116    * @param index the index to start escaping from
117    * @return the escaped form of {@code string}
118    * @throws NullPointerException if {@code string} is null
119    */
120   protected String escapeSlow(String s, int index) {
121     int slen = s.length();
122 
123     // Get a destination buffer and setup some loop variables.
124     char[] dest = Platform.charBufferFromThreadLocal();
125     int destSize = dest.length;
126     int destIndex = 0;
127     int lastEscape = 0;
128 
129     // Loop through the rest of the string, replacing when needed into the
130     // destination buffer, which gets grown as needed as well.
131     for (; index < slen; index++) {
132 
133       // Get a replacement for the current character.
134       char[] r = escape(s.charAt(index));
135 
136       // If no replacement is needed, just continue.
137       if (r == null) continue;
138 
139       int rlen = r.length;
140       int charsSkipped = index - lastEscape;
141 
142       // This is the size needed to add the replacement, not the full size needed by the string. We
143       // only regrow when we absolutely must.
144       int sizeNeeded = destIndex + charsSkipped + rlen;
145       if (destSize < sizeNeeded) {
146         destSize = sizeNeeded + (slen - index) + DEST_PAD;
147         dest = growBuffer(dest, destIndex, destSize);
148       }
149 
150       // If we have skipped any characters, we need to copy them now.
151       if (charsSkipped > 0) {
152         s.getChars(lastEscape, index, dest, destIndex);
153         destIndex += charsSkipped;
154       }
155 
156       // Copy the replacement string into the dest buffer as needed.
157       if (rlen > 0) {
158         System.arraycopy(r, 0, dest, destIndex, rlen);
159         destIndex += rlen;
160       }
161       lastEscape = index + 1;
162     }
163 
164     // Copy leftover characters if there are any.
165     int charsLeft = slen - lastEscape;
166     if (charsLeft > 0) {
167       int sizeNeeded = destIndex + charsLeft;
168       if (destSize < sizeNeeded) {
169 
170         // Regrow and copy, expensive! No padding as this is the final copy.
171         dest = growBuffer(dest, destIndex, sizeNeeded);
172       }
173       s.getChars(lastEscape, slen, dest, destIndex);
174       destIndex = sizeNeeded;
175     }
176     return new String(dest, 0, destIndex);
177   }
178 
179   /**
180    * Returns the escaped form of the given character, or {@code null} if this character does not
181    * need to be escaped. If an empty array is returned, this effectively strips the input character
182    * from the resulting text.
183    *
184    * <p>If the character does not need to be escaped, this method should return {@code null}, rather
185    * than a one-character array containing the character itself. This enables the escaping algorithm
186    * to perform more efficiently.
187    *
188    * <p>An escaper is expected to be able to deal with any {@code char} value, so this method should
189    * not throw any exceptions.
190    *
191    * @param c the character to escape if necessary
192    * @return the replacement characters, or {@code null} if no escaping was needed
193    */
194   protected abstract char[] escape(char c);
195 
196   /**
197    * Helper method to grow the character buffer as needed, this only happens once in a while so it's
198    * ok if it's in a method call. If the index passed in is 0 then no copying will be done.
199    */
200   private static char[] growBuffer(char[] dest, int index, int size) {
201     char[] copy = new char[size];
202     if (index > 0) {
203       System.arraycopy(dest, 0, copy, 0, index);
204     }
205     return copy;
206   }
207 
208   /**
209    * The amount of padding to use when growing the escape buffer.
210    */
211   private static final int DEST_PAD = 32;
212 }