1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 package com.google.protobuf;
32 
33 /**
34  * Provide text format escaping support for proto2 instances.
35  */
36 final class TextFormatEscaper {
TextFormatEscaper()37   private TextFormatEscaper() {}
38 
39   private interface ByteSequence {
size()40     int size();
byteAt(int offset)41     byte byteAt(int offset);
42   }
43 
44   /**
45    * Escapes bytes in the format used in protocol buffer text format, which
46    * is the same as the format used for C string literals.  All bytes
47    * that are not printable 7-bit ASCII characters are escaped, as well as
48    * backslash, single-quote, and double-quote characters.  Characters for
49    * which no defined short-hand escape sequence is defined will be escaped
50    * using 3-digit octal sequences.
51    */
escapeBytes(final ByteSequence input)52   static String escapeBytes(final ByteSequence input) {
53     final StringBuilder builder = new StringBuilder(input.size());
54     for (int i = 0; i < input.size(); i++) {
55       final byte b = input.byteAt(i);
56       switch (b) {
57         // Java does not recognize \a or \v, apparently.
58         case 0x07: builder.append("\\a"); break;
59         case '\b': builder.append("\\b"); break;
60         case '\f': builder.append("\\f"); break;
61         case '\n': builder.append("\\n"); break;
62         case '\r': builder.append("\\r"); break;
63         case '\t': builder.append("\\t"); break;
64         case 0x0b: builder.append("\\v"); break;
65         case '\\': builder.append("\\\\"); break;
66         case '\'': builder.append("\\\'"); break;
67         case '"' : builder.append("\\\""); break;
68         default:
69           // Only ASCII characters between 0x20 (space) and 0x7e (tilde) are
70           // printable.  Other byte values must be escaped.
71           if (b >= 0x20 && b <= 0x7e) {
72             builder.append((char) b);
73           } else {
74             builder.append('\\');
75             builder.append((char) ('0' + ((b >>> 6) & 3)));
76             builder.append((char) ('0' + ((b >>> 3) & 7)));
77             builder.append((char) ('0' + (b & 7)));
78           }
79           break;
80       }
81     }
82     return builder.toString();
83   }
84 
85   /**
86    * Escapes bytes in the format used in protocol buffer text format, which
87    * is the same as the format used for C string literals.  All bytes
88    * that are not printable 7-bit ASCII characters are escaped, as well as
89    * backslash, single-quote, and double-quote characters.  Characters for
90    * which no defined short-hand escape sequence is defined will be escaped
91    * using 3-digit octal sequences.
92    */
escapeBytes(final ByteString input)93   static String escapeBytes(final ByteString input) {
94     return escapeBytes(new ByteSequence() {
95       @Override
96       public int size() {
97         return input.size();
98       }
99       @Override
100       public byte byteAt(int offset) {
101         return input.byteAt(offset);
102       }
103     });
104   }
105 
106   /**
107    * Like {@link #escapeBytes(ByteString)}, but used for byte array.
108    */
109   static String escapeBytes(final byte[] input) {
110     return escapeBytes(new ByteSequence() {
111       @Override
112       public int size() {
113         return input.length;
114       }
115       @Override
116       public byte byteAt(int offset) {
117         return input[offset];
118       }
119     });
120   }
121 
122   /**
123    * Like {@link #escapeBytes(ByteString)}, but escapes a text string.
124    * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped
125    * individually as a 3-digit octal escape.  Yes, it's weird.
126    */
127   static String escapeText(final String input) {
128     return escapeBytes(ByteString.copyFromUtf8(input));
129   }
130 
131   /**
132    * Escape double quotes and backslashes in a String for unicode output of a message.
133    */
134   static String escapeDoubleQuotesAndBackslashes(final String input) {
135     return input.replace("\\", "\\\\").replace("\"", "\\\"");
136   }
137 }
138