• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 package com.google.protobuf;
32 
33 /** Provide text format escaping support for proto2 instances. */
34 final class TextFormatEscaper {
TextFormatEscaper()35   private TextFormatEscaper() {}
36 
37   private interface ByteSequence {
size()38     int size();
39 
byteAt(int offset)40     byte byteAt(int offset);
41   }
42 
43   /**
44    * Escapes bytes in the format used in protocol buffer text format, which is the same as the
45    * format used for C string literals. All bytes that are not printable 7-bit ASCII characters are
46    * escaped, as well as backslash, single-quote, and double-quote characters. Characters for which
47    * no defined short-hand escape sequence is defined will be escaped using 3-digit octal sequences.
48    */
escapeBytes(final ByteSequence input)49   static String escapeBytes(final ByteSequence input) {
50     final StringBuilder builder = new StringBuilder(input.size());
51     for (int i = 0; i < input.size(); i++) {
52       final byte b = input.byteAt(i);
53       switch (b) {
54           // Java does not recognize \a or \v, apparently.
55         case 0x07:
56           builder.append("\\a");
57           break;
58         case '\b':
59           builder.append("\\b");
60           break;
61         case '\f':
62           builder.append("\\f");
63           break;
64         case '\n':
65           builder.append("\\n");
66           break;
67         case '\r':
68           builder.append("\\r");
69           break;
70         case '\t':
71           builder.append("\\t");
72           break;
73         case 0x0b:
74           builder.append("\\v");
75           break;
76         case '\\':
77           builder.append("\\\\");
78           break;
79         case '\'':
80           builder.append("\\\'");
81           break;
82         case '"':
83           builder.append("\\\"");
84           break;
85         default:
86           // Only ASCII characters between 0x20 (space) and 0x7e (tilde) are
87           // printable.  Other byte values must be escaped.
88           if (b >= 0x20 && b <= 0x7e) {
89             builder.append((char) b);
90           } else {
91             builder.append('\\');
92             builder.append((char) ('0' + ((b >>> 6) & 3)));
93             builder.append((char) ('0' + ((b >>> 3) & 7)));
94             builder.append((char) ('0' + (b & 7)));
95           }
96           break;
97       }
98     }
99     return builder.toString();
100   }
101 
102   /**
103    * Escapes bytes in the format used in protocol buffer text format, which is the same as the
104    * format used for C string literals. All bytes that are not printable 7-bit ASCII characters are
105    * escaped, as well as backslash, single-quote, and double-quote characters. Characters for which
106    * no defined short-hand escape sequence is defined will be escaped using 3-digit octal sequences.
107    */
escapeBytes(final ByteString input)108   static String escapeBytes(final ByteString input) {
109     return escapeBytes(
110         new ByteSequence() {
111           @Override
112           public int size() {
113             return input.size();
114           }
115 
116           @Override
117           public byte byteAt(int offset) {
118             return input.byteAt(offset);
119           }
120         });
121   }
122 
123   /** Like {@link #escapeBytes(ByteString)}, but used for byte array. */
124   static String escapeBytes(final byte[] input) {
125     return escapeBytes(
126         new ByteSequence() {
127           @Override
128           public int size() {
129             return input.length;
130           }
131 
132           @Override
133           public byte byteAt(int offset) {
134             return input[offset];
135           }
136         });
137   }
138 
139   /**
140    * Like {@link #escapeBytes(ByteString)}, but escapes a text string. Non-ASCII characters are
141    * first encoded as UTF-8, then each byte is escaped individually as a 3-digit octal escape. Yes,
142    * it's weird.
143    */
144   static String escapeText(final String input) {
145     return escapeBytes(ByteString.copyFromUtf8(input));
146   }
147 
148   /** Escape double quotes and backslashes in a String for unicode output of a message. */
149   static String escapeDoubleQuotesAndBackslashes(final String input) {
150     return input.replace("\\", "\\\\").replace("\"", "\\\"");
151   }
152 }
153