1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 package com.google.protobuf;
32 
33 import com.google.protobuf.IsValidUtf8TestUtil.Shard;
34 
35 import junit.framework.TestCase;
36 
37 import java.io.UnsupportedEncodingException;
38 
39 /**
40  * Tests cases for {@link ByteString#isValidUtf8()}. This includes three
41  * brute force tests that actually test every permutation of one byte, two byte,
42  * and three byte sequences to ensure that the method produces the right result
43  * for every possible byte encoding where "right" means it's consistent with
44  * java's UTF-8 string encoding/decoding such that the method returns true for
45  * any sequence that will round trip when converted to a String and then back to
46  * bytes and will return false for any sequence that will not round trip.
47  * See also {@link IsValidUtf8FourByteTest}. It also includes some
48  * other more targeted tests.
49  *
50  * @author jonp@google.com (Jon Perlow)
51  * @author martinrb@google.com (Martin Buchholz)
52  */
53 public class IsValidUtf8Test extends TestCase {
54 
55   /**
56    * Tests that round tripping of all two byte permutations work.
57    */
testIsValidUtf8_1Byte()58   public void testIsValidUtf8_1Byte() throws UnsupportedEncodingException {
59     IsValidUtf8TestUtil.testBytes(1,
60         IsValidUtf8TestUtil.EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT);
61   }
62 
63   /**
64    * Tests that round tripping of all two byte permutations work.
65    */
testIsValidUtf8_2Bytes()66   public void testIsValidUtf8_2Bytes() throws UnsupportedEncodingException {
67     IsValidUtf8TestUtil.testBytes(2,
68         IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT);
69   }
70 
71   /**
72    * Tests that round tripping of all three byte permutations work.
73    */
testIsValidUtf8_3Bytes()74   public void testIsValidUtf8_3Bytes() throws UnsupportedEncodingException {
75     IsValidUtf8TestUtil.testBytes(3,
76         IsValidUtf8TestUtil.EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT);
77   }
78 
79   /**
80    * Tests that round tripping of a sample of four byte permutations work.
81    * All permutations are prohibitively expensive to test for automated runs;
82    * {@link IsValidUtf8FourByteTest} is used for full coverage. This method
83    * tests specific four-byte cases.
84    */
testIsValidUtf8_4BytesSamples()85   public void testIsValidUtf8_4BytesSamples()
86       throws UnsupportedEncodingException {
87     // Valid 4 byte.
88     assertValidUtf8(0xF0, 0xA4, 0xAD, 0xA2);
89 
90     // Bad trailing bytes
91     assertInvalidUtf8(0xF0, 0xA4, 0xAD, 0x7F);
92     assertInvalidUtf8(0xF0, 0xA4, 0xAD, 0xC0);
93 
94     // Special cases for byte2
95     assertInvalidUtf8(0xF0, 0x8F, 0xAD, 0xA2);
96     assertInvalidUtf8(0xF4, 0x90, 0xAD, 0xA2);
97   }
98 
99   /**
100    * Tests some hard-coded test cases.
101    */
testSomeSequences()102   public void testSomeSequences() {
103     // Empty
104     assertTrue(asBytes("").isValidUtf8());
105 
106     // One-byte characters, including control characters
107     assertTrue(asBytes("\u0000abc\u007f").isValidUtf8());
108 
109     // Two-byte characters
110     assertTrue(asBytes("\u00a2\u00a2").isValidUtf8());
111 
112     // Three-byte characters
113     assertTrue(asBytes("\u020ac\u020ac").isValidUtf8());
114 
115     // Four-byte characters
116     assertTrue(asBytes("\u024B62\u024B62").isValidUtf8());
117 
118     // Mixed string
119     assertTrue(
120         asBytes("a\u020ac\u00a2b\\u024B62u020acc\u00a2de\u024B62")
121         .isValidUtf8());
122 
123     // Not a valid string
124     assertInvalidUtf8(-1, 0, -1, 0);
125   }
126 
toByteArray(int... bytes)127   private byte[] toByteArray(int... bytes) {
128     byte[] realBytes = new byte[bytes.length];
129     for (int i = 0; i < bytes.length; i++) {
130       realBytes[i] = (byte) bytes[i];
131     }
132     return realBytes;
133   }
134 
toByteString(int... bytes)135   private ByteString toByteString(int... bytes) {
136     return ByteString.copyFrom(toByteArray(bytes));
137   }
138 
assertValidUtf8(int[] bytes, boolean not)139   private void assertValidUtf8(int[] bytes, boolean not) {
140     byte[] realBytes = toByteArray(bytes);
141     assertTrue(not ^ Utf8.isValidUtf8(realBytes));
142     assertTrue(not ^ Utf8.isValidUtf8(realBytes, 0, bytes.length));
143     ByteString lit = ByteString.copyFrom(realBytes);
144     ByteString sub = lit.substring(0, bytes.length);
145     assertTrue(not ^ lit.isValidUtf8());
146     assertTrue(not ^ sub.isValidUtf8());
147     ByteString[] ropes = {
148       RopeByteString.newInstanceForTest(ByteString.EMPTY, lit),
149       RopeByteString.newInstanceForTest(ByteString.EMPTY, sub),
150       RopeByteString.newInstanceForTest(lit, ByteString.EMPTY),
151       RopeByteString.newInstanceForTest(sub, ByteString.EMPTY),
152       RopeByteString.newInstanceForTest(sub, lit)
153     };
154     for (ByteString rope : ropes) {
155       assertTrue(not ^ rope.isValidUtf8());
156     }
157   }
158 
assertValidUtf8(int... bytes)159   private void assertValidUtf8(int... bytes) {
160     assertValidUtf8(bytes, false);
161   }
162 
assertInvalidUtf8(int... bytes)163   private void assertInvalidUtf8(int... bytes) {
164     assertValidUtf8(bytes, true);
165   }
166 
asBytes(String s)167   private static ByteString asBytes(String s) {
168     return ByteString.copyFromUtf8(s);
169   }
170 
testShardsHaveExpectedRoundTrippables()171   public void testShardsHaveExpectedRoundTrippables() {
172     // A sanity check.
173     int actual = 0;
174     for (Shard shard : IsValidUtf8TestUtil.FOUR_BYTE_SHARDS) {
175       actual += shard.expected;
176     }
177     assertEquals(IsValidUtf8TestUtil.EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT,
178         actual);
179   }
180 }
181