1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 package com.google.protobuf; 32 33 import com.google.protobuf.IsValidUtf8TestUtil.Shard; 34 35 import junit.framework.TestCase; 36 37 import java.io.UnsupportedEncodingException; 38 39 /** 40 * Tests cases for {@link ByteString#isValidUtf8()}. This includes three 41 * brute force tests that actually test every permutation of one byte, two byte, 42 * and three byte sequences to ensure that the method produces the right result 43 * for every possible byte encoding where "right" means it's consistent with 44 * java's UTF-8 string encoding/decoding such that the method returns true for 45 * any sequence that will round trip when converted to a String and then back to 46 * bytes and will return false for any sequence that will not round trip. 47 * See also {@link IsValidUtf8FourByteTest}. It also includes some 48 * other more targeted tests. 49 * 50 * @author jonp@google.com (Jon Perlow) 51 * @author martinrb@google.com (Martin Buchholz) 52 */ 53 public class IsValidUtf8Test extends TestCase { 54 55 /** 56 * Tests that round tripping of all two byte permutations work. 57 */ testIsValidUtf8_1Byte()58 public void testIsValidUtf8_1Byte() throws UnsupportedEncodingException { 59 IsValidUtf8TestUtil.testBytes(1, 60 IsValidUtf8TestUtil.EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT); 61 } 62 63 /** 64 * Tests that round tripping of all two byte permutations work. 65 */ testIsValidUtf8_2Bytes()66 public void testIsValidUtf8_2Bytes() throws UnsupportedEncodingException { 67 IsValidUtf8TestUtil.testBytes(2, 68 IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT); 69 } 70 71 /** 72 * Tests that round tripping of all three byte permutations work. 73 */ testIsValidUtf8_3Bytes()74 public void testIsValidUtf8_3Bytes() throws UnsupportedEncodingException { 75 IsValidUtf8TestUtil.testBytes(3, 76 IsValidUtf8TestUtil.EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT); 77 } 78 79 /** 80 * Tests that round tripping of a sample of four byte permutations work. 81 * All permutations are prohibitively expensive to test for automated runs; 82 * {@link IsValidUtf8FourByteTest} is used for full coverage. This method 83 * tests specific four-byte cases. 84 */ testIsValidUtf8_4BytesSamples()85 public void testIsValidUtf8_4BytesSamples() 86 throws UnsupportedEncodingException { 87 // Valid 4 byte. 88 assertValidUtf8(0xF0, 0xA4, 0xAD, 0xA2); 89 90 // Bad trailing bytes 91 assertInvalidUtf8(0xF0, 0xA4, 0xAD, 0x7F); 92 assertInvalidUtf8(0xF0, 0xA4, 0xAD, 0xC0); 93 94 // Special cases for byte2 95 assertInvalidUtf8(0xF0, 0x8F, 0xAD, 0xA2); 96 assertInvalidUtf8(0xF4, 0x90, 0xAD, 0xA2); 97 } 98 99 /** 100 * Tests some hard-coded test cases. 101 */ testSomeSequences()102 public void testSomeSequences() { 103 // Empty 104 assertTrue(asBytes("").isValidUtf8()); 105 106 // One-byte characters, including control characters 107 assertTrue(asBytes("\u0000abc\u007f").isValidUtf8()); 108 109 // Two-byte characters 110 assertTrue(asBytes("\u00a2\u00a2").isValidUtf8()); 111 112 // Three-byte characters 113 assertTrue(asBytes("\u020ac\u020ac").isValidUtf8()); 114 115 // Four-byte characters 116 assertTrue(asBytes("\u024B62\u024B62").isValidUtf8()); 117 118 // Mixed string 119 assertTrue( 120 asBytes("a\u020ac\u00a2b\\u024B62u020acc\u00a2de\u024B62") 121 .isValidUtf8()); 122 123 // Not a valid string 124 assertInvalidUtf8(-1, 0, -1, 0); 125 } 126 toByteArray(int... bytes)127 private byte[] toByteArray(int... bytes) { 128 byte[] realBytes = new byte[bytes.length]; 129 for (int i = 0; i < bytes.length; i++) { 130 realBytes[i] = (byte) bytes[i]; 131 } 132 return realBytes; 133 } 134 toByteString(int... bytes)135 private ByteString toByteString(int... bytes) { 136 return ByteString.copyFrom(toByteArray(bytes)); 137 } 138 assertValidUtf8(int[] bytes, boolean not)139 private void assertValidUtf8(int[] bytes, boolean not) { 140 byte[] realBytes = toByteArray(bytes); 141 assertTrue(not ^ Utf8.isValidUtf8(realBytes)); 142 assertTrue(not ^ Utf8.isValidUtf8(realBytes, 0, bytes.length)); 143 ByteString lit = ByteString.copyFrom(realBytes); 144 ByteString sub = lit.substring(0, bytes.length); 145 assertTrue(not ^ lit.isValidUtf8()); 146 assertTrue(not ^ sub.isValidUtf8()); 147 ByteString[] ropes = { 148 RopeByteString.newInstanceForTest(ByteString.EMPTY, lit), 149 RopeByteString.newInstanceForTest(ByteString.EMPTY, sub), 150 RopeByteString.newInstanceForTest(lit, ByteString.EMPTY), 151 RopeByteString.newInstanceForTest(sub, ByteString.EMPTY), 152 RopeByteString.newInstanceForTest(sub, lit) 153 }; 154 for (ByteString rope : ropes) { 155 assertTrue(not ^ rope.isValidUtf8()); 156 } 157 } 158 assertValidUtf8(int... bytes)159 private void assertValidUtf8(int... bytes) { 160 assertValidUtf8(bytes, false); 161 } 162 assertInvalidUtf8(int... bytes)163 private void assertInvalidUtf8(int... bytes) { 164 assertValidUtf8(bytes, true); 165 } 166 asBytes(String s)167 private static ByteString asBytes(String s) { 168 return ByteString.copyFromUtf8(s); 169 } 170 testShardsHaveExpectedRoundTrippables()171 public void testShardsHaveExpectedRoundTrippables() { 172 // A sanity check. 173 int actual = 0; 174 for (Shard shard : IsValidUtf8TestUtil.FOUR_BYTE_SHARDS) { 175 actual += shard.expected; 176 } 177 assertEquals(IsValidUtf8TestUtil.EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT, 178 actual); 179 } 180 } 181