// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 31 package com.google.protobuf; 32 33 import static com.google.protobuf.IsValidUtf8TestUtil.DIRECT_NIO_FACTORY; 34 import static com.google.protobuf.IsValidUtf8TestUtil.EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT; 35 import static com.google.protobuf.IsValidUtf8TestUtil.EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT; 36 import static com.google.protobuf.IsValidUtf8TestUtil.HEAP_NIO_FACTORY; 37 import static com.google.protobuf.IsValidUtf8TestUtil.LITERAL_FACTORY; 38 import static com.google.protobuf.IsValidUtf8TestUtil.testBytes; 39 40 import com.google.protobuf.IsValidUtf8TestUtil.ByteStringFactory; 41 import com.google.protobuf.IsValidUtf8TestUtil.Shard; 42 43 import junit.framework.TestCase; 44 45 /** 46 * Tests cases for {@link ByteString#isValidUtf8()}. This includes three 47 * brute force tests that actually test every permutation of one byte, two byte, 48 * and three byte sequences to ensure that the method produces the right result 49 * for every possible byte encoding where "right" means it's consistent with 50 * java's UTF-8 string encoding/decoding such that the method returns true for 51 * any sequence that will round trip when converted to a String and then back to 52 * bytes and will return false for any sequence that will not round trip. 53 * See also {@link IsValidUtf8FourByteTest}. It also includes some 54 * other more targeted tests. 55 * 56 * @author jonp@google.com (Jon Perlow) 57 * @author martinrb@google.com (Martin Buchholz) 58 */ 59 public class IsValidUtf8Test extends TestCase { 60 /** 61 * Tests that round tripping of all two byte permutations work. 62 */ testIsValidUtf8_1Byte()63 public void testIsValidUtf8_1Byte() { 64 testBytes(LITERAL_FACTORY, 1, EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT); 65 testBytes(HEAP_NIO_FACTORY, 1, EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT); 66 testBytes(DIRECT_NIO_FACTORY, 1, EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT); 67 } 68 69 /** 70 * Tests that round tripping of all two byte permutations work. 
71 */ testIsValidUtf8_2Bytes()72 public void testIsValidUtf8_2Bytes() { 73 testBytes(LITERAL_FACTORY, 2, IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT); 74 testBytes(HEAP_NIO_FACTORY, 2, IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT); 75 testBytes(DIRECT_NIO_FACTORY, 2, IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT); 76 } 77 78 /** 79 * Tests that round tripping of all three byte permutations work. 80 */ testIsValidUtf8_3Bytes()81 public void testIsValidUtf8_3Bytes() { 82 // Travis' OOM killer doesn't like this test 83 if (System.getenv("TRAVIS") == null) { 84 testBytes(LITERAL_FACTORY, 3, EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT); 85 testBytes(HEAP_NIO_FACTORY, 3, EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT); 86 testBytes(DIRECT_NIO_FACTORY, 3, EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT); 87 } 88 } 89 90 /** 91 * Tests that round tripping of a sample of four byte permutations work. 92 * All permutations are prohibitively expensive to test for automated runs; 93 * {@link IsValidUtf8FourByteTest} is used for full coverage. This method 94 * tests specific four-byte cases. 95 */ testIsValidUtf8_4BytesSamples()96 public void testIsValidUtf8_4BytesSamples() { 97 // Valid 4 byte. 98 assertValidUtf8(0xF0, 0xA4, 0xAD, 0xA2); 99 100 // Bad trailing bytes 101 assertInvalidUtf8(0xF0, 0xA4, 0xAD, 0x7F); 102 assertInvalidUtf8(0xF0, 0xA4, 0xAD, 0xC0); 103 104 // Special cases for byte2 105 assertInvalidUtf8(0xF0, 0x8F, 0xAD, 0xA2); 106 assertInvalidUtf8(0xF4, 0x90, 0xAD, 0xA2); 107 } 108 109 /** 110 * Tests some hard-coded test cases. 
111 */ testSomeSequences()112 public void testSomeSequences() { 113 // Empty 114 assertTrue(asBytes("").isValidUtf8()); 115 116 // One-byte characters, including control characters 117 assertTrue(asBytes("\u0000abc\u007f").isValidUtf8()); 118 119 // Two-byte characters 120 assertTrue(asBytes("\u00a2\u00a2").isValidUtf8()); 121 122 // Three-byte characters 123 assertTrue(asBytes("\u020ac\u020ac").isValidUtf8()); 124 125 // Four-byte characters 126 assertTrue(asBytes("\u024B62\u024B62").isValidUtf8()); 127 128 // Mixed string 129 assertTrue(asBytes("a\u020ac\u00a2b\\u024B62u020acc\u00a2de\u024B62").isValidUtf8()); 130 131 // Not a valid string 132 assertInvalidUtf8(-1, 0, -1, 0); 133 } 134 toByteArray(int... bytes)135 private byte[] toByteArray(int... bytes) { 136 byte[] realBytes = new byte[bytes.length]; 137 for (int i = 0; i < bytes.length; i++) { 138 realBytes[i] = (byte) bytes[i]; 139 } 140 return realBytes; 141 } 142 assertValidUtf8(ByteStringFactory factory, int[] bytes, boolean not)143 private void assertValidUtf8(ByteStringFactory factory, int[] bytes, boolean not) { 144 byte[] realBytes = toByteArray(bytes); 145 assertTrue(not ^ Utf8.isValidUtf8(realBytes)); 146 assertTrue(not ^ Utf8.isValidUtf8(realBytes, 0, bytes.length)); 147 ByteString leaf = factory.newByteString(realBytes); 148 ByteString sub = leaf.substring(0, bytes.length); 149 assertTrue(not ^ leaf.isValidUtf8()); 150 assertTrue(not ^ sub.isValidUtf8()); 151 ByteString[] ropes = { 152 RopeByteString.newInstanceForTest(ByteString.EMPTY, leaf), 153 RopeByteString.newInstanceForTest(ByteString.EMPTY, sub), 154 RopeByteString.newInstanceForTest(leaf, ByteString.EMPTY), 155 RopeByteString.newInstanceForTest(sub, ByteString.EMPTY), 156 RopeByteString.newInstanceForTest(sub, leaf)}; 157 for (ByteString rope : ropes) { 158 assertTrue(not ^ rope.isValidUtf8()); 159 } 160 } 161 assertValidUtf8(int... bytes)162 private void assertValidUtf8(int... 
bytes) { 163 assertValidUtf8(LITERAL_FACTORY, bytes, false); 164 assertValidUtf8(HEAP_NIO_FACTORY, bytes, false); 165 assertValidUtf8(DIRECT_NIO_FACTORY, bytes, false); 166 } 167 assertInvalidUtf8(int... bytes)168 private void assertInvalidUtf8(int... bytes) { 169 assertValidUtf8(LITERAL_FACTORY, bytes, true); 170 assertValidUtf8(HEAP_NIO_FACTORY, bytes, true); 171 assertValidUtf8(DIRECT_NIO_FACTORY, bytes, true); 172 } 173 asBytes(String s)174 private static ByteString asBytes(String s) { 175 return ByteString.copyFromUtf8(s); 176 } 177 testShardsHaveExpectedRoundTrippables()178 public void testShardsHaveExpectedRoundTrippables() { 179 // A sanity check. 180 int actual = 0; 181 for (Shard shard : IsValidUtf8TestUtil.FOUR_BYTE_SHARDS) { 182 actual += shard.expected; 183 } 184 assertEquals(IsValidUtf8TestUtil.EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT, actual); 185 } 186 } 187