1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Unicode_test"
18 #include <utils/Log.h>
19 #include <utils/Unicode.h>
20 
21 #include <gtest/gtest.h>
22 
23 namespace android {
24 
25 class UnicodeTest : public testing::Test {
26 protected:
SetUp()27     virtual void SetUp() {
28     }
29 
TearDown()30     virtual void TearDown() {
31     }
32 
33     char16_t const * const kSearchString = u"I am a leaf on the wind.";
34 };
35 
// Measuring zero bytes of UTF-8 input must report zero UTF-16 code units.
TEST_F(UnicodeTest, UTF8toUTF16ZeroLength) {
    // An array with an empty initializer and no bound (`{ }`) has zero
    // elements, which ISO C++ does not allow. Keep one placeholder byte and
    // pass an explicit length of 0 so the input is still empty.
    const uint8_t str[] = { 0 };

    const ssize_t measured = utf8_to_utf16_length(str, 0);
    EXPECT_EQ(0, measured)
            << "Zero length input should return zero length output.";
}
45 
// A single ASCII byte is expected to measure as one UTF-16 code unit.
TEST_F(UnicodeTest, UTF8toUTF16ASCIILength) {
    // UTF-8 encoding of U+0030, i.e. ASCII '0'.
    const uint8_t ascii[] = { 0x30 };

    const ssize_t units = utf8_to_utf16_length(ascii, sizeof(ascii));
    EXPECT_EQ(1, units)
            << "ASCII glyphs should have a length of 1 char16_t";
}
56 
// A three-byte UTF-8 sequence is expected to measure as one UTF-16 code unit.
TEST_F(UnicodeTest, UTF8toUTF16Plane1Length) {
    // UTF-8 encoding of U+2323 SMILE.
    const uint8_t smile[] = { 0xE2, 0x8C, 0xA3 };

    const ssize_t units = utf8_to_utf16_length(smile, sizeof(smile));
    EXPECT_EQ(1, units)
            << "Plane 1 glyphs should have a length of 1 char16_t";
}
67 
// A four-byte UTF-8 sequence is expected to measure as two UTF-16 code units
// (a surrogate pair).
TEST_F(UnicodeTest, UTF8toUTF16SurrogateLength) {
    // UTF-8 encoding of U+10000.
    const uint8_t supplementary[] = { 0xF0, 0x90, 0x80, 0x80 };

    const ssize_t units =
            utf8_to_utf16_length(supplementary, sizeof(supplementary));
    EXPECT_EQ(2, units)
            << "Surrogate pairs should have a length of 2 char16_t";
}
78 
// Input that stops in the middle of a multi-byte sequence is expected to be
// reported as invalid (-1).
TEST_F(UnicodeTest, UTF8toUTF16TruncatedUTF8) {
    // First two bytes of the three-byte encoding of U+2323 SMILE
    // (0xE2 0x8C 0xA3); the final byte is deliberately missing.
    const uint8_t truncated[] = { 0xE2, 0x8C };

    const ssize_t units = utf8_to_utf16_length(truncated, sizeof(truncated));
    EXPECT_EQ(-1, units)
            << "Truncated UTF-8 should return -1 to indicate invalid";
}
90 
// Converts input mixing 1-, 2-, 3- and 4-byte UTF-8 sequences and checks every
// UTF-16 code unit written to the output buffer, including the terminator.
TEST_F(UnicodeTest, UTF8toUTF16Normal) {
    const uint8_t str[] = {
        0x30, // U+0030, 1 UTF-16 character
        0xC4, 0x80, // U+0100, 1 UTF-16 character
        0xE2, 0x8C, 0xA3, // U+2323, 1 UTF-16 character
        0xF0, 0x90, 0x80, 0x80, // U+10000, 2 UTF-16 characters
    };

    char16_t output[1 + 1 + 1 + 2 + 1]; // Room for the terminator

    // Pre-fill with a value that appears nowhere in the expected output, so a
    // slot the conversion fails to write shows up as a deterministic mismatch
    // instead of a read of uninitialized memory.
    for (char16_t& unit : output) {
        unit = 0xFFFF;
    }

    utf8_to_utf16(str, sizeof(str), output, sizeof(output) / sizeof(output[0]));

    EXPECT_EQ(0x0030, output[0])
            << "should be U+0030";
    EXPECT_EQ(0x0100, output[1])
            << "should be U+0100";
    EXPECT_EQ(0x2323, output[2])
            << "should be U+2323";
    EXPECT_EQ(0xD800, output[3])
            << "should be first half of surrogate U+10000";
    EXPECT_EQ(0xDC00, output[4])
            << "should be second half of surrogate U+10000";
    // Compare against a char16_t zero, not NULL: NULL is a null pointer
    // constant and output[5] is a character, not a pointer.
    EXPECT_EQ(u'\0', output[5])
            << "should be NULL terminated";
}
116 
// Searching for an empty target is expected to match at the very start of the
// haystack.
TEST_F(UnicodeTest, strstr16EmptyTarget) {
    const char16_t* const emptyTarget = u"";
    EXPECT_EQ(strstr16(kSearchString, emptyTarget), kSearchString)
            << "should return the original pointer";
}
121 
// Searching a string for itself is expected to match at its first character.
TEST_F(UnicodeTest, strstr16SameString) {
    const char16_t* const match = strstr16(kSearchString, kSearchString);
    EXPECT_EQ(kSearchString, match)
            << "should return the original pointer";
}
127 
// A target that is a prefix of the haystack is expected to match at offset 0.
TEST_F(UnicodeTest, strstr16TargetStartOfString) {
    const char16_t* const prefix = u"I am";
    const char16_t* const match = strstr16(kSearchString, prefix);
    EXPECT_EQ(kSearchString, match)
            << "should return the original pointer";
}
133 
134 
// A target that is a suffix of the haystack is expected to match where that
// suffix begins: "wind." starts at index 19 of kSearchString.
TEST_F(UnicodeTest, strstr16TargetEndOfString) {
    const char16_t* const suffix = u"wind.";
    const char16_t* const match = strstr16(kSearchString, suffix);
    EXPECT_EQ(&kSearchString[19], match);
}
139 
// A target in the middle of the haystack is expected to match at its first
// character: "leaf" starts at index 7 of kSearchString.
TEST_F(UnicodeTest, strstr16TargetWithinString) {
    const char16_t* const word = u"leaf";
    const char16_t* const match = strstr16(kSearchString, word);
    EXPECT_EQ(&kSearchString[7], match);
}
144 
// A target that does not occur in the haystack is expected to yield nullptr.
TEST_F(UnicodeTest, strstr16TargetNotPresent) {
    const char16_t* const absent = u"soar";
    const char16_t* const match = strstr16(kSearchString, absent);
    EXPECT_EQ(nullptr, match);
}
149 
150 // http://b/29267949
151 // Test that overreading in utf8_to_utf16_length is detected
TEST_F(UnicodeTest, InvalidUtf8OverreadDetected) {
    // A UTF-8 char starting with \xc4 is two bytes long, but strlen() stops
    // at the first zero byte, so the length passed below covers only the
    // lead byte.
    // The extra zeros are there so no memory outside the buffer is read in
    // case the code doesn't work as expected.
    static const char utf8[] = "\xc4\x00\x00\x00";
    // Named cast to a const pointer: the buffer is only read, never modified.
    const uint8_t* const bytes = reinterpret_cast<const uint8_t*>(utf8);
    ASSERT_DEATH(utf8_to_utf16_length(bytes, strlen(utf8),
            true /* overreadIsFatal */), "" /* regex for ASSERT_DEATH */);
}
160 
161 }
162