1 // Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
// Some UTF character sequences in this file were taken from
6 // https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
7
8 #include <gtest/gtest.h>
9 #include <stdio.h>
10
11 extern "C" {
12 #include "cras_utf8.h"
13 }
14
15 namespace {
16
// Feeds well-formed UTF-8 strings to valid_utf8_string() and expects each call
// to return 1 and to store the listed position through its out-parameter. For
// every input in this table the expected position equals the byte length of
// the string.
TEST(UTF8, ValidStress) {
  // |pos| is reset to this value before every call. No expectation below
  // equals it, so a call that leaves |pos| untouched fails the check instead
  // of silently passing on a value left over from the previous call.
  const size_t kUnsetPos = static_cast<size_t>(-1);

  struct ValidCase {
    int line;             // Source line of the entry, echoed on failure.
    const char* str;      // NUL-terminated input handed to the validator.
    size_t expected_pos;  // Position the validator is expected to report.
  };

  const ValidCase kCases[] = {
      {__LINE__,
       "The greek word 'kosme': "
       "\xce\xba\xe1\xbd\xb9\xcf\x83\xce"
       "\xbc\xce\xb5",
       35},
      {__LINE__, "Playback", 8},
      {__LINE__, "The Euro sign: \xe2\x82\xac", 18},

      /* First possible sequence of a certain length. */
      {__LINE__, "\x01", 1},
      {__LINE__, "\xc2\x80", 2},
      {__LINE__, "\xe0\xa0\x80", 3},
      {__LINE__, "\xe1\x80\x80", 3},
      {__LINE__, "\xf0\x90\x80\x80", 4},
      {__LINE__, "\xf1\x80\x80\x80", 4},

      /* Last possible sequence of a certain length. */
      {__LINE__, "\x7f", 1},
      {__LINE__, "\xdf\xbf", 2},
      {__LINE__, "\xef\xbf\xbf", 3},
      {__LINE__, "\xf4\x8f\xbf\xbf", 4},

      /* Other boundary conditions. */
      {__LINE__, "\xed\x9f\xbf", 3},
      {__LINE__, "\xee\x80\x80", 3},
      {__LINE__, "\xef\xbf\xbd", 3},
      {__LINE__, "\xf0\xbf\xbf\xbf", 4},

      /* BOM sequence. */
      {__LINE__, "\xef\xbb\xbf", 3},

      /* Valid UTF-8 that shouldn't appear in text; chose to allow
       * these characters anyway. */
      {__LINE__, "U+FFFE: \xef\xbf\xbe", 11},
      {__LINE__, "U+FDD0: \xef\xb7\x90", 11},
      {__LINE__, "\xf0\x9f\xbf\xbe", 4},
  };

  for (size_t i = 0; i < sizeof(kCases) / sizeof(kCases[0]); ++i) {
    size_t pos = kUnsetPos;
    EXPECT_EQ(1, valid_utf8_string(kCases[i].str, &pos))
        << "input defined at line " << kCases[i].line;
    // Both operands are size_t, which avoids a signed/unsigned comparison.
    EXPECT_EQ(kCases[i].expected_pos, pos)
        << "input defined at line " << kCases[i].line;
  }
}
79
// Feeds malformed UTF-8 strings to valid_utf8_string() and expects each call
// to return 0 and to store the listed byte offset through its out-parameter.
// The offsets are the positions at which validation is expected to stop for
// each input.
TEST(UTF8, InvalidStress) {
  // |pos| is reset to this value before every call. No expectation below
  // equals it, so a call that leaves |pos| untouched fails the check instead
  // of silently passing on a value left over from the previous call (many
  // neighbouring cases expect the same offset).
  const size_t kUnsetPos = static_cast<size_t>(-1);

  struct InvalidCase {
    int line;             // Source line of the entry, echoed on failure.
    const char* str;      // NUL-terminated input handed to the validator.
    size_t expected_pos;  // Offset the validator is expected to report.
  };

  const InvalidCase kCases[] = {
      /* Malformed continuation bytes. */
      {__LINE__, "\x80", 0},
      {__LINE__, "\xbf", 0},
      {__LINE__, "\x80\xbf", 0},
      {__LINE__, "\xc2\x80\xbf", 2},

      /* Lonely start characters. */
      {__LINE__, "\xc2 \xc3 \xc4 ", 1},

      /* Out of range cases. */
      {__LINE__, "\xf4\x90\xbf\xbf", 1},
      {__LINE__, " \xf5\x80", 1},
      {__LINE__, " \xe0\x80\x80", 2},
      {__LINE__, "\xf4\x80\x80\xcf", 3},

      /* Stop in mid-sequence. */
      {__LINE__, "\xf4\x80", 2},

      /* Bad characters. */
      {__LINE__, "\xff", 0},
      {__LINE__, "\xfe", 0},

      /* Overlong representations of ASCII characters. */
      {__LINE__,
       "This represents the / character with too"
       "many bytes: \xe0\x80\xaf",
       53},
      {__LINE__,
       "This represents the / character with too"
       "many bytes: \xf0\x80\x80\xaf",
       53},

      /* Should not be interpreted as the ASCII NUL character. */
      {__LINE__,
       "This represents the NUL character with too"
       "many bytes: \xe0\x80\x80",
       55},
      {__LINE__,
       "This represents the NUL character with too"
       "many bytes: \xf0\x80\x80\x80",
       55},

      /* Single UTF-16 surrogates. */
      {__LINE__, "\xed\xa0\x80", 1},
      {__LINE__, "\xed\xad\xbf", 1},
      {__LINE__, "\xed\xae\x80", 1},
      {__LINE__, "\xed\xaf\xbf", 1},
      {__LINE__, "\xed\xb0\x80", 1},
      {__LINE__, "\xed\xbe\x80", 1},
      {__LINE__, "\xed\xbf\xbf", 1},
  };

  for (size_t i = 0; i < sizeof(kCases) / sizeof(kCases[0]); ++i) {
    size_t pos = kUnsetPos;
    EXPECT_EQ(0, valid_utf8_string(kCases[i].str, &pos))
        << "input defined at line " << kCases[i].line;
    // Both operands are size_t, which avoids a signed/unsigned comparison.
    EXPECT_EQ(kCases[i].expected_pos, pos)
        << "input defined at line " << kCases[i].line;
  }
}
153
154 } // namespace
155
main(int argc,char ** argv)156 int main(int argc, char** argv) {
157 ::testing::InitGoogleTest(&argc, argv);
158 return RUN_ALL_TESTS();
159 }
160