1 // Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
// Some UTF character sequences in this file were taken from
6 // https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
7
8 #include <gtest/gtest.h>
9 #include <stdio.h>
10
11 extern "C" {
12 #include "cras_utf8.h"
13 }
14
15 namespace {
16
// Checks that valid_utf8_string() accepts well-formed UTF-8 input.
//
// Each table entry pairs an input string with the value expected back through
// the position out-parameter. For every accepted input listed here that value
// equals the input's length in bytes.
TEST(UTF8, ValidStress) {
  static const struct {
    const char *text;
    size_t end_pos;
  } kAccepted[] = {
    /* Ordinary text mixing ASCII with multi-byte characters. */
    {"The greek word 'kosme': "
     "\xce\xba\xe1\xbd\xb9\xcf\x83\xce"
     "\xbc\xce\xb5", 35},
    {"Playback", 8},
    {"The Euro sign: \xe2\x82\xac", 18},

    /* First possible sequence of a certain length. */
    {"\x01", 1},
    {"\xc2\x80", 2},
    {"\xe0\xa0\x80", 3},
    {"\xe1\x80\x80", 3},
    {"\xf0\x90\x80\x80", 4},
    {"\xf1\x80\x80\x80", 4},

    /* Last possible sequence of a certain length. */
    {"\x7f", 1},
    {"\xdf\xbf", 2},
    {"\xef\xbf\xbf", 3},
    {"\xf4\x8f\xbf\xbf", 4},

    /* Other boundary conditions. */
    {"\xed\x9f\xbf", 3},
    {"\xee\x80\x80", 3},
    {"\xef\xbf\xbd", 3},
    {"\xf0\xbf\xbf\xbf", 4},

    /* BOM sequence. */
    {"\xef\xbb\xbf", 3},

    /* Valid UTF-8 that shouldn't appear in text; chose to allow
     * these characters anyway. */
    {"U+FFFE: \xef\xbf\xbe", 11},
    {"U+FDD0: \xef\xb7\x90", 11},
    {"\xf0\x9f\xbf\xbe", 4},
  };
  const size_t num_cases = sizeof(kAccepted) / sizeof(kAccepted[0]);
  size_t pos;

  /* The case index is attached to each expectation so that a failure can be
   * traced back to its table entry. */
  for (size_t i = 0; i < num_cases; i++) {
    EXPECT_EQ(1, valid_utf8_string(kAccepted[i].text, &pos))
        << "valid case #" << i;
    EXPECT_EQ(kAccepted[i].end_pos, pos) << "valid case #" << i;
  }
}
78
// Checks that valid_utf8_string() rejects malformed UTF-8 input.
//
// Each table entry pairs an input string with the value expected back through
// the position out-parameter. Judging by the cases below, that value is the
// byte offset at which the input stops being valid UTF-8.
TEST(UTF8, InvalidStress) {
  static const struct {
    const char *text;
    size_t bad_pos;
  } kRejected[] = {
    /* Malformed continuation bytes. */
    {"\x80", 0},
    {"\xbf", 0},
    {"\x80\xbf", 0},
    {"\xc2\x80\xbf", 2},

    /* Lonely start characters. */
    {"\xc2 \xc3 \xc4 ", 1},

    /* Out of range cases. */
    {"\xf4\x90\xbf\xbf", 1},
    {" \xf5\x80", 1},
    {" \xe0\x80\x80", 2},
    {"\xf4\x80\x80\xcf", 3},

    /* Stop in mid-sequence. */
    {"\xf4\x80", 2},

    /* Bad characters. */
    {"\xff", 0},
    {"\xfe", 0},

    /* Overlong representations of ASCII characters. */
    {"This represents the / character with too"
     "many bytes: \xe0\x80\xaf", 53},
    {"This represents the / character with too"
     "many bytes: \xf0\x80\x80\xaf", 53},

    /* Should not be interpreted as the ASCII NUL character. */
    {"This represents the NUL character with too"
     "many bytes: \xe0\x80\x80", 55},
    {"This represents the NUL character with too"
     "many bytes: \xf0\x80\x80\x80", 55},

    /* Single UTF-16 surrogates. */
    {"\xed\xa0\x80", 1},
    {"\xed\xad\xbf", 1},
    {"\xed\xae\x80", 1},
    {"\xed\xaf\xbf", 1},
    {"\xed\xb0\x80", 1},
    {"\xed\xbe\x80", 1},
    {"\xed\xbf\xbf", 1},
  };
  const size_t num_cases = sizeof(kRejected) / sizeof(kRejected[0]);
  size_t pos;

  /* The case index is attached to each expectation so that a failure can be
   * traced back to its table entry. */
  for (size_t i = 0; i < num_cases; i++) {
    EXPECT_EQ(0, valid_utf8_string(kRejected[i].text, &pos))
        << "invalid case #" << i;
    EXPECT_EQ(kRejected[i].bad_pos, pos) << "invalid case #" << i;
  }
}
148
149 } // namespace
150
main(int argc,char ** argv)151 int main(int argc, char **argv) {
152 ::testing::InitGoogleTest(&argc, argv);
153 return RUN_ALL_TESTS();
154 }
155