1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <gtest/gtest.h>
18 #include <UnicodeUtils.h>
19
20 #include "LayoutUtils.h"
21
22 namespace {
23
ExpectNextWordBreakForCache(size_t offset_in,const char * query_str)24 void ExpectNextWordBreakForCache(size_t offset_in, const char* query_str) {
25 const size_t BUF_SIZE = 256U;
26 uint16_t buf[BUF_SIZE];
27 size_t expected_breakpoint = 0U;
28 size_t size = 0U;
29
30 ParseUnicode(buf, BUF_SIZE, query_str, &size, &expected_breakpoint);
31 EXPECT_EQ(expected_breakpoint,
32 getNextWordBreakForCache(buf, offset_in, size))
33 << "Expected position is [" << query_str << "] from offset " << offset_in;
34 }
35
ExpectPrevWordBreakForCache(size_t offset_in,const char * query_str)36 void ExpectPrevWordBreakForCache(size_t offset_in, const char* query_str) {
37 const size_t BUF_SIZE = 256U;
38 uint16_t buf[BUF_SIZE];
39 size_t expected_breakpoint = 0U;
40 size_t size = 0U;
41
42 ParseUnicode(buf, BUF_SIZE, query_str, &size, &expected_breakpoint);
43 EXPECT_EQ(expected_breakpoint,
44 getPrevWordBreakForCache(buf, offset_in, size))
45 << "Expected position is [" << query_str << "] from offset " << offset_in;
46 }
47
// Forward word-break expectations for getNextWordBreakForCache().
//
// Every entry pairs a starting offset with an annotated query string and is
// checked, in table order, through ExpectNextWordBreakForCache(). The offset
// 1000 entries exercise a starting offset far beyond the end of the text.
TEST(WordBreakTest, goNextWordBreakTest) {
    struct BreakCase {
        size_t offset;
        const char* query;
    };

    const BreakCase kCases[] = {
        // Empty text.
        {0, "|"},

        // A run of Latin letters only: the expected position is always the end.
        {0, "'a' 'b' 'c' 'd' |"},
        {1, "'a' 'b' 'c' 'd' |"},
        {2, "'a' 'b' 'c' 'd' |"},
        {3, "'a' 'b' 'c' 'd' |"},
        {4, "'a' 'b' 'c' 'd' |"},
        {1000, "'a' 'b' 'c' 'd' |"},

        // A space character introduces a word break (U+0020).
        {0, "'a' 'b' | U+0020 'c' 'd'"},
        {1, "'a' 'b' | U+0020 'c' 'd'"},
        {2, "'a' 'b' U+0020 | 'c' 'd'"},
        {3, "'a' 'b' U+0020 'c' 'd' |"},
        {4, "'a' 'b' U+0020 'c' 'd' |"},
        {5, "'a' 'b' U+0020 'c' 'd' |"},
        {1000, "'a' 'b' U+0020 'c' 'd' |"},

        // Same, with U+2000 as the space.
        {0, "'a' 'b' | U+2000 'c' 'd'"},
        {1, "'a' 'b' | U+2000 'c' 'd'"},
        {2, "'a' 'b' U+2000 | 'c' 'd'"},
        {3, "'a' 'b' U+2000 'c' 'd' |"},
        {4, "'a' 'b' U+2000 'c' 'd' |"},
        {5, "'a' 'b' U+2000 'c' 'd' |"},
        {1000, "'a' 'b' U+2000 'c' 'd' |"},

        // Two consecutive spaces.
        {0, "'a' 'b' | U+2000 U+2000 'c' 'd'"},
        {1, "'a' 'b' | U+2000 U+2000 'c' 'd'"},
        {2, "'a' 'b' U+2000 | U+2000 'c' 'd'"},
        {3, "'a' 'b' U+2000 U+2000 | 'c' 'd'"},
        {4, "'a' 'b' U+2000 U+2000 'c' 'd' |"},
        {5, "'a' 'b' U+2000 U+2000 'c' 'd' |"},
        {6, "'a' 'b' U+2000 U+2000 'c' 'd' |"},
        {1000, "'a' 'b' U+2000 U+2000 'c' 'd' |"},

        // CJK ideographs introduce word breaks.
        {0, "U+4E00 | U+4E00 U+4E00 U+4E00 U+4E00"},
        {1, "U+4E00 U+4E00 | U+4E00 U+4E00 U+4E00"},
        {2, "U+4E00 U+4E00 U+4E00 | U+4E00 U+4E00"},
        {3, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00"},
        {4, "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |"},
        {5, "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |"},
        {1000, "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |"},

        {0, "U+4E00 | U+4E8C U+4E09 U+56DB U+4E94"},
        {1, "U+4E00 U+4E8C | U+4E09 U+56DB U+4E94"},
        {2, "U+4E00 U+4E8C U+4E09 | U+56DB U+4E94"},
        {3, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94"},
        {4, "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |"},
        {5, "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |"},
        {1000, "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |"},

        // Mixed case: CJK ideographs, Latin letters and a space.
        {0, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"},
        {1, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"},
        {2, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"},
        {3, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00"},
        {4, "U+4E00 'a' 'b' U+2000 'c' | U+4E00"},
        {5, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |"},
        {1000, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |"},

        // A trailing Unicode combining character stays with its base.
        {0, "U+4E00 U+0332 | U+4E00"},
        {1, "U+4E00 U+0332 | U+4E00"},
        {2, "U+4E00 U+0332 U+4E00 |"},
        {3, "U+4E00 U+0332 U+4E00 |"},
        {1000, "U+4E00 U+0332 U+4E00 |"},

        // Surrogate pairs.
        {0, "U+1F60D U+1F618 |"},
        {1, "U+1F60D U+1F618 |"},
        {2, "U+1F60D U+1F618 |"},
        {3, "U+1F60D U+1F618 |"},
        {4, "U+1F60D U+1F618 |"},
        {1000, "U+1F60D U+1F618 |"},

        // Broken surrogate pairs.
        // U+D84D is a leading surrogate with no trailing surrogate after it.
        {0, "U+D84D U+1F618 |"},
        {1, "U+D84D U+1F618 |"},
        {2, "U+D84D U+1F618 |"},
        {3, "U+D84D U+1F618 |"},
        {1000, "U+D84D U+1F618 |"},

        {0, "U+1F618 U+D84D |"},
        {1, "U+1F618 U+D84D |"},
        {2, "U+1F618 U+D84D |"},
        {3, "U+1F618 U+D84D |"},
        {1000, "U+1F618 U+D84D |"},

        // U+DE0D is a trailing surrogate with no leading surrogate before it.
        {0, "U+DE0D U+1F618 |"},
        {1, "U+DE0D U+1F618 |"},
        {2, "U+DE0D U+1F618 |"},
        {3, "U+DE0D U+1F618 |"},
        {1000, "U+DE0D U+1F618 |"},

        {0, "U+1F618 U+DE0D |"},
        {1, "U+1F618 U+DE0D |"},
        {2, "U+1F618 U+DE0D |"},
        {3, "U+1F618 U+DE0D |"},
        {1000, "U+1F618 U+DE0D |"},

        // Regional indicator pair. U+1F1FA U+1F1F8 is the US national flag.
        {0, "U+1F1FA U+1F1F8 |"},
        {1, "U+1F1FA U+1F1F8 |"},
        {2, "U+1F1FA U+1F1F8 |"},
        {1000, "U+1F1FA U+1F1F8 |"},

        // Tone marks.
        // CJK ideographic char + Tone mark + CJK ideographic char
        {0, "U+4444 U+302D | U+4444"},
        {1, "U+4444 U+302D | U+4444"},
        {2, "U+4444 U+302D U+4444 |"},
        {3, "U+4444 U+302D U+4444 |"},
        {1000, "U+4444 U+302D U+4444 |"},

        // Variation Selectors.
        // CJK Ideographic char + Variation Selector(VS1) + CJK Ideographic char
        {0, "U+845B U+FE00 | U+845B"},
        {1, "U+845B U+FE00 | U+845B"},
        {2, "U+845B U+FE00 U+845B |"},
        {3, "U+845B U+FE00 U+845B |"},
        {1000, "U+845B U+FE00 U+845B |"},

        // CJK Ideographic char + Variation Selector(VS17) + CJK Ideographic char
        {0, "U+845B U+E0100 | U+845B"},
        {1, "U+845B U+E0100 | U+845B"},
        {2, "U+845B U+E0100 | U+845B"},
        {3, "U+845B U+E0100 U+845B |"},
        {4, "U+845B U+E0100 U+845B |"},
        {5, "U+845B U+E0100 U+845B |"},
        {1000, "U+845B U+E0100 U+845B |"},

        // CJK ideographic char + Tone mark + Variation Selector(VS1)
        {0, "U+4444 U+302D U+FE00 | U+4444"},
        {1, "U+4444 U+302D U+FE00 | U+4444"},
        {2, "U+4444 U+302D U+FE00 | U+4444"},
        {3, "U+4444 U+302D U+FE00 U+4444 |"},
        {4, "U+4444 U+302D U+FE00 U+4444 |"},
        {1000, "U+4444 U+302D U+FE00 U+4444 |"},

        // CJK ideographic char + Tone mark + Variation Selector(VS17)
        {0, "U+4444 U+302D U+E0100 | U+4444"},
        {1, "U+4444 U+302D U+E0100 | U+4444"},
        {2, "U+4444 U+302D U+E0100 | U+4444"},
        {3, "U+4444 U+302D U+E0100 | U+4444"},
        {4, "U+4444 U+302D U+E0100 U+4444 |"},
        {5, "U+4444 U+302D U+E0100 U+4444 |"},
        {1000, "U+4444 U+302D U+E0100 U+4444 |"},

        // CJK ideographic char + Variation Selector(VS1) + Tone mark
        {0, "U+4444 U+FE00 U+302D | U+4444"},
        {1, "U+4444 U+FE00 U+302D | U+4444"},
        {2, "U+4444 U+FE00 U+302D | U+4444"},
        {3, "U+4444 U+FE00 U+302D U+4444 |"},
        {4, "U+4444 U+FE00 U+302D U+4444 |"},
        {1000, "U+4444 U+FE00 U+302D U+4444 |"},

        // CJK ideographic char + Variation Selector(VS17) + Tone mark
        {0, "U+4444 U+E0100 U+302D | U+4444"},
        {1, "U+4444 U+E0100 U+302D | U+4444"},
        {2, "U+4444 U+E0100 U+302D | U+4444"},
        {3, "U+4444 U+E0100 U+302D | U+4444"},
        {4, "U+4444 U+E0100 U+302D U+4444 |"},
        {5, "U+4444 U+E0100 U+302D U+4444 |"},
        {1000, "U+4444 U+E0100 U+302D U+4444 |"},

        // The remaining cases use variation selectors and tone marks in unusual
        // ways. They exist to catch future behavior changes, e.g. out-of-bounds
        // indexing or crashes. Feel free to update the expectations if a
        // behavior change makes sense to you.

        // Isolated Tone marks and Variation Selectors
        {0, "U+FE00 |"},
        {1, "U+FE00 |"},
        {1000, "U+FE00 |"},
        {0, "U+E0100 |"},
        {1000, "U+E0100 |"},
        {0, "U+302D |"},
        {1000, "U+302D |"},

        // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS1)
        {0, "U+845B U+FE00 U+FE00 | U+845B"},
        {1, "U+845B U+FE00 U+FE00 | U+845B"},
        {2, "U+845B U+FE00 U+FE00 | U+845B"},
        {3, "U+845B U+FE00 U+FE00 U+845B |"},
        {4, "U+845B U+FE00 U+FE00 U+845B |"},
        {1000, "U+845B U+FE00 U+FE00 U+845B |"},

        // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS17)
        {0, "U+845B U+E0100 U+E0100 | U+845B"},
        {1, "U+845B U+E0100 U+E0100 | U+845B"},
        {2, "U+845B U+E0100 U+E0100 | U+845B"},
        {3, "U+845B U+E0100 U+E0100 | U+845B"},
        {4, "U+845B U+E0100 U+E0100 | U+845B"},
        {5, "U+845B U+E0100 U+E0100 U+845B |"},
        {6, "U+845B U+E0100 U+E0100 U+845B |"},
        {1000, "U+845B U+E0100 U+E0100 U+845B |"},

        // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS17)
        {0, "U+845B U+FE00 U+E0100 | U+845B"},
        {1, "U+845B U+FE00 U+E0100 | U+845B"},
        {2, "U+845B U+FE00 U+E0100 | U+845B"},
        {3, "U+845B U+FE00 U+E0100 | U+845B"},
        {4, "U+845B U+FE00 U+E0100 U+845B |"},
        {5, "U+845B U+FE00 U+E0100 U+845B |"},
        {1000, "U+845B U+FE00 U+E0100 U+845B |"},

        // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS1)
        {0, "U+845B U+E0100 U+FE00 | U+845B"},
        {1, "U+845B U+E0100 U+FE00 | U+845B"},
        {2, "U+845B U+E0100 U+FE00 | U+845B"},
        {3, "U+845B U+E0100 U+FE00 | U+845B"},
        {4, "U+845B U+E0100 U+FE00 U+845B |"},
        {5, "U+845B U+E0100 U+FE00 U+845B |"},
        {1000, "U+845B U+E0100 U+FE00 U+845B |"},

        // Tone mark + Tone mark
        {0, "U+4444 U+302D U+302D | U+4444"},
        {1, "U+4444 U+302D U+302D | U+4444"},
        {2, "U+4444 U+302D U+302D | U+4444"},
        {3, "U+4444 U+302D U+302D U+4444 |"},
        {4, "U+4444 U+302D U+302D U+4444 |"},
        {1000, "U+4444 U+302D U+302D U+4444 |"},
    };

    for (const BreakCase& testCase : kCases) {
        ExpectNextWordBreakForCache(testCase.offset, testCase.query);
    }
}
278
// Backward word-break expectations for getPrevWordBreakForCache().
//
// Every entry pairs a starting offset with an annotated query string and is
// checked, in table order, through ExpectPrevWordBreakForCache(). The offset
// 1000 entries exercise a starting offset far beyond the end of the text.
TEST(WordBreakTest, goPrevWordBreakTest) {
    struct BreakCase {
        size_t offset;
        const char* query;
    };

    const BreakCase kCases[] = {
        // Empty text.
        {0, "|"},

        // A run of Latin letters only: the expected position is always the start.
        {0, "| 'a' 'b' 'c' 'd'"},
        {1, "| 'a' 'b' 'c' 'd'"},
        {2, "| 'a' 'b' 'c' 'd'"},
        {3, "| 'a' 'b' 'c' 'd'"},
        {4, "| 'a' 'b' 'c' 'd'"},
        {1000, "| 'a' 'b' 'c' 'd'"},

        // A space character introduces a word break (U+0020).
        {0, "| 'a' 'b' U+0020 'c' 'd'"},
        {1, "| 'a' 'b' U+0020 'c' 'd'"},
        {2, "| 'a' 'b' U+0020 'c' 'd'"},
        {3, "'a' 'b' | U+0020 'c' 'd'"},
        {4, "'a' 'b' U+0020 | 'c' 'd'"},
        {5, "'a' 'b' U+0020 | 'c' 'd'"},
        {1000, "'a' 'b' U+0020 | 'c' 'd'"},

        // Same, with U+2000 as the space.
        {0, "| 'a' 'b' U+2000 'c' 'd'"},
        {1, "| 'a' 'b' U+2000 'c' 'd'"},
        {2, "| 'a' 'b' U+2000 'c' 'd'"},
        {3, "'a' 'b' | U+2000 'c' 'd'"},
        {4, "'a' 'b' U+2000 | 'c' 'd'"},
        {5, "'a' 'b' U+2000 | 'c' 'd'"},
        {1000, "'a' 'b' U+2000 | 'c' 'd'"},

        // Two consecutive spaces.
        {0, "| 'a' 'b' U+2000 U+2000 'c' 'd'"},
        {1, "| 'a' 'b' U+2000 U+2000 'c' 'd'"},
        {2, "| 'a' 'b' U+2000 U+2000 'c' 'd'"},
        {3, "'a' 'b' | U+2000 U+2000 'c' 'd'"},
        {4, "'a' 'b' U+2000 | U+2000 'c' 'd'"},
        {5, "'a' 'b' U+2000 U+2000 | 'c' 'd'"},
        {6, "'a' 'b' U+2000 U+2000 | 'c' 'd'"},
        {1000, "'a' 'b' U+2000 U+2000 | 'c' 'd'"},

        // CJK ideographs introduce word breaks.
        {0, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00"},
        {1, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00"},
        {2, "U+4E00 | U+4E00 U+4E00 U+4E00 U+4E00"},
        {3, "U+4E00 U+4E00 | U+4E00 U+4E00 U+4E00"},
        {4, "U+4E00 U+4E00 U+4E00 | U+4E00 U+4E00"},
        {5, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00"},
        {1000, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00"},

        {0, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94"},
        {1, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94"},
        {2, "U+4E00 | U+4E8C U+4E09 U+56DB U+4E94"},
        {3, "U+4E00 U+4E8C | U+4E09 U+56DB U+4E94"},
        {4, "U+4E00 U+4E8C U+4E09 | U+56DB U+4E94"},
        {5, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94"},
        {1000, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94"},

        // Mixed case: CJK ideographs, Latin letters and a space.
        {0, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"},
        {1, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"},
        {2, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"},
        {3, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"},
        {4, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"},
        {5, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00"},
        {6, "U+4E00 'a' 'b' U+2000 'c' | U+4E00"},
        {1000, "U+4E00 'a' 'b' U+2000 'c' | U+4E00"},

        // A trailing Unicode combining character stays with its base.
        {0, "| U+4E00 U+0332 U+4E00"},
        {1, "| U+4E00 U+0332 U+4E00"},
        {2, "| U+4E00 U+0332 U+4E00"},
        {3, "U+4E00 U+0332 | U+4E00"},
        {1000, "U+4E00 U+0332 | U+4E00"},

        // Surrogate pairs.
        {0, "| U+1F60D U+1F618"},
        {1, "| U+1F60D U+1F618"},
        {2, "| U+1F60D U+1F618"},
        {3, "| U+1F60D U+1F618"},
        {4, "| U+1F60D U+1F618"},
        {1000, "| U+1F60D U+1F618"},

        // Broken surrogate pairs.
        // U+D84D is a leading surrogate with no trailing surrogate after it.
        {0, "| U+D84D U+1F618"},
        {1, "| U+D84D U+1F618"},
        {2, "| U+D84D U+1F618"},
        {3, "| U+D84D U+1F618"},
        {1000, "| U+D84D U+1F618"},

        {0, "| U+1F618 U+D84D"},
        {1, "| U+1F618 U+D84D"},
        {2, "| U+1F618 U+D84D"},
        {3, "| U+1F618 U+D84D"},
        {1000, "| U+1F618 U+D84D"},

        // U+DE0D is a trailing surrogate with no leading surrogate before it.
        {0, "| U+DE0D U+1F618"},
        {1, "| U+DE0D U+1F618"},
        {2, "| U+DE0D U+1F618"},
        {3, "| U+DE0D U+1F618"},
        {1000, "| U+DE0D U+1F618"},

        {0, "| U+1F618 U+DE0D"},
        {1, "| U+1F618 U+DE0D"},
        {2, "| U+1F618 U+DE0D"},
        {3, "| U+1F618 U+DE0D"},
        {1000, "| U+1F618 U+DE0D"},

        // Regional indicator pair. U+1F1FA U+1F1F8 is the US national flag.
        {0, "| U+1F1FA U+1F1F8"},
        {1, "| U+1F1FA U+1F1F8"},
        {2, "| U+1F1FA U+1F1F8"},
        {1000, "| U+1F1FA U+1F1F8"},

        // Tone marks.
        // CJK ideographic char + Tone mark + CJK ideographic char
        {0, "| U+4444 U+302D U+4444"},
        {1, "| U+4444 U+302D U+4444"},
        {2, "| U+4444 U+302D U+4444"},
        {3, "U+4444 U+302D | U+4444"},
        {1000, "U+4444 U+302D | U+4444"},

        // Variation Selectors.
        // CJK Ideographic char + Variation Selector(VS1) + CJK Ideographic char
        {0, "| U+845B U+FE00 U+845B"},
        {1, "| U+845B U+FE00 U+845B"},
        {2, "| U+845B U+FE00 U+845B"},
        {3, "U+845B U+FE00 | U+845B"},
        {1000, "U+845B U+FE00 | U+845B"},

        // CJK Ideographic char + Variation Selector(VS17) + CJK Ideographic char
        {0, "| U+845B U+E0100 U+845B"},
        {1, "| U+845B U+E0100 U+845B"},
        {2, "| U+845B U+E0100 U+845B"},
        {3, "| U+845B U+E0100 U+845B"},
        {4, "U+845B U+E0100 | U+845B"},
        {5, "U+845B U+E0100 | U+845B"},
        {1000, "U+845B U+E0100 | U+845B"},

        // CJK ideographic char + Tone mark + Variation Selector(VS1)
        {0, "| U+4444 U+302D U+FE00 U+4444"},
        {1, "| U+4444 U+302D U+FE00 U+4444"},
        {2, "| U+4444 U+302D U+FE00 U+4444"},
        {3, "| U+4444 U+302D U+FE00 U+4444"},
        {4, "U+4444 U+302D U+FE00 | U+4444"},
        {1000, "U+4444 U+302D U+FE00 | U+4444"},

        // CJK ideographic char + Tone mark + Variation Selector(VS17)
        {0, "| U+4444 U+302D U+E0100 U+4444"},
        {1, "| U+4444 U+302D U+E0100 U+4444"},
        {2, "| U+4444 U+302D U+E0100 U+4444"},
        {3, "| U+4444 U+302D U+E0100 U+4444"},
        {4, "| U+4444 U+302D U+E0100 U+4444"},
        {5, "U+4444 U+302D U+E0100 | U+4444"},
        {1000, "U+4444 U+302D U+E0100 | U+4444"},

        // CJK ideographic char + Variation Selector(VS1) + Tone mark
        {0, "| U+4444 U+FE00 U+302D U+4444"},
        {1, "| U+4444 U+FE00 U+302D U+4444"},
        {2, "| U+4444 U+FE00 U+302D U+4444"},
        {3, "| U+4444 U+FE00 U+302D U+4444"},
        {4, "U+4444 U+FE00 U+302D | U+4444"},
        {1000, "U+4444 U+FE00 U+302D | U+4444"},

        // CJK ideographic char + Variation Selector(VS17) + Tone mark
        {0, "| U+4444 U+E0100 U+302D U+4444"},
        {1, "| U+4444 U+E0100 U+302D U+4444"},
        {2, "| U+4444 U+E0100 U+302D U+4444"},
        {3, "| U+4444 U+E0100 U+302D U+4444"},
        {4, "| U+4444 U+E0100 U+302D U+4444"},
        {5, "U+4444 U+E0100 U+302D | U+4444"},
        {1000, "U+4444 U+E0100 U+302D | U+4444"},

        // The remaining cases use variation selectors and tone marks in unusual
        // ways. They exist to catch future behavior changes, e.g. out-of-bounds
        // indexing or crashes. Feel free to update the expectations if a
        // behavior change makes sense to you.

        // Isolated Tone marks and Variation Selectors
        {0, "| U+FE00"},
        {1, "| U+FE00"},
        {1000, "| U+FE00"},
        {0, "| U+E0100"},
        {1000, "| U+E0100"},
        {0, "| U+302D"},
        {1000, "| U+302D"},

        // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS1)
        {0, "| U+845B U+FE00 U+FE00 U+845B"},
        {1, "| U+845B U+FE00 U+FE00 U+845B"},
        {2, "| U+845B U+FE00 U+FE00 U+845B"},
        {3, "| U+845B U+FE00 U+FE00 U+845B"},
        {4, "U+845B U+FE00 U+FE00 | U+845B"},
        {1000, "U+845B U+FE00 U+FE00 | U+845B"},

        // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS17)
        {0, "| U+845B U+E0100 U+E0100 U+845B"},
        {1, "| U+845B U+E0100 U+E0100 U+845B"},
        {2, "| U+845B U+E0100 U+E0100 U+845B"},
        {3, "| U+845B U+E0100 U+E0100 U+845B"},
        {4, "| U+845B U+E0100 U+E0100 U+845B"},
        {5, "| U+845B U+E0100 U+E0100 U+845B"},
        {6, "U+845B U+E0100 U+E0100 | U+845B"},
        {1000, "U+845B U+E0100 U+E0100 | U+845B"},

        // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS17)
        {0, "| U+845B U+FE00 U+E0100 U+845B"},
        {1, "| U+845B U+FE00 U+E0100 U+845B"},
        {2, "| U+845B U+FE00 U+E0100 U+845B"},
        {3, "| U+845B U+FE00 U+E0100 U+845B"},
        {4, "| U+845B U+FE00 U+E0100 U+845B"},
        {5, "U+845B U+FE00 U+E0100 | U+845B"},
        {1000, "U+845B U+FE00 U+E0100 | U+845B"},

        // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS1)
        {0, "| U+845B U+E0100 U+FE00 U+845B"},
        {1, "| U+845B U+E0100 U+FE00 U+845B"},
        {2, "| U+845B U+E0100 U+FE00 U+845B"},
        {3, "| U+845B U+E0100 U+FE00 U+845B"},
        {4, "| U+845B U+E0100 U+FE00 U+845B"},
        {5, "U+845B U+E0100 U+FE00 | U+845B"},
        {1000, "U+845B U+E0100 U+FE00 | U+845B"},

        // Tone mark + Tone mark
        {0, "| U+4444 U+302D U+302D U+4444"},
        {1, "| U+4444 U+302D U+302D U+4444"},
        {2, "| U+4444 U+302D U+302D U+4444"},
        {3, "| U+4444 U+302D U+302D U+4444"},
        {4, "U+4444 U+302D U+302D | U+4444"},
        {1000, "U+4444 U+302D U+302D | U+4444"},
    };

    for (const BreakCase& testCase : kCases) {
        ExpectPrevWordBreakForCache(testCase.offset, testCase.query);
    }
}
509
510 } // namespace
511