1 //===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "llvm/Support/ConvertUTF.h"
10 #include "llvm/ADT/ArrayRef.h"
11 #include "gtest/gtest.h"
12 #include <string>
13 #include <vector>
14
15 using namespace llvm;
16
// Checks that UTF-16 bytes introduced by a little-endian byte order mark are
// converted to the matching UTF-8 string.
TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
  // The look of disapproval (U+0CA0 '_' U+0CA0), stored low byte first after
  // the \xff\xfe mark.
  alignas(UTF16) static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
  // sizeof(Src) - 1 leaves out the literal's terminating NUL.
  const ArrayRef<char> Input(Src, sizeof(Src) - 1);
  const std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");

  std::string Actual;
  const bool Converted = convertUTF16ToUTF8String(Input, Actual);

  EXPECT_TRUE(Converted);
  EXPECT_EQ(Expected, Actual);
}
27
// Checks that UTF-16 bytes introduced by a big-endian byte order mark are
// converted to the matching UTF-8 string.
TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
  // The look of disapproval (U+0CA0 '_' U+0CA0), stored high byte first after
  // the \xfe\xff mark.
  alignas(UTF16) static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
  // sizeof(Src) - 1 leaves out the literal's terminating NUL.
  const ArrayRef<char> Input(Src, sizeof(Src) - 1);
  const std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");

  std::string Actual;
  const bool Converted = convertUTF16ToUTF8String(Input, Actual);

  EXPECT_TRUE(Converted);
  EXPECT_EQ(Expected, Actual);
}
38
// Checks that a UTF-8 string is converted to the expected UTF-16 code units.
TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) {
  // The look of disapproval (U+0CA0 '_' U+0CA0) encoded as UTF-8.
  static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
  // Only the first three entries are compared below; the trailing 0 is not.
  static const UTF16 Expected[] = {0x0CA0, 0x005f, 0x0CA0, 0};

  // sizeof(Src) - 1 leaves out the literal's terminating NUL.
  const StringRef Input(Src, sizeof(Src) - 1);
  SmallVector<UTF16, 5> Units;
  const bool Converted = convertUTF8ToUTF16String(Input, Units);
  EXPECT_TRUE(Converted);

  // Abort on a size mismatch so the loop never indexes past the output.
  ASSERT_EQ(3u, Units.size());
  for (unsigned Idx = 0; Idx < 3u; ++Idx)
    EXPECT_EQ(Expected[Idx], Units[Idx]);
}
51
// Checks that a five-byte buffer, which cannot hold a whole number of
// two-byte units, is rejected.
TEST(ConvertUTFTest, OddLengthInput) {
  std::string Utf8;
  EXPECT_FALSE(convertUTF16ToUTF8String(makeArrayRef("xxxxx", 5), Utf8));
}
57
// Checks that converting an empty byte range succeeds and yields an empty
// string.
TEST(ConvertUTFTest, Empty) {
  const llvm::ArrayRef<char> NoBytes(None);
  std::string Utf8;
  const bool Converted = convertUTF16ToUTF8String(NoBytes, Utf8);
  EXPECT_TRUE(Converted);
  EXPECT_TRUE(Utf8.empty());
}
64
// Checks which byte ranges hasUTF16ByteOrderMark reports as starting with a
// UTF-16 byte order mark.
TEST(ConvertUTFTest, HasUTF16BOM) {
  // Both byte orders of the mark are recognized.
  EXPECT_TRUE(hasUTF16ByteOrderMark(makeArrayRef("\xff\xfe", 2)));
  EXPECT_TRUE(hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff", 2)));

  // Don't care about odd lengths.
  EXPECT_TRUE(hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff ", 3)));

  // A mark followed by further data. The literal is split after \x00 because
  // a hex escape consumes every following hex digit: written as "\x00asdf",
  // the escape would be \x00a (a single 0x0A byte) rather than a NUL followed
  // by 'a'. The explicit length of 6 is unchanged.
  EXPECT_TRUE(hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff\x00"
                                                 "asdf",
                                                 6)));

  // Ranges shorter than the two-byte mark are rejected.
  EXPECT_FALSE(hasUTF16ByteOrderMark(None));
  EXPECT_FALSE(hasUTF16ByteOrderMark(makeArrayRef("\xfe", 1)));
}
80
// Checks the convertUTF16ToUTF8String form that takes UTF16 code units
// (ArrayRef<UTF16>) instead of raw bytes.
TEST(ConvertUTFTest, UTF16WrappersForConvertUTF16ToUTF8String) {
  // The look of disapproval (U+0CA0 '_' U+0CA0) as bytes, preceded by the
  // \xff\xfe byte order mark. alignas makes the buffer suitably aligned for
  // being viewed as UTF16 units.
  alignas(UTF16) static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
  // Number of whole UTF16 units in the buffer, excluding the literal's
  // terminating NUL byte.
  const size_t NumUnits = (sizeof(Src) - 1) / sizeof(UTF16);
  ArrayRef<UTF16> SrcRef =
      makeArrayRef(reinterpret_cast<const UTF16 *>(Src), NumUnits);

  std::string Result;
  bool Success = convertUTF16ToUTF8String(SrcRef, Result);
  EXPECT_TRUE(Success);

  std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
  EXPECT_EQ(Expected, Result);
}
91
// Checks ConvertUTF8toWide when called with a C string and when called with
// a StringRef; both must produce the same wide string.
TEST(ConvertUTFTest, ConvertUTF8toWide) {
  // The look of disapproval (U+0CA0 '_' U+0CA0) encoded as UTF-8.
  static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
  const std::wstring Expected(L"\x0ca0_\x0ca0");

  // Call with a const char *; the cast makes the intended argument type
  // explicit.
  std::wstring Result;
  bool Success = ConvertUTF8toWide(static_cast<const char *>(Src), Result);
  EXPECT_TRUE(Success);
  EXPECT_EQ(Expected, Result);

  // Call with a StringRef. sizeof(Src) - 1 is the byte count without the
  // literal's terminating NUL.
  Result.clear();
  Success = ConvertUTF8toWide(StringRef(Src, sizeof(Src) - 1), Result);
  EXPECT_TRUE(Success);
  EXPECT_EQ(Expected, Result);
}
105
// Checks that a wide string is converted to the expected UTF-8 bytes.
TEST(ConvertUTFTest, convertWideToUTF8) {
  // The look of disapproval (U+0CA0 '_' U+0CA0) as a wide string.
  static const wchar_t Src[] = L"\x0ca0_\x0ca0";
  const std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");

  std::string Utf8;
  const bool Converted = convertWideToUTF8(Src, Utf8);

  EXPECT_TRUE(Converted);
  EXPECT_EQ(Expected, Utf8);
}
115
116 struct ConvertUTFResultContainer {
117 ConversionResult ErrorCode;
118 std::vector<unsigned> UnicodeScalars;
119
ConvertUTFResultContainerConvertUTFResultContainer120 ConvertUTFResultContainer(ConversionResult ErrorCode)
121 : ErrorCode(ErrorCode) {}
122
123 ConvertUTFResultContainer
withScalarsConvertUTFResultContainer124 withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000,
125 unsigned US2 = 0x110000, unsigned US3 = 0x110000,
126 unsigned US4 = 0x110000, unsigned US5 = 0x110000,
127 unsigned US6 = 0x110000, unsigned US7 = 0x110000) {
128 ConvertUTFResultContainer Result(*this);
129 if (US0 != 0x110000)
130 Result.UnicodeScalars.push_back(US0);
131 if (US1 != 0x110000)
132 Result.UnicodeScalars.push_back(US1);
133 if (US2 != 0x110000)
134 Result.UnicodeScalars.push_back(US2);
135 if (US3 != 0x110000)
136 Result.UnicodeScalars.push_back(US3);
137 if (US4 != 0x110000)
138 Result.UnicodeScalars.push_back(US4);
139 if (US5 != 0x110000)
140 Result.UnicodeScalars.push_back(US5);
141 if (US6 != 0x110000)
142 Result.UnicodeScalars.push_back(US6);
143 if (US7 != 0x110000)
144 Result.UnicodeScalars.push_back(US7);
145 return Result;
146 }
147 };
148
149 std::pair<ConversionResult, std::vector<unsigned>>
ConvertUTF8ToUnicodeScalarsLenient(StringRef S)150 ConvertUTF8ToUnicodeScalarsLenient(StringRef S) {
151 const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
152
153 const UTF8 *SourceNext = SourceStart;
154 std::vector<UTF32> Decoded(S.size(), 0);
155 UTF32 *TargetStart = Decoded.data();
156
157 auto ErrorCode =
158 ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart,
159 Decoded.data() + Decoded.size(), lenientConversion);
160
161 Decoded.resize(TargetStart - Decoded.data());
162
163 return std::make_pair(ErrorCode, Decoded);
164 }
165
166 std::pair<ConversionResult, std::vector<unsigned>>
ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S)167 ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) {
168 const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
169
170 const UTF8 *SourceNext = SourceStart;
171 std::vector<UTF32> Decoded(S.size(), 0);
172 UTF32 *TargetStart = Decoded.data();
173
174 auto ErrorCode = ConvertUTF8toUTF32Partial(
175 &SourceNext, SourceStart + S.size(), &TargetStart,
176 Decoded.data() + Decoded.size(), lenientConversion);
177
178 Decoded.resize(TargetStart - Decoded.data());
179
180 return std::make_pair(ErrorCode, Decoded);
181 }
182
183 ::testing::AssertionResult
CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected,StringRef S,bool Partial=false)184 CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected,
185 StringRef S, bool Partial = false) {
186 ConversionResult ErrorCode;
187 std::vector<unsigned> Decoded;
188 if (!Partial)
189 std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S);
190 else
191 std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S);
192
193 if (Expected.ErrorCode != ErrorCode)
194 return ::testing::AssertionFailure() << "Expected error code "
195 << Expected.ErrorCode << ", actual "
196 << ErrorCode;
197
198 if (Expected.UnicodeScalars != Decoded)
199 return ::testing::AssertionFailure()
200 << "Expected lenient decoded result:\n"
201 << ::testing::PrintToString(Expected.UnicodeScalars) << "\n"
202 << "Actual result:\n" << ::testing::PrintToString(Decoded);
203
204 return ::testing::AssertionSuccess();
205 }
206
TEST(ConvertUTFTest,UTF8ToUTF32Lenient)207 TEST(ConvertUTFTest, UTF8ToUTF32Lenient) {
208
209 //
210 // 1-byte sequences
211 //
212
213 // U+0041 LATIN CAPITAL LETTER A
214 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
215 ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41"));
216
217 //
218 // 2-byte sequences
219 //
220
221 // U+0283 LATIN SMALL LETTER ESH
222 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
223 ConvertUTFResultContainer(conversionOK).withScalars(0x0283),
224 "\xca\x83"));
225
226 // U+03BA GREEK SMALL LETTER KAPPA
227 // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
228 // U+03C3 GREEK SMALL LETTER SIGMA
229 // U+03BC GREEK SMALL LETTER MU
230 // U+03B5 GREEK SMALL LETTER EPSILON
231 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
232 ConvertUTFResultContainer(conversionOK)
233 .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5),
234 "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5"));
235
236 //
237 // 3-byte sequences
238 //
239
240 // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
241 // U+6587 CJK UNIFIED IDEOGRAPH-6587
242 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
243 ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587),
244 "\xe4\xbe\x8b\xe6\x96\x87"));
245
246 // U+D55C HANGUL SYLLABLE HAN
247 // U+AE00 HANGUL SYLLABLE GEUL
248 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
249 ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00),
250 "\xed\x95\x9c\xea\xb8\x80"));
251
252 // U+1112 HANGUL CHOSEONG HIEUH
253 // U+1161 HANGUL JUNGSEONG A
254 // U+11AB HANGUL JONGSEONG NIEUN
255 // U+1100 HANGUL CHOSEONG KIYEOK
256 // U+1173 HANGUL JUNGSEONG EU
257 // U+11AF HANGUL JONGSEONG RIEUL
258 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
259 ConvertUTFResultContainer(conversionOK)
260 .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af),
261 "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3"
262 "\xe1\x86\xaf"));
263
264 //
265 // 4-byte sequences
266 //
267
268 // U+E0100 VARIATION SELECTOR-17
269 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
270 ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100),
271 "\xf3\xa0\x84\x80"));
272
273 //
274 // First possible sequence of a certain length
275 //
276
277 // U+0000 NULL
278 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
279 ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
280 StringRef("\x00", 1)));
281
282 // U+0080 PADDING CHARACTER
283 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
284 ConvertUTFResultContainer(conversionOK).withScalars(0x0080),
285 "\xc2\x80"));
286
287 // U+0800 SAMARITAN LETTER ALAF
288 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
289 ConvertUTFResultContainer(conversionOK).withScalars(0x0800),
290 "\xe0\xa0\x80"));
291
292 // U+10000 LINEAR B SYLLABLE B008 A
293 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
294 ConvertUTFResultContainer(conversionOK).withScalars(0x10000),
295 "\xf0\x90\x80\x80"));
296
297 // U+200000 (invalid)
298 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
299 ConvertUTFResultContainer(sourceIllegal)
300 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
301 "\xf8\x88\x80\x80\x80"));
302
303 // U+4000000 (invalid)
304 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
305 ConvertUTFResultContainer(sourceIllegal)
306 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
307 "\xfc\x84\x80\x80\x80\x80"));
308
309 //
310 // Last possible sequence of a certain length
311 //
312
313 // U+007F DELETE
314 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
315 ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f"));
316
317 // U+07FF (unassigned)
318 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
319 ConvertUTFResultContainer(conversionOK).withScalars(0x07ff),
320 "\xdf\xbf"));
321
322 // U+FFFF (noncharacter)
323 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
324 ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
325 "\xef\xbf\xbf"));
326
327 // U+1FFFFF (invalid)
328 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
329 ConvertUTFResultContainer(sourceIllegal)
330 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
331 "\xf7\xbf\xbf\xbf"));
332
333 // U+3FFFFFF (invalid)
334 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
335 ConvertUTFResultContainer(sourceIllegal)
336 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
337 "\xfb\xbf\xbf\xbf\xbf"));
338
339 // U+7FFFFFFF (invalid)
340 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
341 ConvertUTFResultContainer(sourceIllegal)
342 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
343 "\xfd\xbf\xbf\xbf\xbf\xbf"));
344
345 //
346 // Other boundary conditions
347 //
348
349 // U+D7FF (unassigned)
350 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
351 ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff),
352 "\xed\x9f\xbf"));
353
354 // U+E000 (private use)
355 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
356 ConvertUTFResultContainer(conversionOK).withScalars(0xe000),
357 "\xee\x80\x80"));
358
359 // U+FFFD REPLACEMENT CHARACTER
360 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
361 ConvertUTFResultContainer(conversionOK).withScalars(0xfffd),
362 "\xef\xbf\xbd"));
363
364 // U+10FFFF (noncharacter)
365 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
366 ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
367 "\xf4\x8f\xbf\xbf"));
368
369 // U+110000 (invalid)
370 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
371 ConvertUTFResultContainer(sourceIllegal)
372 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
373 "\xf4\x90\x80\x80"));
374
375 //
376 // Unexpected continuation bytes
377 //
378
379 // A sequence of unexpected continuation bytes that don't follow a first
380 // byte, every byte is a maximal subpart.
381
382 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
383 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80"));
384 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
385 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf"));
386 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
387 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
388 "\x80\x80"));
389 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
390 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
391 "\x80\xbf"));
392 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
393 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
394 "\xbf\x80"));
395 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
396 ConvertUTFResultContainer(sourceIllegal)
397 .withScalars(0xfffd, 0xfffd, 0xfffd),
398 "\x80\xbf\x80"));
399 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
400 ConvertUTFResultContainer(sourceIllegal)
401 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
402 "\x80\xbf\x80\xbf"));
403 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
404 ConvertUTFResultContainer(sourceIllegal)
405 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
406 "\x80\xbf\x82\xbf\xaa"));
407 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
408 ConvertUTFResultContainer(sourceIllegal)
409 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
410 "\xaa\xb0\xbb\xbf\xaa\xa0"));
411 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
412 ConvertUTFResultContainer(sourceIllegal)
413 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
414 "\xaa\xb0\xbb\xbf\xaa\xa0\x8f"));
415
416 // All continuation bytes (0x80--0xbf).
417 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
418 ConvertUTFResultContainer(sourceIllegal)
419 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
420 0xfffd, 0xfffd, 0xfffd, 0xfffd)
421 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
422 0xfffd, 0xfffd, 0xfffd, 0xfffd)
423 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
424 0xfffd, 0xfffd, 0xfffd, 0xfffd)
425 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
426 0xfffd, 0xfffd, 0xfffd, 0xfffd)
427 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
428 0xfffd, 0xfffd, 0xfffd, 0xfffd)
429 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
430 0xfffd, 0xfffd, 0xfffd, 0xfffd)
431 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
432 0xfffd, 0xfffd, 0xfffd, 0xfffd)
433 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
434 0xfffd, 0xfffd, 0xfffd, 0xfffd),
435 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
436 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
437 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
438 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"));
439
440 //
441 // Lonely start bytes
442 //
443
444 // Start bytes of 2-byte sequences (0xc0--0xdf).
445 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
446 ConvertUTFResultContainer(sourceIllegal)
447 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
448 0xfffd, 0xfffd, 0xfffd, 0xfffd)
449 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
450 0xfffd, 0xfffd, 0xfffd, 0xfffd)
451 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
452 0xfffd, 0xfffd, 0xfffd, 0xfffd)
453 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
454 0xfffd, 0xfffd, 0xfffd, 0xfffd),
455 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
456 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"));
457
458 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
459 ConvertUTFResultContainer(sourceIllegal)
460 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
461 0xfffd, 0x0020, 0xfffd, 0x0020)
462 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
463 0xfffd, 0x0020, 0xfffd, 0x0020)
464 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
465 0xfffd, 0x0020, 0xfffd, 0x0020)
466 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
467 0xfffd, 0x0020, 0xfffd, 0x0020)
468 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
469 0xfffd, 0x0020, 0xfffd, 0x0020)
470 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
471 0xfffd, 0x0020, 0xfffd, 0x0020)
472 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
473 0xfffd, 0x0020, 0xfffd, 0x0020)
474 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
475 0xfffd, 0x0020, 0xfffd, 0x0020),
476 "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20"
477 "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20"
478 "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20"
479 "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20"));
480
481 // Start bytes of 3-byte sequences (0xe0--0xef).
482 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
483 ConvertUTFResultContainer(sourceIllegal)
484 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
485 0xfffd, 0xfffd, 0xfffd, 0xfffd)
486 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
487 0xfffd, 0xfffd, 0xfffd, 0xfffd),
488 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"));
489
490 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
491 ConvertUTFResultContainer(sourceIllegal)
492 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
493 0xfffd, 0x0020, 0xfffd, 0x0020)
494 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
495 0xfffd, 0x0020, 0xfffd, 0x0020)
496 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
497 0xfffd, 0x0020, 0xfffd, 0x0020)
498 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
499 0xfffd, 0x0020, 0xfffd, 0x0020),
500 "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20"
501 "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20"));
502
503 // Start bytes of 4-byte sequences (0xf0--0xf7).
504 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
505 ConvertUTFResultContainer(sourceIllegal)
506 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
507 0xfffd, 0xfffd, 0xfffd, 0xfffd),
508 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"));
509
510 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
511 ConvertUTFResultContainer(sourceIllegal)
512 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
513 0xfffd, 0x0020, 0xfffd, 0x0020)
514 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
515 0xfffd, 0x0020, 0xfffd, 0x0020),
516 "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20"));
517
518 // Start bytes of 5-byte sequences (0xf8--0xfb).
519 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
520 ConvertUTFResultContainer(sourceIllegal)
521 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
522 "\xf8\xf9\xfa\xfb"));
523
524 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
525 ConvertUTFResultContainer(sourceIllegal)
526 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
527 0xfffd, 0x0020, 0xfffd, 0x0020),
528 "\xf8\x20\xf9\x20\xfa\x20\xfb\x20"));
529
530 // Start bytes of 6-byte sequences (0xfc--0xfd).
531 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
532 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
533 "\xfc\xfd"));
534
535 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
536 ConvertUTFResultContainer(sourceIllegal)
537 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020),
538 "\xfc\x20\xfd\x20"));
539
540 //
541 // Other bytes (0xc0--0xc1, 0xfe--0xff).
542 //
543
544 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
545 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0"));
546 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
547 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1"));
548 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
549 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe"));
550 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
551 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff"));
552
553 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
554 ConvertUTFResultContainer(sourceIllegal)
555 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
556 "\xc0\xc1\xfe\xff"));
557
558 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
559 ConvertUTFResultContainer(sourceIllegal)
560 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
561 "\xfe\xfe\xff\xff"));
562
563 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
564 ConvertUTFResultContainer(sourceIllegal)
565 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
566 "\xfe\x80\x80\x80\x80\x80"));
567
568 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
569 ConvertUTFResultContainer(sourceIllegal)
570 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
571 "\xff\x80\x80\x80\x80\x80"));
572
573 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
574 ConvertUTFResultContainer(sourceIllegal)
575 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
576 0xfffd, 0x0020, 0xfffd, 0x0020),
577 "\xc0\x20\xc1\x20\xfe\x20\xff\x20"));
578
579 //
580 // Sequences with one continuation byte missing
581 //
582
583 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
584 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2"));
585 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
586 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf"));
587 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
588 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
589 "\xe0\xa0"));
590 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
591 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
592 "\xe0\xbf"));
593 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
594 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
595 "\xe1\x80"));
596 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
597 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
598 "\xec\xbf"));
599 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
600 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
601 "\xed\x80"));
602 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
603 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
604 "\xed\x9f"));
605 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
606 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
607 "\xee\x80"));
608 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
609 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
610 "\xef\xbf"));
611 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
612 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
613 "\xf0\x90\x80"));
614 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
615 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
616 "\xf0\xbf\xbf"));
617 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
618 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
619 "\xf1\x80\x80"));
620 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
621 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
622 "\xf3\xbf\xbf"));
623 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
624 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
625 "\xf4\x80\x80"));
626 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
627 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
628 "\xf4\x8f\xbf"));
629
630 // Overlong sequences with one trailing byte missing.
631 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
632 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
633 "\xc0"));
634 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
635 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
636 "\xc1"));
637 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
638 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
639 "\xe0\x80"));
640 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
641 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
642 "\xe0\x9f"));
643 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
644 ConvertUTFResultContainer(sourceIllegal)
645 .withScalars(0xfffd, 0xfffd, 0xfffd),
646 "\xf0\x80\x80"));
647 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
648 ConvertUTFResultContainer(sourceIllegal)
649 .withScalars(0xfffd, 0xfffd, 0xfffd),
650 "\xf0\x8f\x80"));
651 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
652 ConvertUTFResultContainer(sourceIllegal)
653 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
654 "\xf8\x80\x80\x80"));
655 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
656 ConvertUTFResultContainer(sourceIllegal)
657 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
658 "\xfc\x80\x80\x80\x80"));
659
660 // Sequences that represent surrogates with one trailing byte missing.
661 // High surrogates
662 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
663 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
664 "\xed\xa0"));
665 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
666 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
667 "\xed\xac"));
668 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
669 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
670 "\xed\xaf"));
671 // Low surrogates
672 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
673 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
674 "\xed\xb0"));
675 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
676 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
677 "\xed\xb4"));
678 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
679 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
680 "\xed\xbf"));
681
682 // Ill-formed 4-byte sequences.
683 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
684 // U+1100xx (invalid)
685 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
686 ConvertUTFResultContainer(sourceIllegal)
687 .withScalars(0xfffd, 0xfffd, 0xfffd),
688 "\xf4\x90\x80"));
689 // U+13FBxx (invalid)
690 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
691 ConvertUTFResultContainer(sourceIllegal)
692 .withScalars(0xfffd, 0xfffd, 0xfffd),
693 "\xf4\xbf\xbf"));
694 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
695 ConvertUTFResultContainer(sourceIllegal)
696 .withScalars(0xfffd, 0xfffd, 0xfffd),
697 "\xf5\x80\x80"));
698 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
699 ConvertUTFResultContainer(sourceIllegal)
700 .withScalars(0xfffd, 0xfffd, 0xfffd),
701 "\xf6\x80\x80"));
702 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
703 ConvertUTFResultContainer(sourceIllegal)
704 .withScalars(0xfffd, 0xfffd, 0xfffd),
705 "\xf7\x80\x80"));
706 // U+1FFBxx (invalid)
707 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
708 ConvertUTFResultContainer(sourceIllegal)
709 .withScalars(0xfffd, 0xfffd, 0xfffd),
710 "\xf7\xbf\xbf"));
711
712 // Ill-formed 5-byte sequences.
713 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
714 // U+2000xx (invalid)
715 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
716 ConvertUTFResultContainer(sourceIllegal)
717 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
718 "\xf8\x88\x80\x80"));
719 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
720 ConvertUTFResultContainer(sourceIllegal)
721 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
722 "\xf8\xbf\xbf\xbf"));
723 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
724 ConvertUTFResultContainer(sourceIllegal)
725 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
726 "\xf9\x80\x80\x80"));
727 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
728 ConvertUTFResultContainer(sourceIllegal)
729 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
730 "\xfa\x80\x80\x80"));
731 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
732 ConvertUTFResultContainer(sourceIllegal)
733 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
734 "\xfb\x80\x80\x80"));
735 // U+3FFFFxx (invalid)
736 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
737 ConvertUTFResultContainer(sourceIllegal)
738 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
739 "\xfb\xbf\xbf\xbf"));
740
741 // Ill-formed 6-byte sequences.
742 // 1111110u 10uuuuuu 10uzzzzz 10zzzyyyy 10yyyyxx 10xxxxxx
743 // U+40000xx (invalid)
744 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
745 ConvertUTFResultContainer(sourceIllegal)
746 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
747 "\xfc\x84\x80\x80\x80"));
748 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
749 ConvertUTFResultContainer(sourceIllegal)
750 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
751 "\xfc\xbf\xbf\xbf\xbf"));
752 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
753 ConvertUTFResultContainer(sourceIllegal)
754 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
755 "\xfd\x80\x80\x80\x80"));
756 // U+7FFFFFxx (invalid)
757 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
758 ConvertUTFResultContainer(sourceIllegal)
759 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
760 "\xfd\xbf\xbf\xbf\xbf"));
761
762 //
763 // Sequences with two continuation bytes missing
764 //
765
766 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
767 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
768 "\xf0\x90"));
769 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
770 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
771 "\xf0\xbf"));
772 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
773 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
774 "\xf1\x80"));
775 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
776 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
777 "\xf3\xbf"));
778 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
779 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
780 "\xf4\x80"));
781 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
782 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
783 "\xf4\x8f"));
784
785 // Overlong sequences with two trailing byte missing.
786 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
787 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0"));
788 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
789 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
790 "\xf0\x80"));
791 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
792 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
793 "\xf0\x8f"));
794 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
795 ConvertUTFResultContainer(sourceIllegal)
796 .withScalars(0xfffd, 0xfffd, 0xfffd),
797 "\xf8\x80\x80"));
798 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
799 ConvertUTFResultContainer(sourceIllegal)
800 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
801 "\xfc\x80\x80\x80"));
802
803 // Sequences that represent surrogates with two trailing bytes missing.
804 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
805 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed"));
806
807 // Ill-formed 4-byte sequences.
808 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
809 // U+110yxx (invalid)
810 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
811 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
812 "\xf4\x90"));
813 // U+13Fyxx (invalid)
814 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
815 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
816 "\xf4\xbf"));
817 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
818 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
819 "\xf5\x80"));
820 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
821 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
822 "\xf6\x80"));
823 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
824 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
825 "\xf7\x80"));
826 // U+1FFyxx (invalid)
827 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
828 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
829 "\xf7\xbf"));
830
831 // Ill-formed 5-byte sequences.
832 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
833 // U+200yxx (invalid)
834 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
835 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
836 "\xf8\x88\x80"));
837 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
838 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
839 "\xf8\xbf\xbf"));
840 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
841 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
842 "\xf9\x80\x80"));
843 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
844 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
845 "\xfa\x80\x80"));
846 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
847 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
848 "\xfb\x80\x80"));
849 // U+3FFFyxx (invalid)
850 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
851 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
852 "\xfb\xbf\xbf"));
853
854 // Ill-formed 6-byte sequences.
855 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
856 // U+4000yxx (invalid)
857 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
858 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
859 "\xfc\x84\x80\x80"));
860 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
861 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
862 "\xfc\xbf\xbf\xbf"));
863 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
864 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
865 "\xfd\x80\x80\x80"));
866 // U+7FFFFyxx (invalid)
867 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
868 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
869 "\xfd\xbf\xbf\xbf"));
870
871 //
872 // Sequences with three continuation bytes missing
873 //
874
875 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
876 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
877 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
878 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1"));
879 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
880 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2"));
881 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
882 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3"));
883 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
884 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4"));
885
886 // Broken overlong sequences.
887 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
888 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
889 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
890 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
891 "\xf8\x80"));
892 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
893 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
894 "\xfc\x80\x80"));
895
896 // Ill-formed 4-byte sequences.
897 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
898 // U+14yyxx (invalid)
899 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
900 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5"));
901 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
902 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6"));
903 // U+1Cyyxx (invalid)
904 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
905 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7"));
906
907 // Ill-formed 5-byte sequences.
908 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
909 // U+20yyxx (invalid)
910 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
911 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
912 "\xf8\x88"));
913 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
914 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
915 "\xf8\xbf"));
916 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
917 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
918 "\xf9\x80"));
919 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
920 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
921 "\xfa\x80"));
922 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
923 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
924 "\xfb\x80"));
925 // U+3FCyyxx (invalid)
926 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
927 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
928 "\xfb\xbf"));
929
930 // Ill-formed 6-byte sequences.
931 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
932 // U+400yyxx (invalid)
933 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
934 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
935 "\xfc\x84\x80"));
936 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
937 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
938 "\xfc\xbf\xbf"));
939 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
940 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
941 "\xfd\x80\x80"));
942 // U+7FFCyyxx (invalid)
943 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
944 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
945 "\xfd\xbf\xbf"));
946
947 //
948 // Sequences with four continuation bytes missing
949 //
950
951 // Ill-formed 5-byte sequences.
952 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
953 // U+uzyyxx (invalid)
954 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
955 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
956 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
957 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9"));
958 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
959 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa"));
960 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
961 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
962 // U+3zyyxx (invalid)
963 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
964 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
965
966 // Broken overlong sequences.
967 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
968 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
969 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
970 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
971 "\xfc\x80"));
972
973 // Ill-formed 6-byte sequences.
974 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
975 // U+uzzyyxx (invalid)
976 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
977 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
978 "\xfc\x84"));
979 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
980 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
981 "\xfc\xbf"));
982 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
983 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
984 "\xfd\x80"));
985 // U+7Fzzyyxx (invalid)
986 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
987 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
988 "\xfd\xbf"));
989
990 //
991 // Sequences with five continuation bytes missing
992 //
993
994 // Ill-formed 6-byte sequences.
995 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
996 // U+uzzyyxx (invalid)
997 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
998 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc"));
999 // U+uuzzyyxx (invalid)
1000 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1001 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd"));
1002
1003 //
1004 // Consecutive sequences with trailing bytes missing
1005 //
1006
1007 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1008 ConvertUTFResultContainer(sourceIllegal)
1009 .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
1010 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
1011 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd)
1012 .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
1013 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
1014 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1015 "\xc0" "\xe0\x80" "\xf0\x80\x80"
1016 "\xf8\x80\x80\x80"
1017 "\xfc\x80\x80\x80\x80"
1018 "\xdf" "\xef\xbf" "\xf7\xbf\xbf"
1019 "\xfb\xbf\xbf\xbf"
1020 "\xfd\xbf\xbf\xbf\xbf"));
1021
1022 //
1023 // Overlong UTF-8 sequences
1024 //
1025
1026 // U+002F SOLIDUS
1027 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1028 ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f"));
1029
1030 // Overlong sequences of the above.
1031 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1032 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1033 "\xc0\xaf"));
1034 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1035 ConvertUTFResultContainer(sourceIllegal)
1036 .withScalars(0xfffd, 0xfffd, 0xfffd),
1037 "\xe0\x80\xaf"));
1038 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1039 ConvertUTFResultContainer(sourceIllegal)
1040 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1041 "\xf0\x80\x80\xaf"));
1042 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1043 ConvertUTFResultContainer(sourceIllegal)
1044 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1045 "\xf8\x80\x80\x80\xaf"));
1046 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1047 ConvertUTFResultContainer(sourceIllegal)
1048 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1049 "\xfc\x80\x80\x80\x80\xaf"));
1050
1051 // U+0000 NULL
1052 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1053 ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
1054 StringRef("\x00", 1)));
1055
1056 // Overlong sequences of the above.
1057 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1058 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1059 "\xc0\x80"));
1060 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1061 ConvertUTFResultContainer(sourceIllegal)
1062 .withScalars(0xfffd, 0xfffd, 0xfffd),
1063 "\xe0\x80\x80"));
1064 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1065 ConvertUTFResultContainer(sourceIllegal)
1066 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1067 "\xf0\x80\x80\x80"));
1068 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1069 ConvertUTFResultContainer(sourceIllegal)
1070 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1071 "\xf8\x80\x80\x80\x80"));
1072 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1073 ConvertUTFResultContainer(sourceIllegal)
1074 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1075 "\xfc\x80\x80\x80\x80\x80"));
1076
1077 // Other overlong sequences.
1078 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1079 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1080 "\xc0\xbf"));
1081 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1082 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1083 "\xc1\x80"));
1084 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1085 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1086 "\xc1\xbf"));
1087 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1088 ConvertUTFResultContainer(sourceIllegal)
1089 .withScalars(0xfffd, 0xfffd, 0xfffd),
1090 "\xe0\x9f\xbf"));
1091 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1092 ConvertUTFResultContainer(sourceIllegal)
1093 .withScalars(0xfffd, 0xfffd, 0xfffd),
1094 "\xed\xa0\x80"));
1095 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1096 ConvertUTFResultContainer(sourceIllegal)
1097 .withScalars(0xfffd, 0xfffd, 0xfffd),
1098 "\xed\xbf\xbf"));
1099 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1100 ConvertUTFResultContainer(sourceIllegal)
1101 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1102 "\xf0\x8f\x80\x80"));
1103 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1104 ConvertUTFResultContainer(sourceIllegal)
1105 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1106 "\xf0\x8f\xbf\xbf"));
1107 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1108 ConvertUTFResultContainer(sourceIllegal)
1109 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1110 "\xf8\x87\xbf\xbf\xbf"));
1111 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1112 ConvertUTFResultContainer(sourceIllegal)
1113 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1114 "\xfc\x83\xbf\xbf\xbf\xbf"));
1115
1116 //
1117 // Isolated surrogates
1118 //
1119
1120 // Unicode 6.3.0:
1121 //
1122 // D71. High-surrogate code point: A Unicode code point in the range
1123 // U+D800 to U+DBFF.
1124 //
1125 // D73. Low-surrogate code point: A Unicode code point in the range
1126 // U+DC00 to U+DFFF.
1127
1128 // Note: U+E0100 is <DB40 DD00> in UTF16.
1129
1130 // High surrogates
1131
1132 // U+D800
1133 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1134 ConvertUTFResultContainer(sourceIllegal)
1135 .withScalars(0xfffd, 0xfffd, 0xfffd),
1136 "\xed\xa0\x80"));
1137
1138 // U+DB40
1139 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1140 ConvertUTFResultContainer(sourceIllegal)
1141 .withScalars(0xfffd, 0xfffd, 0xfffd),
1142 "\xed\xac\xa0"));
1143
1144 // U+DBFF
1145 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1146 ConvertUTFResultContainer(sourceIllegal)
1147 .withScalars(0xfffd, 0xfffd, 0xfffd),
1148 "\xed\xaf\xbf"));
1149
1150 // Low surrogates
1151
1152 // U+DC00
1153 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1154 ConvertUTFResultContainer(sourceIllegal)
1155 .withScalars(0xfffd, 0xfffd, 0xfffd),
1156 "\xed\xb0\x80"));
1157
1158 // U+DD00
1159 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1160 ConvertUTFResultContainer(sourceIllegal)
1161 .withScalars(0xfffd, 0xfffd, 0xfffd),
1162 "\xed\xb4\x80"));
1163
1164 // U+DFFF
1165 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1166 ConvertUTFResultContainer(sourceIllegal)
1167 .withScalars(0xfffd, 0xfffd, 0xfffd),
1168 "\xed\xbf\xbf"));
1169
1170 // Surrogate pairs
1171
1172 // U+D800 U+DC00
1173 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1174 ConvertUTFResultContainer(sourceIllegal)
1175 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1176 "\xed\xa0\x80\xed\xb0\x80"));
1177
1178 // U+D800 U+DD00
1179 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1180 ConvertUTFResultContainer(sourceIllegal)
1181 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1182 "\xed\xa0\x80\xed\xb4\x80"));
1183
1184 // U+D800 U+DFFF
1185 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1186 ConvertUTFResultContainer(sourceIllegal)
1187 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1188 "\xed\xa0\x80\xed\xbf\xbf"));
1189
1190 // U+DB40 U+DC00
1191 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1192 ConvertUTFResultContainer(sourceIllegal)
1193 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1194 "\xed\xac\xa0\xed\xb0\x80"));
1195
1196 // U+DB40 U+DD00
1197 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1198 ConvertUTFResultContainer(sourceIllegal)
1199 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1200 "\xed\xac\xa0\xed\xb4\x80"));
1201
1202 // U+DB40 U+DFFF
1203 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1204 ConvertUTFResultContainer(sourceIllegal)
1205 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1206 "\xed\xac\xa0\xed\xbf\xbf"));
1207
1208 // U+DBFF U+DC00
1209 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1210 ConvertUTFResultContainer(sourceIllegal)
1211 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1212 "\xed\xaf\xbf\xed\xb0\x80"));
1213
1214 // U+DBFF U+DD00
1215 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1216 ConvertUTFResultContainer(sourceIllegal)
1217 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1218 "\xed\xaf\xbf\xed\xb4\x80"));
1219
1220 // U+DBFF U+DFFF
1221 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1222 ConvertUTFResultContainer(sourceIllegal)
1223 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1224 "\xed\xaf\xbf\xed\xbf\xbf"));
1225
1226 //
1227 // Noncharacters
1228 //
1229
1230 // Unicode 6.3.0:
1231 //
1232 // D14. Noncharacter: A code point that is permanently reserved for
1233 // internal use and that should never be interchanged. Noncharacters
  // consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10)
1235 // and the values U+FDD0..U+FDEF.
1236
1237 // U+FFFE
1238 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1239 ConvertUTFResultContainer(conversionOK).withScalars(0xfffe),
1240 "\xef\xbf\xbe"));
1241
1242 // U+FFFF
1243 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1244 ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
1245 "\xef\xbf\xbf"));
1246
1247 // U+1FFFE
1248 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1249 ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe),
1250 "\xf0\x9f\xbf\xbe"));
1251
1252 // U+1FFFF
1253 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1254 ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff),
1255 "\xf0\x9f\xbf\xbf"));
1256
1257 // U+2FFFE
1258 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1259 ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe),
1260 "\xf0\xaf\xbf\xbe"));
1261
1262 // U+2FFFF
1263 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1264 ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff),
1265 "\xf0\xaf\xbf\xbf"));
1266
1267 // U+3FFFE
1268 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1269 ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe),
1270 "\xf0\xbf\xbf\xbe"));
1271
1272 // U+3FFFF
1273 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1274 ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff),
1275 "\xf0\xbf\xbf\xbf"));
1276
1277 // U+4FFFE
1278 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1279 ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe),
1280 "\xf1\x8f\xbf\xbe"));
1281
1282 // U+4FFFF
1283 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1284 ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff),
1285 "\xf1\x8f\xbf\xbf"));
1286
1287 // U+5FFFE
1288 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1289 ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe),
1290 "\xf1\x9f\xbf\xbe"));
1291
1292 // U+5FFFF
1293 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1294 ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff),
1295 "\xf1\x9f\xbf\xbf"));
1296
1297 // U+6FFFE
1298 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1299 ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe),
1300 "\xf1\xaf\xbf\xbe"));
1301
1302 // U+6FFFF
1303 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1304 ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff),
1305 "\xf1\xaf\xbf\xbf"));
1306
1307 // U+7FFFE
1308 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1309 ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe),
1310 "\xf1\xbf\xbf\xbe"));
1311
1312 // U+7FFFF
1313 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1314 ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff),
1315 "\xf1\xbf\xbf\xbf"));
1316
1317 // U+8FFFE
1318 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1319 ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe),
1320 "\xf2\x8f\xbf\xbe"));
1321
1322 // U+8FFFF
1323 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1324 ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff),
1325 "\xf2\x8f\xbf\xbf"));
1326
1327 // U+9FFFE
1328 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1329 ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe),
1330 "\xf2\x9f\xbf\xbe"));
1331
1332 // U+9FFFF
1333 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1334 ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff),
1335 "\xf2\x9f\xbf\xbf"));
1336
1337 // U+AFFFE
1338 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1339 ConvertUTFResultContainer(conversionOK).withScalars(0xafffe),
1340 "\xf2\xaf\xbf\xbe"));
1341
1342 // U+AFFFF
1343 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1344 ConvertUTFResultContainer(conversionOK).withScalars(0xaffff),
1345 "\xf2\xaf\xbf\xbf"));
1346
1347 // U+BFFFE
1348 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1349 ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe),
1350 "\xf2\xbf\xbf\xbe"));
1351
1352 // U+BFFFF
1353 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1354 ConvertUTFResultContainer(conversionOK).withScalars(0xbffff),
1355 "\xf2\xbf\xbf\xbf"));
1356
1357 // U+CFFFE
1358 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1359 ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe),
1360 "\xf3\x8f\xbf\xbe"));
1361
1362 // U+CFFFF
1363 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1364 ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF),
1365 "\xf3\x8f\xbf\xbf"));
1366
1367 // U+DFFFE
1368 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1369 ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe),
1370 "\xf3\x9f\xbf\xbe"));
1371
1372 // U+DFFFF
1373 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1374 ConvertUTFResultContainer(conversionOK).withScalars(0xdffff),
1375 "\xf3\x9f\xbf\xbf"));
1376
1377 // U+EFFFE
1378 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1379 ConvertUTFResultContainer(conversionOK).withScalars(0xefffe),
1380 "\xf3\xaf\xbf\xbe"));
1381
1382 // U+EFFFF
1383 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1384 ConvertUTFResultContainer(conversionOK).withScalars(0xeffff),
1385 "\xf3\xaf\xbf\xbf"));
1386
1387 // U+FFFFE
1388 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1389 ConvertUTFResultContainer(conversionOK).withScalars(0xffffe),
1390 "\xf3\xbf\xbf\xbe"));
1391
1392 // U+FFFFF
1393 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1394 ConvertUTFResultContainer(conversionOK).withScalars(0xfffff),
1395 "\xf3\xbf\xbf\xbf"));
1396
1397 // U+10FFFE
1398 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1399 ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe),
1400 "\xf4\x8f\xbf\xbe"));
1401
1402 // U+10FFFF
1403 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1404 ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
1405 "\xf4\x8f\xbf\xbf"));
1406
1407 // U+FDD0
1408 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1409 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0),
1410 "\xef\xb7\x90"));
1411
1412 // U+FDD1
1413 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1414 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1),
1415 "\xef\xb7\x91"));
1416
1417 // U+FDD2
1418 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1419 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2),
1420 "\xef\xb7\x92"));
1421
1422 // U+FDD3
1423 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1424 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3),
1425 "\xef\xb7\x93"));
1426
1427 // U+FDD4
1428 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1429 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4),
1430 "\xef\xb7\x94"));
1431
1432 // U+FDD5
1433 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1434 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5),
1435 "\xef\xb7\x95"));
1436
1437 // U+FDD6
1438 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1439 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6),
1440 "\xef\xb7\x96"));
1441
1442 // U+FDD7
1443 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1444 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7),
1445 "\xef\xb7\x97"));
1446
1447 // U+FDD8
1448 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1449 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8),
1450 "\xef\xb7\x98"));
1451
1452 // U+FDD9
1453 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1454 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9),
1455 "\xef\xb7\x99"));
1456
1457 // U+FDDA
1458 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1459 ConvertUTFResultContainer(conversionOK).withScalars(0xfdda),
1460 "\xef\xb7\x9a"));
1461
1462 // U+FDDB
1463 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1464 ConvertUTFResultContainer(conversionOK).withScalars(0xfddb),
1465 "\xef\xb7\x9b"));
1466
1467 // U+FDDC
1468 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1469 ConvertUTFResultContainer(conversionOK).withScalars(0xfddc),
1470 "\xef\xb7\x9c"));
1471
1472 // U+FDDD
1473 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1474 ConvertUTFResultContainer(conversionOK).withScalars(0xfddd),
1475 "\xef\xb7\x9d"));
1476
1477 // U+FDDE
1478 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1479 ConvertUTFResultContainer(conversionOK).withScalars(0xfdde),
1480 "\xef\xb7\x9e"));
1481
1482 // U+FDDF
1483 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1484 ConvertUTFResultContainer(conversionOK).withScalars(0xfddf),
1485 "\xef\xb7\x9f"));
1486
1487 // U+FDE0
1488 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1489 ConvertUTFResultContainer(conversionOK).withScalars(0xfde0),
1490 "\xef\xb7\xa0"));
1491
1492 // U+FDE1
1493 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1494 ConvertUTFResultContainer(conversionOK).withScalars(0xfde1),
1495 "\xef\xb7\xa1"));
1496
1497 // U+FDE2
1498 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1499 ConvertUTFResultContainer(conversionOK).withScalars(0xfde2),
1500 "\xef\xb7\xa2"));
1501
1502 // U+FDE3
1503 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1504 ConvertUTFResultContainer(conversionOK).withScalars(0xfde3),
1505 "\xef\xb7\xa3"));
1506
1507 // U+FDE4
1508 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1509 ConvertUTFResultContainer(conversionOK).withScalars(0xfde4),
1510 "\xef\xb7\xa4"));
1511
1512 // U+FDE5
1513 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1514 ConvertUTFResultContainer(conversionOK).withScalars(0xfde5),
1515 "\xef\xb7\xa5"));
1516
1517 // U+FDE6
1518 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1519 ConvertUTFResultContainer(conversionOK).withScalars(0xfde6),
1520 "\xef\xb7\xa6"));
1521
1522 // U+FDE7
1523 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1524 ConvertUTFResultContainer(conversionOK).withScalars(0xfde7),
1525 "\xef\xb7\xa7"));
1526
1527 // U+FDE8
1528 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1529 ConvertUTFResultContainer(conversionOK).withScalars(0xfde8),
1530 "\xef\xb7\xa8"));
1531
1532 // U+FDE9
1533 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1534 ConvertUTFResultContainer(conversionOK).withScalars(0xfde9),
1535 "\xef\xb7\xa9"));
1536
1537 // U+FDEA
1538 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1539 ConvertUTFResultContainer(conversionOK).withScalars(0xfdea),
1540 "\xef\xb7\xaa"));
1541
1542 // U+FDEB
1543 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1544 ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb),
1545 "\xef\xb7\xab"));
1546
1547 // U+FDEC
1548 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1549 ConvertUTFResultContainer(conversionOK).withScalars(0xfdec),
1550 "\xef\xb7\xac"));
1551
1552 // U+FDED
1553 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1554 ConvertUTFResultContainer(conversionOK).withScalars(0xfded),
1555 "\xef\xb7\xad"));
1556
1557 // U+FDEE
1558 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1559 ConvertUTFResultContainer(conversionOK).withScalars(0xfdee),
1560 "\xef\xb7\xae"));
1561
1562 // U+FDEF
1563 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1564 ConvertUTFResultContainer(conversionOK).withScalars(0xfdef),
1565 "\xef\xb7\xaf"));
1566
1567 // U+FDF0
1568 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1569 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0),
1570 "\xef\xb7\xb0"));
1571
1572 // U+FDF1
1573 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1574 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1),
1575 "\xef\xb7\xb1"));
1576
1577 // U+FDF2
1578 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1579 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2),
1580 "\xef\xb7\xb2"));
1581
1582 // U+FDF3
1583 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1584 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3),
1585 "\xef\xb7\xb3"));
1586
1587 // U+FDF4
1588 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1589 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4),
1590 "\xef\xb7\xb4"));
1591
1592 // U+FDF5
1593 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1594 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5),
1595 "\xef\xb7\xb5"));
1596
1597 // U+FDF6
1598 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1599 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6),
1600 "\xef\xb7\xb6"));
1601
1602 // U+FDF7
1603 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1604 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7),
1605 "\xef\xb7\xb7"));
1606
1607 // U+FDF8
1608 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1609 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8),
1610 "\xef\xb7\xb8"));
1611
1612 // U+FDF9
1613 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1614 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9),
1615 "\xef\xb7\xb9"));
1616
1617 // U+FDFA
1618 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1619 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa),
1620 "\xef\xb7\xba"));
1621
1622 // U+FDFB
1623 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1624 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb),
1625 "\xef\xb7\xbb"));
1626
1627 // U+FDFC
1628 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1629 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc),
1630 "\xef\xb7\xbc"));
1631
1632 // U+FDFD
1633 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1634 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd),
1635 "\xef\xb7\xbd"));
1636
1637 // U+FDFE
1638 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1639 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe),
1640 "\xef\xb7\xbe"));
1641
1642 // U+FDFF
1643 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1644 ConvertUTFResultContainer(conversionOK).withScalars(0xfdff),
1645 "\xef\xb7\xbf"));
1646 }
1647
TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) {
  // Every check in this test passes `true` as the last argument of
  // CheckConvertUTF8ToUnicodeScalars. NOTE(review): going by the test name
  // this presumably selects partial-input decoding -- confirm against the
  // helper's declaration.

  // Shorthand for inputs that are expected to produce sourceExhausted and no
  // scalars at all. The lambda hands back the checker's result instead of
  // asserting itself, so each EXPECT_TRUE below stays on its own line and a
  // failure still points at the offending input.
  const auto CheckExhausted = [](const char *Truncated) {
    return CheckConvertUTF8ToUnicodeScalars(
        ConvertUTFResultContainer(sourceExhausted), Truncated, true);
  };

  // U+0041 LATIN CAPITAL LETTER A: a complete sequence, expected to convert
  // cleanly.
  EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
      ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41",
      true));

  //
  // Sequences with one continuation byte missing
  //

  // Lead byte of a 2-byte sequence on its own.
  EXPECT_TRUE(CheckExhausted("\xc2"));
  EXPECT_TRUE(CheckExhausted("\xdf"));

  // 3-byte sequences cut off after the second byte.
  EXPECT_TRUE(CheckExhausted("\xe0\xa0"));
  EXPECT_TRUE(CheckExhausted("\xe0\xbf"));
  EXPECT_TRUE(CheckExhausted("\xe1\x80"));
  EXPECT_TRUE(CheckExhausted("\xec\xbf"));
  EXPECT_TRUE(CheckExhausted("\xed\x80"));
  EXPECT_TRUE(CheckExhausted("\xed\x9f"));
  EXPECT_TRUE(CheckExhausted("\xee\x80"));
  EXPECT_TRUE(CheckExhausted("\xef\xbf"));

  // 4-byte sequences cut off after the third byte.
  EXPECT_TRUE(CheckExhausted("\xf0\x90\x80"));
  EXPECT_TRUE(CheckExhausted("\xf0\xbf\xbf"));
  EXPECT_TRUE(CheckExhausted("\xf1\x80\x80"));
  EXPECT_TRUE(CheckExhausted("\xf3\xbf\xbf"));
  EXPECT_TRUE(CheckExhausted("\xf4\x80\x80"));
  EXPECT_TRUE(CheckExhausted("\xf4\x8f\xbf"));

  // A complete U+0041 followed by a lone 2-byte lead: the scalar for 'A' is
  // expected alongside the sourceExhausted result.
  EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
      ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041),
      "\x41\xc2", true));
}
1711
1712