1 //===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/Support/ConvertUTF.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/Support/Format.h"
13 #include "gtest/gtest.h"
14 #include <string>
15 #include <utility>
16 #include <vector>
17 
18 using namespace llvm;
19 
TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
  // Input is the look of disapproval (U+0CA0 '_' U+0CA0) as little-endian
  // UTF-16 bytes, preceded by the FF FE byte order mark.
  static const char Utf16Bytes[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
  // sizeof also counts the literal's implicit NUL terminator; leave it out.
  const ArrayRef<char> Input(Utf16Bytes, sizeof(Utf16Bytes) - 1);

  std::string Converted;
  EXPECT_TRUE(convertUTF16ToUTF8String(Input, Converted));

  // The same three characters as UTF-8, with no byte order mark.
  EXPECT_EQ(std::string("\xe0\xb2\xa0_\xe0\xb2\xa0"), Converted);
}
30 
TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
  // Input is the look of disapproval (U+0CA0 '_' U+0CA0) as big-endian
  // UTF-16 bytes, preceded by the FE FF byte order mark.
  static const char Utf16Bytes[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
  // sizeof also counts the literal's implicit NUL terminator; leave it out.
  const ArrayRef<char> Input(Utf16Bytes, sizeof(Utf16Bytes) - 1);

  std::string Converted;
  EXPECT_TRUE(convertUTF16ToUTF8String(Input, Converted));

  // The same three characters as UTF-8, with no byte order mark.
  EXPECT_EQ(std::string("\xe0\xb2\xa0_\xe0\xb2\xa0"), Converted);
}
41 
TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) {
  // Input is the look of disapproval (U+0CA0 '_' U+0CA0) encoded as UTF-8.
  static const char Utf8Bytes[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
  // sizeof also counts the literal's implicit NUL terminator; leave it out.
  const StringRef Input(Utf8Bytes, sizeof(Utf8Bytes) - 1);

  SmallVector<UTF16, 5> CodeUnits;
  EXPECT_TRUE(convertUTF8ToUTF16String(Input, CodeUnits));

  // Only the first three entries are compared; the trailing 0 is never read.
  static const UTF16 Expected[] = {0x0CA0, 0x005f, 0x0CA0, 0};
  // Abort the test on a size mismatch so the loop below stays in bounds.
  ASSERT_EQ(3u, CodeUnits.size());
  for (unsigned Index = 0; Index < 3; ++Index)
    EXPECT_EQ(Expected[Index], CodeUnits[Index]);
}
54 
TEST(ConvertUTFTest, OddLengthInput) {
  // Five bytes do not divide into whole 16-bit code units; the conversion is
  // expected to report failure rather than succeed.
  const ArrayRef<char> FiveBytes = makeArrayRef("xxxxx", 5);
  std::string Converted;
  EXPECT_FALSE(convertUTF16ToUTF8String(FiveBytes, Converted));
}
60 
TEST(ConvertUTFTest, Empty) {
  // An empty byte range is expected to convert successfully to an empty
  // string.
  const llvm::ArrayRef<char> NoBytes(None);
  std::string Converted;
  EXPECT_TRUE(convertUTF16ToUTF8String(NoBytes, Converted));
  EXPECT_TRUE(Converted.empty());
}
67 
TEST(ConvertUTFTest, HasUTF16BOM) {
  // Buffers that begin with FF FE or FE FF are expected to be reported as
  // carrying a byte order mark, whatever follows.
  EXPECT_TRUE(hasUTF16ByteOrderMark(makeArrayRef("\xff\xfe", 2)));
  EXPECT_TRUE(hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff", 2)));
  // Don't care about odd lengths.
  EXPECT_TRUE(hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff ", 3)));
  EXPECT_TRUE(hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff\x00asdf", 6)));

  // Buffers shorter than two bytes are expected to be rejected.
  EXPECT_FALSE(hasUTF16ByteOrderMark(None));
  EXPECT_FALSE(hasUTF16ByteOrderMark(makeArrayRef("\xfe", 1)));
}
83 
TEST(ConvertUTFTest, UTF16WrappersForConvertUTF16ToUTF8String) {
  // Src is the look of disapproval as little-endian UTF-16 bytes with a
  // leading byte order mark.
  //
  // The buffer is viewed through a UTF16 pointer below, so it must be aligned
  // for UTF16; a plain char array carries no such guarantee and could cause a
  // misaligned read on strict-alignment targets.
  alignas(UTF16) static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";

  // Number of 16-bit code units in Src, excluding the literal's implicit NUL
  // terminator (8 bytes / 2 == 4).
  const size_t NumCodeUnits = (sizeof(Src) - 1) / sizeof(UTF16);
  ArrayRef<UTF16> SrcRef =
      makeArrayRef(reinterpret_cast<const UTF16 *>(Src), NumCodeUnits);

  std::string Result;
  bool Success = convertUTF16ToUTF8String(SrcRef, Result);
  EXPECT_TRUE(Success);

  // The same three characters as UTF-8, with no byte order mark.
  std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
  EXPECT_EQ(Expected, Result);
}
94 
TEST(ConvertUTFTest, ConvertUTF8toWide) {
  // Input is the look of disapproval encoded as UTF-8.
  static const char Utf8Bytes[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
  const std::wstring Expected(L"\x0ca0_\x0ca0");

  // First pass the source as a const char *.
  std::wstring FromPointer;
  EXPECT_TRUE(
      ConvertUTF8toWide(static_cast<const char *>(Utf8Bytes), FromPointer));
  EXPECT_EQ(Expected, FromPointer);

  // Then pass the same seven bytes as a StringRef with an explicit length.
  std::wstring FromStringRef;
  EXPECT_TRUE(ConvertUTF8toWide(StringRef(Utf8Bytes, 7), FromStringRef));
  EXPECT_EQ(Expected, FromStringRef);
}
108 
TEST(ConvertUTFTest, convertWideToUTF8) {
  // Input is the look of disapproval as a wide string literal.
  static const wchar_t WideText[] = L"\x0ca0_\x0ca0";

  std::string Converted;
  EXPECT_TRUE(convertWideToUTF8(WideText, Converted));

  // The same three characters encoded as UTF-8.
  EXPECT_EQ(std::string("\xe0\xb2\xa0_\xe0\xb2\xa0"), Converted);
}
118 
119 struct ConvertUTFResultContainer {
120   ConversionResult ErrorCode;
121   std::vector<unsigned> UnicodeScalars;
122 
ConvertUTFResultContainerConvertUTFResultContainer123   ConvertUTFResultContainer(ConversionResult ErrorCode)
124       : ErrorCode(ErrorCode) {}
125 
126   ConvertUTFResultContainer
withScalarsConvertUTFResultContainer127   withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000,
128               unsigned US2 = 0x110000, unsigned US3 = 0x110000,
129               unsigned US4 = 0x110000, unsigned US5 = 0x110000,
130               unsigned US6 = 0x110000, unsigned US7 = 0x110000) {
131     ConvertUTFResultContainer Result(*this);
132     if (US0 != 0x110000)
133       Result.UnicodeScalars.push_back(US0);
134     if (US1 != 0x110000)
135       Result.UnicodeScalars.push_back(US1);
136     if (US2 != 0x110000)
137       Result.UnicodeScalars.push_back(US2);
138     if (US3 != 0x110000)
139       Result.UnicodeScalars.push_back(US3);
140     if (US4 != 0x110000)
141       Result.UnicodeScalars.push_back(US4);
142     if (US5 != 0x110000)
143       Result.UnicodeScalars.push_back(US5);
144     if (US6 != 0x110000)
145       Result.UnicodeScalars.push_back(US6);
146     if (US7 != 0x110000)
147       Result.UnicodeScalars.push_back(US7);
148     return Result;
149   }
150 };
151 
152 std::pair<ConversionResult, std::vector<unsigned>>
ConvertUTF8ToUnicodeScalarsLenient(StringRef S)153 ConvertUTF8ToUnicodeScalarsLenient(StringRef S) {
154   const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
155 
156   const UTF8 *SourceNext = SourceStart;
157   std::vector<UTF32> Decoded(S.size(), 0);
158   UTF32 *TargetStart = Decoded.data();
159 
160   auto ErrorCode =
161       ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart,
162                          Decoded.data() + Decoded.size(), lenientConversion);
163 
164   Decoded.resize(TargetStart - Decoded.data());
165 
166   return std::make_pair(ErrorCode, Decoded);
167 }
168 
169 std::pair<ConversionResult, std::vector<unsigned>>
ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S)170 ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) {
171   const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
172 
173   const UTF8 *SourceNext = SourceStart;
174   std::vector<UTF32> Decoded(S.size(), 0);
175   UTF32 *TargetStart = Decoded.data();
176 
177   auto ErrorCode = ConvertUTF8toUTF32Partial(
178       &SourceNext, SourceStart + S.size(), &TargetStart,
179       Decoded.data() + Decoded.size(), lenientConversion);
180 
181   Decoded.resize(TargetStart - Decoded.data());
182 
183   return std::make_pair(ErrorCode, Decoded);
184 }
185 
186 ::testing::AssertionResult
CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected,StringRef S,bool Partial=false)187 CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected,
188                                  StringRef S, bool Partial = false) {
189   ConversionResult ErrorCode;
190   std::vector<unsigned> Decoded;
191   if (!Partial)
192     std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S);
193   else
194     std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S);
195 
196   if (Expected.ErrorCode != ErrorCode)
197     return ::testing::AssertionFailure() << "Expected error code "
198                                          << Expected.ErrorCode << ", actual "
199                                          << ErrorCode;
200 
201   if (Expected.UnicodeScalars != Decoded)
202     return ::testing::AssertionFailure()
203            << "Expected lenient decoded result:\n"
204            << ::testing::PrintToString(Expected.UnicodeScalars) << "\n"
205            << "Actual result:\n" << ::testing::PrintToString(Decoded);
206 
207   return ::testing::AssertionSuccess();
208 }
209 
TEST(ConvertUTFTest,UTF8ToUTF32Lenient)210 TEST(ConvertUTFTest, UTF8ToUTF32Lenient) {
211 
212   //
213   // 1-byte sequences
214   //
215 
216   // U+0041 LATIN CAPITAL LETTER A
217   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
218       ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41"));
219 
220   //
221   // 2-byte sequences
222   //
223 
224   // U+0283 LATIN SMALL LETTER ESH
225   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
226       ConvertUTFResultContainer(conversionOK).withScalars(0x0283),
227       "\xca\x83"));
228 
229   // U+03BA GREEK SMALL LETTER KAPPA
230   // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
231   // U+03C3 GREEK SMALL LETTER SIGMA
232   // U+03BC GREEK SMALL LETTER MU
233   // U+03B5 GREEK SMALL LETTER EPSILON
234   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
235       ConvertUTFResultContainer(conversionOK)
236           .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5),
237       "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5"));
238 
239   //
240   // 3-byte sequences
241   //
242 
243   // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
244   // U+6587 CJK UNIFIED IDEOGRAPH-6587
245   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
246       ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587),
247       "\xe4\xbe\x8b\xe6\x96\x87"));
248 
249   // U+D55C HANGUL SYLLABLE HAN
250   // U+AE00 HANGUL SYLLABLE GEUL
251   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
252       ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00),
253       "\xed\x95\x9c\xea\xb8\x80"));
254 
255   // U+1112 HANGUL CHOSEONG HIEUH
256   // U+1161 HANGUL JUNGSEONG A
257   // U+11AB HANGUL JONGSEONG NIEUN
258   // U+1100 HANGUL CHOSEONG KIYEOK
259   // U+1173 HANGUL JUNGSEONG EU
260   // U+11AF HANGUL JONGSEONG RIEUL
261   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
262       ConvertUTFResultContainer(conversionOK)
263           .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af),
264       "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3"
265       "\xe1\x86\xaf"));
266 
267   //
268   // 4-byte sequences
269   //
270 
271   // U+E0100 VARIATION SELECTOR-17
272   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
273       ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100),
274       "\xf3\xa0\x84\x80"));
275 
276   //
277   // First possible sequence of a certain length
278   //
279 
280   // U+0000 NULL
281   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
282       ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
283       StringRef("\x00", 1)));
284 
285   // U+0080 PADDING CHARACTER
286   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
287       ConvertUTFResultContainer(conversionOK).withScalars(0x0080),
288       "\xc2\x80"));
289 
290   // U+0800 SAMARITAN LETTER ALAF
291   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
292       ConvertUTFResultContainer(conversionOK).withScalars(0x0800),
293       "\xe0\xa0\x80"));
294 
295   // U+10000 LINEAR B SYLLABLE B008 A
296   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
297       ConvertUTFResultContainer(conversionOK).withScalars(0x10000),
298       "\xf0\x90\x80\x80"));
299 
300   // U+200000 (invalid)
301   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
302       ConvertUTFResultContainer(sourceIllegal)
303           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
304       "\xf8\x88\x80\x80\x80"));
305 
306   // U+4000000 (invalid)
307   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
308       ConvertUTFResultContainer(sourceIllegal)
309           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
310       "\xfc\x84\x80\x80\x80\x80"));
311 
312   //
313   // Last possible sequence of a certain length
314   //
315 
316   // U+007F DELETE
317   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
318       ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f"));
319 
320   // U+07FF (unassigned)
321   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
322       ConvertUTFResultContainer(conversionOK).withScalars(0x07ff),
323       "\xdf\xbf"));
324 
325   // U+FFFF (noncharacter)
326   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
327       ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
328       "\xef\xbf\xbf"));
329 
330   // U+1FFFFF (invalid)
331   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
332       ConvertUTFResultContainer(sourceIllegal)
333           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
334       "\xf7\xbf\xbf\xbf"));
335 
336   // U+3FFFFFF (invalid)
337   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
338       ConvertUTFResultContainer(sourceIllegal)
339           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
340       "\xfb\xbf\xbf\xbf\xbf"));
341 
342   // U+7FFFFFFF (invalid)
343   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
344       ConvertUTFResultContainer(sourceIllegal)
345           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
346       "\xfd\xbf\xbf\xbf\xbf\xbf"));
347 
348   //
349   // Other boundary conditions
350   //
351 
352   // U+D7FF (unassigned)
353   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
354       ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff),
355       "\xed\x9f\xbf"));
356 
357   // U+E000 (private use)
358   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
359       ConvertUTFResultContainer(conversionOK).withScalars(0xe000),
360       "\xee\x80\x80"));
361 
362   // U+FFFD REPLACEMENT CHARACTER
363   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
364       ConvertUTFResultContainer(conversionOK).withScalars(0xfffd),
365       "\xef\xbf\xbd"));
366 
367   // U+10FFFF (noncharacter)
368   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
369       ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
370       "\xf4\x8f\xbf\xbf"));
371 
372   // U+110000 (invalid)
373   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
374       ConvertUTFResultContainer(sourceIllegal)
375           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
376       "\xf4\x90\x80\x80"));
377 
378   //
379   // Unexpected continuation bytes
380   //
381 
382   // A sequence of unexpected continuation bytes that don't follow a first
383   // byte, every byte is a maximal subpart.
384 
385   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
386       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80"));
387   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
388       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf"));
389   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
390       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
391       "\x80\x80"));
392   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
393       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
394       "\x80\xbf"));
395   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
396       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
397       "\xbf\x80"));
398   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
399       ConvertUTFResultContainer(sourceIllegal)
400           .withScalars(0xfffd, 0xfffd, 0xfffd),
401       "\x80\xbf\x80"));
402   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
403       ConvertUTFResultContainer(sourceIllegal)
404           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
405       "\x80\xbf\x80\xbf"));
406   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
407       ConvertUTFResultContainer(sourceIllegal)
408           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
409       "\x80\xbf\x82\xbf\xaa"));
410   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
411       ConvertUTFResultContainer(sourceIllegal)
412           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
413       "\xaa\xb0\xbb\xbf\xaa\xa0"));
414   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
415       ConvertUTFResultContainer(sourceIllegal)
416           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
417       "\xaa\xb0\xbb\xbf\xaa\xa0\x8f"));
418 
419   // All continuation bytes (0x80--0xbf).
420   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
421       ConvertUTFResultContainer(sourceIllegal)
422           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
423                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
424           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
425                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
426           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
427                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
428           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
429                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
430           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
431                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
432           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
433                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
434           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
435                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
436           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
437                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
438       "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
439       "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
440       "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
441       "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"));
442 
443   //
444   // Lonely start bytes
445   //
446 
447   // Start bytes of 2-byte sequences (0xc0--0xdf).
448   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
449       ConvertUTFResultContainer(sourceIllegal)
450           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
451                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
452           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
453                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
454           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
455                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
456           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
457                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
458       "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
459       "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"));
460 
461   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
462       ConvertUTFResultContainer(sourceIllegal)
463           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
464                        0xfffd, 0x0020, 0xfffd, 0x0020)
465           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
466                        0xfffd, 0x0020, 0xfffd, 0x0020)
467           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
468                        0xfffd, 0x0020, 0xfffd, 0x0020)
469           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
470                        0xfffd, 0x0020, 0xfffd, 0x0020)
471           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
472                        0xfffd, 0x0020, 0xfffd, 0x0020)
473           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
474                        0xfffd, 0x0020, 0xfffd, 0x0020)
475           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
476                        0xfffd, 0x0020, 0xfffd, 0x0020)
477           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
478                        0xfffd, 0x0020, 0xfffd, 0x0020),
479       "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20"
480       "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20"
481       "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20"
482       "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20"));
483 
484   // Start bytes of 3-byte sequences (0xe0--0xef).
485   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
486       ConvertUTFResultContainer(sourceIllegal)
487           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
488                        0xfffd, 0xfffd, 0xfffd, 0xfffd)
489           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
490                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
491       "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"));
492 
493   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
494       ConvertUTFResultContainer(sourceIllegal)
495           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
496                        0xfffd, 0x0020, 0xfffd, 0x0020)
497           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
498                        0xfffd, 0x0020, 0xfffd, 0x0020)
499           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
500                        0xfffd, 0x0020, 0xfffd, 0x0020)
501           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
502                        0xfffd, 0x0020, 0xfffd, 0x0020),
503       "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20"
504       "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20"));
505 
506   // Start bytes of 4-byte sequences (0xf0--0xf7).
507   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
508       ConvertUTFResultContainer(sourceIllegal)
509           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
510                        0xfffd, 0xfffd, 0xfffd, 0xfffd),
511       "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"));
512 
513   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
514       ConvertUTFResultContainer(sourceIllegal)
515           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
516                        0xfffd, 0x0020, 0xfffd, 0x0020)
517           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
518                        0xfffd, 0x0020, 0xfffd, 0x0020),
519       "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20"));
520 
521   // Start bytes of 5-byte sequences (0xf8--0xfb).
522   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
523       ConvertUTFResultContainer(sourceIllegal)
524           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
525       "\xf8\xf9\xfa\xfb"));
526 
527   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
528       ConvertUTFResultContainer(sourceIllegal)
529           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
530                        0xfffd, 0x0020, 0xfffd, 0x0020),
531       "\xf8\x20\xf9\x20\xfa\x20\xfb\x20"));
532 
533   // Start bytes of 6-byte sequences (0xfc--0xfd).
534   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
535       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
536       "\xfc\xfd"));
537 
538   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
539       ConvertUTFResultContainer(sourceIllegal)
540           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020),
541       "\xfc\x20\xfd\x20"));
542 
543   //
544   // Other bytes (0xc0--0xc1, 0xfe--0xff).
545   //
546 
547   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
548       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0"));
549   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
550       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1"));
551   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
552       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe"));
553   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
554       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff"));
555 
556   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
557       ConvertUTFResultContainer(sourceIllegal)
558           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
559       "\xc0\xc1\xfe\xff"));
560 
561   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
562       ConvertUTFResultContainer(sourceIllegal)
563           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
564       "\xfe\xfe\xff\xff"));
565 
566   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
567       ConvertUTFResultContainer(sourceIllegal)
568           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
569       "\xfe\x80\x80\x80\x80\x80"));
570 
571   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
572       ConvertUTFResultContainer(sourceIllegal)
573           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
574       "\xff\x80\x80\x80\x80\x80"));
575 
576   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
577       ConvertUTFResultContainer(sourceIllegal)
578           .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
579                        0xfffd, 0x0020, 0xfffd, 0x0020),
580       "\xc0\x20\xc1\x20\xfe\x20\xff\x20"));
581 
582   //
583   // Sequences with one continuation byte missing
584   //
585 
586   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
587       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2"));
588   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
589       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf"));
590   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
591       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
592       "\xe0\xa0"));
593   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
594       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
595       "\xe0\xbf"));
596   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
597       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
598       "\xe1\x80"));
599   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
600       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
601       "\xec\xbf"));
602   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
603       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
604       "\xed\x80"));
605   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
606       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
607       "\xed\x9f"));
608   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
609       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
610       "\xee\x80"));
611   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
612       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
613       "\xef\xbf"));
614   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
615       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
616       "\xf0\x90\x80"));
617   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
618       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
619       "\xf0\xbf\xbf"));
620   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
621       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
622       "\xf1\x80\x80"));
623   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
624       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
625       "\xf3\xbf\xbf"));
626   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
627       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
628       "\xf4\x80\x80"));
629   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
630       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
631       "\xf4\x8f\xbf"));
632 
633   // Overlong sequences with one trailing byte missing.
634   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
635       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
636       "\xc0"));
637   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
638       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
639       "\xc1"));
640   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
641       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
642       "\xe0\x80"));
643   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
644       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
645       "\xe0\x9f"));
646   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
647       ConvertUTFResultContainer(sourceIllegal)
648           .withScalars(0xfffd, 0xfffd, 0xfffd),
649       "\xf0\x80\x80"));
650   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
651       ConvertUTFResultContainer(sourceIllegal)
652           .withScalars(0xfffd, 0xfffd, 0xfffd),
653       "\xf0\x8f\x80"));
654   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
655       ConvertUTFResultContainer(sourceIllegal)
656           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
657       "\xf8\x80\x80\x80"));
658   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
659       ConvertUTFResultContainer(sourceIllegal)
660           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
661       "\xfc\x80\x80\x80\x80"));
662 
663   // Sequences that represent surrogates with one trailing byte missing.
664   // High surrogates
665   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
666       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
667       "\xed\xa0"));
668   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
669       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
670       "\xed\xac"));
671   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
672       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
673       "\xed\xaf"));
674   // Low surrogates
675   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
676       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
677       "\xed\xb0"));
678   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
679       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
680       "\xed\xb4"));
681   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
682       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
683       "\xed\xbf"));
684 
685   // Ill-formed 4-byte sequences.
686   // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
687   // U+1100xx (invalid)
688   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
689       ConvertUTFResultContainer(sourceIllegal)
690           .withScalars(0xfffd, 0xfffd, 0xfffd),
691       "\xf4\x90\x80"));
692   // U+13FBxx (invalid)
693   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
694       ConvertUTFResultContainer(sourceIllegal)
695           .withScalars(0xfffd, 0xfffd, 0xfffd),
696       "\xf4\xbf\xbf"));
697   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
698       ConvertUTFResultContainer(sourceIllegal)
699           .withScalars(0xfffd, 0xfffd, 0xfffd),
700       "\xf5\x80\x80"));
701   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
702       ConvertUTFResultContainer(sourceIllegal)
703           .withScalars(0xfffd, 0xfffd, 0xfffd),
704       "\xf6\x80\x80"));
705   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
706       ConvertUTFResultContainer(sourceIllegal)
707           .withScalars(0xfffd, 0xfffd, 0xfffd),
708       "\xf7\x80\x80"));
709   // U+1FFBxx (invalid)
710   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
711       ConvertUTFResultContainer(sourceIllegal)
712           .withScalars(0xfffd, 0xfffd, 0xfffd),
713       "\xf7\xbf\xbf"));
714 
715   // Ill-formed 5-byte sequences.
716   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
717   // U+2000xx (invalid)
718   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
719       ConvertUTFResultContainer(sourceIllegal)
720           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
721       "\xf8\x88\x80\x80"));
722   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
723       ConvertUTFResultContainer(sourceIllegal)
724           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
725       "\xf8\xbf\xbf\xbf"));
726   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
727       ConvertUTFResultContainer(sourceIllegal)
728           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
729       "\xf9\x80\x80\x80"));
730   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
731       ConvertUTFResultContainer(sourceIllegal)
732           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
733       "\xfa\x80\x80\x80"));
734   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
735       ConvertUTFResultContainer(sourceIllegal)
736           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
737       "\xfb\x80\x80\x80"));
738   // U+3FFFFxx (invalid)
739   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
740       ConvertUTFResultContainer(sourceIllegal)
741           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
742       "\xfb\xbf\xbf\xbf"));
743 
744   // Ill-formed 6-byte sequences.
  // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
746   // U+40000xx (invalid)
747   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
748       ConvertUTFResultContainer(sourceIllegal)
749           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
750       "\xfc\x84\x80\x80\x80"));
751   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
752       ConvertUTFResultContainer(sourceIllegal)
753           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
754       "\xfc\xbf\xbf\xbf\xbf"));
755   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
756       ConvertUTFResultContainer(sourceIllegal)
757           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
758       "\xfd\x80\x80\x80\x80"));
759   // U+7FFFFFxx (invalid)
760   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
761       ConvertUTFResultContainer(sourceIllegal)
762           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
763       "\xfd\xbf\xbf\xbf\xbf"));
764 
765   //
766   // Sequences with two continuation bytes missing
767   //
768 
769   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
770       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
771       "\xf0\x90"));
772   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
773       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
774       "\xf0\xbf"));
775   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
776       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
777       "\xf1\x80"));
778   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
779       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
780       "\xf3\xbf"));
781   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
782       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
783       "\xf4\x80"));
784   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
785       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
786       "\xf4\x8f"));
787 
  // Overlong sequences with two trailing bytes missing.
789   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
790       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0"));
791   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
792       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
793       "\xf0\x80"));
794   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
795       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
796       "\xf0\x8f"));
797   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
798       ConvertUTFResultContainer(sourceIllegal)
799           .withScalars(0xfffd, 0xfffd, 0xfffd),
800       "\xf8\x80\x80"));
801   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
802       ConvertUTFResultContainer(sourceIllegal)
803           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
804       "\xfc\x80\x80\x80"));
805 
806   // Sequences that represent surrogates with two trailing bytes missing.
807   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
808       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed"));
809 
810   // Ill-formed 4-byte sequences.
811   // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
812   // U+110yxx (invalid)
813   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
814       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
815       "\xf4\x90"));
816   // U+13Fyxx (invalid)
817   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
818       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
819       "\xf4\xbf"));
820   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
821       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
822       "\xf5\x80"));
823   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
824       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
825       "\xf6\x80"));
826   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
827       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
828       "\xf7\x80"));
829   // U+1FFyxx (invalid)
830   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
831       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
832       "\xf7\xbf"));
833 
834   // Ill-formed 5-byte sequences.
835   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
836   // U+200yxx (invalid)
837   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
838       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
839       "\xf8\x88\x80"));
840   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
841       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
842       "\xf8\xbf\xbf"));
843   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
844       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
845       "\xf9\x80\x80"));
846   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
847       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
848       "\xfa\x80\x80"));
849   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
850       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
851       "\xfb\x80\x80"));
852   // U+3FFFyxx (invalid)
853   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
854       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
855       "\xfb\xbf\xbf"));
856 
857   // Ill-formed 6-byte sequences.
858   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
859   // U+4000yxx (invalid)
860   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
861       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
862       "\xfc\x84\x80\x80"));
863   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
864       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
865       "\xfc\xbf\xbf\xbf"));
866   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
867       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
868       "\xfd\x80\x80\x80"));
869   // U+7FFFFyxx (invalid)
870   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
871       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
872       "\xfd\xbf\xbf\xbf"));
873 
874   //
875   // Sequences with three continuation bytes missing
876   //
877 
878   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
879       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
880   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
881       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1"));
882   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
883       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2"));
884   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
885       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3"));
886   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
887       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4"));
888 
889   // Broken overlong sequences.
890   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
891       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
892   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
893       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
894       "\xf8\x80"));
895   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
896       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
897       "\xfc\x80\x80"));
898 
899   // Ill-formed 4-byte sequences.
900   // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
901   // U+14yyxx (invalid)
902   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
903       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5"));
904   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
905       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6"));
906   // U+1Cyyxx (invalid)
907   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
908       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7"));
909 
910   // Ill-formed 5-byte sequences.
911   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
912   // U+20yyxx (invalid)
913   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
914       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
915       "\xf8\x88"));
916   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
917       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
918       "\xf8\xbf"));
919   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
920       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
921       "\xf9\x80"));
922   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
923       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
924       "\xfa\x80"));
925   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
926       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
927       "\xfb\x80"));
928   // U+3FCyyxx (invalid)
929   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
930       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
931       "\xfb\xbf"));
932 
933   // Ill-formed 6-byte sequences.
934   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
935   // U+400yyxx (invalid)
936   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
937       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
938       "\xfc\x84\x80"));
939   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
940       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
941       "\xfc\xbf\xbf"));
942   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
943       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
944       "\xfd\x80\x80"));
945   // U+7FFCyyxx (invalid)
946   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
947       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
948       "\xfd\xbf\xbf"));
949 
950   //
951   // Sequences with four continuation bytes missing
952   //
953 
954   // Ill-formed 5-byte sequences.
955   // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
956   // U+uzyyxx (invalid)
957   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
958       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
959   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
960       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9"));
961   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
962       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa"));
963   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
964       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
965   // U+3zyyxx (invalid)
966   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
967       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
968 
969   // Broken overlong sequences.
970   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
971       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
972   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
973       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
974       "\xfc\x80"));
975 
976   // Ill-formed 6-byte sequences.
977   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
978   // U+uzzyyxx (invalid)
979   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
980       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
981       "\xfc\x84"));
982   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
983       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
984       "\xfc\xbf"));
985   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
986       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
987       "\xfd\x80"));
988   // U+7Fzzyyxx (invalid)
989   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
990       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
991       "\xfd\xbf"));
992 
993   //
994   // Sequences with five continuation bytes missing
995   //
996 
997   // Ill-formed 6-byte sequences.
998   // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
999   // U+uzzyyxx (invalid)
1000   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1001       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc"));
1002   // U+uuzzyyxx (invalid)
1003   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1004       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd"));
1005 
1006   //
1007   // Consecutive sequences with trailing bytes missing
1008   //
1009 
1010   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1011       ConvertUTFResultContainer(sourceIllegal)
1012           .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
1013           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
1014           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd)
1015           .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
1016           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
1017           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1018       "\xc0" "\xe0\x80" "\xf0\x80\x80"
1019       "\xf8\x80\x80\x80"
1020       "\xfc\x80\x80\x80\x80"
1021       "\xdf" "\xef\xbf" "\xf7\xbf\xbf"
1022       "\xfb\xbf\xbf\xbf"
1023       "\xfd\xbf\xbf\xbf\xbf"));
1024 
1025   //
1026   // Overlong UTF-8 sequences
1027   //
1028 
1029   // U+002F SOLIDUS
1030   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1031       ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f"));
1032 
1033   // Overlong sequences of the above.
1034   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1035       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1036       "\xc0\xaf"));
1037   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1038       ConvertUTFResultContainer(sourceIllegal)
1039           .withScalars(0xfffd, 0xfffd, 0xfffd),
1040       "\xe0\x80\xaf"));
1041   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1042       ConvertUTFResultContainer(sourceIllegal)
1043           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1044       "\xf0\x80\x80\xaf"));
1045   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1046       ConvertUTFResultContainer(sourceIllegal)
1047           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1048       "\xf8\x80\x80\x80\xaf"));
1049   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1050       ConvertUTFResultContainer(sourceIllegal)
1051           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1052       "\xfc\x80\x80\x80\x80\xaf"));
1053 
1054   // U+0000 NULL
1055   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1056       ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
1057       StringRef("\x00", 1)));
1058 
1059   // Overlong sequences of the above.
1060   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1061       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1062       "\xc0\x80"));
1063   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1064       ConvertUTFResultContainer(sourceIllegal)
1065           .withScalars(0xfffd, 0xfffd, 0xfffd),
1066       "\xe0\x80\x80"));
1067   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1068       ConvertUTFResultContainer(sourceIllegal)
1069           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1070       "\xf0\x80\x80\x80"));
1071   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1072       ConvertUTFResultContainer(sourceIllegal)
1073           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1074       "\xf8\x80\x80\x80\x80"));
1075   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1076       ConvertUTFResultContainer(sourceIllegal)
1077           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1078       "\xfc\x80\x80\x80\x80\x80"));
1079 
1080   // Other overlong sequences.
1081   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1082       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1083       "\xc0\xbf"));
1084   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1085       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1086       "\xc1\x80"));
1087   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1088       ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1089       "\xc1\xbf"));
1090   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1091       ConvertUTFResultContainer(sourceIllegal)
1092           .withScalars(0xfffd, 0xfffd, 0xfffd),
1093       "\xe0\x9f\xbf"));
1094   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1095       ConvertUTFResultContainer(sourceIllegal)
1096           .withScalars(0xfffd, 0xfffd, 0xfffd),
1097       "\xed\xa0\x80"));
1098   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1099       ConvertUTFResultContainer(sourceIllegal)
1100           .withScalars(0xfffd, 0xfffd, 0xfffd),
1101       "\xed\xbf\xbf"));
1102   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1103       ConvertUTFResultContainer(sourceIllegal)
1104           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1105       "\xf0\x8f\x80\x80"));
1106   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1107       ConvertUTFResultContainer(sourceIllegal)
1108           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1109       "\xf0\x8f\xbf\xbf"));
1110   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1111       ConvertUTFResultContainer(sourceIllegal)
1112           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1113       "\xf8\x87\xbf\xbf\xbf"));
1114   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1115       ConvertUTFResultContainer(sourceIllegal)
1116           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1117       "\xfc\x83\xbf\xbf\xbf\xbf"));
1118 
1119   //
1120   // Isolated surrogates
1121   //
1122 
1123   // Unicode 6.3.0:
1124   //
1125   //    D71.  High-surrogate code point: A Unicode code point in the range
1126   //    U+D800 to U+DBFF.
1127   //
1128   //    D73.  Low-surrogate code point: A Unicode code point in the range
1129   //    U+DC00 to U+DFFF.
1130 
1131   // Note: U+E0100 is <DB40 DD00> in UTF16.
1132 
1133   // High surrogates
1134 
1135   // U+D800
1136   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1137       ConvertUTFResultContainer(sourceIllegal)
1138           .withScalars(0xfffd, 0xfffd, 0xfffd),
1139       "\xed\xa0\x80"));
1140 
1141   // U+DB40
1142   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1143       ConvertUTFResultContainer(sourceIllegal)
1144           .withScalars(0xfffd, 0xfffd, 0xfffd),
1145       "\xed\xac\xa0"));
1146 
1147   // U+DBFF
1148   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1149       ConvertUTFResultContainer(sourceIllegal)
1150           .withScalars(0xfffd, 0xfffd, 0xfffd),
1151       "\xed\xaf\xbf"));
1152 
1153   // Low surrogates
1154 
1155   // U+DC00
1156   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1157       ConvertUTFResultContainer(sourceIllegal)
1158           .withScalars(0xfffd, 0xfffd, 0xfffd),
1159       "\xed\xb0\x80"));
1160 
1161   // U+DD00
1162   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1163       ConvertUTFResultContainer(sourceIllegal)
1164           .withScalars(0xfffd, 0xfffd, 0xfffd),
1165       "\xed\xb4\x80"));
1166 
1167   // U+DFFF
1168   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1169       ConvertUTFResultContainer(sourceIllegal)
1170           .withScalars(0xfffd, 0xfffd, 0xfffd),
1171       "\xed\xbf\xbf"));
1172 
1173   // Surrogate pairs
1174 
1175   // U+D800 U+DC00
1176   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1177       ConvertUTFResultContainer(sourceIllegal)
1178           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1179       "\xed\xa0\x80\xed\xb0\x80"));
1180 
1181   // U+D800 U+DD00
1182   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1183       ConvertUTFResultContainer(sourceIllegal)
1184           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1185       "\xed\xa0\x80\xed\xb4\x80"));
1186 
1187   // U+D800 U+DFFF
1188   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1189       ConvertUTFResultContainer(sourceIllegal)
1190           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1191       "\xed\xa0\x80\xed\xbf\xbf"));
1192 
1193   // U+DB40 U+DC00
1194   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1195       ConvertUTFResultContainer(sourceIllegal)
1196           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1197       "\xed\xac\xa0\xed\xb0\x80"));
1198 
1199   // U+DB40 U+DD00
1200   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1201       ConvertUTFResultContainer(sourceIllegal)
1202           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1203       "\xed\xac\xa0\xed\xb4\x80"));
1204 
1205   // U+DB40 U+DFFF
1206   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1207       ConvertUTFResultContainer(sourceIllegal)
1208           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1209       "\xed\xac\xa0\xed\xbf\xbf"));
1210 
1211   // U+DBFF U+DC00
1212   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1213       ConvertUTFResultContainer(sourceIllegal)
1214           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1215       "\xed\xaf\xbf\xed\xb0\x80"));
1216 
1217   // U+DBFF U+DD00
1218   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1219       ConvertUTFResultContainer(sourceIllegal)
1220           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1221       "\xed\xaf\xbf\xed\xb4\x80"));
1222 
1223   // U+DBFF U+DFFF
1224   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1225       ConvertUTFResultContainer(sourceIllegal)
1226           .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1227       "\xed\xaf\xbf\xed\xbf\xbf"));
1228 
1229   //
1230   // Noncharacters
1231   //
1232 
1233   // Unicode 6.3.0:
1234   //
1235   //    D14.  Noncharacter: A code point that is permanently reserved for
1236   //    internal use and that should never be interchanged. Noncharacters
  //    consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10)
1238   //    and the values U+FDD0..U+FDEF.
1239 
1240   // U+FFFE
1241   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1242       ConvertUTFResultContainer(conversionOK).withScalars(0xfffe),
1243       "\xef\xbf\xbe"));
1244 
1245   // U+FFFF
1246   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1247       ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
1248       "\xef\xbf\xbf"));
1249 
1250   // U+1FFFE
1251   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1252       ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe),
1253       "\xf0\x9f\xbf\xbe"));
1254 
1255   // U+1FFFF
1256   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1257       ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff),
1258       "\xf0\x9f\xbf\xbf"));
1259 
1260   // U+2FFFE
1261   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1262       ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe),
1263       "\xf0\xaf\xbf\xbe"));
1264 
1265   // U+2FFFF
1266   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1267       ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff),
1268       "\xf0\xaf\xbf\xbf"));
1269 
1270   // U+3FFFE
1271   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1272       ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe),
1273       "\xf0\xbf\xbf\xbe"));
1274 
1275   // U+3FFFF
1276   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1277       ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff),
1278       "\xf0\xbf\xbf\xbf"));
1279 
1280   // U+4FFFE
1281   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1282       ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe),
1283       "\xf1\x8f\xbf\xbe"));
1284 
1285   // U+4FFFF
1286   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1287       ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff),
1288       "\xf1\x8f\xbf\xbf"));
1289 
1290   // U+5FFFE
1291   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1292       ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe),
1293       "\xf1\x9f\xbf\xbe"));
1294 
1295   // U+5FFFF
1296   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1297       ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff),
1298       "\xf1\x9f\xbf\xbf"));
1299 
1300   // U+6FFFE
1301   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1302       ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe),
1303       "\xf1\xaf\xbf\xbe"));
1304 
1305   // U+6FFFF
1306   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1307       ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff),
1308       "\xf1\xaf\xbf\xbf"));
1309 
1310   // U+7FFFE
1311   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1312       ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe),
1313       "\xf1\xbf\xbf\xbe"));
1314 
1315   // U+7FFFF
1316   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1317       ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff),
1318       "\xf1\xbf\xbf\xbf"));
1319 
1320   // U+8FFFE
1321   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1322       ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe),
1323       "\xf2\x8f\xbf\xbe"));
1324 
1325   // U+8FFFF
1326   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1327       ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff),
1328       "\xf2\x8f\xbf\xbf"));
1329 
1330   // U+9FFFE
1331   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1332       ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe),
1333       "\xf2\x9f\xbf\xbe"));
1334 
1335   // U+9FFFF
1336   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1337       ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff),
1338       "\xf2\x9f\xbf\xbf"));
1339 
1340   // U+AFFFE
1341   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1342       ConvertUTFResultContainer(conversionOK).withScalars(0xafffe),
1343       "\xf2\xaf\xbf\xbe"));
1344 
1345   // U+AFFFF
1346   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1347       ConvertUTFResultContainer(conversionOK).withScalars(0xaffff),
1348       "\xf2\xaf\xbf\xbf"));
1349 
1350   // U+BFFFE
1351   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1352       ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe),
1353       "\xf2\xbf\xbf\xbe"));
1354 
1355   // U+BFFFF
1356   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1357       ConvertUTFResultContainer(conversionOK).withScalars(0xbffff),
1358       "\xf2\xbf\xbf\xbf"));
1359 
1360   // U+CFFFE
1361   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1362       ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe),
1363       "\xf3\x8f\xbf\xbe"));
1364 
1365   // U+CFFFF
1366   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1367       ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF),
1368       "\xf3\x8f\xbf\xbf"));
1369 
1370   // U+DFFFE
1371   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1372       ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe),
1373       "\xf3\x9f\xbf\xbe"));
1374 
1375   // U+DFFFF
1376   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1377       ConvertUTFResultContainer(conversionOK).withScalars(0xdffff),
1378       "\xf3\x9f\xbf\xbf"));
1379 
1380   // U+EFFFE
1381   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1382       ConvertUTFResultContainer(conversionOK).withScalars(0xefffe),
1383       "\xf3\xaf\xbf\xbe"));
1384 
1385   // U+EFFFF
1386   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1387       ConvertUTFResultContainer(conversionOK).withScalars(0xeffff),
1388       "\xf3\xaf\xbf\xbf"));
1389 
1390   // U+FFFFE
1391   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1392       ConvertUTFResultContainer(conversionOK).withScalars(0xffffe),
1393       "\xf3\xbf\xbf\xbe"));
1394 
1395   // U+FFFFF
1396   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1397       ConvertUTFResultContainer(conversionOK).withScalars(0xfffff),
1398       "\xf3\xbf\xbf\xbf"));
1399 
1400   // U+10FFFE
1401   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1402       ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe),
1403       "\xf4\x8f\xbf\xbe"));
1404 
1405   // U+10FFFF
1406   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1407       ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
1408       "\xf4\x8f\xbf\xbf"));
1409 
1410   // U+FDD0
1411   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1412       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0),
1413       "\xef\xb7\x90"));
1414 
1415   // U+FDD1
1416   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1417       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1),
1418       "\xef\xb7\x91"));
1419 
1420   // U+FDD2
1421   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1422       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2),
1423       "\xef\xb7\x92"));
1424 
1425   // U+FDD3
1426   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1427       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3),
1428       "\xef\xb7\x93"));
1429 
1430   // U+FDD4
1431   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1432       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4),
1433       "\xef\xb7\x94"));
1434 
1435   // U+FDD5
1436   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1437       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5),
1438       "\xef\xb7\x95"));
1439 
1440   // U+FDD6
1441   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1442       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6),
1443       "\xef\xb7\x96"));
1444 
1445   // U+FDD7
1446   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1447       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7),
1448       "\xef\xb7\x97"));
1449 
1450   // U+FDD8
1451   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1452       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8),
1453       "\xef\xb7\x98"));
1454 
1455   // U+FDD9
1456   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1457       ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9),
1458       "\xef\xb7\x99"));
1459 
1460   // U+FDDA
1461   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1462       ConvertUTFResultContainer(conversionOK).withScalars(0xfdda),
1463       "\xef\xb7\x9a"));
1464 
1465   // U+FDDB
1466   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1467       ConvertUTFResultContainer(conversionOK).withScalars(0xfddb),
1468       "\xef\xb7\x9b"));
1469 
1470   // U+FDDC
1471   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1472       ConvertUTFResultContainer(conversionOK).withScalars(0xfddc),
1473       "\xef\xb7\x9c"));
1474 
1475   // U+FDDD
1476   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1477       ConvertUTFResultContainer(conversionOK).withScalars(0xfddd),
1478       "\xef\xb7\x9d"));
1479 
1480   // U+FDDE
1481   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1482       ConvertUTFResultContainer(conversionOK).withScalars(0xfdde),
1483       "\xef\xb7\x9e"));
1484 
1485   // U+FDDF
1486   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1487       ConvertUTFResultContainer(conversionOK).withScalars(0xfddf),
1488       "\xef\xb7\x9f"));
1489 
1490   // U+FDE0
1491   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1492       ConvertUTFResultContainer(conversionOK).withScalars(0xfde0),
1493       "\xef\xb7\xa0"));
1494 
1495   // U+FDE1
1496   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1497       ConvertUTFResultContainer(conversionOK).withScalars(0xfde1),
1498       "\xef\xb7\xa1"));
1499 
1500   // U+FDE2
1501   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1502       ConvertUTFResultContainer(conversionOK).withScalars(0xfde2),
1503       "\xef\xb7\xa2"));
1504 
1505   // U+FDE3
1506   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1507       ConvertUTFResultContainer(conversionOK).withScalars(0xfde3),
1508       "\xef\xb7\xa3"));
1509 
1510   // U+FDE4
1511   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1512       ConvertUTFResultContainer(conversionOK).withScalars(0xfde4),
1513       "\xef\xb7\xa4"));
1514 
1515   // U+FDE5
1516   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1517       ConvertUTFResultContainer(conversionOK).withScalars(0xfde5),
1518       "\xef\xb7\xa5"));
1519 
1520   // U+FDE6
1521   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1522       ConvertUTFResultContainer(conversionOK).withScalars(0xfde6),
1523       "\xef\xb7\xa6"));
1524 
1525   // U+FDE7
1526   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1527       ConvertUTFResultContainer(conversionOK).withScalars(0xfde7),
1528       "\xef\xb7\xa7"));
1529 
1530   // U+FDE8
1531   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1532       ConvertUTFResultContainer(conversionOK).withScalars(0xfde8),
1533       "\xef\xb7\xa8"));
1534 
1535   // U+FDE9
1536   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1537       ConvertUTFResultContainer(conversionOK).withScalars(0xfde9),
1538       "\xef\xb7\xa9"));
1539 
1540   // U+FDEA
1541   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1542       ConvertUTFResultContainer(conversionOK).withScalars(0xfdea),
1543       "\xef\xb7\xaa"));
1544 
1545   // U+FDEB
1546   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1547       ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb),
1548       "\xef\xb7\xab"));
1549 
1550   // U+FDEC
1551   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1552       ConvertUTFResultContainer(conversionOK).withScalars(0xfdec),
1553       "\xef\xb7\xac"));
1554 
1555   // U+FDED
1556   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1557       ConvertUTFResultContainer(conversionOK).withScalars(0xfded),
1558       "\xef\xb7\xad"));
1559 
1560   // U+FDEE
1561   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1562       ConvertUTFResultContainer(conversionOK).withScalars(0xfdee),
1563       "\xef\xb7\xae"));
1564 
1565   // U+FDEF
1566   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1567       ConvertUTFResultContainer(conversionOK).withScalars(0xfdef),
1568       "\xef\xb7\xaf"));
1569 
1570   // U+FDF0
1571   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1572       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0),
1573       "\xef\xb7\xb0"));
1574 
1575   // U+FDF1
1576   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1577       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1),
1578       "\xef\xb7\xb1"));
1579 
1580   // U+FDF2
1581   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1582       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2),
1583       "\xef\xb7\xb2"));
1584 
1585   // U+FDF3
1586   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1587       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3),
1588       "\xef\xb7\xb3"));
1589 
1590   // U+FDF4
1591   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1592       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4),
1593       "\xef\xb7\xb4"));
1594 
1595   // U+FDF5
1596   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1597       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5),
1598       "\xef\xb7\xb5"));
1599 
1600   // U+FDF6
1601   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1602       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6),
1603       "\xef\xb7\xb6"));
1604 
1605   // U+FDF7
1606   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1607       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7),
1608       "\xef\xb7\xb7"));
1609 
1610   // U+FDF8
1611   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1612       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8),
1613       "\xef\xb7\xb8"));
1614 
1615   // U+FDF9
1616   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1617       ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9),
1618       "\xef\xb7\xb9"));
1619 
1620   // U+FDFA
1621   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1622       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa),
1623       "\xef\xb7\xba"));
1624 
1625   // U+FDFB
1626   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1627       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb),
1628       "\xef\xb7\xbb"));
1629 
1630   // U+FDFC
1631   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1632       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc),
1633       "\xef\xb7\xbc"));
1634 
1635   // U+FDFD
1636   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1637       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd),
1638       "\xef\xb7\xbd"));
1639 
1640   // U+FDFE
1641   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1642       ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe),
1643       "\xef\xb7\xbe"));
1644 
1645   // U+FDFF
1646   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1647       ConvertUTFResultContainer(conversionOK).withScalars(0xfdff),
1648       "\xef\xb7\xbf"));
1649 }
1650 
TEST(ConvertUTFTest,UTF8ToUTF32PartialLenient)1651 TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) {
1652   // U+0041 LATIN CAPITAL LETTER A
1653   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1654       ConvertUTFResultContainer(conversionOK).withScalars(0x0041),
1655       "\x41", true));
1656 
1657   //
1658   // Sequences with one continuation byte missing
1659   //
1660 
1661   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1662       ConvertUTFResultContainer(sourceExhausted),
1663       "\xc2", true));
1664   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1665       ConvertUTFResultContainer(sourceExhausted),
1666       "\xdf", true));
1667   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1668       ConvertUTFResultContainer(sourceExhausted),
1669       "\xe0\xa0", true));
1670   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1671       ConvertUTFResultContainer(sourceExhausted),
1672       "\xe0\xbf", true));
1673   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1674       ConvertUTFResultContainer(sourceExhausted),
1675       "\xe1\x80", true));
1676   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1677       ConvertUTFResultContainer(sourceExhausted),
1678       "\xec\xbf", true));
1679   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1680       ConvertUTFResultContainer(sourceExhausted),
1681       "\xed\x80", true));
1682   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1683       ConvertUTFResultContainer(sourceExhausted),
1684       "\xed\x9f", true));
1685   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1686       ConvertUTFResultContainer(sourceExhausted),
1687       "\xee\x80", true));
1688   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1689       ConvertUTFResultContainer(sourceExhausted),
1690       "\xef\xbf", true));
1691   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1692       ConvertUTFResultContainer(sourceExhausted),
1693       "\xf0\x90\x80", true));
1694   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1695       ConvertUTFResultContainer(sourceExhausted),
1696       "\xf0\xbf\xbf", true));
1697   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1698       ConvertUTFResultContainer(sourceExhausted),
1699       "\xf1\x80\x80", true));
1700   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1701       ConvertUTFResultContainer(sourceExhausted),
1702       "\xf3\xbf\xbf", true));
1703   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1704       ConvertUTFResultContainer(sourceExhausted),
1705       "\xf4\x80\x80", true));
1706   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1707       ConvertUTFResultContainer(sourceExhausted),
1708       "\xf4\x8f\xbf", true));
1709 
1710   EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1711       ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041),
1712       "\x41\xc2", true));
1713 }
1714 
1715