1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/strings/string_util.h"
6 
7 #include <math.h>
8 #include <stdarg.h>
9 #include <stddef.h>
10 #include <stdint.h>
11 
12 #include <algorithm>
13 
14 #include "base/macros.h"
15 #include "base/strings/string16.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "testing/gmock/include/gmock/gmock.h"
18 #include "testing/gtest/include/gtest/gtest.h"
19 
20 using ::testing::ElementsAre;
21 
22 namespace base {
23 
24 static const struct trim_case {
25   const wchar_t* input;
26   const TrimPositions positions;
27   const wchar_t* output;
28   const TrimPositions return_value;
29 } trim_cases[] = {
30   {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
31   {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
32   {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
33   {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
34   {L"", TRIM_ALL, L"", TRIM_NONE},
35   {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
36   {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
37   {L"  ", TRIM_ALL, L"", TRIM_ALL},
38   {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
39   {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
40 };
41 
42 static const struct trim_case_ascii {
43   const char* input;
44   const TrimPositions positions;
45   const char* output;
46   const TrimPositions return_value;
47 } trim_cases_ascii[] = {
48   {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
49   {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
50   {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
51   {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
52   {"", TRIM_ALL, "", TRIM_NONE},
53   {"  ", TRIM_LEADING, "", TRIM_LEADING},
54   {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
55   {"  ", TRIM_ALL, "", TRIM_ALL},
56   {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
57 };
58 
59 namespace {
60 
61 // Helper used to test TruncateUTF8ToByteSize.
Truncated(const std::string & input,const size_t byte_size,std::string * output)62 bool Truncated(const std::string& input,
63                const size_t byte_size,
64                std::string* output) {
65     size_t prev = input.length();
66     TruncateUTF8ToByteSize(input, byte_size, output);
67     return prev != output->length();
68 }
69 
70 }  // namespace
71 
TEST(StringUtilTest,TruncateUTF8ToByteSize)72 TEST(StringUtilTest, TruncateUTF8ToByteSize) {
73   std::string output;
74 
75   // Empty strings and invalid byte_size arguments
76   EXPECT_FALSE(Truncated(std::string(), 0, &output));
77   EXPECT_EQ(output, "");
78   EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
79   EXPECT_EQ(output, "");
80   EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output));
81   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
82 
83   // Testing the truncation of valid UTF8 correctly
84   EXPECT_TRUE(Truncated("abc", 2, &output));
85   EXPECT_EQ(output, "ab");
86   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
87   EXPECT_EQ(output.compare("\xc2\x81"), 0);
88   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
89   EXPECT_EQ(output.compare("\xc2\x81"), 0);
90   EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
91   EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
92 
93   {
94     const char array[] = "\x00\x00\xc2\x81\xc2\x81";
95     const std::string array_string(array, arraysize(array));
96     EXPECT_TRUE(Truncated(array_string, 4, &output));
97     EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
98   }
99 
100   {
101     const char array[] = "\x00\xc2\x81\xc2\x81";
102     const std::string array_string(array, arraysize(array));
103     EXPECT_TRUE(Truncated(array_string, 4, &output));
104     EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
105   }
106 
107   // Testing invalid UTF8
108   EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
109   EXPECT_EQ(output.compare(""), 0);
110   EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
111   EXPECT_EQ(output.compare(""), 0);
112   EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
113   EXPECT_EQ(output.compare(""), 0);
114 
115   // Testing invalid UTF8 mixed with valid UTF8
116   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
117   EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
118   EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
119   EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
120   EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
121               10, &output));
122   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
123   EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
124               10, &output));
125   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
126   EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
127   EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
128 
129   // Overlong sequences
130   EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
131   EXPECT_EQ(output.compare(""), 0);
132   EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
133   EXPECT_EQ(output.compare(""), 0);
134   EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
135   EXPECT_EQ(output.compare(""), 0);
136   EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
137   EXPECT_EQ(output.compare(""), 0);
138   EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
139   EXPECT_EQ(output.compare(""), 0);
140   EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
141   EXPECT_EQ(output.compare(""), 0);
142   EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
143   EXPECT_EQ(output.compare(""), 0);
144   EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
145   EXPECT_EQ(output.compare(""), 0);
146   EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
147   EXPECT_EQ(output.compare(""), 0);
148   EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
149   EXPECT_EQ(output.compare(""), 0);
150   EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
151   EXPECT_EQ(output.compare(""), 0);
152 
153   // Beyond U+10FFFF (the upper limit of Unicode codespace)
154   EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
155   EXPECT_EQ(output.compare(""), 0);
156   EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
157   EXPECT_EQ(output.compare(""), 0);
158   EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
159   EXPECT_EQ(output.compare(""), 0);
160 
161   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
162   EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
163   EXPECT_EQ(output.compare(""), 0);
164   EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
165   EXPECT_EQ(output.compare(""), 0);
166 
167   {
168     const char array[] = "\x00\x00\xfe\xff";
169     const std::string array_string(array, arraysize(array));
170     EXPECT_TRUE(Truncated(array_string, 4, &output));
171     EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
172   }
173 
174   // Variants on the previous test
175   {
176     const char array[] = "\xff\xfe\x00\x00";
177     const std::string array_string(array, 4);
178     EXPECT_FALSE(Truncated(array_string, 4, &output));
179     EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
180   }
181   {
182     const char array[] = "\xff\x00\x00\xfe";
183     const std::string array_string(array, arraysize(array));
184     EXPECT_TRUE(Truncated(array_string, 4, &output));
185     EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
186   }
187 
188   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
189   EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
190   EXPECT_EQ(output.compare(""), 0);
191   EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
192   EXPECT_EQ(output.compare(""), 0);
193   EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
194   EXPECT_EQ(output.compare(""), 0);
195   EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
196   EXPECT_EQ(output.compare(""), 0);
197   EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
198   EXPECT_EQ(output.compare(""), 0);
199 
200   // Strings in legacy encodings that are valid in UTF-8, but
201   // are invalid as UTF-8 in real data.
202   EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
203   EXPECT_EQ(output.compare("caf"), 0);
204   EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
205   EXPECT_EQ(output.compare(""), 0);
206   EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
207   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
208   EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
209               &output));
210   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
211 
212   // Testing using the same string as input and output.
213   EXPECT_FALSE(Truncated(output, 4, &output));
214   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
215   EXPECT_TRUE(Truncated(output, 3, &output));
216   EXPECT_EQ(output.compare("\xa7\x41"), 0);
217 
218   // "abc" with U+201[CD] in windows-125[0-8]
219   EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
220   EXPECT_EQ(output.compare("\x93" "abc"), 0);
221 
222   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
223   EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
224   EXPECT_EQ(output.compare(""), 0);
225 
226   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
227   EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
228   EXPECT_EQ(output.compare(""), 0);
229 }
230 
// Runs TrimWhitespace() and TrimWhitespaceASCII() over the shared case tables
// and checks that trimming works when input and output are the same string.
TEST(StringUtilTest, TrimWhitespace) {
  // Reused on purpose: every case starts with the previous case's contents.
  string16 trimmed;
  for (const trim_case& test : trim_cases) {
    const TrimPositions actual =
        TrimWhitespace(WideToUTF16(test.input), test.positions, &trimmed);
    EXPECT_EQ(test.return_value, actual);
    EXPECT_EQ(WideToUTF16(test.output), trimmed);
  }

  // In-place trimming: |trimmed| is both the source and the destination.
  trimmed = ASCIIToUTF16("  This is a test \r\n");
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed, TRIM_ALL, &trimmed));
  EXPECT_EQ(ASCIIToUTF16("This is a test"), trimmed);

  // In-place again, this time with nothing but whitespace.
  trimmed = ASCIIToUTF16("  \r\n");
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed, TRIM_ALL, &trimmed));
  EXPECT_EQ(string16(), trimmed);

  // Narrow-string variant.
  std::string trimmed_ascii;
  for (const trim_case_ascii& test : trim_cases_ascii) {
    const TrimPositions actual =
        TrimWhitespaceASCII(test.input, test.positions, &trimmed_ascii);
    EXPECT_EQ(test.return_value, actual);
    EXPECT_EQ(test.output, trimmed_ascii);
  }
}
259 
// One CollapseWhitespace() expectation expressed with wide-character literals.
struct collapse_case {
  const wchar_t* input;   // Text handed to CollapseWhitespace().
  const bool trim;        // Second argument forwarded to CollapseWhitespace().
  const wchar_t* output;  // Text expected back.
};

// Table consumed by the CollapseWhitespace test (after WideToUTF16 conversion).
static const collapse_case collapse_cases[] = {
  // Cases run with trim == false.
  {L" Google Video ", false, L"Google Video"},
  {L"Google Video", false, L"Google Video"},
  {L"", false, L""},
  {L"  ", false, L""},
  {L"\t\rTest String\n", false, L"Test String"},
  {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
  {L"    Test     \n  \t String    ", false, L"Test String"},
  {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
  {L"   Test String", false, L"Test String"},
  {L"Test String    ", false, L"Test String"},
  {L"Test String", false, L"Test String"},
  // Cases run with trim == true.
  {L"", true, L""},
  {L"\n", true, L""},
  {L"  \r  ", true, L""},
  {L"\nFoo", true, L"Foo"},
  {L"\r  Foo  ", true, L"Foo"},
  {L" Foo bar ", true, L"Foo bar"},
  {L"  \tFoo  bar  \n", true, L"Foo bar"},
  {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
285 
// Checks CollapseWhitespace() against every row of |collapse_cases|.
TEST(StringUtilTest, CollapseWhitespace) {
  for (const collapse_case& test : collapse_cases) {
    const string16 expected = WideToUTF16(test.output);
    EXPECT_EQ(expected, CollapseWhitespace(WideToUTF16(test.input), test.trim));
  }
}
293 
// One CollapseWhitespaceASCII() expectation expressed with narrow literals.
struct collapse_case_ascii {
  const char* input;   // Text handed to CollapseWhitespaceASCII().
  const bool trim;     // Second argument forwarded to the call.
  const char* output;  // Text expected back.
};

// Table consumed by the CollapseWhitespaceASCII test.
static const collapse_case_ascii collapse_cases_ascii[] = {
  // Cases run with trim == false.
  {" Google Video ", false, "Google Video"},
  {"Google Video", false, "Google Video"},
  {"", false, ""},
  {"  ", false, ""},
  {"\t\rTest String\n", false, "Test String"},
  {"    Test     \n  \t String    ", false, "Test String"},
  {"   Test String", false, "Test String"},
  {"Test String    ", false, "Test String"},
  {"Test String", false, "Test String"},
  // Cases run with trim == true.
  {"", true, ""},
  {"\n", true, ""},
  {"  \r  ", true, ""},
  {"\nFoo", true, "Foo"},
  {"\r  Foo  ", true, "Foo"},
  {" Foo bar ", true, "Foo bar"},
  {"  \tFoo  bar  \n", true, "Foo bar"},
  {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
317 
// Checks CollapseWhitespaceASCII() against every row of
// |collapse_cases_ascii|.
TEST(StringUtilTest, CollapseWhitespaceASCII) {
  for (const collapse_case_ascii& test : collapse_cases_ascii) {
    EXPECT_EQ(test.output, CollapseWhitespaceASCII(test.input, test.trim));
  }
}
324 
TEST(StringUtilTest,IsStringUTF8)325 TEST(StringUtilTest, IsStringUTF8) {
326   EXPECT_TRUE(IsStringUTF8("abc"));
327   EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
328   EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
329   EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
330   EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
331   EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
332 
333   // surrogate code points
334   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
335   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
336   EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
337 
338   // overlong sequences
339   EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
340   EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
341   EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
342   EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
343   EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
344   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
345   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
346   EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
347   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
348   EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
349   EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
350 
351   // Beyond U+10FFFF (the upper limit of Unicode codespace)
352   EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
353   EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
354   EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
355 
356   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
357   EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
358   EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
359   EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
360   EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
361 
362   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
363   EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
364   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
365   EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
366   EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
367   EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
368   // Strings in legacy encodings. We can certainly make up strings
369   // in a legacy encoding that are valid in UTF-8, but in real data,
370   // most of them are invalid as UTF-8.
371   EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
372   EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
373   EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
374   // "abc" with U+201[CD] in windows-125[0-8]
375   EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
376   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
377   EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
378   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
379   EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
380 
381   // Check that we support Embedded Nulls. The first uses the canonical UTF-8
382   // representation, and the second uses a 2-byte sequence. The second version
383   // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
384   // given codepoint must be used.
385   static const char kEmbeddedNull[] = "embedded\0null";
386   EXPECT_TRUE(IsStringUTF8(
387       std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
388   EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
389 }
390 
TEST(StringUtilTest,IsStringASCII)391 TEST(StringUtilTest, IsStringASCII) {
392   static char char_ascii[] =
393       "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
394   static char16 char16_ascii[] = {
395       '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A',
396       'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6',
397       '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 };
398   static std::wstring wchar_ascii(
399       L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF");
400 
401   // Test a variety of the fragment start positions and lengths in order to make
402   // sure that bit masking in IsStringASCII works correctly.
403   // Also, test that a non-ASCII character will be detected regardless of its
404   // position inside the string.
405   {
406     const size_t string_length = arraysize(char_ascii) - 1;
407     for (size_t offset = 0; offset < 8; ++offset) {
408       for (size_t len = 0, max_len = string_length - offset; len < max_len;
409            ++len) {
410         EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));
411         for (size_t char_pos = offset; char_pos < len; ++char_pos) {
412           char_ascii[char_pos] |= '\x80';
413           EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));
414           char_ascii[char_pos] &= ~'\x80';
415         }
416       }
417     }
418   }
419 
420   {
421     const size_t string_length = arraysize(char16_ascii) - 1;
422     for (size_t offset = 0; offset < 4; ++offset) {
423       for (size_t len = 0, max_len = string_length - offset; len < max_len;
424            ++len) {
425         EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len)));
426         for (size_t char_pos = offset; char_pos < len; ++char_pos) {
427           char16_ascii[char_pos] |= 0x80;
428           EXPECT_FALSE(
429               IsStringASCII(StringPiece16(char16_ascii + offset, len)));
430           char16_ascii[char_pos] &= ~0x80;
431           // Also test when the upper half is non-zero.
432           char16_ascii[char_pos] |= 0x100;
433           EXPECT_FALSE(
434               IsStringASCII(StringPiece16(char16_ascii + offset, len)));
435           char16_ascii[char_pos] &= ~0x100;
436         }
437       }
438     }
439   }
440 
441   {
442     const size_t string_length = wchar_ascii.length();
443     for (size_t len = 0; len < string_length; ++len) {
444       EXPECT_TRUE(IsStringASCII(wchar_ascii.substr(0, len)));
445       for (size_t char_pos = 0; char_pos < len; ++char_pos) {
446         wchar_ascii[char_pos] |= 0x80;
447         EXPECT_FALSE(
448             IsStringASCII(wchar_ascii.substr(0, len)));
449         wchar_ascii[char_pos] &= ~0x80;
450         wchar_ascii[char_pos] |= 0x100;
451         EXPECT_FALSE(
452             IsStringASCII(wchar_ascii.substr(0, len)));
453         wchar_ascii[char_pos] &= ~0x100;
454 #if defined(WCHAR_T_IS_UTF32)
455         wchar_ascii[char_pos] |= 0x10000;
456         EXPECT_FALSE(
457             IsStringASCII(wchar_ascii.substr(0, len)));
458         wchar_ascii[char_pos] &= ~0x10000;
459 #endif  // WCHAR_T_IS_UTF32
460       }
461     }
462   }
463 }
464 
// Round-trips ASCII text through ASCIIToUTF16() and UTF16ToASCII(), including
// empty strings and a string with an embedded NUL.
TEST(StringUtilTest, ConvertASCII) {
  // |kNarrow[i]| and |kWide[i]| spell the same text.
  static const char* const kNarrow[] = {
    "Google Video",
    "Hello, world\n",
    "0123ABCDwxyz \a\b\t\r\n!+,.~"
  };

  static const wchar_t* const kWide[] = {
    L"Google Video",
    L"Hello, world\n",
    L"0123ABCDwxyz \a\b\t\r\n!+,.~"
  };

  for (size_t i = 0; i < arraysize(kNarrow); ++i) {
    EXPECT_TRUE(IsStringASCII(kNarrow[i]));

    // Narrow -> UTF-16 must match the wide literal.
    const string16 widened = ASCIIToUTF16(kNarrow[i]);
    EXPECT_EQ(WideToUTF16(kWide[i]), widened);

    // UTF-16 -> narrow must match the narrow literal.
    const std::string narrowed = UTF16ToASCII(WideToUTF16(kWide[i]));
    EXPECT_EQ(kNarrow[i], narrowed);
  }

  EXPECT_FALSE(IsStringASCII("Google \x80Video"));

  // Empty strings convert to empty strings in both directions.
  const string16 empty_utf16;
  const std::string empty_ascii;
  EXPECT_EQ(empty_ascii, UTF16ToASCII(empty_utf16));
  EXPECT_EQ(empty_utf16, ASCIIToUTF16(empty_ascii));

  // An embedded NUL must survive both conversions. The terminating NUL of the
  // literal is excluded from the length.
  const char kWithNul[] = "test\0string";
  const int kLengthWithNul = arraysize(kWithNul) - 1;
  const std::string original(kWithNul, kLengthWithNul);

  const string16 as_utf16 = ASCIIToUTF16(original);
  EXPECT_EQ(static_cast<string16::size_type>(kLengthWithNul),
            as_utf16.length());

  const std::string round_tripped = UTF16ToASCII(as_utf16);
  EXPECT_EQ(static_cast<std::string::size_type>(kLengthWithNul),
            round_tripped.length());
  EXPECT_EQ(0, original.compare(round_tripped));
}
507 
// Checks ToLowerASCII() for single narrow characters, single char16
// characters, and whole strings of both widths.
TEST(StringUtilTest, ToLowerASCII) {
  // Narrow characters: upper-case letter, lower-case letter, digit.
  EXPECT_EQ('c', ToLowerASCII('C'));
  EXPECT_EQ('c', ToLowerASCII('c'));
  EXPECT_EQ('2', ToLowerASCII('2'));

  // The same three inputs as char16.
  const char16 upper_c = static_cast<char16>('C');
  const char16 lower_c = static_cast<char16>('c');
  const char16 digit_two = static_cast<char16>('2');
  EXPECT_EQ(lower_c, ToLowerASCII(upper_c));
  EXPECT_EQ(lower_c, ToLowerASCII(lower_c));
  EXPECT_EQ(digit_two, ToLowerASCII(digit_two));

  // Whole strings.
  EXPECT_EQ("cc2", ToLowerASCII("Cc2"));
  EXPECT_EQ(ASCIIToUTF16("cc2"), ToLowerASCII(ASCIIToUTF16("Cc2")));
}
520 
// Checks ToUpperASCII() for single narrow characters, single char16
// characters, and whole strings of both widths.
TEST(StringUtilTest, ToUpperASCII) {
  // Narrow characters: upper-case letter, lower-case letter, digit.
  EXPECT_EQ('C', ToUpperASCII('C'));
  EXPECT_EQ('C', ToUpperASCII('c'));
  EXPECT_EQ('2', ToUpperASCII('2'));

  // The same three inputs as char16.
  const char16 upper_c = static_cast<char16>('C');
  const char16 lower_c = static_cast<char16>('c');
  const char16 digit_two = static_cast<char16>('2');
  EXPECT_EQ(upper_c, ToUpperASCII(upper_c));
  EXPECT_EQ(upper_c, ToUpperASCII(lower_c));
  EXPECT_EQ(digit_two, ToUpperASCII(digit_two));

  // Whole strings.
  EXPECT_EQ("CC2", ToUpperASCII("Cc2"));
  EXPECT_EQ(ASCIIToUTF16("CC2"), ToUpperASCII(ASCIIToUTF16("Cc2")));
}
533 
// Checks that LowerCaseEqualsASCII() reports a match for differently-cased
// spellings of the same word, for both UTF-16 and narrow first arguments.
TEST(StringUtilTest, LowerCaseEqualsASCII) {
  static const struct {
    const char* any_case;    // First argument, in arbitrary case.
    const char* lower_case;  // Second argument, already lower-case.
  } kMatches[] = {
    { "FoO", "foo" },
    { "foo", "foo" },
    { "FOO", "foo" },
  };

  for (const auto& match : kMatches) {
    // UTF-16 first argument.
    EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(match.any_case),
                                     match.lower_case));
    // Narrow first argument.
    EXPECT_TRUE(LowerCaseEqualsASCII(match.any_case, match.lower_case));
  }
}
551 
// Checks the text FormatBytesUnlocalized() produces for a range of byte
// counts, from zero up to the largest positive 64-bit value.
TEST(StringUtilTest, FormatBytesUnlocalized) {
  static const struct {
    int64_t byte_count;
    const char* formatted;
  } kExpectations[] = {
    // Expected behavior: we show one post-decimal digit when we have
    // under two pre-decimal digits, except in cases where it makes no
    // sense (zero or bytes).
    // Since we switch units once we cross the 1000 mark, this keeps
    // the display of file sizes or bytes consistently around three
    // digits.
    {0, "0 B"},
    {512, "512 B"},
    {1024*1024, "1.0 MB"},
    {1024*1024*1024, "1.0 GB"},
    {10LL*1024*1024*1024, "10.0 GB"},
    {99LL*1024*1024*1024, "99.0 GB"},
    {105LL*1024*1024*1024, "105 GB"},
    {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
    {~(1LL << 63), "8192 PB"},

    // Values that are not exact multiples of the unit.
    {99*1024 + 103, "99.1 kB"},
    {1024*1024 + 103, "1.0 MB"},
    {1024*1024 + 205 * 1024, "1.2 MB"},
    {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
    {10LL*1024*1024*1024, "10.0 GB"},
    {100LL*1024*1024*1024, "100 GB"},
  };

  for (const auto& expectation : kExpectations) {
    const string16 expected_text = ASCIIToUTF16(expectation.formatted);
    EXPECT_EQ(expected_text, FormatBytesUnlocalized(expectation.byte_count));
  }
}
// Checks ReplaceSubstringsAfterOffset(): every occurrence of the search text
// at or after the start offset is expected to be replaced.
TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
  static const struct {
    const char* original;              // String to modify.
    string16::size_type start_offset;  // Where the search begins.
    const char* needle;                // Text to look for.
    const char* replacement;           // Text to substitute.
    const char* expected;              // Resulting string.
  } kScenarios[] = {
    {"aaa", 0, "a", "b", "bbb"},
    {"abb", 0, "ab", "a", "ab"},
    {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
    {"Not found", 0, "x", "0", "Not found"},
    {"Not found again", 5, "x", "0", "Not found again"},
    {" Making it much longer ", 0, " ", "Four score and seven years ago",
     "Four score and seven years agoMakingFour score and seven years agoit"
     "Four score and seven years agomuchFour score and seven years agolonger"
     "Four score and seven years ago"},
    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    {"Replace me only me once", 9, "me ", "", "Replace me only once"},
    {"abababab", 2, "ab", "c", "abccc"},
  };

  for (const auto& scenario : kScenarios) {
    string16 text = ASCIIToUTF16(scenario.original);
    const string16 needle = ASCIIToUTF16(scenario.needle);
    const string16 replacement = ASCIIToUTF16(scenario.replacement);
    ReplaceSubstringsAfterOffset(&text, scenario.start_offset, needle,
                                 replacement);
    EXPECT_EQ(ASCIIToUTF16(scenario.expected), text);
  }
}
616 
// Checks ReplaceFirstSubstringAfterOffset(): only the first occurrence of the
// search text at or after the start offset is expected to be replaced.
TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
  static const struct {
    const char* original;              // String to modify.
    string16::size_type start_offset;  // Where the search begins.
    const char* needle;                // Text to look for.
    const char* replacement;           // Text to substitute.
    const char* expected;              // Resulting string.
  } kScenarios[] = {
    {"aaa", 0, "a", "b", "baa"},
    {"abb", 0, "ab", "a", "ab"},
    {"Removing some substrings inging", 0, "ing", "",
      "Remov some substrings inging"},
    {"Not found", 0, "x", "0", "Not found"},
    {"Not found again", 5, "x", "0", "Not found again"},
    {" Making it much longer ", 0, " ", "Four score and seven years ago",
     "Four score and seven years agoMaking it much longer "},
    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    {"Replace me only me once", 4, "me ", "", "Replace only me once"},
    {"abababab", 2, "ab", "c", "abcabab"},
  };

  for (const auto& scenario : kScenarios) {
    string16 text = ASCIIToUTF16(scenario.original);
    const string16 needle = ASCIIToUTF16(scenario.needle);
    const string16 replacement = ASCIIToUTF16(scenario.replacement);
    ReplaceFirstSubstringAfterOffset(&text, scenario.start_offset, needle,
                                     replacement);
    EXPECT_EQ(ASCIIToUTF16(scenario.expected), text);
  }
}
646 
// Checks HexDigitToInt() for every hexadecimal digit, in both letter cases.
TEST(StringUtilTest, HexDigitToInt) {
  // The index of each character is the value it is expected to map to.
  static const char kUpperDigits[] = "0123456789ABCDEF";
  for (int value = 0; value < 16; ++value) {
    EXPECT_EQ(value, HexDigitToInt(kUpperDigits[value]));
  }

  // Verify the lower case as well: 'a'..'f' map to 10..15.
  static const char kLowerLetters[] = "abcdef";
  for (int index = 0; index < 6; ++index) {
    EXPECT_EQ(10 + index, HexDigitToInt(kLowerLetters[index]));
  }
}
673 
// Checks JoinString() on narrow strings as the list grows from empty to five
// items, including an empty item and a different separator.
TEST(StringUtilTest, JoinString) {
  const std::string comma_space(", ");
  std::vector<std::string> items;

  // Nothing to join.
  EXPECT_EQ(std::string(), JoinString(items, comma_space));

  // A single item.
  items.push_back("a");
  EXPECT_EQ("a", JoinString(items, comma_space));

  // Several items.
  items.push_back("b");
  items.push_back("c");
  EXPECT_EQ("a, b, c", JoinString(items, comma_space));

  // A trailing empty item.
  items.push_back(std::string());
  EXPECT_EQ("a, b, c, ", JoinString(items, comma_space));

  // A single-character separator, with a whitespace-only last item.
  items.push_back(" ");
  EXPECT_EQ("a|b|c|| ", JoinString(items, "|"));
}
691 
// Checks JoinString() on string16 input; mirrors the narrow-string test.
TEST(StringUtilTest, JoinString16) {
  const string16 comma_space = ASCIIToUTF16(", ");
  std::vector<string16> items;

  // Nothing to join.
  EXPECT_EQ(string16(), JoinString(items, comma_space));

  // A single item.
  items.push_back(ASCIIToUTF16("a"));
  EXPECT_EQ(ASCIIToUTF16("a"), JoinString(items, comma_space));

  // Several items.
  items.push_back(ASCIIToUTF16("b"));
  items.push_back(ASCIIToUTF16("c"));
  EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(items, comma_space));

  // A trailing empty item.
  items.push_back(ASCIIToUTF16(""));
  EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(items, comma_space));

  // A single-character separator, with a whitespace-only last item.
  items.push_back(ASCIIToUTF16(" "));
  EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(items, ASCIIToUTF16("|")));
}
709 
// Checks StartsWith() for narrow and UTF-16 strings in both comparison modes:
// matching and mismatching case, a subject shorter than the prefix, an empty
// subject, and an empty prefix.
TEST(StringUtilTest, StartsWith) {
  // Narrow strings.
  {
    const std::string prefix("javascript");
    const std::string lower_url("javascript:url");
    const std::string mixed_url("JavaScript:url");
    const std::string shorter("java");
    const std::string empty;

    // Case only matters in SENSITIVE mode.
    EXPECT_TRUE(StartsWith(lower_url, prefix, CompareCase::SENSITIVE));
    EXPECT_FALSE(StartsWith(mixed_url, prefix, CompareCase::SENSITIVE));
    EXPECT_TRUE(StartsWith(lower_url, prefix, CompareCase::INSENSITIVE_ASCII));
    EXPECT_TRUE(StartsWith(mixed_url, prefix, CompareCase::INSENSITIVE_ASCII));

    // A subject shorter than the prefix never matches.
    EXPECT_FALSE(StartsWith(shorter, prefix, CompareCase::SENSITIVE));
    EXPECT_FALSE(StartsWith(shorter, prefix, CompareCase::INSENSITIVE_ASCII));

    // Neither does an empty subject.
    EXPECT_FALSE(StartsWith(empty, prefix, CompareCase::INSENSITIVE_ASCII));
    EXPECT_FALSE(StartsWith(empty, prefix, CompareCase::SENSITIVE));

    // An empty prefix always matches.
    EXPECT_TRUE(StartsWith(shorter, empty, CompareCase::INSENSITIVE_ASCII));
    EXPECT_TRUE(StartsWith(shorter, empty, CompareCase::SENSITIVE));
  }

  // UTF-16 strings: the same scenarios in the same order.
  {
    const string16 prefix = ASCIIToUTF16("javascript");
    const string16 lower_url = ASCIIToUTF16("javascript:url");
    const string16 mixed_url = ASCIIToUTF16("JavaScript:url");
    const string16 shorter = ASCIIToUTF16("java");
    const string16 empty;

    EXPECT_TRUE(StartsWith(lower_url, prefix, CompareCase::SENSITIVE));
    EXPECT_FALSE(StartsWith(mixed_url, prefix, CompareCase::SENSITIVE));
    EXPECT_TRUE(StartsWith(lower_url, prefix, CompareCase::INSENSITIVE_ASCII));
    EXPECT_TRUE(StartsWith(mixed_url, prefix, CompareCase::INSENSITIVE_ASCII));

    EXPECT_FALSE(StartsWith(shorter, prefix, CompareCase::SENSITIVE));
    EXPECT_FALSE(StartsWith(shorter, prefix, CompareCase::INSENSITIVE_ASCII));

    EXPECT_FALSE(StartsWith(empty, prefix, CompareCase::INSENSITIVE_ASCII));
    EXPECT_FALSE(StartsWith(empty, prefix, CompareCase::SENSITIVE));

    EXPECT_TRUE(StartsWith(shorter, empty, CompareCase::INSENSITIVE_ASCII));
    EXPECT_TRUE(StartsWith(shorter, empty, CompareCase::SENSITIVE));
  }
}
755 
// Exercises EndsWith() on UTF-16 strings built from the ASCII text in each
// table row.
TEST(StringUtilTest, EndsWith) {
  static const struct {
    const char* str;
    const char* suffix;
    CompareCase case_sensitivity;
    bool expected;
  } kCases[] = {
      // Suffix present, with and without a case mismatch.
      {"Foo.plugin", ".plugin", CompareCase::SENSITIVE, true},
      {"Foo.Plugin", ".plugin", CompareCase::SENSITIVE, false},
      {"Foo.plugin", ".plugin", CompareCase::INSENSITIVE_ASCII, true},
      {"Foo.Plugin", ".plugin", CompareCase::INSENSITIVE_ASCII, true},
      // Suffix longer than the string being searched.
      {".plug", ".plugin", CompareCase::SENSITIVE, false},
      {".plug", ".plugin", CompareCase::INSENSITIVE_ASCII, false},
      // Suffix text occurs, but not at the end.
      {"Foo.plugin Bar", ".plugin", CompareCase::SENSITIVE, false},
      {"Foo.plugin Bar", ".plugin", CompareCase::INSENSITIVE_ASCII, false},
      // Empty string being searched.
      {"", ".plugin", CompareCase::INSENSITIVE_ASCII, false},
      {"", ".plugin", CompareCase::SENSITIVE, false},
      // Empty suffix.
      {"Foo.plugin", "", CompareCase::INSENSITIVE_ASCII, true},
      {"Foo.plugin", "", CompareCase::SENSITIVE, true},
      // String and suffix are identical.
      {".plugin", ".plugin", CompareCase::INSENSITIVE_ASCII, true},
      {".plugin", ".plugin", CompareCase::SENSITIVE, true},
      // Both empty.
      {"", "", CompareCase::INSENSITIVE_ASCII, true},
      {"", "", CompareCase::SENSITIVE, true},
  };

  for (const auto& test : kCases) {
    EXPECT_EQ(test.expected,
              EndsWith(ASCIIToUTF16(test.str), ASCIIToUTF16(test.suffix),
                       test.case_sensitivity));
  }
}
789 
// Verifies the |offsets| out-parameter of ReplaceStringPlaceholders(). The
// expectations below require offsets[i] to be the position, within the
// formatted string, at which subst[i] was inserted, independent of the order
// in which the placeholders appear in the pattern.
TEST(StringUtilTest, GetStringFWithOffsets) {
  std::vector<string16> subst;
  subst.push_back(ASCIIToUTF16("1"));
  subst.push_back(ASCIIToUTF16("2"));
  std::vector<size_t> offsets;

  // Placeholders appear in ascending order.
  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
                            subst,
                            &offsets);
  // ASSERT (not EXPECT): the element accesses below would read out of bounds
  // if fewer than two offsets were reported.
  ASSERT_EQ(2U, offsets.size());
  EXPECT_EQ(7U, offsets[0]);
  EXPECT_EQ(25U, offsets[1]);
  offsets.clear();

  // Placeholders appear in descending order.
  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
                            subst,
                            &offsets);
  ASSERT_EQ(2U, offsets.size());
  EXPECT_EQ(25U, offsets[0]);
  EXPECT_EQ(7U, offsets[1]);
}
812 
// Checks ReplaceStringPlaceholders() when the pattern refers to more
// placeholders ($1 through $6) than there are substitutions (three). The
// expected output shows the unmatched placeholders ($4, $5, $6) replaced by
// nothing.
TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
  static const char* const kSubstitutions[] = {"9a", "8b", "7c"};
  std::vector<string16> subst;
  for (const char* substitution : kSubstitutions)
    subst.push_back(ASCIIToUTF16(substitution));

  const string16 pattern =
      ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i");
  const string16 actual = ReplaceStringPlaceholders(pattern, subst, nullptr);

  EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"), actual);
}
827 
// Checks ReplaceStringPlaceholders() on UTF-16 strings with nine
// substitutions, one for each of the placeholders $1 through $9.
TEST(StringUtilTest, ReplaceStringPlaceholders) {
  static const char* const kSubstitutions[] = {
      "9a", "8b", "7c", "6d", "5e", "4f", "3g", "2h", "1i",
  };
  std::vector<string16> subst;
  for (const char* substitution : kSubstitutions)
    subst.push_back(ASCIIToUTF16(substitution));

  const string16 pattern =
      ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i");
  const string16 actual = ReplaceStringPlaceholders(pattern, subst, nullptr);

  EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"), actual);
}
846 
// Checks that a placeholder consumes only a single digit: the expected output
// treats "$16" as placeholder $1 followed by a literal '6'.
TEST(StringUtilTest, ReplaceStringPlaceholdersOneDigit) {
  const std::vector<string16> subst(1, ASCIIToUTF16("1a"));
  const string16 actual =
      ReplaceStringPlaceholders(ASCIIToUTF16(" $16 "), subst, nullptr);
  EXPECT_EQ(ASCIIToUTF16(" 1a6 "), actual);
}
854 
// Checks handling of a '$' followed by a non-digit: the expected output has
// both "$-" and "$A" removed entirely, while the valid "$1" is substituted.
TEST(StringUtilTest, ReplaceStringPlaceholdersInvalidPlaceholder) {
  const std::vector<string16> subst(1, ASCIIToUTF16("1a"));
  const string16 actual =
      ReplaceStringPlaceholders(ASCIIToUTF16("+$-+$A+$1+"), subst, nullptr);
  EXPECT_EQ(ASCIIToUTF16("+++1a+"), actual);
}
862 
// Same scenario as the nine-placeholder UTF-16 test, but using std::string
// for the pattern and the substitutions.
TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
  static const char* const kSubstitutions[] = {
      "9a", "8b", "7c", "6d", "5e", "4f", "3g", "2h", "1i",
  };
  std::vector<std::string> subst;
  for (const char* substitution : kSubstitutions)
    subst.push_back(substitution);

  const std::string actual = ReplaceStringPlaceholders(
      "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, nullptr);

  EXPECT_EQ("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", actual);
}
881 
// Checks runs of two or more consecutive '$' characters. Per the expected
// output, each run loses exactly one '$' and the digit that follows is kept
// as literal text rather than being substituted.
TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
  std::vector<std::string> subst;
  subst.push_back("a");
  subst.push_back("b");
  subst.push_back("c");
  // Expected value first, matching the argument order used by the other
  // tests in this file so failure output labels the values correctly.
  EXPECT_EQ("$1 $$2 $$$3",
            ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, nullptr));
}
890 
// Checks strlcpy() and wcslcpy() against destination buffers of several
// sizes. In every case the return value is expected to equal the length of
// the source string, whether or not all of it fit.
TEST(StringUtilTest, LcpyTest) {
  const char kSrc[] = "abcdefg";
  const wchar_t kWideSrc[] = L"abcdefg";
  const size_t kSrcLength = 7U;

  // Destination with room to spare: the whole string, including its
  // terminator, is copied.
  {
    char narrow[10];
    wchar_t wide[10];
    EXPECT_EQ(kSrcLength, strlcpy(narrow, kSrc, arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, kSrc, sizeof(kSrc)));
    EXPECT_EQ(kSrcLength, wcslcpy(wide, kWideSrc, arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, kWideSrc, sizeof(kWideSrc)));
  }

  // A destination size of zero: the buffer must be left untouched, while the
  // return value is still the source length.
  {
    char narrow[2] = {1, 2};
    wchar_t wide[2] = {1, 2};
    EXPECT_EQ(kSrcLength, strlcpy(narrow, kSrc, 0));
    EXPECT_EQ(1, narrow[0]);
    EXPECT_EQ(2, narrow[1]);
    EXPECT_EQ(kSrcLength, wcslcpy(wide, kWideSrc, 0));
    EXPECT_EQ(static_cast<wchar_t>(1), wide[0]);
    EXPECT_EQ(static_cast<wchar_t>(2), wide[1]);
  }

  // Destination exactly large enough for the string plus its terminator.
  {
    char narrow[8];
    wchar_t wide[8];
    EXPECT_EQ(kSrcLength, strlcpy(narrow, kSrc, arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, kSrc, sizeof(kSrc)));
    EXPECT_EQ(kSrcLength, wcslcpy(wide, kWideSrc, arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, kWideSrc, sizeof(kWideSrc)));
  }

  // Destination one element short: the last character is dropped so that the
  // terminator still fits.
  {
    char narrow[7];
    wchar_t wide[7];
    EXPECT_EQ(kSrcLength, strlcpy(narrow, kSrc, arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "abcdef", 7));
    EXPECT_EQ(kSrcLength, wcslcpy(wide, kWideSrc, arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, L"abcdef", sizeof(wchar_t) * 7));
  }

  // Destination much too small: only a terminated two-character prefix is
  // written.
  {
    char narrow[3];
    wchar_t wide[3];
    EXPECT_EQ(kSrcLength, strlcpy(narrow, kSrc, arraysize(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "ab", 3));
    EXPECT_EQ(kSrcLength, wcslcpy(wide, kWideSrc, arraysize(wide)));
    EXPECT_EQ(0, memcmp(wide, L"ab", sizeof(wchar_t) * 3));
  }
}
945 
// Checks IsWprintfFormatPortable() against a table pairing each wide format
// string with the verdict the function is expected to return for it.
TEST(StringUtilTest, WprintfFormatPortabilityTest) {
  static const struct {
    const wchar_t* format;
    bool expected_portable;
  } kCases[] = {
    // String and character conversions.
    { L"%ls", true },
    { L"%s", false },
    { L"%S", false },
    { L"%lS", false },
    { L"Hello, %s", false },
    { L"%lc", true },
    { L"%c", false },
    { L"%C", false },
    { L"%lC", false },
    // Mixtures of the above.
    { L"%ls %s", false },
    { L"%s %ls", false },
    { L"%s %ls %s", false },
    // Numeric conversions, lower and upper case.
    { L"%f", true },
    { L"%f %F", false },
    { L"%d %D", false },
    { L"%o %O", false },
    { L"%u %U", false },
    { L"%f %d %o %u", true },
    // Flags, widths and precisions.
    { L"%-8d (%02.1f%)", true },
    { L"% 10s", false },
    { L"% 10ls", true }
  };

  for (const auto& test : kCases)
    EXPECT_EQ(test.expected_portable, IsWprintfFormatPortable(test.format));
}
976 
// Checks RemoveChars() when used in place (input and output are the same
// string), including its boolean result, which the expectations below treat
// as "something was removed".
TEST(StringUtilTest, RemoveChars) {
  const char kCharsToRemove[] = "-/+*";
  std::string text = "A-+bc/d!*";

  // First pass: every listed character is stripped.
  EXPECT_TRUE(RemoveChars(text, kCharsToRemove, &text));
  EXPECT_EQ("Abcd!", text);

  // Second pass: nothing left to strip, so the text is unchanged.
  EXPECT_FALSE(RemoveChars(text, kCharsToRemove, &text));
  EXPECT_EQ("Abcd!", text);

  // An empty string stays empty.
  text.clear();
  EXPECT_FALSE(RemoveChars(text, kCharsToRemove, &text));
  EXPECT_TRUE(text.empty());
}
992 
// Checks ReplaceChars() against a table of inputs. Each row lists the input,
// the set of characters to replace, the replacement text, the expected
// output, and the expected boolean result.
TEST(StringUtilTest, ReplaceChars) {
  static const struct {
    const char* input;
    const char* replace_chars;
    const char* replace_with;
    const char* expected_output;
    bool expected_result;
  } kCases[] = {
    // Nothing to replace.
    { "", "", "", "", false },
    { "test", "", "", "test", false },
    { "test", "", "!", "test", false },
    { "test", "z", "!", "test", false },
    // A single character is matched.
    { "test", "e", "!", "t!st", true },
    { "test", "e", "!?", "t!?st", true },
    { "test", "ez", "!", "t!st", true },
    { "test", "zed", "!?", "t!?st", true },
    // Several characters are matched.
    { "test", "t", "!?", "!?es!?", true },
    { "test", "et", "!>", "!>!>s!>", true },
    { "test", "zest", "!", "!!!!", true },
    { "test", "szt", "!", "!e!!", true },
    // The replacement text itself contains the replaced character.
    { "test", "t", "test", "testestest", true },
  };

  for (const auto& test : kCases) {
    std::string actual_output;
    const bool actual_result = ReplaceChars(
        test.input, test.replace_chars, test.replace_with, &actual_output);
    EXPECT_EQ(test.expected_result, actual_result);
    EXPECT_EQ(test.expected_output, actual_output);
  }
}
1026 
// Checks ContainsOnlyChars() with an empty character set, a set of digits,
// and the ASCII / UTF-16 whitespace sets.
TEST(StringUtilTest, ContainsOnlyChars) {
  // With no permitted characters, only the empty string qualifies.
  const std::string no_chars;
  EXPECT_TRUE(ContainsOnlyChars(std::string(), no_chars));
  EXPECT_FALSE(ContainsOnlyChars("Hello", no_chars));

  // Digits: the order of the permitted characters is irrelevant.
  EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
  EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
  EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
  EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
  EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));

  // Whitespace: each input is checked as an 8-bit string against
  // kWhitespaceASCII and as a UTF-16 string against kWhitespaceUTF16.
  static const struct {
    const char* input;
    bool expected;
  } kWhitespaceCases[] = {
      {"", true},
      {" ", true},
      {"\t", true},
      {"\t \r \n  ", true},
      {"a", false},
      {"\thello\r \n  ", false},
  };
  for (const auto& test : kWhitespaceCases) {
    EXPECT_EQ(test.expected, ContainsOnlyChars(test.input, kWhitespaceASCII));
    EXPECT_EQ(test.expected, ContainsOnlyChars(ASCIIToUTF16(test.input),
                                               kWhitespaceUTF16));
  }
}
1054 
// Checks the three-way result (-1, 0 or 1) of CompareCaseInsensitiveASCII().
TEST(StringUtilTest, CompareCaseInsensitiveASCII) {
  static const struct {
    const char* lhs;
    const char* rhs;
    int expected;
  } kCases[] = {
      // Equal apart from case.
      {"", "", 0},
      {"Asdf", "aSDf", 0},
      // One string is a proper prefix of the other.
      {"Asdf", "aSDfA", -1},
      {"AsdfA", "aSDf", 1},
      // Same length, differing in the final character.
      {"AsdfA", "aSDfb", -1},
      {"Asdfb", "aSDfA", 1},
  };

  for (const auto& test : kCases)
    EXPECT_EQ(test.expected, CompareCaseInsensitiveASCII(test.lhs, test.rhs));
}
1067 
// Checks EqualsCaseInsensitiveASCII() for equal strings, strings differing in
// one character, and strings of different lengths.
TEST(StringUtilTest, EqualsCaseInsensitiveASCII) {
  static const struct {
    const char* lhs;
    const char* rhs;
    bool expected;
  } kCases[] = {
      {"", "", true},
      {"Asdf", "aSDF", true},
      {"bsdf", "aSDF", false},
      {"Asdf", "aSDFz", false},
  };

  for (const auto& test : kCases)
    EXPECT_EQ(test.expected, EqualsCaseInsensitiveASCII(test.lhs, test.rhs));
}
1074 
// Checks IsUnicodeWhitespace() against characters that must be rejected and
// characters that must be accepted.
TEST(StringUtilTest, IsUnicodeWhitespace) {
  // Characters that are not Unicode whitespace.
  static const wchar_t kNonWhitespace[] = {
      L'\0', L'A', L'0', L'.', L';', L'\x4100',
  };
  for (const wchar_t c : kNonWhitespace)
    EXPECT_FALSE(IsUnicodeWhitespace(c));

  // Characters that are Unicode whitespace.
  static const wchar_t kWhitespace[] = {
      L' ', L'\xa0', L'\x3000', L'\t', L'\r', L'\v', L'\f', L'\n',
  };
  for (const wchar_t c : kWhitespace)
    EXPECT_TRUE(IsUnicodeWhitespace(c));
}
1094 
1095 class WriteIntoTest : public testing::Test {
1096  protected:
WritesCorrectly(size_t num_chars)1097   static void WritesCorrectly(size_t num_chars) {
1098     std::string buffer;
1099     char kOriginal[] = "supercali";
1100     strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1101     // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1102     // string at the first \0.
1103     EXPECT_EQ(std::string(kOriginal,
1104                           std::min(num_chars, arraysize(kOriginal) - 1)),
1105               std::string(buffer.c_str()));
1106     EXPECT_EQ(num_chars, buffer.size());
1107   }
1108 };
1109 
// Checks WriteInto() for several lengths, and checks that writing through it
// leaves a string the target was copied from unchanged.
TEST_F(WriteIntoTest, WriteInto) {
  // WriteInto must reserve enough space and size the string correctly.
  static const size_t kLengths[] = {1, 2, 5000};
  for (const size_t length : kLengths)
    WritesCorrectly(length);

  // WriteInto must not modify other strings when the std::string
  // implementation is Copy-on-Write.
  const char kLive[] = "live";
  const char kDead[] = "dead";
  const std::string original = kLive;
  std::string copy = original;
  // arraysize(kDead) counts the terminator: request 5, copy 4 characters.
  strncpy(WriteInto(&copy, arraysize(kDead)), kDead, arraysize(kDead) - 1);

  EXPECT_EQ(kDead, copy);
  EXPECT_EQ(4u, copy.size());
  EXPECT_EQ(kLive, original);
  EXPECT_EQ(4u, original.size());
}
1129 
1130 }  // namespace base
1131