1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/strings/string_split.h"
6 
7 #include <stddef.h>
8 
9 #include "base/macros.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/utf_string_conversions.h"
12 #include "testing/gmock/include/gmock/gmock.h"
13 #include "testing/gtest/include/gtest/gtest.h"
14 
15 using ::testing::ElementsAre;
16 
17 namespace base {
18 
19 class SplitStringIntoKeyValuePairsTest : public testing::Test {
20  protected:
21   base::StringPairs kv_pairs;
22 };
23 
// An empty input is expected to parse successfully and produce no pairs.
TEST_F(SplitStringIntoKeyValuePairsTest, EmptyString) {
  const std::string empty_input;
  const char kKeyValueDelimiter = ':';
  const char kPairDelimiter = ',';

  const bool parsed = SplitStringIntoKeyValuePairs(
      empty_input, kKeyValueDelimiter, kPairDelimiter, &kv_pairs);

  EXPECT_TRUE(parsed);
  EXPECT_TRUE(kv_pairs.empty());
}
31 
// "key1" has no key-value delimiter, so the call is expected to report
// failure. Two entries are still expected: an empty key/value entry for the
// malformed pair, followed by the well-formed "key2:value2".
TEST_F(SplitStringIntoKeyValuePairsTest, MissingKeyValueDelimiter) {
  const char kInput[] = "key1,key2:value2";
  const char kKeyValueDelimiter = ':';
  const char kPairDelimiter = ',';

  const bool parsed = SplitStringIntoKeyValuePairs(
      kInput, kKeyValueDelimiter, kPairDelimiter, &kv_pairs);
  EXPECT_FALSE(parsed);

  ASSERT_EQ(2U, kv_pairs.size());
  const auto& malformed = kv_pairs[0];
  const auto& well_formed = kv_pairs[1];
  EXPECT_TRUE(malformed.first.empty());
  EXPECT_TRUE(malformed.second.empty());
  EXPECT_EQ("key2", well_formed.first);
  EXPECT_EQ("value2", well_formed.second);
}
43 
// A pair that starts with the key-value delimiter (":value1") is expected to
// parse successfully as an empty key mapped to "value1".
TEST_F(SplitStringIntoKeyValuePairsTest, EmptyKeyWithKeyValueDelimiter) {
  const char kInput[] = ":value1,key2:value2";
  const char kKeyValueDelimiter = ':';
  const char kPairDelimiter = ',';

  const bool parsed = SplitStringIntoKeyValuePairs(
      kInput, kKeyValueDelimiter, kPairDelimiter, &kv_pairs);
  EXPECT_TRUE(parsed);

  ASSERT_EQ(2U, kv_pairs.size());
  const auto& keyless = kv_pairs[0];
  const auto& regular = kv_pairs[1];
  EXPECT_TRUE(keyless.first.empty());
  EXPECT_EQ("value1", keyless.second);
  EXPECT_EQ("key2", regular.first);
  EXPECT_EQ("value2", regular.second);
}
55 
// A pair delimiter at the very start and at the very end of the input is
// expected not to add entries: only the two real pairs come back.
TEST_F(SplitStringIntoKeyValuePairsTest, TrailingAndLeadingPairDelimiter) {
  const char kInput[] = ",key1:value1,key2:value2,";
  const char kKeyValueDelimiter = ':';
  const char kPairDelimiter = ',';

  const bool parsed = SplitStringIntoKeyValuePairs(
      kInput, kKeyValueDelimiter, kPairDelimiter, &kv_pairs);
  EXPECT_TRUE(parsed);

  ASSERT_EQ(2U, kv_pairs.size());
  const auto& first_pair = kv_pairs[0];
  const auto& second_pair = kv_pairs[1];
  EXPECT_EQ("key1", first_pair.first);
  EXPECT_EQ("value1", first_pair.second);
  EXPECT_EQ("key2", second_pair.first);
  EXPECT_EQ("value2", second_pair.second);
}
67 
// Two consecutive pair delimiters enclose an empty pair. The test expects that
// empty pair to be dropped without the call reporting failure.
TEST_F(SplitStringIntoKeyValuePairsTest, EmptyPair) {
  const char kInput[] = "key1:value1,,key3:value3";
  const char kKeyValueDelimiter = ':';
  const char kPairDelimiter = ',';

  const bool parsed = SplitStringIntoKeyValuePairs(
      kInput, kKeyValueDelimiter, kPairDelimiter, &kv_pairs);
  EXPECT_TRUE(parsed);

  ASSERT_EQ(2U, kv_pairs.size());
  const auto& before_gap = kv_pairs[0];
  const auto& after_gap = kv_pairs[1];
  EXPECT_EQ("key1", before_gap.first);
  EXPECT_EQ("value1", before_gap.second);
  EXPECT_EQ("key3", after_gap.first);
  EXPECT_EQ("value3", after_gap.second);
}
79 
// A key followed by the delimiter but no value ("key1:") is expected to make
// the call report failure, while both pairs are still recorded; the first one
// carries an empty value.
TEST_F(SplitStringIntoKeyValuePairsTest, EmptyValue) {
  const char kInput[] = "key1:,key2:value2";
  const char kKeyValueDelimiter = ':';
  const char kPairDelimiter = ',';

  const bool parsed = SplitStringIntoKeyValuePairs(
      kInput, kKeyValueDelimiter, kPairDelimiter, &kv_pairs);
  EXPECT_FALSE(parsed);

  ASSERT_EQ(2U, kv_pairs.size());
  const auto& valueless = kv_pairs[0];
  const auto& complete = kv_pairs[1];
  EXPECT_EQ("key1", valueless.first);
  EXPECT_EQ("", valueless.second);
  EXPECT_EQ("key2", complete.first);
  EXPECT_EQ("value2", complete.second);
}
91 
// Spaces on either side of the key-value delimiter are expected to stay
// attached to the key and to the value respectively.
TEST_F(SplitStringIntoKeyValuePairsTest, UntrimmedWhitespace) {
  const char kInput[] = "key1 : value1";
  const char kKeyValueDelimiter = ':';
  const char kPairDelimiter = ',';

  const bool parsed = SplitStringIntoKeyValuePairs(
      kInput, kKeyValueDelimiter, kPairDelimiter, &kv_pairs);
  EXPECT_TRUE(parsed);

  ASSERT_EQ(1U, kv_pairs.size());
  const auto& only_pair = kv_pairs[0];
  EXPECT_EQ("key1 ", only_pair.first);
  EXPECT_EQ(" value1", only_pair.second);
}
101 
// Spaces surrounding the pair delimiter are expected to be stripped, so
// neither "value1" nor "key2" keeps the adjacent space.
TEST_F(SplitStringIntoKeyValuePairsTest, TrimmedWhitespace) {
  const char kInput[] = "key1:value1 , key2:value2";
  const char kKeyValueDelimiter = ':';
  const char kPairDelimiter = ',';

  const bool parsed = SplitStringIntoKeyValuePairs(
      kInput, kKeyValueDelimiter, kPairDelimiter, &kv_pairs);
  EXPECT_TRUE(parsed);

  ASSERT_EQ(2U, kv_pairs.size());
  const auto& first_pair = kv_pairs[0];
  const auto& second_pair = kv_pairs[1];
  EXPECT_EQ("key1", first_pair.first);
  EXPECT_EQ("value1", first_pair.second);
  EXPECT_EQ("key2", second_pair.first);
  EXPECT_EQ("value2", second_pair.second);
}
113 
// A run of key-value delimiters between key and value ("key1:::value1") is
// expected to separate them like a single delimiter would: the value comes
// back as "value1" with no leading colons.
TEST_F(SplitStringIntoKeyValuePairsTest, MultipleKeyValueDelimiters) {
  const char kInput[] = "key1:::value1,key2:value2";
  const char kKeyValueDelimiter = ':';
  const char kPairDelimiter = ',';

  const bool parsed = SplitStringIntoKeyValuePairs(
      kInput, kKeyValueDelimiter, kPairDelimiter, &kv_pairs);
  EXPECT_TRUE(parsed);

  ASSERT_EQ(2U, kv_pairs.size());
  const auto& first_pair = kv_pairs[0];
  const auto& second_pair = kv_pairs[1];
  EXPECT_EQ("key1", first_pair.first);
  EXPECT_EQ("value1", first_pair.second);
  EXPECT_EQ("key2", second_pair.first);
  EXPECT_EQ("value2", second_pair.second);
}
125 
// Only the caller-supplied delimiters ('X' and 'Y') may split the input.
// Punctuation, whitespace and escaped characters inside keys and values are
// expected to come back unchanged.
TEST_F(SplitStringIntoKeyValuePairsTest, OnlySplitAtGivenSeparator) {
  const std::string token("a ?!@#$%^&*()_+:/{}\\\t\nb");
  const std::string one_pair = token + "X" + token;
  const std::string input = one_pair + "Y" + one_pair;

  const bool parsed = SplitStringIntoKeyValuePairs(input,
                                                   'X',  // Key-value delimiter
                                                   'Y',  // Pair delimiter
                                                   &kv_pairs);
  EXPECT_TRUE(parsed);

  ASSERT_EQ(2U, kv_pairs.size());
  // Both pairs use the same token as key and as value.
  for (const auto& pair : kv_pairs) {
    EXPECT_EQ(token, pair.first);
    EXPECT_EQ(token, pair.second);
  }
}
138 
139 
// A key-value delimiter that appears again inside the value ("va:ue1") is
// expected to remain part of the value instead of splitting it further.
TEST_F(SplitStringIntoKeyValuePairsTest, DelimiterInValue) {
  const char kInput[] = "key1:va:ue1,key2:value2";
  const char kKeyValueDelimiter = ':';
  const char kPairDelimiter = ',';

  const bool parsed = SplitStringIntoKeyValuePairs(
      kInput, kKeyValueDelimiter, kPairDelimiter, &kv_pairs);
  EXPECT_TRUE(parsed);

  ASSERT_EQ(2U, kv_pairs.size());
  const auto& first_pair = kv_pairs[0];
  const auto& second_pair = kv_pairs[1];
  EXPECT_EQ("key1", first_pair.first);
  EXPECT_EQ("va:ue1", first_pair.second);
  EXPECT_EQ("key2", second_pair.first);
  EXPECT_EQ("value2", second_pair.second);
}
151 
// With SPLIT_WANT_ALL, an empty input is expected to yield exactly one piece,
// and that piece is the empty string.
TEST(SplitStringUsingSubstrTest, EmptyString) {
  const std::string empty_input;
  const char kDelimiter[] = "DELIMITER";

  const std::vector<std::string> pieces = SplitStringUsingSubstr(
      empty_input, kDelimiter, TRIM_WHITESPACE, SPLIT_WANT_ALL);

  ASSERT_EQ(1u, pieces.size());
  EXPECT_THAT(pieces, ElementsAre(""));
}
158 
// Exercises the basic behaviour of SplitString(): empty input, an empty
// separator set, splitting on any character of a separator set, and splitting
// along whitespace.
TEST(StringUtilTest, SplitString_Basics) {
  std::vector<std::string> r;

  // Empty input: no pieces at all are expected.
  r = SplitString(std::string(), ",:;", KEEP_WHITESPACE, SPLIT_WANT_ALL);
  EXPECT_TRUE(r.empty());

  // Empty separator list: the whole input comes back as a single piece.
  r = SplitString("hello, world", "", KEEP_WHITESPACE, SPLIT_WANT_ALL);
  ASSERT_EQ(1u, r.size());
  EXPECT_EQ("hello, world", r[0]);

  // Should split on any of the separators. Six separators in a row leave
  // seven empty pieces.
  r = SplitString("::,,;;", ",:;", KEEP_WHITESPACE, SPLIT_WANT_ALL);
  ASSERT_EQ(7u, r.size());
  // Bind by const reference so each piece is inspected without being copied.
  for (const auto& str : r)
    ASSERT_TRUE(str.empty());

  // Mixed separators with trimming; the empty piece after the trailing ':' is
  // dropped by SPLIT_WANT_NONEMPTY.
  r = SplitString("red, green; blue:", ",:;", TRIM_WHITESPACE,
                  SPLIT_WANT_NONEMPTY);
  ASSERT_EQ(3u, r.size());
  EXPECT_EQ("red", r[0]);
  EXPECT_EQ("green", r[1]);
  EXPECT_EQ("blue", r[2]);

  // Want to split a string along whitespace sequences.
  r = SplitString("  red green   \tblue\n", " \t\n", TRIM_WHITESPACE,
                  SPLIT_WANT_NONEMPTY);
  ASSERT_EQ(3u, r.size());
  EXPECT_EQ("red", r[0]);
  EXPECT_EQ("green", r[1]);
  EXPECT_EQ("blue", r[2]);

  // Weird case of splitting on the space character itself while keeping
  // empty pieces. TRIM_WHITESPACE is passed, but the separator already
  // consumed every space, so the expected pieces contain none.
  r = SplitString(" red ", " ", TRIM_WHITESPACE, SPLIT_WANT_ALL);
  ASSERT_EQ(3u, r.size());
  EXPECT_EQ("", r[0]);  // Before the first space.
  EXPECT_EQ("red", r[1]);
  EXPECT_EQ("", r[2]);  // After the last space.
}
198 
// Checks how the whitespace-handling flag and the result-type flag of
// SplitString() combine, using "," as the only separator throughout.
TEST(StringUtilTest, SplitString_WhitespaceAndResultType) {
  const char kComma[] = ",";
  const char kSingleSpace[] = " ";
  const char kCommaSpaceComma[] = ", ,";
  std::vector<std::string> pieces;

  // An empty input gives no pieces for either result type.
  pieces = SplitString(std::string(), kComma, KEEP_WHITESPACE, SPLIT_WANT_ALL);
  EXPECT_TRUE(pieces.empty());
  pieces = SplitString(std::string(), kComma, KEEP_WHITESPACE,
                       SPLIT_WANT_NONEMPTY);
  EXPECT_TRUE(pieces.empty());

  // A lone space that gets trimmed: one empty piece when all pieces are
  // wanted, nothing when only non-empty pieces are wanted.
  pieces = SplitString(kSingleSpace, kComma, TRIM_WHITESPACE, SPLIT_WANT_ALL);
  ASSERT_EQ(1u, pieces.size());
  EXPECT_EQ("", pieces[0]);
  pieces = SplitString(kSingleSpace, kComma, TRIM_WHITESPACE,
                       SPLIT_WANT_NONEMPTY);
  EXPECT_TRUE(pieces.empty());

  // All four flag combinations applied to ", ,".

  // Keep whitespace, want all: the middle piece is the space itself.
  pieces = SplitString(kCommaSpaceComma, kComma, KEEP_WHITESPACE,
                       SPLIT_WANT_ALL);
  ASSERT_EQ(3u, pieces.size());
  EXPECT_EQ("", pieces[0]);
  EXPECT_EQ(" ", pieces[1]);
  EXPECT_EQ("", pieces[2]);

  // Keep whitespace, want non-empty: only the space survives.
  pieces = SplitString(kCommaSpaceComma, kComma, KEEP_WHITESPACE,
                       SPLIT_WANT_NONEMPTY);
  ASSERT_EQ(1u, pieces.size());
  ASSERT_EQ(" ", pieces[0]);

  // Trim whitespace, want all: three empty pieces.
  pieces = SplitString(kCommaSpaceComma, kComma, TRIM_WHITESPACE,
                       SPLIT_WANT_ALL);
  ASSERT_EQ(3u, pieces.size());
  EXPECT_EQ("", pieces[0]);
  EXPECT_EQ("", pieces[1]);
  EXPECT_EQ("", pieces[2]);

  // Trim whitespace, want non-empty: nothing is left.
  pieces = SplitString(kCommaSpaceComma, kComma, TRIM_WHITESPACE,
                       SPLIT_WANT_NONEMPTY);
  ASSERT_TRUE(pieces.empty());
}
232 
// When the delimiter never occurs, the whole input is expected back as the
// single piece.
TEST(SplitStringUsingSubstrTest, StringWithNoDelimiter) {
  const char kInput[] = "alongwordwithnodelimiter";
  const char kDelimiter[] = "DELIMITER";

  const std::vector<std::string> pieces = SplitStringUsingSubstr(
      kInput, kDelimiter, TRIM_WHITESPACE, SPLIT_WANT_ALL);

  ASSERT_EQ(1u, pieces.size());
  EXPECT_THAT(pieces, ElementsAre("alongwordwithnodelimiter"));
}
240 
// Input that begins with three delimiters. Despite the test name, with
// SPLIT_WANT_ALL each leading delimiter is expected to contribute an empty
// piece ahead of "one", "two" and "three".
TEST(SplitStringUsingSubstrTest, LeadingDelimitersSkipped) {
  const char kInput[] =
      "DELIMITERDELIMITERDELIMITERoneDELIMITERtwoDELIMITERthree";
  const char kDelimiter[] = "DELIMITER";

  const std::vector<std::string> pieces = SplitStringUsingSubstr(
      kInput, kDelimiter, TRIM_WHITESPACE, SPLIT_WANT_ALL);

  ASSERT_EQ(6u, pieces.size());
  EXPECT_THAT(pieces, ElementsAre("", "", "", "one", "two", "three"));
}
248 
// Input with runs of back-to-back delimiters in the middle. Despite the test
// name, with SPLIT_WANT_ALL every gap between adjacent delimiters is expected
// to show up as an empty piece.
TEST(SplitStringUsingSubstrTest, ConsecutiveDelimitersSkipped) {
  const char kInput[] =
      "unoDELIMITERDELIMITERDELIMITERdosDELIMITERtresDELIMITERDELIMITERcuatro";
  const char kDelimiter[] = "DELIMITER";

  const std::vector<std::string> pieces = SplitStringUsingSubstr(
      kInput, kDelimiter, TRIM_WHITESPACE, SPLIT_WANT_ALL);

  ASSERT_EQ(7u, pieces.size());
  EXPECT_THAT(pieces, ElementsAre("uno", "", "", "dos", "tres", "", "cuatro"));
}
256 
// Input that ends with three delimiters. Despite the test name, with
// SPLIT_WANT_ALL each trailing delimiter is expected to contribute an empty
// piece after "quatre".
TEST(SplitStringUsingSubstrTest, TrailingDelimitersSkipped) {
  const char kInput[] =
      "unDELIMITERdeuxDELIMITERtroisDELIMITERquatreDELIMITERDELIMITERDELIMITER";
  const char kDelimiter[] = "DELIMITER";

  const std::vector<std::string> pieces = SplitStringUsingSubstr(
      kInput, kDelimiter, TRIM_WHITESPACE, SPLIT_WANT_ALL);

  ASSERT_EQ(7u, pieces.size());
  EXPECT_THAT(pieces,
              ElementsAre("un", "deux", "trois", "quatre", "", "", ""));
}
265 
// StringPiece variant: when the delimiter never occurs, the whole input is
// expected back as the single piece.
TEST(SplitStringPieceUsingSubstrTest, StringWithNoDelimiter) {
  const char kInput[] = "alongwordwithnodelimiter";
  const char kDelimiter[] = "DELIMITER";

  const std::vector<StringPiece> pieces = SplitStringPieceUsingSubstr(
      kInput, kDelimiter, TRIM_WHITESPACE, SPLIT_WANT_ALL);

  ASSERT_EQ(1u, pieces.size());
  EXPECT_THAT(pieces, ElementsAre("alongwordwithnodelimiter"));
}
273 
// StringPiece variant of the leading-delimiter case. Despite the test name,
// with SPLIT_WANT_ALL each of the three leading delimiters is expected to
// contribute an empty piece.
TEST(SplitStringPieceUsingSubstrTest, LeadingDelimitersSkipped) {
  const char kInput[] =
      "DELIMITERDELIMITERDELIMITERoneDELIMITERtwoDELIMITERthree";
  const char kDelimiter[] = "DELIMITER";

  const std::vector<StringPiece> pieces = SplitStringPieceUsingSubstr(
      kInput, kDelimiter, TRIM_WHITESPACE, SPLIT_WANT_ALL);

  ASSERT_EQ(6u, pieces.size());
  EXPECT_THAT(pieces, ElementsAre("", "", "", "one", "two", "three"));
}
281 
// StringPiece variant of the consecutive-delimiter case. Despite the test
// name, with SPLIT_WANT_ALL every gap between adjacent delimiters is expected
// to show up as an empty piece.
TEST(SplitStringPieceUsingSubstrTest, ConsecutiveDelimitersSkipped) {
  const char kInput[] =
      "unoDELIMITERDELIMITERDELIMITERdosDELIMITERtresDELIMITERDELIMITERcuatro";
  const char kDelimiter[] = "DELIMITER";

  const std::vector<StringPiece> pieces = SplitStringPieceUsingSubstr(
      kInput, kDelimiter, TRIM_WHITESPACE, SPLIT_WANT_ALL);

  ASSERT_EQ(7u, pieces.size());
  EXPECT_THAT(pieces, ElementsAre("uno", "", "", "dos", "tres", "", "cuatro"));
}
289 
// StringPiece variant of the trailing-delimiter case. Despite the test name,
// with SPLIT_WANT_ALL each of the three trailing delimiters is expected to
// contribute an empty piece.
TEST(SplitStringPieceUsingSubstrTest, TrailingDelimitersSkipped) {
  const char kInput[] =
      "unDELIMITERdeuxDELIMITERtroisDELIMITERquatreDELIMITERDELIMITERDELIMITER";
  const char kDelimiter[] = "DELIMITER";

  const std::vector<StringPiece> pieces = SplitStringPieceUsingSubstr(
      kInput, kDelimiter, TRIM_WHITESPACE, SPLIT_WANT_ALL);

  ASSERT_EQ(7u, pieces.size());
  EXPECT_THAT(pieces,
              ElementsAre("un", "deux", "trois", "quatre", "", "", ""));
}
298 
// With KEEP_WHITESPACE, the space, tab and newline that precede each delimiter
// are expected to remain at the end of their pieces.
TEST(SplitStringPieceUsingSubstrTest, KeepWhitespace) {
  const char kInput[] = "un DELIMITERdeux\tDELIMITERtrois\nDELIMITERquatre";
  const char kDelimiter[] = "DELIMITER";

  const std::vector<StringPiece> pieces = SplitStringPieceUsingSubstr(
      kInput, kDelimiter, KEEP_WHITESPACE, SPLIT_WANT_ALL);

  ASSERT_EQ(4u, pieces.size());
  EXPECT_THAT(pieces, ElementsAre("un ", "deux\t", "trois\n", "quatre"));
}
306 
// With TRIM_WHITESPACE, the space, tab and newline that precede each delimiter
// are expected to be stripped from their pieces.
TEST(SplitStringPieceUsingSubstrTest, TrimWhitespace) {
  const char kInput[] = "un DELIMITERdeux\tDELIMITERtrois\nDELIMITERquatre";
  const char kDelimiter[] = "DELIMITER";

  const std::vector<StringPiece> pieces = SplitStringPieceUsingSubstr(
      kInput, kDelimiter, TRIM_WHITESPACE, SPLIT_WANT_ALL);

  ASSERT_EQ(4u, pieces.size());
  EXPECT_THAT(pieces, ElementsAre("un", "deux", "trois", "quatre"));
}
314 
// With SPLIT_WANT_ALL, the two trailing delimiters are expected to leave two
// empty pieces at the end of the result.
TEST(SplitStringPieceUsingSubstrTest, SplitWantAll) {
  const char kInput[] = "unDELIMITERdeuxDELIMITERtroisDELIMITERDELIMITER";
  const char kDelimiter[] = "DELIMITER";

  const std::vector<StringPiece> pieces = SplitStringPieceUsingSubstr(
      kInput, kDelimiter, TRIM_WHITESPACE, SPLIT_WANT_ALL);

  ASSERT_EQ(5u, pieces.size());
  EXPECT_THAT(pieces, ElementsAre("un", "deux", "trois", "", ""));
}
322 
// With SPLIT_WANT_NONEMPTY, the empty pieces produced by the two trailing
// delimiters are expected to be dropped.
TEST(SplitStringPieceUsingSubstrTest, SplitWantNonEmpty) {
  const char kInput[] = "unDELIMITERdeuxDELIMITERtroisDELIMITERDELIMITER";
  const char kDelimiter[] = "DELIMITER";

  const std::vector<StringPiece> pieces = SplitStringPieceUsingSubstr(
      kInput, kDelimiter, TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY);

  ASSERT_EQ(3u, pieces.size());
  EXPECT_THAT(pieces, ElementsAre("un", "deux", "trois"));
}
330 
// Verifies that KEEP_WHITESPACE returns each piece exactly as it appears
// between separators, including pieces made up solely of whitespace.
//
// Assertions are written as EXPECT_EQ(expected, actual), matching the argument
// order used by the other tests in this file.
TEST(StringSplitTest, StringSplitKeepWhitespace) {
  std::vector<std::string> r;

  // The separator never occurs, so the all-space input comes back untouched.
  r = SplitString("   ", "*", KEEP_WHITESPACE, SPLIT_WANT_ALL);
  ASSERT_EQ(1U, r.size());
  EXPECT_EQ("   ", r[0]);

  // Splitting on tabs: the spaces between the tabs are kept as they are.
  r = SplitString("\t  \ta\t ", "\t", KEEP_WHITESPACE, SPLIT_WANT_ALL);
  ASSERT_EQ(4U, r.size());
  EXPECT_EQ("", r[0]);
  EXPECT_EQ("  ", r[1]);
  EXPECT_EQ("a", r[2]);
  EXPECT_EQ(" ", r[3]);

  // Splitting on newline only: the tabs stay inside the pieces.
  r = SplitString("\ta\t\nb\tcc", "\n", KEEP_WHITESPACE, SPLIT_WANT_ALL);
  ASSERT_EQ(2U, r.size());
  EXPECT_EQ("\ta\t", r[0]);
  EXPECT_EQ("b\tcc", r[1]);
}
352 
// Table-driven check of splitting along ASCII whitespace with
// SPLIT_WANT_NONEMPTY. Each row gives an input, the number of pieces expected,
// and the expected first two pieces ("" when there is no such piece).
TEST(StringSplitTest, SplitStringAlongWhitespace) {
  struct TestCase {
    const char* input;
    const size_t expected_result_count;
    const char* output1;
    const char* output2;
  };
  const TestCase kCases[] = {
    { "a",       1, "a",  ""   },
    { " ",       0, "",   ""   },
    { " a",      1, "a",  ""   },
    { " ab ",    1, "ab", ""   },
    { " ab c",   2, "ab", "c"  },
    { " ab c ",  2, "ab", "c"  },
    { " ab cd",  2, "ab", "cd" },
    { " ab cd ", 2, "ab", "cd" },
    { " \ta\t",  1, "a",  ""   },
    { " b\ta\t", 2, "b",  "a"  },
    { " b\tat",  2, "b",  "at" },
    { "b\tat",   2, "b",  "at" },
    { "b\t at",  2, "b",  "at" },
  };

  for (const TestCase& test_case : kCases) {
    const std::vector<std::string> pieces = SplitString(
        test_case.input, kWhitespaceASCII, KEEP_WHITESPACE,
        SPLIT_WANT_NONEMPTY);

    ASSERT_EQ(test_case.expected_result_count, pieces.size());
    // Only compare the outputs that the row says must exist.
    if (test_case.expected_result_count > 0)
      ASSERT_EQ(test_case.output1, pieces[0]);
    if (test_case.expected_result_count > 1)
      ASSERT_EQ(test_case.output2, pieces[1]);
  }
}
385 
386 }  // namespace base
387