• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <string>
6 
7 #include "base/i18n/rtl.h"
8 #include "base/i18n/string_search.h"
9 #include "base/strings/string16.h"
10 #include "base/strings/utf_string_conversions.h"
11 #include "testing/gtest/include/gtest/gtest.h"
12 #include "third_party/icu/source/i18n/unicode/usearch.h"
13 
14 namespace base {
15 namespace i18n {
16 
17 // Note on setting default locale for testing: The current default locale on
18 // the Mac trybot is en_US_POSIX, with which primary-level collation strength
19 // string search is case-sensitive, when normally it should be
20 // case-insensitive. In other locales (including en_US which English speakers
21 // in the U.S. use), this search would be case-insensitive as expected.
22 
// Checks StringSearchIgnoringCaseAndAccents() against plain ASCII input:
// a match at the start of the text, a whitespace mismatch, a match in the
// middle of repetitive text, empty text, empty pattern, and differing case.
TEST(StringSearchTest, ASCII) {
  // Swap an en_US_POSIX default locale for en_US while the test runs (see
  // the note on default locales above); it is put back at the end.
  const std::string original_locale(uloc_getDefault());
  const bool override_posix_locale = (original_locale == "en_US_POSIX");
  if (override_posix_locale) {
    SetICUDefaultLocale("en_US");
  }

  const string16 empty;
  const string16 hello = ASCIIToUTF16("hello");
  const string16 hello_world = ASCIIToUTF16("hello world");
  const string16 spaced_pattern = ASCIIToUTF16("h    e l l o");
  const string16 spaced_text = ASCIIToUTF16("h   e l l o");
  const string16 repeated_pattern = ASCIIToUTF16("aabaaa");
  const string16 repeated_text = ASCIIToUTF16("aaabaabaaa");
  const string16 lower_case = ASCIIToUTF16("case insensitivity");
  const string16 mixed_case = ASCIIToUTF16("CaSe InSeNsItIvItY");

  size_t match_index = 0;
  size_t match_length = 0;

  // Pattern located at the very beginning of the text.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      hello, hello_world, &match_index, &match_length));
  EXPECT_EQ(0U, match_index);
  EXPECT_EQ(5U, match_length);

  // The pattern has four spaces after 'h', the text only three: no match.
  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      spaced_pattern, spaced_text, &match_index, &match_length));

  // Pattern found part-way through a text made of repeated characters.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      repeated_pattern, repeated_text, &match_index, &match_length));
  EXPECT_EQ(4U, match_index);
  EXPECT_EQ(6U, match_length);

  // A non-empty pattern is not found in an empty text.
  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      ASCIIToUTF16("searching within empty string"), empty,
      &match_index, &match_length));

  // An empty pattern is reported as a zero-length match at index 0.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      empty, ASCIIToUTF16("searching for empty string"),
      &match_index, &match_length));
  EXPECT_EQ(0U, match_index);
  EXPECT_EQ(0U, match_length);

  // Letter case is ignored; the whole 18-character text is the match.
  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      lower_case, mixed_case, &match_index, &match_length));
  EXPECT_EQ(0U, match_index);
  EXPECT_EQ(18U, match_length);

  if (override_posix_locale) {
    SetICUDefaultLocale(original_locale.data());
  }
}
64 
TEST(StringSearchTest,UnicodeLocaleIndependent)65 TEST(StringSearchTest, UnicodeLocaleIndependent) {
66   // Base characters
67   const string16 e_base = WideToUTF16(L"e");
68   const string16 E_base = WideToUTF16(L"E");
69   const string16 a_base = WideToUTF16(L"a");
70 
71   // Composed characters
72   const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
73   const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
74   const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
75   const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
76   const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");
77 
78   // Decomposed characters
79   const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
80   const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
81   const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
82   const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
83   const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");
84 
85   std::string default_locale(uloc_getDefault());
86   bool locale_is_posix = (default_locale == "en_US_POSIX");
87   if (locale_is_posix)
88     SetICUDefaultLocale("en_US");
89 
90   size_t index = 0;
91   size_t length = 0;
92 
93   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
94       e_base, e_with_acute_accent, &index, &length));
95   EXPECT_EQ(0U, index);
96   EXPECT_EQ(e_with_acute_accent.size(), length);
97 
98   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
99       e_with_acute_accent, e_base, &index, &length));
100   EXPECT_EQ(0U, index);
101   EXPECT_EQ(e_base.size(), length);
102 
103   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
104       e_base, e_with_acute_combining_mark, &index, &length));
105   EXPECT_EQ(0U, index);
106   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
107 
108   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
109       e_with_acute_combining_mark, e_base, &index, &length));
110   EXPECT_EQ(0U, index);
111   EXPECT_EQ(e_base.size(), length);
112 
113   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
114       e_with_acute_combining_mark, e_with_acute_accent,
115       &index, &length));
116   EXPECT_EQ(0U, index);
117   EXPECT_EQ(e_with_acute_accent.size(), length);
118 
119   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
120       e_with_acute_accent, e_with_acute_combining_mark,
121       &index, &length));
122   EXPECT_EQ(0U, index);
123   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
124 
125   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
126       e_with_acute_combining_mark, e_with_grave_combining_mark,
127       &index, &length));
128   EXPECT_EQ(0U, index);
129   EXPECT_EQ(e_with_grave_combining_mark.size(), length);
130 
131   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
132       e_with_grave_combining_mark, e_with_acute_combining_mark,
133       &index, &length));
134   EXPECT_EQ(0U, index);
135   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
136 
137   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
138       e_with_acute_combining_mark, e_with_grave_accent, &index, &length));
139   EXPECT_EQ(0U, index);
140   EXPECT_EQ(e_with_grave_accent.size(), length);
141 
142   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
143       e_with_grave_accent, e_with_acute_combining_mark, &index, &length));
144   EXPECT_EQ(0U, index);
145   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
146 
147   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
148       E_with_acute_accent, e_with_acute_accent, &index, &length));
149   EXPECT_EQ(0U, index);
150   EXPECT_EQ(e_with_acute_accent.size(), length);
151 
152   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
153       E_with_grave_accent, e_with_acute_accent, &index, &length));
154   EXPECT_EQ(0U, index);
155   EXPECT_EQ(e_with_acute_accent.size(), length);
156 
157   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
158       E_with_acute_combining_mark, e_with_grave_accent, &index, &length));
159   EXPECT_EQ(0U, index);
160   EXPECT_EQ(e_with_grave_accent.size(), length);
161 
162   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
163       E_with_grave_combining_mark, e_with_acute_accent, &index, &length));
164   EXPECT_EQ(0U, index);
165   EXPECT_EQ(e_with_acute_accent.size(), length);
166 
167   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
168       E_base, e_with_grave_accent, &index, &length));
169   EXPECT_EQ(0U, index);
170   EXPECT_EQ(e_with_grave_accent.size(), length);
171 
172   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
173       a_with_acute_accent, e_with_acute_accent, &index, &length));
174 
175   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
176       a_with_acute_combining_mark, e_with_acute_combining_mark,
177       &index, &length));
178 
179   if (locale_is_posix)
180     SetICUDefaultLocale(default_locale.data());
181 }
182 
// Checks that the search result follows the ICU default locale: "a" is
// expected to match a-with-ring under the locale the test starts with, but
// not once the default locale has been switched to "da".
TEST(StringSearchTest, UnicodeLocaleDependent) {
  // Base characters
  const string16 a_base = WideToUTF16(L"a");

  // Composed characters
  const string16 a_with_ring = WideToUTF16(L"\u00e5");

  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
      a_base, a_with_ring, NULL, NULL));

  // Copy the locale name before changing the default, as the other tests in
  // this file do, so that restoring it below does not rely on the buffer
  // returned by uloc_getDefault() staying intact across the locale switch.
  const std::string default_locale(uloc_getDefault());
  SetICUDefaultLocale("da");

  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
      a_base, a_with_ring, NULL, NULL));

  // Put the original default locale back for subsequent tests.
  SetICUDefaultLocale(default_locale.data());
}
201 
// Checks that one FixedPatternStringSearchIgnoringCaseAndAccents object can
// be reused to search several different texts for the same pattern.
TEST(StringSearchTest, FixedPatternMultipleSearch) {
  // Swap an en_US_POSIX default locale for en_US while the test runs (see
  // the note on default locales above); it is put back at the end.
  const std::string original_locale(uloc_getDefault());
  const bool override_posix_locale = (original_locale == "en_US_POSIX");
  if (override_posix_locale) {
    SetICUDefaultLocale("en_US");
  }

  const string16 embedded_text = ASCIIToUTF16("12hello34");
  const string16 unrelated_text = ASCIIToUTF16("bye");
  const string16 mixed_case_text = ASCIIToUTF16("hELLo");

  size_t match_index = 0;
  size_t match_length = 0;

  // Search "hello" over multiple texts.
  FixedPatternStringSearchIgnoringCaseAndAccents hello_search(
      ASCIIToUTF16("hello"));

  // Pattern surrounded by other characters.
  EXPECT_TRUE(
      hello_search.Search(embedded_text, &match_index, &match_length));
  EXPECT_EQ(2U, match_index);
  EXPECT_EQ(5U, match_length);

  // Text that does not contain the pattern.
  EXPECT_FALSE(
      hello_search.Search(unrelated_text, &match_index, &match_length));

  // Same letters in different case still match.
  EXPECT_TRUE(
      hello_search.Search(mixed_case_text, &match_index, &match_length));
  EXPECT_EQ(0U, match_index);
  EXPECT_EQ(5U, match_length);

  if (override_posix_locale) {
    SetICUDefaultLocale(original_locale.data());
  }
}
224 
225 }  // namespace i18n
226 }  // namespace base
227