1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.inputmethod.latin.common;
18 
19 import android.test.AndroidTestCase;
20 import android.test.suitebuilder.annotation.SmallTest;
21 
22 import java.util.Locale;
23 
24 @SmallTest
25 public class StringUtilsTests extends AndroidTestCase {
26     private static final Locale US = Locale.US;
27     private static final Locale GERMAN = Locale.GERMAN;
28     private static final Locale TURKEY = new Locale("tr", "TR");
29     private static final Locale GREECE = new Locale("el", "GR");
30 
assert_toTitleCaseOfKeyLabel(final Locale locale, final String lowerCase, final String expected)31     private static void assert_toTitleCaseOfKeyLabel(final Locale locale,
32             final String lowerCase, final String expected) {
33         assertEquals(lowerCase + " in " + locale, expected,
34                 StringUtils.toTitleCaseOfKeyLabel(lowerCase, locale));
35     }
36 
test_toTitleCaseOfKeyLabel()37     public void test_toTitleCaseOfKeyLabel() {
38         assert_toTitleCaseOfKeyLabel(US, null, null);
39         assert_toTitleCaseOfKeyLabel(US, "", "");
40         assert_toTitleCaseOfKeyLabel(US, "aeiou", "AEIOU");
41         // U+00E0: "à" LATIN SMALL LETTER A WITH GRAVE
42         // U+00E8: "è" LATIN SMALL LETTER E WITH GRAVE
43         // U+00EE: "î" LATIN SMALL LETTER I WITH CIRCUMFLEX
44         // U+00F6: "ö" LATIN SMALL LETTER O WITH DIAERESIS
45         // U+016B: "ū" LATIN SMALL LETTER U WITH MACRON
46         // U+00F1: "ñ" LATIN SMALL LETTER N WITH TILDE
47         // U+00E7: "ç" LATIN SMALL LETTER C WITH CEDILLA
48         // U+00C0: "À" LATIN CAPITAL LETTER A WITH GRAVE
49         // U+00C8: "È" LATIN CAPITAL LETTER E WITH GRAVE
50         // U+00CE: "Î" LATIN CAPITAL LETTER I WITH CIRCUMFLEX
51         // U+00D6: "Ö" LATIN CAPITAL LETTER O WITH DIAERESIS
52         // U+016A: "Ū" LATIN CAPITAL LETTER U WITH MACRON
53         // U+00D1: "Ñ" LATIN CAPITAL LETTER N WITH TILDE
54         // U+00C7: "Ç" LATIN CAPITAL LETTER C WITH CEDILLA
55         assert_toTitleCaseOfKeyLabel(US,
56                 "\u00E0\u00E8\u00EE\u00F6\u016B\u00F1\u00E7",
57                 "\u00C0\u00C8\u00CE\u00D6\u016A\u00D1\u00C7");
58         // U+00DF: "ß" LATIN SMALL LETTER SHARP S
59         // U+015B: "ś" LATIN SMALL LETTER S WITH ACUTE
60         // U+0161: "š" LATIN SMALL LETTER S WITH CARON
61         // U+015A: "Ś" LATIN CAPITAL LETTER S WITH ACUTE
62         // U+0160: "Š" LATIN CAPITAL LETTER S WITH CARONZ
63         assert_toTitleCaseOfKeyLabel(GERMAN,
64                 "\u00DF\u015B\u0161",
65                 "SS\u015A\u0160");
66         // U+0259: "ə" LATIN SMALL LETTER SCHWA
67         // U+0069: "i" LATIN SMALL LETTER I
68         // U+0131: "ı" LATIN SMALL LETTER DOTLESS I
69         // U+018F: "Ə" LATIN SMALL LETTER SCHWA
70         // U+0130: "İ" LATIN SMALL LETTER I WITH DOT ABOVE
71         // U+0049: "I" LATIN SMALL LETTER I
72         assert_toTitleCaseOfKeyLabel(TURKEY,
73                 "\u0259\u0069\u0131",
74                 "\u018F\u0130\u0049");
75         // U+03C3: "σ" GREEK SMALL LETTER SIGMA
76         // U+03C2: "ς" GREEK SMALL LETTER FINAL SIGMA
77         // U+03A3: "Σ" GREEK CAPITAL LETTER SIGMA
78         assert_toTitleCaseOfKeyLabel(GREECE,
79                 "\u03C3\u03C2",
80                 "\u03A3\u03A3");
81         // U+03AC: "ά" GREEK SMALL LETTER ALPHA WITH TONOS
82         // U+03AD: "έ" GREEK SMALL LETTER EPSILON WITH TONOS
83         // U+03AE: "ή" GREEK SMALL LETTER ETA WITH TONOS
84         // U+03AF: "ί" GREEK SMALL LETTER IOTA WITH TONOS
85         // U+03CC: "ό" GREEK SMALL LETTER OMICRON WITH TONOS
86         // U+03CD: "ύ" GREEK SMALL LETTER UPSILON WITH TONOS
87         // U+03CE: "ώ" GREEK SMALL LETTER OMEGA WITH TONOS
88         // U+0386: "Ά" GREEK CAPITAL LETTER ALPHA WITH TONOS
89         // U+0388: "Έ" GREEK CAPITAL LETTER EPSILON WITH TONOS
90         // U+0389: "Ή" GREEK CAPITAL LETTER ETA WITH TONOS
91         // U+038A: "Ί" GREEK CAPITAL LETTER IOTA WITH TONOS
92         // U+038C: "Ό" GREEK CAPITAL LETTER OMICRON WITH TONOS
93         // U+038E: "Ύ" GREEK CAPITAL LETTER UPSILON WITH TONOS
94         // U+038F: "Ώ" GREEK CAPITAL LETTER OMEGA WITH TONOS
95         assert_toTitleCaseOfKeyLabel(GREECE,
96                 "\u03AC\u03AD\u03AE\u03AF\u03CC\u03CD\u03CE",
97                 "\u0386\u0388\u0389\u038A\u038C\u038E\u038F");
98         // U+03CA: "ϊ" GREEK SMALL LETTER IOTA WITH DIALYTIKA
99         // U+03CB: "ϋ" GREEK SMALL LETTER UPSILON WITH DIALYTIKA
100         // U+0390: "ΐ" GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
101         // U+03B0: "ΰ" GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
102         // U+03AA: "Ϊ" GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
103         // U+03AB: "Ϋ" GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
104         // U+0399: "Ι" GREEK CAPITAL LETTER IOTA
105         // U+03A5: "Υ" GREEK CAPITAL LETTER UPSILON
106         // U+0308: COMBINING DIAERESIS
107         // U+0301: COMBINING GRAVE ACCENT
108         assert_toTitleCaseOfKeyLabel(GREECE,
109                 "\u03CA\u03CB\u0390\u03B0",
110                 "\u03AA\u03AB\u0399\u0308\u0301\u03A5\u0308\u0301");
111     }
112 
assert_toTitleCaseOfKeyCode(final Locale locale, final int lowerCase, final int expected)113     private static void assert_toTitleCaseOfKeyCode(final Locale locale, final int lowerCase,
114             final int expected) {
115         assertEquals(lowerCase + " in " + locale, expected,
116                 StringUtils.toTitleCaseOfKeyCode(lowerCase, locale));
117     }
118 
test_toTitleCaseOfKeyCode()119     public void test_toTitleCaseOfKeyCode() {
120         assert_toTitleCaseOfKeyCode(US, Constants.CODE_ENTER, Constants.CODE_ENTER);
121         assert_toTitleCaseOfKeyCode(US, Constants.CODE_SPACE, Constants.CODE_SPACE);
122         assert_toTitleCaseOfKeyCode(US, Constants.CODE_COMMA, Constants.CODE_COMMA);
123         // U+0069: "i" LATIN SMALL LETTER I
124         // U+0131: "ı" LATIN SMALL LETTER DOTLESS I
125         // U+0130: "İ" LATIN SMALL LETTER I WITH DOT ABOVE
126         // U+0049: "I" LATIN SMALL LETTER I
127         assert_toTitleCaseOfKeyCode(US, 0x0069, 0x0049); // i -> I
128         assert_toTitleCaseOfKeyCode(US, 0x0131, 0x0049); // ı -> I
129         assert_toTitleCaseOfKeyCode(TURKEY, 0x0069, 0x0130); // i -> İ
130         assert_toTitleCaseOfKeyCode(TURKEY, 0x0131, 0x0049); // ı -> I
131         // U+00DF: "ß" LATIN SMALL LETTER SHARP S
132         // The title case of "ß" is "SS".
133         assert_toTitleCaseOfKeyCode(US, 0x00DF, Constants.CODE_UNSPECIFIED);
134         // U+03AC: "ά" GREEK SMALL LETTER ALPHA WITH TONOS
135         // U+0386: "Ά" GREEK CAPITAL LETTER ALPHA WITH TONOS
136         assert_toTitleCaseOfKeyCode(GREECE, 0x03AC, 0x0386);
137         // U+03CA: "ϊ" GREEK SMALL LETTER IOTA WITH DIALYTIKA
138         // U+03AA: "Ϊ" GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
139         assert_toTitleCaseOfKeyCode(GREECE, 0x03CA, 0x03AA);
140         // U+03B0: "ΰ" GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
141         // The title case of "ΰ" is "\u03A5\u0308\u0301".
142         assert_toTitleCaseOfKeyCode(GREECE, 0x03B0, Constants.CODE_UNSPECIFIED);
143     }
144 
assert_capitalizeFirstCodePoint(final Locale locale, final String text, final String expected)145     private static void assert_capitalizeFirstCodePoint(final Locale locale, final String text,
146             final String expected) {
147         assertEquals(text + " in " + locale, expected,
148                 StringUtils.capitalizeFirstCodePoint(text, locale));
149     }
150 
test_capitalizeFirstCodePoint()151     public void test_capitalizeFirstCodePoint() {
152         assert_capitalizeFirstCodePoint(US, "", "");
153         assert_capitalizeFirstCodePoint(US, "a", "A");
154         assert_capitalizeFirstCodePoint(US, "à", "À");
155         assert_capitalizeFirstCodePoint(US, "ß", "SS");
156         assert_capitalizeFirstCodePoint(US, "text", "Text");
157         assert_capitalizeFirstCodePoint(US, "iGoogle", "IGoogle");
158         assert_capitalizeFirstCodePoint(TURKEY, "iyi", "İyi");
159         assert_capitalizeFirstCodePoint(TURKEY, "ısırdı", "Isırdı");
160         assert_capitalizeFirstCodePoint(GREECE, "ά", "Ά");
161         assert_capitalizeFirstCodePoint(GREECE, "άνεση", "Άνεση");
162     }
163 
assert_capitalizeFirstAndDowncaseRest(final Locale locale, final String text, final String expected)164     private static void assert_capitalizeFirstAndDowncaseRest(final Locale locale,
165             final String text, final String expected) {
166         assertEquals(text + " in " + locale, expected,
167                 StringUtils.capitalizeFirstAndDowncaseRest(text, locale));
168     }
169 
test_capitalizeFirstAndDowncaseRest()170     public void test_capitalizeFirstAndDowncaseRest() {
171         assert_capitalizeFirstAndDowncaseRest(US, "", "");
172         assert_capitalizeFirstAndDowncaseRest(US, "a", "A");
173         assert_capitalizeFirstAndDowncaseRest(US, "à", "À");
174         assert_capitalizeFirstAndDowncaseRest(US, "ß", "SS");
175         assert_capitalizeFirstAndDowncaseRest(US, "text", "Text");
176         assert_capitalizeFirstAndDowncaseRest(US, "iGoogle", "Igoogle");
177         assert_capitalizeFirstAndDowncaseRest(US, "invite", "Invite");
178         assert_capitalizeFirstAndDowncaseRest(US, "INVITE", "Invite");
179         assert_capitalizeFirstAndDowncaseRest(TURKEY, "iyi", "İyi");
180         assert_capitalizeFirstAndDowncaseRest(TURKEY, "İYİ", "İyi");
181         assert_capitalizeFirstAndDowncaseRest(TURKEY, "ısırdı", "Isırdı");
182         assert_capitalizeFirstAndDowncaseRest(TURKEY, "ISIRDI", "Isırdı");
183         assert_capitalizeFirstAndDowncaseRest(GREECE, "ά", "Ά");
184         assert_capitalizeFirstAndDowncaseRest(GREECE, "άνεση", "Άνεση");
185         assert_capitalizeFirstAndDowncaseRest(GREECE, "ΆΝΕΣΗ", "Άνεση");
186     }
187 
testContainsInArray()188     public void testContainsInArray() {
189         assertFalse("empty array", StringUtils.containsInArray("key", new String[0]));
190         assertFalse("not in 1 element", StringUtils.containsInArray("key", new String[] {
191                 "key1"
192         }));
193         assertFalse("not in 2 elements", StringUtils.containsInArray("key", new String[] {
194                 "key1", "key2"
195         }));
196 
197         assertTrue("in 1 element", StringUtils.containsInArray("key", new String[] {
198                 "key"
199         }));
200         assertTrue("in 2 elements", StringUtils.containsInArray("key", new String[] {
201                 "key1", "key"
202         }));
203     }
204 
testContainsInCommaSplittableText()205     public void testContainsInCommaSplittableText() {
206         assertFalse("null", StringUtils.containsInCommaSplittableText("key", null));
207         assertFalse("empty", StringUtils.containsInCommaSplittableText("key", ""));
208         assertFalse("not in 1 element",
209                 StringUtils.containsInCommaSplittableText("key", "key1"));
210         assertFalse("not in 2 elements",
211                 StringUtils.containsInCommaSplittableText("key", "key1,key2"));
212 
213         assertTrue("in 1 element", StringUtils.containsInCommaSplittableText("key", "key"));
214         assertTrue("in 2 elements", StringUtils.containsInCommaSplittableText("key", "key1,key"));
215     }
216 
testRemoveFromCommaSplittableTextIfExists()217     public void testRemoveFromCommaSplittableTextIfExists() {
218         assertEquals("null", "", StringUtils.removeFromCommaSplittableTextIfExists("key", null));
219         assertEquals("empty", "", StringUtils.removeFromCommaSplittableTextIfExists("key", ""));
220 
221         assertEquals("not in 1 element", "key1",
222                 StringUtils.removeFromCommaSplittableTextIfExists("key", "key1"));
223         assertEquals("not in 2 elements", "key1,key2",
224                 StringUtils.removeFromCommaSplittableTextIfExists("key", "key1,key2"));
225 
226         assertEquals("in 1 element", "",
227                 StringUtils.removeFromCommaSplittableTextIfExists("key", "key"));
228         assertEquals("in 2 elements at position 1", "key2",
229                 StringUtils.removeFromCommaSplittableTextIfExists("key", "key,key2"));
230         assertEquals("in 2 elements at position 2", "key1",
231                 StringUtils.removeFromCommaSplittableTextIfExists("key", "key1,key"));
232         assertEquals("in 3 elements at position 2", "key1,key3",
233                 StringUtils.removeFromCommaSplittableTextIfExists("key", "key1,key,key3"));
234 
235         assertEquals("in 3 elements at position 1,2,3", "",
236                 StringUtils.removeFromCommaSplittableTextIfExists("key", "key,key,key"));
237         assertEquals("in 5 elements at position 2,4", "key1,key3,key5",
238                 StringUtils.removeFromCommaSplittableTextIfExists(
239                         "key", "key1,key,key3,key,key5"));
240     }
241 
242 
testCapitalizeFirstCodePoint()243     public void testCapitalizeFirstCodePoint() {
244         assertEquals("SSaa",
245                 StringUtils.capitalizeFirstCodePoint("ßaa", Locale.GERMAN));
246         assertEquals("Aßa",
247                 StringUtils.capitalizeFirstCodePoint("aßa", Locale.GERMAN));
248         assertEquals("Iab",
249                 StringUtils.capitalizeFirstCodePoint("iab", Locale.ENGLISH));
250         assertEquals("CAmElCaSe",
251                 StringUtils.capitalizeFirstCodePoint("cAmElCaSe", Locale.ENGLISH));
252         assertEquals("İab",
253                 StringUtils.capitalizeFirstCodePoint("iab", new Locale("tr")));
254         assertEquals("AİB",
255                 StringUtils.capitalizeFirstCodePoint("AİB", new Locale("tr")));
256         assertEquals("A",
257                 StringUtils.capitalizeFirstCodePoint("a", Locale.ENGLISH));
258         assertEquals("A",
259                 StringUtils.capitalizeFirstCodePoint("A", Locale.ENGLISH));
260     }
261 
testCapitalizeFirstAndDowncaseRest()262     public void testCapitalizeFirstAndDowncaseRest() {
263         assertEquals("SSaa",
264                 StringUtils.capitalizeFirstAndDowncaseRest("ßaa", Locale.GERMAN));
265         assertEquals("Aßa",
266                 StringUtils.capitalizeFirstAndDowncaseRest("aßa", Locale.GERMAN));
267         assertEquals("Iab",
268                 StringUtils.capitalizeFirstAndDowncaseRest("iab", Locale.ENGLISH));
269         assertEquals("Camelcase",
270                 StringUtils.capitalizeFirstAndDowncaseRest("cAmElCaSe", Locale.ENGLISH));
271         assertEquals("İab",
272                 StringUtils.capitalizeFirstAndDowncaseRest("iab", new Locale("tr")));
273         assertEquals("Aib",
274                 StringUtils.capitalizeFirstAndDowncaseRest("AİB", new Locale("tr")));
275         assertEquals("A",
276                 StringUtils.capitalizeFirstAndDowncaseRest("a", Locale.ENGLISH));
277         assertEquals("A",
278                 StringUtils.capitalizeFirstAndDowncaseRest("A", Locale.ENGLISH));
279     }
280 
testGetCapitalizationType()281     public void testGetCapitalizationType() {
282         assertEquals(StringUtils.CAPITALIZE_NONE,
283                 StringUtils.getCapitalizationType("capitalize"));
284         assertEquals(StringUtils.CAPITALIZE_NONE,
285                 StringUtils.getCapitalizationType("cApITalize"));
286         assertEquals(StringUtils.CAPITALIZE_NONE,
287                 StringUtils.getCapitalizationType("capitalizE"));
288         assertEquals(StringUtils.CAPITALIZE_NONE,
289                 StringUtils.getCapitalizationType("__c a piu$@tali56ze"));
290         assertEquals(StringUtils.CAPITALIZE_FIRST,
291                 StringUtils.getCapitalizationType("A__c a piu$@tali56ze"));
292         assertEquals(StringUtils.CAPITALIZE_FIRST,
293                 StringUtils.getCapitalizationType("Capitalize"));
294         assertEquals(StringUtils.CAPITALIZE_FIRST,
295                 StringUtils.getCapitalizationType("     Capitalize"));
296         assertEquals(StringUtils.CAPITALIZE_ALL,
297                 StringUtils.getCapitalizationType("CAPITALIZE"));
298         assertEquals(StringUtils.CAPITALIZE_ALL,
299                 StringUtils.getCapitalizationType("  PI26LIE"));
300         assertEquals(StringUtils.CAPITALIZE_NONE,
301                 StringUtils.getCapitalizationType(""));
302     }
303 
testIsIdenticalAfterUpcaseIsIdenticalAfterDowncase()304     public void testIsIdenticalAfterUpcaseIsIdenticalAfterDowncase() {
305         assertFalse(StringUtils.isIdenticalAfterUpcase("capitalize"));
306         assertTrue(StringUtils.isIdenticalAfterDowncase("capitalize"));
307         assertFalse(StringUtils.isIdenticalAfterUpcase("cApITalize"));
308         assertFalse(StringUtils.isIdenticalAfterDowncase("cApITalize"));
309         assertFalse(StringUtils.isIdenticalAfterUpcase("capitalizE"));
310         assertFalse(StringUtils.isIdenticalAfterDowncase("capitalizE"));
311         assertFalse(StringUtils.isIdenticalAfterUpcase("__c a piu$@tali56ze"));
312         assertTrue(StringUtils.isIdenticalAfterDowncase("__c a piu$@tali56ze"));
313         assertFalse(StringUtils.isIdenticalAfterUpcase("A__c a piu$@tali56ze"));
314         assertFalse(StringUtils.isIdenticalAfterDowncase("A__c a piu$@tali56ze"));
315         assertFalse(StringUtils.isIdenticalAfterUpcase("Capitalize"));
316         assertFalse(StringUtils.isIdenticalAfterDowncase("Capitalize"));
317         assertFalse(StringUtils.isIdenticalAfterUpcase("     Capitalize"));
318         assertFalse(StringUtils.isIdenticalAfterDowncase("     Capitalize"));
319         assertTrue(StringUtils.isIdenticalAfterUpcase("CAPITALIZE"));
320         assertFalse(StringUtils.isIdenticalAfterDowncase("CAPITALIZE"));
321         assertTrue(StringUtils.isIdenticalAfterUpcase("  PI26LIE"));
322         assertFalse(StringUtils.isIdenticalAfterDowncase("  PI26LIE"));
323         assertTrue(StringUtils.isIdenticalAfterUpcase(""));
324         assertTrue(StringUtils.isIdenticalAfterDowncase(""));
325     }
326 
checkCapitalize(final String src, final String dst, final int[] sortedSeparators, final Locale locale)327     private static void checkCapitalize(final String src, final String dst,
328             final int[] sortedSeparators, final Locale locale) {
329         assertEquals(dst, StringUtils.capitalizeEachWord(src, sortedSeparators, locale));
330         assert(src.equals(dst)
331                 == StringUtils.isIdenticalAfterCapitalizeEachWord(src, sortedSeparators));
332     }
333 
334     private static final int[] SPACE = { Constants.CODE_SPACE };
335     private static final int[] SPACE_PERIOD = StringUtils.toSortedCodePointArray(" .");
336     private static final int[] SENTENCE_SEPARATORS =
337             StringUtils.toSortedCodePointArray(" \n.!?*()&");
338     private static final int[] WORD_SEPARATORS = StringUtils.toSortedCodePointArray(" \n.!?*,();&");
339 
testCapitalizeEachWord()340     public void testCapitalizeEachWord() {
341         checkCapitalize("", "", SPACE, Locale.ENGLISH);
342         checkCapitalize("test", "Test", SPACE, Locale.ENGLISH);
343         checkCapitalize("    test", "    Test", SPACE, Locale.ENGLISH);
344         checkCapitalize("Test", "Test", SPACE, Locale.ENGLISH);
345         checkCapitalize("    Test", "    Test", SPACE, Locale.ENGLISH);
346         checkCapitalize(".Test", ".test", SPACE, Locale.ENGLISH);
347         checkCapitalize(".Test", ".Test", SPACE_PERIOD, Locale.ENGLISH);
348         checkCapitalize("test and retest", "Test And Retest", SPACE_PERIOD, Locale.ENGLISH);
349         checkCapitalize("Test and retest", "Test And Retest", SPACE_PERIOD, Locale.ENGLISH);
350         checkCapitalize("Test And Retest", "Test And Retest", SPACE_PERIOD, Locale.ENGLISH);
351         checkCapitalize("Test And.Retest  ", "Test And.Retest  ", SPACE_PERIOD, Locale.ENGLISH);
352         checkCapitalize("Test And.retest  ", "Test And.Retest  ", SPACE_PERIOD, Locale.ENGLISH);
353         checkCapitalize("Test And.retest  ", "Test And.retest  ", SPACE, Locale.ENGLISH);
354         checkCapitalize("Test And.Retest  ", "Test And.retest  ", SPACE, Locale.ENGLISH);
355         checkCapitalize("test and ietest", "Test And İetest", SPACE_PERIOD, new Locale("tr"));
356         checkCapitalize("test and ietest", "Test And Ietest", SPACE_PERIOD, Locale.ENGLISH);
357         checkCapitalize("Test&Retest", "Test&Retest", SENTENCE_SEPARATORS, Locale.ENGLISH);
358         checkCapitalize("Test&retest", "Test&Retest", SENTENCE_SEPARATORS, Locale.ENGLISH);
359         checkCapitalize("test&Retest", "Test&Retest", SENTENCE_SEPARATORS, Locale.ENGLISH);
360         checkCapitalize("rest\nrecreation! And in the end...",
361                 "Rest\nRecreation! And In The End...", WORD_SEPARATORS, Locale.ENGLISH);
362         checkCapitalize("lorem ipsum dolor sit amet", "Lorem Ipsum Dolor Sit Amet",
363                 WORD_SEPARATORS, Locale.ENGLISH);
364         checkCapitalize("Lorem!Ipsum (Dolor) Sit * Amet", "Lorem!Ipsum (Dolor) Sit * Amet",
365                 WORD_SEPARATORS, Locale.ENGLISH);
366         checkCapitalize("Lorem!Ipsum (dolor) Sit * Amet", "Lorem!Ipsum (Dolor) Sit * Amet",
367                 WORD_SEPARATORS, Locale.ENGLISH);
368     }
369 
testLooksLikeURL()370     public void testLooksLikeURL() {
371         assertTrue(StringUtils.lastPartLooksLikeURL("http://www.google."));
372         assertFalse(StringUtils.lastPartLooksLikeURL("word wo"));
373         assertTrue(StringUtils.lastPartLooksLikeURL("/etc/foo"));
374         assertFalse(StringUtils.lastPartLooksLikeURL("left/right"));
375         assertTrue(StringUtils.lastPartLooksLikeURL("www.goo"));
376         assertTrue(StringUtils.lastPartLooksLikeURL("www."));
377         assertFalse(StringUtils.lastPartLooksLikeURL("U.S.A"));
378         assertFalse(StringUtils.lastPartLooksLikeURL("U.S.A."));
379         assertTrue(StringUtils.lastPartLooksLikeURL("rtsp://foo."));
380         assertTrue(StringUtils.lastPartLooksLikeURL("://"));
381         assertFalse(StringUtils.lastPartLooksLikeURL("abc/"));
382         assertTrue(StringUtils.lastPartLooksLikeURL("abc.def/ghi"));
383         assertFalse(StringUtils.lastPartLooksLikeURL("abc.def"));
384         // TODO: ideally this would not look like a URL, but to keep down the complexity of the
385         // code for now True is acceptable.
386         assertTrue(StringUtils.lastPartLooksLikeURL("abc./def"));
387         // TODO: ideally this would not look like a URL, but to keep down the complexity of the
388         // code for now True is acceptable.
389         assertTrue(StringUtils.lastPartLooksLikeURL(".abc/def"));
390     }
391 
testHexStringUtils()392     public void testHexStringUtils() {
393         final byte[] bytes = new byte[] { (byte)0x01, (byte)0x11, (byte)0x22, (byte)0x33,
394                 (byte)0x55, (byte)0x88, (byte)0xEE };
395         final String bytesStr = StringUtils.byteArrayToHexString(bytes);
396         final byte[] bytes2 = StringUtils.hexStringToByteArray(bytesStr);
397         for (int i = 0; i < bytes.length; ++i) {
398             assertTrue(bytes[i] == bytes2[i]);
399         }
400         final String bytesStr2 = StringUtils.byteArrayToHexString(bytes2);
401         assertTrue(bytesStr.equals(bytesStr2));
402     }
403 
testToCodePointArray()404     public void testToCodePointArray() {
405         final String STR_WITH_SUPPLEMENTARY_CHAR = "abcde\uD861\uDED7fgh\u0000\u2002\u2003\u3000xx";
406         final int[] EXPECTED_RESULT = new int[] { 'a', 'b', 'c', 'd', 'e', 0x286D7, 'f', 'g', 'h',
407                 0, 0x2002, 0x2003, 0x3000, 'x', 'x'};
408         final int[] codePointArray = StringUtils.toCodePointArray(STR_WITH_SUPPLEMENTARY_CHAR, 0,
409                 STR_WITH_SUPPLEMENTARY_CHAR.length());
410         assertEquals("toCodePointArray, size matches", codePointArray.length,
411                 EXPECTED_RESULT.length);
412         for (int i = 0; i < EXPECTED_RESULT.length; ++i) {
413             assertEquals("toCodePointArray position " + i, codePointArray[i], EXPECTED_RESULT[i]);
414         }
415     }
416 
testCopyCodePointsAndReturnCodePointCount()417     public void testCopyCodePointsAndReturnCodePointCount() {
418         final String STR_WITH_SUPPLEMENTARY_CHAR = "AbcDE\uD861\uDED7fGh\u0000\u2002\u3000あx";
419         final int[] EXPECTED_RESULT = new int[] { 'A', 'b', 'c', 'D', 'E', 0x286D7,
420                 'f', 'G', 'h', 0, 0x2002, 0x3000, 'あ', 'x'};
421         final int[] EXPECTED_RESULT_DOWNCASE = new int[] { 'a', 'b', 'c', 'd', 'e', 0x286D7,
422                 'f', 'g', 'h', 0, 0x2002, 0x3000, 'あ', 'x'};
423 
424         int[] codePointArray = new int[50];
425         int codePointCount = StringUtils.copyCodePointsAndReturnCodePointCount(codePointArray,
426                 STR_WITH_SUPPLEMENTARY_CHAR, 0,
427                 STR_WITH_SUPPLEMENTARY_CHAR.length(), false /* downCase */);
428         assertEquals("copyCodePointsAndReturnCodePointCount, size matches", codePointCount,
429                 EXPECTED_RESULT.length);
430         for (int i = 0; i < codePointCount; ++i) {
431             assertEquals("copyCodePointsAndReturnCodePointCount position " + i, codePointArray[i],
432                     EXPECTED_RESULT[i]);
433         }
434 
435         codePointCount = StringUtils.copyCodePointsAndReturnCodePointCount(codePointArray,
436                 STR_WITH_SUPPLEMENTARY_CHAR, 0,
437                 STR_WITH_SUPPLEMENTARY_CHAR.length(), true /* downCase */);
438         assertEquals("copyCodePointsAndReturnCodePointCount downcase, size matches", codePointCount,
439                 EXPECTED_RESULT_DOWNCASE.length);
440         for (int i = 0; i < codePointCount; ++i) {
441             assertEquals("copyCodePointsAndReturnCodePointCount position " + i, codePointArray[i],
442                     EXPECTED_RESULT_DOWNCASE[i]);
443         }
444 
445         final int JAVA_CHAR_COUNT = 8;
446         final int CODEPOINT_COUNT = 7;
447         codePointCount = StringUtils.copyCodePointsAndReturnCodePointCount(codePointArray,
448                 STR_WITH_SUPPLEMENTARY_CHAR, 0, JAVA_CHAR_COUNT, false /* downCase */);
449         assertEquals("copyCodePointsAndReturnCodePointCount, size matches", codePointCount,
450                 CODEPOINT_COUNT);
451         for (int i = 0; i < codePointCount; ++i) {
452             assertEquals("copyCodePointsAndReturnCodePointCount position " + i, codePointArray[i],
453                     EXPECTED_RESULT[i]);
454         }
455 
456         boolean exceptionHappened = false;
457         codePointArray = new int[5];
458         try {
459             codePointCount = StringUtils.copyCodePointsAndReturnCodePointCount(codePointArray,
460                     STR_WITH_SUPPLEMENTARY_CHAR, 0, JAVA_CHAR_COUNT, false /* downCase */);
461         } catch (ArrayIndexOutOfBoundsException e) {
462             exceptionHappened = true;
463         }
464         assertTrue("copyCodePointsAndReturnCodePointCount throws when array is too small",
465                 exceptionHappened);
466     }
467 
testGetTrailingSingleQuotesCount()468     public void testGetTrailingSingleQuotesCount() {
469         assertEquals(0, StringUtils.getTrailingSingleQuotesCount(""));
470         assertEquals(1, StringUtils.getTrailingSingleQuotesCount("'"));
471         assertEquals(5, StringUtils.getTrailingSingleQuotesCount("'''''"));
472         assertEquals(0, StringUtils.getTrailingSingleQuotesCount("a"));
473         assertEquals(0, StringUtils.getTrailingSingleQuotesCount("'this"));
474         assertEquals(1, StringUtils.getTrailingSingleQuotesCount("'word'"));
475         assertEquals(0, StringUtils.getTrailingSingleQuotesCount("I'm"));
476     }
477 }
478