1 /*
2  * Copyright (C) 2008 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.common.base;
18 
19 import static com.google.common.base.CharMatcher.BREAKING_WHITESPACE;
20 import static com.google.common.base.CharMatcher.WHITESPACE;
21 import static com.google.common.base.CharMatcher.anyOf;
22 import static com.google.common.base.CharMatcher.forPredicate;
23 import static com.google.common.base.CharMatcher.inRange;
24 import static com.google.common.base.CharMatcher.is;
25 import static com.google.common.base.CharMatcher.isNot;
26 import static com.google.common.base.CharMatcher.noneOf;
27 
28 import com.google.common.annotations.GwtCompatible;
29 import com.google.common.annotations.GwtIncompatible;
30 import com.google.common.collect.Sets;
31 import com.google.common.testing.NullPointerTester;
32 
33 import junit.framework.AssertionFailedError;
34 import junit.framework.TestCase;
35 
36 import java.util.Arrays;
37 import java.util.BitSet;
38 import java.util.HashSet;
39 import java.util.Random;
40 import java.util.Set;
41 
42 /**
43  * Unit test for {@link CharMatcher}.
44  *
45  * @author Kevin Bourrillion
46  */
47 @GwtCompatible(emulated = true)
48 public class CharMatcherTest extends TestCase {
49 
50   @GwtIncompatible("NullPointerTester")
testStaticNullPointers()51   public void testStaticNullPointers() throws Exception {
52     NullPointerTester tester = new NullPointerTester();
53     tester.testAllPublicStaticMethods(CharMatcher.class);
54     tester.testAllPublicInstanceMethods(CharMatcher.ANY);
55     tester.testAllPublicInstanceMethods(CharMatcher.anyOf("abc"));
56   }
57 
58   private static final CharMatcher WHATEVER = new CharMatcher() {
59     @Override public boolean matches(char c) {
60       throw new AssertionFailedError(
61           "You weren't supposed to actually invoke me!");
62     }
63   };
64 
testAnyAndNone_logicalOps()65   public void testAnyAndNone_logicalOps() throws Exception {
66     // These are testing behavior that's never promised by the API, but since
67     // we're lucky enough that these do pass, it saves us from having to write
68     // more excruciating tests! Hooray!
69 
70     assertSame(CharMatcher.ANY, CharMatcher.NONE.negate());
71     assertSame(CharMatcher.NONE, CharMatcher.ANY.negate());
72 
73     assertSame(WHATEVER, CharMatcher.ANY.and(WHATEVER));
74     assertSame(CharMatcher.ANY, CharMatcher.ANY.or(WHATEVER));
75 
76     assertSame(CharMatcher.NONE, CharMatcher.NONE.and(WHATEVER));
77     assertSame(WHATEVER, CharMatcher.NONE.or(WHATEVER));
78   }
79 
80   // The rest of the behavior of ANY and DEFAULT will be covered in the tests for
81   // the text processing methods below.
82 
testWhitespaceBreakingWhitespaceSubset()83   public void testWhitespaceBreakingWhitespaceSubset() throws Exception {
84     for (int c = 0; c <= Character.MAX_VALUE; c++) {
85       if (BREAKING_WHITESPACE.apply((char) c)) {
86         assertTrue(Integer.toHexString(c), WHITESPACE.apply((char) c));
87       }
88     }
89   }
90 
91   // The next tests require ICU4J and have, at least for now, been sliced out
92   // of the open-source view of the tests.
93 
94   @GwtIncompatible("Character.isISOControl")
testJavaIsoControl()95   public void testJavaIsoControl() {
96     for (int c = 0; c <= Character.MAX_VALUE; c++) {
97       assertEquals("" + c, Character.isISOControl(c),
98           CharMatcher.JAVA_ISO_CONTROL.matches((char) c));
99     }
100   }
101 
102   // Omitting tests for the rest of the JAVA_* constants as these are defined
103   // as extremely straightforward pass-throughs to the JDK methods.
104 
105   // We're testing the is(), isNot(), anyOf(), noneOf() and inRange() methods
106   // below by testing their text-processing methods.
107 
108   // The organization of this test class is unusual, as it's not done by
109   // method, but by overall "scenario". Also, the variety of actual tests we
110   // do borders on absurd overkill. Better safe than sorry, though?
111 
112   @GwtIncompatible("java.util.BitSet")
testSetBits()113   public void testSetBits() {
114     doTestSetBits(CharMatcher.ANY);
115     doTestSetBits(CharMatcher.NONE);
116     doTestSetBits(is('a'));
117     doTestSetBits(isNot('a'));
118     doTestSetBits(anyOf(""));
119     doTestSetBits(anyOf("x"));
120     doTestSetBits(anyOf("xy"));
121     doTestSetBits(anyOf("CharMatcher"));
122     doTestSetBits(noneOf("CharMatcher"));
123     doTestSetBits(inRange('n', 'q'));
124     doTestSetBits(forPredicate(Predicates.equalTo('c')));
125     doTestSetBits(CharMatcher.ASCII);
126     doTestSetBits(CharMatcher.DIGIT);
127     doTestSetBits(CharMatcher.INVISIBLE);
128     doTestSetBits(CharMatcher.WHITESPACE);
129     doTestSetBits(inRange('A', 'Z').and(inRange('F', 'K').negate()));
130   }
131 
132   @GwtIncompatible("java.util.BitSet")
doTestSetBits(CharMatcher matcher)133   private void doTestSetBits(CharMatcher matcher) {
134     BitSet bitset = new BitSet();
135     matcher.setBits(bitset);
136     for (int i = Character.MIN_VALUE; i <= Character.MAX_VALUE; i++) {
137       assertEquals(matcher.matches((char) i), bitset.get(i));
138     }
139   }
140 
testEmpty()141   public void testEmpty() throws Exception {
142     doTestEmpty(CharMatcher.ANY);
143     doTestEmpty(CharMatcher.NONE);
144     doTestEmpty(is('a'));
145     doTestEmpty(isNot('a'));
146     doTestEmpty(anyOf(""));
147     doTestEmpty(anyOf("x"));
148     doTestEmpty(anyOf("xy"));
149     doTestEmpty(anyOf("CharMatcher"));
150     doTestEmpty(noneOf("CharMatcher"));
151     doTestEmpty(inRange('n', 'q'));
152     doTestEmpty(forPredicate(Predicates.equalTo('c')));
153   }
154 
155   @GwtIncompatible("NullPointerTester")
testNull()156   public void testNull() throws Exception {
157     doTestNull(CharMatcher.ANY);
158     doTestNull(CharMatcher.NONE);
159     doTestNull(is('a'));
160     doTestNull(isNot('a'));
161     doTestNull(anyOf(""));
162     doTestNull(anyOf("x"));
163     doTestNull(anyOf("xy"));
164     doTestNull(anyOf("CharMatcher"));
165     doTestNull(noneOf("CharMatcher"));
166     doTestNull(inRange('n', 'q'));
167     doTestNull(forPredicate(Predicates.equalTo('c')));
168   }
169 
doTestEmpty(CharMatcher matcher)170   private void doTestEmpty(CharMatcher matcher) throws Exception {
171     reallyTestEmpty(matcher);
172     reallyTestEmpty(matcher.negate());
173     reallyTestEmpty(matcher.precomputed());
174   }
175 
reallyTestEmpty(CharMatcher matcher)176   private void reallyTestEmpty(CharMatcher matcher) throws Exception {
177     assertEquals(-1, matcher.indexIn(""));
178     assertEquals(-1, matcher.indexIn("", 0));
179     try {
180       matcher.indexIn("", 1);
181       fail();
182     } catch (IndexOutOfBoundsException expected) {
183     }
184     try {
185       matcher.indexIn("", -1);
186       fail();
187     } catch (IndexOutOfBoundsException expected) {
188     }
189     assertEquals(-1, matcher.lastIndexIn(""));
190     assertFalse(matcher.matchesAnyOf(""));
191     assertTrue(matcher.matchesAllOf(""));
192     assertTrue(matcher.matchesNoneOf(""));
193     assertEquals("", matcher.removeFrom(""));
194     assertEquals("", matcher.replaceFrom("", 'z'));
195     assertEquals("", matcher.replaceFrom("", "ZZ"));
196     assertEquals("", matcher.trimFrom(""));
197     assertEquals(0, matcher.countIn(""));
198   }
199 
200   @GwtIncompatible("NullPointerTester")
doTestNull(CharMatcher matcher)201   private void doTestNull(CharMatcher matcher) throws Exception {
202     NullPointerTester tester = new NullPointerTester();
203     tester.testAllPublicInstanceMethods(matcher);
204   }
205 
testNoMatches()206   public void testNoMatches() {
207     doTestNoMatches(CharMatcher.NONE, "blah");
208     doTestNoMatches(is('a'), "bcde");
209     doTestNoMatches(isNot('a'), "aaaa");
210     doTestNoMatches(anyOf(""), "abcd");
211     doTestNoMatches(anyOf("x"), "abcd");
212     doTestNoMatches(anyOf("xy"), "abcd");
213     doTestNoMatches(anyOf("CharMatcher"), "zxqy");
214     doTestNoMatches(noneOf("CharMatcher"), "ChMa");
215     doTestNoMatches(inRange('p', 'x'), "mom");
216     doTestNoMatches(forPredicate(Predicates.equalTo('c')), "abe");
217     doTestNoMatches(inRange('A', 'Z').and(inRange('F', 'K').negate()), "F1a");
218     doTestNoMatches(CharMatcher.DIGIT, "\tAz()");
219     doTestNoMatches(CharMatcher.JAVA_DIGIT, "\tAz()");
220     doTestNoMatches(CharMatcher.DIGIT.and(CharMatcher.ASCII), "\tAz()");
221     doTestNoMatches(CharMatcher.SINGLE_WIDTH, "\u05bf\u3000");
222   }
223 
doTestNoMatches(CharMatcher matcher, String s)224   private void doTestNoMatches(CharMatcher matcher, String s) {
225     reallyTestNoMatches(matcher, s);
226     reallyTestAllMatches(matcher.negate(), s);
227     reallyTestNoMatches(matcher.precomputed(), s);
228     reallyTestAllMatches(matcher.negate().precomputed(), s);
229     reallyTestAllMatches(matcher.precomputed().negate(), s);
230     reallyTestNoMatches(forPredicate(matcher), s);
231 
232     reallyTestNoMatches(matcher, new StringBuilder(s));
233   }
234 
testAllMatches()235   public void testAllMatches() {
236     doTestAllMatches(CharMatcher.ANY, "blah");
237     doTestAllMatches(isNot('a'), "bcde");
238     doTestAllMatches(is('a'), "aaaa");
239     doTestAllMatches(noneOf("CharMatcher"), "zxqy");
240     doTestAllMatches(anyOf("x"), "xxxx");
241     doTestAllMatches(anyOf("xy"), "xyyx");
242     doTestAllMatches(anyOf("CharMatcher"), "ChMa");
243     doTestAllMatches(inRange('m', 'p'), "mom");
244     doTestAllMatches(forPredicate(Predicates.equalTo('c')), "ccc");
245     doTestAllMatches(CharMatcher.DIGIT, "0123456789\u0ED0\u1B59");
246     doTestAllMatches(CharMatcher.JAVA_DIGIT, "0123456789");
247     doTestAllMatches(CharMatcher.DIGIT.and(CharMatcher.ASCII), "0123456789");
248     doTestAllMatches(CharMatcher.SINGLE_WIDTH, "\t0123ABCdef~\u00A0\u2111");
249   }
250 
doTestAllMatches(CharMatcher matcher, String s)251   private void doTestAllMatches(CharMatcher matcher, String s) {
252     reallyTestAllMatches(matcher, s);
253     reallyTestNoMatches(matcher.negate(), s);
254     reallyTestAllMatches(matcher.precomputed(), s);
255     reallyTestNoMatches(matcher.negate().precomputed(), s);
256     reallyTestNoMatches(matcher.precomputed().negate(), s);
257     reallyTestAllMatches(forPredicate(matcher), s);
258 
259     reallyTestAllMatches(matcher, new StringBuilder(s));
260   }
261 
reallyTestNoMatches(CharMatcher matcher, CharSequence s)262   private void reallyTestNoMatches(CharMatcher matcher, CharSequence s) {
263     assertFalse(matcher.matches(s.charAt(0)));
264     assertEquals(-1, matcher.indexIn(s));
265     assertEquals(-1, matcher.indexIn(s, 0));
266     assertEquals(-1, matcher.indexIn(s, 1));
267     assertEquals(-1, matcher.indexIn(s, s.length()));
268     try {
269       matcher.indexIn(s, s.length() + 1);
270       fail();
271     } catch (IndexOutOfBoundsException expected) {
272     }
273     try {
274       matcher.indexIn(s, -1);
275       fail();
276     } catch (IndexOutOfBoundsException expected) {
277     }
278     assertEquals(-1, matcher.lastIndexIn(s));
279     assertFalse(matcher.matchesAnyOf(s));
280     assertFalse(matcher.matchesAllOf(s));
281     assertTrue(matcher.matchesNoneOf(s));
282 
283     assertEquals(s.toString(), matcher.removeFrom(s));
284     assertEquals(s.toString(), matcher.replaceFrom(s, 'z'));
285     assertEquals(s.toString(), matcher.replaceFrom(s, "ZZ"));
286     assertEquals(s.toString(), matcher.trimFrom(s));
287     assertEquals(0, matcher.countIn(s));
288   }
289 
reallyTestAllMatches(CharMatcher matcher, CharSequence s)290   private void reallyTestAllMatches(CharMatcher matcher, CharSequence s) {
291     assertTrue(matcher.matches(s.charAt(0)));
292     assertEquals(0, matcher.indexIn(s));
293     assertEquals(0, matcher.indexIn(s, 0));
294     assertEquals(1, matcher.indexIn(s, 1));
295     assertEquals(-1, matcher.indexIn(s, s.length()));
296     assertEquals(s.length() - 1, matcher.lastIndexIn(s));
297     assertTrue(matcher.matchesAnyOf(s));
298     assertTrue(matcher.matchesAllOf(s));
299     assertFalse(matcher.matchesNoneOf(s));
300     assertEquals("", matcher.removeFrom(s));
301     assertEquals(Strings.repeat("z", s.length()),
302         matcher.replaceFrom(s, 'z'));
303     assertEquals(Strings.repeat("ZZ", s.length()),
304         matcher.replaceFrom(s, "ZZ"));
305     assertEquals("", matcher.trimFrom(s));
306     assertEquals(s.length(), matcher.countIn(s));
307   }
308 
testGeneral()309   public void testGeneral() {
310     doTestGeneral(is('a'), 'a', 'b');
311     doTestGeneral(isNot('a'), 'b', 'a');
312     doTestGeneral(anyOf("x"), 'x', 'z');
313     doTestGeneral(anyOf("xy"), 'y', 'z');
314     doTestGeneral(anyOf("CharMatcher"), 'C', 'z');
315     doTestGeneral(noneOf("CharMatcher"), 'z', 'C');
316     doTestGeneral(inRange('p', 'x'), 'q', 'z');
317   }
318 
doTestGeneral(CharMatcher matcher, char match, char noMatch)319   private void doTestGeneral(CharMatcher matcher, char match, char noMatch) {
320     doTestOneCharMatch(matcher, "" + match);
321     doTestOneCharNoMatch(matcher, "" + noMatch);
322     doTestMatchThenNoMatch(matcher, "" + match + noMatch);
323     doTestNoMatchThenMatch(matcher, "" + noMatch + match);
324   }
325 
doTestOneCharMatch(CharMatcher matcher, String s)326   private void doTestOneCharMatch(CharMatcher matcher, String s) {
327     reallyTestOneCharMatch(matcher, s);
328     reallyTestOneCharNoMatch(matcher.negate(), s);
329     reallyTestOneCharMatch(matcher.precomputed(), s);
330     reallyTestOneCharNoMatch(matcher.negate().precomputed(), s);
331     reallyTestOneCharNoMatch(matcher.precomputed().negate(), s);
332   }
333 
doTestOneCharNoMatch(CharMatcher matcher, String s)334   private void doTestOneCharNoMatch(CharMatcher matcher, String s) {
335     reallyTestOneCharNoMatch(matcher, s);
336     reallyTestOneCharMatch(matcher.negate(), s);
337     reallyTestOneCharNoMatch(matcher.precomputed(), s);
338     reallyTestOneCharMatch(matcher.negate().precomputed(), s);
339     reallyTestOneCharMatch(matcher.precomputed().negate(), s);
340   }
341 
doTestMatchThenNoMatch(CharMatcher matcher, String s)342   private void doTestMatchThenNoMatch(CharMatcher matcher, String s) {
343     reallyTestMatchThenNoMatch(matcher, s);
344     reallyTestNoMatchThenMatch(matcher.negate(), s);
345     reallyTestMatchThenNoMatch(matcher.precomputed(), s);
346     reallyTestNoMatchThenMatch(matcher.negate().precomputed(), s);
347     reallyTestNoMatchThenMatch(matcher.precomputed().negate(), s);
348   }
349 
doTestNoMatchThenMatch(CharMatcher matcher, String s)350   private void doTestNoMatchThenMatch(CharMatcher matcher, String s) {
351     reallyTestNoMatchThenMatch(matcher, s);
352     reallyTestMatchThenNoMatch(matcher.negate(), s);
353     reallyTestNoMatchThenMatch(matcher.precomputed(), s);
354     reallyTestMatchThenNoMatch(matcher.negate().precomputed(), s);
355     reallyTestMatchThenNoMatch(matcher.precomputed().negate(), s);
356   }
357 
reallyTestOneCharMatch(CharMatcher matcher, String s)358   private void reallyTestOneCharMatch(CharMatcher matcher, String s) {
359     assertTrue(matcher.matches(s.charAt(0)));
360     assertTrue(matcher.apply(s.charAt(0)));
361     assertEquals(0, matcher.indexIn(s));
362     assertEquals(0, matcher.indexIn(s, 0));
363     assertEquals(-1, matcher.indexIn(s, 1));
364     assertEquals(0, matcher.lastIndexIn(s));
365     assertTrue(matcher.matchesAnyOf(s));
366     assertTrue(matcher.matchesAllOf(s));
367     assertFalse(matcher.matchesNoneOf(s));
368     assertEquals("", matcher.removeFrom(s));
369     assertEquals("z", matcher.replaceFrom(s, 'z'));
370     assertEquals("ZZ", matcher.replaceFrom(s, "ZZ"));
371     assertEquals("", matcher.trimFrom(s));
372     assertEquals(1, matcher.countIn(s));
373   }
374 
reallyTestOneCharNoMatch(CharMatcher matcher, String s)375   private void reallyTestOneCharNoMatch(CharMatcher matcher, String s) {
376     assertFalse(matcher.matches(s.charAt(0)));
377     assertFalse(matcher.apply(s.charAt(0)));
378     assertEquals(-1, matcher.indexIn(s));
379     assertEquals(-1, matcher.indexIn(s, 0));
380     assertEquals(-1, matcher.indexIn(s, 1));
381     assertEquals(-1, matcher.lastIndexIn(s));
382     assertFalse(matcher.matchesAnyOf(s));
383     assertFalse(matcher.matchesAllOf(s));
384     assertTrue(matcher.matchesNoneOf(s));
385 
386     assertSame(s, matcher.removeFrom(s));
387     assertSame(s, matcher.replaceFrom(s, 'z'));
388     assertSame(s, matcher.replaceFrom(s, "ZZ"));
389     assertSame(s, matcher.trimFrom(s));
390     assertSame(0, matcher.countIn(s));
391   }
392 
reallyTestMatchThenNoMatch(CharMatcher matcher, String s)393   private void reallyTestMatchThenNoMatch(CharMatcher matcher, String s) {
394     assertEquals(0, matcher.indexIn(s));
395     assertEquals(0, matcher.indexIn(s, 0));
396     assertEquals(-1, matcher.indexIn(s, 1));
397     assertEquals(-1, matcher.indexIn(s, 2));
398     assertEquals(0, matcher.lastIndexIn(s));
399     assertTrue(matcher.matchesAnyOf(s));
400     assertFalse(matcher.matchesAllOf(s));
401     assertFalse(matcher.matchesNoneOf(s));
402     assertEquals(s.substring(1), matcher.removeFrom(s));
403     assertEquals("z" + s.substring(1), matcher.replaceFrom(s, 'z'));
404     assertEquals("ZZ" + s.substring(1), matcher.replaceFrom(s, "ZZ"));
405     assertEquals(s.substring(1), matcher.trimFrom(s));
406     assertEquals(1, matcher.countIn(s));
407   }
408 
reallyTestNoMatchThenMatch(CharMatcher matcher, String s)409   private void reallyTestNoMatchThenMatch(CharMatcher matcher, String s) {
410     assertEquals(1, matcher.indexIn(s));
411     assertEquals(1, matcher.indexIn(s, 0));
412     assertEquals(1, matcher.indexIn(s, 1));
413     assertEquals(-1, matcher.indexIn(s, 2));
414     assertEquals(1, matcher.lastIndexIn(s));
415     assertTrue(matcher.matchesAnyOf(s));
416     assertFalse(matcher.matchesAllOf(s));
417     assertFalse(matcher.matchesNoneOf(s));
418     assertEquals(s.substring(0, 1), matcher.removeFrom(s));
419     assertEquals(s.substring(0, 1) + "z", matcher.replaceFrom(s, 'z'));
420     assertEquals(s.substring(0, 1) + "ZZ", matcher.replaceFrom(s, "ZZ"));
421     assertEquals(s.substring(0, 1), matcher.trimFrom(s));
422     assertEquals(1, matcher.countIn(s));
423   }
424 
425   /**
426    * Checks that expected is equals to out, and further, if in is
427    * equals to expected, then out is successfully optimized to be
428    * identical to in, i.e. that "in" is simply returned.
429    */
assertEqualsSame(String expected, String in, String out)430   private void assertEqualsSame(String expected, String in, String out) {
431     if (expected.equals(in)) {
432       assertSame(in, out);
433     } else {
434       assertEquals(expected, out);
435     }
436   }
437 
438   // Test collapse() a little differently than the rest, as we really want to
439   // cover lots of different configurations of input text
testCollapse()440   public void testCollapse() {
441     // collapsing groups of '-' into '_' or '-'
442     doTestCollapse("-", "_");
443     doTestCollapse("x-", "x_");
444     doTestCollapse("-x", "_x");
445     doTestCollapse("--", "_");
446     doTestCollapse("x--", "x_");
447     doTestCollapse("--x", "_x");
448     doTestCollapse("-x-", "_x_");
449     doTestCollapse("x-x", "x_x");
450     doTestCollapse("---", "_");
451     doTestCollapse("--x-", "_x_");
452     doTestCollapse("--xx", "_xx");
453     doTestCollapse("-x--", "_x_");
454     doTestCollapse("-x-x", "_x_x");
455     doTestCollapse("-xx-", "_xx_");
456     doTestCollapse("x--x", "x_x");
457     doTestCollapse("x-x-", "x_x_");
458     doTestCollapse("x-xx", "x_xx");
459     doTestCollapse("x-x--xx---x----x", "x_x_xx_x_x");
460 
461     doTestCollapseWithNoChange("");
462     doTestCollapseWithNoChange("x");
463     doTestCollapseWithNoChange("xx");
464   }
465 
doTestCollapse(String in, String out)466   private void doTestCollapse(String in, String out) {
467     // Try a few different matchers which all match '-' and not 'x'
468     // Try replacement chars that both do and do not change the value.
469     for (char replacement : new char[] { '_', '-' }) {
470       String expected = out.replace('_', replacement);
471       assertEqualsSame(expected, in, is('-').collapseFrom(in, replacement));
472       assertEqualsSame(expected, in, is('-').collapseFrom(in, replacement));
473       assertEqualsSame(expected, in, is('-').or(is('#')).collapseFrom(in, replacement));
474       assertEqualsSame(expected, in, isNot('x').collapseFrom(in, replacement));
475       assertEqualsSame(expected, in, is('x').negate().collapseFrom(in, replacement));
476       assertEqualsSame(expected, in, anyOf("-").collapseFrom(in, replacement));
477       assertEqualsSame(expected, in, anyOf("-#").collapseFrom(in, replacement));
478       assertEqualsSame(expected, in, anyOf("-#123").collapseFrom(in, replacement));
479     }
480   }
481 
doTestCollapseWithNoChange(String inout)482   private void doTestCollapseWithNoChange(String inout) {
483     assertSame(inout, is('-').collapseFrom(inout, '_'));
484     assertSame(inout, is('-').or(is('#')).collapseFrom(inout, '_'));
485     assertSame(inout, isNot('x').collapseFrom(inout, '_'));
486     assertSame(inout, is('x').negate().collapseFrom(inout, '_'));
487     assertSame(inout, anyOf("-").collapseFrom(inout, '_'));
488     assertSame(inout, anyOf("-#").collapseFrom(inout, '_'));
489     assertSame(inout, anyOf("-#123").collapseFrom(inout, '_'));
490     assertSame(inout, CharMatcher.NONE.collapseFrom(inout, '_'));
491   }
492 
testCollapse_any()493   public void testCollapse_any() {
494     assertEquals("", CharMatcher.ANY.collapseFrom("", '_'));
495     assertEquals("_", CharMatcher.ANY.collapseFrom("a", '_'));
496     assertEquals("_", CharMatcher.ANY.collapseFrom("ab", '_'));
497     assertEquals("_", CharMatcher.ANY.collapseFrom("abcd", '_'));
498   }
499 
testTrimFrom()500   public void testTrimFrom() {
501     // trimming -
502     doTestTrimFrom("-", "");
503     doTestTrimFrom("x-", "x");
504     doTestTrimFrom("-x", "x");
505     doTestTrimFrom("--", "");
506     doTestTrimFrom("x--", "x");
507     doTestTrimFrom("--x", "x");
508     doTestTrimFrom("-x-", "x");
509     doTestTrimFrom("x-x", "x-x");
510     doTestTrimFrom("---", "");
511     doTestTrimFrom("--x-", "x");
512     doTestTrimFrom("--xx", "xx");
513     doTestTrimFrom("-x--", "x");
514     doTestTrimFrom("-x-x", "x-x");
515     doTestTrimFrom("-xx-", "xx");
516     doTestTrimFrom("x--x", "x--x");
517     doTestTrimFrom("x-x-", "x-x");
518     doTestTrimFrom("x-xx", "x-xx");
519     doTestTrimFrom("x-x--xx---x----x", "x-x--xx---x----x");
520     // additional testing using the doc example
521     assertEquals("cat", anyOf("ab").trimFrom("abacatbab"));
522   }
523 
doTestTrimFrom(String in, String out)524   private void doTestTrimFrom(String in, String out) {
525     // Try a few different matchers which all match '-' and not 'x'
526     assertEquals(out, is('-').trimFrom(in));
527     assertEquals(out, is('-').or(is('#')).trimFrom(in));
528     assertEquals(out, isNot('x').trimFrom(in));
529     assertEquals(out, is('x').negate().trimFrom(in));
530     assertEquals(out, anyOf("-").trimFrom(in));
531     assertEquals(out, anyOf("-#").trimFrom(in));
532     assertEquals(out, anyOf("-#123").trimFrom(in));
533   }
534 
testTrimLeadingFrom()535   public void testTrimLeadingFrom() {
536     // trimming -
537     doTestTrimLeadingFrom("-", "");
538     doTestTrimLeadingFrom("x-", "x-");
539     doTestTrimLeadingFrom("-x", "x");
540     doTestTrimLeadingFrom("--", "");
541     doTestTrimLeadingFrom("x--", "x--");
542     doTestTrimLeadingFrom("--x", "x");
543     doTestTrimLeadingFrom("-x-", "x-");
544     doTestTrimLeadingFrom("x-x", "x-x");
545     doTestTrimLeadingFrom("---", "");
546     doTestTrimLeadingFrom("--x-", "x-");
547     doTestTrimLeadingFrom("--xx", "xx");
548     doTestTrimLeadingFrom("-x--", "x--");
549     doTestTrimLeadingFrom("-x-x", "x-x");
550     doTestTrimLeadingFrom("-xx-", "xx-");
551     doTestTrimLeadingFrom("x--x", "x--x");
552     doTestTrimLeadingFrom("x-x-", "x-x-");
553     doTestTrimLeadingFrom("x-xx", "x-xx");
554     doTestTrimLeadingFrom("x-x--xx---x----x", "x-x--xx---x----x");
555     // additional testing using the doc example
556     assertEquals("catbab", anyOf("ab").trimLeadingFrom("abacatbab"));
557   }
558 
doTestTrimLeadingFrom(String in, String out)559   private void doTestTrimLeadingFrom(String in, String out) {
560     // Try a few different matchers which all match '-' and not 'x'
561     assertEquals(out, is('-').trimLeadingFrom(in));
562     assertEquals(out, is('-').or(is('#')).trimLeadingFrom(in));
563     assertEquals(out, isNot('x').trimLeadingFrom(in));
564     assertEquals(out, is('x').negate().trimLeadingFrom(in));
565     assertEquals(out, anyOf("-#").trimLeadingFrom(in));
566     assertEquals(out, anyOf("-#123").trimLeadingFrom(in));
567   }
568 
testTrimTrailingFrom()569   public void testTrimTrailingFrom() {
570     // trimming -
571     doTestTrimTrailingFrom("-", "");
572     doTestTrimTrailingFrom("x-", "x");
573     doTestTrimTrailingFrom("-x", "-x");
574     doTestTrimTrailingFrom("--", "");
575     doTestTrimTrailingFrom("x--", "x");
576     doTestTrimTrailingFrom("--x", "--x");
577     doTestTrimTrailingFrom("-x-", "-x");
578     doTestTrimTrailingFrom("x-x", "x-x");
579     doTestTrimTrailingFrom("---", "");
580     doTestTrimTrailingFrom("--x-", "--x");
581     doTestTrimTrailingFrom("--xx", "--xx");
582     doTestTrimTrailingFrom("-x--", "-x");
583     doTestTrimTrailingFrom("-x-x", "-x-x");
584     doTestTrimTrailingFrom("-xx-", "-xx");
585     doTestTrimTrailingFrom("x--x", "x--x");
586     doTestTrimTrailingFrom("x-x-", "x-x");
587     doTestTrimTrailingFrom("x-xx", "x-xx");
588     doTestTrimTrailingFrom("x-x--xx---x----x", "x-x--xx---x----x");
589     // additional testing using the doc example
590     assertEquals("abacat", anyOf("ab").trimTrailingFrom("abacatbab"));
591   }
592 
doTestTrimTrailingFrom(String in, String out)593   private void doTestTrimTrailingFrom(String in, String out) {
594     // Try a few different matchers which all match '-' and not 'x'
595     assertEquals(out, is('-').trimTrailingFrom(in));
596     assertEquals(out, is('-').or(is('#')).trimTrailingFrom(in));
597     assertEquals(out, isNot('x').trimTrailingFrom(in));
598     assertEquals(out, is('x').negate().trimTrailingFrom(in));
599     assertEquals(out, anyOf("-#").trimTrailingFrom(in));
600     assertEquals(out, anyOf("-#123").trimTrailingFrom(in));
601   }
602 
testTrimAndCollapse()603   public void testTrimAndCollapse() {
604     // collapsing groups of '-' into '_' or '-'
605     doTestTrimAndCollapse("", "");
606     doTestTrimAndCollapse("x", "x");
607     doTestTrimAndCollapse("-", "");
608     doTestTrimAndCollapse("x-", "x");
609     doTestTrimAndCollapse("-x", "x");
610     doTestTrimAndCollapse("--", "");
611     doTestTrimAndCollapse("x--", "x");
612     doTestTrimAndCollapse("--x", "x");
613     doTestTrimAndCollapse("-x-", "x");
614     doTestTrimAndCollapse("x-x", "x_x");
615     doTestTrimAndCollapse("---", "");
616     doTestTrimAndCollapse("--x-", "x");
617     doTestTrimAndCollapse("--xx", "xx");
618     doTestTrimAndCollapse("-x--", "x");
619     doTestTrimAndCollapse("-x-x", "x_x");
620     doTestTrimAndCollapse("-xx-", "xx");
621     doTestTrimAndCollapse("x--x", "x_x");
622     doTestTrimAndCollapse("x-x-", "x_x");
623     doTestTrimAndCollapse("x-xx", "x_xx");
624     doTestTrimAndCollapse("x-x--xx---x----x", "x_x_xx_x_x");
625   }
626 
doTestTrimAndCollapse(String in, String out)627   private void doTestTrimAndCollapse(String in, String out) {
628     // Try a few different matchers which all match '-' and not 'x'
629     for (char replacement : new char[] { '_', '-' }) {
630       String expected = out.replace('_', replacement);
631       assertEqualsSame(expected, in, is('-').trimAndCollapseFrom(in, replacement));
632       assertEqualsSame(expected, in, is('-').or(is('#')).trimAndCollapseFrom(in, replacement));
633       assertEqualsSame(expected, in, isNot('x').trimAndCollapseFrom(in, replacement));
634       assertEqualsSame(expected, in, is('x').negate().trimAndCollapseFrom(in, replacement));
635       assertEqualsSame(expected, in, anyOf("-").trimAndCollapseFrom(in, replacement));
636       assertEqualsSame(expected, in, anyOf("-#").trimAndCollapseFrom(in, replacement));
637       assertEqualsSame(expected, in, anyOf("-#123").trimAndCollapseFrom(in, replacement));
638     }
639   }
640 
testReplaceFrom()641   public void testReplaceFrom() {
642     assertEquals("yoho", is('a').replaceFrom("yaha", 'o'));
643     assertEquals("yh", is('a').replaceFrom("yaha", ""));
644     assertEquals("yoho", is('a').replaceFrom("yaha", "o"));
645     assertEquals("yoohoo", is('a').replaceFrom("yaha", "oo"));
646     assertEquals("12 &gt; 5", is('>').replaceFrom("12 > 5", "&gt;"));
647   }
648 
testPrecomputedOptimizations()649   public void testPrecomputedOptimizations() {
650     // These are testing behavior that's never promised by the API.
651     // Some matchers are so efficient that it is a waste of effort to
652     // build a precomputed version.
653     CharMatcher m1 = is('x');
654     assertSame(m1, m1.precomputed());
655     assertSame(m1.toString(), m1.precomputed().toString());
656 
657     CharMatcher m2 = anyOf("Az");
658     assertSame(m2, m2.precomputed());
659     assertSame(m2.toString(), m2.precomputed().toString());
660 
661     CharMatcher m3 = inRange('A', 'Z');
662     assertSame(m3, m3.precomputed());
663     assertSame(m3.toString(), m3.precomputed().toString());
664 
665     assertSame(CharMatcher.NONE, CharMatcher.NONE.precomputed());
666     assertSame(CharMatcher.ANY, CharMatcher.ANY.precomputed());
667   }
668 
669   @GwtIncompatible("java.util.BitSet")
bitSet(String chars)670   private static BitSet bitSet(String chars) {
671     return bitSet(chars.toCharArray());
672   }
673 
674   @GwtIncompatible("java.util.BitSet")
bitSet(char[] chars)675   private static BitSet bitSet(char[] chars) {
676     BitSet tmp = new BitSet();
677     for (int i = 0; i < chars.length; i++) {
678       tmp.set(chars[i]);
679     }
680     return tmp;
681   }
682 
683   @GwtIncompatible("java.util.Random, java.util.BitSet")
testSmallCharMatcher()684   public void testSmallCharMatcher() {
685     CharMatcher len1 = SmallCharMatcher.from(bitSet("#"), "#");
686     CharMatcher len2 = SmallCharMatcher.from(bitSet("ab"), "ab");
687     CharMatcher len3 = SmallCharMatcher.from(bitSet("abc"), "abc");
688     CharMatcher len4 = SmallCharMatcher.from(bitSet("abcd"), "abcd");
689     assertTrue(len1.matches('#'));
690     assertFalse(len1.matches('!'));
691     assertTrue(len2.matches('a'));
692     assertTrue(len2.matches('b'));
693     for (char c = 'c'; c < 'z'; c++) {
694       assertFalse(len2.matches(c));
695     }
696     assertTrue(len3.matches('a'));
697     assertTrue(len3.matches('b'));
698     assertTrue(len3.matches('c'));
699     for (char c = 'd'; c < 'z'; c++) {
700       assertFalse(len3.matches(c));
701     }
702     assertTrue(len4.matches('a'));
703     assertTrue(len4.matches('b'));
704     assertTrue(len4.matches('c'));
705     assertTrue(len4.matches('d'));
706     for (char c = 'e'; c < 'z'; c++) {
707       assertFalse(len4.matches(c));
708     }
709 
710     Random rand = new Random(1234);
711     for (int testCase = 0; testCase < 100; testCase++) {
712       char[] chars = randomChars(rand, rand.nextInt(63) + 1);
713       CharMatcher m = SmallCharMatcher.from(bitSet(chars), new String(chars));
714       checkExactMatches(m, chars);
715     }
716   }
717 
checkExactMatches(CharMatcher m, char[] chars)718   static void checkExactMatches(CharMatcher m, char[] chars) {
719     Set<Character> positive = Sets.newHashSetWithExpectedSize(chars.length);
720     for (int i = 0; i < chars.length; i++) {
721       positive.add(chars[i]);
722     }
723     for (int c = 0; c <= Character.MAX_VALUE; c++) {
724       assertFalse(positive.contains(new Character((char) c)) ^ m.matches((char) c));
725     }
726   }
727 
randomChars(Random rand, int size)728   static char[] randomChars(Random rand, int size) {
729     Set<Character> chars = new HashSet<Character>(size);
730     for (int i = 0; i < size; i++) {
731       char c;
732       while (true) {
733         c = (char) rand.nextInt(Character.MAX_VALUE - Character.MIN_VALUE + 1);
734         if (!chars.contains(c)) {
735           break;
736         }
737       }
738       chars.add(c);
739     }
740     char[] retValue = new char[chars.size()];
741     int i = 0;
742     for (char c : chars) {
743       retValue[i++] = c;
744     }
745     Arrays.sort(retValue);
746     return retValue;
747   }
748 
testToString()749   public void testToString() {
750     assertToStringWorks("CharMatcher.NONE", CharMatcher.anyOf(""));
751     assertToStringWorks("CharMatcher.is('\\u0031')", CharMatcher.anyOf("1"));
752     assertToStringWorks("CharMatcher.isNot('\\u0031')", CharMatcher.isNot('1'));
753     assertToStringWorks("CharMatcher.anyOf(\"\\u0031\\u0032\")", CharMatcher.anyOf("12"));
754     assertToStringWorks("CharMatcher.anyOf(\"\\u0031\\u0032\\u0033\")",
755         CharMatcher.anyOf("321"));
756     assertToStringWorks("CharMatcher.inRange('\\u0031', '\\u0033')",
757         CharMatcher.inRange('1', '3'));
758   }
759 
assertToStringWorks(String expected, CharMatcher matcher)760   private static void assertToStringWorks(String expected, CharMatcher matcher) {
761     assertEquals(expected, matcher.toString());
762     assertEquals(expected, matcher.precomputed().toString());
763     assertEquals(expected, matcher.negate().negate().toString());
764     assertEquals(expected, matcher.negate().precomputed().negate().toString());
765     assertEquals(expected, matcher.negate().precomputed().negate().precomputed().toString());
766   }
767 }
768