1 /*
2  *******************************************************************************
3  * Copyright (C) 1996-2014, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 package com.ibm.icu.text;
8 
9 /**
10  * <code>UnicodeFilter</code> defines a protocol for selecting a
11  * subset of the full range (U+0000 to U+FFFF) of Unicode characters.
12  * Currently, filters are used in conjunction with classes like
13  * <a href="Transliterator.html" title="class in com.ibm.icu.text"><code>Transliterator</code></a>
14  * to only process selected characters through a
15  * transformation.
16  * @stable ICU 2.0
17  */
18 public abstract class UnicodeFilter implements UnicodeMatcher {
19 
20     /**
21      * Returns <tt>true</tt> for characters that are in the selected
22      * subset.  In other words, if a character is <b>to be
23      * filtered</b>, then <tt>contains()</tt> returns
24      * <b><tt>false</tt></b>.
25      * @stable ICU 2.0
26      */
contains(int c)27     public abstract boolean contains(int c);
28 
29     /**
30      * Default implementation of UnicodeMatcher::matches() for Unicode
31      * filters.  Matches a single 16-bit code unit at offset.
32      * @stable ICU 2.0
33      */
matches(Replaceable text, int[] offset, int limit, boolean incremental)34     public int matches(Replaceable text,
35                        int[] offset,
36                        int limit,
37                        boolean incremental) {
38         int c;
39         if (offset[0] < limit &&
40             contains(c = text.char32At(offset[0]))) {
41             offset[0] += UTF16.getCharCount(c);
42             return U_MATCH;
43         }
44         if (offset[0] > limit && contains(text.char32At(offset[0]))) {
45             // Backup offset by 1, unless the preceding character is a
46             // surrogate pair -- then backup by 2 (keep offset pointing at
47             // the lead surrogate).
48             --offset[0];
49             if (offset[0] >= 0) {
50                 offset[0] -= UTF16.getCharCount(text.char32At(offset[0])) - 1;
51             }
52             return U_MATCH;
53         }
54         if (incremental && offset[0] == limit) {
55             return U_PARTIAL_MATCH;
56         }
57         return U_MISMATCH;
58     }
59 
60     // TODO Remove this when the JDK property implements MemberDoc.isSynthetic
61     /**
62      * (This should not be here; it is declared to make CheckTags
63      * happy.  Java inserts a synthetic constructor and CheckTags
64      * can't tell that it's synthetic.)
65      *
66      * @internal
67      * @deprecated This API is ICU internal only.
68      */
69     @Deprecated
UnicodeFilter()70     protected UnicodeFilter() {}
71 }
72