1 /* 2 ******************************************************************************* 3 * Copyright (C) 1996-2014, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 package com.ibm.icu.text; 8 9 /** 10 * <code>UnicodeFilter</code> defines a protocol for selecting a 11 * subset of the full range (U+0000 to U+FFFF) of Unicode characters. 12 * Currently, filters are used in conjunction with classes like 13 * <a href="Transliterator.html" title="class in com.ibm.icu.text"><code>Transliterator</code></a> 14 * to only process selected characters through a 15 * transformation. 16 * @stable ICU 2.0 17 */ 18 public abstract class UnicodeFilter implements UnicodeMatcher { 19 20 /** 21 * Returns <tt>true</tt> for characters that are in the selected 22 * subset. In other words, if a character is <b>to be 23 * filtered</b>, then <tt>contains()</tt> returns 24 * <b><tt>false</tt></b>. 25 * @stable ICU 2.0 26 */ contains(int c)27 public abstract boolean contains(int c); 28 29 /** 30 * Default implementation of UnicodeMatcher::matches() for Unicode 31 * filters. Matches a single 16-bit code unit at offset. 32 * @stable ICU 2.0 33 */ matches(Replaceable text, int[] offset, int limit, boolean incremental)34 public int matches(Replaceable text, 35 int[] offset, 36 int limit, 37 boolean incremental) { 38 int c; 39 if (offset[0] < limit && 40 contains(c = text.char32At(offset[0]))) { 41 offset[0] += UTF16.getCharCount(c); 42 return U_MATCH; 43 } 44 if (offset[0] > limit && contains(text.char32At(offset[0]))) { 45 // Backup offset by 1, unless the preceding character is a 46 // surrogate pair -- then backup by 2 (keep offset pointing at 47 // the lead surrogate). 48 --offset[0]; 49 if (offset[0] >= 0) { 50 offset[0] -= UTF16.getCharCount(text.char32At(offset[0])) - 1; 51 } 52 return U_MATCH; 53 } 54 if (incremental && offset[0] == limit) { 55 return U_PARTIAL_MATCH; 56 } 57 return U_MISMATCH; 58 } 59 60 // TODO Remove this when the JDK property implements MemberDoc.isSynthetic 61 /** 62 * (This should not be here; it is declared to make CheckTags 63 * happy. Java inserts a synthetic constructor and CheckTags 64 * can't tell that it's synthetic.) 65 * 66 * @internal 67 * @deprecated This API is ICU internal only. 68 */ 69 @Deprecated UnicodeFilter()70 protected UnicodeFilter() {} 71 } 72