1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1999-2010, International Business Machines Corporation and others.
6 * All Rights Reserved.
7 **********************************************************************
8 *   Date        Name        Description
9 *   11/17/99    aliu        Creation.
10 **********************************************************************
11 */
12 #ifndef UNIFILT_H
13 #define UNIFILT_H
14 
15 #include "unicode/unifunct.h"
16 #include "unicode/unimatch.h"
17 
18 /**
19  * \file
20  * \brief C++ API: Unicode Filter
21  */
22 
23 U_NAMESPACE_BEGIN
24 
/**
 * U_ETHER is the sentinel character value used to represent positions
 * outside a range.  For example, the transliterator uses it to stand
 * for characters outside the range contextStart..contextLimit-1.  This
 * lets rules and UnicodeSets explicitly match text that lies outside a
 * defined range.
 *
 * The macro expands to the constant 0xFFFF converted to UChar.
 * @stable ICU 3.0
 */
#define U_ETHER ((UChar)0xFFFF)
34 
35 /**
36  *
37  * <code>UnicodeFilter</code> defines a protocol for selecting a
38  * subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
39  * Currently, filters are used in conjunction with classes like {@link
40  * Transliterator} to only process selected characters through a
41  * transformation.
42  *
43  * <p>Note: UnicodeFilter currently stubs out two pure virtual methods
44  * of its base class, UnicodeMatcher.  These methods are toPattern()
45  * and matchesIndexValue().  This is done so that filter classes that
46  * are not actually used as matchers -- specifically, those in the
47  * UnicodeFilterLogic component, and those in tests -- can continue to
48  * work without defining these methods.  As long as a filter is not
49  * used in an RBT during real transliteration, these methods will not
50  * be called.  However, this breaks the UnicodeMatcher base class
51  * protocol, and it is not a correct solution.
52  *
53  * <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter
54  * hierarchy and either redesign it, or simply remove the stubs in
55  * UnicodeFilter and force subclasses to implement the full
56  * UnicodeMatcher protocol.
57  *
58  * @see UnicodeFilterLogic
59  * @stable ICU 2.0
60  */
61 class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher {
62 
63 public:
64     /**
65      * Destructor
66      * @stable ICU 2.0
67      */
68     virtual ~UnicodeFilter();
69 
70     /**
71      * Returns <tt>true</tt> for characters that are in the selected
72      * subset.  In other words, if a character is <b>to be
73      * filtered</b>, then <tt>contains()</tt> returns
74      * <b><tt>false</tt></b>.
75      * @stable ICU 2.0
76      */
77     virtual UBool contains(UChar32 c) const = 0;
78 
79     /**
80      * UnicodeFunctor API.  Cast 'this' to a UnicodeMatcher* pointer
81      * and return the pointer.
82      * @stable ICU 2.4
83      */
84     virtual UnicodeMatcher* toMatcher() const;
85 
86     /**
87      * Implement UnicodeMatcher API.
88      * @stable ICU 2.4
89      */
90     virtual UMatchDegree matches(const Replaceable& text,
91                                  int32_t& offset,
92                                  int32_t limit,
93                                  UBool incremental);
94 
95     /**
96      * UnicodeFunctor API.  Nothing to do.
97      * @stable ICU 2.4
98      */
99     virtual void setData(const TransliterationRuleData*);
100 
101     /**
102      * ICU "poor man's RTTI", returns a UClassID for this class.
103      *
104      * @stable ICU 2.2
105      */
106     static UClassID U_EXPORT2 getStaticClassID();
107 
108 protected:
109 
110     /*
111      * Since this class has pure virtual functions,
112      * a constructor can't be used.
113      * @stable ICU 2.0
114      */
115 /*    UnicodeFilter();*/
116 };
117 
118 /*inline UnicodeFilter::UnicodeFilter() {}*/
119 
120 U_NAMESPACE_END
121 
122 #endif
123