1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 * Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Date Name Description 7 * 07/18/01 aliu Creation. 8 ********************************************************************** 9 */ 10 #ifndef UNIMATCH_H 11 #define UNIMATCH_H 12 13 #include "unicode/utypes.h" 14 15 /** 16 * \file 17 * \brief C++ API: Unicode Matcher 18 */ 19 20 21 U_NAMESPACE_BEGIN 22 23 class Replaceable; 24 class UnicodeString; 25 class UnicodeSet; 26 27 /** 28 * Constants returned by <code>UnicodeMatcher::matches()</code> 29 * indicating the degree of match. 30 * @stable ICU 2.4 31 */ 32 enum UMatchDegree { 33 /** 34 * Constant returned by <code>matches()</code> indicating a 35 * mismatch between the text and this matcher. The text contains 36 * a character which does not match, or the text does not contain 37 * all desired characters for a non-incremental match. 38 * @stable ICU 2.4 39 */ 40 U_MISMATCH, 41 42 /** 43 * Constant returned by <code>matches()</code> indicating a 44 * partial match between the text and this matcher. This value is 45 * only returned for incremental match operations. All characters 46 * of the text match, but more characters are required for a 47 * complete match. Alternatively, for variable-length matchers, 48 * all characters of the text match, and if more characters were 49 * supplied at limit, they might also match. 50 * @stable ICU 2.4 51 */ 52 U_PARTIAL_MATCH, 53 54 /** 55 * Constant returned by <code>matches()</code> indicating a 56 * complete match between the text and this matcher. For an 57 * incremental variable-length match, this value is returned if 58 * the given text matches, and it is known that additional 59 * characters would not alter the extent of the match. 60 * @stable ICU 2.4 61 */ 62 U_MATCH 63 }; 64 65 /** 66 * <code>UnicodeMatcher</code> defines a protocol for objects that can 67 * match a range of characters in a Replaceable string. 68 * @stable ICU 2.4 69 */ 70 class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ { 71 72 public: 73 /** 74 * Destructor. 75 * @stable ICU 2.4 76 */ 77 virtual ~UnicodeMatcher(); 78 79 /** 80 * Return a UMatchDegree value indicating the degree of match for 81 * the given text at the given offset. Zero, one, or more 82 * characters may be matched. 83 * 84 * Matching in the forward direction is indicated by limit > 85 * offset. Characters from offset forwards to limit-1 will be 86 * considered for matching. 87 * 88 * Matching in the reverse direction is indicated by limit < 89 * offset. Characters from offset backwards to limit+1 will be 90 * considered for matching. 91 * 92 * If limit == offset then the only match possible is a zero 93 * character match (which subclasses may implement if desired). 94 * 95 * As a side effect, advance the offset parameter to the limit of 96 * the matched substring. In the forward direction, this will be 97 * the index of the last matched character plus one. In the 98 * reverse direction, this will be the index of the last matched 99 * character minus one. 100 * 101 * <p>Note: This method is not const because some classes may 102 * modify their state as the result of a match. 103 * 104 * @param text the text to be matched 105 * @param offset on input, the index into text at which to begin 106 * matching. On output, the limit of the matched text. The 107 * number of matched characters is the output value of offset 108 * minus the input value. Offset should always point to the 109 * HIGH SURROGATE (leading code unit) of a pair of surrogates, 110 * both on entry and upon return. 111 * @param limit the limit index of text to be matched. Greater 112 * than offset for a forward direction match, less than offset for 113 * a backward direction match. The last character to be 114 * considered for matching will be text.charAt(limit-1) in the 115 * forward direction or text.charAt(limit+1) in the backward 116 * direction. 117 * @param incremental if TRUE, then assume further characters may 118 * be inserted at limit and check for partial matching. Otherwise 119 * assume the text as given is complete. 120 * @return a match degree value indicating a full match, a partial 121 * match, or a mismatch. If incremental is FALSE then 122 * U_PARTIAL_MATCH should never be returned. 123 * @stable ICU 2.4 124 */ 125 virtual UMatchDegree matches(const Replaceable& text, 126 int32_t& offset, 127 int32_t limit, 128 UBool incremental) = 0; 129 130 /** 131 * Returns a string representation of this matcher. If the result of 132 * calling this function is passed to the appropriate parser, it 133 * will produce another matcher that is equal to this one. 134 * @param result the string to receive the pattern. Previous 135 * contents will be deleted. 136 * @param escapeUnprintable if TRUE then convert unprintable 137 * character to their hex escape representations, \\uxxxx or 138 * \\Uxxxxxxxx. Unprintable characters are those other than 139 * U+000A, U+0020..U+007E. 140 * @stable ICU 2.4 141 */ 142 virtual UnicodeString& toPattern(UnicodeString& result, 143 UBool escapeUnprintable = FALSE) const = 0; 144 145 /** 146 * Returns TRUE if this matcher will match a character c, where c 147 * & 0xFF == v, at offset, in the forward direction (with limit > 148 * offset). This is used by <tt>RuleBasedTransliterator</tt> for 149 * indexing. 150 * @stable ICU 2.4 151 */ 152 virtual UBool matchesIndexValue(uint8_t v) const = 0; 153 154 /** 155 * Union the set of all characters that may be matched by this object 156 * into the given set. 157 * @param toUnionTo the set into which to union the source characters 158 * @stable ICU 2.4 159 */ 160 virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0; 161 }; 162 163 U_NAMESPACE_END 164 165 #endif 166