1 /* 2 ******************************************************************************** 3 * Copyright (C) 1997-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************** 6 */ 7 8 #ifndef FILTEREDBRK_H 9 #define FILTEREDBRK_H 10 11 #include "unicode/utypes.h" 12 #include "unicode/brkiter.h" 13 14 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION 15 #ifndef U_HIDE_INTERNAL_API 16 17 U_NAMESPACE_BEGIN 18 19 /** 20 * \file 21 * \brief C++ API: FilteredBreakIteratorBuilder 22 */ 23 24 /** 25 * The BreakIteratorFilter is used to modify the behavior of a BreakIterator 26 * by constructing a new BreakIterator which suppresses certain segment boundaries. 27 * See http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions . 28 * For example, a typical English Sentence Break Iterator would break on the space 29 * in the string "Mr. Smith" (resulting in two segments), 30 * but with "Mr." as an exception, a filtered break iterator 31 * would consider the string "Mr. Smith" to be a single segment. 32 * 33 * <p><b>Note:</b> An instance of {@link BreakIterator} returned by this builder 34 * class currently does not support following operations in this technology preview 35 * version: 36 * <ul> 37 * <li>{@link BreakIterator#next(int32_t) next(int32_t n)}</li> 38 * <li>{@link BreakIterator#previous(void) previous(void)}</li> 39 * <li>{@link BreakIterator#following(int32_t) following(int32_t offset)}</li> 40 * <li>{@link BreakIterator#preceding(int32_t) preceding(int32_t offset)}</li> 41 * </ul> 42 * When one of above methods is called, BreakIterator.DONE will be returned immediately 43 * without updating the internal state. 44 * 45 * @internal technology preview 46 */ 47 class U_I18N_API FilteredBreakIteratorBuilder : public UObject { 48 public: 49 /** 50 * destructor. 51 * @internal technology preview 52 */ 53 virtual ~FilteredBreakIteratorBuilder(); 54 55 /** 56 * Construct a FilteredBreakIteratorBuilder based on rules in a locale. 57 * The rules are taken from CLDR exception data for the locale, 58 * see http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions 59 * This is the equivalent of calling createInstance(UErrorCode&) 60 * and then repeatedly calling addNoBreakAfter(...) with the contents 61 * of the CLDR exception data. 62 * @param where the locale. 63 * @param status The error code. 64 * @return the new builder 65 * @internal technology preview 66 */ 67 static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status); 68 69 /** 70 * Construct an empty FilteredBreakIteratorBuilder. 71 * In this state, it will not suppress any segment boundaries. 72 * @param status The error code. 73 * @return the new builder 74 * @internal technology preview 75 */ 76 static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status); 77 78 /** 79 * Suppress a certain string from being the end of a segment. 80 * For example, suppressing "Mr.", then segments ending in "Mr." will not be returned 81 * by the iterator. 82 * @param string the string to suppress, such as "Mr." 83 * @param status error code 84 * @return returns TRUE if the string was not present and now added, 85 * FALSE if the call was a no-op because the string was already being suppressed. 86 * @internal technology preview 87 */ 88 virtual UBool suppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; 89 90 /** 91 * Stop suppressing a certain string from being the end of the segment. 92 * This function does not create any new segment boundaries, but only serves to un-do 93 * the effect of earlier calls to suppressBreakAfter, or to un-do the effect of 94 * locale data which may be suppressing certain strings. 95 * @param exception the exception to remove 96 * @param status error code 97 * @return returns TRUE if the string was present and now removed, 98 * FALSE if the call was a no-op because the string was not being suppressed. 99 * @internal technology preview 100 */ 101 virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; 102 103 /** 104 * Wrap (adopt) an existing break iterator in a new filtered instance. 105 * The resulting BreakIterator is owned by the caller. 106 * The BreakIteratorFilter may be destroyed before the BreakIterator is destroyed. 107 * Note that the adoptBreakIterator is adopted by the new BreakIterator 108 * and should no longer be used by the caller. 109 * The FilteredBreakIteratorBuilder may be reused. 110 * @param adoptBreakIterator the break iterator to adopt 111 * @param status error code 112 * @return the new BreakIterator, owned by the caller. 113 * @internal technology preview 114 */ 115 virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0; 116 117 protected: 118 /** 119 * For subclass use 120 * @internal technology preview 121 */ 122 FilteredBreakIteratorBuilder(); 123 }; 124 125 126 U_NAMESPACE_END 127 128 #endif /* U_HIDE_INTERNAL_API */ 129 #endif // #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION 130 131 #endif // #ifndef FILTEREDBRK_H 132