1 /*
2 ********************************************************************************
3 *   Copyright (C) 1997-2014, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 ********************************************************************************
6 */
7 
8 #ifndef FILTEREDBRK_H
9 #define FILTEREDBRK_H
10 
11 #include "unicode/utypes.h"
12 #include "unicode/brkiter.h"
13 
14 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
15 #ifndef U_HIDE_INTERNAL_API
16 
17 U_NAMESPACE_BEGIN
18 
19 /**
20  * \file
21  * \brief C++ API: FilteredBreakIteratorBuilder
22  */
23 
24 /**
25  * The FilteredBreakIteratorBuilder is used to modify the behavior of a BreakIterator
26  *  by constructing a new BreakIterator which suppresses certain segment boundaries.
27  *  See  http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions .
28  *  For example, a typical English Sentence Break Iterator would break on the space
29  *  in the string "Mr. Smith" (resulting in two segments),
30  *  but with "Mr." as an exception, a filtered break iterator
31  *  would consider the string "Mr. Smith" to be a single segment.
32  *
33  * <p><b>Note:</b> An instance of {@link BreakIterator} returned by this builder
34  * class currently does not support the following operations in this technology
35  * preview version:
36  * <ul>
37  *   <li>{@link BreakIterator#next(int32_t) next(int32_t n)}</li>
38  *   <li>{@link BreakIterator#previous(void) previous(void)}</li>
39  *   <li>{@link BreakIterator#following(int32_t) following(int32_t offset)}</li>
40  *   <li>{@link BreakIterator#preceding(int32_t) preceding(int32_t offset)}</li>
41  * </ul>
42  * When one of the above methods is called, BreakIterator::DONE will be returned
43  * immediately, without updating the internal state.
44  *
45  * @internal technology preview
46  */
47 class U_I18N_API FilteredBreakIteratorBuilder : public UObject {
48  public:
49   /**
50    * Destructor.
51    * @internal technology preview
52    */
53   virtual ~FilteredBreakIteratorBuilder();
54 
55   /**
56    * Construct a FilteredBreakIteratorBuilder based on rules in a locale.
57    * The rules are taken from CLDR exception data for the locale,
58    *  see http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions
59    *  This is the equivalent of calling createInstance(UErrorCode&)
60    *    and then repeatedly calling suppressBreakAfter(...) with the contents
61    *    of the CLDR exception data.
62    * @param where the locale.
63    * @param status The error code.
64    * @return the new builder
65    * @internal technology preview
66    */
67   static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status);
68 
69   /**
70    * Construct an empty FilteredBreakIteratorBuilder.
71    * In this state, it will not suppress any segment boundaries.
72    * @param status The error code.
73    * @return the new builder
74    * @internal technology preview
75    */
76   static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status);
77 
78   /**
79    * Suppress a certain string from being the end of a segment.
80    * For example, after suppressing "Mr.", segments ending in "Mr." will not be returned
81    * by the iterator.
82    * @param string the string to suppress, such as "Mr."
83    * @param status error code
84    * @return TRUE if the string was not present and has now been added,
85    * FALSE if the call was a no-op because the string was already being suppressed.
86    * @internal technology preview
87    */
88   virtual UBool suppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;
89 
90   /**
91    * Stop suppressing a certain string from being the end of the segment.
92    * This function does not create any new segment boundaries, but only serves to un-do
93    * the effect of earlier calls to suppressBreakAfter, or to un-do the effect of
94    * locale data which may be suppressing certain strings.
95    * @param string the string to stop suppressing, such as "Mr."
96    * @param status error code
97    * @return TRUE if the string was present and has now been removed,
98    * FALSE if the call was a no-op because the string was not being suppressed.
99    * @internal technology preview
100    */
101   virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;
102 
103   /**
104    * Wrap (adopt) an existing break iterator in a new filtered instance.
105    * The resulting BreakIterator is owned by the caller.
106    * The FilteredBreakIteratorBuilder may be destroyed before the BreakIterator is destroyed.
107    * Note that the adoptBreakIterator is adopted by the new BreakIterator
108    * and should no longer be used by the caller.
109    * The FilteredBreakIteratorBuilder may be reused.
110    * @param adoptBreakIterator the break iterator to adopt
111    * @param status error code
112    * @return the new BreakIterator, owned by the caller.
113    * @internal technology preview
114    */
115   virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;
116 
117  protected:
118   /**
119    * Default constructor, for subclass use only.
120    * @internal technology preview
121    */
122   FilteredBreakIteratorBuilder();
123 };
124 
125 
126 U_NAMESPACE_END
127 
128 #endif  /* U_HIDE_INTERNAL_API */
129 #endif // #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
130 
131 #endif // #ifndef FILTEREDBRK_H
132