1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ***************************************************************************
5 *   Copyright (C) 1999-2016 International Business Machines Corporation   *
6 *   and others. All rights reserved.                                      *
7 ***************************************************************************
8 
9 **********************************************************************
10 *   Date        Name        Description
11 *   10/22/99    alan        Creation.
12 *   11/11/99    rgillam     Complete port from Java.
13 **********************************************************************
14 */
15 
16 #ifndef RBBI_H
17 #define RBBI_H
18 
19 #include "unicode/utypes.h"
20 
21 /**
22  * \file
23  * \brief C++ API: Rule Based Break Iterator
24  */
25 
26 #if !UCONFIG_NO_BREAK_ITERATION
27 
28 #include "unicode/brkiter.h"
29 #include "unicode/udata.h"
30 #include "unicode/parseerr.h"
31 #include "unicode/schriter.h"
32 #include "unicode/uchriter.h"
33 
34 
35 struct UTrie;   // forward declaration; not referenced elsewhere in the visible part of this header
36 
37 U_NAMESPACE_BEGIN
38 
39 /** @internal */
40 struct RBBIDataHeader;                // parameter type of the private RuleBasedBreakIterator constructor
41 class  RuleBasedBreakIteratorTables;  // not referenced elsewhere in the visible part of this header
42 class  BreakIterator;                 // base class of RuleBasedBreakIterator, declared below
43 class  RBBIDataWrapper;               // type pointed to by RuleBasedBreakIterator::fData
44 class  UStack;                        // type pointed to by RuleBasedBreakIterator::fLanguageBreakEngines
45 class  LanguageBreakEngine;           // type returned by RuleBasedBreakIterator::getLanguageBreakEngine()
46 class  UnhandledEngine;               // type pointed to by RuleBasedBreakIterator::fUnhandledBreakEngine
47 struct RBBIStateTable;                // parameter type of handleNext() and handlePrevious()
48 
49 
50 
51 
52 /**
53  *
54  * A subclass of BreakIterator whose behavior is specified using a list of rules.
55  * <p>Instances of this class are most commonly created by the factory methods of
56  *  BreakIterator::createWordInstance(), BreakIterator::createLineInstance(), etc.,
57  *  and then used via the abstract API in class BreakIterator</p>
58  *
59  * <p>See the ICU User Guide for information on Break Iterator Rules.</p>
60  *
61  * <p>This class is not intended to be subclassed.</p>
62  */
63 class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator {
64 
65 private:
66     /**
67      * The UText through which this BreakIterator accesses the text
68      * @internal
69      */
70     UText  *fText;
71 
72     /**
73      *   A character iterator that refers to the same text as the UText, above.
74      *   Only included for compatibility with old API, which was based on CharacterIterators.
75      *   Value may be adopted from outside, or one of fSCharIter or fDCharIter, below.
76      */
77     CharacterIterator  *fCharIter;
78 
79     /**
80      *   When the input text is provided by a UnicodeString, this will point to
81      *    a CharacterIterator that wraps that data.  Needed only for the
82      *    implementation of getText(), a backwards compatibility issue.
83      */
84     StringCharacterIterator *fSCharIter;
85 
86     /**
87      *  When the input text is provided by a UText, this
88      *    dummy CharacterIterator over an empty string will
89      *    be returned from getText()
90      */
91     UCharCharacterIterator *fDCharIter;
92 
93     /**
94      * The rule data for this BreakIterator instance
95      * @internal
96      */
97     RBBIDataWrapper    *fData;
98 
99     /** Index of the Rule {tag} values for the most recent match.
100      *  @internal
101     */
102     int32_t             fLastRuleStatusIndex;
103 
104     /**
105      * Rule tag value valid flag.
106      * Some iterator operations don't intrinsically set the correct tag value.
107      * This flag lets us lazily compute the value if we are ever asked for it.
108      * @internal
109      */
110     UBool               fLastStatusIndexValid;
111 
112     /**
113      * Counter for the number of characters encountered with the "dictionary"
114      *   flag set.
115      * @internal
116      */
117     uint32_t            fDictionaryCharCount;
118 
119     /**
120      * When a range of characters is divided up using the dictionary, the break
121      * positions that are discovered are stored here, preventing us from having
122      * to use either the dictionary or the state table again until the iterator
123      * leaves this range of text. Has the most impact for line breaking.
124      * @internal
125      */
126     int32_t*            fCachedBreakPositions;
127 
128     /**
129      * The number of elements in fCachedBreakPositions
130      * @internal
131      */
132     int32_t             fNumCachedBreakPositions;
133 
134     /**
135      * If fCachedBreakPositions is not null, this indicates which item in the
136      * cache the current iteration position refers to
137      * @internal
138      */
139     int32_t             fPositionInCache;
140 
141     /**
142      *
143      * If present, UStack of LanguageBreakEngine objects that might handle
144      * dictionary characters. Searched from top to bottom to find an object to
145      * handle a given character.
146      * @internal
147      */
148     UStack              *fLanguageBreakEngines;
149 
150     /**
151      *
152      * If present, the special LanguageBreakEngine used for handling
153      * characters that are in the dictionary set, but not handled by any
154      * LanguageBreakEngine.
155      * @internal
156      */
157     UnhandledEngine     *fUnhandledBreakEngine;
158 
159     /**
160      *
161      * The type of the break iterator, or -1 if it has not been set.
162      * @internal
163      */
164     int32_t             fBreakType;
165 
166     //=======================================================================
167     // constructors
168     //=======================================================================
169 
170     /**
171      * Constructor from a flattened set of RBBI data in malloced memory.
172      *             RuleBasedBreakIterators built from a custom set of rules
173      *             are created via this constructor; the rules are compiled
174      *             into memory, then the break iterator is constructed here.
175      *
176      *             The break iterator adopts the memory, and will
177      *             free it when done.
178      * @internal
179      */
180     RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
181 
182 
183     friend class RBBIRuleBuilder;
184     /** @internal */
185     friend class BreakIterator;
186 
187 
188 
189 public:
190 
191     /** Default constructor.  Creates an empty shell of an iterator, with no
192      *  rules or text to iterate over.   Object can subsequently be assigned to.
193      *  @stable ICU 2.2
194      */
195     RuleBasedBreakIterator();
196 
197     /**
198      * Copy constructor.  Will produce a break iterator with the same behavior,
199      * and which iterates over the same text, as the one passed in.
200      * @param that The RuleBasedBreakIterator passed to be copied
201      * @stable ICU 2.0
202      */
203     RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
204 
205     /**
206      * Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
207      * @param rules The break rules to be used.
208      * @param parseError  In the event of a syntax error in the rules, provides the location
209      *                    within the rules of the problem.
210      * @param status Information on any errors encountered.
211      * @stable ICU 2.2
212      */
213     RuleBasedBreakIterator( const UnicodeString    &rules,
214                              UParseError           &parseError,
215                              UErrorCode            &status);
216 
217     /**
218      * Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
219      * Binary rules are obtained from RuleBasedBreakIterator::getBinaryRules().
220      * Construction of a break iterator in this way is substantially faster than
221      * construction from source rules.
222      *
223      * Ownership of the storage containing the compiled rules remains with the
224      * caller of this function.  The compiled rules must not be modified or
225      * deleted during the life of the break iterator.
226      *
227      * The compiled rules are not compatible across different major versions of ICU.
228      * The compiled rules are compatible only between machines with the same
229      * byte ordering (little or big endian) and the same base character set family
230      * (ASCII or EBCDIC).
231      *
232      * @see #getBinaryRules
233      * @param compiledRules A pointer to the compiled break rules to be used.
234      * @param ruleLength The length of the compiled break rules, in bytes.  This
235      *   corresponds to the length value produced by getBinaryRules().
236      * @param status Information on any errors encountered, including invalid
237      *   binary rules.
238      * @stable ICU 4.8
239      */
240     RuleBasedBreakIterator(const uint8_t *compiledRules,
241                            uint32_t       ruleLength,
242                            UErrorCode    &status);
243 
244     /**
245      * This constructor uses the udata interface to create a BreakIterator
246      * whose internal tables live in a memory-mapped file.  "image" is an
247      * ICU UDataMemory handle for the pre-compiled break iterator tables.
248      * @param image handle to the memory image for the break iterator data.
249      *        Ownership of the UDataMemory handle passes to the Break Iterator,
250      *        which will be responsible for closing it when it is no longer needed.
251      * @param status Information on any errors encountered.
252      * @see udata_open
253      * @see #getBinaryRules
254      * @stable ICU 2.8
255      */
256     RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
257 
258     /**
259      * Destructor
260      *  @stable ICU 2.0
261      */
262     virtual ~RuleBasedBreakIterator();
263 
264     /**
265      * Assignment operator.  Sets this iterator to have the same behavior,
266      * and iterate over the same text, as the one passed in.
267      * @param that The RuleBasedBreakIterator passed in
268      * @return the newly created RuleBasedBreakIterator
269      *  @stable ICU 2.0
270      */
271     RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
272 
273     /**
274      * Equality operator.  Returns TRUE if both BreakIterators are of the
275      * same class, have the same behavior, and iterate over the same text.
276      * @param that The BreakIterator to be compared for equality
277      * @return TRUE if both BreakIterators are of the
278      * same class, have the same behavior, and iterate over the same text.
279      *  @stable ICU 2.0
280      */
281     virtual UBool operator==(const BreakIterator& that) const;
282 
283     /**
284      * Not-equal operator.  If operator== returns TRUE, this returns FALSE,
285      * and vice versa.
286      * @param that The BreakIterator to be compared for inequality
287      * @return TRUE if both BreakIterators are not the same.
288      *  @stable ICU 2.0
289      */
290     UBool operator!=(const BreakIterator& that) const;
291 
292     /**
293      * Returns a newly-constructed RuleBasedBreakIterator with the same
294      * behavior, and iterating over the same text, as this one.
295      * Differs from the copy constructor in that it is polymorphic, and
296      * will correctly clone (copy) a derived class.
297      * clone() is thread safe.  Multiple threads may simultaneously
298      * clone the same source break iterator.
299      * @return a newly-constructed RuleBasedBreakIterator
300      * @stable ICU 2.0
301      */
302     virtual BreakIterator* clone() const;
303 
304     /**
305      * Compute a hash code for this BreakIterator
306      * @return A hash code
307      *  @stable ICU 2.0
308      */
309     virtual int32_t hashCode(void) const;
310 
311     /**
312      * Returns the description used to create this iterator
313      * @return the description used to create this iterator
314      *  @stable ICU 2.0
315      */
316     virtual const UnicodeString& getRules(void) const;
317 
318     //=======================================================================
319     // BreakIterator overrides
320     //=======================================================================
321 
322     /**
323      * <p>
324      * Return a CharacterIterator over the text being analyzed.
325      * The returned character iterator is owned by the break iterator, and must
326      * not be deleted by the caller.  Repeated calls to this function may
327      * return the same CharacterIterator.
328      * </p>
329      * <p>
330      * The returned character iterator must not be used concurrently with
331      * the break iterator.  If concurrent operation is needed, clone the
332      * returned character iterator first and operate on the clone.
333      * </p>
334      * <p>
335      * When the break iterator is operating on text supplied via a UText,
336      * this function will fail.  Lacking any way to signal failures, it
337      * returns a CharacterIterator containing no text.
338      * The function getUText() provides similar functionality,
339      * is reliable, and is more efficient.
340      * </p>
341      *
342      * TODO:  deprecate this function?
343      *
344      * @return An iterator over the text being analyzed.
345      * @stable ICU 2.0
346      */
347     virtual  CharacterIterator& getText(void) const;
348 
349 
350     /**
351       *  Get a UText for the text being analyzed.
352       *  The returned UText is a shallow clone of the UText used internally
353       *  by the break iterator implementation.  It can safely be used to
354       *  access the text without impacting any break iterator operations,
355       *  but the underlying text itself must not be altered.
356       *
357       * @param fillIn A UText to be filled in.  If NULL, a new UText will be
358       *           allocated to hold the result.
359       * @param status receives any error codes.
360       * @return   The current UText for this break iterator.  If an input
361       *           UText was provided, it will always be returned.
362       * @stable ICU 3.4
363       */
364      virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
365 
366     /**
367      * Set the iterator to analyze a new piece of text.  This function resets
368      * the current iteration position to the beginning of the text.
369      * @param newText An iterator over the text to analyze.  The BreakIterator
370      * takes ownership of the character iterator.  The caller MUST NOT delete it!
371      *  @stable ICU 2.0
372      */
373     virtual void adoptText(CharacterIterator* newText);
374 
375     /**
376      * Set the iterator to analyze a new piece of text.  This function resets
377      * the current iteration position to the beginning of the text.
378      *
379      * The BreakIterator will retain a reference to the supplied string.
380      * The caller must not modify or delete the text while the BreakIterator
381      * retains the reference.
382      *
383      * @param newText The text to analyze.
384      *  @stable ICU 2.0
385      */
386     virtual void setText(const UnicodeString& newText);
387 
388     /**
389      * Reset the break iterator to operate over the text represented by
390      * the UText.  The iterator position is reset to the start.
391      *
392      * This function makes a shallow clone of the supplied UText.  This means
393      * that the caller is free to immediately close or otherwise reuse the
394      * UText that was passed as a parameter, but that the underlying text itself
395      * must not be altered while being referenced by the break iterator.
396      *
397      * @param text    The UText used to change the text.
398      * @param status  Receives any error codes.
399      * @stable ICU 3.4
400      */
401     virtual void  setText(UText *text, UErrorCode &status);
402 
403     /**
404      * Sets the current iteration position to the beginning of the text, position zero.
405      * @return The offset of the beginning of the text, zero.
406      *  @stable ICU 2.0
407      */
408     virtual int32_t first(void);
409 
410     /**
411      * Sets the current iteration position to the end of the text.
412      * @return The text's past-the-end offset.
413      *  @stable ICU 2.0
414      */
415     virtual int32_t last(void);
416 
417     /**
418      * Advances the iterator either forward or backward the specified number of steps.
419      * Negative values move backward, and positive values move forward.  This is
420      * equivalent to repeatedly calling next() or previous().
421      * @param n The number of steps to move.  The sign indicates the direction
422      * (negative is backwards, and positive is forwards).
423      * @return The character offset of the boundary position n boundaries away from
424      * the current one.
425      *  @stable ICU 2.0
426      */
427     virtual int32_t next(int32_t n);
428 
429     /**
430      * Advances the iterator to the next boundary position.
431      * @return The position of the first boundary after this one.
432      *  @stable ICU 2.0
433      */
434     virtual int32_t next(void);
435 
436     /**
437      * Moves the iterator backwards, to the last boundary preceding this one.
438      * @return The position of the last boundary position preceding this one.
439      *  @stable ICU 2.0
440      */
441     virtual int32_t previous(void);
442 
443     /**
444      * Sets the iterator to refer to the first boundary position following
445      * the specified position.
446      * @param offset The position from which to begin searching for a break position.
447      * @return The position of the first break after the specified offset.
448      *  @stable ICU 2.0
449      */
450     virtual int32_t following(int32_t offset);
451 
452     /**
453      * Sets the iterator to refer to the last boundary position before the
454      * specified position.
455      * @param offset The position to begin searching for a break from.
456      * @return The position of the last boundary before the starting position.
457      *  @stable ICU 2.0
458      */
459     virtual int32_t preceding(int32_t offset);
460 
461     /**
462      * Returns true if the specified position is a boundary position.  As a side
463      * effect, leaves the iterator pointing to the first boundary position at
464      * or after "offset".
465      * @param offset the offset to check.
466      * @return True if "offset" is a boundary position.
467      *  @stable ICU 2.0
468      */
469     virtual UBool isBoundary(int32_t offset);
470 
471     /**
472      * Returns the current iteration position.
473      * @return The current iteration position.
474      * @stable ICU 2.0
475      */
476     virtual int32_t current(void) const;
477 
478 
479     /**
480      * Return the status tag from the break rule that determined the most recently
481      * returned break position.  For break rules that do not specify a
482      * status, a default value of 0 is returned.  If more than one break rule
483      * would cause a boundary to be located at some position in the text,
484      * the numerically largest of the applicable status values is returned.
485      * <p>
486      * Of the standard types of ICU break iterators, only word break and
487      * line break provide status values.  The values are defined in
488      * the header file ubrk.h.  For Word breaks, the status allows distinguishing between words
489      * that contain alphabetic letters, "words" that appear to be numbers,
490      * punctuation and spaces, words containing ideographic characters, and
491      * more.  For Line Break, the status distinguishes between hard (mandatory) breaks
492      * and soft (potential) break positions.
493      * <p>
494      * <code>getRuleStatus()</code> can be called after obtaining a boundary
495      * position from <code>next()</code>, <code>previous()</code>, or
496      * any other break iterator function that returns a boundary position.
497      * <p>
498      * When creating custom break rules, one is free to define whatever
499      * status values may be convenient for the application.
500      * <p>
501      * Note: this function is not thread safe.  It should not have been
502      *       declared const, and the const remains only for compatibility
503      *       reasons.  (The function is logically const, but not bit-wise const).
504      * <p>
505      * @return the status from the break rule that determined the most recently
506      * returned break position.
507      *
508      * @see UWordBreak
509      * @stable ICU 2.2
510      */
511     virtual int32_t getRuleStatus() const;
512 
513    /**
514     * Get the status (tag) values from the break rule(s) that determined the most
515     * recently returned break position.
516     * <p>
517     * The returned status value(s) are stored into an array provided by the caller.
518     * The values are stored in sorted (ascending) order.
519     * If the capacity of the output array is insufficient to hold the data,
520     *  the output will be truncated to the available length, and a
521     *  U_BUFFER_OVERFLOW_ERROR will be signaled.
522     *
523     * @param fillInVec an array to be filled in with the status values.
524     * @param capacity  the length of the supplied vector.  A length of zero causes
525     *                  the function to return the number of status values, in the
526     *                  normal way, without attempting to store any values.
527     * @param status    receives error codes.
528     * @return          The number of rule status values from rules that determined
529     *                  the most recent boundary returned by the break iterator.
530     *                  In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
531     *                  is the total number of status values that were available,
532     *                  not the reduced number that were actually returned.
533     * @see getRuleStatus
534     * @stable ICU 3.0
535     */
536     virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
537 
538     /**
539      * Returns a unique class ID POLYMORPHICALLY.  Pure virtual override.
540      * This method is to implement a simple version of RTTI, since not all
541      * C++ compilers support genuine RTTI.  Polymorphic operator==() and
542      * clone() methods call this method.
543      *
544      * @return          The class ID for this object. All objects of a
545      *                  given class have the same class ID.  Objects of
546      *                  other classes have different class IDs.
547      * @stable ICU 2.0
548      */
549     virtual UClassID getDynamicClassID(void) const;
550 
551     /**
552      * Returns the class ID for this class.  This is useful only for
553      * comparing to a return value from getDynamicClassID().  For example:
554      *
555      *      Base* polymorphic_pointer = createPolymorphicObject();
556      *      if (polymorphic_pointer->getDynamicClassID() ==
557      *          Derived::getStaticClassID()) ...
558      *
559      * @return          The class ID for all objects of this class.
560      * @stable ICU 2.0
561      */
562     static UClassID U_EXPORT2 getStaticClassID(void);
563 
564     /**
565      * Deprecated functionality. Use clone() instead.
566      *
567      * Create a clone (copy) of this break iterator in memory provided
568      *  by the caller.  The idea is to increase performance by avoiding
569      *  a storage allocation.  Use of this function is NOT RECOMMENDED.
570      *  Performance gains are minimal, and correct buffer management is
571      *  tricky.  Use clone() instead.
572      *
573      * @param stackBuffer  The pointer to the memory into which the cloned object
574      *                     should be placed.  If NULL,  allocate heap memory
575      *                     for the cloned object.
576      * @param BufferSize   The size of the buffer.  If zero, return the required
577      *                     buffer size, but do not clone the object.  If the
578      *                     size was too small (but not zero), allocate heap
579      *                     storage for the cloned object.
580      *
581      * @param status       Error status.  U_SAFECLONE_ALLOCATED_WARNING will be
582      *                     returned if the provided buffer was too small, and
583      *                     the clone was therefore put on the heap.
584      *
585      * @return  Pointer to the clone object.  This may differ from the stackBuffer
586      *          address if the byte alignment of the stack buffer was not suitable
587      *          or if the stackBuffer was too small to hold the clone.
588      * @deprecated ICU 52. Use clone() instead.
589      */
590     virtual BreakIterator *  createBufferClone(void *stackBuffer,
591                                                int32_t &BufferSize,
592                                                UErrorCode &status);
593 
594 
595     /**
596      * Return the binary form of compiled break rules,
597      * which can then be used to create a new break iterator at some
598      * time in the future.  Creating a break iterator from pre-compiled rules
599      * is much faster than building one from the source form of the
600      * break rules.
601      *
602      * The binary data can only be used with the same version of ICU
603      *  and on the same platform type (processor endian-ness)
604      *
605      * @param length Returns the length of the binary data.  (Out parameter.)
606      *
607      * @return   A pointer to the binary (compiled) rule data.  The storage
608      *           belongs to the RuleBasedBreakIterator object, not the
609      *           caller, and must not be modified or deleted.
610      * @stable ICU 4.8
611      */
612     virtual const uint8_t *getBinaryRules(uint32_t &length);
613 
614     /**
615      *  Set the subject text string upon which the break iterator is operating
616      *  without changing any other aspect of the matching state.
617      *  The new and previous text strings must have the same content.
618      *
619      *  This function is intended for use in environments where ICU is operating on
620      *  strings that may move around in memory.  It provides a mechanism for notifying
621      *  ICU that the string has been relocated, and providing a new UText to access the
622      *  string in its new position.
623      *
624      *  Note that the break iterator implementation never copies the underlying text
625      *  of a string being processed, but always operates directly on the original text
626      *  provided by the user. Refreshing simply drops the references to the old text
627      *  and replaces them with references to the new.
628      *
629      *  Caution:  this function is normally used only by very specialized,
630      *  system-level code.  One example use case is with garbage collection that moves
631      *  the text in memory.
632      *
633      * @param input      The new (moved) text string.
634      * @param status     Receives errors detected by this function.
635      * @return           *this
636      *
637      * @stable ICU 49
638      */
639     virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status);
640 
641 
642 private:
643     //=======================================================================
644     // implementation
645     //=======================================================================
646     /**
647      * Dumps caches and performs other actions associated with a complete change
648      * in text or iteration position.
649      * @internal
650      */
651     void reset(void);
652 
653     /**
654       * Set the type of the break iterator.
655       * @internal
656       */
657     void setBreakType(int32_t type);
658 
659     /**
660       * Common initialization function, used by constructors and bufferClone.
661       * @internal
662       */
663     void init();
664 
665     /**
666      * This method backs the iterator back up to a "safe position" in the text.
667      * This is a position that we know, without any context, must be a break position.
668      * The various calling methods then iterate forward from this safe position to
669      * the appropriate position to return.  (For more information, see the description
670      * of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
671      * @param statetable state table used for moving backwards
672      * @internal
673      */
674     int32_t handlePrevious(const RBBIStateTable *statetable);
675 
676     /**
677      * This method is the actual implementation of the next() method.  All iteration
678      * vectors through here.  This method initializes the state machine to state 1
679      * and advances through the text character by character until we reach the end
680      * of the text or the state machine transitions to state 0.  We update our return
681      * value every time the state machine passes through a possible end state.
682      * @param statetable state table used for moving forwards
683      * @internal
684      */
685     int32_t handleNext(const RBBIStateTable *statetable);
686 
687 
688     /**
689      * This is the function that actually implements dictionary-based
690      * breaking.  Covering at least the range from startPos to endPos,
691      * it checks for dictionary characters, and if it finds them determines
692      * the appropriate object to deal with them. It may cache found breaks in
693      * fCachedBreakPositions as it goes. It may well also look at text outside
694      * the range startPos to endPos.
695      * If going forward, endPos is the normal Unicode break result, and
696      * if going in reverse, startPos is the normal Unicode break result
697      * @param startPos  The start position of a range of text
698      * @param endPos    The end position of a range of text
699      * @param reverse   The call is for the reverse direction
700      * @internal
701      */
702     int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
703 
704 
705     /**
706      * This function returns the appropriate LanguageBreakEngine for a
707      * given character c.
708      * @param c         A character in the dictionary set
709      * @internal
710      */
711     const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
712 
713     /** NOTE(review): presumably refreshes the cached rule status when fLastStatusIndexValid is unset -- confirm in the implementation.
714      *  @internal
715      */
716     void makeRuleStatusValid();
717 
718 };
719 
720 //------------------------------------------------------------------------------
721 //
722 //   Inline Functions Definitions ...
723 //
724 //------------------------------------------------------------------------------
725 
726 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
727     return !(this->operator==(that));  // inequality is the exact negation of operator==
728 }
729 
730 U_NAMESPACE_END
731 
732 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
733 
734 #endif
735