/*
*******************************************************************************
* Copyright (C) 2012-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* uitercollationiterator.h
*
* created on: 2012sep23 (from utf16collationiterator.h)
* created by: Markus W. Scherer
*/
11 
12 #ifndef __UITERCOLLATIONITERATOR_H__
13 #define __UITERCOLLATIONITERATOR_H__
14 
15 #include "unicode/utypes.h"
16 
17 #if !UCONFIG_NO_COLLATION
18 
19 #include "unicode/uiter.h"
20 #include "cmemory.h"
21 #include "collation.h"
22 #include "collationdata.h"
23 #include "normalizer2impl.h"
24 
25 U_NAMESPACE_BEGIN
26 
27 /**
28  * UCharIterator-based collation element and character iterator.
29  * Handles normalized text inline, with length or NUL-terminated.
30  * Unnormalized text is handled by a subclass.
31  */
32 class U_I18N_API UIterCollationIterator : public CollationIterator {
33 public:
UIterCollationIterator(const CollationData * d,UBool numeric,UCharIterator & ui)34     UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui)
35             : CollationIterator(d, numeric), iter(ui) {}
36 
37     virtual ~UIterCollationIterator();
38 
39     virtual void resetToOffset(int32_t newOffset);
40 
41     virtual int32_t getOffset() const;
42 
43     virtual UChar32 nextCodePoint(UErrorCode &errorCode);
44 
45     virtual UChar32 previousCodePoint(UErrorCode &errorCode);
46 
47 protected:
48     virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
49 
50     virtual UChar handleGetTrailSurrogate();
51 
52     virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
53 
54     virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
55 
56     UCharIterator &iter;
57 };
58 
59 /**
60  * Incrementally checks the input text for FCD and normalizes where necessary.
61  */
62 class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator {
63 public:
FCDUIterCollationIterator(const CollationData * data,UBool numeric,UCharIterator & ui,int32_t startIndex)64     FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex)
65             : UIterCollationIterator(data, numeric, ui),
66               state(ITER_CHECK_FWD), start(startIndex),
67               nfcImpl(data->nfcImpl) {}
68 
69     virtual ~FCDUIterCollationIterator();
70 
71     virtual void resetToOffset(int32_t newOffset);
72 
73     virtual int32_t getOffset() const;
74 
75     virtual UChar32 nextCodePoint(UErrorCode &errorCode);
76 
77     virtual UChar32 previousCodePoint(UErrorCode &errorCode);
78 
79 protected:
80     virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
81 
82     virtual UChar handleGetTrailSurrogate();
83 
84     virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
85 
86     virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
87 
88 private:
89     /**
90      * Switches to forward checking if possible.
91      */
92     void switchToForward();
93 
94     /**
95      * Extends the FCD text segment forward or normalizes around pos.
96      * @return TRUE if success
97      */
98     UBool nextSegment(UErrorCode &errorCode);
99 
100     /**
101      * Switches to backward checking.
102      */
103     void switchToBackward();
104 
105     /**
106      * Extends the FCD text segment backward or normalizes around pos.
107      * @return TRUE if success
108      */
109     UBool previousSegment(UErrorCode &errorCode);
110 
111     UBool normalize(const UnicodeString &s, UErrorCode &errorCode);
112 
113     enum State {
114         /**
115          * The input text [start..(iter index)[ passes the FCD check.
116          * Moving forward checks incrementally.
117          * pos & limit are undefined.
118          */
119         ITER_CHECK_FWD,
120         /**
121          * The input text [(iter index)..limit[ passes the FCD check.
122          * Moving backward checks incrementally.
123          * start & pos are undefined.
124          */
125         ITER_CHECK_BWD,
126         /**
127          * The input text [start..limit[ passes the FCD check.
128          * pos tracks the current text index.
129          */
130         ITER_IN_FCD_SEGMENT,
131         /**
132          * The input text [start..limit[ failed the FCD check and was normalized.
133          * pos tracks the current index in the normalized string.
134          * The text iterator is at the limit index.
135          */
136         IN_NORM_ITER_AT_LIMIT,
137         /**
138          * The input text [start..limit[ failed the FCD check and was normalized.
139          * pos tracks the current index in the normalized string.
140          * The text iterator is at the start index.
141          */
142         IN_NORM_ITER_AT_START
143     };
144 
145     State state;
146 
147     int32_t start;
148     int32_t pos;
149     int32_t limit;
150 
151     const Normalizer2Impl &nfcImpl;
152     UnicodeString normalized;
153 };
154 
155 U_NAMESPACE_END
156 
157 #endif  // !UCONFIG_NO_COLLATION
158 #endif  // __UITERCOLLATIONITERATOR_H__
159