1 /* 2 ******************************************************************************* 3 * Copyright (C) 2010-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * utf16collationiterator.h 7 * 8 * created on: 2010oct27 9 * created by: Markus W. Scherer 10 */ 11 12 #ifndef __UTF16COLLATIONITERATOR_H__ 13 #define __UTF16COLLATIONITERATOR_H__ 14 15 #include "unicode/utypes.h" 16 17 #if !UCONFIG_NO_COLLATION 18 19 #include "cmemory.h" 20 #include "collation.h" 21 #include "collationdata.h" 22 #include "collationiterator.h" 23 #include "normalizer2impl.h" 24 25 U_NAMESPACE_BEGIN 26 27 /** 28 * UTF-16 collation element and character iterator. 29 * Handles normalized UTF-16 text inline, with length or NUL-terminated. 30 * Unnormalized text is handled by a subclass. 31 */ 32 class U_I18N_API UTF16CollationIterator : public CollationIterator { 33 public: UTF16CollationIterator(const CollationData * d,UBool numeric,const UChar * s,const UChar * p,const UChar * lim)34 UTF16CollationIterator(const CollationData *d, UBool numeric, 35 const UChar *s, const UChar *p, const UChar *lim) 36 : CollationIterator(d, numeric), 37 start(s), pos(p), limit(lim) {} 38 39 UTF16CollationIterator(const UTF16CollationIterator &other, const UChar *newText); 40 41 virtual ~UTF16CollationIterator(); 42 43 virtual UBool operator==(const CollationIterator &other) const; 44 45 virtual void resetToOffset(int32_t newOffset); 46 47 virtual int32_t getOffset() const; 48 setText(const UChar * s,const UChar * lim)49 void setText(const UChar *s, const UChar *lim) { 50 reset(); 51 start = pos = s; 52 limit = lim; 53 } 54 55 virtual UChar32 nextCodePoint(UErrorCode &errorCode); 56 57 virtual UChar32 previousCodePoint(UErrorCode &errorCode); 58 59 protected: 60 // Copy constructor only for subclasses which set the pointers. UTF16CollationIterator(const UTF16CollationIterator & other)61 UTF16CollationIterator(const UTF16CollationIterator &other) 62 : CollationIterator(other), 63 start(NULL), pos(NULL), limit(NULL) {} 64 65 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); 66 67 virtual UChar handleGetTrailSurrogate(); 68 69 virtual UBool foundNULTerminator(); 70 71 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); 72 73 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); 74 75 // UTF-16 string pointers. 76 // limit can be NULL for NUL-terminated strings. 77 const UChar *start, *pos, *limit; 78 }; 79 80 /** 81 * Incrementally checks the input text for FCD and normalizes where necessary. 82 */ 83 class U_I18N_API FCDUTF16CollationIterator : public UTF16CollationIterator { 84 public: FCDUTF16CollationIterator(const CollationData * data,UBool numeric,const UChar * s,const UChar * p,const UChar * lim)85 FCDUTF16CollationIterator(const CollationData *data, UBool numeric, 86 const UChar *s, const UChar *p, const UChar *lim) 87 : UTF16CollationIterator(data, numeric, s, p, lim), 88 rawStart(s), segmentStart(p), segmentLimit(NULL), rawLimit(lim), 89 nfcImpl(data->nfcImpl), 90 checkDir(1) {} 91 92 FCDUTF16CollationIterator(const FCDUTF16CollationIterator &other, const UChar *newText); 93 94 virtual ~FCDUTF16CollationIterator(); 95 96 virtual UBool operator==(const CollationIterator &other) const; 97 98 virtual void resetToOffset(int32_t newOffset); 99 100 virtual int32_t getOffset() const; 101 102 virtual UChar32 nextCodePoint(UErrorCode &errorCode); 103 104 virtual UChar32 previousCodePoint(UErrorCode &errorCode); 105 106 protected: 107 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); 108 109 virtual UBool foundNULTerminator(); 110 111 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); 112 113 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); 114 115 private: 116 /** 117 * Switches to forward checking if possible. 118 * To be called when checkDir < 0 || (checkDir == 0 && pos == limit). 119 * Returns with checkDir > 0 || (checkDir == 0 && pos != limit). 120 */ 121 void switchToForward(); 122 123 /** 124 * Extend the FCD text segment forward or normalize around pos. 125 * To be called when checkDir > 0 && pos != limit. 126 * @return TRUE if success, checkDir == 0 and pos != limit 127 */ 128 UBool nextSegment(UErrorCode &errorCode); 129 130 /** 131 * Switches to backward checking. 132 * To be called when checkDir > 0 || (checkDir == 0 && pos == start). 133 * Returns with checkDir < 0 || (checkDir == 0 && pos != start). 134 */ 135 void switchToBackward(); 136 137 /** 138 * Extend the FCD text segment backward or normalize around pos. 139 * To be called when checkDir < 0 && pos != start. 140 * @return TRUE if success, checkDir == 0 and pos != start 141 */ 142 UBool previousSegment(UErrorCode &errorCode); 143 144 UBool normalize(const UChar *from, const UChar *to, UErrorCode &errorCode); 145 146 // Text pointers: The input text is [rawStart, rawLimit[ 147 // where rawLimit can be NULL for NUL-terminated text. 148 // 149 // checkDir > 0: 150 // 151 // The input text [segmentStart..pos[ passes the FCD check. 152 // Moving forward checks incrementally. 153 // segmentLimit is undefined. limit == rawLimit. 154 // 155 // checkDir < 0: 156 // The input text [pos..segmentLimit[ passes the FCD check. 157 // Moving backward checks incrementally. 158 // segmentStart is undefined, start == rawStart. 159 // 160 // checkDir == 0: 161 // 162 // The input text [segmentStart..segmentLimit[ is being processed. 163 // These pointers are at FCD boundaries. 164 // Either this text segment already passes the FCD check 165 // and segmentStart==start<=pos<=limit==segmentLimit, 166 // or the current segment had to be normalized so that 167 // [segmentStart..segmentLimit[ turned into the normalized string, 168 // corresponding to normalized.getBuffer()==start<=pos<=limit==start+normalized.length(). 169 const UChar *rawStart; 170 const UChar *segmentStart; 171 const UChar *segmentLimit; 172 // rawLimit==NULL for a NUL-terminated string. 173 const UChar *rawLimit; 174 175 const Normalizer2Impl &nfcImpl; 176 UnicodeString normalized; 177 // Direction of incremental FCD check. See comments before rawStart. 178 int8_t checkDir; 179 }; 180 181 U_NAMESPACE_END 182 183 #endif // !UCONFIG_NO_COLLATION 184 #endif // __UTF16COLLATIONITERATOR_H__ 185