1 /*
2 **********************************************************************
3 *   Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
4 **********************************************************************
5 *   Date        Name        Description
6 *  03/22/2000   helena      Creation.
7 **********************************************************************
8 */
9 
10 #include "unicode/utypes.h"
11 
12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
13 
14 #include "unicode/brkiter.h"
15 #include "unicode/schriter.h"
16 #include "unicode/search.h"
17 #include "usrchimp.h"
18 #include "cmemory.h"
19 
20 // public constructors and destructors -----------------------------------
21 U_NAMESPACE_BEGIN
22 
/**
 * Copy constructor.
 * Allocates a separate USearch state block for the new object and duplicates
 * the break iterator pointer, the text, the search attributes, the match
 * state and the iteration direction of other.
 */
SearchIterator::SearchIterator(const SearchIterator &other)
    : UObject(other)
{
    m_breakiterator_            = other.m_breakiterator_;
    m_text_                     = other.m_text_;
    // NOTE(review): the allocation result is not checked, matching the other
    // constructors in this file; there is no status parameter to report it.
    m_search_                   = (USearch *)uprv_malloc(sizeof(USearch));
    m_search_->breakIter        = other.m_search_->breakIter;
    m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
    m_search_->isOverlap        = other.m_search_->isOverlap;
    m_search_->elementComparisonType = other.m_search_->elementComparisonType;
    // next() and previous() read these two flags and nothing else in this
    // constructor assigns them, so they have to be copied explicitly.
    m_search_->isForwardSearching = other.m_search_->isForwardSearching;
    m_search_->reset            = other.m_search_->reset;
    m_search_->matchedIndex     = other.m_search_->matchedIndex;
    m_search_->matchedLength    = other.m_search_->matchedLength;
    // refer to this object's own copy of the text instead of the buffer
    // that belongs to other, which may go away before this object does.
    m_search_->text             = m_text_.getBuffer();
    m_search_->textLength       = m_text_.length();
}
38 
~SearchIterator()39 SearchIterator::~SearchIterator()
40 {
41     if (m_search_ != NULL) {
42         uprv_free(m_search_);
43     }
44 }
45 
46 // public get and set methods ----------------------------------------
47 
setAttribute(USearchAttribute attribute,USearchAttributeValue value,UErrorCode & status)48 void SearchIterator::setAttribute(USearchAttribute       attribute,
49                                   USearchAttributeValue  value,
50                                   UErrorCode            &status)
51 {
52     if (U_SUCCESS(status)) {
53         switch (attribute)
54         {
55         case USEARCH_OVERLAP :
56             m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
57             break;
58         case USEARCH_CANONICAL_MATCH :
59             m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
60             break;
61         case USEARCH_ELEMENT_COMPARISON :
62             if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
63                 m_search_->elementComparisonType = (int16_t)value;
64             } else {
65                 m_search_->elementComparisonType = 0;
66             }
67             break;
68         default:
69             status = U_ILLEGAL_ARGUMENT_ERROR;
70         }
71     }
72     if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
73         status = U_ILLEGAL_ARGUMENT_ERROR;
74     }
75 }
76 
getAttribute(USearchAttribute attribute) const77 USearchAttributeValue SearchIterator::getAttribute(
78                                           USearchAttribute  attribute) const
79 {
80     switch (attribute) {
81     case USEARCH_OVERLAP :
82         return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
83     case USEARCH_CANONICAL_MATCH :
84         return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON :
85                                                                 USEARCH_OFF);
86     case USEARCH_ELEMENT_COMPARISON :
87         {
88             int16_t value = m_search_->elementComparisonType;
89             if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
90                 return (USearchAttributeValue)value;
91             } else {
92                 return USEARCH_STANDARD_ELEMENT_COMPARISON;
93             }
94         }
95     default :
96         return USEARCH_DEFAULT;
97     }
98 }
99 
getMatchedStart() const100 int32_t SearchIterator::getMatchedStart() const
101 {
102     return m_search_->matchedIndex;
103 }
104 
getMatchedLength() const105 int32_t SearchIterator::getMatchedLength() const
106 {
107     return m_search_->matchedLength;
108 }
109 
getMatchedText(UnicodeString & result) const110 void SearchIterator::getMatchedText(UnicodeString &result) const
111 {
112     int32_t matchedindex  = m_search_->matchedIndex;
113     int32_t     matchedlength = m_search_->matchedLength;
114     if (matchedindex != USEARCH_DONE && matchedlength != 0) {
115         result.setTo(m_search_->text + matchedindex, matchedlength);
116     }
117     else {
118         result.remove();
119     }
120 }
121 
setBreakIterator(BreakIterator * breakiter,UErrorCode & status)122 void SearchIterator::setBreakIterator(BreakIterator *breakiter,
123                                       UErrorCode &status)
124 {
125     if (U_SUCCESS(status)) {
126 #if 0
127         m_search_->breakIter = NULL;
128         // the c++ breakiterator may not make use of ubreakiterator.
129         // so we'll have to keep track of it ourselves.
130 #else
131         // Well, gee... the Constructors that take a BreakIterator
132         // all cast the BreakIterator to a UBreakIterator and
133         // pass it to the corresponding usearch_openFromXXX
134         // routine, so there's no reason not to do this.
135         //
136         // Besides, a UBreakIterator is a BreakIterator, so
137         // any subclass of BreakIterator should work fine here...
138         m_search_->breakIter = (UBreakIterator *) breakiter;
139 #endif
140 
141         m_breakiterator_ = breakiter;
142     }
143 }
144 
getBreakIterator(void) const145 const BreakIterator * SearchIterator::getBreakIterator(void) const
146 {
147     return m_breakiterator_;
148 }
149 
setText(const UnicodeString & text,UErrorCode & status)150 void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
151 {
152     if (U_SUCCESS(status)) {
153         if (text.length() == 0) {
154             status = U_ILLEGAL_ARGUMENT_ERROR;
155         }
156         else {
157             m_text_        = text;
158             m_search_->text = m_text_.getBuffer();
159             m_search_->textLength = m_text_.length();
160         }
161     }
162 }
163 
setText(CharacterIterator & text,UErrorCode & status)164 void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
165 {
166     if (U_SUCCESS(status)) {
167         text.getText(m_text_);
168         setText(m_text_, status);
169     }
170 }
171 
getText(void) const172 const UnicodeString & SearchIterator::getText(void) const
173 {
174     return m_text_;
175 }
176 
177 // operator overloading ----------------------------------------------
178 
operator ==(const SearchIterator & that) const179 UBool SearchIterator::operator==(const SearchIterator &that) const
180 {
181     if (this == &that) {
182         return TRUE;
183     }
184     return (m_breakiterator_            == that.m_breakiterator_ &&
185             m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
186             m_search_->isOverlap        == that.m_search_->isOverlap &&
187             m_search_->elementComparisonType == that.m_search_->elementComparisonType &&
188             m_search_->matchedIndex     == that.m_search_->matchedIndex &&
189             m_search_->matchedLength    == that.m_search_->matchedLength &&
190             m_search_->textLength       == that.m_search_->textLength &&
191             getOffset() == that.getOffset() &&
192             (uprv_memcmp(m_search_->text, that.m_search_->text,
193                               m_search_->textLength * sizeof(UChar)) == 0));
194 }
195 
196 // public methods ----------------------------------------------------
197 
first(UErrorCode & status)198 int32_t SearchIterator::first(UErrorCode &status)
199 {
200     if (U_FAILURE(status)) {
201         return USEARCH_DONE;
202     }
203     setOffset(0, status);
204     return handleNext(0, status);
205 }
206 
following(int32_t position,UErrorCode & status)207 int32_t SearchIterator::following(int32_t position,
208                                       UErrorCode &status)
209 {
210     if (U_FAILURE(status)) {
211         return USEARCH_DONE;
212     }
213     setOffset(position, status);
214     return handleNext(position, status);
215 }
216 
last(UErrorCode & status)217 int32_t SearchIterator::last(UErrorCode &status)
218 {
219     if (U_FAILURE(status)) {
220         return USEARCH_DONE;
221     }
222     setOffset(m_search_->textLength, status);
223     return handlePrev(m_search_->textLength, status);
224 }
225 
preceding(int32_t position,UErrorCode & status)226 int32_t SearchIterator::preceding(int32_t position,
227                                       UErrorCode &status)
228 {
229     if (U_FAILURE(status)) {
230         return USEARCH_DONE;
231     }
232     setOffset(position, status);
233     return handlePrev(position, status);
234 }
235 
next(UErrorCode & status)236 int32_t SearchIterator::next(UErrorCode &status)
237 {
238     if (U_SUCCESS(status)) {
239         int32_t offset = getOffset();
240         int32_t matchindex  = m_search_->matchedIndex;
241         int32_t     matchlength = m_search_->matchedLength;
242         m_search_->reset = FALSE;
243         if (m_search_->isForwardSearching == TRUE) {
244             int32_t textlength = m_search_->textLength;
245             if (offset == textlength || matchindex == textlength ||
246                 (matchindex != USEARCH_DONE &&
247                 matchindex + matchlength >= textlength)) {
248                 // not enough characters to match
249                 setMatchNotFound();
250                 return USEARCH_DONE;
251             }
252         }
253         else {
254             // switching direction.
255             // if matchedIndex == USEARCH_DONE, it means that either a
256             // setOffset has been called or that previous ran off the text
257             // string. the iterator would have been set to offset 0 if a
258             // match is not found.
259             m_search_->isForwardSearching = TRUE;
260             if (m_search_->matchedIndex != USEARCH_DONE) {
261                 // there's no need to set the collation element iterator
262                 // the next call to next will set the offset.
263                 return matchindex;
264             }
265         }
266 
267         if (matchlength > 0) {
268             // if matchlength is 0 we are at the start of the iteration
269             if (m_search_->isOverlap) {
270                 offset ++;
271             }
272             else {
273                 offset += matchlength;
274             }
275         }
276         return handleNext(offset, status);
277     }
278     return USEARCH_DONE;
279 }
280 
previous(UErrorCode & status)281 int32_t SearchIterator::previous(UErrorCode &status)
282 {
283     if (U_SUCCESS(status)) {
284         int32_t offset;
285         if (m_search_->reset) {
286             offset                       = m_search_->textLength;
287             m_search_->isForwardSearching = FALSE;
288             m_search_->reset              = FALSE;
289             setOffset(offset, status);
290         }
291         else {
292             offset = getOffset();
293         }
294 
295         int32_t matchindex = m_search_->matchedIndex;
296         if (m_search_->isForwardSearching == TRUE) {
297             // switching direction.
298             // if matchedIndex == USEARCH_DONE, it means that either a
299             // setOffset has been called or that next ran off the text
300             // string. the iterator would have been set to offset textLength if
301             // a match is not found.
302             m_search_->isForwardSearching = FALSE;
303             if (matchindex != USEARCH_DONE) {
304                 return matchindex;
305             }
306         }
307         else {
308             if (offset == 0 || matchindex == 0) {
309                 // not enough characters to match
310                 setMatchNotFound();
311                 return USEARCH_DONE;
312             }
313         }
314 
315         if (matchindex != USEARCH_DONE) {
316             if (m_search_->isOverlap) {
317                 matchindex += m_search_->matchedLength - 2;
318             }
319 
320             return handlePrev(matchindex, status);
321         }
322 
323         return handlePrev(offset, status);
324     }
325 
326     return USEARCH_DONE;
327 }
328 
reset()329 void SearchIterator::reset()
330 {
331     UErrorCode status = U_ZERO_ERROR;
332     setMatchNotFound();
333     setOffset(0, status);
334     m_search_->isOverlap          = FALSE;
335     m_search_->isCanonicalMatch   = FALSE;
336     m_search_->elementComparisonType = 0;
337     m_search_->isForwardSearching = TRUE;
338     m_search_->reset              = TRUE;
339 }
340 
341 // protected constructors and destructors -----------------------------
342 
SearchIterator()343 SearchIterator::SearchIterator()
344 {
345     m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
346     m_search_->breakIter          = NULL;
347     m_search_->isOverlap          = FALSE;
348     m_search_->isCanonicalMatch   = FALSE;
349     m_search_->elementComparisonType = 0;
350     m_search_->isForwardSearching = TRUE;
351     m_search_->reset              = TRUE;
352     m_search_->matchedIndex       = USEARCH_DONE;
353     m_search_->matchedLength      = 0;
354     m_search_->text               = NULL;
355     m_search_->textLength         = 0;
356     m_breakiterator_              = NULL;
357 }
358 
SearchIterator(const UnicodeString & text,BreakIterator * breakiter)359 SearchIterator::SearchIterator(const UnicodeString &text,
360                                      BreakIterator *breakiter) :
361                                      m_breakiterator_(breakiter),
362                                      m_text_(text)
363 {
364     m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
365     m_search_->breakIter          = NULL;
366     m_search_->isOverlap          = FALSE;
367     m_search_->isCanonicalMatch   = FALSE;
368     m_search_->elementComparisonType = 0;
369     m_search_->isForwardSearching = TRUE;
370     m_search_->reset              = TRUE;
371     m_search_->matchedIndex       = USEARCH_DONE;
372     m_search_->matchedLength      = 0;
373     m_search_->text               = m_text_.getBuffer();
374     m_search_->textLength         = text.length();
375 }
376 
SearchIterator(CharacterIterator & text,BreakIterator * breakiter)377 SearchIterator::SearchIterator(CharacterIterator &text,
378                                BreakIterator     *breakiter) :
379                                m_breakiterator_(breakiter)
380 {
381     m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
382     m_search_->breakIter          = NULL;
383     m_search_->isOverlap          = FALSE;
384     m_search_->isCanonicalMatch   = FALSE;
385     m_search_->elementComparisonType = 0;
386     m_search_->isForwardSearching = TRUE;
387     m_search_->reset              = TRUE;
388     m_search_->matchedIndex       = USEARCH_DONE;
389     m_search_->matchedLength      = 0;
390     text.getText(m_text_);
391     m_search_->text               = m_text_.getBuffer();
392     m_search_->textLength         = m_text_.length();
393     m_breakiterator_             = breakiter;
394 }
395 
396 // protected methods ------------------------------------------------------
397 
/**
 * Assignment operator.
 * Copies the break iterator pointer, the text, the search attributes, the
 * match state and the iteration direction of that into this object. The
 * existing USearch state block of this object is reused.
 * @param that the iterator to copy from
 * @return this object
 */
SearchIterator & SearchIterator::operator=(const SearchIterator &that)
{
    if (this != &that) {
        m_breakiterator_            = that.m_breakiterator_;
        m_text_                     = that.m_text_;
        m_search_->breakIter        = that.m_search_->breakIter;
        m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
        m_search_->isOverlap        = that.m_search_->isOverlap;
        m_search_->elementComparisonType = that.m_search_->elementComparisonType;
        // the match index and length below are interpreted by next() and
        // previous() together with these two flags, so copy them as well
        m_search_->isForwardSearching = that.m_search_->isForwardSearching;
        m_search_->reset            = that.m_search_->reset;
        m_search_->matchedIndex     = that.m_search_->matchedIndex;
        m_search_->matchedLength    = that.m_search_->matchedLength;
        // refer to this object's own copy of the text instead of the buffer
        // that belongs to that, which may go away before this object does.
        m_search_->text             = m_text_.getBuffer();
        m_search_->textLength       = m_text_.length();
    }
    return *this;
}
414 
setMatchLength(int32_t length)415 void SearchIterator::setMatchLength(int32_t length)
416 {
417     m_search_->matchedLength = length;
418 }
419 
setMatchStart(int32_t position)420 void SearchIterator::setMatchStart(int32_t position)
421 {
422     m_search_->matchedIndex = position;
423 }
424 
setMatchNotFound()425 void SearchIterator::setMatchNotFound()
426 {
427     setMatchStart(USEARCH_DONE);
428     setMatchLength(0);
429     UErrorCode status = U_ZERO_ERROR;
430     // by default no errors should be returned here since offsets are within
431     // range.
432     if (m_search_->isForwardSearching) {
433         setOffset(m_search_->textLength, status);
434     }
435     else {
436         setOffset(0, status);
437     }
438 }
439 
440 
441 U_NAMESPACE_END
442 
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
444