1 /*
2 **********************************************************************
3 *   Copyright (C) 2001-2014 IBM and others. All rights reserved.
4 **********************************************************************
5 *   Date        Name        Description
6 *  03/22/2000   helena      Creation.
7 **********************************************************************
8 */
9 
10 #include "unicode/utypes.h"
11 
12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
13 
14 #include "unicode/stsearch.h"
15 #include "usrchimp.h"
16 #include "cmemory.h"
17 
18 U_NAMESPACE_BEGIN
19 
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)20 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
21 
22 // public constructors and destructors -----------------------------------
23 
24 StringSearch::StringSearch(const UnicodeString &pattern,
25                            const UnicodeString &text,
26                            const Locale        &locale,
27                                  BreakIterator *breakiter,
28                                  UErrorCode    &status) :
29                            SearchIterator(text, breakiter),
30                            m_pattern_(pattern)
31 {
32     if (U_FAILURE(status)) {
33         m_strsrch_ = NULL;
34         return;
35     }
36 
37     m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
38                               m_text_.getBuffer(), m_text_.length(),
39                               locale.getName(), (UBreakIterator *)breakiter,
40                               &status);
41     uprv_free(m_search_);
42     m_search_ = NULL;
43 
44     if (U_SUCCESS(status)) {
45         // m_search_ has been created by the base SearchIterator class
46         m_search_        = m_strsrch_->search;
47     }
48 }
49 
StringSearch(const UnicodeString & pattern,const UnicodeString & text,RuleBasedCollator * coll,BreakIterator * breakiter,UErrorCode & status)50 StringSearch::StringSearch(const UnicodeString     &pattern,
51                            const UnicodeString     &text,
52                                  RuleBasedCollator *coll,
53                                  BreakIterator     *breakiter,
54                                  UErrorCode        &status) :
55                            SearchIterator(text, breakiter),
56                            m_pattern_(pattern)
57 {
58     if (U_FAILURE(status)) {
59         m_strsrch_ = NULL;
60         return;
61     }
62     if (coll == NULL) {
63         status     = U_ILLEGAL_ARGUMENT_ERROR;
64         m_strsrch_ = NULL;
65         return;
66     }
67     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
68                                           m_pattern_.length(),
69                                           m_text_.getBuffer(),
70                                           m_text_.length(), coll->toUCollator(),
71                                           (UBreakIterator *)breakiter,
72                                           &status);
73     uprv_free(m_search_);
74     m_search_ = NULL;
75 
76     if (U_SUCCESS(status)) {
77         // m_search_ has been created by the base SearchIterator class
78         m_search_ = m_strsrch_->search;
79     }
80 }
81 
StringSearch(const UnicodeString & pattern,CharacterIterator & text,const Locale & locale,BreakIterator * breakiter,UErrorCode & status)82 StringSearch::StringSearch(const UnicodeString     &pattern,
83                                  CharacterIterator &text,
84                            const Locale            &locale,
85                                  BreakIterator     *breakiter,
86                                  UErrorCode        &status) :
87                            SearchIterator(text, breakiter),
88                            m_pattern_(pattern)
89 {
90     if (U_FAILURE(status)) {
91         m_strsrch_ = NULL;
92         return;
93     }
94     m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
95                               m_text_.getBuffer(), m_text_.length(),
96                               locale.getName(), (UBreakIterator *)breakiter,
97                               &status);
98     uprv_free(m_search_);
99     m_search_ = NULL;
100 
101     if (U_SUCCESS(status)) {
102         // m_search_ has been created by the base SearchIterator class
103         m_search_ = m_strsrch_->search;
104     }
105 }
106 
StringSearch(const UnicodeString & pattern,CharacterIterator & text,RuleBasedCollator * coll,BreakIterator * breakiter,UErrorCode & status)107 StringSearch::StringSearch(const UnicodeString     &pattern,
108                                  CharacterIterator &text,
109                                  RuleBasedCollator *coll,
110                                  BreakIterator     *breakiter,
111                                  UErrorCode        &status) :
112                            SearchIterator(text, breakiter),
113                            m_pattern_(pattern)
114 {
115     if (U_FAILURE(status)) {
116         m_strsrch_ = NULL;
117         return;
118     }
119     if (coll == NULL) {
120         status     = U_ILLEGAL_ARGUMENT_ERROR;
121         m_strsrch_ = NULL;
122         return;
123     }
124     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
125                                           m_pattern_.length(),
126                                           m_text_.getBuffer(),
127                                           m_text_.length(), coll->toUCollator(),
128                                           (UBreakIterator *)breakiter,
129                                           &status);
130     uprv_free(m_search_);
131     m_search_ = NULL;
132 
133     if (U_SUCCESS(status)) {
134         // m_search_ has been created by the base SearchIterator class
135         m_search_ = m_strsrch_->search;
136     }
137 }
138 
/**
 * Copy constructor.
 *
 * Copies the pattern, text and break iterator pointer of that and, when
 * that holds an open search, opens a fresh search of its own over the
 * copied strings with that's collator. Errors from opening are not
 * reported; they leave m_search_ NULL.
 *
 * @param that object to copy
 */
StringSearch::StringSearch(const StringSearch &that) :
                       SearchIterator(that.m_text_, that.m_breakiterator_),
                       m_pattern_(that.m_pattern_)
{
    // The struct created by the superclass is never used by this class.
    uprv_free(m_search_);
    m_search_  = NULL;
    m_strsrch_ = NULL;

    if (that.m_strsrch_ == NULL) {
        // The source has no open search, so there is nothing to duplicate.
        return;
    }

    // Deep copy: open an independent search over our own strings.
    UErrorCode openStatus = U_ZERO_ERROR;
    UBreakIterator *ubrk =
        reinterpret_cast<UBreakIterator *>(that.m_breakiterator_);
    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                          m_pattern_.length(),
                                          m_text_.getBuffer(),
                                          m_text_.length(),
                                          that.m_strsrch_->collator,
                                          ubrk, &openStatus);
    if (U_SUCCESS(openStatus)) {
        m_search_ = m_strsrch_->search;
    }
}
168 
~StringSearch()169 StringSearch::~StringSearch()
170 {
171     if (m_strsrch_ != NULL) {
172         usearch_close(m_strsrch_);
173         m_search_ = NULL;
174     }
175 }
176 
177 StringSearch *
clone() const178 StringSearch::clone() const {
179     return new StringSearch(*this);
180 }
181 
182 // operator overloading ---------------------------------------------
/**
 * Assignment operator.
 *
 * Copies the text, pattern and break iterator pointer of that, then
 * replaces this object's search with a fresh one opened over the copied
 * strings with that's collator. Errors while opening cannot be reported
 * from here; they leave both m_strsrch_ and m_search_ NULL.
 *
 * @param that object to assign from
 * @return *this
 */
StringSearch & StringSearch::operator=(const StringSearch &that)
{
    // Identity check only: a value comparison would dereference m_strsrch_
    // on both sides, which may be NULL after a failed construction.
    if (this != &that) {
        UErrorCode status = U_ZERO_ERROR;
        m_text_          = that.m_text_;
        m_breakiterator_ = that.m_breakiterator_;
        m_pattern_       = that.m_pattern_;

        // all m_search_ in the parent class is linked up with m_strsrch_
        if (m_strsrch_ != NULL) {
            usearch_close(m_strsrch_);
        }
        else if (m_search_ != NULL) {
            // No search was ever opened, so m_search_ is still the struct
            // created by the base SearchIterator class; release it the same
            // way the constructors do.
            uprv_free(m_search_);
        }
        // Never leave pointers into the search that was just closed.
        m_strsrch_ = NULL;
        m_search_  = NULL;

        if (that.m_strsrch_ != NULL) {
            // Pass the break iterator along, as the copy constructor does,
            // so the new search agrees with m_breakiterator_.
            m_strsrch_ = usearch_openFromCollator(
                             m_pattern_.getBuffer(),
                             m_pattern_.length(),
                             m_text_.getBuffer(),
                             m_text_.length(),
                             that.m_strsrch_->collator,
                             reinterpret_cast<UBreakIterator *>(m_breakiterator_),
                             &status);
            // Check null pointer
            if (m_strsrch_ != NULL) {
                m_search_ = m_strsrch_->search;
            }
        }
    }
    return *this;
}
205 
operator ==(const SearchIterator & that) const206 UBool StringSearch::operator==(const SearchIterator &that) const
207 {
208     if (this == &that) {
209         return TRUE;
210     }
211     if (SearchIterator::operator ==(that)) {
212         StringSearch &thatsrch = (StringSearch &)that;
213         return (this->m_pattern_ == thatsrch.m_pattern_ &&
214                 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
215     }
216     return FALSE;
217 }
218 
219 // public get and set methods ----------------------------------------
220 
setOffset(int32_t position,UErrorCode & status)221 void StringSearch::setOffset(int32_t position, UErrorCode &status)
222 {
223     // status checked in usearch_setOffset
224     usearch_setOffset(m_strsrch_, position, &status);
225 }
226 
getOffset(void) const227 int32_t StringSearch::getOffset(void) const
228 {
229     return usearch_getOffset(m_strsrch_);
230 }
231 
setText(const UnicodeString & text,UErrorCode & status)232 void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
233 {
234     if (U_SUCCESS(status)) {
235         m_text_ = text;
236         usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
237     }
238 }
239 
setText(CharacterIterator & text,UErrorCode & status)240 void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
241 {
242     if (U_SUCCESS(status)) {
243         text.getText(m_text_);
244         usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
245     }
246 }
247 
getCollator() const248 RuleBasedCollator * StringSearch::getCollator() const
249 {
250     // Note the const_cast. It would be cleaner if this const method returned a const collator.
251     return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator));
252 }
253 
setCollator(RuleBasedCollator * coll,UErrorCode & status)254 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
255 {
256     if (U_SUCCESS(status)) {
257         usearch_setCollator(m_strsrch_, coll->toUCollator(), &status);
258     }
259 }
260 
setPattern(const UnicodeString & pattern,UErrorCode & status)261 void StringSearch::setPattern(const UnicodeString &pattern,
262                                     UErrorCode    &status)
263 {
264     if (U_SUCCESS(status)) {
265         m_pattern_ = pattern;
266         usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
267                            &status);
268     }
269 }
270 
getPattern() const271 const UnicodeString & StringSearch::getPattern() const
272 {
273     return m_pattern_;
274 }
275 
276 // public methods ----------------------------------------------------
277 
reset()278 void StringSearch::reset()
279 {
280     usearch_reset(m_strsrch_);
281 }
282 
safeClone(void) const283 SearchIterator * StringSearch::safeClone(void) const
284 {
285     UErrorCode status = U_ZERO_ERROR;
286     StringSearch *result = new StringSearch(m_pattern_, m_text_,
287                                             getCollator(),
288                                             m_breakiterator_,
289                                             status);
290     /* test for NULL */
291     if (result == 0) {
292         status = U_MEMORY_ALLOCATION_ERROR;
293         return 0;
294     }
295     result->setOffset(getOffset(), status);
296     result->setMatchStart(m_strsrch_->search->matchedIndex);
297     result->setMatchLength(m_strsrch_->search->matchedLength);
298     if (U_FAILURE(status)) {
299         return NULL;
300     }
301     return result;
302 }
303 
304 // protected method -------------------------------------------------
305 
handleNext(int32_t position,UErrorCode & status)306 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
307 {
308     // values passed here are already in the pre-shift position
309     if (U_SUCCESS(status)) {
310         if (m_strsrch_->pattern.cesLength == 0) {
311             m_search_->matchedIndex =
312                                     m_search_->matchedIndex == USEARCH_DONE ?
313                                     getOffset() : m_search_->matchedIndex + 1;
314             m_search_->matchedLength = 0;
315             ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
316                            &status);
317             if (m_search_->matchedIndex == m_search_->textLength) {
318                 m_search_->matchedIndex = USEARCH_DONE;
319             }
320         }
321         else {
322             // looking at usearch.cpp, this part is shifted out to
323             // StringSearch instead of SearchIterator because m_strsrch_ is
324             // not accessible in SearchIterator
325 #if 0
326             if (position + m_strsrch_->pattern.defaultShiftSize
327                 > m_search_->textLength) {
328                 setMatchNotFound();
329                 return USEARCH_DONE;
330             }
331 #endif
332             if (m_search_->matchedLength <= 0) {
333                 // the flipping direction issue has already been handled
334                 // in next()
335                 // for boundary check purposes. this will ensure that the
336                 // next match will not preceed the current offset
337                 // note search->matchedIndex will always be set to something
338                 // in the code
339                 m_search_->matchedIndex = position - 1;
340             }
341 
342             ucol_setOffset(m_strsrch_->textIter, position, &status);
343 
344 #if 0
345             for (;;) {
346                 if (m_search_->isCanonicalMatch) {
347                     // can't use exact here since extra accents are allowed.
348                     usearch_handleNextCanonical(m_strsrch_, &status);
349                 }
350                 else {
351                     usearch_handleNextExact(m_strsrch_, &status);
352                 }
353                 if (U_FAILURE(status)) {
354                     return USEARCH_DONE;
355                 }
356                 if (m_breakiterator_ == NULL
357 #if !UCONFIG_NO_BREAK_ITERATION
358                     ||
359                     m_search_->matchedIndex == USEARCH_DONE ||
360                     (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
361                      m_breakiterator_->isBoundary(m_search_->matchedIndex +
362                                                   m_search_->matchedLength))
363 #endif
364                 ) {
365                     if (m_search_->matchedIndex == USEARCH_DONE) {
366                         ucol_setOffset(m_strsrch_->textIter,
367                                        m_search_->textLength, &status);
368                     }
369                     else {
370                         ucol_setOffset(m_strsrch_->textIter,
371                                        m_search_->matchedIndex, &status);
372                     }
373                     return m_search_->matchedIndex;
374                 }
375             }
376 #else
377             // if m_strsrch_->breakIter is always the same as m_breakiterator_
378             // then we don't need to check the match boundaries here because
379             // usearch_handleNextXXX will already have done it.
380             if (m_search_->isCanonicalMatch) {
381             	// *could* actually use exact here 'cause no extra accents allowed...
382             	usearch_handleNextCanonical(m_strsrch_, &status);
383             } else {
384             	usearch_handleNextExact(m_strsrch_, &status);
385             }
386 
387             if (U_FAILURE(status)) {
388             	return USEARCH_DONE;
389             }
390 
391             if (m_search_->matchedIndex == USEARCH_DONE) {
392             	ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
393             } else {
394             	ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
395             }
396 
397             return m_search_->matchedIndex;
398 #endif
399         }
400     }
401     return USEARCH_DONE;
402 }
403 
handlePrev(int32_t position,UErrorCode & status)404 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
405 {
406     // values passed here are already in the pre-shift position
407     if (U_SUCCESS(status)) {
408         if (m_strsrch_->pattern.cesLength == 0) {
409             m_search_->matchedIndex =
410                   (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
411                    m_search_->matchedIndex);
412             if (m_search_->matchedIndex == 0) {
413                 setMatchNotFound();
414             }
415             else {
416                 m_search_->matchedIndex --;
417                 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
418                                &status);
419                 m_search_->matchedLength = 0;
420             }
421         }
422         else {
423             // looking at usearch.cpp, this part is shifted out to
424             // StringSearch instead of SearchIterator because m_strsrch_ is
425             // not accessible in SearchIterator
426 #if 0
427             if (!m_search_->isOverlap &&
428                 position - m_strsrch_->pattern.defaultShiftSize < 0) {
429                 setMatchNotFound();
430                 return USEARCH_DONE;
431             }
432 
433             for (;;) {
434                 if (m_search_->isCanonicalMatch) {
435                     // can't use exact here since extra accents are allowed.
436                     usearch_handlePreviousCanonical(m_strsrch_, &status);
437                 }
438                 else {
439                     usearch_handlePreviousExact(m_strsrch_, &status);
440                 }
441                 if (U_FAILURE(status)) {
442                     return USEARCH_DONE;
443                 }
444                 if (m_breakiterator_ == NULL
445 #if !UCONFIG_NO_BREAK_ITERATION
446                     ||
447                     m_search_->matchedIndex == USEARCH_DONE ||
448                     (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
449                      m_breakiterator_->isBoundary(m_search_->matchedIndex +
450                                                   m_search_->matchedLength))
451 #endif
452                 ) {
453                     return m_search_->matchedIndex;
454                 }
455             }
456 #else
457             ucol_setOffset(m_strsrch_->textIter, position, &status);
458 
459             if (m_search_->isCanonicalMatch) {
460             	// *could* use exact match here since extra accents *not* allowed!
461             	usearch_handlePreviousCanonical(m_strsrch_, &status);
462             } else {
463             	usearch_handlePreviousExact(m_strsrch_, &status);
464             }
465 
466             if (U_FAILURE(status)) {
467             	return USEARCH_DONE;
468             }
469 
470             return m_search_->matchedIndex;
471 #endif
472         }
473 
474         return m_search_->matchedIndex;
475     }
476     return USEARCH_DONE;
477 }
478 
479 U_NAMESPACE_END
480 
481 #endif /* #if !UCONFIG_NO_COLLATION */
482