• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ******************************************************************************
3 *   Copyright (C) 2001-2015, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 ******************************************************************************
6 *
7 * File ucoleitr.cpp
8 *
9 * Modification History:
10 *
11 * Date        Name        Description
12 * 02/15/2001  synwee      Modified all methods to process its own function
13 *                         instead of calling the equivalent c++ api (coleitr.h)
14 * 2012-2014   markus      Rewritten in C++ again.
15 ******************************************************************************/
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_COLLATION
20 
21 #include "unicode/coleitr.h"
22 #include "unicode/tblcoll.h"
23 #include "unicode/ucoleitr.h"
24 #include "unicode/ustring.h"
25 #include "unicode/sortkey.h"
26 #include "unicode/uobject.h"
27 #include "cmemory.h"
28 #include "usrchimp.h"
29 
30 U_NAMESPACE_USE
31 
// NOTE(review): no use of BUFFER_LENGTH is visible in this file.
#define BUFFER_LENGTH             100

// Number of entries held inline by RCEBuffer before it switches to the heap.
#define DEFAULT_BUFFER_SIZE 16
// Number of entries added each time a CE buffer has to grow.
#define BUFFER_GROW 8

// Copies `count` elements from src to dst; the element size is taken from src.
#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof((src)[0]))

// Allocates uninitialized storage for `count` objects of `type` via uprv_malloc.
#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))

// Releases storage obtained from NEW_ARRAY.
#define DELETE_ARRAY(array) uprv_free((void *) (array))
42 
// One raw collation element together with the iterator offsets recorded
// around the call that produced it.
struct RCEI
{
    uint32_t ce;    // raw collation element value
    int32_t  low;   // lower of the two offsets recorded for this CE
    int32_t  high;  // higher of the two offsets recorded for this CE
};
49 
50 U_NAMESPACE_BEGIN
51 
52 struct RCEBuffer
53 {
54     RCEI    defaultBuffer[DEFAULT_BUFFER_SIZE];
55     RCEI   *buffer;
56     int32_t bufferIndex;
57     int32_t bufferSize;
58 
59     RCEBuffer();
60     ~RCEBuffer();
61 
62     UBool isEmpty() const;
63     void  put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode);
64     const RCEI *get();
65 };
66 
RCEBuffer()67 RCEBuffer::RCEBuffer()
68 {
69     buffer = defaultBuffer;
70     bufferIndex = 0;
71     bufferSize = UPRV_LENGTHOF(defaultBuffer);
72 }
73 
~RCEBuffer()74 RCEBuffer::~RCEBuffer()
75 {
76     if (buffer != defaultBuffer) {
77         DELETE_ARRAY(buffer);
78     }
79 }
80 
isEmpty() const81 UBool RCEBuffer::isEmpty() const
82 {
83     return bufferIndex <= 0;
84 }
85 
put(uint32_t ce,int32_t ixLow,int32_t ixHigh,UErrorCode & errorCode)86 void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode)
87 {
88     if (U_FAILURE(errorCode)) {
89         return;
90     }
91     if (bufferIndex >= bufferSize) {
92         RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW);
93         if (newBuffer == NULL) {
94             errorCode = U_MEMORY_ALLOCATION_ERROR;
95             return;
96         }
97 
98         ARRAY_COPY(newBuffer, buffer, bufferSize);
99 
100         if (buffer != defaultBuffer) {
101             DELETE_ARRAY(buffer);
102         }
103 
104         buffer = newBuffer;
105         bufferSize += BUFFER_GROW;
106     }
107 
108     buffer[bufferIndex].ce   = ce;
109     buffer[bufferIndex].low  = ixLow;
110     buffer[bufferIndex].high = ixHigh;
111 
112     bufferIndex += 1;
113 }
114 
get()115 const RCEI *RCEBuffer::get()
116 {
117     if (bufferIndex > 0) {
118      return &buffer[--bufferIndex];
119     }
120 
121     return NULL;
122 }
123 
PCEBuffer()124 PCEBuffer::PCEBuffer()
125 {
126     buffer = defaultBuffer;
127     bufferIndex = 0;
128     bufferSize = UPRV_LENGTHOF(defaultBuffer);
129 }
130 
~PCEBuffer()131 PCEBuffer::~PCEBuffer()
132 {
133     if (buffer != defaultBuffer) {
134         DELETE_ARRAY(buffer);
135     }
136 }
137 
reset()138 void PCEBuffer::reset()
139 {
140     bufferIndex = 0;
141 }
142 
isEmpty() const143 UBool PCEBuffer::isEmpty() const
144 {
145     return bufferIndex <= 0;
146 }
147 
put(uint64_t ce,int32_t ixLow,int32_t ixHigh,UErrorCode & errorCode)148 void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode)
149 {
150     if (U_FAILURE(errorCode)) {
151         return;
152     }
153     if (bufferIndex >= bufferSize) {
154         PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW);
155         if (newBuffer == NULL) {
156             errorCode = U_MEMORY_ALLOCATION_ERROR;
157             return;
158         }
159 
160         ARRAY_COPY(newBuffer, buffer, bufferSize);
161 
162         if (buffer != defaultBuffer) {
163             DELETE_ARRAY(buffer);
164         }
165 
166         buffer = newBuffer;
167         bufferSize += BUFFER_GROW;
168     }
169 
170     buffer[bufferIndex].ce   = ce;
171     buffer[bufferIndex].low  = ixLow;
172     buffer[bufferIndex].high = ixHigh;
173 
174     bufferIndex += 1;
175 }
176 
get()177 const PCEI *PCEBuffer::get()
178 {
179     if (bufferIndex > 0) {
180      return &buffer[--bufferIndex];
181     }
182 
183     return NULL;
184 }
185 
UCollationPCE(UCollationElements * elems)186 UCollationPCE::UCollationPCE(UCollationElements *elems) { init(elems); }
187 
UCollationPCE(CollationElementIterator * iter)188 UCollationPCE::UCollationPCE(CollationElementIterator *iter) { init(iter); }
189 
init(UCollationElements * elems)190 void UCollationPCE::init(UCollationElements *elems) {
191     init(CollationElementIterator::fromUCollationElements(elems));
192 }
193 
init(CollationElementIterator * iter)194 void UCollationPCE::init(CollationElementIterator *iter)
195 {
196     cei = iter;
197     init(*iter->rbc_);
198 }
199 
init(const Collator & coll)200 void UCollationPCE::init(const Collator &coll)
201 {
202     UErrorCode status = U_ZERO_ERROR;
203 
204     strength    = coll.getAttribute(UCOL_STRENGTH, status);
205     toShift     = coll.getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED;
206     isShifted   = FALSE;
207     variableTop = coll.getVariableTop(status);
208 }
209 
~UCollationPCE()210 UCollationPCE::~UCollationPCE()
211 {
212     // nothing to do
213 }
214 
processCE(uint32_t ce)215 uint64_t UCollationPCE::processCE(uint32_t ce)
216 {
217     uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
218 
219     // This is clean, but somewhat slow...
220     // We could apply the mask to ce and then
221     // just get all three orders...
222     switch(strength) {
223     default:
224         tertiary = ucol_tertiaryOrder(ce);
225         /* note fall-through */
226 
227     case UCOL_SECONDARY:
228         secondary = ucol_secondaryOrder(ce);
229         /* note fall-through */
230 
231     case UCOL_PRIMARY:
232         primary = ucol_primaryOrder(ce);
233     }
234 
235     // **** This should probably handle continuations too.  ****
236     // **** That means that we need 24 bits for the primary ****
237     // **** instead of the 16 that we're currently using.   ****
238     // **** So we can lay out the 64 bits as: 24.12.12.16.  ****
239     // **** Another complication with continuations is that ****
240     // **** the *second* CE is marked as a continuation, so ****
241     // **** we always have to peek ahead to know how long   ****
242     // **** the primary is...                               ****
243     if ((toShift && variableTop > ce && primary != 0)
244                 || (isShifted && primary == 0)) {
245 
246         if (primary == 0) {
247             return UCOL_IGNORABLE;
248         }
249 
250         if (strength >= UCOL_QUATERNARY) {
251             quaternary = primary;
252         }
253 
254         primary = secondary = tertiary = 0;
255         isShifted = TRUE;
256     } else {
257         if (strength >= UCOL_QUATERNARY) {
258             quaternary = 0xFFFF;
259         }
260 
261         isShifted = FALSE;
262     }
263 
264     return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
265 }
266 
267 U_NAMESPACE_END
268 
269 /* public methods ---------------------------------------------------- */
270 
271 U_CAPI UCollationElements* U_EXPORT2
ucol_openElements(const UCollator * coll,const UChar * text,int32_t textLength,UErrorCode * status)272 ucol_openElements(const UCollator  *coll,
273                   const UChar      *text,
274                         int32_t    textLength,
275                         UErrorCode *status)
276 {
277     if (U_FAILURE(*status)) {
278         return NULL;
279     }
280     if (coll == NULL || (text == NULL && textLength != 0)) {
281         *status = U_ILLEGAL_ARGUMENT_ERROR;
282         return NULL;
283     }
284     const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
285     if (rbc == NULL) {
286         *status = U_UNSUPPORTED_ERROR;  // coll is a Collator but not a RuleBasedCollator
287         return NULL;
288     }
289 
290     UnicodeString s((UBool)(textLength < 0), text, textLength);
291     CollationElementIterator *cei = rbc->createCollationElementIterator(s);
292     if (cei == NULL) {
293         *status = U_MEMORY_ALLOCATION_ERROR;
294         return NULL;
295     }
296 
297     return cei->toUCollationElements();
298 }
299 
300 
301 U_CAPI void U_EXPORT2
ucol_closeElements(UCollationElements * elems)302 ucol_closeElements(UCollationElements *elems)
303 {
304     delete CollationElementIterator::fromUCollationElements(elems);
305 }
306 
307 U_CAPI void U_EXPORT2
ucol_reset(UCollationElements * elems)308 ucol_reset(UCollationElements *elems)
309 {
310     CollationElementIterator::fromUCollationElements(elems)->reset();
311 }
312 
313 U_CAPI int32_t U_EXPORT2
ucol_next(UCollationElements * elems,UErrorCode * status)314 ucol_next(UCollationElements *elems,
315           UErrorCode         *status)
316 {
317     if (U_FAILURE(*status)) {
318         return UCOL_NULLORDER;
319     }
320 
321     return CollationElementIterator::fromUCollationElements(elems)->next(*status);
322 }
323 
324 U_NAMESPACE_BEGIN
325 
326 int64_t
nextProcessed(int32_t * ixLow,int32_t * ixHigh,UErrorCode * status)327 UCollationPCE::nextProcessed(
328                    int32_t            *ixLow,
329                    int32_t            *ixHigh,
330                    UErrorCode         *status)
331 {
332     int64_t result = UCOL_IGNORABLE;
333     uint32_t low = 0, high = 0;
334 
335     if (U_FAILURE(*status)) {
336         return UCOL_PROCESSED_NULLORDER;
337     }
338 
339     pceBuffer.reset();
340 
341     do {
342         low = cei->getOffset();
343         int32_t ce = cei->next(*status);
344         high = cei->getOffset();
345 
346         if (ce == UCOL_NULLORDER) {
347              result = UCOL_PROCESSED_NULLORDER;
348              break;
349         }
350 
351         result = processCE((uint32_t)ce);
352     } while (result == UCOL_IGNORABLE);
353 
354     if (ixLow != NULL) {
355         *ixLow = low;
356     }
357 
358     if (ixHigh != NULL) {
359         *ixHigh = high;
360     }
361 
362     return result;
363 }
364 
365 U_NAMESPACE_END
366 
367 U_CAPI int32_t U_EXPORT2
ucol_previous(UCollationElements * elems,UErrorCode * status)368 ucol_previous(UCollationElements *elems,
369               UErrorCode         *status)
370 {
371     if(U_FAILURE(*status)) {
372         return UCOL_NULLORDER;
373     }
374     return CollationElementIterator::fromUCollationElements(elems)->previous(*status);
375 }
376 
377 U_NAMESPACE_BEGIN
378 
379 int64_t
previousProcessed(int32_t * ixLow,int32_t * ixHigh,UErrorCode * status)380 UCollationPCE::previousProcessed(
381                    int32_t            *ixLow,
382                    int32_t            *ixHigh,
383                    UErrorCode         *status)
384 {
385     int64_t result = UCOL_IGNORABLE;
386     int32_t  low = 0, high = 0;
387 
388     if (U_FAILURE(*status)) {
389         return UCOL_PROCESSED_NULLORDER;
390     }
391 
392     // pceBuffer.reset();
393 
394     while (pceBuffer.isEmpty()) {
395         // buffer raw CEs up to non-ignorable primary
396         RCEBuffer rceb;
397         int32_t ce;
398 
399         // **** do we need to reset rceb, or will it always be empty at this point ****
400         do {
401             high = cei->getOffset();
402             ce   = cei->previous(*status);
403             low  = cei->getOffset();
404 
405             if (ce == UCOL_NULLORDER) {
406                 if (!rceb.isEmpty()) {
407                     break;
408                 }
409 
410                 goto finish;
411             }
412 
413             rceb.put((uint32_t)ce, low, high, *status);
414         } while (U_SUCCESS(*status) && ((ce & UCOL_PRIMARYORDERMASK) == 0 || isContinuation(ce)));
415 
416         // process the raw CEs
417         while (U_SUCCESS(*status) && !rceb.isEmpty()) {
418             const RCEI *rcei = rceb.get();
419 
420             result = processCE(rcei->ce);
421 
422             if (result != UCOL_IGNORABLE) {
423                 pceBuffer.put(result, rcei->low, rcei->high, *status);
424             }
425         }
426         if (U_FAILURE(*status)) {
427             return UCOL_PROCESSED_NULLORDER;
428         }
429     }
430 
431 finish:
432     if (pceBuffer.isEmpty()) {
433         // **** Is -1 the right value for ixLow, ixHigh? ****
434     	if (ixLow != NULL) {
435     		*ixLow = -1;
436     	}
437 
438     	if (ixHigh != NULL) {
439     		*ixHigh = -1
440     		;
441     	}
442         return UCOL_PROCESSED_NULLORDER;
443     }
444 
445     const PCEI *pcei = pceBuffer.get();
446 
447     if (ixLow != NULL) {
448         *ixLow = pcei->low;
449     }
450 
451     if (ixHigh != NULL) {
452         *ixHigh = pcei->high;
453     }
454 
455     return pcei->ce;
456 }
457 
458 U_NAMESPACE_END
459 
460 U_CAPI int32_t U_EXPORT2
ucol_getMaxExpansion(const UCollationElements * elems,int32_t order)461 ucol_getMaxExpansion(const UCollationElements *elems,
462                            int32_t            order)
463 {
464     return CollationElementIterator::fromUCollationElements(elems)->getMaxExpansion(order);
465 
466     // TODO: The old code masked the order according to strength and then did a binary search.
467     // However this was probably at least partially broken because of the following comment.
468     // Still, it might have found a match when this version may not.
469 
470     // FIXME: with a masked search, there might be more than one hit,
471     // so we need to look forward and backward from the match to find all
472     // of the hits...
473 }
474 
475 U_CAPI void U_EXPORT2
ucol_setText(UCollationElements * elems,const UChar * text,int32_t textLength,UErrorCode * status)476 ucol_setText(      UCollationElements *elems,
477              const UChar              *text,
478                    int32_t            textLength,
479                    UErrorCode         *status)
480 {
481     if (U_FAILURE(*status)) {
482         return;
483     }
484 
485     if ((text == NULL && textLength != 0)) {
486         *status = U_ILLEGAL_ARGUMENT_ERROR;
487         return;
488     }
489     UnicodeString s((UBool)(textLength < 0), text, textLength);
490     return CollationElementIterator::fromUCollationElements(elems)->setText(s, *status);
491 }
492 
493 U_CAPI int32_t U_EXPORT2
ucol_getOffset(const UCollationElements * elems)494 ucol_getOffset(const UCollationElements *elems)
495 {
496     return CollationElementIterator::fromUCollationElements(elems)->getOffset();
497 }
498 
499 U_CAPI void U_EXPORT2
ucol_setOffset(UCollationElements * elems,int32_t offset,UErrorCode * status)500 ucol_setOffset(UCollationElements    *elems,
501                int32_t           offset,
502                UErrorCode            *status)
503 {
504     if (U_FAILURE(*status)) {
505         return;
506     }
507 
508     CollationElementIterator::fromUCollationElements(elems)->setOffset(offset, *status);
509 }
510 
// Extracts the primary weight: bits 31..16 of `order`, returned as a value
// in the range 0..0xffff.
U_CAPI int32_t U_EXPORT2
ucol_primaryOrder (int32_t order)
{
    const uint32_t bits = static_cast<uint32_t>(order);
    return static_cast<int32_t>((bits >> 16) & 0xffffu);
}
516 
// Extracts the secondary weight: bits 15..8 of `order`, returned as a value
// in the range 0..0xff.
U_CAPI int32_t U_EXPORT2
ucol_secondaryOrder (int32_t order)
{
    const uint32_t bits = static_cast<uint32_t>(order);
    return static_cast<int32_t>((bits >> 8) & 0xffu);
}
522 
// Extracts the tertiary weight: bits 7..0 of `order`, returned as a value
// in the range 0..0xff.
U_CAPI int32_t U_EXPORT2
ucol_tertiaryOrder (int32_t order)
{
    const uint32_t bits = static_cast<uint32_t>(order);
    return static_cast<int32_t>(bits & 0xffu);
}
528 
529 #endif /* #if !UCONFIG_NO_COLLATION */
530