1 /*
2 *******************************************************************************
3 * Copyright (C) 2009-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 */
7 
8 #include "unicode/utypes.h"
9 
10 #if !UCONFIG_NO_COLLATION
11 
12 #include "unicode/alphaindex.h"
13 #include "unicode/coll.h"
14 #include "unicode/localpointer.h"
15 #include "unicode/normalizer2.h"
16 #include "unicode/tblcoll.h"
17 #include "unicode/uchar.h"
18 #include "unicode/ulocdata.h"
19 #include "unicode/uniset.h"
20 #include "unicode/uobject.h"
21 #include "unicode/usetiter.h"
22 #include "unicode/utf16.h"
23 
24 #include "cmemory.h"
25 #include "cstring.h"
26 #include "uassert.h"
27 #include "uvector.h"
28 #include "uvectr64.h"
29 
30 //#include <string>
31 //#include <iostream>
32 
33 U_NAMESPACE_BEGIN
34 
35 namespace {
36 
37 /**
38  * Prefix string for Chinese index buckets.
39  * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
40  */
41 const UChar BASE[1] = { 0xFDD0 };
42 const int32_t BASE_LENGTH = 1;
43 
44 UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
45                                 const UnicodeString &one, const UnicodeString &other);
46 
47 }  // namespace
48 
49 static int32_t U_CALLCONV
50 collatorComparator(const void *context, const void *left, const void *right);
51 
52 static int32_t U_CALLCONV
53 recordCompareFn(const void *context, const void *left, const void *right);
54 
55 //  UVector<Record *> support function, delete a Record.
56 static void U_CALLCONV
alphaIndex_deleteRecord(void * obj)57 alphaIndex_deleteRecord(void *obj) {
58     delete static_cast<AlphabeticIndex::Record *>(obj);
59 }
60 
61 namespace {
62 
ownedString(const UnicodeString & s,LocalPointer<UnicodeString> & owned,UErrorCode & errorCode)63 UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
64                            UErrorCode &errorCode) {
65     if (U_FAILURE(errorCode)) { return NULL; }
66     if (owned.isValid()) {
67         return owned.orphan();
68     }
69     UnicodeString *p = new UnicodeString(s);
70     if (p == NULL) {
71         errorCode = U_MEMORY_ALLOCATION_ERROR;
72     }
73     return p;
74 }
75 
getString(const UVector & list,int32_t i)76 inline UnicodeString *getString(const UVector &list, int32_t i) {
77     return static_cast<UnicodeString *>(list[i]);
78 }
79 
getBucket(const UVector & list,int32_t i)80 inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
81     return static_cast<AlphabeticIndex::Bucket *>(list[i]);
82 }
83 
getRecord(const UVector & list,int32_t i)84 inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
85     return static_cast<AlphabeticIndex::Record *>(list[i]);
86 }
87 
88 /**
89  * Like Java Collections.binarySearch(List, String, Comparator).
90  *
91  * @return the index>=0 where the item was found,
92  *         or the index<0 for inserting the string at ~index in sorted order
93  */
binarySearch(const UVector & list,const UnicodeString & s,const Collator & coll)94 int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
95     if (list.size() == 0) { return ~0; }
96     int32_t start = 0;
97     int32_t limit = list.size();
98     for (;;) {
99         int32_t i = (start + limit) / 2;
100         const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
101         UErrorCode errorCode = U_ZERO_ERROR;
102         UCollationResult cmp = coll.compare(s, *si, errorCode);
103         if (cmp == UCOL_EQUAL) {
104             return i;
105         } else if (cmp < 0) {
106             if (i == start) {
107                 return ~start;  // insert s before *si
108             }
109             limit = i;
110         } else {
111             if (i == start) {
112                 return ~(start + 1);  // insert s after *si
113             }
114             start = i;
115         }
116     }
117 }
118 
119 }  // namespace
120 
121 // The BucketList is not in the anonymous namespace because only Clang
122 // seems to support its use in other classes from there.
123 // However, we also don't need U_I18N_API because it is not used from outside the i18n library.
124 class BucketList : public UObject {
125 public:
BucketList(UVector * bucketList,UVector * publicBucketList)126     BucketList(UVector *bucketList, UVector *publicBucketList)
127             : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
128         int32_t displayIndex = 0;
129         for (int32_t i = 0; i < publicBucketList->size(); ++i) {
130             getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
131         }
132     }
133 
134     // The virtual destructor must not be inline.
135     // See ticket #8454 for details.
136     virtual ~BucketList();
137 
getBucketCount() const138     int32_t getBucketCount() const {
139         return immutableVisibleList_->size();
140     }
141 
getBucketIndex(const UnicodeString & name,const Collator & collatorPrimaryOnly,UErrorCode & errorCode)142     int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
143                            UErrorCode &errorCode) {
144         // binary search
145         int32_t start = 0;
146         int32_t limit = bucketList_->size();
147         while ((start + 1) < limit) {
148             int32_t i = (start + limit) / 2;
149             const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
150             UCollationResult nameVsBucket =
151                 collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
152             if (nameVsBucket < 0) {
153                 limit = i;
154             } else {
155                 start = i;
156             }
157         }
158         const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
159         if (bucket->displayBucket_ != NULL) {
160             bucket = bucket->displayBucket_;
161         }
162         return bucket->displayIndex_;
163     }
164 
165     /** All of the buckets, visible and invisible. */
166     UVector *bucketList_;
167     /** Just the visible buckets. */
168     UVector *immutableVisibleList_;
169 };
170 
~BucketList()171 BucketList::~BucketList() {
172     delete bucketList_;
173     if (immutableVisibleList_ != bucketList_) {
174         delete immutableVisibleList_;
175     }
176 }
177 
~ImmutableIndex()178 AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
179     delete buckets_;
180     delete collatorPrimaryOnly_;
181 }
182 
183 int32_t
getBucketCount() const184 AlphabeticIndex::ImmutableIndex::getBucketCount() const {
185     return buckets_->getBucketCount();
186 }
187 
188 int32_t
getBucketIndex(const UnicodeString & name,UErrorCode & errorCode) const189 AlphabeticIndex::ImmutableIndex::getBucketIndex(
190         const UnicodeString &name, UErrorCode &errorCode) const {
191     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
192 }
193 
194 const AlphabeticIndex::Bucket *
getBucket(int32_t index) const195 AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
196     if (0 <= index && index < buckets_->getBucketCount()) {
197         return icu::getBucket(*buckets_->immutableVisibleList_, index);
198     } else {
199         return NULL;
200     }
201 }
202 
AlphabeticIndex(const Locale & locale,UErrorCode & status)203 AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
204         : inputList_(NULL),
205           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
206           maxLabelCount_(99),
207           initialLabels_(NULL), firstCharsInScripts_(NULL),
208           collator_(NULL), collatorPrimaryOnly_(NULL),
209           buckets_(NULL) {
210     init(&locale, status);
211 }
212 
213 
AlphabeticIndex(RuleBasedCollator * collator,UErrorCode & status)214 AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
215         : inputList_(NULL),
216           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
217           maxLabelCount_(99),
218           initialLabels_(NULL), firstCharsInScripts_(NULL),
219           collator_(collator), collatorPrimaryOnly_(NULL),
220           buckets_(NULL) {
221     init(NULL, status);
222 }
223 
224 
225 
~AlphabeticIndex()226 AlphabeticIndex::~AlphabeticIndex() {
227     delete collator_;
228     delete collatorPrimaryOnly_;
229     delete firstCharsInScripts_;
230     delete buckets_;
231     delete inputList_;
232     delete initialLabels_;
233 }
234 
235 
addLabels(const UnicodeSet & additions,UErrorCode & status)236 AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
237     if (U_FAILURE(status)) {
238         return *this;
239     }
240     initialLabels_->addAll(additions);
241     clearBuckets();
242     return *this;
243 }
244 
245 
addLabels(const Locale & locale,UErrorCode & status)246 AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
247     addIndexExemplars(locale, status);
248     clearBuckets();
249     return *this;
250 }
251 
252 
buildImmutableIndex(UErrorCode & errorCode)253 AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
254     if (U_FAILURE(errorCode)) { return NULL; }
255     // In C++, the ImmutableIndex must own its copy of the BucketList,
256     // even if it contains no records, for proper memory management.
257     // We could clone the buckets_ if they are not NULL,
258     // but that would be worth it only if this method is called multiple times,
259     // or called after using the old-style bucket iterator API.
260     LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
261     LocalPointer<RuleBasedCollator> coll(
262         static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
263     if (immutableBucketList.isNull() || coll.isNull()) {
264         errorCode = U_MEMORY_ALLOCATION_ERROR;
265         return NULL;
266     }
267     ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
268     if (immIndex == NULL) {
269         errorCode = U_MEMORY_ALLOCATION_ERROR;
270         return NULL;
271     }
272     // The ImmutableIndex adopted its parameter objects.
273     immutableBucketList.orphan();
274     coll.orphan();
275     return immIndex;
276 }
277 
getBucketCount(UErrorCode & status)278 int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
279     initBuckets(status);
280     if (U_FAILURE(status)) {
281         return 0;
282     }
283     return buckets_->getBucketCount();
284 }
285 
286 
getRecordCount(UErrorCode & status)287 int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
288     if (U_FAILURE(status) || inputList_ == NULL) {
289         return 0;
290     }
291     return inputList_->size();
292 }
293 
initLabels(UVector & indexCharacters,UErrorCode & errorCode) const294 void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
295     const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
296     if (U_FAILURE(errorCode)) { return; }
297 
298     const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
299     const UnicodeString &overflowBoundary =
300         *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
301 
302     // We make a sorted array of elements.
303     // Some of the input may be redundant.
304     // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
305     // We filter out those cases.
306     UnicodeSetIterator iter(*initialLabels_);
307     while (iter.next()) {
308         const UnicodeString *item = &iter.getString();
309         LocalPointer<UnicodeString> ownedItem;
310         UBool checkDistinct;
311         int32_t itemLength = item->length();
312         if (!item->hasMoreChar32Than(0, itemLength, 1)) {
313             checkDistinct = FALSE;
314         } else if(item->charAt(itemLength - 1) == 0x2a &&  // '*'
315                 item->charAt(itemLength - 2) != 0x2a) {
316             // Use a label if it is marked with one trailing star,
317             // even if the label string sorts the same when all contractions are suppressed.
318             ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
319             item = ownedItem.getAlias();
320             if (item == NULL) {
321                 errorCode = U_MEMORY_ALLOCATION_ERROR;
322                 return;
323             }
324             checkDistinct = FALSE;
325         } else {
326             checkDistinct = TRUE;
327         }
328         if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
329             // Ignore a primary-ignorable or non-alphabetic index character.
330         } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
331             // Ignore an index character that will land in the overflow bucket.
332         } else if (checkDistinct &&
333                 collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
334             // Ignore a multi-code point index character that does not sort distinctly
335             // from the sequence of its separate characters.
336         } else {
337             int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
338             if (insertionPoint < 0) {
339                 indexCharacters.insertElementAt(
340                     ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
341             } else {
342                 const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
343                 if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
344                     indexCharacters.setElementAt(
345                         ownedString(*item, ownedItem, errorCode), insertionPoint);
346                 }
347             }
348         }
349     }
350     if (U_FAILURE(errorCode)) { return; }
351 
352     // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
353 
354     int32_t size = indexCharacters.size() - 1;
355     if (size > maxLabelCount_) {
356         int32_t count = 0;
357         int32_t old = -1;
358         for (int32_t i = 0; i < indexCharacters.size();) {
359             ++count;
360             int32_t bump = count * maxLabelCount_ / size;
361             if (bump == old) {
362                 indexCharacters.removeElementAt(i);
363             } else {
364                 old = bump;
365                 ++i;
366             }
367         }
368     }
369 }
370 
371 namespace {
372 
fixLabel(const UnicodeString & current,UnicodeString & temp)373 const UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
374     if (!current.startsWith(BASE, BASE_LENGTH)) {
375         return current;
376     }
377     UChar rest = current.charAt(BASE_LENGTH);
378     if (0x2800 < rest && rest <= 0x28FF) { // stroke count
379         int32_t count = rest-0x2800;
380         temp.setTo((UChar)(0x30 + count % 10));
381         if (count >= 10) {
382             count /= 10;
383             temp.insert(0, (UChar)(0x30 + count % 10));
384             if (count >= 10) {
385                 count /= 10;
386                 temp.insert(0, (UChar)(0x30 + count));
387             }
388         }
389         return temp.append((UChar)0x5283);
390     }
391     return temp.setTo(current, BASE_LENGTH);
392 }
393 
hasMultiplePrimaryWeights(const RuleBasedCollator & coll,uint32_t variableTop,const UnicodeString & s,UVector64 & ces,UErrorCode & errorCode)394 UBool hasMultiplePrimaryWeights(
395         const RuleBasedCollator &coll, uint32_t variableTop,
396         const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
397     ces.removeAllElements();
398     coll.internalGetCEs(s, ces, errorCode);
399     if (U_FAILURE(errorCode)) { return FALSE; }
400     UBool seenPrimary = FALSE;
401     for (int32_t i = 0; i < ces.size(); ++i) {
402         int64_t ce = ces.elementAti(i);
403         uint32_t p = (uint32_t)(ce >> 32);
404         if (p > variableTop) {
405             // not primary ignorable
406             if (seenPrimary) {
407                 return TRUE;
408             }
409             seenPrimary = TRUE;
410         }
411     }
412     return FALSE;
413 }
414 
415 }  // namespace
416 
createBucketList(UErrorCode & errorCode) const417 BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
418     // Initialize indexCharacters.
419     UVector indexCharacters(errorCode);
420     indexCharacters.setDeleter(uprv_deleteUObject);
421     initLabels(indexCharacters, errorCode);
422     if (U_FAILURE(errorCode)) { return NULL; }
423 
424     // Variables for hasMultiplePrimaryWeights().
425     UVector64 ces(errorCode);
426     uint32_t variableTop;
427     if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
428         variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
429     } else {
430         variableTop = 0;
431     }
432     UBool hasInvisibleBuckets = FALSE;
433 
434     // Helper arrays for Chinese Pinyin collation.
435     Bucket *asciiBuckets[26] = {
436         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
437         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
438     };
439     Bucket *pinyinBuckets[26] = {
440         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
441         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
442     };
443     UBool hasPinyin = FALSE;
444 
445     LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode);
446     if (U_FAILURE(errorCode)) {
447         return NULL;
448     }
449     bucketList->setDeleter(uprv_deleteUObject);
450 
451     // underflow bucket
452     Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
453     if (bucket == NULL) {
454         errorCode = U_MEMORY_ALLOCATION_ERROR;
455         return NULL;
456     }
457     bucketList->addElement(bucket, errorCode);
458     if (U_FAILURE(errorCode)) { return NULL; }
459 
460     UnicodeString temp;
461 
462     // fix up the list, adding underflow, additions, overflow
463     // Insert inflow labels as needed.
464     int32_t scriptIndex = -1;
465     const UnicodeString *scriptUpperBoundary = &emptyString_;
466     for (int32_t i = 0; i < indexCharacters.size(); ++i) {
467         UnicodeString &current = *getString(indexCharacters, i);
468         if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
469             // We crossed the script boundary into a new script.
470             const UnicodeString &inflowBoundary = *scriptUpperBoundary;
471             UBool skippedScript = FALSE;
472             for (;;) {
473                 scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
474                 if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
475                     break;
476                 }
477                 skippedScript = TRUE;
478             }
479             if (skippedScript && bucketList->size() > 1) {
480                 // We are skipping one or more scripts,
481                 // and we are not just getting out of the underflow label.
482                 bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
483                 if (bucket == NULL) {
484                     errorCode = U_MEMORY_ALLOCATION_ERROR;
485                     return NULL;
486                 }
487                 bucketList->addElement(bucket, errorCode);
488             }
489         }
490         // Add a bucket with the current label.
491         bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
492         if (bucket == NULL) {
493             errorCode = U_MEMORY_ALLOCATION_ERROR;
494             return NULL;
495         }
496         bucketList->addElement(bucket, errorCode);
497         // Remember ASCII and Pinyin buckets for Pinyin redirects.
498         UChar c;
499         if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) {  // A-Z
500             asciiBuckets[c - 0x41] = bucket;
501         } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
502                 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
503             pinyinBuckets[c - 0x41] = bucket;
504             hasPinyin = TRUE;
505         }
506         // Check for multiple primary weights.
507         if (!current.startsWith(BASE, BASE_LENGTH) &&
508                 hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
509                                           ces, errorCode) &&
510                 current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
511             // "AE-ligature" or "Sch" etc.
512             for (int32_t i = bucketList->size() - 2;; --i) {
513                 Bucket *singleBucket = getBucket(*bucketList, i);
514                 if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
515                     // There is no single-character bucket since the last
516                     // underflow or inflow label.
517                     break;
518                 }
519                 if (singleBucket->displayBucket_ == NULL &&
520                         !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
521                                                    singleBucket->lowerBoundary_,
522                                                    ces, errorCode)) {
523                     // Add an invisible bucket that redirects strings greater than the expansion
524                     // to the previous single-character bucket.
525                     // For example, after ... Q R S Sch we add Sch\uFFFF->S
526                     // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
527                     bucket = new Bucket(emptyString_,
528                         UnicodeString(current).append((UChar)0xFFFF),
529                         U_ALPHAINDEX_NORMAL);
530                     if (bucket == NULL) {
531                         errorCode = U_MEMORY_ALLOCATION_ERROR;
532                         return NULL;
533                     }
534                     bucket->displayBucket_ = singleBucket;
535                     bucketList->addElement(bucket, errorCode);
536                     hasInvisibleBuckets = TRUE;
537                     break;
538                 }
539             }
540         }
541     }
542     if (U_FAILURE(errorCode)) { return NULL; }
543     if (bucketList->size() == 1) {
544         // No real labels, show only the underflow label.
545         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
546         if (bl == NULL) {
547             errorCode = U_MEMORY_ALLOCATION_ERROR;
548             return NULL;
549         }
550         bucketList.orphan();
551         return bl;
552     }
553     // overflow bucket
554     bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
555     if (bucket == NULL) {
556         errorCode = U_MEMORY_ALLOCATION_ERROR;
557         return NULL;
558     }
559     bucketList->addElement(bucket, errorCode); // final
560 
561     if (hasPinyin) {
562         // Redirect Pinyin buckets.
563         Bucket *asciiBucket = NULL;
564         for (int32_t i = 0; i < 26; ++i) {
565             if (asciiBuckets[i] != NULL) {
566                 asciiBucket = asciiBuckets[i];
567             }
568             if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
569                 pinyinBuckets[i]->displayBucket_ = asciiBucket;
570                 hasInvisibleBuckets = TRUE;
571             }
572         }
573     }
574 
575     if (U_FAILURE(errorCode)) { return NULL; }
576     if (!hasInvisibleBuckets) {
577         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
578         if (bl == NULL) {
579             errorCode = U_MEMORY_ALLOCATION_ERROR;
580             return NULL;
581         }
582         bucketList.orphan();
583         return bl;
584     }
585     // Merge inflow buckets that are visually adjacent.
586     // Iterate backwards: Merge inflow into overflow rather than the other way around.
587     int32_t i = bucketList->size() - 1;
588     Bucket *nextBucket = getBucket(*bucketList, i);
589     while (--i > 0) {
590         bucket = getBucket(*bucketList, i);
591         if (bucket->displayBucket_ != NULL) {
592             continue;  // skip invisible buckets
593         }
594         if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
595             if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
596                 bucket->displayBucket_ = nextBucket;
597                 continue;
598             }
599         }
600         nextBucket = bucket;
601     }
602 
603     LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode);
604     if (U_FAILURE(errorCode)) {
605         return NULL;
606     }
607     // Do not call publicBucketList->setDeleter():
608     // This vector shares its objects with the bucketList.
609     for (int32_t i = 0; i < bucketList->size(); ++i) {
610         bucket = getBucket(*bucketList, i);
611         if (bucket->displayBucket_ == NULL) {
612             publicBucketList->addElement(bucket, errorCode);
613         }
614     }
615     if (U_FAILURE(errorCode)) { return NULL; }
616     BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
617     if (bl == NULL) {
618         errorCode = U_MEMORY_ALLOCATION_ERROR;
619         return NULL;
620     }
621     bucketList.orphan();
622     publicBucketList.orphan();
623     return bl;
624 }
625 
626 /**
627  * Creates an index, and buckets and sorts the list of records into the index.
628  */
initBuckets(UErrorCode & errorCode)629 void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
630     if (U_FAILURE(errorCode) || buckets_ != NULL) {
631         return;
632     }
633     buckets_ = createBucketList(errorCode);
634     if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
635         return;
636     }
637 
638     // Sort the records by name.
639     // Stable sort preserves input order of collation duplicates.
640     inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
641 
642     // Now, we traverse all of the input, which is now sorted.
643     // If the item doesn't go in the current bucket, we find the next bucket that contains it.
644     // This makes the process order n*log(n), since we just sort the list and then do a linear process.
645     // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
646     // we need to improve it for that case.
647 
648     Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
649     int32_t bucketIndex = 1;
650     Bucket *nextBucket;
651     const UnicodeString *upperBoundary;
652     if (bucketIndex < buckets_->bucketList_->size()) {
653         nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
654         upperBoundary = &nextBucket->lowerBoundary_;
655     } else {
656         nextBucket = NULL;
657         upperBoundary = NULL;
658     }
659     for (int32_t i = 0; i < inputList_->size(); ++i) {
660         Record *r = getRecord(*inputList_, i);
661         // if the current bucket isn't the right one, find the one that is
662         // We have a special flag for the last bucket so that we don't look any further
663         while (upperBoundary != NULL &&
664                 collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
665             currentBucket = nextBucket;
666             // now reset the boundary that we compare against
667             if (bucketIndex < buckets_->bucketList_->size()) {
668                 nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
669                 upperBoundary = &nextBucket->lowerBoundary_;
670             } else {
671                 upperBoundary = NULL;
672             }
673         }
674         // now put the record into the bucket.
675         Bucket *bucket = currentBucket;
676         if (bucket->displayBucket_ != NULL) {
677             bucket = bucket->displayBucket_;
678         }
679         if (bucket->records_ == NULL) {
680             bucket->records_ = new UVector(errorCode);
681             if (bucket->records_ == NULL) {
682                 errorCode = U_MEMORY_ALLOCATION_ERROR;
683                 return;
684             }
685         }
686         bucket->records_->addElement(r, errorCode);
687     }
688 }
689 
clearBuckets()690 void AlphabeticIndex::clearBuckets() {
691     if (buckets_ != NULL) {
692         delete buckets_;
693         buckets_ = NULL;
694         internalResetBucketIterator();
695     }
696 }
697 
internalResetBucketIterator()698 void AlphabeticIndex::internalResetBucketIterator() {
699     labelsIterIndex_ = -1;
700     currentBucket_ = NULL;
701 }
702 
703 
addIndexExemplars(const Locale & locale,UErrorCode & status)704 void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
705     LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
706     if (U_FAILURE(status)) {
707         return;
708     }
709 
710     UnicodeSet exemplars;
711     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
712     if (U_SUCCESS(status)) {
713         initialLabels_->addAll(exemplars);
714         return;
715     }
716     status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR
717 
718     // The locale data did not include explicit Index characters.
719     // Synthesize a set of them from the locale's standard exemplar characters.
720     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
721     if (U_FAILURE(status)) {
722         return;
723     }
724 
725     // question: should we add auxiliary exemplars?
726     if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
727         exemplars.add(0x61, 0x7A);
728     }
729     if (exemplars.containsSome(0xAC00, 0xD7A3)) {  // Hangul syllables
730         // cut down to small list
731         exemplars.remove(0xAC00, 0xD7A3).
732             add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
733             add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
734             add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
735             add(0xD30C).add(0xD558);
736     }
737     if (exemplars.containsSome(0x1200, 0x137F)) {  // Ethiopic block
738         // cut down to small list
739         // make use of the fact that Ethiopic is allocated in 8's, where
740         // the base is 0 mod 8.
741         UnicodeSet ethiopic(
742             UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
743         UnicodeSetIterator it(ethiopic);
744         while (it.next() && !it.isString()) {
745             if ((it.getCodepoint() & 0x7) != 0) {
746                 exemplars.remove(it.getCodepoint());
747             }
748         }
749     }
750 
751     // Upper-case any that aren't already so.
752     //   (We only do this for synthesized index characters.)
753     UnicodeSetIterator it(exemplars);
754     UnicodeString upperC;
755     while (it.next()) {
756         const UnicodeString &exemplarC = it.getString();
757         upperC = exemplarC;
758         upperC.toUpper(locale);
759         initialLabels_->add(upperC);
760     }
761 }
762 
addChineseIndexCharacters(UErrorCode & errorCode)763 UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
764     UnicodeSet contractions;
765     collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
766     if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; }
767     initialLabels_->addAll(contractions);
768     UnicodeSetIterator iter(contractions);
769     while (iter.next()) {
770         const UnicodeString &s = iter.getString();
771         U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
772         UChar c = s.charAt(s.length() - 1);
773         if (0x41 <= c && c <= 0x5A) {  // A-Z
774             // There are Pinyin labels, add ASCII A-Z labels as well.
775             initialLabels_->add(0x41, 0x5A);  // A-Z
776             break;
777         }
778     }
779     return TRUE;
780 }
781 
782 
783 /*
784  * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
785  */
786 static const UChar CGJ = 0x034F;
separated(const UnicodeString & item)787 UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
788     UnicodeString result;
789     if (item.length() == 0) {
790         return result;
791     }
792     int32_t i = 0;
793     for (;;) {
794         UChar32  cp = item.char32At(i);
795         result.append(cp);
796         i = item.moveIndex32(i, 1);
797         if (i >= item.length()) {
798             break;
799         }
800         result.append(CGJ);
801     }
802     return result;
803 }
804 
805 
operator ==(const AlphabeticIndex &) const806 UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
807     return FALSE;
808 }
809 
810 
operator !=(const AlphabeticIndex &) const811 UBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
812     return FALSE;
813 }
814 
815 
getCollator() const816 const RuleBasedCollator &AlphabeticIndex::getCollator() const {
817     return *collator_;
818 }
819 
820 
getInflowLabel() const821 const UnicodeString &AlphabeticIndex::getInflowLabel() const {
822     return inflowLabel_;
823 }
824 
getOverflowLabel() const825 const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
826     return overflowLabel_;
827 }
828 
829 
getUnderflowLabel() const830 const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
831     return underflowLabel_;
832 }
833 
834 
setInflowLabel(const UnicodeString & label,UErrorCode &)835 AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
836     inflowLabel_ = label;
837     clearBuckets();
838     return *this;
839 }
840 
841 
setOverflowLabel(const UnicodeString & label,UErrorCode &)842 AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
843     overflowLabel_ = label;
844     clearBuckets();
845     return *this;
846 }
847 
848 
setUnderflowLabel(const UnicodeString & label,UErrorCode &)849 AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
850     underflowLabel_ = label;
851     clearBuckets();
852     return *this;
853 }
854 
855 
getMaxLabelCount() const856 int32_t AlphabeticIndex::getMaxLabelCount() const {
857     return maxLabelCount_;
858 }
859 
860 
setMaxLabelCount(int32_t maxLabelCount,UErrorCode & status)861 AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
862     if (U_FAILURE(status)) {
863         return *this;
864     }
865     if (maxLabelCount <= 0) {
866         status = U_ILLEGAL_ARGUMENT_ERROR;
867         return *this;
868     }
869     maxLabelCount_ = maxLabelCount;
870     clearBuckets();
871     return *this;
872 }
873 
874 
875 //
876 //  init() - Common code for constructors.
877 //
878 
init(const Locale * locale,UErrorCode & status)879 void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
880     if (U_FAILURE(status)) { return; }
881     if (locale == NULL && collator_ == NULL) {
882         status = U_ILLEGAL_ARGUMENT_ERROR;
883         return;
884     }
885 
886     initialLabels_         = new UnicodeSet();
887     if (initialLabels_ == NULL) {
888         status = U_MEMORY_ALLOCATION_ERROR;
889         return;
890     }
891 
892     inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
893     overflowLabel_ = inflowLabel_;
894     underflowLabel_ = inflowLabel_;
895 
896     if (collator_ == NULL) {
897         Collator *coll = Collator::createInstance(*locale, status);
898         if (U_FAILURE(status)) {
899             delete coll;
900             return;
901         }
902         if (coll == NULL) {
903             status = U_MEMORY_ALLOCATION_ERROR;
904             return;
905         }
906         collator_ = dynamic_cast<RuleBasedCollator *>(coll);
907         if (collator_ == NULL) {
908             delete coll;
909             status = U_UNSUPPORTED_ERROR;
910             return;
911         }
912     }
913     collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
914     if (collatorPrimaryOnly_ == NULL) {
915         status = U_MEMORY_ALLOCATION_ERROR;
916         return;
917     }
918     collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
919     firstCharsInScripts_ = firstStringsInScript(status);
920     if (U_FAILURE(status)) { return; }
921     firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
922     // Guard against a degenerate collator where
923     // some script boundary strings are primary ignorable.
924     for (;;) {
925         if (U_FAILURE(status)) { return; }
926         if (firstCharsInScripts_->isEmpty()) {
927             // AlphabeticIndex requires some non-ignorable script boundary strings.
928             status = U_ILLEGAL_ARGUMENT_ERROR;
929             return;
930         }
931         if (collatorPrimaryOnly_->compare(
932                 *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
933                 emptyString_, status) == UCOL_EQUAL) {
934             firstCharsInScripts_->removeElementAt(0);
935         } else {
936             break;
937         }
938     }
939 
940     // Chinese index characters, which are specific to each of the several Chinese tailorings,
941     // take precedence over the single locale data exemplar set per language.
942     if (!addChineseIndexCharacters(status) && locale != NULL) {
943         addIndexExemplars(*locale, status);
944     }
945 }
946 
947 
948 //
949 //  Comparison function for UVector<UnicodeString *> sorting with a collator.
950 //
951 static int32_t U_CALLCONV
collatorComparator(const void * context,const void * left,const void * right)952 collatorComparator(const void *context, const void *left, const void *right) {
953     const UElement *leftElement = static_cast<const UElement *>(left);
954     const UElement *rightElement = static_cast<const UElement *>(right);
955     const UnicodeString *leftString  = static_cast<const UnicodeString *>(leftElement->pointer);
956     const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
957 
958     if (leftString == rightString) {
959         // Catches case where both are NULL
960         return 0;
961     }
962     if (leftString == NULL) {
963         return 1;
964     };
965     if (rightString == NULL) {
966         return -1;
967     }
968     const Collator *col = static_cast<const Collator *>(context);
969     UErrorCode errorCode = U_ZERO_ERROR;
970     return col->compare(*leftString, *rightString, errorCode);
971 }
972 
973 //
974 //  Comparison function for UVector<Record *> sorting with a collator.
975 //
976 static int32_t U_CALLCONV
recordCompareFn(const void * context,const void * left,const void * right)977 recordCompareFn(const void *context, const void *left, const void *right) {
978     const UElement *leftElement = static_cast<const UElement *>(left);
979     const UElement *rightElement = static_cast<const UElement *>(right);
980     const AlphabeticIndex::Record *leftRec  = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
981     const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
982     const Collator *col = static_cast<const Collator *>(context);
983     UErrorCode errorCode = U_ZERO_ERROR;
984     return col->compare(leftRec->name_, rightRec->name_, errorCode);
985 }
986 
firstStringsInScript(UErrorCode & status)987 UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
988     if (U_FAILURE(status)) {
989         return NULL;
990     }
991     LocalPointer<UVector> dest(new UVector(status), status);
992     if (U_FAILURE(status)) {
993         return NULL;
994     }
995     dest->setDeleter(uprv_deleteUObject);
996     // Fetch the script-first-primary contractions which are defined in the root collator.
997     // They all start with U+FDD1.
998     UnicodeSet set;
999     collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
1000     if (U_FAILURE(status)) {
1001         return NULL;
1002     }
1003     if (set.isEmpty()) {
1004         status = U_UNSUPPORTED_ERROR;
1005         return NULL;
1006     }
1007     UnicodeSetIterator iter(set);
1008     while (iter.next()) {
1009         const UnicodeString &boundary = iter.getString();
1010         uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
1011         if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
1012             // Ignore boundaries for the special reordering groups.
1013             // Take only those for "real scripts" (where the sample character is a Letter,
1014             // and the one for unassigned implicit weights (Cn).
1015             continue;
1016         }
1017         UnicodeString *s = new UnicodeString(boundary);
1018         if (s == NULL) {
1019             status = U_MEMORY_ALLOCATION_ERROR;
1020             return NULL;
1021         }
1022         dest->addElement(s, status);
1023     }
1024     return dest.orphan();
1025 }
1026 
1027 
1028 namespace {
1029 
1030 /**
1031  * Returns true if one index character string is "better" than the other.
1032  * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
1033  * better, and otherwise binary-less-than is better.
1034  */
isOneLabelBetterThanOther(const Normalizer2 & nfkdNormalizer,const UnicodeString & one,const UnicodeString & other)1035 UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
1036                                 const UnicodeString &one, const UnicodeString &other) {
1037     // This is called with primary-equal strings, but never with one.equals(other).
1038     UErrorCode status = U_ZERO_ERROR;
1039     UnicodeString n1 = nfkdNormalizer.normalize(one, status);
1040     UnicodeString n2 = nfkdNormalizer.normalize(other, status);
1041     if (U_FAILURE(status)) { return FALSE; }
1042     int32_t result = n1.countChar32() - n2.countChar32();
1043     if (result != 0) {
1044         return result < 0;
1045     }
1046     result = n1.compareCodePointOrder(n2);
1047     if (result != 0) {
1048         return result < 0;
1049     }
1050     return one.compareCodePointOrder(other) < 0;
1051 }
1052 
1053 }  // namespace
1054 
1055 //
1056 //  Constructor & Destructor for AlphabeticIndex::Record
1057 //
1058 //     Records are internal only, instances are not directly surfaced in the public API.
1059 //     This class is mostly struct-like, with all public fields.
1060 
Record(const UnicodeString & name,const void * data)1061 AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
1062         : name_(name), data_(data) {}
1063 
~Record()1064 AlphabeticIndex::Record::~Record() {
1065 }
1066 
1067 
addRecord(const UnicodeString & name,const void * data,UErrorCode & status)1068 AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
1069     if (U_FAILURE(status)) {
1070         return *this;
1071     }
1072     if (inputList_ == NULL) {
1073         inputList_ = new UVector(status);
1074         if (inputList_ == NULL) {
1075             status = U_MEMORY_ALLOCATION_ERROR;
1076             return *this;
1077         }
1078         inputList_->setDeleter(alphaIndex_deleteRecord);
1079     }
1080     Record *r = new Record(name, data);
1081     if (r == NULL) {
1082         status = U_MEMORY_ALLOCATION_ERROR;
1083         return *this;
1084     }
1085     inputList_->addElement(r, status);
1086     clearBuckets();
1087     //std::string ss;
1088     //std::string ss2;
1089     //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
1090     //             "   sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
1091     return *this;
1092 }
1093 
1094 
clearRecords(UErrorCode & status)1095 AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
1096     if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
1097         inputList_->removeAllElements();
1098         clearBuckets();
1099     }
1100     return *this;
1101 }
1102 
getBucketIndex(const UnicodeString & name,UErrorCode & status)1103 int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
1104     initBuckets(status);
1105     if (U_FAILURE(status)) {
1106         return 0;
1107     }
1108     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
1109 }
1110 
1111 
getBucketIndex() const1112 int32_t AlphabeticIndex::getBucketIndex() const {
1113     return labelsIterIndex_;
1114 }
1115 
1116 
nextBucket(UErrorCode & status)1117 UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
1118     if (U_FAILURE(status)) {
1119         return FALSE;
1120     }
1121     if (buckets_ == NULL && currentBucket_ != NULL) {
1122         status = U_ENUM_OUT_OF_SYNC_ERROR;
1123         return FALSE;
1124     }
1125     initBuckets(status);
1126     if (U_FAILURE(status)) {
1127         return FALSE;
1128     }
1129     ++labelsIterIndex_;
1130     if (labelsIterIndex_ >= buckets_->getBucketCount()) {
1131         labelsIterIndex_ = buckets_->getBucketCount();
1132         return FALSE;
1133     }
1134     currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
1135     resetRecordIterator();
1136     return TRUE;
1137 }
1138 
getBucketLabel() const1139 const UnicodeString &AlphabeticIndex::getBucketLabel() const {
1140     if (currentBucket_ != NULL) {
1141         return currentBucket_->label_;
1142     } else {
1143         return emptyString_;
1144     }
1145 }
1146 
1147 
getBucketLabelType() const1148 UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
1149     if (currentBucket_ != NULL) {
1150         return currentBucket_->labelType_;
1151     } else {
1152         return U_ALPHAINDEX_NORMAL;
1153     }
1154 }
1155 
1156 
getBucketRecordCount() const1157 int32_t AlphabeticIndex::getBucketRecordCount() const {
1158     if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
1159         return currentBucket_->records_->size();
1160     } else {
1161         return 0;
1162     }
1163 }
1164 
1165 
resetBucketIterator(UErrorCode & status)1166 AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
1167     if (U_FAILURE(status)) {
1168         return *this;
1169     }
1170     internalResetBucketIterator();
1171     return *this;
1172 }
1173 
1174 
nextRecord(UErrorCode & status)1175 UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
1176     if (U_FAILURE(status)) {
1177         return FALSE;
1178     }
1179     if (currentBucket_ == NULL) {
1180         // We are trying to iterate over the items in a bucket, but there is no
1181         // current bucket from the enumeration of buckets.
1182         status = U_INVALID_STATE_ERROR;
1183         return FALSE;
1184     }
1185     if (buckets_ == NULL) {
1186         status = U_ENUM_OUT_OF_SYNC_ERROR;
1187         return FALSE;
1188     }
1189     if (currentBucket_->records_ == NULL) {
1190         return FALSE;
1191     }
1192     ++itemsIterIndex_;
1193     if (itemsIterIndex_ >= currentBucket_->records_->size()) {
1194         itemsIterIndex_  = currentBucket_->records_->size();
1195         return FALSE;
1196     }
1197     return TRUE;
1198 }
1199 
1200 
getRecordName() const1201 const UnicodeString &AlphabeticIndex::getRecordName() const {
1202     const UnicodeString *retStr = &emptyString_;
1203     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1204         itemsIterIndex_ >= 0 &&
1205         itemsIterIndex_ < currentBucket_->records_->size()) {
1206             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1207             retStr = &item->name_;
1208     }
1209     return *retStr;
1210 }
1211 
getRecordData() const1212 const void *AlphabeticIndex::getRecordData() const {
1213     const void *retPtr = NULL;
1214     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1215         itemsIterIndex_ >= 0 &&
1216         itemsIterIndex_ < currentBucket_->records_->size()) {
1217             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1218             retPtr = item->data_;
1219     }
1220     return retPtr;
1221 }
1222 
1223 
resetRecordIterator()1224 AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
1225     itemsIterIndex_ = -1;
1226     return *this;
1227 }
1228 
1229 
1230 
Bucket(const UnicodeString & label,const UnicodeString & lowerBoundary,UAlphabeticIndexLabelType type)1231 AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
1232                                 const UnicodeString &lowerBoundary,
1233                                 UAlphabeticIndexLabelType type)
1234         : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
1235           displayBucket_(NULL), displayIndex_(-1),
1236           records_(NULL) {
1237 }
1238 
1239 
~Bucket()1240 AlphabeticIndex::Bucket::~Bucket() {
1241     delete records_;
1242 }
1243 
1244 U_NAMESPACE_END
1245 
1246 #endif  // !UCONFIG_NO_COLLATION
1247