1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 // file: rbbi_cache.cpp
5 
6 #include "unicode/utypes.h"
7 
8 #if !UCONFIG_NO_BREAK_ITERATION
9 
10 #include "unicode/ubrk.h"
11 #include "unicode/rbbi.h"
12 
13 #include "rbbi_cache.h"
14 
15 #include "brkeng.h"
16 #include "cmemory.h"
17 #include "rbbidata.h"
18 #include "rbbirb.h"
19 #include "uassert.h"
20 #include "uvectr32.h"
21 
22 U_NAMESPACE_BEGIN
23 
24 /*
25  * DictionaryCache implementation
26  */
27 
DictionaryCache(RuleBasedBreakIterator * bi,UErrorCode & status)28 RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
29         fBI(bi), fBreaks(status), fPositionInCache(-1),
30         fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) {
31 }
32 
~DictionaryCache()33 RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
34 }
35 
reset()36 void RuleBasedBreakIterator::DictionaryCache::reset() {
37     fPositionInCache = -1;
38     fStart = 0;
39     fLimit = 0;
40     fFirstRuleStatusIndex = 0;
41     fOtherRuleStatusIndex = 0;
42     fBreaks.removeAllElements();
43 }
44 
following(int32_t fromPos,int32_t * result,int32_t * statusIndex)45 UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
46     if (fromPos >= fLimit || fromPos < fStart) {
47         fPositionInCache = -1;
48         return FALSE;
49     }
50 
51     // Sequential iteration, move from previous boundary to the following
52 
53     int32_t r = 0;
54     if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
55         ++fPositionInCache;
56         if (fPositionInCache >= fBreaks.size()) {
57             fPositionInCache = -1;
58             return FALSE;
59         }
60         r = fBreaks.elementAti(fPositionInCache);
61         U_ASSERT(r > fromPos);
62         *result = r;
63         *statusIndex = fOtherRuleStatusIndex;
64         return TRUE;
65     }
66 
67     // Random indexing. Linear search for the boundary following the given position.
68 
69     for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
70         r= fBreaks.elementAti(fPositionInCache);
71         if (r > fromPos) {
72             *result = r;
73             *statusIndex = fOtherRuleStatusIndex;
74             return TRUE;
75         }
76     }
77     U_ASSERT(FALSE);
78     fPositionInCache = -1;
79     return FALSE;
80 }
81 
82 
preceding(int32_t fromPos,int32_t * result,int32_t * statusIndex)83 UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
84     if (fromPos <= fStart || fromPos > fLimit) {
85         fPositionInCache = -1;
86         return FALSE;
87     }
88 
89     if (fromPos == fLimit) {
90         fPositionInCache = fBreaks.size() - 1;
91         if (fPositionInCache >= 0) {
92             U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos);
93         }
94     }
95 
96     int32_t r;
97     if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
98         --fPositionInCache;
99         r = fBreaks.elementAti(fPositionInCache);
100         U_ASSERT(r < fromPos);
101         *result = r;
102         *statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
103         return TRUE;
104     }
105 
106     if (fPositionInCache == 0) {
107         fPositionInCache = -1;
108         return FALSE;
109     }
110 
111     for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) {
112         r = fBreaks.elementAti(fPositionInCache);
113         if (r < fromPos) {
114             *result = r;
115             *statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
116             return TRUE;
117         }
118     }
119     U_ASSERT(FALSE);
120     fPositionInCache = -1;
121     return FALSE;
122 }
123 
populateDictionary(int32_t startPos,int32_t endPos,int32_t firstRuleStatus,int32_t otherRuleStatus)124 void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
125                                        int32_t firstRuleStatus, int32_t otherRuleStatus) {
126     if ((endPos - startPos) <= 1) {
127         return;
128     }
129 
130     reset();
131     fFirstRuleStatusIndex = firstRuleStatus;
132     fOtherRuleStatusIndex = otherRuleStatus;
133 
134     int32_t rangeStart = startPos;
135     int32_t rangeEnd = endPos;
136 
137     uint16_t    category;
138     int32_t     current;
139     UErrorCode  status = U_ZERO_ERROR;
140     int32_t     foundBreakCount = 0;
141     UText      *text = &fBI->fText;
142 
143     // Loop through the text, looking for ranges of dictionary characters.
144     // For each span, find the appropriate break engine, and ask it to find
145     // any breaks within the span.
146 
147     utext_setNativeIndex(text, rangeStart);
148     UChar32     c = utext_current32(text);
149     category = UTRIE2_GET16(fBI->fData->fTrie, c);
150 
151     while(U_SUCCESS(status)) {
152         while((current = (int32_t)UTEXT_GETNATIVEINDEX(text)) < rangeEnd && (category & 0x4000) == 0) {
153             utext_next32(text);           // TODO: cleaner loop structure.
154             c = utext_current32(text);
155             category = UTRIE2_GET16(fBI->fData->fTrie, c);
156         }
157         if (current >= rangeEnd) {
158             break;
159         }
160 
161         // We now have a dictionary character. Get the appropriate language object
162         // to deal with it.
163         const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c);
164 
165         // Ask the language object if there are any breaks. It will add them to the cache and
166         // leave the text pointer on the other side of its range, ready to search for the next one.
167         if (lbe != NULL) {
168             foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks);
169         }
170 
171         // Reload the loop variables for the next go-round
172         c = utext_current32(text);
173         category = UTRIE2_GET16(fBI->fData->fTrie, c);
174     }
175 
176     // If we found breaks, ensure that the first and last entries are
177     // the original starting and ending position. And initialize the
178     // cache iteration position to the first entry.
179 
180     // printf("foundBreakCount = %d\n", foundBreakCount);
181     if (foundBreakCount > 0) {
182         U_ASSERT(foundBreakCount == fBreaks.size());
183         if (startPos < fBreaks.elementAti(0)) {
184             // The dictionary did not place a boundary at the start of the segment of text.
185             // Add one now. This should not commonly happen, but it would be easy for interactions
186             // of the rules for dictionary segments and the break engine implementations to
187             // inadvertently cause it. Cover it here, just in case.
188             fBreaks.insertElementAt(startPos, 0, status);
189         }
190         if (endPos > fBreaks.peeki()) {
191             fBreaks.push(endPos, status);
192         }
193         fPositionInCache = 0;
194         // Note: Dictionary matching may extend beyond the original limit.
195         fStart = fBreaks.elementAti(0);
196         fLimit = fBreaks.peeki();
197     } else {
198         // there were no language-based breaks, even though the segment contained
199         // dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
200         // for this range will fail, and the calling code will fall back to the rule based boundaries.
201     }
202 }
203 
204 
205 /*
206  *   BreakCache implemetation
207  */
208 
BreakCache(RuleBasedBreakIterator * bi,UErrorCode & status)209 RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
210         fBI(bi), fSideBuffer(status) {
211     reset();
212 }
213 
214 
~BreakCache()215 RuleBasedBreakIterator::BreakCache::~BreakCache() {
216 }
217 
218 
reset(int32_t pos,int32_t ruleStatus)219 void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus) {
220     fStartBufIdx = 0;
221     fEndBufIdx = 0;
222     fTextIdx = pos;
223     fBufIdx = 0;
224     fBoundaries[0] = pos;
225     fStatuses[0] = (uint16_t)ruleStatus;
226 }
227 
228 
current()229 int32_t  RuleBasedBreakIterator::BreakCache::current() {
230     fBI->fPosition = fTextIdx;
231     fBI->fRuleStatusIndex = fStatuses[fBufIdx];
232     fBI->fDone = FALSE;
233     return fTextIdx;
234 }
235 
236 
following(int32_t startPos,UErrorCode & status)237 void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status) {
238     if (U_FAILURE(status)) {
239         return;
240     }
241     if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
242         // startPos is in the cache. Do a next() from that position.
243         // TODO: an awkward set of interactions with bi->fDone
244         //       seek() does not clear it; it can't because of interactions with populateNear().
245         //       next() does not clear it in the fast-path case, where everything matters. Maybe it should.
246         //       So clear it here, for the case where seek() succeeded on an iterator that had previously run off the end.
247         fBI->fDone = false;
248         next();
249     }
250     return;
251 }
252 
253 
preceding(int32_t startPos,UErrorCode & status)254 void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status) {
255     if (U_FAILURE(status)) {
256         return;
257     }
258     if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
259         if (startPos == fTextIdx) {
260             previous(status);
261         } else {
262             // seek() leaves the BreakCache positioned at the preceding boundary
263             //        if the requested position is between two bounaries.
264             // current() pushes the BreakCache position out to the BreakIterator itself.
265             U_ASSERT(startPos > fTextIdx);
266             current();
267         }
268     }
269     return;
270 }
271 
272 
273 /*
274  * Out-of-line code for BreakCache::next().
275  * Cache does not already contain the boundary
276  */
nextOL()277 void RuleBasedBreakIterator::BreakCache::nextOL() {
278     fBI->fDone = !populateFollowing();
279     fBI->fPosition = fTextIdx;
280     fBI->fRuleStatusIndex = fStatuses[fBufIdx];
281     return;
282 }
283 
284 
previous(UErrorCode & status)285 void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) {
286     if (U_FAILURE(status)) {
287         return;
288     }
289     int32_t initialBufIdx = fBufIdx;
290     if (fBufIdx == fStartBufIdx) {
291         // At start of cache. Prepend to it.
292         populatePreceding(status);
293     } else {
294         // Cache already holds the next boundary
295         fBufIdx = modChunkSize(fBufIdx - 1);
296         fTextIdx = fBoundaries[fBufIdx];
297     }
298     fBI->fDone = (fBufIdx == initialBufIdx);
299     fBI->fPosition = fTextIdx;
300     fBI->fRuleStatusIndex = fStatuses[fBufIdx];
301     return;
302 }
303 
304 
seek(int32_t pos)305 UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos) {
306     if (pos < fBoundaries[fStartBufIdx] || pos > fBoundaries[fEndBufIdx]) {
307         return FALSE;
308     }
309     if (pos == fBoundaries[fStartBufIdx]) {
310         // Common case: seek(0), from BreakIterator::first()
311         fBufIdx = fStartBufIdx;
312         fTextIdx = fBoundaries[fBufIdx];
313         return TRUE;
314     }
315     if (pos == fBoundaries[fEndBufIdx]) {
316         fBufIdx = fEndBufIdx;
317         fTextIdx = fBoundaries[fBufIdx];
318         return TRUE;
319     }
320 
321     int32_t min = fStartBufIdx;
322     int32_t max = fEndBufIdx;
323     while (min != max) {
324         int32_t probe = (min + max + (min>max ? CACHE_SIZE : 0)) / 2;
325         probe = modChunkSize(probe);
326         if (fBoundaries[probe] > pos) {
327             max = probe;
328         } else {
329             min = modChunkSize(probe + 1);
330         }
331     }
332     U_ASSERT(fBoundaries[max] > pos);
333     fBufIdx = modChunkSize(max - 1);
334     fTextIdx = fBoundaries[fBufIdx];
335     U_ASSERT(fTextIdx <= pos);
336     return TRUE;
337 }
338 
339 
populateNear(int32_t position,UErrorCode & status)340 UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) {
341     if (U_FAILURE(status)) {
342         return FALSE;
343     }
344     U_ASSERT(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]);
345 
346     // Find a boundary somewhere in the vicinity of the requested position.
347     // Depending on the safe rules and the text data, it could be either before, at, or after
348     // the requested position.
349 
350 
351     // If the requested position is not near already cached positions, clear the existing cache,
352     // find a near-by boundary and begin new cache contents there.
353 
354     if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) {
355         int32_t aBoundary = 0;
356         int32_t ruleStatusIndex = 0;
357         if (position > 20) {
358             int32_t backupPos = fBI->handleSafePrevious(position);
359 
360             if (backupPos > 0) {
361                 // Advance to the boundary following the backup position.
362                 // There is a complication: the safe reverse rules identify pairs of code points
363                 // that are safe. If advancing from the safe point moves forwards by less than
364                 // two code points, we need to advance one more time to ensure that the boundary
365                 // is good, including a correct rules status value.
366                 //
367                 fBI->fPosition = backupPos;
368                 aBoundary = fBI->handleNext();
369                 if (aBoundary <= backupPos + 4) {
370                     // +4 is a quick test for possibly having advanced only one codepoint.
371                     // Four being the length of the longest potential code point, a supplementary in UTF-8
372                     utext_setNativeIndex(&fBI->fText, aBoundary);
373                     if (backupPos == utext_getPreviousNativeIndex(&fBI->fText)) {
374                         // The initial handleNext() only advanced by a single code point. Go again.
375                         aBoundary = fBI->handleNext();   // Safe rules identify safe pairs.
376                     }
377                 }
378                 ruleStatusIndex = fBI->fRuleStatusIndex;
379             }
380         }
381         reset(aBoundary, ruleStatusIndex);        // Reset cache to hold aBoundary as a single starting point.
382     }
383 
384     // Fill in boundaries between existing cache content and the new requested position.
385 
386     if (fBoundaries[fEndBufIdx] < position) {
387         // The last position in the cache precedes the requested position.
388         // Add following position(s) to the cache.
389         while (fBoundaries[fEndBufIdx] < position) {
390             if (!populateFollowing()) {
391                 U_ASSERT(false);
392                 return false;
393             }
394         }
395         fBufIdx = fEndBufIdx;                      // Set iterator position to the end of the buffer.
396         fTextIdx = fBoundaries[fBufIdx];           // Required because populateFollowing may add extra boundaries.
397         while (fTextIdx > position) {              // Move backwards to a position at or preceding the requested pos.
398             previous(status);
399         }
400         return true;
401     }
402 
403     if (fBoundaries[fStartBufIdx] > position) {
404         // The first position in the cache is beyond the requested position.
405         // back up more until we get a boundary <= the requested position.
406         while (fBoundaries[fStartBufIdx] > position) {
407             populatePreceding(status);
408         }
409         fBufIdx = fStartBufIdx;                    // Set iterator position to the start of the buffer.
410         fTextIdx = fBoundaries[fBufIdx];           // Required because populatePreceding may add extra boundaries.
411         while (fTextIdx < position) {              // Move forwards to a position at or following the requested pos.
412             next();
413         }
414         if (fTextIdx > position) {
415             // If position is not itself a boundary, the next() loop above will overshoot.
416             // Back up one, leaving cache position at the boundary preceding the requested position.
417             previous(status);
418         }
419         return true;
420     }
421 
422     U_ASSERT(fTextIdx == position);
423     return true;
424 }
425 
426 
427 
populateFollowing()428 UBool RuleBasedBreakIterator::BreakCache::populateFollowing() {
429     int32_t fromPosition = fBoundaries[fEndBufIdx];
430     int32_t fromRuleStatusIdx = fStatuses[fEndBufIdx];
431     int32_t pos = 0;
432     int32_t ruleStatusIdx = 0;
433 
434     if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
435         addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
436         return TRUE;
437     }
438 
439     fBI->fPosition = fromPosition;
440     pos = fBI->handleNext();
441     if (pos == UBRK_DONE) {
442         return FALSE;
443     }
444 
445     ruleStatusIdx = fBI->fRuleStatusIndex;
446     if (fBI->fDictionaryCharCount > 0) {
447         // The text segment obtained from the rules includes dictionary characters.
448         // Subdivide it, with subdivided results going into the dictionary cache.
449         fBI->fDictionaryCache->populateDictionary(fromPosition, pos, fromRuleStatusIdx, ruleStatusIdx);
450         if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
451             addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
452             return TRUE;
453             // TODO: may want to move a sizable chunk of dictionary cache to break cache at this point.
454             //       But be careful with interactions with populateNear().
455         }
456     }
457 
458     // Rule based segment did not include dictionary characters.
459     // Or, it did contain dictionary chars, but the dictionary segmenter didn't handle them,
460     //    meaning that we didn't take the return, above.
461     // Add its end point to the cache.
462     addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
463 
464     // Add several non-dictionary boundaries at this point, to optimize straight forward iteration.
465     //    (subsequent calls to BreakIterator::next() will take the fast path, getting cached results.
466     //
467     for (int count=0; count<6; ++count) {
468         pos = fBI->handleNext();
469         if (pos == UBRK_DONE || fBI->fDictionaryCharCount > 0) {
470             break;
471         }
472         addFollowing(pos, fBI->fRuleStatusIndex, RetainCachePosition);
473     }
474 
475     return TRUE;
476 }
477 
478 
populatePreceding(UErrorCode & status)479 UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) {
480     if (U_FAILURE(status)) {
481         return FALSE;
482     }
483 
484     int32_t fromPosition = fBoundaries[fStartBufIdx];
485     if (fromPosition == 0) {
486         return FALSE;
487     }
488 
489     int32_t position = 0;
490     int32_t positionStatusIdx = 0;
491 
492     if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) {
493         addPreceding(position, positionStatusIdx, UpdateCachePosition);
494         return TRUE;
495     }
496 
497     int32_t backupPosition = fromPosition;
498 
499     // Find a boundary somewhere preceding the first already-cached boundary
500     do {
501         backupPosition = backupPosition - 30;
502         if (backupPosition <= 0) {
503             backupPosition = 0;
504         } else {
505             backupPosition = fBI->handleSafePrevious(backupPosition);
506         }
507         if (backupPosition == UBRK_DONE || backupPosition == 0) {
508             position = 0;
509             positionStatusIdx = 0;
510         } else {
511             // Advance to the boundary following the backup position.
512             // There is a complication: the safe reverse rules identify pairs of code points
513             // that are safe. If advancing from the safe point moves forwards by less than
514             // two code points, we need to advance one more time to ensure that the boundary
515             // is good, including a correct rules status value.
516             //
517             fBI->fPosition = backupPosition;
518             position = fBI->handleNext();
519             if (position <= backupPosition + 4) {
520                 // +4 is a quick test for possibly having advanced only one codepoint.
521                 // Four being the length of the longest potential code point, a supplementary in UTF-8
522                 utext_setNativeIndex(&fBI->fText, position);
523                 if (backupPosition == utext_getPreviousNativeIndex(&fBI->fText)) {
524                     // The initial handleNext() only advanced by a single code point. Go again.
525                     position = fBI->handleNext();   // Safe rules identify safe pairs.
526                 }
527             };
528             positionStatusIdx = fBI->fRuleStatusIndex;
529         }
530     } while (position >= fromPosition);
531 
532     // Find boundaries between the one we just located and the first already-cached boundary
533     // Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer..
534 
535     fSideBuffer.removeAllElements();
536     fSideBuffer.addElement(position, status);
537     fSideBuffer.addElement(positionStatusIdx, status);
538 
539     do {
540         int32_t prevPosition = fBI->fPosition = position;
541         int32_t prevStatusIdx = positionStatusIdx;
542         position = fBI->handleNext();
543         positionStatusIdx = fBI->fRuleStatusIndex;
544         if (position == UBRK_DONE) {
545             break;
546         }
547 
548         UBool segmentHandledByDictionary = FALSE;
549         if (fBI->fDictionaryCharCount != 0) {
550             // Segment from the rules includes dictionary characters.
551             // Subdivide it, with subdivided results going into the dictionary cache.
552             int32_t dictSegEndPosition = position;
553             fBI->fDictionaryCache->populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
554             while (fBI->fDictionaryCache->following(prevPosition, &position, &positionStatusIdx)) {
555                 segmentHandledByDictionary = true;
556                 U_ASSERT(position > prevPosition);
557                 if (position >= fromPosition) {
558                     break;
559                 }
560                 U_ASSERT(position <= dictSegEndPosition);
561                 fSideBuffer.addElement(position, status);
562                 fSideBuffer.addElement(positionStatusIdx, status);
563                 prevPosition = position;
564             }
565             U_ASSERT(position==dictSegEndPosition || position>=fromPosition);
566         }
567 
568         if (!segmentHandledByDictionary && position < fromPosition) {
569             fSideBuffer.addElement(position, status);
570             fSideBuffer.addElement(positionStatusIdx, status);
571         }
572     } while (position < fromPosition);
573 
574     // Move boundaries from the side buffer to the main circular buffer.
575     UBool success = FALSE;
576     if (!fSideBuffer.isEmpty()) {
577         positionStatusIdx = fSideBuffer.popi();
578         position = fSideBuffer.popi();
579         addPreceding(position, positionStatusIdx, UpdateCachePosition);
580         success = TRUE;
581     }
582 
583     while (!fSideBuffer.isEmpty()) {
584         positionStatusIdx = fSideBuffer.popi();
585         position = fSideBuffer.popi();
586         if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
587             // No space in circular buffer to hold a new preceding result while
588             // also retaining the current cache (iteration) position.
589             // Bailing out is safe; the cache will refill again if needed.
590             break;
591         }
592     }
593 
594     return success;
595 }
596 
597 
addFollowing(int32_t position,int32_t ruleStatusIdx,UpdatePositionValues update)598 void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
599     U_ASSERT(position > fBoundaries[fEndBufIdx]);
600     U_ASSERT(ruleStatusIdx <= UINT16_MAX);
601     int32_t nextIdx = modChunkSize(fEndBufIdx + 1);
602     if (nextIdx == fStartBufIdx) {
603         fStartBufIdx = modChunkSize(fStartBufIdx + 6);    // TODO: experiment. Probably revert to 1.
604     }
605     fBoundaries[nextIdx] = position;
606     fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
607     fEndBufIdx = nextIdx;
608     if (update == UpdateCachePosition) {
609         // Set current position to the newly added boundary.
610         fBufIdx = nextIdx;
611         fTextIdx = position;
612     } else {
613         // Retaining the original cache position.
614         // Check if the added boundary wraps around the buffer, and would over-write the original position.
615         // It's the responsibility of callers of this function to not add too many.
616         U_ASSERT(nextIdx != fBufIdx);
617     }
618 }
619 
addPreceding(int32_t position,int32_t ruleStatusIdx,UpdatePositionValues update)620 bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
621     U_ASSERT(position < fBoundaries[fStartBufIdx]);
622     U_ASSERT(ruleStatusIdx <= UINT16_MAX);
623     int32_t nextIdx = modChunkSize(fStartBufIdx - 1);
624     if (nextIdx == fEndBufIdx) {
625         if (fBufIdx == fEndBufIdx && update == RetainCachePosition) {
626             // Failure. The insertion of the new boundary would claim the buffer position that is the
627             // current iteration position. And we also want to retain the current iteration position.
628             // (The buffer is already completely full of entries that precede the iteration position.)
629             return false;
630         }
631         fEndBufIdx = modChunkSize(fEndBufIdx - 1);
632     }
633     fBoundaries[nextIdx] = position;
634     fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
635     fStartBufIdx = nextIdx;
636     if (update == UpdateCachePosition) {
637         fBufIdx = nextIdx;
638         fTextIdx = position;
639     }
640     return true;
641 }
642 
643 
dumpCache()644 void RuleBasedBreakIterator::BreakCache::dumpCache() {
645 #ifdef RBBI_DEBUG
646     RBBIDebugPrintf("fTextIdx:%d   fBufIdx:%d\n", fTextIdx, fBufIdx);
647     for (int32_t i=fStartBufIdx; ; i=modChunkSize(i+1)) {
648         RBBIDebugPrintf("%d  %d\n", i, fBoundaries[i]);
649         if (i == fEndBufIdx) {
650             break;
651         }
652     }
653 #endif
654 }
655 
656 U_NAMESPACE_END
657 
658 #endif // #if !UCONFIG_NO_BREAK_ITERATION
659