1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  *******************************************************************************
5  * Copyright (C) 2013, International Business Machines Corporation and         *
6  * others. All Rights Reserved.                                                *
7  *******************************************************************************
8  */
9 package com.ibm.icu.impl;
10 
11 import java.text.CharacterIterator;
12 
13 import com.ibm.icu.text.UTF16;
14 
/**
 * Static helpers that walk a {@link CharacterIterator} by code point (32 bit values)
 * rather than by UTF-16 code unit.
 *
 * <p>A lead surrogate immediately followed by a trail surrogate is reported as a single
 * supplementary code point. An unpaired surrogate is reported as its own 16 bit value.
 * Running off either end of the iteration range is reported as {@link #DONE32}.
 */
public final class CharacterIteration {
    // disallow instantiation
    private CharacterIteration() { }

    /**
     * 32 bit char value returned when an iterator has run out of range.
     * Positive value so the fast case (not end, not surrogate) can be checked
     * with a single test.
     */
    public static final int DONE32 = 0x7fffffff;

    /**
     * Move the iterator forward to the next code point, and return that code point,
     *   leaving the iterator positioned at char returned.
     *   For Supplementary chars, the iterator is left positioned at the lead surrogate.
     * @param ci  The character iterator
     * @return    The next code point, or {@link #DONE32} if the iterator ran off the end.
     */
    public static int next32(CharacterIterator ci) {
        // If the current position is at a surrogate pair, move to the trail surrogate
        //   which leaves it in position for underlying iterator's next() to work.
        //   If the lead turns out to be unpaired, undo the look-ahead step.
        if (Character.isHighSurrogate(ci.current()) && !Character.isLowSurrogate(ci.next())) {
            ci.previous();
        }

        // For BMP chars, this next() is the real deal.
        int c = ci.next();

        // If we might have a lead surrogate, we need to peek ahead to get the trail
        //  even though we don't want to really be positioned there.
        //  (This test also catches CharacterIterator.DONE, which is 0xFFFF.)
        if (c >= Character.MIN_HIGH_SURROGATE) {
            c = nextTrail32(ci, c);
        }

        if (c >= Character.MIN_SUPPLEMENTARY_CODE_POINT && c != DONE32) {
            // We got a supplementary char.  Back the iterator up to the position
            // of the lead surrogate.
            ci.previous();
        }
        return c;
    }

    /**
     * Out-of-line portion of the in-line next32 code.
     * The call site does an initial ci.next() and calls this function
     *    if the 16 bit value it gets is &gt;= the minimum lead surrogate value (0xD800).
     *
     * <p>NOTE:  when a surrogate pair is combined, we leave the underlying char iterator
     *        positioned in the middle of the pair (on the trail surrogate).
     *        ci.next() will work correctly from there, but the ci.getIndex()
     *        will be wrong, and needs adjustment.
     *
     * @param ci    The character iterator, positioned on the char that produced {@code lead}.
     * @param lead  The 16 bit value just returned by {@code ci.next()}.
     * @return      {@link #DONE32} if {@code lead} is {@link CharacterIterator#DONE} and the
     *              iterator index is at or past its end index; the combined supplementary
     *              code point if {@code lead} is followed by a trail surrogate;
     *              otherwise {@code lead} unchanged.
     */
    public static int nextTrail32(CharacterIterator ci, int lead) {
        // DONE (0xFFFF) is only an end marker when the index really is at the end;
        // otherwise it is a genuine U+FFFF in the text.
        if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
            return DONE32;
        }
        // The caller guarantees lead >= 0xD800, so anything above the lead surrogate
        // range cannot start a pair.
        if (lead > Character.MAX_HIGH_SURROGATE) {
            return lead;
        }
        char trail = ci.next();
        if (Character.isLowSurrogate(trail)) {
            return composeSupplementary(lead, trail);
        }
        // Unpaired lead surrogate: undo the look-ahead and report the lead by itself.
        ci.previous();
        return lead;
    }

    /**
     * Move the iterator backward by one code point, and return that code point,
     *   leaving the iterator positioned at the char returned.
     *   For Supplementary chars, the iterator is left positioned at the lead surrogate.
     * @param ci  The character iterator
     * @return    The previous code point, or {@link #DONE32} (without moving the iterator)
     *            if the iterator index is already at or before its begin index.
     */
    public static int previous32(CharacterIterator ci) {
        if (ci.getIndex() <= ci.getBeginIndex()) {
            return DONE32;
        }
        char trail = ci.previous();
        // Only a trail surrogate that is not the very first char can be half of a pair.
        if (!Character.isLowSurrogate(trail) || ci.getIndex() <= ci.getBeginIndex()) {
            return trail;
        }
        char lead = ci.previous();
        if (Character.isHighSurrogate(lead)) {
            return composeSupplementary(lead, trail);
        }
        // Unpaired trail surrogate: step forward again so we stay on the trail.
        ci.next();
        return trail;
    }

    /**
     * Return the code point at the iterator's current position. The iterator ends up
     *   at the position it started from; when the current char is a lead surrogate
     *   this is achieved by a next() / previous() round trip to peek at the following char.
     * @param ci  The character iterator
     * @return    The code point at the current position, or {@link #DONE32} if the current
     *            char is {@link CharacterIterator#DONE} and the iterator index is at or
     *            past its end index.
     */
    public static int current32(CharacterIterator ci) {
        char lead = ci.current();
        // Fast path: everything below the surrogate range is returned as-is.
        if (lead < Character.MIN_HIGH_SURROGATE) {
            return lead;
        }
        if (Character.isHighSurrogate(lead)) {
            // Peek at the following char, then restore the position.
            char trail = ci.next();
            ci.previous();
            return Character.isLowSurrogate(trail) ? composeSupplementary(lead, trail) : lead;
        }
        // 0xFFFF is only the end marker if we really are at the end of the range.
        if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
            return DONE32;
        }
        return lead;
    }

    // Combine the 16 bit values of a lead and a trail surrogate into the
    // supplementary code point they encode.
    private static int composeSupplementary(int lead, int trail) {
        return ((lead - Character.MIN_HIGH_SURROGATE) << 10)
                + (trail - Character.MIN_LOW_SURROGATE)
                + Character.MIN_SUPPLEMENTARY_CODE_POINT;
    }
}
128