/*
 *******************************************************************************
 * Copyright (C) 2013, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
7 package com.ibm.icu.impl;
8 
9 import java.text.CharacterIterator;
10 
11 import com.ibm.icu.text.UTF16;
12 
/**
 * Static helpers that walk a {@link CharacterIterator} one code point at a time,
 * combining UTF-16 surrogate pairs into a single 32-bit value.
 *
 * <p>All methods report "ran off the text" with {@link #DONE32} instead of
 * {@link CharacterIterator#DONE}, so that a genuine U+FFFF in the text can be
 * told apart from the end of the iteration range.
 */
public final class CharacterIteration {
    // disallow instantiation
    private CharacterIteration() { }

    /**
     * 32-bit value returned when an iterator has run out of range.
     * It is positive so that the fast case (not at the end, not a surrogate)
     * can be checked with a single comparison.
     */
    public static final int DONE32 = 0x7fffffff;

    /**
     * Move the iterator forward to the next code point, and return that code point,
     *   leaving the iterator positioned at the char returned.
     *   For supplementary chars, the iterator is left positioned at the lead surrogate.
     * @param ci  The character iterator
     * @return    The next code point, or {@link #DONE32} if the iterator ran past
     *            its end index.
     */
    public static int next32(CharacterIterator ci) {
        // If the current position is at a surrogate pair, move to the trail surrogate,
        //   which leaves it in position for the underlying iterator's next() to work.
        int c = ci.current();
        if (c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_HIGH_SURROGATE) {
            c = ci.next();
            if (c < Character.MIN_LOW_SURROGATE || c > Character.MAX_LOW_SURROGATE) {
                // Unpaired lead surrogate: undo the step so it counts as one unit.
                ci.previous();
            }
        }

        // For BMP chars, this next() is the real deal.
        c = ci.next();

        // If we might have a lead surrogate, we need to peek ahead to get the trail
        //  even though we don't want to really be positioned there.
        //  CharacterIterator.DONE (0xFFFF) also takes this path and is mapped to DONE32.
        if (c >= Character.MIN_HIGH_SURROGATE) {
            c = nextTrail32(ci, c);
        }

        if (c >= Character.MIN_SUPPLEMENTARY_CODE_POINT && c != DONE32) {
            // We got a supplementary char.  Back the iterator up to the position
            // of the lead surrogate.
            ci.previous();
        }
        return c;
    }


    /**
     * Out-of-line portion of the in-line next32 code.
     * The call site does an initial {@code ci.next()} and calls this function
     * if the 16-bit value it gets is &gt;= the minimum lead surrogate (0xD800).
     *
     * <p>NOTE: when a surrogate pair is combined, the underlying char iterator is
     * left positioned in the middle of the pair (on the trail surrogate).
     * {@code ci.next()} will work correctly from there, but {@code ci.getIndex()}
     * will be wrong, and needs adjustment by the caller.
     *
     * @param ci    The character iterator, positioned at {@code lead}
     * @param lead  The 16-bit value just returned by {@code ci.next()}
     * @return      {@link #DONE32} if {@code lead} is {@link CharacterIterator#DONE} and the
     *              iterator is at or past its end index; the combined supplementary code
     *              point if {@code lead} is followed by a trail surrogate; otherwise
     *              {@code lead} unchanged.
     */
    public static int nextTrail32(CharacterIterator ci, int lead) {
        if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
            return DONE32;
        }
        if (lead > Character.MAX_HIGH_SURROGATE) {
            // Trail surrogate or other BMP value: nothing to combine.
            return lead;
        }
        char trail = ci.next();
        if (Character.isLowSurrogate(trail)) {
            // lead is an int here, so combine arithmetically rather than
            // narrowing it to char for Character.toCodePoint.
            return ((lead - Character.MIN_HIGH_SURROGATE) << 10)
                    + (trail - Character.MIN_LOW_SURROGATE)
                    + Character.MIN_SUPPLEMENTARY_CODE_POINT;
        }
        // Unpaired lead surrogate: step back onto it and return it as-is.
        ci.previous();
        return lead;
    }

    /**
     * Move the iterator backward to the previous code point and return that code point.
     * For a supplementary char the iterator is left positioned at the lead surrogate.
     * @param ci  The character iterator
     * @return    The previous code point, or {@link #DONE32} if the iterator is already
     *            at or before its begin index.
     */
    public static int previous32(CharacterIterator ci) {
        if (ci.getIndex() <= ci.getBeginIndex()) {
            return DONE32;
        }
        char trail = ci.previous();
        if (Character.isLowSurrogate(trail) && ci.getIndex() > ci.getBeginIndex()) {
            char lead = ci.previous();
            if (Character.isHighSurrogate(lead)) {
                return Character.toCodePoint(lead, trail);
            }
            // Not a pair: return to the unpaired trail surrogate.
            ci.next();
        }
        return trail;
    }

    /**
     * Return the code point at the iterator's current position.
     * When the current char is a lead surrogate, the iterator is stepped forward
     * one char to peek at the trail and then stepped back again.
     * @param ci  The character iterator
     * @return    The code point at the current position, or {@link #DONE32} if the
     *            current char is {@link CharacterIterator#DONE} and the iterator is at
     *            or past its end index.
     */
    public static int current32(CharacterIterator ci) {
        char lead = ci.current();
        if (lead < Character.MIN_HIGH_SURROGATE) {
            // Fast path: an ordinary BMP char below the surrogate range.
            return lead;
        }
        if (Character.isHighSurrogate(lead)) {
            char trail = ci.next();
            ci.previous();
            if (Character.isLowSurrogate(trail)) {
                return Character.toCodePoint(lead, trail);
            }
            return lead;
        }
        if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
            return DONE32;
        }
        return lead;
    }
}
126