1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  ********************************************************************************
5  * Copyright (C) 2010-2014, Google, International Business Machines Corporation *
6  * and others. All Rights Reserved.                                                 *
7  ********************************************************************************
8  */
9 package com.ibm.icu.lang;
10 
11 
/**
 * A number of utilities for dealing with CharSequences and related classes.
 * For accessing codepoints with a CharSequence, also see
 * <ul>
 * <li>{@link java.lang.Character#codePointAt(CharSequence, int)}</li>
 * <li>{@link java.lang.Character#codePointBefore(CharSequence, int)}</li>
 * <li>{@link java.lang.Character#codePointCount(CharSequence, int, int)}</li>
 * <li>{@link java.lang.Character#charCount(int)}</li>
 * <li>{@link java.lang.Character#offsetByCodePoints(CharSequence, int, int)}</li>
 * <li>{@link java.lang.Character#toChars(int, char[], int)}</li>
 * <li>{@link java.lang.Character#toCodePoint(char, char)}</li>
 * </ul>
 * All members are static; the class cannot be instantiated.
 *
 * @author markdavis
 * @internal
 * @deprecated This API is ICU internal only.
 */
@Deprecated
public class CharSequences {
    // TODO
    // compareTo(a, b);
    // compareToIgnoreCase(a, b)
    // contentEquals(a, b)
    // contentEqualsIgnoreCase(a, b)

    // contains(a, b) => indexOf >= 0
    // endsWith(a, b)
    // startsWith(a, b)

    // lastIndexOf(a, b, fromIndex)
    // indexOf(a, ch, fromIndex)
    // lastIndexOf(a, ch, fromIndex);

    // s.trim() => UnicodeSet.trim(CharSequence s); return a subsequence starting with the first character not in the set to the last character not in the set.
    // add UnicodeSet.split(CharSequence s);

    /**
     * Counts how many chars of {@code a}, starting at {@code aIndex}, are equal to the
     * chars of {@code b}, starting at {@code bIndex}, stopping at the first mismatch or at
     * the end of either sequence.
     * <p>
     * If the match is non-empty and the stopping position falls between a high and a low
     * surrogate in <em>both</em> sequences, the result is reduced by one so that the
     * matched span does not end in the middle of a surrogate pair.
     *
     * @param a first sequence
     * @param b second sequence
     * @param aIndex start offset (in chars) in {@code a}
     * @param bIndex start offset (in chars) in {@code b}
     * @return the number of matching chars (not code points)
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int matchAfter(CharSequence a, CharSequence b, int aIndex, int bIndex) {
        int i = aIndex, j = bIndex;
        int alen = a.length();
        int blen = b.length();
        for (; i < alen && j < blen; ++i, ++j) {
            char ca = a.charAt(i);
            char cb = b.charAt(j);
            if (ca != cb) {
                break;
            }
        }
        // if we failed a match make sure that we didn't match half a character
        int result = i - aIndex;
        // NOTE(review): the back-up only happens when BOTH positions split a surrogate pair.
        // If only one side is mid-pair (e.g. the other holds an unpaired lead surrogate, or
        // has ended) the result is left as is. Confirm that this is the intended contract.
        if (result != 0 && !onCharacterBoundary(a, i) && !onCharacterBoundary(b, j)) {
            --result; // backup
        }
        return result;
    }

    /**
     * Count the code point length. Unpaired surrogates count as 1.
     *
     * @param s the sequence to measure
     * @return the number of code points in {@code s}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int codePointLength(CharSequence s) {
        // Static: this utility class has a private constructor, so an instance method
        // could never be invoked.
        return Character.codePointCount(s, 0, s.length());
    }

    /**
     * Utility function for comparing codepoint to string without generating new
     * string.
     *
     * @param codepoint the code point to compare
     * @param other the sequence to compare against; may be null
     * @return true if {@code other} is non-null and consists of exactly the one code point
     *         {@code codepoint}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static final boolean equals(int codepoint, CharSequence other) {
        if (other == null) {
            return false;
        }
        switch (other.length()) {
        case 1: return codepoint == other.charAt(0);
        // A two-char sequence can only equal a supplementary code point.
        case 2: return codepoint > 0xFFFF && codepoint == Character.codePointAt(other, 0);
        default: return false;
        }
    }

    /**
     * Same as {@link #equals(int, CharSequence)} with the arguments swapped.
     *
     * @param other the sequence to compare against; may be null
     * @param codepoint the code point to compare
     * @return true if {@code other} is non-null and consists of exactly the one code point
     *         {@code codepoint}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static final boolean equals(CharSequence other, int codepoint) {
        return equals(codepoint, other);
    }

    /**
     * Utility to compare a string to a code point.
     * Same results as turning the code point into a string (with the [ugly] new StringBuilder().appendCodePoint(codepoint).toString())
     * and comparing, but much faster (no object creation).
     * Actually, there is one difference; a null compares as less.
     * Note that this (=String) order is UTF-16 order -- *not* code point order.
     *
     * @param string the sequence to compare; a null is treated as less than any code point
     * @param codePoint the code point to compare against
     * @return a negative value, zero, or a positive value if {@code string} is less than,
     *         equal to, or greater than the code point in UTF-16 order
     * @throws IllegalArgumentException if {@code codePoint} is outside
     *         {@link Character#MIN_CODE_POINT}..{@link Character#MAX_CODE_POINT}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int compare(CharSequence string, int codePoint) {
        if (codePoint < Character.MIN_CODE_POINT || codePoint > Character.MAX_CODE_POINT) {
            throw new IllegalArgumentException(
                    "Not a valid code point: 0x" + Integer.toHexString(codePoint));
        }
        if (string == null) {
            return -1; // documented contract: a null compares as less
        }
        int stringLength = string.length();
        if (stringLength == 0) {
            return -1;
        }
        char firstChar = string.charAt(0);
        int offset = codePoint - Character.MIN_SUPPLEMENTARY_CODE_POINT;

        if (offset < 0) { // BMP codePoint
            int result = firstChar - codePoint;
            if (result != 0) {
                return result;
            }
            // First char matches: equal if that is the whole string, otherwise longer => greater.
            return stringLength - 1;
        }
        // non BMP: compare against the code point's surrogate pair, one char at a time
        char lead = (char)((offset >>> 10) + Character.MIN_HIGH_SURROGATE);
        int result = firstChar - lead;
        if (result != 0) {
            return result;
        }
        if (stringLength > 1) {
            char trail = (char)((offset & 0x3ff) + Character.MIN_LOW_SURROGATE);
            result = string.charAt(1) - trail;
            if (result != 0) {
                return result;
            }
        }
        // Negative if only the lead matched, zero for an exact match, positive if longer.
        return stringLength - 2;
    }

    /**
     * Utility to compare a code point to a string.
     * Same results as turning the code point into a string and comparing, but much faster (no object creation).
     * Actually, there is one difference; a null compares as less.
     * Note that this (=String) order is UTF-16 order -- *not* code point order.
     *
     * @param codepoint the code point to compare
     * @param a the sequence to compare against; a null is treated as less than any code point
     * @return -1, 0 or 1 if the code point is less than, equal to, or greater than {@code a}
     * @throws IllegalArgumentException if {@code codepoint} is not a valid code point
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int compare(int codepoint, CharSequence a) {
        int result = compare(a, codepoint);
        return result > 0 ? -1 : result < 0 ? 1 : 0; // Reverse the order.
    }

    /**
     * Return the value of the first code point, if the string is exactly one code point. Otherwise return Integer.MAX_VALUE.
     *
     * @param s the sequence to inspect
     * @return the single code point of {@code s}, or {@link Integer#MAX_VALUE} if {@code s}
     *         is empty or holds more than one code point
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int getSingleCodePoint(CharSequence s) {
        int length = s.length();
        if (length < 1 || length > 2) {
            return Integer.MAX_VALUE;
        }
        int result = Character.codePointAt(s, 0);
        // One char must be a BMP value; two chars must form a single supplementary code point.
        return (result < 0x10000) == (length == 1) ? result : Integer.MAX_VALUE;
    }

    /**
     * Utility function for comparing two objects, either of which may be null.
     *
     * @param a first object; may be null
     * @param b second object; may be null
     * @return true if both are null, or if both are non-null and {@code a.equals(b)}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static final <T extends Object> boolean equals(T a, T b) {
        return a == null ? b == null
                : b == null ? false
                        : a.equals(b);
    }

    /**
     * Utility for comparing the contents of CharSequences, char by char (UTF-16 order).
     * A sequence that is a proper prefix of the other compares as less.
     *
     * @param a first sequence
     * @param b second sequence
     * @return a negative value, zero, or a positive value if {@code a} is less than,
     *         equal to, or greater than {@code b}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int compare(CharSequence a, CharSequence b) {
        int alength = a.length();
        int blength = b.length();
        int min = alength <= blength ? alength : blength;
        for (int i = 0; i < min; ++i) {
            int diff = a.charAt(i) - b.charAt(i);
            if (diff != 0) {
                return diff;
            }
        }
        return alength - blength;
    }

    /**
     * Utility for testing whether two CharSequences have the same contents.
     *
     * @param a first sequence
     * @param b second sequence
     * @return true if both sequences have the same length and the same chars
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static boolean equalsChars(CharSequence a, CharSequence b) {
        // do length test first for fast path
        return a.length() == b.length() && compare(a,b) == 0;
    }

    /**
     * Are we on a character boundary? An offset is not on a boundary only when it lies
     * strictly inside the sequence, between a high surrogate and a low surrogate.
     *
     * @param s the sequence
     * @param i the char offset to test; values at or beyond either end count as boundaries
     * @return false if offset {@code i} splits a surrogate pair, true otherwise
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static boolean onCharacterBoundary(CharSequence s, int i) {
        return i <= 0
        || i >= s.length()
        || !Character.isHighSurrogate(s.charAt(i-1))
        || !Character.isLowSurrogate(s.charAt(i));
    }

    /**
     * Find code point in string.
     *
     * @param s the sequence to search, scanned code point by code point
     * @param codePoint the code point to look for
     * @return the char offset of the first occurrence, or -1 if there is none
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int indexOf(CharSequence s, int codePoint) {
        int cp;
        for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
            cp = Character.codePointAt(s, i);
            if (cp == codePoint) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Utility function for simplified, more robust loops, such as:
     * <pre>
     *   for (int codePoint : CharSequences.codePoints(string)) {
     *     doSomethingWith(codePoint);
     *   }
     * </pre>
     * Surrogate pairs are combined into one supplementary code point; unpaired surrogates
     * are returned as their own value.
     *
     * @param s the sequence to convert
     * @return a new array holding the code points of {@code s}, in order
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int[] codePoints(CharSequence s) {
        final int length = s.length();
        int[] result = new int[length]; // in the vast majority of cases, the length is the same
        int j = 0;
        for (int i = 0; i < length; ++i) {
            char ch = s.charAt(i);
            if (j > 0 && ch >= 0xDC00 && ch <= 0xDFFF) { // hand-code for speed
                // Compare the stored int itself. Truncating it to char would make an
                // already-combined supplementary code point (e.g. U+1D800 -> 0xD800)
                // look like a pending high surrogate.
                int last = result[j - 1];
                if (last >= 0xD800 && last <= 0xDBFF) {
                    result[j - 1] = Character.toCodePoint((char) last, ch);
                    continue;
                }
            }
            result[j++] = ch;
        }
        if (j == length) {
            return result;
        }
        // At least one pair was combined, so trim the unused tail.
        int[] shortResult = new int[j];
        System.arraycopy(result, 0, shortResult, 0, j);
        return shortResult;
    }

    /** Not instantiable: static utilities only. */
    private CharSequences() {
    }
}
313