1 /*
2  ********************************************************************************
3  * Copyright (C) 2010-2014, Google, International Business Machines Corporation *
4  * and others. All Rights Reserved.                                                 *
5  ********************************************************************************
6  */
7 package com.ibm.icu.lang;
8 
9 
/**
 * A number of utilities for dealing with CharSequences and related classes.
 * All methods are static; the class cannot be instantiated.
 * For accessing codepoints with a CharSequence, also see
 * <ul>
 * <li>{@link java.lang.Character#codePointAt(CharSequence, int)}</li>
 * <li>{@link java.lang.Character#codePointBefore(CharSequence, int)}</li>
 * <li>{@link java.lang.Character#codePointCount(CharSequence, int, int)}</li>
 * <li>{@link java.lang.Character#charCount(int)}</li>
 * <li>{@link java.lang.Character#offsetByCodePoints(CharSequence, int, int)}</li>
 * <li>{@link java.lang.Character#toChars(int, char[], int)}</li>
 * <li>{@link java.lang.Character#toCodePoint(char, char)}</li>
 * </ul>
 * @author markdavis
 * @internal
 * @deprecated This API is ICU internal only.
 */
@Deprecated
public class CharSequences {
    // TODO
    // compareTo(a, b);
    // compareToIgnoreCase(a, b)
    // contentEquals(a, b)
    // contentEqualsIgnoreCase(a, b)

    // contains(a, b) => indexOf >= 0
    // endsWith(a, b)
    // startsWith(a, b)

    // lastIndexOf(a, b, fromIndex)
    // indexOf(a, ch, fromIndex)
    // lastIndexOf(a, ch, fromIndex);

    // s.trim() => UnicodeSet.trim(CharSequence s); return a subsequence starting with the first character not in the set to the last character not in the set.
    // add UnicodeSet.split(CharSequence s);

    /**
     * Find the longest n such that the n chars of {@code a} starting at {@code aIndex}
     * equal the n chars of {@code b} starting at {@code bIndex}, backing up by one
     * char if the match would otherwise stop in the middle of a surrogate pair.
     *
     * <p>The back-up happens only when the stopping position is inside a surrogate
     * pair in <em>both</em> sequences (see {@link #onCharacterBoundary}).
     *
     * @param a first sequence
     * @param b second sequence
     * @param aIndex char offset in {@code a} at which matching starts
     * @param bIndex char offset in {@code b} at which matching starts
     * @return the number of matching chars (UTF-16 code units), possibly reduced by one
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int matchAfter(CharSequence a, CharSequence b, int aIndex, int bIndex) {
        int i = aIndex, j = bIndex;
        int alen = a.length();
        int blen = b.length();
        // advance both cursors in lock step while the code units agree
        for (; i < alen && j < blen; ++i, ++j) {
            if (a.charAt(i) != b.charAt(j)) {
                break;
            }
        }
        // if we failed a match make sure that we didn't match half a character
        int result = i - aIndex;
        // NOTE(review): this backs up only if BOTH sequences are mid-pair at the stop
        // position; if only one of them is, the half-character match is kept as-is.
        if (result != 0 && !onCharacterBoundary(a, i) && !onCharacterBoundary(b, j)) {
            --result; // backup
        }
        return result;
    }

    /**
     * Count the code point length. Unpaired surrogates count as 1.
     *
     * @param s the sequence to measure
     * @return the number of code points in {@code s}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int codePointLength(CharSequence s) {
        // static: the class has only a private constructor, so an instance method
        // could never have been invoked.
        return Character.codePointCount(s, 0, s.length());
    }

    /**
     * Utility function for comparing codepoint to string without generating new
     * string.
     *
     * @param codepoint the code point to compare
     * @param other the sequence to compare against; may be null
     * @return true if {@code other} is non-null and consists of exactly the
     *         UTF-16 encoding of {@code codepoint}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static final boolean equals(int codepoint, CharSequence other) {
        if (other == null) {
            return false;
        }
        switch (other.length()) {
        // one code unit: equal only if the code point is that very unit
        case 1: return codepoint == other.charAt(0);
        // two code units: equal only for a supplementary code point encoded by that pair
        case 2: return codepoint > 0xFFFF && codepoint == Character.codePointAt(other, 0);
        default: return false;
        }
    }

    /**
     * Same as {@link #equals(int, CharSequence)} with the arguments swapped.
     *
     * @param other the sequence to compare against; may be null
     * @param codepoint the code point to compare
     * @return true if {@code other} is non-null and consists of exactly the
     *         UTF-16 encoding of {@code codepoint}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static final boolean equals(CharSequence other, int codepoint) {
        return equals(codepoint, other);
    }

    /**
     * Utility to compare a string to a code point.
     * Same results as turning the code point into a string (with the [ugly] new StringBuilder().appendCodePoint(codepoint).toString())
     * and comparing, but much faster (no object creation).
     * An empty string compares as less than any code point.
     * Note that this (=String) order is UTF-16 order -- *not* code point order.
     *
     * @param string the sequence to compare; must not be null
     * @param codePoint the code point to compare against
     * @return a negative value, zero, or a positive value as {@code string} is less
     *         than, equal to, or greater than the UTF-16 form of {@code codePoint}
     * @throws IllegalArgumentException if {@code codePoint} is outside
     *         {@link Character#MIN_CODE_POINT}..{@link Character#MAX_CODE_POINT}
     * @throws NullPointerException if {@code string} is null
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int compare(CharSequence string, int codePoint) {
        if (codePoint < Character.MIN_CODE_POINT || codePoint > Character.MAX_CODE_POINT) {
            throw new IllegalArgumentException(
                    "Invalid code point: 0x" + Integer.toHexString(codePoint));
        }
        int stringLength = string.length();
        if (stringLength == 0) {
            return -1;
        }
        char firstChar = string.charAt(0);
        int offset = codePoint - Character.MIN_SUPPLEMENTARY_CODE_POINT;

        if (offset < 0) { // BMP codePoint
            int result = firstChar - codePoint;
            if (result != 0) {
                return result;
            }
            // first unit equal: any extra chars make the string greater
            return stringLength - 1;
        }
        // non BMP: compare against the lead surrogate, then the trail surrogate
        char lead = (char)((offset >>> 10) + Character.MIN_HIGH_SURROGATE);
        int result = firstChar - lead;
        if (result != 0) {
            return result;
        }
        if (stringLength > 1) {
            char trail = (char)((offset & 0x3ff) + Character.MIN_LOW_SURROGATE);
            result = string.charAt(1) - trail;
            if (result != 0) {
                return result;
            }
        }
        // both compared units equal: decide on length (a lone lead sorts before the pair)
        return stringLength - 2;
    }

    /**
     * Utility to compare a code point to a string.
     * Same results as turning the code point into a string and comparing, but much faster (no object creation).
     * This is the negation of {@link #compare(CharSequence, int)}.
     * Note that this (=String) order is UTF-16 order -- *not* code point order.
     *
     * @param codepoint the code point to compare
     * @param a the sequence to compare against; must not be null
     * @return a negative value, zero, or a positive value as the UTF-16 form of
     *         {@code codepoint} is less than, equal to, or greater than {@code a}
     * @throws IllegalArgumentException if {@code codepoint} is not a valid code point
     * @throws NullPointerException if {@code a} is null
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int compare(int codepoint, CharSequence a) {
        return -compare(a, codepoint);
    }

    /**
     * Return the value of the first code point, if the string is exactly one code point. Otherwise return Integer.MAX_VALUE.
     *
     * @param s the sequence to inspect
     * @return the single code point of {@code s}, or {@link Integer#MAX_VALUE} if
     *         {@code s} is empty or holds more than one code point
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int getSingleCodePoint(CharSequence s) {
        int length = s.length();
        if (length < 1 || length > 2) {
            return Integer.MAX_VALUE;
        }
        int result = Character.codePointAt(s, 0);
        // a BMP code point must occupy exactly 1 char, a supplementary one exactly 2
        return (result < 0x10000) == (length == 1) ? result : Integer.MAX_VALUE;
    }

    /**
     * Utility function for comparing objects that may be null.
     *
     * @param a first object; may be null
     * @param b second object; may be null
     * @return true if both are null, or if {@code a} is non-null, {@code b} is
     *         non-null and {@code a.equals(b)}
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static final <T extends Object> boolean equals(T a, T b) {
        return a == null ? b == null
                : b == null ? false
                        : a.equals(b);
    }

    /**
     * Utility for comparing the contents of CharSequences, char by char
     * (UTF-16 code unit order).
     *
     * @param a first sequence
     * @param b second sequence
     * @return the difference of the first pair of differing chars, or, if one
     *         sequence is a prefix of the other, the difference of the lengths
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int compare(CharSequence a, CharSequence b) {
        int alength = a.length();
        int blength = b.length();
        int min = alength <= blength ? alength : blength;
        for (int i = 0; i < min; ++i) {
            int diff = a.charAt(i) - b.charAt(i);
            if (diff != 0) {
                return diff;
            }
        }
        return alength - blength;
    }

    /**
     * Utility for testing whether two CharSequences have the same contents.
     *
     * @param a first sequence
     * @param b second sequence
     * @return true if both have the same length and the same chars
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static boolean equalsChars(CharSequence a, CharSequence b) {
        // do length test first for fast path
        return a.length() == b.length() && compare(a,b) == 0;
    }

    /**
     * Are we on a character boundary? An offset is not on a boundary only when
     * it lies between a high surrogate and the low surrogate that follows it.
     * Offsets at or before the start, or at or past the end, count as boundaries.
     *
     * @param s the sequence
     * @param i char offset into {@code s}
     * @return false if {@code i} splits a surrogate pair, true otherwise
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static boolean onCharacterBoundary(CharSequence s, int i) {
        return i <= 0
        || i >= s.length()
        || !Character.isHighSurrogate(s.charAt(i-1))
        || !Character.isLowSurrogate(s.charAt(i));
    }

    /**
     * Find code point in string.
     *
     * @param s the sequence to search
     * @param codePoint the code point to look for
     * @return the char offset of the first occurrence of {@code codePoint}, or -1
     *         if it does not occur
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int indexOf(CharSequence s, int codePoint) {
        int cp;
        // step by whole code points so a surrogate pair is never split
        for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
            cp = Character.codePointAt(s, i);
            if (cp == codePoint) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Utility function for simplified, more robust loops, such as:
     * <pre>
     *   for (int codePoint : CharSequences.codePoints(string)) {
     *     doSomethingWith(codePoint);
     *   }
     * </pre>
     * Unpaired surrogates are returned as individual values.
     *
     * @param s the sequence to decode
     * @return a new array holding the code points of {@code s}, in order
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static int[] codePoints(CharSequence s) {
        int[] result = new int[s.length()]; // in the vast majority of cases, the length is the same
        int j = 0;
        for (int i = 0; i < s.length(); ++i) {
            char cp = s.charAt(i);
            if (cp >= 0xDC00 && cp <= 0xDFFF && i != 0 ) { // hand-code for speed
                // Note: j-1 is safe, because j can only be zero if i is zero. But i!=0 in this block.
                // Keep the previous entry as an int: it may already be a combined
                // supplementary code point, and truncating it to char could make its
                // low 16 bits look like a pending high surrogate.
                int last = result[j-1];
                if (last >= 0xD800 && last <= 0xDBFF) {
                    result[j-1] = Character.toCodePoint((char) last, cp);
                    continue;
                }
            }
            result[j++] = cp;
        }
        if (j == result.length) {
            return result;
        }
        // at least one surrogate pair was merged: trim the unused tail
        int[] shortResult = new int[j];
        System.arraycopy(result, 0, shortResult, 0, j);
        return shortResult;
    }

    /** Not instantiable: this class only hosts static utilities. */
    private CharSequences() {
    }
}
310