1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************** 5 * Copyright (C) 2010-2014, Google, International Business Machines Corporation * 6 * and others. All Rights Reserved. * 7 ******************************************************************************** 8 */ 9 package com.ibm.icu.lang; 10 11 12 /** 13 * A number of utilities for dealing with CharSequences and related classes. 14 * For accessing codepoints with a CharSequence, also see 15 * <ul> 16 * <li>{@link java.lang.Character#codePointAt(CharSequence, int)}</li> 17 * <li>{@link java.lang.Character#codePointBefore(CharSequence, int)}</li> 18 * <li>{@link java.lang.Character#codePointCount(CharSequence, int, int)}</li> 19 * <li>{@link java.lang.Character#charCount(int)}</li> 20 * <li>{@link java.lang.Character#offsetByCodePoints(CharSequence, int, int)}</li> 21 * <li>{@link java.lang.Character#toChars(int, char[], int)}</li> 22 * <li>{@link java.lang.Character#toCodePoint(char, char)}</li> 23 * </ul> 24 * @author markdavis 25 * @internal 26 * @deprecated This API is ICU internal only. 27 */ 28 @Deprecated 29 public class CharSequences { 30 // TODO 31 // compareTo(a, b); 32 // compareToIgnoreCase(a, b) 33 // contentEquals(a, b) 34 // contentEqualsIgnoreCase(a, b) 35 36 // contains(a, b) => indexOf >= 0 37 // endsWith(a, b) 38 // startsWith(a, b) 39 40 // lastIndexOf(a, b, fromIndex) 41 // indexOf(a, ch, fromIndex) 42 // lastIndexOf(a, ch, fromIndex); 43 44 // s.trim() => UnicodeSet.trim(CharSequence s); return a subsequence starting with the first character not in the set to the last character not in the set. 45 // add UnicodeSet.split(CharSequence s); 46 47 /** 48 * Find the longest n such that a[aIndex,n] = b[bIndex,n], and n is on a character boundary. 49 * @internal 50 * @deprecated This API is ICU internal only. 51 */ 52 @Deprecated matchAfter(CharSequence a, CharSequence b, int aIndex, int bIndex)53 public static int matchAfter(CharSequence a, CharSequence b, int aIndex, int bIndex) { 54 int i = aIndex, j = bIndex; 55 int alen = a.length(); 56 int blen = b.length(); 57 for (; i < alen && j < blen; ++i, ++j) { 58 char ca = a.charAt(i); 59 char cb = b.charAt(j); 60 if (ca != cb) { 61 break; 62 } 63 } 64 // if we failed a match make sure that we didn't match half a character 65 int result = i - aIndex; 66 if (result != 0 && !onCharacterBoundary(a, i) && !onCharacterBoundary(b, j)) { 67 --result; // backup 68 } 69 return result; 70 } 71 72 /** 73 * Count the code point length. Unpaired surrogates count as 1. 74 * @internal 75 * @deprecated This API is ICU internal only. 76 */ 77 @Deprecated codePointLength(CharSequence s)78 public int codePointLength(CharSequence s) { 79 return Character.codePointCount(s, 0, s.length()); 80 // int length = s.length(); 81 // int result = length; 82 // for (int i = 1; i < length; ++i) { 83 // char ch = s.charAt(i); 84 // if (0xDC00 <= ch && ch <= 0xDFFF) { 85 // char ch0 = s.charAt(i-1); 86 // if (0xD800 <= ch && ch <= 0xDbFF) { 87 // --result; 88 // } 89 // } 90 // } 91 } 92 93 /** 94 * Utility function for comparing codepoint to string without generating new 95 * string. 96 * 97 * @internal 98 * @deprecated This API is ICU internal only. 99 */ 100 @Deprecated equals(int codepoint, CharSequence other)101 public static final boolean equals(int codepoint, CharSequence other) { 102 if (other == null) { 103 return false; 104 } 105 switch (other.length()) { 106 case 1: return codepoint == other.charAt(0); 107 case 2: return codepoint > 0xFFFF && codepoint == Character.codePointAt(other, 0); 108 default: return false; 109 } 110 } 111 112 /** 113 * @internal 114 * @deprecated This API is ICU internal only. 115 */ 116 @Deprecated equals(CharSequence other, int codepoint)117 public static final boolean equals(CharSequence other, int codepoint) { 118 return equals(codepoint, other); 119 } 120 121 /** 122 * Utility to compare a string to a code point. 123 * Same results as turning the code point into a string (with the [ugly] new StringBuilder().appendCodePoint(codepoint).toString()) 124 * and comparing, but much faster (no object creation). 125 * Actually, there is one difference; a null compares as less. 126 * Note that this (=String) order is UTF-16 order -- *not* code point order. 127 * 128 * @internal 129 * @deprecated This API is ICU internal only. 130 */ 131 @Deprecated compare(CharSequence string, int codePoint)132 public static int compare(CharSequence string, int codePoint) { 133 if (codePoint < Character.MIN_CODE_POINT || codePoint > Character.MAX_CODE_POINT) { 134 throw new IllegalArgumentException(); 135 } 136 int stringLength = string.length(); 137 if (stringLength == 0) { 138 return -1; 139 } 140 char firstChar = string.charAt(0); 141 int offset = codePoint - Character.MIN_SUPPLEMENTARY_CODE_POINT; 142 143 if (offset < 0) { // BMP codePoint 144 int result = firstChar - codePoint; 145 if (result != 0) { 146 return result; 147 } 148 return stringLength - 1; 149 } 150 // non BMP 151 char lead = (char)((offset >>> 10) + Character.MIN_HIGH_SURROGATE); 152 int result = firstChar - lead; 153 if (result != 0) { 154 return result; 155 } 156 if (stringLength > 1) { 157 char trail = (char)((offset & 0x3ff) + Character.MIN_LOW_SURROGATE); 158 result = string.charAt(1) - trail; 159 if (result != 0) { 160 return result; 161 } 162 } 163 return stringLength - 2; 164 } 165 166 /** 167 * Utility to compare a string to a code point. 168 * Same results as turning the code point into a string and comparing, but much faster (no object creation). 169 * Actually, there is one difference; a null compares as less. 170 * Note that this (=String) order is UTF-16 order -- *not* code point order. 171 * 172 * @internal 173 * @deprecated This API is ICU internal only. 174 */ 175 @Deprecated compare(int codepoint, CharSequence a)176 public static int compare(int codepoint, CharSequence a) { 177 int result = compare(a, codepoint); 178 return result > 0 ? -1 : result < 0 ? 1 : 0; // Reverse the order. 179 } 180 181 /** 182 * Return the value of the first code point, if the string is exactly one code point. Otherwise return Integer.MAX_VALUE. 183 * 184 * @internal 185 * @deprecated This API is ICU internal only. 186 */ 187 @Deprecated getSingleCodePoint(CharSequence s)188 public static int getSingleCodePoint(CharSequence s) { 189 int length = s.length(); 190 if (length < 1 || length > 2) { 191 return Integer.MAX_VALUE; 192 } 193 int result = Character.codePointAt(s, 0); 194 return (result < 0x10000) == (length == 1) ? result : Integer.MAX_VALUE; 195 } 196 197 /** 198 * Utility function for comparing objects that may be null 199 * string. 200 * 201 * @internal 202 * @deprecated This API is ICU internal only. 203 */ 204 @Deprecated equals(T a, T b)205 public static final <T extends Object> boolean equals(T a, T b) { 206 return a == null ? b == null 207 : b == null ? false 208 : a.equals(b); 209 } 210 211 /** 212 * Utility for comparing the contents of CharSequences 213 * 214 * @internal 215 * @deprecated This API is ICU internal only. 216 */ 217 @Deprecated compare(CharSequence a, CharSequence b)218 public static int compare(CharSequence a, CharSequence b) { 219 int alength = a.length(); 220 int blength = b.length(); 221 int min = alength <= blength ? alength : blength; 222 for (int i = 0; i < min; ++i) { 223 int diff = a.charAt(i) - b.charAt(i); 224 if (diff != 0) { 225 return diff; 226 } 227 } 228 return alength - blength; 229 } 230 231 /** 232 * Utility for comparing the contents of CharSequences 233 * 234 * @internal 235 * @deprecated This API is ICU internal only. 236 */ 237 @Deprecated equalsChars(CharSequence a, CharSequence b)238 public static boolean equalsChars(CharSequence a, CharSequence b) { 239 // do length test first for fast path 240 return a.length() == b.length() && compare(a,b) == 0; 241 } 242 243 /** 244 * Are we on a character boundary? 245 * 246 * @internal 247 * @deprecated This API is ICU internal only. 248 */ 249 @Deprecated onCharacterBoundary(CharSequence s, int i)250 public static boolean onCharacterBoundary(CharSequence s, int i) { 251 return i <= 0 252 || i >= s.length() 253 || !Character.isHighSurrogate(s.charAt(i-1)) 254 || !Character.isLowSurrogate(s.charAt(i)); 255 } 256 257 /** 258 * Find code point in string. 259 * 260 * @internal 261 * @deprecated This API is ICU internal only. 262 */ 263 @Deprecated indexOf(CharSequence s, int codePoint)264 public static int indexOf(CharSequence s, int codePoint) { 265 int cp; 266 for (int i = 0; i < s.length(); i += Character.charCount(cp)) { 267 cp = Character.codePointAt(s, i); 268 if (cp == codePoint) { 269 return i; 270 } 271 } 272 return -1; 273 } 274 275 /** 276 * Utility function for simplified, more robust loops, such as: 277 * <pre> 278 * for (int codePoint : CharSequences.codePoints(string)) { 279 * doSomethingWith(codePoint); 280 * } 281 * </pre> 282 * 283 * @internal 284 * @deprecated This API is ICU internal only. 285 */ 286 @Deprecated codePoints(CharSequence s)287 public static int[] codePoints(CharSequence s) { 288 int[] result = new int[s.length()]; // in the vast majority of cases, the length is the same 289 int j = 0; 290 for (int i = 0; i < s.length(); ++i) { 291 char cp = s.charAt(i); 292 if (cp >= 0xDC00 && cp <= 0xDFFF && i != 0 ) { // hand-code for speed 293 char last = (char) result[j-1]; 294 if (last >= 0xD800 && last <= 0xDBFF) { 295 // Note: j-1 is safe, because j can only be zero if i is zero. But i!=0 in this block. 296 result[j-1] = Character.toCodePoint(last, cp); 297 continue; 298 } 299 } 300 result[j++] = cp; 301 } 302 if (j == result.length) { 303 return result; 304 } 305 int[] shortResult = new int[j]; 306 System.arraycopy(result, 0, shortResult, 0, j); 307 return shortResult; 308 } 309 CharSequences()310 private CharSequences() { 311 } 312 } 313