1 /* 2 ******************************************************************************** 3 * Copyright (C) 2010-2014, Google, International Business Machines Corporation * 4 * and others. All Rights Reserved. * 5 ******************************************************************************** 6 */ 7 package com.ibm.icu.lang; 8 9 10 /** 11 * A number of utilities for dealing with CharSequences and related classes. 12 * For accessing codepoints with a CharSequence, also see 13 * <ul> 14 * <li>{@link java.lang.Character#codePointAt(CharSequence, int)}</li> 15 * <li>{@link java.lang.Character#codePointBefore(CharSequence, int)}</li> 16 * <li>{@link java.lang.Character#codePointCount(CharSequence, int, int)}</li> 17 * <li>{@link java.lang.Character#charCount(int)}</li> 18 * <li>{@link java.lang.Character#offsetByCodePoints(CharSequence, int, int)}</li> 19 * <li>{@link java.lang.Character#toChars(int, char[], int)}</li> 20 * <li>{@link java.lang.Character#toCodePoint(char, char)}</li> 21 * </ul> 22 * @author markdavis 23 * @internal 24 * @deprecated This API is ICU internal only. 25 */ 26 @Deprecated 27 public class CharSequences { 28 // TODO 29 // compareTo(a, b); 30 // compareToIgnoreCase(a, b) 31 // contentEquals(a, b) 32 // contentEqualsIgnoreCase(a, b) 33 34 // contains(a, b) => indexOf >= 0 35 // endsWith(a, b) 36 // startsWith(a, b) 37 38 // lastIndexOf(a, b, fromIndex) 39 // indexOf(a, ch, fromIndex) 40 // lastIndexOf(a, ch, fromIndex); 41 42 // s.trim() => UnicodeSet.trim(CharSequence s); return a subsequence starting with the first character not in the set to the last character not in the set. 43 // add UnicodeSet.split(CharSequence s); 44 45 /** 46 * Find the longest n such that a[aIndex,n] = b[bIndex,n], and n is on a character boundary. 47 * @internal 48 * @deprecated This API is ICU internal only. 49 */ 50 @Deprecated matchAfter(CharSequence a, CharSequence b, int aIndex, int bIndex)51 public static int matchAfter(CharSequence a, CharSequence b, int aIndex, int bIndex) { 52 int i = aIndex, j = bIndex; 53 int alen = a.length(); 54 int blen = b.length(); 55 for (; i < alen && j < blen; ++i, ++j) { 56 char ca = a.charAt(i); 57 char cb = b.charAt(j); 58 if (ca != cb) { 59 break; 60 } 61 } 62 // if we failed a match make sure that we didn't match half a character 63 int result = i - aIndex; 64 if (result != 0 && !onCharacterBoundary(a, i) && !onCharacterBoundary(b, j)) { 65 --result; // backup 66 } 67 return result; 68 } 69 70 /** 71 * Count the code point length. Unpaired surrogates count as 1. 72 * @internal 73 * @deprecated This API is ICU internal only. 74 */ 75 @Deprecated codePointLength(CharSequence s)76 public int codePointLength(CharSequence s) { 77 return Character.codePointCount(s, 0, s.length()); 78 // int length = s.length(); 79 // int result = length; 80 // for (int i = 1; i < length; ++i) { 81 // char ch = s.charAt(i); 82 // if (0xDC00 <= ch && ch <= 0xDFFF) { 83 // char ch0 = s.charAt(i-1); 84 // if (0xD800 <= ch && ch <= 0xDbFF) { 85 // --result; 86 // } 87 // } 88 // } 89 } 90 91 /** 92 * Utility function for comparing codepoint to string without generating new 93 * string. 94 * 95 * @internal 96 * @deprecated This API is ICU internal only. 97 */ 98 @Deprecated equals(int codepoint, CharSequence other)99 public static final boolean equals(int codepoint, CharSequence other) { 100 if (other == null) { 101 return false; 102 } 103 switch (other.length()) { 104 case 1: return codepoint == other.charAt(0); 105 case 2: return codepoint > 0xFFFF && codepoint == Character.codePointAt(other, 0); 106 default: return false; 107 } 108 } 109 110 /** 111 * @internal 112 * @deprecated This API is ICU internal only. 113 */ 114 @Deprecated equals(CharSequence other, int codepoint)115 public static final boolean equals(CharSequence other, int codepoint) { 116 return equals(codepoint, other); 117 } 118 119 /** 120 * Utility to compare a string to a code point. 121 * Same results as turning the code point into a string (with the [ugly] new StringBuilder().appendCodePoint(codepoint).toString()) 122 * and comparing, but much faster (no object creation). 123 * Actually, there is one difference; a null compares as less. 124 * Note that this (=String) order is UTF-16 order -- *not* code point order. 125 * 126 * @internal 127 * @deprecated This API is ICU internal only. 128 */ 129 @Deprecated compare(CharSequence string, int codePoint)130 public static int compare(CharSequence string, int codePoint) { 131 if (codePoint < Character.MIN_CODE_POINT || codePoint > Character.MAX_CODE_POINT) { 132 throw new IllegalArgumentException(); 133 } 134 int stringLength = string.length(); 135 if (stringLength == 0) { 136 return -1; 137 } 138 char firstChar = string.charAt(0); 139 int offset = codePoint - Character.MIN_SUPPLEMENTARY_CODE_POINT; 140 141 if (offset < 0) { // BMP codePoint 142 int result = firstChar - codePoint; 143 if (result != 0) { 144 return result; 145 } 146 return stringLength - 1; 147 } 148 // non BMP 149 char lead = (char)((offset >>> 10) + Character.MIN_HIGH_SURROGATE); 150 int result = firstChar - lead; 151 if (result != 0) { 152 return result; 153 } 154 if (stringLength > 1) { 155 char trail = (char)((offset & 0x3ff) + Character.MIN_LOW_SURROGATE); 156 result = string.charAt(1) - trail; 157 if (result != 0) { 158 return result; 159 } 160 } 161 return stringLength - 2; 162 } 163 164 /** 165 * Utility to compare a string to a code point. 166 * Same results as turning the code point into a string and comparing, but much faster (no object creation). 167 * Actually, there is one difference; a null compares as less. 168 * Note that this (=String) order is UTF-16 order -- *not* code point order. 169 * 170 * @internal 171 * @deprecated This API is ICU internal only. 172 */ 173 @Deprecated compare(int codepoint, CharSequence a)174 public static int compare(int codepoint, CharSequence a) { 175 return -compare(a, codepoint); 176 } 177 178 /** 179 * Return the value of the first code point, if the string is exactly one code point. Otherwise return Integer.MAX_VALUE. 180 * 181 * @internal 182 * @deprecated This API is ICU internal only. 183 */ 184 @Deprecated getSingleCodePoint(CharSequence s)185 public static int getSingleCodePoint(CharSequence s) { 186 int length = s.length(); 187 if (length < 1 || length > 2) { 188 return Integer.MAX_VALUE; 189 } 190 int result = Character.codePointAt(s, 0); 191 return (result < 0x10000) == (length == 1) ? result : Integer.MAX_VALUE; 192 } 193 194 /** 195 * Utility function for comparing objects that may be null 196 * string. 197 * 198 * @internal 199 * @deprecated This API is ICU internal only. 200 */ 201 @Deprecated equals(T a, T b)202 public static final <T extends Object> boolean equals(T a, T b) { 203 return a == null ? b == null 204 : b == null ? false 205 : a.equals(b); 206 } 207 208 /** 209 * Utility for comparing the contents of CharSequences 210 * 211 * @internal 212 * @deprecated This API is ICU internal only. 213 */ 214 @Deprecated compare(CharSequence a, CharSequence b)215 public static int compare(CharSequence a, CharSequence b) { 216 int alength = a.length(); 217 int blength = b.length(); 218 int min = alength <= blength ? alength : blength; 219 for (int i = 0; i < min; ++i) { 220 int diff = a.charAt(i) - b.charAt(i); 221 if (diff != 0) { 222 return diff; 223 } 224 } 225 return alength - blength; 226 } 227 228 /** 229 * Utility for comparing the contents of CharSequences 230 * 231 * @internal 232 * @deprecated This API is ICU internal only. 233 */ 234 @Deprecated equalsChars(CharSequence a, CharSequence b)235 public static boolean equalsChars(CharSequence a, CharSequence b) { 236 // do length test first for fast path 237 return a.length() == b.length() && compare(a,b) == 0; 238 } 239 240 /** 241 * Are we on a character boundary? 242 * 243 * @internal 244 * @deprecated This API is ICU internal only. 245 */ 246 @Deprecated onCharacterBoundary(CharSequence s, int i)247 public static boolean onCharacterBoundary(CharSequence s, int i) { 248 return i <= 0 249 || i >= s.length() 250 || !Character.isHighSurrogate(s.charAt(i-1)) 251 || !Character.isLowSurrogate(s.charAt(i)); 252 } 253 254 /** 255 * Find code point in string. 256 * 257 * @internal 258 * @deprecated This API is ICU internal only. 259 */ 260 @Deprecated indexOf(CharSequence s, int codePoint)261 public static int indexOf(CharSequence s, int codePoint) { 262 int cp; 263 for (int i = 0; i < s.length(); i += Character.charCount(cp)) { 264 cp = Character.codePointAt(s, i); 265 if (cp == codePoint) { 266 return i; 267 } 268 } 269 return -1; 270 } 271 272 /** 273 * Utility function for simplified, more robust loops, such as: 274 * <pre> 275 * for (int codePoint : CharSequences.codePoints(string)) { 276 * doSomethingWith(codePoint); 277 * } 278 * </pre> 279 * 280 * @internal 281 * @deprecated This API is ICU internal only. 282 */ 283 @Deprecated codePoints(CharSequence s)284 public static int[] codePoints(CharSequence s) { 285 int[] result = new int[s.length()]; // in the vast majority of cases, the length is the same 286 int j = 0; 287 for (int i = 0; i < s.length(); ++i) { 288 char cp = s.charAt(i); 289 if (cp >= 0xDC00 && cp <= 0xDFFF && i != 0 ) { // hand-code for speed 290 char last = (char) result[j-1]; 291 if (last >= 0xD800 && last <= 0xDBFF) { 292 // Note: j-1 is safe, because j can only be zero if i is zero. But i!=0 in this block. 293 result[j-1] = Character.toCodePoint(last, cp); 294 continue; 295 } 296 } 297 result[j++] = cp; 298 } 299 if (j == result.length) { 300 return result; 301 } 302 int[] shortResult = new int[j]; 303 System.arraycopy(result, 0, shortResult, 0, j); 304 return shortResult; 305 } 306 CharSequences()307 private CharSequences() { 308 } 309 } 310