1 /* 2 * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.NoSuchElementException; 29 import java.util.Objects; 30 import java.util.PrimitiveIterator; 31 import java.util.Spliterator; 32 import java.util.Spliterators; 33 import java.util.function.IntConsumer; 34 import java.util.stream.IntStream; 35 import java.util.stream.StreamSupport; 36 37 /** 38 * A {@code CharSequence} is a readable sequence of {@code char} values. This 39 * interface provides uniform, read-only access to many different kinds of 40 * {@code char} sequences. 41 * A {@code char} value represents a character in the <i>Basic 42 * Multilingual Plane (BMP)</i> or a surrogate. Refer to <a 43 * href="Character.html#unicode">Unicode Character Representation</a> for details. 44 * 45 * <p> This interface does not refine the general contracts of the {@link 46 * java.lang.Object#equals(java.lang.Object) equals} and {@link 47 * java.lang.Object#hashCode() hashCode} methods. The result of testing two objects 48 * that implement {@code CharSequence} for equality is therefore, in general, undefined. 49 * Each object may be implemented by a different class, and there 50 * is no guarantee that each class will be capable of testing its instances 51 * for equality with those of the other. It is therefore inappropriate to use 52 * arbitrary {@code CharSequence} instances as elements in a set or as keys in 53 * a map. </p> 54 * 55 * @author Mike McCloskey 56 * @since 1.4 57 */ 58 59 public interface CharSequence { 60 61 /** 62 * Returns the length of this character sequence. The length is the number 63 * of 16-bit {@code char}s in the sequence. 64 * 65 * @return the number of {@code char}s in this sequence 66 */ length()67 int length(); 68 69 /** 70 * Returns the {@code char} value at the specified index. An index ranges from zero 71 * to {@code length() - 1}. The first {@code char} value of the sequence is at 72 * index zero, the next at index one, and so on, as for array 73 * indexing. 74 * 75 * <p>If the {@code char} value specified by the index is a 76 * <a href="{@docRoot}/java.base/java/lang/Character.html#unicode">surrogate</a>, the surrogate 77 * value is returned. 78 * 79 * @param index the index of the {@code char} value to be returned 80 * 81 * @return the specified {@code char} value 82 * 83 * @throws IndexOutOfBoundsException 84 * if the {@code index} argument is negative or not less than 85 * {@code length()} 86 */ charAt(int index)87 char charAt(int index); 88 89 /** 90 * Returns {@code true} if this character sequence is empty. 91 * 92 * @implSpec 93 * The default implementation returns the result of calling {@code length() == 0}. 94 * 95 * @return {@code true} if {@link #length()} is {@code 0}, otherwise 96 * {@code false} 97 * 98 * @since 15 99 */ isEmpty()100 default boolean isEmpty() { 101 return this.length() == 0; 102 } 103 104 /** 105 * Returns a {@code CharSequence} that is a subsequence of this sequence. 106 * The subsequence starts with the {@code char} value at the specified index and 107 * ends with the {@code char} value at index {@code end - 1}. The length 108 * (in {@code char}s) of the 109 * returned sequence is {@code end - start}, so if {@code start == end} 110 * then an empty sequence is returned. 111 * 112 * @param start the start index, inclusive 113 * @param end the end index, exclusive 114 * 115 * @return the specified subsequence 116 * 117 * @throws IndexOutOfBoundsException 118 * if {@code start} or {@code end} are negative, 119 * if {@code end} is greater than {@code length()}, 120 * or if {@code start} is greater than {@code end} 121 */ subSequence(int start, int end)122 CharSequence subSequence(int start, int end); 123 124 /** 125 * Returns a string containing the characters in this sequence in the same 126 * order as this sequence. The length of the string will be the length of 127 * this sequence. 128 * 129 * @return a string consisting of exactly this sequence of characters 130 */ toString()131 public String toString(); 132 133 /** 134 * Returns a stream of {@code int} zero-extending the {@code char} values 135 * from this sequence. Any char which maps to a <a 136 * href="{@docRoot}/java.base/java/lang/Character.html#unicode">surrogate code 137 * point</a> is passed through uninterpreted. 138 * 139 * <p>The stream binds to this sequence when the terminal stream operation 140 * commences (specifically, for mutable sequences the spliterator for the 141 * stream is <a href="../util/Spliterator.html#binding"><em>late-binding</em></a>). 142 * If the sequence is modified during that operation then the result is 143 * undefined. 144 * 145 * @return an IntStream of char values from this sequence 146 * @since 1.8 147 */ chars()148 public default IntStream chars() { 149 class CharIterator implements PrimitiveIterator.OfInt { 150 int cur = 0; 151 152 public boolean hasNext() { 153 return cur < length(); 154 } 155 156 public int nextInt() { 157 if (hasNext()) { 158 return charAt(cur++); 159 } else { 160 throw new NoSuchElementException(); 161 } 162 } 163 164 @Override 165 public void forEachRemaining(IntConsumer block) { 166 for (; cur < length(); cur++) { 167 block.accept(charAt(cur)); 168 } 169 } 170 } 171 172 return StreamSupport.intStream(() -> 173 Spliterators.spliterator( 174 new CharIterator(), 175 length(), 176 Spliterator.ORDERED), 177 Spliterator.SUBSIZED | Spliterator.SIZED | Spliterator.ORDERED, 178 false); 179 } 180 181 /** 182 * Returns a stream of code point values from this sequence. Any surrogate 183 * pairs encountered in the sequence are combined as if by {@linkplain 184 * Character#toCodePoint Character.toCodePoint} and the result is passed 185 * to the stream. Any other code units, including ordinary BMP characters, 186 * unpaired surrogates, and undefined code units, are zero-extended to 187 * {@code int} values which are then passed to the stream. 188 * 189 * <p>The stream binds to this sequence when the terminal stream operation 190 * commences (specifically, for mutable sequences the spliterator for the 191 * stream is <a href="../util/Spliterator.html#binding"><em>late-binding</em></a>). 192 * If the sequence is modified during that operation then the result is 193 * undefined. 194 * 195 * @return an IntStream of Unicode code points from this sequence 196 * @since 1.8 197 */ codePoints()198 public default IntStream codePoints() { 199 class CodePointIterator implements PrimitiveIterator.OfInt { 200 int cur = 0; 201 202 @Override 203 public void forEachRemaining(IntConsumer block) { 204 final int length = length(); 205 int i = cur; 206 try { 207 while (i < length) { 208 char c1 = charAt(i++); 209 if (!Character.isHighSurrogate(c1) || i >= length) { 210 block.accept(c1); 211 } else { 212 char c2 = charAt(i); 213 if (Character.isLowSurrogate(c2)) { 214 i++; 215 block.accept(Character.toCodePoint(c1, c2)); 216 } else { 217 block.accept(c1); 218 } 219 } 220 } 221 } finally { 222 cur = i; 223 } 224 } 225 226 public boolean hasNext() { 227 return cur < length(); 228 } 229 230 public int nextInt() { 231 final int length = length(); 232 233 if (cur >= length) { 234 throw new NoSuchElementException(); 235 } 236 char c1 = charAt(cur++); 237 if (Character.isHighSurrogate(c1) && cur < length) { 238 char c2 = charAt(cur); 239 if (Character.isLowSurrogate(c2)) { 240 cur++; 241 return Character.toCodePoint(c1, c2); 242 } 243 } 244 return c1; 245 } 246 } 247 248 return StreamSupport.intStream(() -> 249 Spliterators.spliteratorUnknownSize( 250 new CodePointIterator(), 251 Spliterator.ORDERED), 252 Spliterator.ORDERED, 253 false); 254 } 255 256 /** 257 * Compares two {@code CharSequence} instances lexicographically. Returns a 258 * negative value, zero, or a positive value if the first sequence is lexicographically 259 * less than, equal to, or greater than the second, respectively. 260 * 261 * <p> 262 * The lexicographical ordering of {@code CharSequence} is defined as follows. 263 * Consider a {@code CharSequence} <i>cs</i> of length <i>len</i> to be a 264 * sequence of char values, <i>cs[0]</i> to <i>cs[len-1]</i>. Suppose <i>k</i> 265 * is the lowest index at which the corresponding char values from each sequence 266 * differ. The lexicographic ordering of the sequences is determined by a numeric 267 * comparison of the char values <i>cs1[k]</i> with <i>cs2[k]</i>. If there is 268 * no such index <i>k</i>, the shorter sequence is considered lexicographically 269 * less than the other. If the sequences have the same length, the sequences are 270 * considered lexicographically equal. 271 * 272 * 273 * @param cs1 the first {@code CharSequence} 274 * @param cs2 the second {@code CharSequence} 275 * 276 * @return the value {@code 0} if the two {@code CharSequence} are equal; 277 * a negative integer if the first {@code CharSequence} 278 * is lexicographically less than the second; or a 279 * positive integer if the first {@code CharSequence} is 280 * lexicographically greater than the second. 281 * 282 * @since 11 283 */ 284 @SuppressWarnings("unchecked") compare(CharSequence cs1, CharSequence cs2)285 public static int compare(CharSequence cs1, CharSequence cs2) { 286 if (Objects.requireNonNull(cs1) == Objects.requireNonNull(cs2)) { 287 return 0; 288 } 289 290 if (cs1.getClass() == cs2.getClass() && cs1 instanceof Comparable) { 291 return ((Comparable<Object>) cs1).compareTo(cs2); 292 } 293 294 for (int i = 0, len = Math.min(cs1.length(), cs2.length()); i < len; i++) { 295 char a = cs1.charAt(i); 296 char b = cs2.charAt(i); 297 if (a != b) { 298 return a - b; 299 } 300 } 301 302 return cs1.length() - cs2.length(); 303 } 304 305 } 306