/*
 * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
25 
26 package java.lang;
27 
28 import java.util.NoSuchElementException;
29 import java.util.Objects;
30 import java.util.PrimitiveIterator;
31 import java.util.Spliterator;
32 import java.util.Spliterators;
33 import java.util.function.IntConsumer;
34 import java.util.stream.IntStream;
35 import java.util.stream.StreamSupport;
36 
/**
 * A {@code CharSequence} is a readable sequence of {@code char} values. This
 * interface provides uniform, read-only access to many different kinds of
 * {@code char} sequences.
 * A {@code char} value represents a character in the <i>Basic
 * Multilingual Plane (BMP)</i> or a surrogate. Refer to <a
 * href="Character.html#unicode">Unicode Character Representation</a> for details.
 *
 * <p> This interface does not refine the general contracts of the {@link
 * java.lang.Object#equals(java.lang.Object) equals} and {@link
 * java.lang.Object#hashCode() hashCode} methods. The result of testing two objects
 * that implement {@code CharSequence} for equality is therefore, in general, undefined.
 * Each object may be implemented by a different class, and there
 * is no guarantee that each class will be capable of testing its instances
 * for equality with those of the other.  It is therefore inappropriate to use
 * arbitrary {@code CharSequence} instances as elements in a set or as keys in
 * a map. </p>
 *
 * @author Mike McCloskey
 * @since 1.4
 */

public interface CharSequence {

    /**
     * Returns the length of this character sequence.  The length is the number
     * of 16-bit {@code char}s in the sequence.
     *
     * @return  the number of {@code char}s in this sequence
     */
    int length();

    /**
     * Returns the {@code char} value at the specified index.  An index ranges from zero
     * to {@code length() - 1}.  The first {@code char} value of the sequence is at
     * index zero, the next at index one, and so on, as for array
     * indexing.
     *
     * <p>If the {@code char} value specified by the index is a
     * <a href="{@docRoot}/java.base/java/lang/Character.html#unicode">surrogate</a>, the surrogate
     * value is returned.
     *
     * @param   index   the index of the {@code char} value to be returned
     *
     * @return  the specified {@code char} value
     *
     * @throws  IndexOutOfBoundsException
     *          if the {@code index} argument is negative or not less than
     *          {@code length()}
     */
    char charAt(int index);

    /**
     * Returns {@code true} if this character sequence is empty.
     *
     * @implSpec
     * The default implementation returns the result of calling {@code length() == 0}.
     *
     * @return {@code true} if {@link #length()} is {@code 0}, otherwise
     * {@code false}
     *
     * @since 15
     */
    default boolean isEmpty() {
        return length() == 0;
    }

    /**
     * Returns a {@code CharSequence} that is a subsequence of this sequence.
     * The subsequence starts with the {@code char} value at the specified index and
     * ends with the {@code char} value at index {@code end - 1}.  The length
     * (in {@code char}s) of the
     * returned sequence is {@code end - start}, so if {@code start == end}
     * then an empty sequence is returned.
     *
     * @param   start   the start index, inclusive
     * @param   end     the end index, exclusive
     *
     * @return  the specified subsequence
     *
     * @throws  IndexOutOfBoundsException
     *          if {@code start} or {@code end} are negative,
     *          if {@code end} is greater than {@code length()},
     *          or if {@code start} is greater than {@code end}
     */
    CharSequence subSequence(int start, int end);

    /**
     * Returns a string containing the characters in this sequence in the same
     * order as this sequence.  The length of the string will be the length of
     * this sequence.
     *
     * @return  a string consisting of exactly this sequence of characters
     */
    public String toString();

    /**
     * Returns a stream of {@code int} zero-extending the {@code char} values
     * from this sequence.  Any char which maps to a <a
     * href="{@docRoot}/java.base/java/lang/Character.html#unicode">surrogate code
     * point</a> is passed through uninterpreted.
     *
     * <p>The stream binds to this sequence when the terminal stream operation
     * commences (specifically, for mutable sequences the spliterator for the
     * stream is <a href="../util/Spliterator.html#binding"><em>late-binding</em></a>).
     * If the sequence is modified during that operation then the result is
     * undefined.
     *
     * @return an IntStream of char values from this sequence
     * @since 1.8
     */
    public default IntStream chars() {
        // Index-based iterator over this sequence; every step re-reads
        // length() and charAt() from the enclosing CharSequence.
        class CharIterator implements PrimitiveIterator.OfInt {
            int cur = 0;

            @Override
            public boolean hasNext() {
                return cur < length();
            }

            @Override
            public int nextInt() {
                if (hasNext()) {
                    return charAt(cur++);
                } else {
                    throw new NoSuchElementException();
                }
            }

            @Override
            public void forEachRemaining(IntConsumer block) {
                // Advance the shared cursor directly so that a later
                // hasNext()/nextInt() call resumes where this loop stopped.
                for (; cur < length(); cur++) {
                    block.accept(charAt(cur));
                }
            }
        }

        // The spliterator is handed over as a Supplier, so neither the
        // iterator nor the length() snapshot is taken until the supplier is
        // invoked by the stream.
        return StreamSupport.intStream(() ->
                Spliterators.spliterator(
                        new CharIterator(),
                        length(),
                        Spliterator.ORDERED),
                Spliterator.SUBSIZED | Spliterator.SIZED | Spliterator.ORDERED,
                false);
    }

    /**
     * Returns a stream of code point values from this sequence.  Any surrogate
     * pairs encountered in the sequence are combined as if by {@linkplain
     * Character#toCodePoint Character.toCodePoint} and the result is passed
     * to the stream. Any other code units, including ordinary BMP characters,
     * unpaired surrogates, and undefined code units, are zero-extended to
     * {@code int} values which are then passed to the stream.
     *
     * <p>The stream binds to this sequence when the terminal stream operation
     * commences (specifically, for mutable sequences the spliterator for the
     * stream is <a href="../util/Spliterator.html#binding"><em>late-binding</em></a>).
     * If the sequence is modified during that operation then the result is
     * undefined.
     *
     * @return an IntStream of Unicode code points from this sequence
     * @since 1.8
     */
    public default IntStream codePoints() {
        // Iterator that merges a high surrogate immediately followed by a low
        // surrogate into one code point and passes every other char through.
        class CodePointIterator implements PrimitiveIterator.OfInt {
            int cur = 0;

            @Override
            public void forEachRemaining(IntConsumer block) {
                final int length = length();
                int i = cur;
                try {
                    while (i < length) {
                        char c1 = charAt(i++);
                        // Not a high surrogate, or a high surrogate in the
                        // last position with nothing to pair with: emit as is.
                        if (!Character.isHighSurrogate(c1) || i >= length) {
                            block.accept(c1);
                        } else {
                            char c2 = charAt(i);
                            if (Character.isLowSurrogate(c2)) {
                                i++;
                                block.accept(Character.toCodePoint(c1, c2));
                            } else {
                                // Unpaired high surrogate; c2 is examined
                                // again on the next loop iteration.
                                block.accept(c1);
                            }
                        }
                    }
                } finally {
                    // Publish the position even when the consumer throws.
                    cur = i;
                }
            }

            @Override
            public boolean hasNext() {
                return cur < length();
            }

            @Override
            public int nextInt() {
                final int length = length();

                if (cur >= length) {
                    throw new NoSuchElementException();
                }
                char c1 = charAt(cur++);
                if (Character.isHighSurrogate(c1) && cur < length) {
                    char c2 = charAt(cur);
                    if (Character.isLowSurrogate(c2)) {
                        cur++;
                        return Character.toCodePoint(c1, c2);
                    }
                }
                return c1;
            }
        }

        // The number of code points is not known without scanning, so the
        // spliterator is created with unknown size and reports only ORDERED.
        return StreamSupport.intStream(() ->
                Spliterators.spliteratorUnknownSize(
                        new CodePointIterator(),
                        Spliterator.ORDERED),
                Spliterator.ORDERED,
                false);
    }

    /**
     * Compares two {@code CharSequence} instances lexicographically. Returns a
     * negative value, zero, or a positive value if the first sequence is lexicographically
     * less than, equal to, or greater than the second, respectively.
     *
     * <p>
     * The lexicographical ordering of {@code CharSequence} is defined as follows.
     * Consider a {@code CharSequence} <i>cs</i> of length <i>len</i> to be a
     * sequence of char values, <i>cs[0]</i> to <i>cs[len-1]</i>. Suppose <i>k</i>
     * is the lowest index at which the corresponding char values from each sequence
     * differ. The lexicographic ordering of the sequences is determined by a numeric
     * comparison of the char values <i>cs1[k]</i> with <i>cs2[k]</i>. If there is
     * no such index <i>k</i>, the shorter sequence is considered lexicographically
     * less than the other. If the sequences have the same length, the sequences are
     * considered lexicographically equal.
     *
     * @implNote
     * When both arguments have the same runtime class and that class implements
     * {@code Comparable}, the result is whatever {@code cs1.compareTo(cs2)}
     * returns; otherwise the sequences are compared {@code char} by {@code char}.
     *
     * @param cs1 the first {@code CharSequence}
     * @param cs2 the second {@code CharSequence}
     *
     * @return  the value {@code 0} if the two {@code CharSequence} are equal;
     *          a negative integer if the first {@code CharSequence}
     *          is lexicographically less than the second; or a
     *          positive integer if the first {@code CharSequence} is
     *          lexicographically greater than the second.
     *
     * @throws  NullPointerException
     *          if {@code cs1} or {@code cs2} is {@code null}
     *
     * @since 11
     */
    public static int compare(CharSequence cs1, CharSequence cs2) {
        // Null-check both arguments, then short-circuit on identity.
        if (Objects.requireNonNull(cs1) == Objects.requireNonNull(cs2)) {
            return 0;
        }

        if (cs1.getClass() == cs2.getClass() && cs1 instanceof Comparable) {
            // The guard above only establishes that both arguments share one
            // runtime class and that it is Comparable; the type argument
            // cannot be checked, hence the narrowly scoped suppression.
            @SuppressWarnings("unchecked")
            Comparable<Object> comparable = (Comparable<Object>) cs1;
            return comparable.compareTo(cs2);
        }

        for (int i = 0, len = Math.min(cs1.length(), cs2.length()); i < len; i++) {
            char a = cs1.charAt(i);
            char b = cs2.charAt(i);
            if (a != b) {
                // Both operands are 16-bit values promoted to int, so the
                // subtraction cannot overflow.
                return a - b;
            }
        }

        // Common prefix is identical: the shorter sequence orders first.
        return cs1.length() - cs2.length();
    }

}
306