1 /*
2  * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package java.lang;
27 
28 import java.util.NoSuchElementException;
29 import java.util.PrimitiveIterator;
30 import java.util.Spliterator;
31 import java.util.Spliterators;
32 import java.util.function.IntConsumer;
33 import java.util.stream.IntStream;
34 import java.util.stream.StreamSupport;
35 
36 /**
37  * A <tt>CharSequence</tt> is a readable sequence of <code>char</code> values. This
38  * interface provides uniform, read-only access to many different kinds of
39  * <code>char</code> sequences.
40  * A <code>char</code> value represents a character in the <i>Basic
41  * Multilingual Plane (BMP)</i> or a surrogate. Refer to <a
42  * href="Character.html#unicode">Unicode Character Representation</a> for details.
43  *
44  * <p> This interface does not refine the general contracts of the {@link
45  * java.lang.Object#equals(java.lang.Object) equals} and {@link
46  * java.lang.Object#hashCode() hashCode} methods.  The result of comparing two
47  * objects that implement <tt>CharSequence</tt> is therefore, in general,
48  * undefined.  Each object may be implemented by a different class, and there
49  * is no guarantee that each class will be capable of testing its instances
50  * for equality with those of the other.  It is therefore inappropriate to use
51  * arbitrary <tt>CharSequence</tt> instances as elements in a set or as keys in
52  * a map. </p>
53  *
54  * @author Mike McCloskey
55  * @since 1.4
56  * @spec JSR-51
57  */
58 
public interface CharSequence {

    /**
     * Reports how many {@code char} values this sequence holds.  Every
     * {@code char} is one 16-bit unit, so the result counts code units
     * rather than Unicode code points.
     *
     * @return  the number of {@code char}s in this sequence
     */
    int length();

    /**
     * Fetches the {@code char} value stored at the given position.  Positions
     * are numbered like array indices: the first {@code char} is at index
     * zero and the last one is at index {@code length() - 1}.
     *
     * <p>When the {@code char} at that position is a
     * <a href="{@docRoot}/java/lang/Character.html#unicode">surrogate</a>,
     * the surrogate value itself is returned.
     *
     * @param   index   the index of the {@code char} value to be returned
     *
     * @return  the specified {@code char} value
     *
     * @throws  IndexOutOfBoundsException
     *          if the {@code index} argument is negative or not less than
     *          {@code length()}
     */
    char charAt(int index);

    /**
     * Produces a {@code CharSequence} covering part of this sequence.  The
     * result begins with the {@code char} at index {@code start} and stops
     * with the {@code char} at index {@code end - 1}, so it contains
     * {@code end - start} {@code char}s; passing {@code start == end} yields
     * an empty sequence.
     *
     * @param   start   the start index, inclusive
     * @param   end     the end index, exclusive
     *
     * @return  the specified subsequence
     *
     * @throws  IndexOutOfBoundsException
     *          if {@code start} or {@code end} are negative,
     *          if {@code end} is greater than {@code length()},
     *          or if {@code start} is greater than {@code end}
     */
    CharSequence subSequence(int start, int end);

    /**
     * Produces a string holding the same characters as this sequence, in the
     * same order.  The string's length equals the length of this sequence.
     *
     * @return  a string consisting of exactly this sequence of characters
     */
    public String toString();

    /**
     * Streams the {@code char} values of this sequence, each zero-extended
     * to an {@code int}.  A char that maps to a <a
     * href="{@docRoot}/java/lang/Character.html#unicode">surrogate code
     * point</a> is emitted as-is, without any interpretation.
     *
     * <p>The result is undefined if the sequence is mutated while the stream
     * is being read.
     *
     * @return an IntStream of char values from this sequence
     * @since 1.8
     */
    public default IntStream chars() {
        // Walks the enclosing sequence one char at a time; length() is
        // re-read on every check rather than cached.
        class CharCursor implements PrimitiveIterator.OfInt {
            private int position = 0;

            @Override
            public boolean hasNext() {
                return position < length();
            }

            @Override
            public int nextInt() {
                if (!hasNext()) {
                    throw new NoSuchElementException();
                }
                return charAt(position++);
            }

            @Override
            public void forEachRemaining(IntConsumer block) {
                // Advance only after the consumer has accepted the value.
                while (position < length()) {
                    block.accept(charAt(position));
                    position++;
                }
            }
        }

        final int streamCharacteristics =
                Spliterator.ORDERED | Spliterator.SIZED | Spliterator.SUBSIZED;

        // The spliterator (and therefore length()) is obtained through the
        // supplier, not at the time chars() is called.
        return StreamSupport.intStream(
                () -> Spliterators.spliterator(
                        new CharCursor(), length(), Spliterator.ORDERED),
                streamCharacteristics,
                false);
    }

    /**
     * Streams the code points of this sequence.  A high surrogate that is
     * immediately followed by a low surrogate is merged with it as if by
     * {@linkplain Character#toCodePoint Character.toCodePoint}, and the
     * combined value is emitted.  Every other code unit -- ordinary BMP
     * characters, unpaired surrogates, and undefined code units -- is
     * zero-extended to an {@code int} and emitted unchanged.
     *
     * <p>The result is undefined if the sequence is mutated while the stream
     * is being read.
     *
     * @return an IntStream of Unicode code points from this sequence
     * @since 1.8
     */
    public default IntStream codePoints() {
        // Decodes the enclosing sequence into code points, pairing
        // surrogates where a valid high/low pair is present.
        class CodePointCursor implements PrimitiveIterator.OfInt {
            private int position = 0;

            @Override
            public boolean hasNext() {
                return position < length();
            }

            @Override
            public int nextInt() {
                final int end = length();
                if (position >= end) {
                    throw new NoSuchElementException();
                }

                final char lead = charAt(position++);
                // Not a high surrogate, or nothing follows it: emit as-is.
                if (!Character.isHighSurrogate(lead) || position >= end) {
                    return lead;
                }

                final char trail = charAt(position);
                // High surrogate without a matching low surrogate: emit as-is
                // and leave the following char for the next call.
                if (!Character.isLowSurrogate(trail)) {
                    return lead;
                }

                position++;
                return Character.toCodePoint(lead, trail);
            }

            @Override
            public void forEachRemaining(IntConsumer block) {
                final int end = length();
                int index = position;
                try {
                    while (index < end) {
                        final char lead = charAt(index++);
                        int codePoint = lead;
                        if (Character.isHighSurrogate(lead) && index < end) {
                            final char trail = charAt(index);
                            if (Character.isLowSurrogate(trail)) {
                                index++;
                                codePoint = Character.toCodePoint(lead, trail);
                            }
                        }
                        block.accept(codePoint);
                    }
                } finally {
                    // Record progress even when charAt or the consumer throws.
                    position = index;
                }
            }
        }

        // The number of code points is not known up front, so the
        // spliterator is created without a size.
        return StreamSupport.intStream(
                () -> Spliterators.spliteratorUnknownSize(
                        new CodePointCursor(), Spliterator.ORDERED),
                Spliterator.ORDERED,
                false);
    }
}
235