1 /*
2  * Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package java.io;
27 
/**
 * The {@code DataInput} interface provides for reading bytes from a binary
 * stream and reconstructing from them data in any of the Java primitive
 * types. There is also a facility for reconstructing a {@code String} from
 * data in <a href="#modified-utf-8">modified UTF-8</a> format.
 * <p>
 * It is generally true of all the reading routines in this interface that
 * if end of file is reached before the desired number of bytes has been
 * read, an {@code EOFException} (which is a kind of {@code IOException}) is
 * thrown. If any byte cannot be read for any reason other than end of file,
 * an {@code IOException} other than {@code EOFException} is thrown. In
 * particular, an {@code IOException} may be thrown if the input stream has
 * been closed.
 *
 * <h3><a id="modified-utf-8">Modified UTF-8</a></h3>
 * <p>
 * Implementations of the {@code DataInput} and {@code DataOutput} interfaces
 * represent Unicode strings in a format that is a slight modification of
 * UTF-8. (For information regarding the standard UTF-8 format, see section
 * <i>3.9 Unicode Encoding Forms</i> of <i>The Unicode Standard, Version
 * 4.0</i>.)
 *
 * <ul>
 * <li>Characters in the range {@code '\u005Cu0001'} to
 *         {@code '\u005Cu007F'} are represented by a single byte.
 * <li>The null character {@code '\u005Cu0000'} and characters
 *         in the range {@code '\u005Cu0080'} to {@code '\u005Cu07FF'} are
 *         represented by a pair of bytes.
 * <li>Characters in the range {@code '\u005Cu0800'}
 *         to {@code '\u005CuFFFF'} are represented by three bytes.
 * </ul>
 *
 *   <table class="plain" style="margin-left:2em;">
 *     <caption>Encoding of UTF-8 values</caption>
 *     <thead>
 *     <tr>
 *       <th scope="col" rowspan="2">Value</th>
 *       <th scope="col" rowspan="2">Byte</th>
 *       <th scope="col" colspan="8" id="bit_a">Bit Values</th>
 *     </tr>
 *     <tr>
 *       <!-- Value -->
 *       <!-- Byte -->
 *       <th scope="col" style="width:3em"> 7 </th>
 *       <th scope="col" style="width:3em"> 6 </th>
 *       <th scope="col" style="width:3em"> 5 </th>
 *       <th scope="col" style="width:3em"> 4 </th>
 *       <th scope="col" style="width:3em"> 3 </th>
 *       <th scope="col" style="width:3em"> 2 </th>
 *       <th scope="col" style="width:3em"> 1 </th>
 *       <th scope="col" style="width:3em"> 0 </th>
 *     </tr>
 *     </thead>
 *     <tbody>
 *     <tr>
 *       <th scope="row" style="text-align:left; font-weight:normal">
 *         {@code \u005Cu0001} to {@code \u005Cu007F} </th>
 *       <th scope="row" style="font-weight:normal; text-align:center"> 1 </th>
 *       <td style="text-align:center">0
 *       <td colspan="7" style="text-align:right; padding-right:6em">bits 6-0
 *     </tr>
 *     <tr>
 *       <th scope="row" rowspan="2" style="text-align:left; font-weight:normal">
 *           {@code \u005Cu0000},<br>
 *           {@code \u005Cu0080} to {@code \u005Cu07FF} </th>
 *       <th scope="row" style="font-weight:normal; text-align:center"> 1 </th>
 *       <td style="text-align:center">1
 *       <td style="text-align:center">1
 *       <td style="text-align:center">0
 *       <td colspan="5" style="text-align:right; padding-right:6em">bits 10-6
 *     </tr>
 *     <tr>
 *       <!-- (value) -->
 *       <th scope="row" style="font-weight:normal; text-align:center"> 2 </th>
 *       <td style="text-align:center">1
 *       <td style="text-align:center">0
 *       <td colspan="6" style="text-align:right; padding-right:6em">bits 5-0
 *     </tr>
 *     <tr>
 *       <th scope="row" rowspan="3" style="text-align:left; font-weight:normal">
 *         {@code \u005Cu0800} to {@code \u005CuFFFF} </th>
 *       <th scope="row" style="font-weight:normal; text-align:center"> 1 </th>
 *       <td style="text-align:center">1
 *       <td style="text-align:center">1
 *       <td style="text-align:center">1
 *       <td style="text-align:center">0
 *       <td colspan="4" style="text-align:right; padding-right:6em">bits 15-12
 *     </tr>
 *     <tr>
 *       <!-- (value) -->
 *       <th scope="row" style="font-weight:normal; text-align:center"> 2 </th>
 *       <td style="text-align:center">1
 *       <td style="text-align:center">0
 *       <td colspan="6" style="text-align:right; padding-right:6em">bits 11-6
 *     </tr>
 *     <tr>
 *       <!-- (value) -->
 *       <th scope="row" style="font-weight:normal; text-align:center"> 3 </th>
 *       <td style="text-align:center">1
 *       <td style="text-align:center">0
 *       <td colspan="6" style="text-align:right; padding-right:6em">bits 5-0
 *     </tr>
 *     </tbody>
 *   </table>
 *
 * <p>
 * The differences between this format and the
 * standard UTF-8 format are the following:
 * <ul>
 * <li>The null byte {@code '\u005Cu0000'} is encoded in 2-byte format
 *     rather than 1-byte, so that the encoded strings never have
 *     embedded nulls.
 * <li>Only the 1-byte, 2-byte, and 3-byte formats are used.
 * <li><a href="../lang/Character.html#unicode">Supplementary characters</a>
 *     are represented in the form of surrogate pairs.
 * </ul>
 * @author  Frank Yellin
 * @see     java.io.DataInputStream
 * @see     java.io.DataOutput
 * @since   1.0
 */
public interface DataInput {
    /**
     * Reads some bytes from an input stream and stores them into the buffer
     * array {@code b}. The number of bytes read is equal to the length of
     * {@code b}.
     * <p>
     * This method blocks until one of the following conditions occurs:
     * <ul>
     * <li>{@code b.length} bytes of input data are available, in which case
     * a normal return is made.
     *
     * <li>End of file is detected, in which case an {@code EOFException} is
     * thrown.
     *
     * <li>An I/O error occurs, in which case an {@code IOException} other
     * than {@code EOFException} is thrown.
     * </ul>
     * <p>
     * If {@code b} is {@code null}, a {@code NullPointerException} is thrown.
     * If {@code b.length} is zero, then no bytes are read. Otherwise, the
     * first byte read is stored into element {@code b[0]}, the next one into
     * {@code b[1]}, and so on.
     * If an exception is thrown from this method, then it may be that some
     * but not all bytes of {@code b} have been updated with data from the
     * input stream.
     *
     * @param   b   the buffer into which the data is read.
     * @throws  NullPointerException if {@code b} is {@code null}.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    void readFully(byte[] b) throws IOException;

    /**
     * Reads {@code len} bytes from an input stream.
     * <p>
     * This method blocks until one of the following conditions occurs:
     * <ul>
     * <li>{@code len} bytes of input data are available, in which case a
     * normal return is made.
     *
     * <li>End of file is detected, in which case an {@code EOFException} is
     * thrown.
     *
     * <li>An I/O error occurs, in which case an {@code IOException} other
     * than {@code EOFException} is thrown.
     * </ul>
     * <p>
     * If {@code b} is {@code null}, a {@code NullPointerException} is thrown.
     * If {@code off} is negative, or {@code len} is negative, or
     * {@code off+len} is greater than the length of the array {@code b},
     * then an {@code IndexOutOfBoundsException} is thrown.
     * If {@code len} is zero, then no bytes are read. Otherwise, the first
     * byte read is stored into element {@code b[off]}, the next one into
     * {@code b[off+1]}, and so on. The number of bytes read is, at most,
     * equal to {@code len}.
     *
     * @param   b    the buffer into which the data is read.
     * @param   off  an int specifying the offset in the data array {@code b}.
     * @param   len  an int specifying the number of bytes to read.
     * @throws  NullPointerException if {@code b} is {@code null}.
     * @throws  IndexOutOfBoundsException if {@code off} is negative,
     *          {@code len} is negative, or {@code len} is greater than
     *          {@code b.length - off}.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    void readFully(byte[] b, int off, int len) throws IOException;

    /**
     * Makes an attempt to skip over {@code n} bytes of data from the input
     * stream, discarding the skipped bytes. However, it may skip over some
     * smaller number of bytes, possibly zero. This may result from any of a
     * number of conditions; reaching end of file before {@code n} bytes have
     * been skipped is only one possibility.
     * This method never throws an {@code EOFException}.
     * The actual number of bytes skipped is returned.
     *
     * @param   n   the number of bytes to be skipped.
     * @return  the number of bytes actually skipped.
     * @throws  IOException   if an I/O error occurs.
     */
    int skipBytes(int n) throws IOException;

    /**
     * Reads one input byte and returns {@code true} if that byte is nonzero,
     * {@code false} if that byte is zero.
     * This method is suitable for reading the byte written by the
     * {@code writeBoolean} method of interface {@code DataOutput}.
     *
     * @return  the {@code boolean} value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    boolean readBoolean() throws IOException;

    /**
     * Reads and returns one input byte.
     * The byte is treated as a signed value in the range {@code -128}
     * through {@code 127}, inclusive.
     * This method is suitable for reading the byte written by the
     * {@code writeByte} method of interface {@code DataOutput}.
     *
     * @return  the 8-bit value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    byte readByte() throws IOException;

    /**
     * Reads one input byte, zero-extends it to type {@code int}, and returns
     * the result, which is therefore in the range {@code 0} through
     * {@code 255}.
     * This method is suitable for reading the byte written by the
     * {@code writeByte} method of interface {@code DataOutput} if the
     * argument to {@code writeByte} was intended to be a value in the range
     * {@code 0} through {@code 255}.
     *
     * @return  the unsigned 8-bit value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    int readUnsignedByte() throws IOException;

    /**
     * Reads two input bytes and returns a {@code short} value. Let {@code a}
     * be the first byte read and {@code b} be the second byte. The value
     * returned is:
     * <pre>{@code (short)((a << 8) | (b & 0xff))
     * }</pre>
     * This method is suitable for reading the bytes written by the
     * {@code writeShort} method of interface {@code DataOutput}.
     *
     * @return  the 16-bit value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    short readShort() throws IOException;

    /**
     * Reads two input bytes and returns an {@code int} value in the range
     * {@code 0} through {@code 65535}. Let {@code a} be the first byte read
     * and {@code b} be the second byte. The value returned is:
     * <pre>{@code (((a & 0xff) << 8) | (b & 0xff))
     * }</pre>
     * This method is suitable for reading the bytes written by the
     * {@code writeShort} method of interface {@code DataOutput} if the
     * argument to {@code writeShort} was intended to be a value in the range
     * {@code 0} through {@code 65535}.
     *
     * @return  the unsigned 16-bit value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    int readUnsignedShort() throws IOException;

    /**
     * Reads two input bytes and returns a {@code char} value.
     * Let {@code a} be the first byte read and {@code b} be the second byte.
     * The value returned is:
     * <pre>{@code (char)((a << 8) | (b & 0xff))
     * }</pre>
     * This method is suitable for reading bytes written by the
     * {@code writeChar} method of interface {@code DataOutput}.
     *
     * @return  the {@code char} value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    char readChar() throws IOException;

    /**
     * Reads four input bytes and returns an {@code int} value. Let
     * {@code a-d} be the first through fourth bytes read. The value returned
     * is:
     * <pre>{@code
     * (((a & 0xff) << 24) | ((b & 0xff) << 16) |
     *  ((c & 0xff) <<  8) | (d & 0xff))
     * }</pre>
     * This method is suitable for reading bytes written by the
     * {@code writeInt} method of interface {@code DataOutput}.
     *
     * @return  the {@code int} value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    int readInt() throws IOException;

    /**
     * Reads eight input bytes and returns a {@code long} value. Let
     * {@code a-h} be the first through eighth bytes read.
     * The value returned is:
     * <pre>{@code
     * (((long)(a & 0xff) << 56) |
     *  ((long)(b & 0xff) << 48) |
     *  ((long)(c & 0xff) << 40) |
     *  ((long)(d & 0xff) << 32) |
     *  ((long)(e & 0xff) << 24) |
     *  ((long)(f & 0xff) << 16) |
     *  ((long)(g & 0xff) <<  8) |
     *  ((long)(h & 0xff)))
     * }</pre>
     * <p>
     * This method is suitable for reading bytes written by the
     * {@code writeLong} method of interface {@code DataOutput}.
     *
     * @return  the {@code long} value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    long readLong() throws IOException;

    /**
     * Reads four input bytes and returns a {@code float} value. It does this
     * by first constructing an {@code int} value in exactly the manner of the
     * {@code readInt} method, then converting this {@code int} value to a
     * {@code float} in exactly the manner of the method
     * {@code Float.intBitsToFloat}.
     * This method is suitable for reading bytes written by the
     * {@code writeFloat} method of interface {@code DataOutput}.
     *
     * @return  the {@code float} value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    float readFloat() throws IOException;

    /**
     * Reads eight input bytes and returns a {@code double} value. It does
     * this by first constructing a {@code long} value in exactly the manner
     * of the {@code readLong} method, then converting this {@code long} value
     * to a {@code double} in exactly the manner of the method
     * {@code Double.longBitsToDouble}.
     * This method is suitable for reading bytes written by the
     * {@code writeDouble} method of interface {@code DataOutput}.
     *
     * @return  the {@code double} value read.
     * @throws  EOFException  if this stream reaches the end before reading
     *          all the bytes.
     * @throws  IOException   if an I/O error occurs.
     */
    double readDouble() throws IOException;

    /**
     * Reads the next line of text from the input stream.
     * It reads successive bytes, converting each byte separately into a
     * character, until it encounters a line terminator or end of file; the
     * characters read are then returned as a {@code String}. Note that
     * because this method processes bytes, it does not support input of the
     * full Unicode character set.
     * <p>
     * If end of file is encountered before even one byte can be read, then
     * {@code null} is returned. Otherwise, each byte that is read is
     * converted to type {@code char} by zero-extension. If the character
     * {@code '\n'} is encountered, it is discarded and reading ceases. If the
     * character {@code '\r'} is encountered, it is discarded and, if the
     * following byte converts to the character {@code '\n'}, then that is
     * discarded also; reading then ceases. If end of file is encountered
     * before either of the characters {@code '\n'} and {@code '\r'} is
     * encountered, reading ceases. Once reading has ceased, a {@code String}
     * is returned that contains all the characters read and not discarded,
     * taken in order.
     * Note that every character in this string will have a value less than
     * {@code \u005Cu0100}, that is, {@code (char)256}.
     *
     * @return the next line of text from the input stream,
     *         or {@code null} if the end of file is
     *         encountered before a byte can be read.
     * @throws IOException  if an I/O error occurs.
     */
    String readLine() throws IOException;

    /**
     * Reads in a string that has been encoded using a
     * <a href="#modified-utf-8">modified UTF-8</a>
     * format.
     * The general contract of {@code readUTF} is that it reads a
     * representation of a Unicode character string encoded in modified UTF-8
     * format; this string of characters is then returned as a {@code String}.
     * <p>
     * First, two bytes are read and used to construct an unsigned 16-bit
     * integer in exactly the manner of the {@code readUnsignedShort} method.
     * This integer value is called the <i>UTF length</i> and specifies the
     * number of additional bytes to be read. These bytes are then converted
     * to characters by considering them in groups. The length of each group
     * is computed from the value of the first byte of the group. The byte
     * following a group, if any, is the first byte of the next group.
     * <p>
     * If the first byte of a group matches the bit pattern {@code 0xxxxxxx}
     * (where {@code x} means "may be {@code 0} or {@code 1}"), then the group
     * consists of just that byte. The byte is zero-extended to form a
     * character.
     * <p>
     * If the first byte of a group matches the bit pattern {@code 110xxxxx},
     * then the group consists of that byte {@code a} and a second byte
     * {@code b}. If there is no byte {@code b} (because byte {@code a} was
     * the last of the bytes to be read), or if byte {@code b} does not match
     * the bit pattern {@code 10xxxxxx}, then a
     * {@code UTFDataFormatException} is thrown. Otherwise, the group is
     * converted to the character:
     * <pre>{@code (char)(((a & 0x1F) << 6) | (b & 0x3F))
     * }</pre>
     * If the first byte of a group matches the bit pattern {@code 1110xxxx},
     * then the group consists of that byte {@code a} and two more bytes
     * {@code b} and {@code c}. If there is no byte {@code c} (because byte
     * {@code a} was one of the last two of the bytes to be read), or either
     * byte {@code b} or byte {@code c} does not match the bit pattern
     * {@code 10xxxxxx}, then a {@code UTFDataFormatException} is thrown.
     * Otherwise, the group is converted to the character:
     * <pre>{@code
     * (char)(((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F))
     * }</pre>
     * If the first byte of a group matches the pattern {@code 1111xxxx} or
     * the pattern {@code 10xxxxxx}, then a {@code UTFDataFormatException} is
     * thrown.
     * <p>
     * If end of file is encountered at any time during this entire process,
     * then an {@code EOFException} is thrown.
     * <p>
     * After every group has been converted to a character by this process,
     * the characters are gathered, in the same order in which their
     * corresponding groups were read from the input stream, to form a
     * {@code String}, which is returned.
     * <p>
     * The {@code writeUTF} method of interface {@code DataOutput} may be used
     * to write data that is suitable for reading by this method.
     *
     * @return  a Unicode string.
     * @throws  EOFException            if this stream reaches the end
     *          before reading all the bytes.
     * @throws  IOException             if an I/O error occurs.
     * @throws  UTFDataFormatException  if the bytes do not represent a
     *          valid modified UTF-8 encoding of a string.
     */
    String readUTF() throws IOException;
}
604