/*
 * Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package java.io;

/**
 * The {@code DataInput} interface provides
 * for reading bytes from a binary stream and
 * reconstructing from them data in any of
 * the Java primitive types. There is also
 * a
 * facility for reconstructing a {@code String}
 * from data in
 * <a href="#modified-utf-8">modified UTF-8</a>
 * format.
 * <p>
 * It is generally true of all the reading
 * routines in this interface that if end of
 * file is reached before the desired number
 * of bytes has been read, an {@code EOFException}
 * (which is a kind of {@code IOException})
 * is thrown. If any byte cannot be read for
 * any reason other than end of file, an {@code IOException}
 * other than {@code EOFException} is
 * thrown. In particular, an {@code IOException}
 * may be thrown if the input stream has been
 * closed.
 *
 * <h3><a name="modified-utf-8">Modified UTF-8</a></h3>
 * <p>
 * Implementations of the DataInput and DataOutput interfaces represent
 * Unicode strings in a format that is a slight modification of UTF-8.
 * (For information regarding the standard UTF-8 format, see section
 * <i>3.9 Unicode Encoding Forms</i> of <i>The Unicode Standard, Version
 * 4.0</i>).
 * Note that in the following table, the most significant bit appears in the
 * far left-hand column.
 *
 * <blockquote>
 *   <table border="1" cellspacing="0" cellpadding="8"
 *          summary="Bit values and bytes">
 *     <tr>
 *       <th colspan="9"><span style="font-weight:normal">
 *         All characters in the range {@code '\u005Cu0001'} to
 *         {@code '\u005Cu007F'} are represented by a single byte:</span></th>
 *     </tr>
 *     <tr>
 *       <td></td>
 *       <th colspan="8" id="bit_a">Bit Values</th>
 *     </tr>
 *     <tr>
 *       <th id="byte1_a">Byte 1</th>
 *       <td><center>0</center>
 *       <td colspan="7"><center>bits 6-0</center>
 *     </tr>
 *     <tr>
 *       <th colspan="9"><span style="font-weight:normal">
 *         The null character {@code '\u005Cu0000'} and characters
 *         in the range {@code '\u005Cu0080'} to {@code '\u005Cu07FF'} are
 *         represented by a pair of bytes:</span></th>
 *     </tr>
 *     <tr>
 *       <td></td>
 *       <th colspan="8" id="bit_b">Bit Values</th>
 *     </tr>
 *     <tr>
 *       <th id="byte1_b">Byte 1</th>
 *       <td><center>1</center>
 *       <td><center>1</center>
 *       <td><center>0</center>
 *       <td colspan="5"><center>bits 10-6</center>
 *     </tr>
 *     <tr>
 *       <th id="byte2_a">Byte 2</th>
 *       <td><center>1</center>
 *       <td><center>0</center>
 *       <td colspan="6"><center>bits 5-0</center>
 *     </tr>
 *     <tr>
 *       <th colspan="9"><span style="font-weight:normal">
 *         {@code char} values in the range {@code '\u005Cu0800'}
 *         to {@code '\u005CuFFFF'} are represented by three bytes:</span></th>
 *     </tr>
 *     <tr>
 *       <td></td>
 *       <th colspan="8" id="bit_c">Bit Values</th>
 *     </tr>
 *     <tr>
 *       <th id="byte1_c">Byte 1</th>
 *       <td><center>1</center>
 *       <td><center>1</center>
 *       <td><center>1</center>
 *       <td><center>0</center>
 *       <td colspan="4"><center>bits 15-12</center>
 *     </tr>
 *     <tr>
 *       <th id="byte2_b">Byte 2</th>
 *       <td><center>1</center>
 *       <td><center>0</center>
 *       <td colspan="6"><center>bits 11-6</center>
 *     </tr>
 *     <tr>
 *       <th id="byte3">Byte 3</th>
 *       <td><center>1</center>
 *       <td><center>0</center>
 *       <td colspan="6"><center>bits 5-0</center>
 *     </tr>
 *   </table>
 * </blockquote>
 * <p>
 * The differences between this format and the
 * standard UTF-8 format are the following:
 * <ul>
 * <li>The null byte {@code '\u005Cu0000'} is encoded in 2-byte format
 *     rather than 1-byte, so that the encoded strings never have
 *     embedded nulls.
 * <li>Only the 1-byte, 2-byte, and 3-byte formats are used.
 * <li><a href="../lang/Character.html#unicode">Supplementary characters</a>
 *     are represented in the form of surrogate pairs.
 * </ul>
 * @author  Frank Yellin
 * @see     java.io.DataInputStream
 * @see     java.io.DataOutput
 * @since   JDK1.0
 */
public interface DataInput {
    /**
     * Reads some bytes from an input
     * stream and stores them into the buffer
     * array {@code b}. The number of bytes
     * read is equal
     * to the length of {@code b}.
     * <p>
     * This method blocks until one of the
     * following conditions occurs:
     * <ul>
     * <li>{@code b.length}
     * bytes of input data are available, in which
     * case a normal return is made.
     *
     * <li>End of
     * file is detected, in which case an {@code EOFException}
     * is thrown.
     *
     * <li>An I/O error occurs, in
     * which case an {@code IOException} other
     * than {@code EOFException} is thrown.
     * </ul>
     * <p>
     * If {@code b} is {@code null},
     * a {@code NullPointerException} is thrown.
     * If {@code b.length} is zero, then
     * no bytes are read. Otherwise, the first
     * byte read is stored into element {@code b[0]},
     * the next one into {@code b[1]}, and
     * so on.
     * If an exception is thrown from
     * this method, then it may be that some but
     * not all bytes of {@code b} have been
     * updated with data from the input stream.
     *
     * @param     b   the buffer into which the data is read.
     * @exception  NullPointerException  if {@code b} is {@code null}.
     * @exception  EOFException  if this stream reaches the end before reading
     *               all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    void readFully(byte[] b) throws IOException;

    /**
     *
     * Reads {@code len}
     * bytes from
     * an input stream.
     * <p>
     * This method
     * blocks until one of the following conditions
     * occurs:
     * <ul>
     * <li>{@code len} bytes
     * of input data are available, in which case
     * a normal return is made.
     *
     * <li>End of file
     * is detected, in which case an {@code EOFException}
     * is thrown.
     *
     * <li>An I/O error occurs, in
     * which case an {@code IOException} other
     * than {@code EOFException} is thrown.
     * </ul>
     * <p>
     * If {@code b} is {@code null},
     * a {@code NullPointerException} is thrown.
     * If {@code off} is negative, or {@code len}
     * is negative, or {@code off+len} is
     * greater than the length of the array {@code b},
     * then an {@code IndexOutOfBoundsException}
     * is thrown.
     * If {@code len} is zero,
     * then no bytes are read. Otherwise, the first
     * byte read is stored into element {@code b[off]},
     * the next one into {@code b[off+1]},
     * and so on. On a normal return exactly
     * {@code len} bytes have been read.
     * If an exception is thrown from
     * this method, then it may be that some but
     * not all of the {@code len} bytes have been
     * stored into {@code b}.
     *
     * @param     b   the buffer into which the data is read.
     * @param off  an int specifying the offset in the data array {@code b}.
     * @param len  an int specifying the number of bytes to read.
     * @exception  NullPointerException  if {@code b} is {@code null}.
     * @exception  IndexOutOfBoundsException  if {@code off} is negative,
     *               {@code len} is negative, or {@code off+len} is
     *               greater than the length of the array {@code b}.
     * @exception  EOFException  if this stream reaches the end before reading
     *               all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    void readFully(byte[] b, int off, int len) throws IOException;

    /**
     * Makes an attempt to skip over
     * {@code n} bytes
     * of data from the input
     * stream, discarding the skipped bytes. However,
     * it may skip
     * over some smaller number of
     * bytes, possibly zero. This may result from
     * any of a
     * number of conditions; reaching
     * end of file before {@code n} bytes
     * have been skipped is
     * only one possibility.
     * This method never throws an {@code EOFException}.
     * The actual
     * number of bytes skipped is returned.
     *
     * @param      n   the number of bytes to be skipped.
     * @return     the number of bytes actually skipped.
     * @exception  IOException   if an I/O error occurs.
     */
    int skipBytes(int n) throws IOException;

    /**
     * Reads one input byte and returns
     * {@code true} if that byte is nonzero,
     * {@code false} if that byte is zero.
     * This method is suitable for reading
     * the byte written by the {@code writeBoolean}
     * method of interface {@code DataOutput}.
     *
     * @return     the {@code boolean} value read.
     * @exception  EOFException  if this stream reaches the end before reading
     *               all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    boolean readBoolean() throws IOException;

    /**
     * Reads and returns one input byte.
     * The byte is treated as a signed value in
     * the range {@code -128} through {@code 127},
     * inclusive.
     * This method is suitable for
     * reading the byte written by the {@code writeByte}
     * method of interface {@code DataOutput}.
     *
     * @return     the 8-bit value read.
     * @exception  EOFException  if this stream reaches the end before reading
     *               all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    byte readByte() throws IOException;

    /**
     * Reads one input byte, zero-extends
     * it to type {@code int}, and returns
     * the result, which is therefore in the range
     * {@code 0}
     * through {@code 255}.
     * This method is suitable for reading
     * the byte written by the {@code writeByte}
     * method of interface {@code DataOutput}
     * if the argument to {@code writeByte}
     * was intended to be a value in the range
     * {@code 0} through {@code 255}.
     *
     * @return     the unsigned 8-bit value read.
     * @exception  EOFException  if this stream reaches the end before reading
     *               all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    int readUnsignedByte() throws IOException;

    /**
     * Reads two input bytes and returns
     * a {@code short} value. Let {@code a}
     * be the first byte read and {@code b}
     * be the second byte. The value
     * returned
     * is:
     * <pre>{@code (short)((a << 8) | (b & 0xff))
     * }</pre>
     * This method
     * is suitable for reading the bytes written
     * by the {@code writeShort} method of
     * interface {@code DataOutput}.
     *
     * @return     the 16-bit value read.
     * @exception  EOFException  if this stream reaches the end before reading
     *               all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    short readShort() throws IOException;

    /**
     * Reads two input bytes and returns
     * an {@code int} value in the range {@code 0}
     * through {@code 65535}. Let {@code a}
     * be the first byte read and
     * {@code b}
     * be the second byte. The value returned is:
     * <pre>{@code (((a & 0xff) << 8) | (b & 0xff))
     * }</pre>
     * This method is suitable for reading the bytes
     * written by the {@code writeShort} method
     * of interface {@code DataOutput}  if
     * the argument to {@code writeShort}
     * was intended to be a value in the range
     * {@code 0} through {@code 65535}.
     *
     * @return     the unsigned 16-bit value read.
     * @exception  EOFException  if this stream reaches the end before reading
     *               all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    int readUnsignedShort() throws IOException;

    /**
     * Reads two input bytes and returns a {@code char} value.
     * Let {@code a}
     * be the first byte read and {@code b}
     * be the second byte. The value
     * returned is:
     * <pre>{@code (char)((a << 8) | (b & 0xff))
     * }</pre>
     * This method
     * is suitable for reading bytes written by
     * the {@code writeChar} method of interface
     * {@code DataOutput}.
     *
     * @return     the {@code char} value read.
     * @exception  EOFException  if this stream reaches the end before reading
     *               all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    char readChar() throws IOException;

    /**
     * Reads four input bytes and returns an
     * {@code int} value. Let {@code a-d}
     * be the first through fourth bytes read. The value returned is:
     * <pre>{@code
     * (((a & 0xff) << 24) | ((b & 0xff) << 16) |
     *  ((c & 0xff) <<  8) | (d & 0xff))
     * }</pre>
     * This method is suitable
     * for reading bytes written by the {@code writeInt}
     * method of interface {@code DataOutput}.
     *
     * @return     the {@code int} value read.
     * @exception  EOFException  if this stream reaches the end before reading
     *               all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    int readInt() throws IOException;

    /**
     * Reads eight input bytes and returns
     * a {@code long} value. Let {@code a-h}
     * be the first through eighth bytes read.
     * The value returned is:
     * <pre>{@code
     * (((long)(a & 0xff) << 56) |
     *  ((long)(b & 0xff) << 48) |
     *  ((long)(c & 0xff) << 40) |
     *  ((long)(d & 0xff) << 32) |
     *  ((long)(e & 0xff) << 24) |
     *  ((long)(f & 0xff) << 16) |
     *  ((long)(g & 0xff) <<  8) |
     *  ((long)(h & 0xff)))
     * }</pre>
     * <p>
     * This method is suitable
     * for reading bytes written by the {@code writeLong}
     * method of interface {@code DataOutput}.
     *
     * @return     the {@code long} value read.
     * @exception  EOFException  if this stream reaches the end before reading
     *               all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    long readLong() throws IOException;

    /**
     * Reads four input bytes and returns
     * a {@code float} value. It does this
     * by first constructing an {@code int}
     * value in exactly the manner
     * of the {@code readInt}
     * method, then converting this {@code int}
     * value to a {@code float} in
     * exactly the manner of the method {@code Float.intBitsToFloat}.
     * This method is suitable for reading
     * bytes written by the {@code writeFloat}
     * method of interface {@code DataOutput}.
     *
     * @return     the {@code float} value read.
     * @exception  EOFException  if this stream reaches the end before reading
     *               all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    float readFloat() throws IOException;

    /**
     * Reads eight input bytes and returns
     * a {@code double} value. It does this
     * by first constructing a {@code long}
     * value in exactly the manner
     * of the {@code readLong}
     * method, then converting this {@code long}
     * value to a {@code double} in exactly
     * the manner of the method {@code Double.longBitsToDouble}.
     * This method is suitable for reading
     * bytes written by the {@code writeDouble}
     * method of interface {@code DataOutput}.
     *
     * @return     the {@code double} value read.
     * @exception  EOFException  if this stream reaches the end before reading
     *               all the bytes.
     * @exception  IOException   if an I/O error occurs.
     */
    double readDouble() throws IOException;

    /**
     * Reads the next line of text from the input stream.
     * It reads successive bytes, converting
     * each byte separately into a character,
     * until it encounters a line terminator or
     * end of
     * file; the characters read are then
     * returned as a {@code String}. Note
     * that because this
     * method processes bytes,
     * it does not support input of the full Unicode
     * character set.
     * <p>
     * If end of file is encountered
     * before even one byte can be read, then {@code null}
     * is returned. Otherwise, each byte that is
     * read is converted to type {@code char}
     * by zero-extension. If the character {@code '\n'}
     * is encountered, it is discarded and reading
     * ceases. If the character {@code '\r'}
     * is encountered, it is discarded and, if
     * the following byte converts to the
     * character {@code '\n'}, then that is
     * discarded also; reading then ceases. If
     * end of file is encountered before either
     * of the characters {@code '\n'} and
     * {@code '\r'} is encountered, reading
     * ceases. Once reading has ceased, a {@code String}
     * is returned that contains all the characters
     * read and not discarded, taken in order.
     * Note that every character in this string
     * will have a value less than {@code \u005Cu0100},
     * that is, {@code (char)256}.
     *
     * @return the next line of text from the input stream,
     *         or {@code null} if the end of file is
     *         encountered before a byte can be read.
     * @exception  IOException  if an I/O error occurs.
     */
    String readLine() throws IOException;

    /**
     * Reads in a string that has been encoded using a
     * <a href="#modified-utf-8">modified UTF-8</a>
     * format.
     * The general contract of {@code readUTF}
     * is that it reads a representation of a Unicode
     * character string encoded in modified
     * UTF-8 format; this string of characters
     * is then returned as a {@code String}.
     * <p>
     * First, two bytes are read and used to
     * construct an unsigned 16-bit integer in
     * exactly the manner of the {@code readUnsignedShort}
     * method. This integer value is called the
     * <i>UTF length</i> and specifies the number
     * of additional bytes to be read. These bytes
     * are then converted to characters by considering
     * them in groups. The length of each group
     * is computed from the value of the first
     * byte of the group. The byte following a
     * group, if any, is the first byte of the
     * next group.
     * <p>
     * If the first byte of a group
     * matches the bit pattern {@code 0xxxxxxx}
     * (where {@code x} means "may be {@code 0}
     * or {@code 1}"), then the group consists
     * of just that byte. The byte is zero-extended
     * to form a character.
     * <p>
     * If the first byte
     * of a group matches the bit pattern {@code 110xxxxx},
     * then the group consists of that byte {@code a}
     * and a second byte {@code b}. If there
     * is no byte {@code b} (because byte
     * {@code a} was the last of the bytes
     * to be read), or if byte {@code b} does
     * not match the bit pattern {@code 10xxxxxx},
     * then a {@code UTFDataFormatException}
     * is thrown. Otherwise, the group is converted
     * to the character:
     * <pre>{@code (char)(((a & 0x1F) << 6) | (b & 0x3F))
     * }</pre>
     * If the first byte of a group
     * matches the bit pattern {@code 1110xxxx},
     * then the group consists of that byte {@code a}
     * and two more bytes {@code b} and {@code c}.
     * If there is no byte {@code c} (because
     * byte {@code a} was one of the last
     * two of the bytes to be read), or either
     * byte {@code b} or byte {@code c}
     * does not match the bit pattern {@code 10xxxxxx},
     * then a {@code UTFDataFormatException}
     * is thrown. Otherwise, the group is converted
     * to the character:
     * <pre>{@code
     * (char)(((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F))
     * }</pre>
     * If the first byte of a group matches the
     * pattern {@code 1111xxxx} or the pattern
     * {@code 10xxxxxx}, then a {@code UTFDataFormatException}
     * is thrown.
     * <p>
     * If end of file is encountered
     * at any time during this entire process,
     * then an {@code EOFException} is thrown.
     * <p>
     * After every group has been converted to
     * a character by this process, the characters
     * are gathered, in the same order in which
     * their corresponding groups were read from
     * the input stream, to form a {@code String},
     * which is returned.
     * <p>
     * The {@code writeUTF}
     * method of interface {@code DataOutput}
     * may be used to write data that is suitable
     * for reading by this method.
     * @return     a Unicode string.
     * @exception  EOFException            if this stream reaches the end
     *               before reading all the bytes.
     * @exception  IOException             if an I/O error occurs.
     * @exception  UTFDataFormatException  if the bytes do not represent a
     *               valid modified UTF-8 encoding of a string.
     */
    String readUTF() throws IOException;
}