1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package libcore.io;
18 
19 import java.io.ByteArrayOutputStream;
20 import java.io.Closeable;
21 import java.io.EOFException;
22 import java.io.InputStream;
23 import java.io.IOException;
24 import java.nio.charset.Charset;
25 import java.nio.charset.StandardCharsets;
26 
27 /**
28  * Buffers input from an {@link InputStream} for reading lines.
29  *
 * This class is used for buffered reading of lines. For purposes of this class, a line ends with
 * "\n" or "\r\n". End of input is reported by throwing {@code EOFException}. An unterminated line
 * at the end of input is invalid and is ignored; the caller may use {@code hasUnterminatedLine()}
 * to detect it after catching the {@code EOFException}.
34  *
35  * This class is intended for reading input that strictly consists of lines, such as line-based
36  * cache entries or cache journal. Unlike the {@link BufferedReader} which in conjunction with
37  * {@link InputStreamReader} provides similar functionality, this class uses different
38  * end-of-input reporting and a more restrictive definition of a line.
39  *
40  * This class supports only charsets that encode '\r' and '\n' as a single byte with value 13
41  * and 10, respectively, and the representation of no other character contains these values.
42  * We currently check in constructor that the charset is one of US-ASCII, UTF-8 and ISO-8859-1.
43  * The default charset is US_ASCII.
44  */
public class StrictLineReader implements Closeable {
    private static final byte CR = (byte) '\r';
    private static final byte LF = (byte) '\n';

    /** Buffer size used by the constructors that do not take an explicit capacity. */
    private static final int DEFAULT_CAPACITY = 8192;

    private final InputStream in;
    private final Charset charset;

    /*
     * Buffered data is stored in {@code buf}. As long as no exception occurs, 0 <= pos <= end
     * and the data in the range [pos, end) is buffered for reading. At end of input, if there is
     * an unterminated line, we set end == -1, otherwise end == pos. If the underlying
     * {@code InputStream} throws an {@code IOException}, end may remain as either pos or -1.
     *
     * {@code buf} is set to null by {@code close()} and doubles as the "closed" marker.
     */
    private byte[] buf;
    private int pos;
    private int end;

    /**
     * Constructs a new {@code StrictLineReader} with the default capacity and charset.
     *
     * @param in the {@code InputStream} to read data from.
     * @throws NullPointerException if {@code in} is null.
     */
    public StrictLineReader(InputStream in) {
        this(in, DEFAULT_CAPACITY);
    }

    /**
     * Constructs a new {@code StrictLineReader} with the specified capacity and the default
     * charset (US-ASCII).
     *
     * @param in the {@code InputStream} to read data from.
     * @param capacity the capacity of the buffer.
     * @throws NullPointerException if {@code in} is null.
     * @throws IllegalArgumentException for negative or zero {@code capacity}.
     */
    public StrictLineReader(InputStream in, int capacity) {
        this(in, capacity, StandardCharsets.US_ASCII);
    }

    /**
     * Constructs a new {@code StrictLineReader} with the specified charset and the default
     * capacity.
     *
     * @param in the {@code InputStream} to read data from.
     * @param charset the charset used to decode data.
     *         Only US-ASCII, UTF-8 and ISO-8859-1 are supported.
     * @throws NullPointerException if {@code in} or {@code charset} is null.
     * @throws IllegalArgumentException if the specified charset is not supported.
     */
    public StrictLineReader(InputStream in, Charset charset) {
        this(in, DEFAULT_CAPACITY, charset);
    }

    /**
     * Constructs a new {@code StrictLineReader} with the specified capacity and charset.
     *
     * @param in the {@code InputStream} to read data from.
     * @param capacity the capacity of the buffer.
     * @param charset the charset used to decode data.
     *         Only US-ASCII, UTF-8 and ISO-8859-1 are supported.
     * @throws NullPointerException if {@code in} or {@code charset} is null.
     * @throws IllegalArgumentException if {@code capacity} is negative or zero
     *         or the specified charset is not supported.
     */
    public StrictLineReader(InputStream in, int capacity, Charset charset) {
        if (in == null) {
            throw new NullPointerException("in == null");
        } else if (charset == null) {
            throw new NullPointerException("charset == null");
        }
        // A zero-length buffer must be rejected too: read(buf, 0, 0) returns 0 instead of -1,
        // so readLine() would never make progress and never observe end of input.
        if (capacity <= 0) {
            throw new IllegalArgumentException("capacity <= 0");
        }
        if (!(charset.equals(StandardCharsets.US_ASCII) || charset.equals(StandardCharsets.UTF_8)
                || charset.equals(StandardCharsets.ISO_8859_1))) {
            throw new IllegalArgumentException("Unsupported encoding");
        }

        this.in = in;
        this.charset = charset;
        buf = new byte[capacity];
    }

    /**
     * Closes the reader by closing the underlying {@code InputStream} and
     * marking this reader as closed. Calling this method on an already closed
     * reader has no effect.
     *
     * @throws IOException for errors when closing the underlying {@code InputStream}.
     */
    @Override
    public void close() throws IOException {
        synchronized (in) {
            if (buf != null) {
                // Drop the buffer first so the reader counts as closed even if in.close() throws.
                buf = null;
                in.close();
            }
        }
    }

    /**
     * Reads the next line. A line ends with {@code "\n"} or {@code "\r\n"},
     * this end of line marker is not included in the result.
     *
     * @return the next line from the input.
     * @throws IOException for underlying {@code InputStream} errors, or if this reader
     *         has been closed.
     * @throws EOFException for the end of source stream.
     */
    public String readLine() throws IOException {
        synchronized (in) {
            if (buf == null) {
                throw new IOException("LineReader is closed");
            }

            // Read more data if we are at the end of the buffered data.
            // Though it's an error to read after an exception, we will let {@code fillBuf()}
            // throw again if that happens; thus we need to handle end == -1 as well as end == pos.
            if (pos >= end) {
                fillBuf();
            }
            // Fast path: the whole line is already in the buffer, decode it in place.
            for (int i = pos; i != end; ++i) {
                if (buf[i] == LF) {
                    int lineEnd = (i != pos && buf[i - 1] == CR) ? i - 1 : i;
                    String res = new String(buf, pos, lineEnd - pos, charset);
                    pos = i + 1;
                    return res;
                }
            }

            // Slow path: the line spans several buffer refills, so accumulate its bytes.
            // Let's anticipate up to 80 characters on top of those already read.
            ByteArrayOutputStream out = new ByteArrayOutputStream(end - pos + 80) {
                @Override
                public String toString() {
                    // Here buf and count are the inherited ByteArrayOutputStream fields,
                    // not the reader's buffer. A trailing CR belongs to the "\r\n" marker.
                    int length = (count > 0 && buf[count - 1] == CR) ? count - 1 : count;
                    return new String(buf, 0, length, charset);
                }
            };

            while (true) {
                out.write(buf, pos, end - pos);
                // Mark unterminated line in case fillBuf throws EOFException or IOException.
                end = -1;
                fillBuf();
                // Try to find LF in the buffered data and return the line if successful.
                for (int i = pos; i != end; ++i) {
                    if (buf[i] == LF) {
                        if (i != pos) {
                            out.write(buf, pos, i - pos);
                        }
                        pos = i + 1;
                        return out.toString();
                    }
                }
            }
        }
    }

    /**
     * Read an {@code int} from a line containing its decimal representation.
     *
     * @return the value of the {@code int} from the next line.
     * @throws IOException for underlying {@code InputStream} errors or conversion error;
     *         on a conversion error the {@code NumberFormatException} is attached as the cause.
     * @throws EOFException for the end of source stream.
     */
    public int readInt() throws IOException {
        String intString = readLine();
        try {
            return Integer.parseInt(intString);
        } catch (NumberFormatException e) {
            // Keep the original exception as the cause so the parse failure stays diagnosable.
            throw new IOException("expected an int but was \"" + intString + "\"", e);
        }
    }

    /**
     * Check whether there was an unterminated line at end of input after the line reader reported
     * end-of-input with EOFException. The value is meaningless in any other situation.
     *
     * @return true if there was an unterminated line at end of input.
     */
    public boolean hasUnterminatedLine() {
        return end == -1;
    }

    /**
     * Reads new input data into the buffer. Call only with pos == end or end == -1,
     * depending on the desired outcome if the function throws: {@code pos} and {@code end}
     * are only updated after a successful read, so they keep the caller's values on failure.
     *
     * @throws IOException for underlying {@code InputStream} errors.
     * @throws EOFException for the end of source stream.
     */
    private void fillBuf() throws IOException {
        int result = in.read(buf, 0, buf.length);
        if (result == -1) {
            throw new EOFException();
        }
        pos = 0;
        end = result;
    }
}
243