/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package libcore.io;

import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.EOFException;
import java.io.InputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

/**
 * Buffers input from an {@link InputStream} for reading lines.
 *
 * <p>This class is used for buffered reading of lines. For purposes of this class, a line ends
 * with "\n" or "\r\n". End of input is reported by throwing {@code EOFException}. An unterminated
 * line at end of input is invalid and will be ignored; the caller may use
 * {@code hasUnterminatedLine()} to detect it after catching the {@code EOFException}.
 *
 * <p>This class is intended for reading input that strictly consists of lines, such as line-based
 * cache entries or cache journal. Unlike the {@link java.io.BufferedReader} which in conjunction
 * with {@link java.io.InputStreamReader} provides similar functionality, this class uses different
 * end-of-input reporting and a more restrictive definition of a line.
 *
 * <p>This class supports only charsets that encode '\r' and '\n' as a single byte with value 13
 * and 10, respectively, and the representation of no other character contains these values.
 * We currently check in constructor that the charset is one of US-ASCII, UTF-8 and ISO-8859-1.
 * The default charset is US-ASCII.
 */
public class StrictLineReader implements Closeable {
    private static final byte CR = (byte) '\r';
    private static final byte LF = (byte) '\n';

    /** Buffer size used by the constructors that do not take an explicit capacity. */
    private static final int DEFAULT_CAPACITY = 8192;

    private final InputStream in;
    private final Charset charset;

    /*
     * Buffered data is stored in {@code buf}. As long as no exception occurs, 0 <= pos <= end
     * and the data in the range [pos, end) is buffered for reading. At end of input, if there is
     * an unterminated line, we set end == -1, otherwise end == pos. If the underlying
     * {@code InputStream} throws an {@code IOException}, end may remain as either pos or -1.
     *
     * A null {@code buf} marks the reader as closed.
     */
    private byte[] buf;
    private int pos;
    private int end;

    /**
     * Constructs a new {@code StrictLineReader} with the default capacity and charset.
     *
     * @param in the {@code InputStream} to read data from.
     * @throws NullPointerException if {@code in} is null.
     */
    public StrictLineReader(InputStream in) {
        this(in, DEFAULT_CAPACITY);
    }

    /**
     * Constructs a new {@code StrictLineReader} with the specified capacity and the default
     * charset.
     *
     * @param in the {@code InputStream} to read data from.
     * @param capacity the capacity of the buffer.
     * @throws NullPointerException if {@code in} is null.
     * @throws IllegalArgumentException for negative or zero {@code capacity}.
     */
    public StrictLineReader(InputStream in, int capacity) {
        this(in, capacity, StandardCharsets.US_ASCII);
    }

    /**
     * Constructs a new {@code StrictLineReader} with the specified charset and the default
     * capacity.
     *
     * @param in the {@code InputStream} to read data from.
     * @param charset the charset used to decode data.
     *         Only US-ASCII, UTF-8 and ISO-8859-1 are supported.
     * @throws NullPointerException if {@code in} or {@code charset} is null.
     * @throws IllegalArgumentException if the specified charset is not supported.
     */
    public StrictLineReader(InputStream in, Charset charset) {
        this(in, DEFAULT_CAPACITY, charset);
    }

    /**
     * Constructs a new {@code StrictLineReader} with the specified capacity and charset.
     *
     * @param in the {@code InputStream} to read data from.
     * @param capacity the capacity of the buffer.
     * @param charset the charset used to decode data.
     *         Only US-ASCII, UTF-8 and ISO-8859-1 are supported.
     * @throws NullPointerException if {@code in} or {@code charset} is null.
     * @throws IllegalArgumentException if {@code capacity} is negative or zero
     *         or the specified charset is not supported.
     */
    public StrictLineReader(InputStream in, int capacity, Charset charset) {
        if (in == null) {
            throw new NullPointerException("in == null");
        } else if (charset == null) {
            throw new NullPointerException("charset == null");
        }
        // Zero must be rejected as well: reading into an empty buffer returns 0 without
        // consuming any input, so readLine() would never make progress.
        if (capacity <= 0) {
            throw new IllegalArgumentException("capacity <= 0");
        }
        if (!(charset.equals(StandardCharsets.US_ASCII) || charset.equals(StandardCharsets.UTF_8)
                || charset.equals(StandardCharsets.ISO_8859_1))) {
            throw new IllegalArgumentException("Unsupported encoding");
        }

        this.in = in;
        this.charset = charset;
        buf = new byte[capacity];
    }

    /**
     * Closes the reader by closing the underlying {@code InputStream} and
     * marking this reader as closed. Calling this method on an already closed
     * reader has no effect.
     *
     * @throws IOException for errors when closing the underlying {@code InputStream}.
     */
    @Override
    public void close() throws IOException {
        synchronized (in) {
            if (buf != null) {
                // Drop the buffer first so the reader counts as closed even if in.close() throws.
                buf = null;
                in.close();
            }
        }
    }

    /**
     * Reads the next line. A line ends with {@code "\n"} or {@code "\r\n"},
     * this end of line marker is not included in the result.
     *
     * @return the next line from the input.
     * @throws IOException for underlying {@code InputStream} errors, or if this reader
     *         has been closed.
     * @throws EOFException for the end of source stream.
     */
    public String readLine() throws IOException {
        synchronized (in) {
            if (buf == null) {
                throw new IOException("LineReader is closed");
            }

            // Read more data if we are at the end of the buffered data.
            // Though it's an error to read after an exception, we will let {@code fillBuf()}
            // throw again if that happens; thus we need to handle end == -1 as well as end == pos.
            if (pos >= end) {
                fillBuf();
            }
            // Try to find LF in the buffered data and return the line if successful.
            for (int i = pos; i != end; ++i) {
                if (buf[i] == LF) {
                    int lineEnd = (i != pos && buf[i - 1] == CR) ? i - 1 : i;
                    String res = new String(buf, pos, lineEnd - pos, charset);
                    pos = i + 1;
                    return res;
                }
            }

            // The line continues past the buffered data, so accumulate it across refills.
            // Let's anticipate up to 80 characters on top of those already read.
            ByteArrayOutputStream out = new ByteArrayOutputStream(end - pos + 80) {
                @Override
                public String toString() {
                    // Here buf and count are the inherited ByteArrayOutputStream fields, not the
                    // reader's buffer. Decoding straight from them avoids a toByteArray() copy,
                    // and a trailing CR (from a "\r\n" terminator) is dropped.
                    int length = (count > 0 && buf[count - 1] == CR) ? count - 1 : count;
                    return new String(buf, 0, length, charset);
                }
            };

            while (true) {
                out.write(buf, pos, end - pos);
                // Mark unterminated line in case fillBuf throws EOFException or IOException.
                end = -1;
                fillBuf();
                // Try to find LF in the buffered data and return the line if successful.
                for (int i = pos; i != end; ++i) {
                    if (buf[i] == LF) {
                        if (i != pos) {
                            out.write(buf, pos, i - pos);
                        }
                        pos = i + 1;
                        return out.toString();
                    }
                }
            }
        }
    }

    /**
     * Read an {@code int} from a line containing its decimal representation.
     *
     * @return the value of the {@code int} from the next line.
     * @throws IOException for underlying {@code InputStream} errors or conversion error.
     * @throws EOFException for the end of source stream.
     */
    public int readInt() throws IOException {
        String intString = readLine();
        try {
            return Integer.parseInt(intString);
        } catch (NumberFormatException e) {
            // Keep the parse failure as the cause so it is not lost to callers.
            throw new IOException("expected an int but was \"" + intString + "\"", e);
        }
    }

    /**
     * Check whether there was an unterminated line at end of input after the line reader reported
     * end-of-input with EOFException. The value is meaningless in any other situation.
     *
     * @return true if there was an unterminated line at end of input.
     */
    public boolean hasUnterminatedLine() {
        return end == -1;
    }

    /**
     * Reads new input data into the buffer. Call only with pos == end or end == -1,
     * depending on the desired outcome if the function throws.
     *
     * @throws IOException for underlying {@code InputStream} errors.
     * @throws EOFException for the end of source stream.
     */
    private void fillBuf() throws IOException {
        int result = in.read(buf, 0, buf.length);
        if (result == -1) {
            // pos and end are deliberately left untouched so the caller's marker survives.
            throw new EOFException();
        }
        pos = 0;
        end = result;
    }
}