1 /** 2 * Copyright (c) 2008, http://www.snakeyaml.org 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package org.yaml.snakeyaml.reader; 17 18 import java.io.IOException; 19 import java.io.Reader; 20 import java.nio.charset.Charset; 21 import java.util.regex.Matcher; 22 import java.util.regex.Pattern; 23 24 import org.yaml.snakeyaml.error.Mark; 25 import org.yaml.snakeyaml.error.YAMLException; 26 import org.yaml.snakeyaml.scanner.Constant; 27 28 /** 29 * Reader: checks if characters are in allowed range, adds '\0' to the end. 30 */ 31 public class StreamReader { 32 public final static Pattern NON_PRINTABLE = Pattern 33 .compile("[^\t\n\r\u0020-\u007E\u0085\u00A0-\uD7FF\uE000-\uFFFD]"); 34 private String name; 35 private final Reader stream; 36 private int pointer = 0; 37 private boolean eof = true; 38 private String buffer; 39 private int index = 0; 40 private int line = 0; 41 private int column = 0; 42 private char[] data; 43 StreamReader(String stream)44 public StreamReader(String stream) { 45 this.name = "'string'"; 46 this.buffer = ""; // to set length to 0 47 checkPrintable(stream); 48 this.buffer = stream + "\0"; 49 this.stream = null; 50 this.eof = true; 51 this.data = null; 52 } 53 StreamReader(Reader reader)54 public StreamReader(Reader reader) { 55 this.name = "'reader'"; 56 this.buffer = ""; 57 this.stream = reader; 58 this.eof = false; 59 this.data = new char[1024]; 60 this.update(); 61 } 62 checkPrintable(CharSequence data)63 void checkPrintable(CharSequence data) { 64 Matcher em = NON_PRINTABLE.matcher(data); 65 if (em.find()) { 66 int position = this.index + this.buffer.length() - this.pointer + em.start(); 67 throw new ReaderException(name, position, em.group().charAt(0), 68 "special characters are not allowed"); 69 } 70 } 71 72 /** 73 * Checks <code>chars</chars> for the non-printable characters. 74 * 75 * @param chars 76 * the array where to search. 77 * @param begin 78 * the beginning index, inclusive. 79 * @param end 80 * the ending index, exclusive. 81 * @throws ReaderException 82 * if <code>chars</code> contains non-printable character(s). 83 */ checkPrintable(final char[] chars, final int begin, final int end)84 void checkPrintable(final char[] chars, final int begin, final int end) { 85 for (int i = begin; i < end; i++) { 86 final char c = chars[i]; 87 88 if (isPrintable(c)) { 89 continue; 90 } 91 92 int position = this.index + this.buffer.length() - this.pointer + i; 93 throw new ReaderException(name, position, c, "special characters are not allowed"); 94 } 95 } 96 isPrintable(final char c)97 public static boolean isPrintable(final char c) { 98 return (c >= '\u0020' && c <= '\u007E') || c == '\n' || c == '\r' || c == '\t' 99 || c == '\u0085' || (c >= '\u00A0' && c <= '\uD7FF') 100 || (c >= '\uE000' && c <= '\uFFFD'); 101 } 102 getMark()103 public Mark getMark() { 104 return new Mark(name, this.index, this.line, this.column, this.buffer, this.pointer); 105 } 106 forward()107 public void forward() { 108 forward(1); 109 } 110 111 /** 112 * read the next length characters and move the pointer. 113 * 114 * @param length 115 */ forward(int length)116 public void forward(int length) { 117 if (this.pointer + length + 1 >= this.buffer.length()) { 118 update(); 119 } 120 char ch = 0; 121 for (int i = 0; i < length; i++) { 122 ch = this.buffer.charAt(this.pointer); 123 this.pointer++; 124 this.index++; 125 if (Constant.LINEBR.has(ch) || (ch == '\r' && buffer.charAt(pointer) != '\n')) { 126 this.line++; 127 this.column = 0; 128 } else if (ch != '\uFEFF') { 129 this.column++; 130 } 131 } 132 } 133 peek()134 public char peek() { 135 return this.buffer.charAt(this.pointer); 136 } 137 138 /** 139 * Peek the next index-th character 140 * 141 * @param index 142 * @return the next index-th character 143 */ peek(int index)144 public char peek(int index) { 145 if (this.pointer + index + 1 > this.buffer.length()) { 146 update(); 147 } 148 return this.buffer.charAt(this.pointer + index); 149 } 150 151 /** 152 * peek the next length characters 153 * 154 * @param length 155 * @return the next length characters 156 */ prefix(int length)157 public String prefix(int length) { 158 if (this.pointer + length >= this.buffer.length()) { 159 update(); 160 } 161 if (this.pointer + length > this.buffer.length()) { 162 return this.buffer.substring(this.pointer); 163 } 164 return this.buffer.substring(this.pointer, this.pointer + length); 165 } 166 167 /** 168 * prefix(length) immediately followed by forward(length) 169 */ prefixForward(int length)170 public String prefixForward(int length) { 171 final String prefix = prefix(length); 172 this.pointer += length; 173 this.index += length; 174 // prefix never contains new line characters 175 this.column += length; 176 return prefix; 177 } 178 update()179 private void update() { 180 if (!this.eof) { 181 this.buffer = buffer.substring(this.pointer); 182 this.pointer = 0; 183 try { 184 int converted = this.stream.read(data); 185 if (converted > 0) { 186 /* 187 * Let's create StringBuilder manually. Anyway str1 + str2 188 * generates new StringBuilder(str1).append(str2).toSting() 189 * Giving correct capacity to the constructor prevents 190 * unnecessary operations in appends. 191 */ 192 checkPrintable(data, 0, converted); 193 this.buffer = new StringBuilder(buffer.length() + converted).append(buffer) 194 .append(data, 0, converted).toString(); 195 } else { 196 this.eof = true; 197 this.buffer += "\0"; 198 } 199 } catch (IOException ioe) { 200 throw new YAMLException(ioe); 201 } 202 } 203 } 204 getColumn()205 public int getColumn() { 206 return column; 207 } 208 getEncoding()209 public Charset getEncoding() { 210 return Charset.forName(((UnicodeReader) this.stream).getEncoding()); 211 } 212 getIndex()213 public int getIndex() { 214 return index; 215 } 216 getLine()217 public int getLine() { 218 return line; 219 } 220 } 221