1 /**
2  * Copyright (c) 2008, http://www.snakeyaml.org
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package org.yaml.snakeyaml.reader;
17 
18 import java.io.IOException;
19 import java.io.Reader;
20 import java.nio.charset.Charset;
21 import java.util.regex.Matcher;
22 import java.util.regex.Pattern;
23 
24 import org.yaml.snakeyaml.error.Mark;
25 import org.yaml.snakeyaml.error.YAMLException;
26 import org.yaml.snakeyaml.scanner.Constant;
27 
28 /**
29  * Reader: checks if characters are in allowed range, adds '\0' to the end.
30  */
31 public class StreamReader {
32     public final static Pattern NON_PRINTABLE = Pattern
33             .compile("[^\t\n\r\u0020-\u007E\u0085\u00A0-\uD7FF\uE000-\uFFFD]");
34     private String name;
35     private final Reader stream;
36     private int pointer = 0;
37     private boolean eof = true;
38     private String buffer;
39     private int index = 0;
40     private int line = 0;
41     private int column = 0;
42     private char[] data;
43 
StreamReader(String stream)44     public StreamReader(String stream) {
45         this.name = "'string'";
46         this.buffer = ""; // to set length to 0
47         checkPrintable(stream);
48         this.buffer = stream + "\0";
49         this.stream = null;
50         this.eof = true;
51         this.data = null;
52     }
53 
StreamReader(Reader reader)54     public StreamReader(Reader reader) {
55         this.name = "'reader'";
56         this.buffer = "";
57         this.stream = reader;
58         this.eof = false;
59         this.data = new char[1024];
60         this.update();
61     }
62 
checkPrintable(CharSequence data)63     void checkPrintable(CharSequence data) {
64         Matcher em = NON_PRINTABLE.matcher(data);
65         if (em.find()) {
66             int position = this.index + this.buffer.length() - this.pointer + em.start();
67             throw new ReaderException(name, position, em.group().charAt(0),
68                     "special characters are not allowed");
69         }
70     }
71 
72     /**
73      * Checks <code>chars</chars> for the non-printable characters.
74      *
75      * @param chars
76      *            the array where to search.
77      * @param begin
78      *            the beginning index, inclusive.
79      * @param end
80      *            the ending index, exclusive.
81      * @throws ReaderException
82      *             if <code>chars</code> contains non-printable character(s).
83      */
checkPrintable(final char[] chars, final int begin, final int end)84     void checkPrintable(final char[] chars, final int begin, final int end) {
85         for (int i = begin; i < end; i++) {
86             final char c = chars[i];
87 
88             if (isPrintable(c)) {
89                 continue;
90             }
91 
92             int position = this.index + this.buffer.length() - this.pointer + i;
93             throw new ReaderException(name, position, c, "special characters are not allowed");
94         }
95     }
96 
isPrintable(final char c)97     public static boolean isPrintable(final char c) {
98         return (c >= '\u0020' && c <= '\u007E') || c == '\n' || c == '\r' || c == '\t'
99                 || c == '\u0085' || (c >= '\u00A0' && c <= '\uD7FF')
100                 || (c >= '\uE000' && c <= '\uFFFD');
101     }
102 
getMark()103     public Mark getMark() {
104         return new Mark(name, this.index, this.line, this.column, this.buffer, this.pointer);
105     }
106 
forward()107     public void forward() {
108         forward(1);
109     }
110 
111     /**
112      * read the next length characters and move the pointer.
113      *
114      * @param length
115      */
forward(int length)116     public void forward(int length) {
117         if (this.pointer + length + 1 >= this.buffer.length()) {
118             update();
119         }
120         char ch = 0;
121         for (int i = 0; i < length; i++) {
122             ch = this.buffer.charAt(this.pointer);
123             this.pointer++;
124             this.index++;
125             if (Constant.LINEBR.has(ch) || (ch == '\r' && buffer.charAt(pointer) != '\n')) {
126                 this.line++;
127                 this.column = 0;
128             } else if (ch != '\uFEFF') {
129                 this.column++;
130             }
131         }
132     }
133 
peek()134     public char peek() {
135         return this.buffer.charAt(this.pointer);
136     }
137 
138     /**
139      * Peek the next index-th character
140      *
141      * @param index
142      * @return the next index-th character
143      */
peek(int index)144     public char peek(int index) {
145         if (this.pointer + index + 1 > this.buffer.length()) {
146             update();
147         }
148         return this.buffer.charAt(this.pointer + index);
149     }
150 
151     /**
152      * peek the next length characters
153      *
154      * @param length
155      * @return the next length characters
156      */
prefix(int length)157     public String prefix(int length) {
158         if (this.pointer + length >= this.buffer.length()) {
159             update();
160         }
161         if (this.pointer + length > this.buffer.length()) {
162             return this.buffer.substring(this.pointer);
163         }
164         return this.buffer.substring(this.pointer, this.pointer + length);
165     }
166 
167     /**
168      * prefix(length) immediately followed by forward(length)
169      */
prefixForward(int length)170     public String prefixForward(int length) {
171         final String prefix = prefix(length);
172         this.pointer += length;
173         this.index += length;
174         // prefix never contains new line characters
175         this.column += length;
176         return prefix;
177     }
178 
update()179     private void update() {
180         if (!this.eof) {
181             this.buffer = buffer.substring(this.pointer);
182             this.pointer = 0;
183             try {
184                 int converted = this.stream.read(data);
185                 if (converted > 0) {
186                     /*
187                      * Let's create StringBuilder manually. Anyway str1 + str2
188                      * generates new StringBuilder(str1).append(str2).toSting()
189                      * Giving correct capacity to the constructor prevents
190                      * unnecessary operations in appends.
191                      */
192                     checkPrintable(data, 0, converted);
193                     this.buffer = new StringBuilder(buffer.length() + converted).append(buffer)
194                             .append(data, 0, converted).toString();
195                 } else {
196                     this.eof = true;
197                     this.buffer += "\0";
198                 }
199             } catch (IOException ioe) {
200                 throw new YAMLException(ioe);
201             }
202         }
203     }
204 
getColumn()205     public int getColumn() {
206         return column;
207     }
208 
getEncoding()209     public Charset getEncoding() {
210         return Charset.forName(((UnicodeReader) this.stream).getEncoding());
211     }
212 
getIndex()213     public int getIndex() {
214         return index;
215     }
216 
getLine()217     public int getLine() {
218         return line;
219     }
220 }
221