1 /**
2  * Copyright (c) 2008, http://www.snakeyaml.org
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package org.pyyaml;
17 
18 import java.util.ArrayList;
19 import java.util.List;
20 import java.util.Map;
21 
22 import org.yaml.snakeyaml.error.Mark;
23 import org.yaml.snakeyaml.nodes.Tag;
24 import org.yaml.snakeyaml.scanner.Scanner;
25 import org.yaml.snakeyaml.scanner.ScannerImpl;
26 import org.yaml.snakeyaml.tokens.AliasToken;
27 import org.yaml.snakeyaml.tokens.AnchorToken;
28 import org.yaml.snakeyaml.tokens.DirectiveToken;
29 import org.yaml.snakeyaml.tokens.DocumentStartToken;
30 import org.yaml.snakeyaml.tokens.FlowEntryToken;
31 import org.yaml.snakeyaml.tokens.FlowMappingEndToken;
32 import org.yaml.snakeyaml.tokens.FlowMappingStartToken;
33 import org.yaml.snakeyaml.tokens.FlowSequenceEndToken;
34 import org.yaml.snakeyaml.tokens.FlowSequenceStartToken;
35 import org.yaml.snakeyaml.tokens.KeyToken;
36 import org.yaml.snakeyaml.tokens.ScalarToken;
37 import org.yaml.snakeyaml.tokens.StreamEndToken;
38 import org.yaml.snakeyaml.tokens.StreamStartToken;
39 import org.yaml.snakeyaml.tokens.TagToken;
40 import org.yaml.snakeyaml.tokens.TagTuple;
41 import org.yaml.snakeyaml.tokens.Token;
42 import org.yaml.snakeyaml.tokens.ValueToken;
43 
44 public class CanonicalScanner implements Scanner {
45     private static final String DIRECTIVE = "%YAML 1.1";
46     private final static Map<Character, Integer> QUOTE_CODES = ScannerImpl.ESCAPE_CODES;
47 
48     private final static Map<Character, String> QUOTE_REPLACES = ScannerImpl.ESCAPE_REPLACEMENTS;
49 
50     private String data;
51     private int index;
52     public ArrayList<Token> tokens;
53     private boolean scanned;
54     private Mark mark;
55 
CanonicalScanner(String data)56     public CanonicalScanner(String data) {
57         this.data = data + "\0";
58         this.index = 0;
59         this.tokens = new ArrayList<Token>();
60         this.scanned = false;
61         this.mark = new Mark("test", 0, 0, 0, data, 0);
62     }
63 
checkToken(Token.ID... choices)64     public boolean checkToken(Token.ID... choices) {
65         if (!scanned) {
66             scan();
67         }
68         if (!tokens.isEmpty()) {
69             if (choices.length == 0) {
70                 return true;
71             }
72             Token first = this.tokens.get(0);
73             for (Token.ID choice : choices) {
74                 if (first.getTokenId() == choice) {
75                     return true;
76                 }
77             }
78         }
79         return false;
80     }
81 
peekToken()82     public Token peekToken() {
83         if (!scanned) {
84             scan();
85         }
86         if (!tokens.isEmpty()) {
87             return this.tokens.get(0);
88         }
89         return null;
90     }
91 
getToken()92     public Token getToken() {
93         if (!scanned) {
94             scan();
95         }
96         return this.tokens.remove(0);
97     }
98 
getToken(Token.ID choice)99     public Token getToken(Token.ID choice) {
100         Token token = getToken();
101         if (choice != null && token.getTokenId() != choice) {
102             throw new CanonicalException("unexpected token " + token);
103         }
104         return token;
105     }
106 
scan()107     private void scan() {
108         this.tokens.add(new StreamStartToken(mark, mark));
109         boolean stop = false;
110         while (!stop) {
111             findToken();
112             char ch = data.charAt(index);
113             switch (ch) {
114             case '\0':
115                 tokens.add(new StreamEndToken(mark, mark));
116                 stop = true;
117                 break;
118 
119             case '%':
120                 tokens.add(scanDirective());
121                 break;
122 
123             case '-':
124                 if ("---".equals(data.substring(index, index + 3))) {
125                     index += 3;
126                     tokens.add(new DocumentStartToken(mark, mark));
127                 }
128                 break;
129 
130             case '[':
131                 index++;
132                 tokens.add(new FlowSequenceStartToken(mark, mark));
133                 break;
134 
135             case '{':
136                 index++;
137                 tokens.add(new FlowMappingStartToken(mark, mark));
138                 break;
139 
140             case ']':
141                 index++;
142                 tokens.add(new FlowSequenceEndToken(mark, mark));
143                 break;
144 
145             case '}':
146                 index++;
147                 tokens.add(new FlowMappingEndToken(mark, mark));
148                 break;
149 
150             case '?':
151                 index++;
152                 tokens.add(new KeyToken(mark, mark));
153                 break;
154 
155             case ':':
156                 index++;
157                 tokens.add(new ValueToken(mark, mark));
158                 break;
159 
160             case ',':
161                 index++;
162                 tokens.add(new FlowEntryToken(mark, mark));
163                 break;
164 
165             case '*':
166                 tokens.add(scanAlias());
167                 break;
168 
169             case '&':
170                 tokens.add(scanAlias());
171                 break;
172 
173             case '!':
174                 tokens.add(scanTag());
175                 break;
176 
177             case '"':
178                 tokens.add(scanScalar());
179                 break;
180 
181             default:
182                 throw new CanonicalException("invalid token");
183             }
184         }
185         scanned = true;
186     }
187 
scanDirective()188     private Token scanDirective() {
189         String chunk1 = data.substring(index, index + DIRECTIVE.length());
190         char chunk2 = data.charAt(index + DIRECTIVE.length());
191         if (DIRECTIVE.equals(chunk1) && "\n\0".indexOf(chunk2) != -1) {
192             index += DIRECTIVE.length();
193             List<Integer> implicit = new ArrayList<Integer>(2);
194             implicit.add(new Integer(1));
195             implicit.add(new Integer(1));
196             return new DirectiveToken<Integer>("YAML", implicit, mark, mark);
197         } else {
198             throw new CanonicalException("invalid directive");
199         }
200     }
201 
scanAlias()202     private Token scanAlias() {
203         boolean isTokenClassAlias;
204         if (data.charAt(index) == '*') {
205             isTokenClassAlias = true;
206         } else {
207             isTokenClassAlias = false;
208         }
209         index++;
210         int start = index;
211         while (", \n\0".indexOf(data.charAt(index)) == -1) {
212             index++;
213         }
214         String value = data.substring(start, index);
215         Token token;
216         if (isTokenClassAlias) {
217             token = new AliasToken(value, mark, mark);
218         } else {
219             token = new AnchorToken(value, mark, mark);
220         }
221         return token;
222     }
223 
scanTag()224     private Token scanTag() {
225         index++;
226         int start = index;
227         while (" \n\0".indexOf(data.charAt(index)) == -1) {
228             index++;
229         }
230         String value = data.substring(start, index);
231         if (value.length() == 0) {
232             value = "!";
233         } else if (value.charAt(0) == '!') {
234             value = Tag.PREFIX + value.substring(1);
235         } else if (value.charAt(0) == '<' && value.charAt(value.length() - 1) == '>') {
236             value = value.substring(1, value.length() - 1);
237         } else {
238             value = "!" + value;
239         }
240         return new TagToken(new TagTuple("", value), mark, mark);
241     }
242 
scanScalar()243     private Token scanScalar() {
244         index++;
245         StringBuilder chunks = new StringBuilder();
246         int start = index;
247         boolean ignoreSpaces = false;
248         while (data.charAt(index) != '"') {
249             if (data.charAt(index) == '\\') {
250                 ignoreSpaces = false;
251                 chunks.append(data.substring(start, index));
252                 index++;
253                 char ch = data.charAt(index);
254                 index++;
255                 if (ch == '\n') {
256                     ignoreSpaces = true;
257                 } else if (QUOTE_CODES.keySet().contains(ch)) {
258                     int length = QUOTE_CODES.get(ch);
259                     int code = Integer.parseInt(data.substring(index, index + length), 16);
260                     chunks.append(String.valueOf((char) code));
261                     index += length;
262                 } else {
263                     if (!QUOTE_REPLACES.keySet().contains(ch)) {
264                         throw new CanonicalException("invalid escape code");
265                     }
266                     chunks.append(QUOTE_REPLACES.get(ch));
267                 }
268                 start = index;
269             } else if (data.charAt(index) == '\n') {
270                 chunks.append(data.substring(start, index));
271                 chunks.append(" ");
272                 index++;
273                 start = index;
274                 ignoreSpaces = true;
275             } else if (ignoreSpaces && data.charAt(index) == ' ') {
276                 index++;
277                 start = index;
278             } else {
279                 ignoreSpaces = false;
280                 index++;
281             }
282         }
283         chunks.append(data.substring(start, index));
284         index++;
285         return new ScalarToken(chunks.toString(), mark, mark, false);
286     }
287 
findToken()288     private void findToken() {
289         boolean found = false;
290         while (!found) {
291             while (" \t".indexOf(data.charAt(index)) != -1) {
292                 index++;
293             }
294             if (data.charAt(index) == '#') {
295                 while (data.charAt(index) != '\n') {
296                     index++;
297                 }
298             }
299             if (data.charAt(index) == '\n') {
300                 index++;
301             } else {
302                 found = true;
303             }
304         }
305     }
306 }
307