1 // This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
2 //
3 // TagSoup is licensed under the Apache License,
4 // Version 2.0.  You may obtain a copy of this license at
5 // http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
6 // additional legal rights not granted by this license.
7 //
8 // TagSoup is distributed in the hope that it will be useful, but
9 // unless required by applicable law or agreed to in writing, TagSoup
10 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
11 // OF ANY KIND, either express or implied; not even the implied warranty
12 // of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 //
14 //
15 // This file is part of TagSoup.
16 //
17 // This program is free software; you can redistribute it and/or modify
18 // it under the terms of the GNU General Public License as published by
19 // the Free Software Foundation; either version 2 of the License, or
20 // (at your option) any later version.  You may also distribute
21 // and/or modify it under version 2.1 of the Academic Free License.
22 //
23 // This program is distributed in the hope that it will be useful,
24 // but WITHOUT ANY WARRANTY; without even the implied warranty of
25 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
26 //
27 //
28 // PYX Scanner
29 
30 package org.ccil.cowan.tagsoup;
31 import java.io.*;
32 import org.xml.sax.SAXException;
33 
34 /**
35 A Scanner that accepts PYX format instead of HTML.
36 Useful primarily for debugging.
37 **/
38 public class PYXScanner implements Scanner {
39 
resetDocumentLocator(String publicid, String systemid)40         public void resetDocumentLocator(String publicid, String systemid) {
41 	// Need this method for interface compatibility, but note
42 	// that PyxScanner does not implement Locator.
43         }
44 
scan(Reader r, ScanHandler h)45 	public void scan(Reader r, ScanHandler h) throws IOException, SAXException {
46 		BufferedReader br = new BufferedReader(r);
47 		String s;
48 		char[] buff = null;
49 		boolean instag = false;
50 		while ((s = br.readLine()) != null) {
51 			int size = s.length();
52 			if (buff == null || buff.length < size) {
53 				buff = new char[size];
54 				}
55 			s.getChars(0, size, buff, 0);
56 			switch (buff[0]) {
57 			case '(':
58 				if (instag) {
59 					h.stagc(buff, 0, 0);
60 					instag = false;
61 					}
62 				h.gi(buff, 1, size - 1);
63 				instag = true;
64 				break;
65 			case ')':
66 				if (instag) {
67 					h.stagc(buff, 0, 0);
68 					instag = false;
69 					}
70 				h.etag(buff, 1, size - 1);
71 				break;
72 			case '?':
73 				if (instag) {
74 					h.stagc(buff, 0, 0);
75 					instag = false;
76 					}
77 				h.pi(buff, 1, size - 1);
78 				break;
79 			case 'A':
80 				int sp = s.indexOf(' ');
81 				h.aname(buff, 1, sp - 1);
82 				h.aval(buff, sp + 1, size - sp - 1);
83 				break;
84 			case '-':
85 				if (instag) {
86 					h.stagc(buff, 0, 0);
87 					instag = false;
88 					}
89 				if (s.equals("-\\n")) {
90 					buff[0] = '\n';
91 					h.pcdata(buff, 0, 1);
92 					}
93 				else {
94 					// FIXME:
95 					// Does not decode \t and \\ in input
96 					h.pcdata(buff, 1, size - 1);
97 					}
98 				break;
99 			case 'E':
100 				if (instag) {
101 					h.stagc(buff, 0, 0);
102 					instag = false;
103 					}
104 				h.entity(buff, 1, size - 1);
105 				break;
106 			default:
107 //				System.err.print("Gotcha ");
108 //				System.err.print(s);
109 //				System.err.print('\n');
110 				break;
111 				}
112 			}
113 		h.eof(buff, 0, 0);
114 		}
115 
startCDATA()116 	public void startCDATA() { }
117 
main(String[] argv)118 	public static void main(String[] argv) throws IOException, SAXException {
119 		Scanner s = new PYXScanner();
120 		Reader r = new InputStreamReader(System.in, "UTF-8");
121 		Writer w = new BufferedWriter(new OutputStreamWriter(System.out, "UTF-8"));
122 		s.scan(r, new PYXWriter(w));
123 		}
124 	}
125