1lexer grammar t012lexerXMLLexer;
2options {
3  language = Python3;
4}
5
6@header {
7from io import StringIO
8}
9
// Lexer initialisation action: creates the in-memory text buffer that the
// output() helper from the members section appends to.
@lexer::init {
self.outbuf = StringIO()
}
13
// Helper method injected into the generated lexer class; the rule actions
// below call it to record what they matched.
@lexer::members {
def output(self, line):
    # Append one record to the trace buffer, terminated by a newline.
    record = line + "\n"
    self.outbuf.write(record)
}
18
/**
 * The only non-fragment rule: one whole document is matched as a single
 * token. Layout is an optional XML declaration, an optional DOCTYPE, then
 * exactly one root element, with optional whitespace between and after them.
 */
DOCUMENT
    :   XMLDECL?
        WS?
        DOCTYPE?
        WS?
        ELEMENT
        WS?
    ;
22
/**
 * Document type declaration:
 *   '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
 *
 * Reports the root element name, then the SYSTEM literal or the PUBLIC and
 * SYSTEM literals of the external identifier if present, then the raw text
 * of the internal subset if present.
 *
 * Whitespace after the root element name and after the internal subset is
 * optional, following XML 1.0 production [28]. With a mandatory WS after the
 * name, declarations such as <!DOCTYPE root> or <!DOCTYPE root[...]> were
 * rejected. GENERIC_ID is greedy, so a SYSTEM or PUBLIC keyword can still
 * only be reached after real whitespace.
 */
fragment DOCTYPE
    :
        '<!DOCTYPE' WS rootElementName=GENERIC_ID
        {self.output("ROOTELEMENT: "+rootElementName.text)}
        WS?
        (
            ( 'SYSTEM' WS sys1=VALUE
                {self.output("SYSTEM: "+sys1.text)}

            | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
                {self.output("PUBLIC: "+pub.text)}
                {self.output("SYSTEM: "+sys2.text)}
            )
            WS?
        )?
        ( dtd=INTERNAL_DTD
            {self.output("INTERNAL DTD: "+dtd.text)}
            WS?
        )?
        '>'
    ;
43
/**
 * Internal DTD subset: an opening bracket, then any characters up to the
 * first closing bracket (the loop is non-greedy).
 */
fragment INTERNAL_DTD
    :   '['
        ( options {greedy=false;} : . )*
        ']'
    ;
45
/**
 * Processing instruction: '<?' target, optional whitespace, zero or more
 * attributes, then '?>'. The target name is reported as soon as it has been
 * matched.
 */
fragment PI
    :   '<?' piTarget=GENERIC_ID WS?
        {self.output("PI: " + piTarget.text)}
        ( ATTRIBUTE WS? )*
        '?>'
    ;
51
/**
 * XML declaration: '<?' followed by the letters x, m, l in either case,
 * optional whitespace, zero or more attributes, then '?>'. A fixed marker
 * line is reported before the attributes are matched.
 */
fragment XMLDECL
    :   '<?'
        ( 'x' | 'X' ) ( 'm' | 'M' ) ( 'l' | 'L' )
        WS?
        {self.output("XML declaration")}
        ( ATTRIBUTE WS? )*
        '?>'
    ;
57
58
/**
 * An element is either an empty-element tag, or a start tag, any mix of
 * content items, and an end tag. Content items are nested elements,
 * character data, CDATA sections, comments and processing instructions.
 * Character data, CDATA sections and comments are reported with their full
 * matched text.
 */
fragment ELEMENT
    :   START_TAG
        (   ELEMENT
        |   textRun=PCDATA
            {self.output('PCDATA: "{}"'.format($textRun.text))}
        |   cdataRun=CDATA
            {self.output('CDATA: "{}"'.format($cdataRun.text))}
        |   commentRun=COMMENT
            {self.output('Comment: "{}"'.format($commentRun.text))}
        |   pi=PI
        )*
        END_TAG
    |   EMPTY_ELEMENT
    ;
74
/**
 * Opening tag: '<', the element name, zero or more attributes, then '>'.
 * The element name is reported before the attributes are matched.
 */
fragment START_TAG
    :   '<' WS? tagName=GENERIC_ID WS?
        {self.output("Start Tag: " + tagName.text)}
        ( ATTRIBUTE WS? )*
        '>'
    ;
80
/**
 * Self-closing tag: '<', the element name, zero or more attributes, then
 * '/>'. The element name is reported before the attributes are matched.
 */
fragment EMPTY_ELEMENT
    :   '<' WS? tagName=GENERIC_ID WS?
        {self.output("Empty Element: " + tagName.text)}
        ( ATTRIBUTE WS? )*
        '/>'
    ;
86
/**
 * One attribute: name, '=', quoted value, with optional whitespace around
 * the equals sign. Reported as name=value, where the value text still
 * includes its surrounding quotes.
 */
fragment ATTRIBUTE
    :   attrName=GENERIC_ID
        WS? '=' WS?
        attrValue=VALUE
        {self.output("Attr: {}={}".format(attrName.text, attrValue.text))}
    ;
91
/**
 * Closing tag: '</', the element name, then '>'. The name is reported once
 * the whole tag has been matched. This rule does not compare the name with
 * the one in the opening tag.
 */
fragment END_TAG
    :   '</' WS? tagName=GENERIC_ID WS? '>'
        {self.output("End Tag: " + tagName.text)}
    ;
96
/**
 * Comment: '<!--', then any characters up to the first '-->' (the loop is
 * non-greedy).
 */
fragment COMMENT
    :   '<!--'
        ( options {greedy=false;} : . )*
        '-->'
    ;
100
/**
 * CDATA section: '<![CDATA[', then any characters up to the first ']]>'
 * (the loop is non-greedy).
 */
fragment CDATA
    :   '<![CDATA['
        ( options {greedy=false;} : . )*
        ']]>'
    ;
104
/** Character data: one or more characters, none of which is '<'. */
fragment PCDATA
    :   ( ~'<' )+
    ;
106
/**
 * Quoted literal, delimited by double quotes or by single quotes. The body
 * may be empty and may contain anything except the delimiter in use.
 */
fragment VALUE
    :   '\"' ( ~'\"' )* '\"'
    |   '\'' ( ~'\'' )* '\''
    ;
112
/**
 * Name used for elements, attributes and processing-instruction targets.
 * The first character is a letter, underscore or colon. Later characters
 * may also be digits, dots or hyphens. The tail loop is explicitly greedy.
 */
fragment GENERIC_ID
    :   ( LETTER | '_' | ':' )
        (   options {greedy=true;}
        :   LETTER
        |   '0'..'9'
        |   '.'
        |   '-'
        |   '_'
        |   ':'
        )*
    ;
117
/** A single ASCII letter, lower or upper case. */
fragment LETTER
    :   ( 'a'..'z' | 'A'..'Z' )
    ;
122
/**
 * One or more whitespace characters: space, tab, or a line break written as
 * LF, CR LF or a lone CR.
 */
fragment WS
    :   (   ' '
        |   '\t'
        |   '\n'
        |   '\r\n'
        |   '\r'
        )+
    ;
132
133