#!/usr/bin/ruby
# encoding: utf-8

require 'antlr3/test/functional'

# Functional test for an ANTLR-generated XML lexer.
#
# The grammar below is a lexer-only grammar: DOCUMENT is the single
# non-fragment rule and every other rule is a fragment it is built from.
# Each rule's embedded action calls `say` with a short description of what
# was just recognized, and the example at the bottom compares the collected
# text (`lexer.output`) against an expected transcript.
#
# NOTE(review): `say` and `output` are presumably supplied by
# ANTLR3::Test::CaptureOutput, which the grammar mixes into the generated
# lexer; that module is defined outside this file.
class XMLLexerTest < ANTLR3::Test::Functional
  # The heredoc is single-quoted, so Ruby performs no interpolation or
  # backslash processing on it: sequences such as `#{ text }`, `\%` and `\"`
  # are handed to the grammar unchanged.
  #
  # The `quote` helper declared in @members backslash-escapes double quotes
  # in its argument and wraps the result in double quotes; the PCDATA, CDATA
  # and Comment actions use it so that multi-line text is visibly delimited
  # in the transcript. All actions build their message with `+`, so no
  # string literal is mutated.
  inline_grammar( <<-'END' )
    lexer grammar XML;
    options { language = Ruby; }

    @members {
      include ANTLR3::Test::CaptureOutput
      include ANTLR3::Test::RaiseErrors

      def quote(text)
        text = text.gsub(/\"/, '\\"')
        \%("#{ text }")
      end
    }

    DOCUMENT
        :  XMLDECL? WS? DOCTYPE? WS? ELEMENT WS?
        ;

    fragment DOCTYPE
        :
            '<!DOCTYPE' WS rootElementName=GENERIC_ID
            {say("ROOTELEMENT: " + $rootElementName.text)}
            WS
            (
                ( 'SYSTEM' WS sys1=VALUE
            {say("SYSTEM: " + $sys1.text)}

                | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
                    {say("PUBLIC: " + $pub.text)}
                    {say("SYSTEM: " + $sys2.text)}
                )
                ( WS )?
            )?
            ( dtd=INTERNAL_DTD
                {say("INTERNAL DTD: " + $dtd.text)}
            )?
        '>'
      ;

    fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;

    fragment PI :
            '<?' target=GENERIC_ID WS?
              {say("PI: " + $target.text)}
            ( ATTRIBUTE WS? )*  '?>'
      ;

    fragment XMLDECL :
            '<?' ('x'|'X') ('m'|'M') ('l'|'L') WS?
              {say("XML declaration")}
            ( ATTRIBUTE WS? )*  '?>'
      ;


    fragment ELEMENT
        : ( START_TAG
                (ELEMENT
                | t=PCDATA
                    {say("PCDATA: " + quote($t.text))}
                | t=CDATA
                    {say("CDATA: " + quote($t.text))}
                | t=COMMENT
                    {say("Comment: " + quote($t.text))}
                | pi=PI
                )*
                END_TAG
            | EMPTY_ELEMENT
            )
        ;

    fragment START_TAG
        : '<' WS? name=GENERIC_ID WS?
              {say("Start Tag: " + $name.text)}
            ( ATTRIBUTE WS? )* '>'
        ;

    fragment EMPTY_ELEMENT
        : '<' WS? name=GENERIC_ID WS?
              {say("Empty Element: " + $name.text)}
            ( ATTRIBUTE WS? )* '/>'
        ;

    fragment ATTRIBUTE
        : name=GENERIC_ID WS? '=' WS? value=VALUE
            {say("Attr: " + $name.text + " = "+ $value.text)}
        ;

    fragment END_TAG
        : '</' WS? name=GENERIC_ID WS? '>'
            {say("End Tag: " + $name.text)}
        ;

    fragment COMMENT
      : '<!--' (options {greedy=false;} : .)* '-->'
      ;

    fragment CDATA
      : '<![CDATA[' (options {greedy=false;} : .)* ']]>'
      ;

    fragment PCDATA : (~'<')+ ;

    fragment VALUE :
            ( '\"' (~'\"')* '\"'
            | '\'' (~'\'')* '\''
            )
      ;

    fragment GENERIC_ID
        : ( LETTER | '_' | ':')
            ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
      ;

    fragment LETTER
      : 'a'..'z'
      | 'A'..'Z'
      ;

    fragment WS  :
            (   ' '
            |   '\t'
            |  ( '\n'
                | '\r\n'
                | '\r'
                )
            )+
        ;
  END

  # Lexes one complete document (XML declaration, DOCTYPE with an internal
  # DTD subset, nested elements, comment, CDATA section, non-ASCII text,
  # entity references and a processing instruction) and checks the trace
  # the grammar actions produced.
  it "should be valid" do
    # NOTE(review): `fixed_indent( 0 )` is a helper defined outside this
    # file; it presumably strips the heredoc's common leading indentation so
    # the text starts at column 0 -- confirm against the test support code.
    lexer = XML::Lexer.new( <<-'END'.fixed_indent( 0 ) )
      <?xml version='1.0'?>
      <!DOCTYPE component [
      <!ELEMENT component (PCDATA|sub)*>
      <!ATTLIST component
                attr CDATA #IMPLIED
                attr2 CDATA #IMPLIED
      >
      <!ELMENT sub EMPTY>

      ]>
      <component attr="val'ue" attr2='val"ue'>
      <!-- This is a comment -->
      Text
      <![CDATA[huhu]]>
      öäüß
      &amp;
      &lt;
      <?xtal cursor='11'?>
      <sub/>
      <sub></sub>
      </component>
    END

    # Walk every token so the lexer consumes the whole input; the tokens
    # themselves are discarded because only the captured output is checked.
    lexer.map { |tk| tk }

    lexer.output.should == <<-'END'.fixed_indent( 0 )
      XML declaration
      Attr: version = '1.0'
      ROOTELEMENT: component
      INTERNAL DTD: [
      <!ELEMENT component (PCDATA|sub)*>
      <!ATTLIST component
                attr CDATA #IMPLIED
                attr2 CDATA #IMPLIED
      >
      <!ELMENT sub EMPTY>

      ]
      Start Tag: component
      Attr: attr = "val'ue"
      Attr: attr2 = 'val"ue'
      PCDATA: "
      "
      Comment: "<!-- This is a comment -->"
      PCDATA: "
      Text
      "
      CDATA: "<![CDATA[huhu]]>"
      PCDATA: "
      öäüß
      &amp;
      &lt;
      "
      PI: xtal
      Attr: cursor = '11'
      PCDATA: "
      "
      Empty Element: sub
      PCDATA: "
      "
      Start Tag: sub
      End Tag: sub
      PCDATA: "
      "
      End Tag: component
    END
  end

end