1#!/usr/bin/python -u
2# -*- coding: utf-8 -*-
3#
4# this tests the DTD validation with the XmlTextReader interface
5#
6import sys
7import glob
8import string
9import libxml2
# Pick a text-buffer class that works on both Python 2 (StringIO module)
# and Python 3 (io module).  Only a missing module should trigger the
# fallback, so catch ImportError rather than every exception.
try:
    import StringIO
    str_io = StringIO.StringIO
except ImportError:
    import io
    str_io = io.StringIO
16
# Text collected by the libxml2 error handler; reset before each test.
err = ""
# Location of the validation test documents (relative path).
dir_prefix = "../../test/valid/"

# Memory debug specific: switched on here so the check at the end of the
# script can report any bytes still allocated.
libxml2.debugMemory(1)
22# This dictionary reflects the contents of the files
23# ../../test/valid/*.xml.err that are not empty, except that
24# the file paths in the messages start with ../../test/
25
26expect = {
27    '766956':
28"""../../test/valid/dtds/766956.dtd:2: parser error : PEReference: expecting ';'
29%ä%ent;
30   ^
31../../test/valid/dtds/766956.dtd:2: parser error : Content error in the external subset
32%ä%ent;
33        ^
34Entity: line 1:
35value
36^
37""",
38    '781333':
39"""../../test/valid/781333.xml:4: element a: validity error : Element a content does not follow the DTD, expecting ( ..., got
40<a/>
41    ^
42../../test/valid/781333.xml:5: element a: validity error : Element a content does not follow the DTD, Expecting more child
43
44^
45""",
46    'cond_sect2':
47"""../../test/valid/dtds/cond_sect2.dtd:15: parser error : All markup of the conditional section is not in the same entity
48    %ent;
49         ^
50Entity: line 1:
51]]>
52^
53../../test/valid/dtds/cond_sect2.dtd:17: parser error : Content error in the external subset
54
55^
56""",
57    'rss':
58"""../../test/valid/rss.xml:177: element rss: validity error : Element rss does not carry attribute version
59</rss>
60      ^
61""",
62    't8':
63"""../../test/valid/t8.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
64
65%defroot; %defmiddle; %deftest;
66         ^
67Entity: line 1:
68&lt;!ELEMENT root (middle) >
69^
70../../test/valid/t8.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
71
72%defroot; %defmiddle; %deftest;
73                     ^
74Entity: line 1:
75&lt;!ELEMENT middle (test) >
76^
77../../test/valid/t8.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
78
79%defroot; %defmiddle; %deftest;
80                               ^
81Entity: line 1:
82&lt;!ELEMENT test (#PCDATA) >
83^
84""",
85    't8a':
86"""../../test/valid/t8a.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
87
88%defroot;%defmiddle;%deftest;
89         ^
90Entity: line 1:
91&lt;!ELEMENT root (middle) >
92^
93../../test/valid/t8a.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
94
95%defroot;%defmiddle;%deftest;
96                    ^
97Entity: line 1:
98&lt;!ELEMENT middle (test) >
99^
100../../test/valid/t8a.xml:6: parser error : internal error: xmlParseInternalSubset: error detected in Markup declaration
101
102%defroot;%defmiddle;%deftest;
103                             ^
104Entity: line 1:
105&lt;!ELEMENT test (#PCDATA) >
106^
107""",
108    'xlink':
109"""../../test/valid/xlink.xml:450: element termdef: validity error : ID dt-arc already defined
110	<p><termdef id="dt-arc" term="Arc">An <ter
111	                                  ^
112validity error : attribute def line 199 references an unknown ID "dt-xlg"
113""",
114}
115
# Re-key the table by full document path (dir_prefix + name + ".xml") so
# entries can be looked up directly with the paths returned by glob.
expect = dict((dir_prefix + key + ".xml", val) for key, val in expect.items())
118
def callback(ctx, str):
    """Append one error message to the module-level ``err`` buffer.

    ctx -- user data supplied when the handler was registered (unused).
    str -- the message text; the parameter name shadows the builtin but
           is kept as-is so the signature stays unchanged.
    """
    global err
    err += "%s" % (str)
# Route every libxml2 error/warning message into callback(), which
# accumulates the text in the global `err`.
libxml2.registerErrorHandler(callback, "")

# Documents for which the reader is expected to stop with an error code
# instead of reaching the end of the input.
parsing_error_files = ["766956", "cond_sect2", "t8", "t8a"]
expect_parsing_error = ["{}{}.xml".format(dir_prefix, f) for f in parsing_error_files]

# Validate each document found under dir_prefix and compare the messages
# collected by the error handler with the text recorded in `expect`.
failed = 0
valid_files = sorted(glob.glob(dir_prefix + "*.x*"))
for file in valid_files:
    err = ""
    reader = libxml2.newTextReaderFilename(file)
    reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)
    # Keep reading while Read() reports 1; 0 is treated as a clean end of
    # document and any other value as a failure.
    ret = reader.Read()
    while ret == 1:
        ret = reader.Read()
    if ret != 0 and file not in expect_parsing_error:
        # Reported only: this condition was never fatal in this script.
        print("Error parsing and validating %s" % (file))
    # Messages are acceptable only when they match the recorded text
    # exactly; expect.get() yields None for documents with no entry.
    if err and err != expect.get(file):
        failed += 1
        print("Error: ", err)
        if file in expect:
            print("Expected: ", expect[file])

# A mismatch must be visible in the exit status, otherwise the script
# would carry on and could still finish with "OK".
if failed:
    print("Failed %d tests" % failed)
    sys.exit(1)
145#
146# another separate test based on Stephane Bidoul one
147#
148s = """
149<!DOCTYPE test [
150<!ELEMENT test (x,b)>
151<!ELEMENT x (c)>
152<!ELEMENT b (#PCDATA)>
153<!ELEMENT c (#PCDATA)>
154<!ENTITY x "<x><c>xxx</c></x>">
155]>
156<test>
157    &x;
158    <b>bbb</b>
159</test>
160"""
161expect="""10,test
1621,test
16314,#text
1641,x
1651,c
1663,#text
16715,c
16815,x
16914,#text
1701,b
1713,#text
17215,b
17314,#text
17415,test
175"""
176res=""
177err=""
178
179input = libxml2.inputBuffer(str_io(s))
180reader = input.newTextReader("test2")
181reader.SetParserProp(libxml2.PARSER_LOADDTD,1)
182reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1)
183reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1)
184reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
185while reader.Read() == 1:
186    res = res + "%s,%s\n" % (reader.NodeType(),reader.Name())
187
188if res != expect:
189    print("test2 failed: unexpected output")
190    print(res)
191    sys.exit(1)
192if err != "":
193    print("test2 failed: validation error found")
194    print(err)
195    sys.exit(1)
196
197#
198# Another test for external entity parsing and validation
199#
200
201s = """<!DOCTYPE test [
202<!ELEMENT test (x)>
203<!ELEMENT x (#PCDATA)>
204<!ENTITY e SYSTEM "tst.ent">
205]>
206<test>
207  &e;
208</test>
209"""
210tst_ent = """<x>hello</x>"""
211expect="""10 test
2121 test
21314 #text
2141 x
2153 #text
21615 x
21714 #text
21815 test
219"""
220res=""
221
def myResolver(URL, ID, ctxt):
    """Entity loader serving the in-memory entity used by test3.

    Returns a fresh text buffer over ``tst_ent`` when URL is exactly
    "tst.ent", and None for every other URL.  ID and ctxt are unused.
    """
    return str_io(tst_ent) if URL == "tst.ent" else None
226
# Let myResolver answer external entity lookups from now on.
libxml2.setEntityLoader(myResolver)

input = libxml2.inputBuffer(str_io(s))
reader = input.newTextReader("test3")
# Same parser properties, enabled in the same order as before.
for prop in (libxml2.PARSER_LOADDTD,
             libxml2.PARSER_DEFAULTATTRS,
             libxml2.PARSER_SUBST_ENTITIES,
             libxml2.PARSER_VALIDATE):
    reader.SetParserProp(prop, 1)

# Record every node visited and append the trace to res.
rows = []
while reader.Read() == 1:
    rows.append("%s %s\n" % (reader.NodeType(), reader.Name()))
res += "".join(rows)

if res != expect:
    print("test3 failed: unexpected output")
    print(res)
    sys.exit(1)
elif err != "":
    print("test3 failed: validation error found")
    print(err)
    sys.exit(1)
246
247#
248# Another test for recursive entity parsing, validation, and replacement of
249# entities, making sure the entity ref node doesn't show up in that case
250#
251
252s = """<!DOCTYPE test [
253<!ELEMENT test (x, x)>
254<!ELEMENT x (y)>
255<!ELEMENT y (#PCDATA)>
256<!ENTITY x "<x>&y;</x>">
257<!ENTITY y "<y>yyy</y>">
258]>
259<test>
260  &x;
261  &x;
262</test>"""
263expect="""10 test 0
2641 test 0
26514 #text 1
2661 x 1
2671 y 2
2683 #text 3
26915 y 2
27015 x 1
27114 #text 1
2721 x 1
2731 y 2
2743 #text 3
27515 y 2
27615 x 1
27714 #text 1
27815 test 0
279"""
280res=""
281err=""
282
283input = libxml2.inputBuffer(str_io(s))
284reader = input.newTextReader("test4")
285reader.SetParserProp(libxml2.PARSER_LOADDTD,1)
286reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1)
287reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1)
288reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
289while reader.Read() == 1:
290    res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth())
291
292if res != expect:
293    print("test4 failed: unexpected output")
294    print(res)
295    sys.exit(1)
296if err != "":
297    print("test4 failed: validation error found")
298    print(err)
299    sys.exit(1)
300
301#
302# The same test but without entity substitution this time
303#
304
305s = """<!DOCTYPE test [
306<!ELEMENT test (x, x)>
307<!ELEMENT x (y)>
308<!ELEMENT y (#PCDATA)>
309<!ENTITY x "<x>&y;</x>">
310<!ENTITY y "<y>yyy</y>">
311]>
312<test>
313  &x;
314  &x;
315</test>"""
316expect="""10 test 0
3171 test 0
31814 #text 1
3195 x 1
32014 #text 1
3215 x 1
32214 #text 1
32315 test 0
324"""
325res=""
326err=""
327
328input = libxml2.inputBuffer(str_io(s))
329reader = input.newTextReader("test5")
330reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
331while reader.Read() == 1:
332    res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth())
333
334if res != expect:
335    print("test5 failed: unexpected output")
336    print(res)
337if err != "":
338    print("test5 failed: validation error found")
339    print(err)
340
#
# cleanup
#
# Drop the last input buffer and reader (in that order) so they no longer
# count as live allocations in the check below.
del input, reader

# Memory debug specific: debugMemory(1) is compared against zero, and any
# non-zero value is reported as the number of leaked bytes.
libxml2.cleanupParser()
if libxml2.debugMemory(1) != 0:
    print("Memory leak %d bytes" % (libxml2.debugMemory(1)))
    libxml2.dumpMemory()
else:
    print("OK")
354