1 /**
2  * section: Parsing
3  * synopsis: Parse an XML document chunk by chunk to a tree and free it
4  * purpose: Demonstrate the use of xmlCreatePushParserCtxt() and
5  *          xmlParseChunk() to read an XML file progressively
6  *          into a tree and xmlFreeDoc() to free the resulting tree
7  * usage: parse4 test3.xml
8  * test: parse4 test3.xml
9  * author: Daniel Veillard
10  * copy: see Copyright for the status of this software.
11  */
12 
13 #include <stdio.h>
14 #include <libxml/parser.h>
15 #include <libxml/tree.h>
16 
17 #ifdef LIBXML_PUSH_ENABLED
/* Input stream the example reads the document from; opened by main(). */
static FILE *desc;

/**
 * readPacket:
 * @mem: array to store the packet
 * @size: the packet size
 *
 * Read at most @size bytes from the document stream and store them
 * in @mem. A NULL @mem, a non-positive @size or an unopened stream
 * yields 0 instead of reaching fread() with invalid arguments: a
 * negative @size would otherwise be converted to a huge size_t and
 * let fread() write past the end of @mem.
 *
 * Returns the number of bytes read, 0 on end of file, error or
 * invalid arguments
 */
static int
readPacket(char *mem, int size) {
    size_t got;

    if ((mem == NULL) || (size <= 0) || (desc == NULL))
        return(0);

    got = fread(mem, 1, (size_t) size, desc);

    /* got never exceeds size, so converting back to int is lossless */
    return((int) got);
}
36 
37 /**
38  * example4Func:
39  * @filename: a filename or an URL
40  *
41  * Parse the resource and free the resulting tree
42  */
43 static void
example4Func(const char * filename)44 example4Func(const char *filename) {
45     xmlParserCtxtPtr ctxt;
46     char chars[4];
47     xmlDocPtr doc; /* the resulting document tree */
48     int res;
49 
50     /*
51      * Read a few first byte to check the input used for the
52      * encoding detection at the parser level.
53      */
54     res = readPacket(chars, 4);
55     if (res <= 0) {
56         fprintf(stderr, "Failed to parse %s\n", filename);
57 	return;
58     }
59 
60     /*
61      * Create a progressive parsing context, the 2 first arguments
62      * are not used since we want to build a tree and not use a SAX
63      * parsing interface. We also pass the first bytes of the document
64      * to allow encoding detection when creating the parser but this
65      * is optional.
66      */
67     ctxt = xmlCreatePushParserCtxt(NULL, NULL,
68                                    chars, res, filename);
69     if (ctxt == NULL) {
70         fprintf(stderr, "Failed to create parser context !\n");
71 	return;
72     }
73 
74     /*
75      * loop on the input getting the document data, of course 4 bytes
76      * at a time is not realistic but allows to verify testing on small
77      * documents.
78      */
79     while ((res = readPacket(chars, 4)) > 0) {
80         xmlParseChunk(ctxt, chars, res, 0);
81     }
82 
83     /*
84      * there is no more input, indicate the parsing is finished.
85      */
86     xmlParseChunk(ctxt, chars, 0, 1);
87 
88     /*
89      * collect the document back and if it was wellformed
90      * and destroy the parser context.
91      */
92     doc = ctxt->myDoc;
93     res = ctxt->wellFormed;
94     xmlFreeParserCtxt(ctxt);
95 
96     if (!res) {
97         fprintf(stderr, "Failed to parse %s\n", filename);
98     }
99 
100     /*
101      * since we don't use the document, destroy it now.
102      */
103     xmlFreeDoc(doc);
104 }
105 
main(int argc,char ** argv)106 int main(int argc, char **argv) {
107     if (argc != 2)
108         return(1);
109 
110     /*
111      * this initialize the library and check potential ABI mismatches
112      * between the version it was compiled for and the actual shared
113      * library used.
114      */
115     LIBXML_TEST_VERSION
116 
117     /*
118      * simulate a progressive parsing using the input file.
119      */
120     desc = fopen(argv[1], "rb");
121     if (desc != NULL) {
122 	example4Func(argv[1]);
123 	fclose(desc);
124     } else {
125         fprintf(stderr, "Failed to parse %s\n", argv[1]);
126     }
127 
128     /*
129      * Cleanup function for the XML library.
130      */
131     xmlCleanupParser();
132     /*
133      * this is to debug memory for regression tests
134      */
135     xmlMemoryDump();
136     return(0);
137 }
138 #else /* ! LIBXML_PUSH_ENABLED */
/*
 * Fallback entry point used when the library lacks push parser
 * support: report that on stderr and exit with status 1.
 */
int main(int argc, char **argv) {
    /* the arguments are intentionally unused in this configuration */
    (void) argc;
    (void) argv;

    fputs("Library not compiled with push parser support\n", stderr);
    return(1);
}
143 #endif
144