1 /*
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000-2017 Expat development team
11 Licensed under the MIT license:
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to permit
18 persons to whom the Software is furnished to do so, subject to the
19 following conditions:
20
21 The above copyright notice and this permission notice shall be included
22 in all copies or substantial portions of the Software.
23
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30 USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <stddef.h>
36 #include <string.h>
37 #include <fcntl.h>
38
39 #ifdef _WIN32
40 #include "winconfig.h"
41 #elif defined(HAVE_EXPAT_CONFIG_H)
42 #include <expat_config.h>
43 #endif /* ndef _WIN32 */
44
45 #include "expat.h"
46 #include "internal.h" /* for UNUSED_P only */
47 #include "xmlfile.h"
48 #include "xmltchar.h"
49 #include "filemap.h"
50
51 #if defined(_MSC_VER)
52 #include <io.h>
53 #endif
54
55 #ifdef HAVE_UNISTD_H
56 #include <unistd.h>
57 #endif
58
/* Make sure O_BINARY is always defined: use _O_BINARY when only that
   spelling is available, otherwise define it as 0 so that OR-ing it into
   the open flags has no effect. */
#ifndef O_BINARY
#ifdef _O_BINARY
#define O_BINARY _O_BINARY
#else
#define O_BINARY 0
#endif
#endif
66
/* Number of bytes requested from the parser's buffer and read from the
   input on each iteration of processStream.  Builds with _DEBUG defined
   use a very small size (presumably to exercise many buffer boundaries;
   confirm with the maintainers). */
#ifdef _DEBUG
#define READ_SIZE 16
#else
#define READ_SIZE (1024*8)
#endif
72
73
/* Argument bundle handed to processFile through filemap's opaque
   pointer. */
typedef struct {
  XML_Parser parser; /* parser that receives the file contents */
  int *retPtr;       /* where processFile stores 1 on success, 0 on error */
} PROCESS_ARGS;
78
/* Forward declaration: externalEntityRefFilemap calls processStream
   before its definition further down in this file. */
static int
processStream(const XML_Char *filename, XML_Parser parser);
81
82 static void
reportError(XML_Parser parser,const XML_Char * filename)83 reportError(XML_Parser parser, const XML_Char *filename)
84 {
85 enum XML_Error code = XML_GetErrorCode(parser);
86 const XML_Char *message = XML_ErrorString(code);
87 if (message)
88 ftprintf(stdout,
89 T("%s")
90 T(":%") T(XML_FMT_INT_MOD) T("u")
91 T(":%") T(XML_FMT_INT_MOD) T("u")
92 T(": %s\n"),
93 filename,
94 XML_GetErrorLineNumber(parser),
95 XML_GetErrorColumnNumber(parser),
96 message);
97 else
98 ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code);
99 }
100
101 /* This implementation will give problems on files larger than INT_MAX. */
102 static void
processFile(const void * data,size_t size,const XML_Char * filename,void * args)103 processFile(const void *data, size_t size,
104 const XML_Char *filename, void *args)
105 {
106 XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
107 int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
108 if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) {
109 reportError(parser, filename);
110 *retPtr = 0;
111 }
112 else
113 *retPtr = 1;
114 }
115
116 #if defined(_WIN32)
117
118 static int
isAsciiLetter(XML_Char c)119 isAsciiLetter(XML_Char c)
120 {
121 return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z'));
122 }
123
124 #endif /* _WIN32 */
125
126 static const XML_Char *
resolveSystemId(const XML_Char * base,const XML_Char * systemId,XML_Char ** toFree)127 resolveSystemId(const XML_Char *base, const XML_Char *systemId,
128 XML_Char **toFree)
129 {
130 XML_Char *s;
131 *toFree = 0;
132 if (!base
133 || *systemId == T('/')
134 #if defined(_WIN32)
135 || *systemId == T('\\')
136 || (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
137 #endif
138 )
139 return systemId;
140 *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)
141 * sizeof(XML_Char));
142 if (!*toFree)
143 return systemId;
144 tcscpy(*toFree, base);
145 s = *toFree;
146 if (tcsrchr(s, T('/')))
147 s = tcsrchr(s, T('/')) + 1;
148 #if defined(_WIN32)
149 if (tcsrchr(s, T('\\')))
150 s = tcsrchr(s, T('\\')) + 1;
151 #endif
152 tcscpy(s, systemId);
153 return *toFree;
154 }
155
156 static int
externalEntityRefFilemap(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * UNUSED_P (publicId))157 externalEntityRefFilemap(XML_Parser parser,
158 const XML_Char *context,
159 const XML_Char *base,
160 const XML_Char *systemId,
161 const XML_Char *UNUSED_P(publicId))
162 {
163 int result;
164 XML_Char *s;
165 const XML_Char *filename;
166 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
167 int filemapRes;
168 PROCESS_ARGS args;
169 args.retPtr = &result;
170 args.parser = entParser;
171 filename = resolveSystemId(base, systemId, &s);
172 XML_SetBase(entParser, filename);
173 filemapRes = filemap(filename, processFile, &args);
174 switch (filemapRes) {
175 case 0:
176 result = 0;
177 break;
178 case 2:
179 ftprintf(stderr, T("%s: file too large for memory-mapping")
180 T(", switching to streaming\n"), filename);
181 result = processStream(filename, entParser);
182 break;
183 }
184 free(s);
185 XML_ParserFree(entParser);
186 return result;
187 }
188
189 static int
processStream(const XML_Char * filename,XML_Parser parser)190 processStream(const XML_Char *filename, XML_Parser parser)
191 {
192 /* passing NULL for filename means read intput from stdin */
193 int fd = 0; /* 0 is the fileno for stdin */
194
195 if (filename != NULL) {
196 fd = topen(filename, O_BINARY|O_RDONLY);
197 if (fd < 0) {
198 tperror(filename);
199 return 0;
200 }
201 }
202 for (;;) {
203 int nread;
204 char *buf = (char *)XML_GetBuffer(parser, READ_SIZE);
205 if (!buf) {
206 if (filename != NULL)
207 close(fd);
208 ftprintf(stderr, T("%s: out of memory\n"),
209 filename != NULL ? filename : T("xmlwf"));
210 return 0;
211 }
212 nread = read(fd, buf, READ_SIZE);
213 if (nread < 0) {
214 tperror(filename != NULL ? filename : T("STDIN"));
215 if (filename != NULL)
216 close(fd);
217 return 0;
218 }
219 if (XML_ParseBuffer(parser, nread, nread == 0) == XML_STATUS_ERROR) {
220 reportError(parser, filename != NULL ? filename : T("STDIN"));
221 if (filename != NULL)
222 close(fd);
223 return 0;
224 }
225 if (nread == 0) {
226 if (filename != NULL)
227 close(fd);
228 break;;
229 }
230 }
231 return 1;
232 }
233
234 static int
externalEntityRefStream(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * UNUSED_P (publicId))235 externalEntityRefStream(XML_Parser parser,
236 const XML_Char *context,
237 const XML_Char *base,
238 const XML_Char *systemId,
239 const XML_Char *UNUSED_P(publicId))
240 {
241 XML_Char *s;
242 const XML_Char *filename;
243 int ret;
244 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
245 filename = resolveSystemId(base, systemId, &s);
246 XML_SetBase(entParser, filename);
247 ret = processStream(filename, entParser);
248 free(s);
249 XML_ParserFree(entParser);
250 return ret;
251 }
252
253 int
XML_ProcessFile(XML_Parser parser,const XML_Char * filename,unsigned flags)254 XML_ProcessFile(XML_Parser parser,
255 const XML_Char *filename,
256 unsigned flags)
257 {
258 int result;
259
260 if (!XML_SetBase(parser, filename)) {
261 ftprintf(stderr, T("%s: out of memory"), filename);
262 exit(1);
263 }
264
265 if (flags & XML_EXTERNAL_ENTITIES)
266 XML_SetExternalEntityRefHandler(parser,
267 (flags & XML_MAP_FILE)
268 ? externalEntityRefFilemap
269 : externalEntityRefStream);
270 if (flags & XML_MAP_FILE) {
271 int filemapRes;
272 PROCESS_ARGS args;
273 args.retPtr = &result;
274 args.parser = parser;
275 filemapRes = filemap(filename, processFile, &args);
276 switch (filemapRes) {
277 case 0:
278 result = 0;
279 break;
280 case 2:
281 ftprintf(stderr, T("%s: file too large for memory-mapping")
282 T(", switching to streaming\n"), filename);
283 result = processStream(filename, parser);
284 break;
285 }
286 }
287 else
288 result = processStream(filename, parser);
289 return result;
290 }
291