1 /*
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000-2017 Expat development team
11 Licensed under the MIT license:
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to permit
18 persons to whom the Software is furnished to do so, subject to the
19 following conditions:
20
21 The above copyright notice and this permission notice shall be included
22 in all copies or substantial portions of the Software.
23
24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30 USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <stddef.h>
36 #include <string.h>
37 #include <fcntl.h>
38
39 #ifdef _WIN32
40 # include "winconfig.h"
41 #elif defined(HAVE_EXPAT_CONFIG_H)
42 # include <expat_config.h>
43 #endif /* ndef _WIN32 */
44
45 #include "expat.h"
46 #include "internal.h" /* for UNUSED_P only */
47 #include "xmlfile.h"
48 #include "xmltchar.h"
49 #include "filemap.h"
50
51 #if defined(_MSC_VER)
52 # include <io.h>
53 #endif
54
55 #ifdef HAVE_UNISTD_H
56 # include <unistd.h>
57 #endif
58
/* Make O_BINARY available on every platform: use the underscore-prefixed
   spelling when that is the one that is defined, otherwise fall back to 0
   so that OR-ing it into the open flags changes nothing. */
#ifndef O_BINARY
# ifdef _O_BINARY
# define O_BINARY _O_BINARY
# else
# define O_BINARY 0
# endif
#endif

/* Number of bytes processStream() asks for per read: 16 when _DEBUG is
   defined, 8 KiB otherwise.
   NOTE(review): the tiny debug value presumably exists to exercise
   buffer-boundary handling in the parser -- confirm. */
#ifdef _DEBUG
# define READ_SIZE 16
#else
# define READ_SIZE (1024 * 8)
#endif
72
/* Context handed through filemap() to the processFile() callback. */
typedef struct {
  XML_Parser parser; /* parser that receives the mapped data */
  int *retPtr;       /* where processFile() stores 1 (parsed) or 0 (error) */
} PROCESS_ARGS;

/* Forward declaration: externalEntityRefFilemap() calls processStream()
   before its definition appears in this file. */
static int processStream(const XML_Char *filename, XML_Parser parser);
79
80 static void
reportError(XML_Parser parser,const XML_Char * filename)81 reportError(XML_Parser parser, const XML_Char *filename) {
82 enum XML_Error code = XML_GetErrorCode(parser);
83 const XML_Char *message = XML_ErrorString(code);
84 if (message)
85 ftprintf(stdout,
86 T("%s") T(":%") T(XML_FMT_INT_MOD) T("u") T(":%")
87 T(XML_FMT_INT_MOD) T("u") T(": %s\n"),
88 filename, XML_GetErrorLineNumber(parser),
89 XML_GetErrorColumnNumber(parser), message);
90 else
91 ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code);
92 }
93
94 /* This implementation will give problems on files larger than INT_MAX. */
95 static void
processFile(const void * data,size_t size,const XML_Char * filename,void * args)96 processFile(const void *data, size_t size, const XML_Char *filename,
97 void *args) {
98 XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
99 int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
100 if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) {
101 reportError(parser, filename);
102 *retPtr = 0;
103 } else
104 *retPtr = 1;
105 }
106
107 #if defined(_WIN32)
108
109 static int
isAsciiLetter(XML_Char c)110 isAsciiLetter(XML_Char c) {
111 return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z'));
112 }
113
114 #endif /* _WIN32 */
115
116 static const XML_Char *
resolveSystemId(const XML_Char * base,const XML_Char * systemId,XML_Char ** toFree)117 resolveSystemId(const XML_Char *base, const XML_Char *systemId,
118 XML_Char **toFree) {
119 XML_Char *s;
120 *toFree = 0;
121 if (! base || *systemId == T('/')
122 #if defined(_WIN32)
123 || *systemId == T('\\')
124 || (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
125 #endif
126 )
127 return systemId;
128 *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)
129 * sizeof(XML_Char));
130 if (! *toFree)
131 return systemId;
132 tcscpy(*toFree, base);
133 s = *toFree;
134 if (tcsrchr(s, T('/')))
135 s = tcsrchr(s, T('/')) + 1;
136 #if defined(_WIN32)
137 if (tcsrchr(s, T('\\')))
138 s = tcsrchr(s, T('\\')) + 1;
139 #endif
140 tcscpy(s, systemId);
141 return *toFree;
142 }
143
144 static int
externalEntityRefFilemap(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)145 externalEntityRefFilemap(XML_Parser parser, const XML_Char *context,
146 const XML_Char *base, const XML_Char *systemId,
147 const XML_Char *publicId) {
148 int result;
149 XML_Char *s;
150 const XML_Char *filename;
151 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
152 int filemapRes;
153 PROCESS_ARGS args;
154 UNUSED_P(publicId);
155 args.retPtr = &result;
156 args.parser = entParser;
157 filename = resolveSystemId(base, systemId, &s);
158 XML_SetBase(entParser, filename);
159 filemapRes = filemap(filename, processFile, &args);
160 switch (filemapRes) {
161 case 0:
162 result = 0;
163 break;
164 case 2:
165 ftprintf(stderr,
166 T("%s: file too large for memory-mapping")
167 T(", switching to streaming\n"),
168 filename);
169 result = processStream(filename, entParser);
170 break;
171 }
172 free(s);
173 XML_ParserFree(entParser);
174 return result;
175 }
176
177 static int
processStream(const XML_Char * filename,XML_Parser parser)178 processStream(const XML_Char *filename, XML_Parser parser) {
179 /* passing NULL for filename means read input from stdin */
180 int fd = 0; /* 0 is the fileno for stdin */
181
182 if (filename != NULL) {
183 fd = topen(filename, O_BINARY | O_RDONLY);
184 if (fd < 0) {
185 tperror(filename);
186 return 0;
187 }
188 }
189 for (;;) {
190 int nread;
191 char *buf = (char *)XML_GetBuffer(parser, READ_SIZE);
192 if (! buf) {
193 if (filename != NULL)
194 close(fd);
195 ftprintf(stderr, T("%s: out of memory\n"),
196 filename != NULL ? filename : T("xmlwf"));
197 return 0;
198 }
199 nread = read(fd, buf, READ_SIZE);
200 if (nread < 0) {
201 tperror(filename != NULL ? filename : T("STDIN"));
202 if (filename != NULL)
203 close(fd);
204 return 0;
205 }
206 if (XML_ParseBuffer(parser, nread, nread == 0) == XML_STATUS_ERROR) {
207 reportError(parser, filename != NULL ? filename : T("STDIN"));
208 if (filename != NULL)
209 close(fd);
210 return 0;
211 }
212 if (nread == 0) {
213 if (filename != NULL)
214 close(fd);
215 break;
216 ;
217 }
218 }
219 return 1;
220 }
221
222 static int
externalEntityRefStream(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)223 externalEntityRefStream(XML_Parser parser, const XML_Char *context,
224 const XML_Char *base, const XML_Char *systemId,
225 const XML_Char *publicId) {
226 XML_Char *s;
227 const XML_Char *filename;
228 int ret;
229 XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
230 UNUSED_P(publicId);
231 filename = resolveSystemId(base, systemId, &s);
232 XML_SetBase(entParser, filename);
233 ret = processStream(filename, entParser);
234 free(s);
235 XML_ParserFree(entParser);
236 return ret;
237 }
238
239 int
XML_ProcessFile(XML_Parser parser,const XML_Char * filename,unsigned flags)240 XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) {
241 int result;
242
243 if (! XML_SetBase(parser, filename)) {
244 ftprintf(stderr, T("%s: out of memory"), filename);
245 exit(1);
246 }
247
248 if (flags & XML_EXTERNAL_ENTITIES)
249 XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE)
250 ? externalEntityRefFilemap
251 : externalEntityRefStream);
252 if (flags & XML_MAP_FILE) {
253 int filemapRes;
254 PROCESS_ARGS args;
255 args.retPtr = &result;
256 args.parser = parser;
257 filemapRes = filemap(filename, processFile, &args);
258 switch (filemapRes) {
259 case 0:
260 result = 0;
261 break;
262 case 2:
263 ftprintf(stderr,
264 T("%s: file too large for memory-mapping")
265 T(", switching to streaming\n"),
266 filename);
267 result = processStream(filename, parser);
268 break;
269 }
270 } else
271 result = processStream(filename, parser);
272 return result;
273 }
274