1 /*
2                             __  __            _
3                          ___\ \/ /_ __   __ _| |_
4                         / _ \\  /| '_ \ / _` | __|
5                        |  __//  \| |_) | (_| | |_
6                         \___/_/\_\ .__/ \__,_|\__|
7                                  |_| XML parser
8 
9    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10    Copyright (c) 2000-2017 Expat development team
11    Licensed under the MIT license:
12 
13    Permission is  hereby granted,  free of charge,  to any  person obtaining
14    a  copy  of  this  software   and  associated  documentation  files  (the
15    "Software"),  to  deal in  the  Software  without restriction,  including
16    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
17    distribute, sublicense, and/or sell copies of the Software, and to permit
18    persons  to whom  the Software  is  furnished to  do so,  subject to  the
19    following conditions:
20 
21    The above copyright  notice and this permission notice  shall be included
22    in all copies or substantial portions of the Software.
23 
24    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
25    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
26    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
29    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30    USE OR OTHER DEALINGS IN THE SOFTWARE.
31 */
32 
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <stddef.h>
36 #include <string.h>
37 #include <fcntl.h>
38 
39 #ifdef _WIN32
40 #include "winconfig.h"
41 #elif defined(HAVE_EXPAT_CONFIG_H)
42 #include <expat_config.h>
43 #endif /* ndef _WIN32 */
44 
45 #include "expat.h"
46 #include "internal.h"  /* for UNUSED_P only */
47 #include "xmlfile.h"
48 #include "xmltchar.h"
49 #include "filemap.h"
50 
51 #if defined(_MSC_VER)
52 #include <io.h>
53 #endif
54 
55 #ifdef HAVE_UNISTD_H
56 #include <unistd.h>
57 #endif
58 
59 #ifndef O_BINARY
60 #ifdef _O_BINARY
61 #define O_BINARY _O_BINARY
62 #else
63 #define O_BINARY 0
64 #endif
65 #endif
66 
67 #ifdef _DEBUG
68 #define READ_SIZE 16
69 #else
70 #define READ_SIZE (1024*8)
71 #endif
72 
73 
74 typedef struct {
75   XML_Parser parser;
76   int *retPtr;
77 } PROCESS_ARGS;
78 
79 static int
80 processStream(const XML_Char *filename, XML_Parser parser);
81 
82 static void
reportError(XML_Parser parser,const XML_Char * filename)83 reportError(XML_Parser parser, const XML_Char *filename)
84 {
85   enum XML_Error code = XML_GetErrorCode(parser);
86   const XML_Char *message = XML_ErrorString(code);
87   if (message)
88     ftprintf(stdout,
89              T("%s")
90                T(":%") T(XML_FMT_INT_MOD) T("u")
91                T(":%") T(XML_FMT_INT_MOD) T("u")
92                T(": %s\n"),
93              filename,
94              XML_GetErrorLineNumber(parser),
95              XML_GetErrorColumnNumber(parser),
96              message);
97   else
98     ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code);
99 }
100 
101 /* This implementation will give problems on files larger than INT_MAX. */
102 static void
processFile(const void * data,size_t size,const XML_Char * filename,void * args)103 processFile(const void *data, size_t size,
104             const XML_Char *filename, void *args)
105 {
106   XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
107   int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
108   if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) {
109     reportError(parser, filename);
110     *retPtr = 0;
111   }
112   else
113     *retPtr = 1;
114 }
115 
116 #if defined(_WIN32)
117 
118 static int
isAsciiLetter(XML_Char c)119 isAsciiLetter(XML_Char c)
120 {
121   return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z'));
122 }
123 
124 #endif /* _WIN32 */
125 
126 static const XML_Char *
resolveSystemId(const XML_Char * base,const XML_Char * systemId,XML_Char ** toFree)127 resolveSystemId(const XML_Char *base, const XML_Char *systemId,
128                 XML_Char **toFree)
129 {
130   XML_Char *s;
131   *toFree = 0;
132   if (!base
133       || *systemId == T('/')
134 #if defined(_WIN32)
135       || *systemId == T('\\')
136       || (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
137 #endif
138      )
139     return systemId;
140   *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)
141                                * sizeof(XML_Char));
142   if (!*toFree)
143     return systemId;
144   tcscpy(*toFree, base);
145   s = *toFree;
146   if (tcsrchr(s, T('/')))
147     s = tcsrchr(s, T('/')) + 1;
148 #if defined(_WIN32)
149   if (tcsrchr(s, T('\\')))
150     s = tcsrchr(s, T('\\')) + 1;
151 #endif
152   tcscpy(s, systemId);
153   return *toFree;
154 }
155 
156 static int
externalEntityRefFilemap(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * UNUSED_P (publicId))157 externalEntityRefFilemap(XML_Parser parser,
158                          const XML_Char *context,
159                          const XML_Char *base,
160                          const XML_Char *systemId,
161                          const XML_Char *UNUSED_P(publicId))
162 {
163   int result;
164   XML_Char *s;
165   const XML_Char *filename;
166   XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
167   int filemapRes;
168   PROCESS_ARGS args;
169   args.retPtr = &result;
170   args.parser = entParser;
171   filename = resolveSystemId(base, systemId, &s);
172   XML_SetBase(entParser, filename);
173   filemapRes = filemap(filename, processFile, &args);
174   switch (filemapRes) {
175   case 0:
176     result = 0;
177     break;
178   case 2:
179     ftprintf(stderr, T("%s: file too large for memory-mapping")
180         T(", switching to streaming\n"), filename);
181     result = processStream(filename, entParser);
182     break;
183   }
184   free(s);
185   XML_ParserFree(entParser);
186   return result;
187 }
188 
189 static int
processStream(const XML_Char * filename,XML_Parser parser)190 processStream(const XML_Char *filename, XML_Parser parser)
191 {
192   /* passing NULL for filename means read intput from stdin */
193   int fd = 0;   /* 0 is the fileno for stdin */
194 
195   if (filename != NULL) {
196     fd = topen(filename, O_BINARY|O_RDONLY);
197     if (fd < 0) {
198       tperror(filename);
199       return 0;
200     }
201   }
202   for (;;) {
203     int nread;
204     char *buf = (char *)XML_GetBuffer(parser, READ_SIZE);
205     if (!buf) {
206       if (filename != NULL)
207         close(fd);
208       ftprintf(stderr, T("%s: out of memory\n"),
209                filename != NULL ? filename : T("xmlwf"));
210       return 0;
211     }
212     nread = read(fd, buf, READ_SIZE);
213     if (nread < 0) {
214       tperror(filename != NULL ? filename : T("STDIN"));
215       if (filename != NULL)
216         close(fd);
217       return 0;
218     }
219     if (XML_ParseBuffer(parser, nread, nread == 0) == XML_STATUS_ERROR) {
220         reportError(parser, filename != NULL ? filename : T("STDIN"));
221       if (filename != NULL)
222         close(fd);
223       return 0;
224     }
225     if (nread == 0) {
226       if (filename != NULL)
227         close(fd);
228       break;;
229     }
230   }
231   return 1;
232 }
233 
234 static int
externalEntityRefStream(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * UNUSED_P (publicId))235 externalEntityRefStream(XML_Parser parser,
236                         const XML_Char *context,
237                         const XML_Char *base,
238                         const XML_Char *systemId,
239                         const XML_Char *UNUSED_P(publicId))
240 {
241   XML_Char *s;
242   const XML_Char *filename;
243   int ret;
244   XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
245   filename = resolveSystemId(base, systemId, &s);
246   XML_SetBase(entParser, filename);
247   ret = processStream(filename, entParser);
248   free(s);
249   XML_ParserFree(entParser);
250   return ret;
251 }
252 
253 int
XML_ProcessFile(XML_Parser parser,const XML_Char * filename,unsigned flags)254 XML_ProcessFile(XML_Parser parser,
255                 const XML_Char *filename,
256                 unsigned flags)
257 {
258   int result;
259 
260   if (!XML_SetBase(parser, filename)) {
261     ftprintf(stderr, T("%s: out of memory"), filename);
262     exit(1);
263   }
264 
265   if (flags & XML_EXTERNAL_ENTITIES)
266       XML_SetExternalEntityRefHandler(parser,
267                                       (flags & XML_MAP_FILE)
268                                       ? externalEntityRefFilemap
269                                       : externalEntityRefStream);
270   if (flags & XML_MAP_FILE) {
271     int filemapRes;
272     PROCESS_ARGS args;
273     args.retPtr = &result;
274     args.parser = parser;
275     filemapRes = filemap(filename, processFile, &args);
276     switch (filemapRes) {
277     case 0:
278       result = 0;
279       break;
280     case 2:
281       ftprintf(stderr, T("%s: file too large for memory-mapping")
282           T(", switching to streaming\n"), filename);
283       result = processStream(filename, parser);
284       break;
285     }
286   }
287   else
288     result = processStream(filename, parser);
289   return result;
290 }
291