1 /*
2  * fuzz.c: Common functions for fuzzing.
3  *
4  * See Copyright for the status of this software.
5  */
6 
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <sys/stat.h>
11 
12 #include <libxml/hash.h>
13 #include <libxml/parser.h>
14 #include <libxml/parserInternals.h>
15 #include <libxml/tree.h>
16 #include <libxml/xmlIO.h>
17 #include "fuzz.h"
18 
/* Payload stored in the entity hash table: one entity's content. */
typedef struct {
    const char *data;   /* entity content */
    size_t size;        /* length of the content in bytes */
} xmlFuzzEntityInfo;
23 
24 /* Single static instance for now */
25 static struct {
26     /* Original data */
27     const char *data;
28     size_t size;
29 
30     /* Remaining data */
31     const char *ptr;
32     size_t remaining;
33 
34     /* Buffer for unescaped strings */
35     char *outBuf;
36     char *outPtr; /* Free space at end of buffer */
37 
38     xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */
39 
40     /* The first entity is the main entity. */
41     const char *mainUrl;
42     xmlFuzzEntityInfo *mainEntity;
43 } fuzzData;
44 
45 /**
46  * xmlFuzzErrorFunc:
47  *
48  * An error function that simply discards all errors.
49  */
50 void
xmlFuzzErrorFunc(void * ctx ATTRIBUTE_UNUSED,const char * msg ATTRIBUTE_UNUSED,...)51 xmlFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED,
52                  ...) {
53 }
54 
55 /**
56  * xmlFuzzDataInit:
57  *
58  * Initialize fuzz data provider.
59  */
60 void
xmlFuzzDataInit(const char * data,size_t size)61 xmlFuzzDataInit(const char *data, size_t size) {
62     fuzzData.data = data;
63     fuzzData.size = size;
64     fuzzData.ptr = data;
65     fuzzData.remaining = size;
66 
67     fuzzData.outBuf = xmlMalloc(size + 1);
68     fuzzData.outPtr = fuzzData.outBuf;
69 
70     fuzzData.entities = xmlHashCreate(8);
71     fuzzData.mainUrl = NULL;
72     fuzzData.mainEntity = NULL;
73 }
74 
75 /**
76  * xmlFuzzDataFree:
77  *
78  * Cleanup fuzz data provider.
79  */
80 void
xmlFuzzDataCleanup(void)81 xmlFuzzDataCleanup(void) {
82     xmlFree(fuzzData.outBuf);
83     xmlHashFree(fuzzData.entities, xmlHashDefaultDeallocator);
84 }
85 
86 /**
87  * xmlFuzzReadInt:
88  * @size:  size of string in bytes
89  *
90  * Read an integer from the fuzz data.
91  */
92 int
xmlFuzzReadInt()93 xmlFuzzReadInt() {
94     int ret;
95 
96     if (fuzzData.remaining < sizeof(int))
97         return(0);
98     memcpy(&ret, fuzzData.ptr, sizeof(int));
99     fuzzData.ptr += sizeof(int);
100     fuzzData.remaining -= sizeof(int);
101 
102     return ret;
103 }
104 
105 /**
106  * xmlFuzzReadRemaining:
107  * @size:  size of string in bytes
108  *
109  * Read remaining bytes from fuzz data.
110  */
111 const char *
xmlFuzzReadRemaining(size_t * size)112 xmlFuzzReadRemaining(size_t *size) {
113     const char *ret = fuzzData.ptr;
114 
115     *size = fuzzData.remaining;
116     fuzzData.ptr += fuzzData.remaining;
117     fuzzData.remaining = 0;
118 
119     return(ret);
120 }
121 
/**
 * xmlFuzzWriteString:
 * @out:  output file
 * @str:  zero-terminated string to write
 *
 * Write @str to @out in the escaped format understood by
 * xmlFuzzReadString: every backslash in the string is doubled, and the
 * string is terminated by a backslash followed by a newline.
 */
void
xmlFuzzWriteString(FILE *out, const char *str) {
    const unsigned char *cur = (const unsigned char *) str;

    while (*cur != 0) {
        int ch = *cur++;

        putc(ch, out);
        /* Escape a backslash by emitting it twice. */
        if (ch == '\\')
            putc('\\', out);
    }

    /* End-of-string marker. */
    putc('\\', out);
    putc('\n', out);
}
142 
143 /**
144  * xmlFuzzReadString:
145  * @size:  size of string in bytes
146  *
147  * Read a random-length string from the fuzz data.
148  *
149  * The format is similar to libFuzzer's FuzzedDataProvider but treats
150  * backslash followed by newline as end of string. This makes the fuzz data
151  * more readable. A backslash character is escaped with another backslash.
152  *
153  * Returns a zero-terminated string or NULL if the fuzz data is exhausted.
154  */
155 const char *
xmlFuzzReadString(size_t * size)156 xmlFuzzReadString(size_t *size) {
157     const char *out = fuzzData.outPtr;
158 
159     while (fuzzData.remaining > 0) {
160         int c = *fuzzData.ptr++;
161         fuzzData.remaining--;
162 
163         if ((c == '\\') && (fuzzData.remaining > 0)) {
164             int c2 = *fuzzData.ptr;
165 
166             if (c2 == '\n') {
167                 fuzzData.ptr++;
168                 fuzzData.remaining--;
169                 *size = fuzzData.outPtr - out;
170                 *fuzzData.outPtr++ = '\0';
171                 return(out);
172             }
173             if (c2 == '\\') {
174                 fuzzData.ptr++;
175                 fuzzData.remaining--;
176             }
177         }
178 
179         *fuzzData.outPtr++ = c;
180     }
181 
182     if (fuzzData.outPtr > out) {
183         *size = fuzzData.outPtr - out;
184         *fuzzData.outPtr++ = '\0';
185         return(out);
186     }
187 
188     return(NULL);
189 }
190 
191 /**
192  * xmlFuzzReadEntities:
193  *
194  * Read entities like the main XML file, external DTDs, external parsed
195  * entities from fuzz data.
196  */
197 void
xmlFuzzReadEntities(void)198 xmlFuzzReadEntities(void) {
199     size_t num = 0;
200 
201     while (1) {
202         const char *url, *entity;
203         size_t urlSize, entitySize;
204         xmlFuzzEntityInfo *entityInfo;
205 
206         url = xmlFuzzReadString(&urlSize);
207         if (url == NULL) break;
208 
209         entity = xmlFuzzReadString(&entitySize);
210         if (entity == NULL) break;
211 
212         if (xmlHashLookup(fuzzData.entities, (xmlChar *)url) == NULL) {
213             entityInfo = xmlMalloc(sizeof(xmlFuzzEntityInfo));
214             if (entityInfo == NULL)
215                 break;
216             entityInfo->data = entity;
217             entityInfo->size = entitySize;
218 
219             xmlHashAddEntry(fuzzData.entities, (xmlChar *)url, entityInfo);
220 
221             if (num == 0) {
222                 fuzzData.mainUrl = url;
223                 fuzzData.mainEntity = entityInfo;
224             }
225 
226             num++;
227         }
228     }
229 }
230 
231 /**
232  * xmlFuzzMainUrl:
233  *
234  * Returns the main URL.
235  */
236 const char *
xmlFuzzMainUrl(void)237 xmlFuzzMainUrl(void) {
238     return(fuzzData.mainUrl);
239 }
240 
241 /**
242  * xmlFuzzMainEntity:
243  * @size:  size of the main entity in bytes
244  *
245  * Returns the main entity.
246  */
247 const char *
xmlFuzzMainEntity(size_t * size)248 xmlFuzzMainEntity(size_t *size) {
249     if (fuzzData.mainEntity == NULL)
250         return(NULL);
251     *size = fuzzData.mainEntity->size;
252     return(fuzzData.mainEntity->data);
253 }
254 
255 /**
256  * xmlFuzzEntityLoader:
257  *
258  * The entity loader for fuzz data.
259  */
260 xmlParserInputPtr
xmlFuzzEntityLoader(const char * URL,const char * ID ATTRIBUTE_UNUSED,xmlParserCtxtPtr ctxt)261 xmlFuzzEntityLoader(const char *URL, const char *ID ATTRIBUTE_UNUSED,
262                     xmlParserCtxtPtr ctxt) {
263     xmlParserInputPtr input;
264     xmlFuzzEntityInfo *entity;
265 
266     if (URL == NULL)
267         return(NULL);
268     entity = xmlHashLookup(fuzzData.entities, (xmlChar *) URL);
269     if (entity == NULL)
270         return(NULL);
271 
272     input = xmlNewInputStream(ctxt);
273     input->filename = NULL;
274     input->buf = xmlParserInputBufferCreateMem(entity->data, entity->size,
275                                                XML_CHAR_ENCODING_NONE);
276     if (input->buf == NULL) {
277         xmlFreeInputStream(input);
278         return(NULL);
279     }
280     input->base = input->cur = xmlBufContent(input->buf->buffer);
281     input->end = input->base + entity->size;
282 
283     return input;
284 }
285 
/**
 * xmlFuzzExtractStrings:
 * @data:  input data
 * @size:  size of the input data in bytes
 * @strings:  array receiving the extracted strings
 * @numStrings:  number of elements in @strings
 *
 * Extract C strings from input data. Use exact-size allocations to detect
 * potential memory errors.
 *
 * The input is split at zero bytes; the last string runs to the end of the
 * data if no further zero byte is found. Each string is copied into its own
 * buffer allocated with xmlMalloc. Extraction stops after @numStrings
 * strings, when the data is used up, or when an allocation fails.
 *
 * Returns the number of strings extracted. The first that many elements
 * of @strings are non-NULL; all remaining elements are set to NULL.
 */
size_t
xmlFuzzExtractStrings(const char *data, size_t size, char **strings,
                      size_t numStrings) {
    const char *start = data;
    const char *end = data + size;
    size_t i = 0, ret;

    while (i < numStrings) {
        size_t strSize = end - start;
        const char *zero = memchr(start, 0, strSize);

        if (zero != NULL)
            strSize = zero - start;

        strings[i] = xmlMalloc(strSize + 1);
        /* Out of memory: stop here, the slot is reported as unused. */
        if (strings[i] == NULL)
            break;
        memcpy(strings[i], start, strSize);
        strings[i][strSize] = '\0';

        i++;
        if (zero != NULL)
            start = zero + 1;
        else
            break;
    }

    ret = i;

    /* Clear the unused tail so callers can free every element. */
    while (i < numStrings) {
        strings[i] = NULL;
        i++;
    }

    return(ret);
}
326 
/**
 * xmlSlurpFile:
 * @path:  path of the file to read
 * @sizeRet:  receives the file size in bytes on success (may be NULL)
 *
 * Read a whole regular file into a newly allocated buffer. A zero byte is
 * appended after the file content.
 *
 * Returns the buffer, allocated with xmlMalloc, or NULL if @path is not a
 * regular file, cannot be opened or read completely, or if the allocation
 * fails.
 */
char *
xmlSlurpFile(const char *path, size_t *sizeRet) {
    struct stat info;
    FILE *stream;
    char *contents;
    size_t length;

    if (stat(path, &info) != 0)
        return(NULL);
    if (!S_ISREG(info.st_mode))
        return(NULL);
    length = info.st_size;

    stream = fopen(path, "rb");
    if (stream == NULL)
        return(NULL);

    contents = xmlMalloc(length + 1);
    if (contents != NULL) {
        if (fread(contents, 1, length, stream) == length) {
            contents[length] = 0;
            if (sizeRet != NULL)
                *sizeRet = length;
        } else {
            /* Short read: discard the partial content. */
            xmlFree(contents);
            contents = NULL;
        }
    }

    fclose(stream);
    return(contents);
}
355 
356