1 /*
2  *
3  * honggfuzz - file operations
4  * -----------------------------------------
5  *
6  * Author: Robert Swiecki <swiecki@google.com>
7  *
8  * Copyright 2010-2018 by Google Inc. All Rights Reserved.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License"); you may
11  * not use this file except in compliance with the License. You may obtain
12  * a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
19  * implied. See the License for the specific language governing
20  * permissions and limitations under the License.
21  *
22  */
23 
24 #include "input.h"
25 
26 #include <dirent.h>
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <inttypes.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <sys/mman.h>
35 #include <sys/socket.h>
36 #include <sys/stat.h>
37 #include <sys/types.h>
38 #include <unistd.h>
39 
40 #include "libhfcommon/common.h"
41 #include "libhfcommon/files.h"
42 #include "mangle.h"
43 #include "subproc.h"
44 
45 #if defined(_HF_ARCH_LINUX)
46 #include <sys/syscall.h>
47 #if defined(__NR_memfd_create)
48 #include <linux/memfd.h>
49 #endif /* defined(__NR_memfd_create) */
50 #endif /* defined(_HF_ARCH_LINUX) */
51 
52 #include "libhfcommon/log.h"
53 #include "libhfcommon/util.h"
54 
/*
 * Sets the size of the current input to 'sz' bytes: resizes the file behind
 * run->dynamicFileFd (except under Cygwin) and records the new size in run->dynamicFileSz.
 *
 * A size above run->global->mutate.maxFileSz is reported via LOG_F. A failing ftruncate() is
 * only logged as a warning, and run->dynamicFileSz is updated regardless.
 */
void input_setSize(run_t* run, size_t sz) {
    if (sz > run->global->mutate.maxFileSz) {
        /*
         * This is a plain comparison, no call has failed here, so the errno-reporting PLOG_F
         * variant would only attach an unrelated (stale) error to the message.
         */
        LOG_F("Too large size requested: %zu > maxSize: %zu", sz, run->global->mutate.maxFileSz);
    }
    /* ftruncate of a mmaped file fails under CygWin */
#if !defined(__CYGWIN__)
    /* ftruncate for each change of a dynamic file size might be expensive though */
    if (TEMP_FAILURE_RETRY(ftruncate(run->dynamicFileFd, sz)) == -1) {
        PLOG_W("ftruncate(run->dynamicFileFd=%d, sz=%zu)", run->dynamicFileFd, sz);
    }
#endif /* !defined(__CYGWIN__) */
    run->dynamicFileSz = sz;
}
68 
/*
 * Walks the whole input directory once, counting the regular files in it and remembering the
 * size of the biggest one, then rewinds the directory stream so it can be iterated again.
 *
 * The number of regular files is stored in hfuzz->io.fileCnt. If no maximal file size has been
 * set yet (hfuzz->mutate.maxFileSz == 0), it's derived from the biggest file seen, but never
 * below 8192 and never above _HF_INPUT_MAX_SIZE.
 *
 * Returns false only when readdir() fails with an error other than EINTR.
 */
static bool input_getDirStatsAndRewind(honggfuzz_t* hfuzz) {
    size_t largestSz = 0U;
    size_t usableCnt = 0U;

    rewinddir(hfuzz->io.inputDirPtr);

    for (;;) {
        errno = 0;
        struct dirent* de = readdir(hfuzz->io.inputDirPtr);
        if (de == NULL) {
            if (errno == EINTR) {
                continue;
            }
            if (errno != 0) {
                PLOG_W("readdir('%s')", hfuzz->io.inputDir);
                return false;
            }
            /* NULL with errno left at 0: end of the directory stream */
            break;
        }

        char path[PATH_MAX];
        snprintf(path, sizeof(path), "%s/%s", hfuzz->io.inputDir, de->d_name);
        LOG_D("Analyzing file '%s'", path);

        struct stat sb;
        if (stat(path, &sb) == -1) {
            LOG_W("Couldn't stat() the '%s' file", path);
            continue;
        }
        if (!S_ISREG(sb.st_mode)) {
            LOG_D("'%s' is not a regular file, skipping", path);
            continue;
        }
        /* Oversized files are only reported here, they still count as usable */
        if (hfuzz->mutate.maxFileSz != 0UL && sb.st_size > (off_t)hfuzz->mutate.maxFileSz) {
            LOG_D("File '%s' is bigger than maximal defined file size (-F): %" PRId64 " > %" PRId64,
                path, (int64_t)sb.st_size, (int64_t)hfuzz->mutate.maxFileSz);
        }
        if (largestSz < (size_t)sb.st_size) {
            largestSz = sb.st_size;
        }
        usableCnt++;
    }

    ATOMIC_SET(hfuzz->io.fileCnt, usableCnt);

    /* Pick a maximal file size only if none has been chosen so far */
    if (hfuzz->mutate.maxFileSz == 0U) {
        size_t derivedSz = largestSz;
        if (largestSz < 8192) {
            derivedSz = 8192;
        } else if (largestSz > _HF_INPUT_MAX_SIZE) {
            derivedSz = _HF_INPUT_MAX_SIZE;
        }
        hfuzz->mutate.maxFileSz = derivedSz;
    }

    if (hfuzz->io.fileCnt == 0U) {
        LOG_W("No usable files in the input directory '%s'", hfuzz->io.inputDir);
    }

    LOG_D("Re-read the '%s', maxFileSz:%zu, number of usable files:%zu", hfuzz->io.inputDir,
        hfuzz->mutate.maxFileSz, hfuzz->io.fileCnt);

    rewinddir(hfuzz->io.inputDirPtr);

    return true;
}
133 
/*
 * Fetches the path of the next regular file from the input directory.
 *
 * run:    fuzzing thread state; the directory stream and its path come from run->global->io
 * fname:  output buffer for the "<inputDir>/<name>" path, at least PATH_MAX bytes long
 * rewind: when the end of the directory is reached, re-scan it and start over (true), or
 *         report the end of the corpus by returning false (false)
 *
 * Returns true if 'fname' has been filled with the path of a regular file, false if there are
 * no (more) files to return or reading the directory failed.
 *
 * All calls are serialized with a function-local mutex, as the directory stream is shared.
 */
bool input_getNext(run_t* run, char* fname, bool rewind) {
    static pthread_mutex_t input_mutex = PTHREAD_MUTEX_INITIALIZER;
    MX_SCOPED_LOCK(&input_mutex);

    if (run->global->io.fileCnt == 0U) {
        LOG_W("No useful files in the input directory");
        return false;
    }

    for (;;) {
        errno = 0;
        struct dirent* entry = readdir(run->global->io.inputDirPtr);
        if (entry == NULL && errno == EINTR) {
            continue;
        }
        if (entry == NULL && errno != 0) {
            PLOG_W("readdir('%s')", run->global->io.inputDir);
            return false;
        }
        if (entry == NULL) {
            /* End of the directory stream */
            if (!rewind) {
                return false;
            }
            if (!input_getDirStatsAndRewind(run->global)) {
                LOG_E("input_getDirStatsAndRewind('%s')", run->global->io.inputDir);
                return false;
            }
            /*
             * The directory might have been emptied since the previous scan. Without this
             * check the loop would skip the non-regular entries, hit the end of the stream
             * and rewind again, forever, with the mutex held. The re-scan has already logged
             * a warning about the lack of usable files.
             */
            if (run->global->io.fileCnt == 0U) {
                return false;
            }
            continue;
        }

        snprintf(fname, PATH_MAX, "%s/%s", run->global->io.inputDir, entry->d_name);

        struct stat st;
        if (stat(fname, &st) == -1) {
            LOG_W("Couldn't stat() the '%s' file", fname);
            continue;
        }
        if (!S_ISREG(st.st_mode)) {
            LOG_D("'%s' is not a regular file, skipping", fname);
            continue;
        }
        return true;
    }
}
178 
/*
 * Opens the input directory (hfuzz->io.inputDir) and gathers its initial statistics.
 *
 * On success hfuzz->io.inputDirPtr holds the open directory stream and true is returned.
 * On failure false is returned, hfuzz->io.fileCnt is 0 and no descriptor or directory stream
 * opened by this function is left open.
 */
bool input_init(honggfuzz_t* hfuzz) {
    hfuzz->io.fileCnt = 0U;

    if (!hfuzz->io.inputDir) {
        LOG_W("No input file/dir specified");
        return false;
    }

    int dir_fd = open(hfuzz->io.inputDir, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
    if (dir_fd == -1) {
        PLOG_W("open('%s', O_DIRECTORY|O_RDONLY|O_CLOEXEC)", hfuzz->io.inputDir);
        return false;
    }
    if ((hfuzz->io.inputDirPtr = fdopendir(dir_fd)) == NULL) {
        /* Log before close(), which could overwrite the errno left by fdopendir() */
        PLOG_W("fdopendir('%s')", hfuzz->io.inputDir);
        close(dir_fd);
        return false;
    }
    if (input_getDirStatsAndRewind(hfuzz) == false) {
        hfuzz->io.fileCnt = 0U;
        LOG_W("input_getDirStatsAndRewind('%s')", hfuzz->io.inputDir);
        /* closedir() also closes dir_fd, which is owned by the stream since fdopendir() */
        closedir(hfuzz->io.inputDirPtr);
        hfuzz->io.inputDirPtr = NULL;
        return false;
    }

    return true;
}
205 
/*
 * Loads the dictionary file (hfuzz->mutate.dictionaryFile) into hfuzz->mutate.dictq.
 *
 * Each non-empty line which doesn't start with '#' is expected to look like
 *   "word"        or        name="word"
 * Everything after the opening quote, up to the end of the line, is passed to
 * util_decodeCString(), so words may contain blanks. Lines which match neither form are
 * skipped with a warning.
 *
 * Returns false if the file cannot be opened, true otherwise. Every loaded word increments
 * hfuzz->mutate.dictionaryCnt.
 */
bool input_parseDictionary(honggfuzz_t* hfuzz) {
    FILE* fDict = fopen(hfuzz->mutate.dictionaryFile, "rb");
    if (fDict == NULL) {
        PLOG_W("Couldn't open '%s' - R/O mode", hfuzz->mutate.dictionaryFile);
        return false;
    }
    defer {
        fclose(fDict);
    };

    char* lineptr = NULL;
    size_t n = 0;
    defer {
        free(lineptr);
    };
    for (;;) {
        ssize_t len = getdelim(&lineptr, &n, '\n', fDict);
        if (len == -1) {
            break;
        }
        /* Drop the line terminator (LF or CRLF) together with any trailing blanks */
        while (len > 0 && (lineptr[len - 1] == '\n' || lineptr[len - 1] == '\r' ||
                              lineptr[len - 1] == ' ' || lineptr[len - 1] == '\t')) {
            lineptr[len - 1] = '\0';
            len--;
        }
        /* Comments and empty lines */
        if (lineptr[0] == '#' || lineptr[0] == '\0') {
            continue;
        }
        char bufn[1025] = {};
        char bufv[1025] = {};
        /*
         * A scanset is used for the value instead of %s, because %s stops at the first
         * whitespace character and would silently cut words like "Content-Type: " short.
         * The closing quote stays in bufv, as it always did.
         * NOTE(review): this relies on util_decodeCString() ending the word at the closing
         * quote - confirm against its implementation.
         */
        if (sscanf(lineptr, "\"%1024[^\n]", bufv) != 1 &&
            sscanf(lineptr, "%1024[^=]=\"%1024[^\n]", bufn, bufv) != 2) {
            LOG_W("Incorrect dictionary entry: '%s'. Skipping", lineptr);
            continue;
        }

        LOG_D("Parsing word: '%s'", bufv);

        char* s = util_StrDup(bufv);
        struct strings_t* str = (struct strings_t*)util_Malloc(sizeof(struct strings_t));
        /* The string is decoded in place, str->len is the length reported by the decoder */
        str->len = util_decodeCString(s);
        str->s = s;
        hfuzz->mutate.dictionaryCnt += 1;
        TAILQ_INSERT_TAIL(&hfuzz->mutate.dictq, str, pointers);

        LOG_D("Dictionary: loaded word: '%s' (len=%zu)", str->s, str->len);
    }
    LOG_I("Loaded %zu words from the dictionary", hfuzz->mutate.dictionaryCnt);
    return true;
}
261 
/*
 * Loads the stack hashes blacklist file (hfuzz->feedback.blacklistFile).
 *
 * Every line is parsed as a hexadecimal number with strtoull() and appended to
 * hfuzz->feedback.blacklist, while hfuzz->feedback.blacklistCnt tracks the number of entries.
 * The entries must be sorted in non-decreasing order; an out-of-order entry or a file without
 * any entries is reported via LOG_F.
 *
 * Returns false if the file cannot be opened, the array cannot be grown, or the entries are
 * not sorted; true otherwise.
 */
bool input_parseBlacklist(honggfuzz_t* hfuzz) {
    FILE* fBl = fopen(hfuzz->feedback.blacklistFile, "rb");
    if (fBl == NULL) {
        PLOG_W("Couldn't open '%s' - R/O mode", hfuzz->feedback.blacklistFile);
        return false;
    }
    defer {
        fclose(fBl);
    };

    char* lineptr = NULL;
    /* lineptr can be NULL, but it's fine for free() */
    defer {
        free(lineptr);
    };
    size_t n = 0;
    for (;;) {
        if (getline(&lineptr, &n, fBl) == -1) {
            break;
        }

        if ((hfuzz->feedback.blacklist = util_Realloc(hfuzz->feedback.blacklist,
                 (hfuzz->feedback.blacklistCnt + 1) * sizeof(hfuzz->feedback.blacklist[0]))) ==
            NULL) {
            PLOG_W("realloc failed (sz=%zu)",
                (hfuzz->feedback.blacklistCnt + 1) * sizeof(hfuzz->feedback.blacklist[0]));
            return false;
        }

        hfuzz->feedback.blacklist[hfuzz->feedback.blacklistCnt] = strtoull(lineptr, 0, 16);
        LOG_D("Blacklist: loaded '%" PRIu64 "'",
            hfuzz->feedback.blacklist[hfuzz->feedback.blacklistCnt]);

        /*
         * Verify entries are sorted so we can use interpolation search. Every entry but the
         * very first one has a predecessor to be compared with, hence '> 0': with '> 1' the
         * first two entries were never checked against each other.
         */
        if (hfuzz->feedback.blacklistCnt > 0) {
            if (hfuzz->feedback.blacklist[hfuzz->feedback.blacklistCnt - 1] >
                hfuzz->feedback.blacklist[hfuzz->feedback.blacklistCnt]) {
                LOG_F("Blacklist file not sorted. Use 'tools/createStackBlacklist.sh' to sort "
                      "records");
                return false;
            }
        }
        hfuzz->feedback.blacklistCnt += 1;
    }

    if (hfuzz->feedback.blacklistCnt > 0) {
        LOG_I("Loaded %zu stack hash(es) from the blacklist file", hfuzz->feedback.blacklistCnt);
    } else {
        LOG_F("Empty stack hashes blacklist file '%s'", hfuzz->feedback.blacklistFile);
    }
    return true;
}
314 
/*
 * Prepares the next input from the dynamic (in-memory) corpus: moves run->dynfileqCurrent to
 * the following queue entry (wrapping around to the first one), copies its data into
 * run->dynamicFile and mangles the copy.
 *
 * An empty corpus is reported via LOG_F. Always returns true.
 */
bool input_prepareDynamicInput(run_t* run) {
    {
        /* The queue is only inspected under the read lock, which is dropped with this scope */
        MX_SCOPED_RWLOCK_READ(&run->global->io.dynfileq_mutex);

        if (run->global->io.dynfileqCnt == 0) {
            LOG_F("The dynamic file corpus is empty. This shouldn't happen");
        }

        /* Start from the head if nothing has been picked yet, or if the tail was the last pick */
        const bool startOver =
            (run->dynfileqCurrent == NULL) ||
            (run->dynfileqCurrent == TAILQ_LAST(&run->global->io.dynfileq, dyns_t));
        if (startOver) {
            run->dynfileqCurrent = TAILQ_FIRST(&run->global->io.dynfileq);
        } else {
            run->dynfileqCurrent = TAILQ_NEXT(run->dynfileqCurrent, pointers);
        }
    }

    /* Resize the input first, then fill it with the selected corpus entry */
    input_setSize(run, run->dynfileqCurrent->size);
    memcpy(run->dynamicFile, run->dynfileqCurrent->data, run->dynfileqCurrent->size);

    mangle_mangleContent(run);
    return true;
}
340 
/*
 * Prepares the next input from the static (on-disk) corpus: takes the next file of the input
 * directory, remembers its path in run->origFileName, reads at most
 * run->global->mutate.maxFileSz bytes of it into run->dynamicFile and mangles the result.
 *
 * 'rewind' is passed on to input_getNext(). Returns false if no file could be obtained or if
 * reading it failed.
 */
bool input_prepareStaticFile(run_t* run, bool rewind) {
    char path[PATH_MAX];
    if (input_getNext(run, path, /* rewind= */ rewind) == false) {
        return false;
    }
    snprintf(run->origFileName, sizeof(run->origFileName), "%s", path);

    /* Grow the input to its maximal size, so a file of any accepted size fits in */
    input_setSize(run, run->global->mutate.maxFileSz);

    const ssize_t nread =
        files_readFileToBufMax(path, run->dynamicFile, run->global->mutate.maxFileSz);
    if (nread < 0) {
        LOG_E("Couldn't read contents of '%s'", path);
        return false;
    }

    /* ...and shrink it to what has actually been read */
    input_setSize(run, nread);

    mangle_mangleContent(run);
    return true;
}
360 
/*
 * Prepares an input produced by the external command (run->global->exe.externalCommand).
 *
 * An empty temporary file is created in the work directory and its "/dev/fd/<fd>" path is
 * given to the command as the only argument. Once the command exits with 0, up to
 * run->global->mutate.maxFileSz bytes are read back from that descriptor into
 * run->dynamicFile.
 *
 * Returns false if the temporary file cannot be created, the command returns a non-zero
 * value, or the data cannot be read back.
 */
bool input_prepareExternalFile(run_t* run) {
    snprintf(run->origFileName, sizeof(run->origFileName), "[EXTERNAL]");

    const int fd = files_writeBufToTmpFile(run->global->io.workDir, (const uint8_t*)"", 0, 0);
    if (fd == -1) {
        LOG_E("Couldn't write input file to a temporary buffer");
        return false;
    }
    defer {
        close(fd);
    };

    /* The command gets at the file through its descriptor's path */
    char fdPath[PATH_MAX];
    snprintf(fdPath, sizeof(fdPath), "/dev/fd/%d", fd);

    const char* const cmd[] = {run->global->exe.externalCommand, fdPath, NULL};
    const bool cmdOk = (subproc_System(run, cmd) == 0);
    if (!cmdOk) {
        LOG_E("Subprocess '%s' returned abnormally", run->global->exe.externalCommand);
        return false;
    }
    LOG_D("Subporcess '%s' finished with success", run->global->exe.externalCommand);

    /* Make room for the biggest possible input, then trim to what has been read */
    input_setSize(run, run->global->mutate.maxFileSz);
    const ssize_t nread =
        files_readFromFdSeek(fd, run->dynamicFile, run->global->mutate.maxFileSz, 0);
    if (nread == -1) {
        LOG_E("Couldn't read file from fd=%d", fd);
        return false;
    }
    input_setSize(run, (size_t)nread);

    return true;
}
393 
/*
 * Runs the post-processing command (run->global->exe.postExternalCommand) on the current
 * input.
 *
 * The current input (run->dynamicFile, run->dynamicFileSz bytes) is written to a temporary
 * file in the work directory, whose "/dev/fd/<fd>" path is given to the command as the only
 * argument. Once the command exits with 0, up to run->global->mutate.maxFileSz bytes are read
 * back from that descriptor into run->dynamicFile, replacing the input.
 *
 * Returns false if the temporary file cannot be created, the command returns a non-zero
 * value, or the data cannot be read back.
 */
bool input_postProcessFile(run_t* run) {
    int fd =
        files_writeBufToTmpFile(run->global->io.workDir, run->dynamicFile, run->dynamicFileSz, 0);
    if (fd == -1) {
        LOG_E("Couldn't write input file to a temporary buffer");
        return false;
    }
    defer {
        close(fd);
    };

    char fname[PATH_MAX];
    snprintf(fname, sizeof(fname), "/dev/fd/%d", fd);

    const char* const argv[] = {run->global->exe.postExternalCommand, fname, NULL};
    if (subproc_System(run, argv) != 0) {
        LOG_E("Subprocess '%s' returned abnormally", run->global->exe.postExternalCommand);
        return false;
    }
    /*
     * Report the command which has actually been run: exe.externalCommand is a different
     * setting, and is not necessarily set when post-processing is in use.
     */
    LOG_D("Subporcess '%s' finished with success", run->global->exe.postExternalCommand);

    /* Make room for the biggest possible input, then trim to what has been read */
    input_setSize(run, run->global->mutate.maxFileSz);
    ssize_t sz = files_readFromFdSeek(fd, run->dynamicFile, run->global->mutate.maxFileSz, 0);
    if (sz == -1) {
        LOG_E("Couldn't read file from fd=%d", fd);
        return false;
    }

    input_setSize(run, (size_t)sz);
    return true;
}
425