1 /*
2  * This file is part of ltrace.
3  * Copyright (C) 2007,2008,2012,2013 Petr Machata, Red Hat Inc.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation; either version 2 of the
8  * License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18  * 02110-1301 USA
19  */
20 
21 #include <sys/types.h>
22 #include <regex.h>
23 #include <string.h>
24 #include <stdlib.h>
25 #include <assert.h>
26 
/* Locate the terminator of a character class such as "[:space:]".
 *
 * GLOB[FROM] is the '[' that opens the class and GLOB[FROM + 1] is
 * expected to be the ':' following it; LENGTH is the number of
 * characters in the pattern.
 *
 * Returns the index of the ':' belonging to the closing ":]", or -1
 * when the first ':' after the opener is not immediately followed by
 * ']' or no ':' is found inside the pattern.  */
static ssize_t
match_character_class(const char *glob, size_t length, size_t from)
{
	assert(length > 0);

	/* The opening "[:" itself has to fit inside the pattern.  */
	if (from > length || length - from < 2)
		return -1;
	size_t start = from + 2;

	/* Search only the characters that remain after "[:"; looking
	 * any further would read past the end of the pattern.  */
	const char *colon = memchr(glob + start, ':', length - start);
	if (colon == NULL)
		return -1;

	/* The ']' that completes ":]" must lie inside the pattern too.  */
	size_t colon_idx = (size_t)(colon - glob);
	if (colon_idx + 1 >= length || colon[1] != ']')
		return -1;

	return (ssize_t)colon_idx;
}
36 
/* Find the ']' that closes the bracket expression opening at
 * GLOB[FROM].  LENGTH is the number of characters in the pattern.
 *
 * *EXCLMP is set to 1 when the expression is complemented with '!',
 * and to 0 when it is complemented with '^' or not complemented at
 * all.  It is left untouched when the pattern ends right after the
 * opening '['.
 *
 * Returns the index of the closing ']', or -1 when the expression
 * (or a character class inside it) is not terminated.  */
static ssize_t
match_brack(const char *glob, size_t length, size_t from, int *exclmp)
{
	size_t i = from + 1;

	if (i >= length)
		return -1;

	/* Complement operator.  */
	*exclmp = 0;
	if (glob[i] == '^' || glob[i] == '!') {
		*exclmp = glob[i] == '!';
		++i;
		if (i >= length)
			return -1;
	}

	/* On first character, both [ and ] are legal.  But when [ is
	 * followed with :, it's character class.  */
	if (glob[i] == '[' && glob[i + 1] == ':') {
		ssize_t j = match_character_class(glob, length, i);
		if (j < 0)
			return -1;
		/* J is the ':' of the closing ":]".  Step onto that ']'
		 * so that the skip below moves past the whole class
		 * instead of mistaking its ']' for the end of the
		 * bracket expression.  */
		i = (size_t)j + 1;
	}
	++i; /* skip any character, including [ or ]  */

	for (; i < length; ++i) {
		char c = glob[i];
		if (c == '[' && glob[i + 1] == ':') {
			ssize_t j = match_character_class(glob, length, i);
			if (j < 0)
				return -1;
			/* Land on the class's own ']'; the loop
			 * increment then steps over it.  */
			i = (size_t)j + 1;

		} else if (c == ']') {
			return (ssize_t)i;
		}
	}
	return -1;
}
78 
/* Append bytes from STR to the growable buffer *BUFP.
 *
 * STR_SIZE is the number of bytes to copy; a value of 0 means "use
 * strlen(STR)".  *SIZEP holds the number of bytes already stored and
 * *ALLOCP the current capacity; both are updated on success.  When
 * the buffer is too small it is reallocated to twice the size needed.
 *
 * Returns 0 on success, or -1 when reallocation fails, in which case
 * *BUFP, *SIZEP and *ALLOCP are left as they were.  */
static int
append(char **bufp, const char *str, size_t str_size,
       size_t *sizep, size_t *allocp)
{
	const size_t count = str_size != 0 ? str_size : strlen(str);
	const size_t needed = *sizep + count;

	if (needed > *allocp) {
		const size_t grown = 2 * needed;
		char *resized = realloc(*bufp, grown);
		if (resized == NULL)
			return -1;
		*bufp = resized;
		*allocp = grown;
	}

	memcpy(*bufp + *sizep, str, count);
	*sizep = needed;
	return 0;
}
99 
/* Translate the glob pattern GLOB into a regular expression string.
 *
 * Outside bracket expressions: '*' becomes ".*", '?' becomes ".",
 * '.' becomes "\.", and a backslash escapes the next character.
 * Bracket expressions are copied through, with a leading "[!"
 * rewritten to "[^".  Everything else is copied verbatim.
 *
 * On success returns 0 and stores a NUL-terminated, malloc'd string
 * in *RETP, which the caller must free.  On failure *RETP is not
 * written and one of these is returned: REG_ESPACE (out of memory),
 * REG_EBRACK (unterminated bracket expression) or REG_EESCAPE
 * (pattern ends with a lone backslash).  */
static int
glob_to_regex(const char *glob, char **retp)
{
	char *out = NULL;
	size_t used = 0;
	size_t capacity = 0;

	/* Any failed append is an allocation failure; the other error
	 * paths override this before bailing out.  */
	int status = REG_ESPACE;

	const size_t length = strlen(glob);
	int after_backslash = 0;

	for (size_t pos = 0; pos < length; ++pos) {
		const char ch = glob[pos];
		int rc = 0;

		if (after_backslash) {
			/* Character following a backslash.  */
			switch (ch) {
			case '\\':
				rc = append(&out, "\\\\", 0, &used, &capacity);
				break;
			case '*':
				rc = append(&out, "\\*", 0, &used, &capacity);
				break;
			case '?':
				rc = append(&out, "?", 0, &used, &capacity);
				break;
			default: {
				/* Keep the escape as it was written.  */
				const char pair[2] = { '\\', ch };
				rc = append(&out, pair, 2, &used, &capacity);
				break;
			}
			}
			if (rc < 0)
				goto fail;
			after_backslash = 0;
			continue;
		}

		switch (ch) {
		case '\\':
			after_backslash = 1;
			break;

		case '[': {
			int bang;
			const ssize_t close = match_brack(glob, length,
							  pos, &bang);
			if (close < 0) {
				status = REG_EBRACK;
				goto fail;
			}

			/* For "[!...]" emit "[^" ourselves and skip the
			 * two characters it replaces.  */
			const size_t skipped = 2 * (size_t)bang;
			if (bang
			    && append(&out, "[^", 2, &used, &capacity) < 0)
				goto fail;
			rc = append(&out, glob + pos + skipped,
				    (size_t)close - pos + 1 - skipped,
				    &used, &capacity);

			/* Resume scanning after the closing ']'.  */
			pos = (size_t)close;
			break;
		}

		case '*':
			rc = append(&out, ".*", 0, &used, &capacity);
			break;
		case '?':
			rc = append(&out, ".", 0, &used, &capacity);
			break;
		case '.':
			rc = append(&out, "\\.", 0, &used, &capacity);
			break;
		default:
			rc = append(&out, &ch, 1, &used, &capacity);
			break;
		}
		if (rc < 0)
			goto fail;
	}

	/* A trailing backslash has nothing to escape.  */
	if (after_backslash) {
		status = REG_EESCAPE;
		goto fail;
	}

	/* Terminate the string.  The size must be passed explicitly,
	 * because a size of 0 would make append call strlen.  */
	const char nul = 0;
	if (append(&out, &nul, 1, &used, &capacity) < 0)
		goto fail;

	*retp = out;
	return 0;

fail:
	free(out);
	return status;
}
178 
/* Compile the glob pattern GLOB into PREG.
 *
 * GLOB is first translated to a regular expression and then handed to
 * regcomp together with CFLAGS.  Returns 0 on success; otherwise the
 * status from the translation step or from regcomp.  */
int
globcomp(regex_t *preg, const char *glob, int cflags)
{
	char *translated = NULL;
	int rc = glob_to_regex(glob, &translated);

	if (rc == 0) {
		assert(translated != NULL);
		rc = regcomp(preg, translated, cflags);
		/* The intermediate pattern is released once regcomp
		 * has returned.  */
		free(translated);
	}
	return rc;
}
191 
192 #ifdef TEST
193 #include <stdio.h>
194 
/* Test helper: translate GLOB and compare the outcome with what is
 * expected.
 *
 * EXP_STATUS is the status glob_to_regex should return.  When it is
 * 0, EXPECT is the regular expression that should be produced;
 * otherwise EXPECT is not used.  Mismatches are reported on stderr.  */
static void
translate(const char *glob, int exp_status, const char *expect)
{
	char *pattern = NULL;
	int status = glob_to_regex(glob, &pattern);
	if (status != exp_status) {
		fprintf(stderr, "translating %s, expected status %d, got %d\n",
			glob, exp_status, status);
		/* If the translation unexpectedly succeeded we own a
		 * buffer; release it.  free(NULL) is a no-op.  */
		free(pattern);
		return;
	}

	if (status == 0) {
		assert(pattern != NULL);
		if (strcmp(pattern, expect) != 0)
			fprintf(stderr, "translating %s, expected %s, got %s\n",
				glob, expect, pattern);
		free(pattern);
	} else {
		/* Failed translations must not write a result.  */
		assert(pattern == NULL);
	}
}
216 
/* Test helper: compile GLOB with globcomp (cflags 0), run it against
 * STR and assert that regexec returns EXPECT.  Compilation itself is
 * asserted to succeed.  */
static void
try_match(const char *glob, const char *str, int expect)
{
	regex_t compiled;

	int rc = globcomp(&compiled, glob, 0);
	assert(rc == 0);

	/* No sub-match information is requested.  */
	rc = regexec(&compiled, str, 0, NULL, 0);
	assert(rc == expect);

	regfree(&compiled);
}
227 
/* Self-test driver: run every translation case, then every matching
 * case, in table order.  Always returns 0; translation mismatches are
 * reported on stderr by translate, matching failures trip the
 * assertions in try_match.  */
int
main(void)
{
	/* Glob, expected glob_to_regex status, expected regex (NULL
	 * where the translation is expected to fail).  */
	const struct {
		const char *glob;
		int status;
		const char *regex;
	} translations[] = {
		{ "*", 0, ".*" },
		{ "?", 0, "." },
		{ ".*", 0, "\\..*" },
		{ "*.*", 0, ".*\\..*" },
		{ "*a*", 0, ".*a.*" },
		{ "[abc]", 0, "[abc]" },
		{ "[^abc]", 0, "[^abc]" },
		{ "[!abc]", 0, "[^abc]" },
		{ "[]]", 0, "[]]" },
		{ "[[]", 0, "[[]" },
		{ "[^]]", 0, "[^]]" },
		{ "[^a-z]", 0, "[^a-z]" },
		{ "[abc\\]]", 0, "[abc\\]]" },
		{ "[abc\\]def]", 0, "[abc\\]def]" },
		{ "[[:space:]]", 0, "[[:space:]]" },
		{ "[^[:space:]]", 0, "[^[:space:]]" },
		{ "[![:space:]]", 0, "[^[:space:]]" },
		{ "[^a-z]*", 0, "[^a-z].*" },
		{ "[^a-z]bar*", 0, "[^a-z]bar.*" },
		{ "*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.", 0,
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\."
		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\." },

		{ "\\", REG_EESCAPE, NULL },
		{ "[^[:naotuh\\", REG_EBRACK, NULL },
		{ "[^[:", REG_EBRACK, NULL },
		{ "[^[", REG_EBRACK, NULL },
		{ "[^", REG_EBRACK, NULL },
		{ "[\\", REG_EBRACK, NULL },
		{ "[", REG_EBRACK, NULL },
		{ "abc[", REG_EBRACK, NULL },
	};

	/* Glob, subject string, expected regexec result.  */
	const struct {
		const char *glob;
		const char *subject;
		int result;
	} matches[] = {
		{ "abc*def", "abc012def", 0 },
		{ "abc*def", "ab012def", REG_NOMATCH },
		{ "[abc]*def", "a1def", 0 },
		{ "[abc]*def", "b1def", 0 },
		{ "[abc]*def", "d1def", REG_NOMATCH },
	};

	const size_t n_translations
		= sizeof translations / sizeof translations[0];
	const size_t n_matches = sizeof matches / sizeof matches[0];

	for (size_t k = 0; k < n_translations; ++k)
		translate(translations[k].glob, translations[k].status,
			  translations[k].regex);

	for (size_t k = 0; k < n_matches; ++k)
		try_match(matches[k].glob, matches[k].subject,
			  matches[k].result);

	return 0;
}
271 
272 #endif
273