1 #include <assert.h>
2 #include <pthread.h>
3 #include <stdint.h>
4 #include <stdio.h>
5 #include <string.h>
6 
7 #include "regex.h"
8 #include "label_file.h"
9 #include "selinux_internal.h"
10 
/*
 * Integer type whose size is reported by regex_arch_string(): PCRE2_SIZE
 * when building against PCRE2, plain size_t otherwise.
 */
#ifdef USE_PCRE2
#define REGEX_ARCH_SIZE_T PCRE2_SIZE
#else
#define REGEX_ARCH_SIZE_T size_t
#endif

#ifndef __BYTE_ORDER__

/* If the compiler doesn't define __BYTE_ORDER__, try to use the C
 * library <endian.h> header definitions. */
#include <endian.h>
#ifndef __BYTE_ORDER
#error Neither __BYTE_ORDER__ nor __BYTE_ORDER defined. Unable to determine endianness.
#endif

/*
 * Provide the compiler-style names, including the trailing underscores,
 * because those are the spellings regex_arch_string() compares against.
 */
#define __ORDER_LITTLE_ENDIAN__ __LITTLE_ENDIAN
#define __ORDER_BIG_ENDIAN__ __BIG_ENDIAN
#define __BYTE_ORDER__ __BYTE_ORDER

#endif
31 
32 #ifdef USE_PCRE2
regex_arch_string(void)33 char const *regex_arch_string(void)
34 {
35 	static char arch_string_buffer[32];
36 	static char const *arch_string = "";
37 	char const *endianness = NULL;
38 	int rc;
39 
40 	if (arch_string[0] == '\0') {
41 		if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
42 			endianness = "el";
43 		else if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
44 			endianness = "eb";
45 
46 		if (!endianness)
47 			return NULL;
48 
49 		rc = snprintf(arch_string_buffer, sizeof(arch_string_buffer),
50 				"%zu-%zu-%s", sizeof(void *),
51 				sizeof(REGEX_ARCH_SIZE_T),
52 				endianness);
53 		if (rc < 0)
54 			abort();
55 
56 		arch_string = &arch_string_buffer[0];
57 	}
58 	return arch_string;
59 }
60 
61 struct regex_data {
62 	pcre2_code *regex; /* compiled regular expression */
63 	/*
64 	 * match data block required for the compiled
65 	 * pattern in pcre2
66 	 */
67 	pcre2_match_data *match_data;
68 	pthread_mutex_t match_mutex;
69 };
70 
regex_prepare_data(struct regex_data ** regex,char const * pattern_string,struct regex_error_data * errordata)71 int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
72 		       struct regex_error_data *errordata)
73 {
74 	memset(errordata, 0, sizeof(struct regex_error_data));
75 
76 	*regex = regex_data_create();
77 	if (!(*regex))
78 		return -1;
79 
80 	(*regex)->regex = pcre2_compile(
81 	    (PCRE2_SPTR)pattern_string, PCRE2_ZERO_TERMINATED, PCRE2_DOTALL,
82 	    &errordata->error_code, &errordata->error_offset, NULL);
83 	if (!(*regex)->regex) {
84 		goto err;
85 	}
86 
87 	(*regex)->match_data =
88 	    pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
89 	if (!(*regex)->match_data) {
90 		goto err;
91 	}
92 	return 0;
93 
94 err:
95 	regex_data_free(*regex);
96 	*regex = NULL;
97 	return -1;
98 }
99 
regex_version(void)100 char const *regex_version(void)
101 {
102 	static char version_buf[256];
103 	size_t len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
104 	if (len <= 0 || len > sizeof(version_buf))
105 		return NULL;
106 
107 	pcre2_config(PCRE2_CONFIG_VERSION, version_buf);
108 	return version_buf;
109 }
110 
regex_load_mmap(struct mmap_area * mmap_area,struct regex_data ** regex,int do_load_precompregex,bool * regex_compiled)111 int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
112 		    int do_load_precompregex, bool *regex_compiled)
113 {
114 	int rc;
115 	uint32_t entry_len;
116 
117 	*regex_compiled = false;
118 	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
119 	if (rc < 0)
120 		return -1;
121 
122 	if (entry_len && do_load_precompregex) {
123 		/*
124 		 * this should yield exactly one because we store one pattern at
125 		 * a time
126 		 */
127 		rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
128 		if (rc != 1)
129 			return -1;
130 
131 		*regex = regex_data_create();
132 		if (!*regex)
133 			return -1;
134 
135 		rc = pcre2_serialize_decode(&(*regex)->regex, 1,
136 					    (PCRE2_SPTR)mmap_area->next_addr,
137 					    NULL);
138 		if (rc != 1)
139 			goto err;
140 
141 		(*regex)->match_data =
142 		    pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
143 		if (!(*regex)->match_data)
144 			goto err;
145 
146 		*regex_compiled = true;
147 	}
148 
149 	/* and skip the decoded bit */
150 	rc = next_entry(NULL, mmap_area, entry_len);
151 	if (rc < 0)
152 		goto err;
153 
154 	return 0;
155 err:
156 	regex_data_free(*regex);
157 	*regex = NULL;
158 	return -1;
159 }
160 
regex_writef(struct regex_data * regex,FILE * fp,int do_write_precompregex)161 int regex_writef(struct regex_data *regex, FILE *fp, int do_write_precompregex)
162 {
163 	int rc = 0;
164 	size_t len;
165 	PCRE2_SIZE serialized_size;
166 	uint32_t to_write = 0;
167 	PCRE2_UCHAR *bytes = NULL;
168 
169 	if (do_write_precompregex) {
170 		/* encode the patter for serialization */
171 		rc = pcre2_serialize_encode((const pcre2_code **)&regex->regex,
172 					    1, &bytes, &serialized_size, NULL);
173 		if (rc != 1) {
174 			rc = -1;
175 			goto out;
176 		}
177 		to_write = serialized_size;
178 	}
179 
180 	/* write serialized pattern's size */
181 	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
182 	if (len != 1) {
183 		rc = -1;
184 		goto out;
185 	}
186 
187 	if (do_write_precompregex) {
188 		/* write serialized pattern */
189 		len = fwrite(bytes, 1, to_write, fp);
190 		if (len != to_write)
191 			rc = -1;
192 	}
193 
194 out:
195 	if (bytes)
196 		pcre2_serialize_free(bytes);
197 
198 	return rc;
199 }
200 
regex_data_free(struct regex_data * regex)201 void regex_data_free(struct regex_data *regex)
202 {
203 	if (regex) {
204 		if (regex->regex)
205 			pcre2_code_free(regex->regex);
206 		if (regex->match_data)
207 			pcre2_match_data_free(regex->match_data);
208 		__pthread_mutex_destroy(&regex->match_mutex);
209 		free(regex);
210 	}
211 }
212 
regex_match(struct regex_data * regex,char const * subject,int partial)213 int regex_match(struct regex_data *regex, char const *subject, int partial)
214 {
215 	int rc;
216 	__pthread_mutex_lock(&regex->match_mutex);
217 	rc = pcre2_match(
218 	    regex->regex, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
219 	    partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, NULL);
220 	__pthread_mutex_unlock(&regex->match_mutex);
221 	if (rc > 0)
222 		return REGEX_MATCH;
223 	switch (rc) {
224 	case PCRE2_ERROR_PARTIAL:
225 		return REGEX_MATCH_PARTIAL;
226 	case PCRE2_ERROR_NOMATCH:
227 		return REGEX_NO_MATCH;
228 	default:
229 		return REGEX_ERROR;
230 	}
231 }
232 
233 /*
234  * TODO Replace this compare function with something that actually compares the
235  * regular expressions.
236  * This compare function basically just compares the binary representations of
237  * the automatons, and because this representation contains pointers and
238  * metadata, it can only return a match if regex1 == regex2.
239  * Preferably, this function would be replaced with an algorithm that computes
240  * the equivalence of the automatons systematically.
241  */
regex_cmp(struct regex_data * regex1,struct regex_data * regex2)242 int regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
243 {
244 	int rc;
245 	size_t len1, len2;
246 	rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
247 	assert(rc == 0);
248 	rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
249 	assert(rc == 0);
250 	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
251 		return SELABEL_INCOMPARABLE;
252 
253 	return SELABEL_EQUAL;
254 }
255 
regex_data_create(void)256 struct regex_data *regex_data_create(void)
257 {
258 	struct regex_data *regex_data =
259 		(struct regex_data *)calloc(1, sizeof(struct regex_data));
260 	__pthread_mutex_init(&regex_data->match_mutex, NULL);
261 	return regex_data;
262 }
263 
264 #else // !USE_PCRE2
/*
 * Without PCRE2 there is no architecture string to report, so a fixed
 * placeholder is returned.
 */
char const *regex_arch_string(void)
{
	static char const not_applicable[] = "N/A";

	return not_applicable;
}
269 
/*
 * Prior to version 8.20, libpcre did not have pcre_free_study(). On those
 * versions the name is mapped to pcre_free so the rest of the file can use
 * pcre_free_study() unconditionally.
 */
#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
#define pcre_free_study pcre_free
#endif
274 
275 struct regex_data {
276 	int owned;   /*
277 		      * non zero if regex and pcre_extra is owned by this
278 		      * structure and thus must be freed on destruction.
279 		      */
280 	pcre *regex; /* compiled regular expression */
281 	union {
282 		pcre_extra *sd; /* pointer to extra compiled stuff */
283 		pcre_extra lsd; /* used to hold the mmap'd version */
284 	};
285 };
286 
regex_prepare_data(struct regex_data ** regex,char const * pattern_string,struct regex_error_data * errordata)287 int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
288 		       struct regex_error_data *errordata)
289 {
290 	memset(errordata, 0, sizeof(struct regex_error_data));
291 
292 	*regex = regex_data_create();
293 	if (!(*regex))
294 		return -1;
295 
296 	(*regex)->regex =
297 	    pcre_compile(pattern_string, PCRE_DOTALL, &errordata->error_buffer,
298 			 &errordata->error_offset, NULL);
299 	if (!(*regex)->regex)
300 		goto err;
301 
302 	(*regex)->owned = 1;
303 
304 	(*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer);
305 	if (!(*regex)->sd && errordata->error_buffer)
306 		goto err;
307 
308 	return 0;
309 
310 err:
311 	regex_data_free(*regex);
312 	*regex = NULL;
313 	return -1;
314 }
315 
/* Return the version string reported by the PCRE library. */
char const *regex_version(void)
{
	char const *version = pcre_version();

	return version;
}
320 
regex_load_mmap(struct mmap_area * mmap_area,struct regex_data ** regex,int unused,bool * regex_compiled)321 int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
322 		    int unused __attribute__((unused)), bool *regex_compiled)
323 {
324 	int rc;
325 	uint32_t entry_len;
326 	size_t info_len;
327 
328 	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
329 	if (rc < 0 || !entry_len)
330 		return -1;
331 
332 	*regex = regex_data_create();
333 	if (!(*regex))
334 		return -1;
335 
336 	(*regex)->owned = 0;
337 	(*regex)->regex = (pcre *)mmap_area->next_addr;
338 	rc = next_entry(NULL, mmap_area, entry_len);
339 	if (rc < 0)
340 		goto err;
341 
342 	/*
343 	 * Check that regex lengths match. pcre_fullinfo()
344 	 * also validates its magic number.
345 	 */
346 	rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
347 	if (rc < 0 || info_len != entry_len)
348 		goto err;
349 
350 	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
351 	if (rc < 0)
352 		goto err;
353 
354 	if (entry_len) {
355 		(*regex)->lsd.study_data = (void *)mmap_area->next_addr;
356 		(*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
357 		rc = next_entry(NULL, mmap_area, entry_len);
358 		if (rc < 0)
359 			goto err;
360 
361 		/* Check that study data lengths match. */
362 		rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
363 				   PCRE_INFO_STUDYSIZE, &info_len);
364 		if (rc < 0 || info_len != entry_len)
365 			goto err;
366 	}
367 
368 	*regex_compiled = true;
369 	return 0;
370 
371 err:
372 	regex_data_free(*regex);
373 	*regex = NULL;
374 	return -1;
375 }
376 
get_pcre_extra(struct regex_data * regex)377 static inline pcre_extra *get_pcre_extra(struct regex_data *regex)
378 {
379 	if (!regex) return NULL;
380 	if (regex->owned) {
381 		return regex->sd;
382 	} else if (regex->lsd.study_data) {
383 		return &regex->lsd;
384 	} else {
385 		return NULL;
386 	}
387 }
388 
regex_writef(struct regex_data * regex,FILE * fp,int unused)389 int regex_writef(struct regex_data *regex, FILE *fp,
390 		 int unused __attribute__((unused)))
391 {
392 	int rc;
393 	size_t len;
394 	uint32_t to_write;
395 	size_t size;
396 	pcre_extra *sd = get_pcre_extra(regex);
397 
398 	/* determine the size of the pcre data in bytes */
399 	rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
400 	if (rc < 0)
401 		return -1;
402 
403 	/* write the number of bytes in the pcre data */
404 	to_write = size;
405 	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
406 	if (len != 1)
407 		return -1;
408 
409 	/* write the actual pcre data as a char array */
410 	len = fwrite(regex->regex, 1, to_write, fp);
411 	if (len != to_write)
412 		return -1;
413 
414 	if (sd) {
415 		/* determine the size of the pcre study info */
416 		rc =
417 		    pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE, &size);
418 		if (rc < 0)
419 			return -1;
420 	} else
421 		size = 0;
422 
423 	/* write the number of bytes in the pcre study data */
424 	to_write = size;
425 	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
426 	if (len != 1)
427 		return -1;
428 
429 	if (sd) {
430 		/* write the actual pcre study data as a char array */
431 		len = fwrite(sd->study_data, 1, to_write, fp);
432 		if (len != to_write)
433 			return -1;
434 	}
435 
436 	return 0;
437 }
438 
regex_data_free(struct regex_data * regex)439 void regex_data_free(struct regex_data *regex)
440 {
441 	if (regex) {
442 		if (regex->owned) {
443 			if (regex->regex)
444 				pcre_free(regex->regex);
445 			if (regex->sd)
446 				pcre_free_study(regex->sd);
447 		}
448 		free(regex);
449 	}
450 }
451 
regex_match(struct regex_data * regex,char const * subject,int partial)452 int regex_match(struct regex_data *regex, char const *subject, int partial)
453 {
454 	int rc;
455 
456 	rc = pcre_exec(regex->regex, get_pcre_extra(regex),
457 		       subject, strlen(subject), 0,
458 		       partial ? PCRE_PARTIAL_SOFT : 0, NULL, 0);
459 	switch (rc) {
460 	case 0:
461 		return REGEX_MATCH;
462 	case PCRE_ERROR_PARTIAL:
463 		return REGEX_MATCH_PARTIAL;
464 	case PCRE_ERROR_NOMATCH:
465 		return REGEX_NO_MATCH;
466 	default:
467 		return REGEX_ERROR;
468 	}
469 }
470 
471 /*
472  * TODO Replace this compare function with something that actually compares the
473  * regular expressions.
474  * This compare function basically just compares the binary representations of
475  * the automatons, and because this representation contains pointers and
476  * metadata, it can only return a match if regex1 == regex2.
477  * Preferably, this function would be replaced with an algorithm that computes
478  * the equivalence of the automatons systematically.
479  */
regex_cmp(struct regex_data * regex1,struct regex_data * regex2)480 int regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
481 {
482 	int rc;
483 	size_t len1, len2;
484 	rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
485 	assert(rc == 0);
486 	rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
487 	assert(rc == 0);
488 	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
489 		return SELABEL_INCOMPARABLE;
490 
491 	return SELABEL_EQUAL;
492 }
493 
regex_data_create(void)494 struct regex_data *regex_data_create(void)
495 {
496 	return (struct regex_data *)calloc(1, sizeof(struct regex_data));
497 }
498 
499 #endif
500 
regex_format_error(struct regex_error_data const * error_data,char * buffer,size_t buf_size)501 void regex_format_error(struct regex_error_data const *error_data, char *buffer,
502 			size_t buf_size)
503 {
504 	unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
505 	char *ptr = &buffer[buf_size - the_end_length];
506 	int rc = 0;
507 	size_t pos = 0;
508 	if (!buffer || !buf_size)
509 		return;
510 	rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
511 	if (rc < 0)
512 		/*
513 		 * If snprintf fails it constitutes a logical error that needs
514 		 * fixing.
515 		 */
516 		abort();
517 
518 	pos += rc;
519 	if (pos >= buf_size)
520 		goto truncated;
521 
522 	if (error_data->error_offset > 0) {
523 #ifdef USE_PCRE2
524 		rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ",
525 			      error_data->error_offset);
526 #else
527 		rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ",
528 			      error_data->error_offset);
529 #endif
530 		if (rc < 0)
531 			abort();
532 	}
533 	pos += rc;
534 	if (pos >= buf_size)
535 		goto truncated;
536 
537 #ifdef USE_PCRE2
538 	rc = pcre2_get_error_message(error_data->error_code,
539 				     (PCRE2_UCHAR *)(buffer + pos),
540 				     buf_size - pos);
541 	if (rc == PCRE2_ERROR_NOMEMORY)
542 		goto truncated;
543 #else
544 	rc = snprintf(buffer + pos, buf_size - pos, "%s",
545 		      error_data->error_buffer);
546 	if (rc < 0)
547 		abort();
548 
549 	if ((size_t)rc < strlen(error_data->error_buffer))
550 		goto truncated;
551 #endif
552 
553 	return;
554 
555 truncated:
556 	/* replace end of string with "..." to indicate that it was truncated */
557 	switch (the_end_length) {
558 	/* no break statements, fall-through is intended */
559 	case 4:
560 		*ptr++ = '.';
561 		/* FALLTHRU */
562 	case 3:
563 		*ptr++ = '.';
564 		/* FALLTHRU */
565 	case 2:
566 		*ptr++ = '.';
567 		/* FALLTHRU */
568 	case 1:
569 		*ptr++ = '\0';
570 		/* FALLTHRU */
571 	default:
572 		break;
573 	}
574 	return;
575 }
576