1 #include <assert.h>
2 #include <pthread.h>
3 #include <stdint.h>
4 #include <stdio.h>
5 #include <string.h>
6
7 #include "regex.h"
8 #include "label_file.h"
9 #include "selinux_internal.h"
10
11 #ifdef USE_PCRE2
12 #define REGEX_ARCH_SIZE_T PCRE2_SIZE
13 #else
14 #define REGEX_ARCH_SIZE_T size_t
15 #endif
16
#ifndef __BYTE_ORDER__

/* If the compiler doesn't define __BYTE_ORDER__, try to use the C
 * library <endian.h> header definitions. */
#include <endian.h>
#ifndef __BYTE_ORDER
#error Neither __BYTE_ORDER__ nor __BYTE_ORDER defined. Unable to determine endianness.
#endif

/*
 * Provide the compiler-style names (with trailing double underscore) that
 * regex_arch_string() compares against, mapped onto the <endian.h> values.
 */
#define __ORDER_LITTLE_ENDIAN__ __LITTLE_ENDIAN
#define __ORDER_BIG_ENDIAN__ __BIG_ENDIAN
#define __BYTE_ORDER__ __BYTE_ORDER

#endif
31
32 #ifdef USE_PCRE2
regex_arch_string(void)33 char const *regex_arch_string(void)
34 {
35 static char arch_string_buffer[32];
36 static char const *arch_string = "";
37 char const *endianness = NULL;
38 int rc;
39
40 if (arch_string[0] == '\0') {
41 if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
42 endianness = "el";
43 else if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
44 endianness = "eb";
45
46 if (!endianness)
47 return NULL;
48
49 rc = snprintf(arch_string_buffer, sizeof(arch_string_buffer),
50 "%zu-%zu-%s", sizeof(void *),
51 sizeof(REGEX_ARCH_SIZE_T),
52 endianness);
53 if (rc < 0)
54 abort();
55
56 arch_string = &arch_string_buffer[0];
57 }
58 return arch_string;
59 }
60
/*
 * Per-pattern state for the PCRE2 back-end. Instances are allocated by
 * regex_data_create() and released by regex_data_free().
 */
struct regex_data {
	pcre2_code *regex; /* compiled regular expression */
	/*
	 * match data block required for the compiled
	 * pattern in pcre2
	 */
	pcre2_match_data *match_data;
	/*
	 * Locked by regex_match() around the pcre2_match() call, which is
	 * handed the match_data block above.
	 */
	pthread_mutex_t match_mutex;
};
70
regex_prepare_data(struct regex_data ** regex,char const * pattern_string,struct regex_error_data * errordata)71 int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
72 struct regex_error_data *errordata)
73 {
74 memset(errordata, 0, sizeof(struct regex_error_data));
75
76 *regex = regex_data_create();
77 if (!(*regex))
78 return -1;
79
80 (*regex)->regex = pcre2_compile(
81 (PCRE2_SPTR)pattern_string, PCRE2_ZERO_TERMINATED, PCRE2_DOTALL,
82 &errordata->error_code, &errordata->error_offset, NULL);
83 if (!(*regex)->regex) {
84 goto err;
85 }
86
87 (*regex)->match_data =
88 pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
89 if (!(*regex)->match_data) {
90 goto err;
91 }
92 return 0;
93
94 err:
95 regex_data_free(*regex);
96 *regex = NULL;
97 return -1;
98 }
99
regex_version(void)100 char const *regex_version(void)
101 {
102 static char version_buf[256];
103 size_t len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
104 if (len <= 0 || len > sizeof(version_buf))
105 return NULL;
106
107 pcre2_config(PCRE2_CONFIG_VERSION, version_buf);
108 return version_buf;
109 }
110
regex_load_mmap(struct mmap_area * mmap_area,struct regex_data ** regex,int do_load_precompregex,bool * regex_compiled)111 int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
112 int do_load_precompregex, bool *regex_compiled)
113 {
114 int rc;
115 uint32_t entry_len;
116
117 *regex_compiled = false;
118 rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
119 if (rc < 0)
120 return -1;
121
122 if (entry_len && do_load_precompregex) {
123 /*
124 * this should yield exactly one because we store one pattern at
125 * a time
126 */
127 rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
128 if (rc != 1)
129 return -1;
130
131 *regex = regex_data_create();
132 if (!*regex)
133 return -1;
134
135 rc = pcre2_serialize_decode(&(*regex)->regex, 1,
136 (PCRE2_SPTR)mmap_area->next_addr,
137 NULL);
138 if (rc != 1)
139 goto err;
140
141 (*regex)->match_data =
142 pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
143 if (!(*regex)->match_data)
144 goto err;
145
146 *regex_compiled = true;
147 }
148
149 /* and skip the decoded bit */
150 rc = next_entry(NULL, mmap_area, entry_len);
151 if (rc < 0)
152 goto err;
153
154 return 0;
155 err:
156 regex_data_free(*regex);
157 *regex = NULL;
158 return -1;
159 }
160
regex_writef(struct regex_data * regex,FILE * fp,int do_write_precompregex)161 int regex_writef(struct regex_data *regex, FILE *fp, int do_write_precompregex)
162 {
163 int rc = 0;
164 size_t len;
165 PCRE2_SIZE serialized_size;
166 uint32_t to_write = 0;
167 PCRE2_UCHAR *bytes = NULL;
168
169 if (do_write_precompregex) {
170 /* encode the patter for serialization */
171 rc = pcre2_serialize_encode((const pcre2_code **)®ex->regex,
172 1, &bytes, &serialized_size, NULL);
173 if (rc != 1) {
174 rc = -1;
175 goto out;
176 }
177 to_write = serialized_size;
178 }
179
180 /* write serialized pattern's size */
181 len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
182 if (len != 1) {
183 rc = -1;
184 goto out;
185 }
186
187 if (do_write_precompregex) {
188 /* write serialized pattern */
189 len = fwrite(bytes, 1, to_write, fp);
190 if (len != to_write)
191 rc = -1;
192 }
193
194 out:
195 if (bytes)
196 pcre2_serialize_free(bytes);
197
198 return rc;
199 }
200
regex_data_free(struct regex_data * regex)201 void regex_data_free(struct regex_data *regex)
202 {
203 if (regex) {
204 if (regex->regex)
205 pcre2_code_free(regex->regex);
206 if (regex->match_data)
207 pcre2_match_data_free(regex->match_data);
208 __pthread_mutex_destroy(®ex->match_mutex);
209 free(regex);
210 }
211 }
212
regex_match(struct regex_data * regex,char const * subject,int partial)213 int regex_match(struct regex_data *regex, char const *subject, int partial)
214 {
215 int rc;
216 __pthread_mutex_lock(®ex->match_mutex);
217 rc = pcre2_match(
218 regex->regex, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
219 partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, NULL);
220 __pthread_mutex_unlock(®ex->match_mutex);
221 if (rc > 0)
222 return REGEX_MATCH;
223 switch (rc) {
224 case PCRE2_ERROR_PARTIAL:
225 return REGEX_MATCH_PARTIAL;
226 case PCRE2_ERROR_NOMATCH:
227 return REGEX_NO_MATCH;
228 default:
229 return REGEX_ERROR;
230 }
231 }
232
233 /*
234 * TODO Replace this compare function with something that actually compares the
235 * regular expressions.
236 * This compare function basically just compares the binary representations of
237 * the automatons, and because this representation contains pointers and
238 * metadata, it can only return a match if regex1 == regex2.
239 * Preferably, this function would be replaced with an algorithm that computes
240 * the equivalence of the automatons systematically.
241 */
regex_cmp(struct regex_data * regex1,struct regex_data * regex2)242 int regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
243 {
244 int rc;
245 size_t len1, len2;
246 rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
247 assert(rc == 0);
248 rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
249 assert(rc == 0);
250 if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
251 return SELABEL_INCOMPARABLE;
252
253 return SELABEL_EQUAL;
254 }
255
regex_data_create(void)256 struct regex_data *regex_data_create(void)
257 {
258 struct regex_data *regex_data =
259 (struct regex_data *)calloc(1, sizeof(struct regex_data));
260 __pthread_mutex_init(®ex_data->match_mutex, NULL);
261 return regex_data;
262 }
263
264 #else // !USE_PCRE2
/*
 * The legacy PCRE back-end has no architecture string; always returns the
 * constant "N/A".
 */
char const *regex_arch_string(void)
{
	static char const not_applicable[] = "N/A";

	return not_applicable;
}
269
270 /* Prior to version 8.20, libpcre did not have pcre_free_study() */
271 #if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
272 #define pcre_free_study pcre_free
273 #endif
274
/*
 * Per-pattern state for the legacy PCRE back-end. Instances are allocated
 * by regex_data_create() and released by regex_data_free().
 */
struct regex_data {
	int owned;   /*
		      * non zero if regex and pcre_extra is owned by this
		      * structure and thus must be freed on destruction.
		      */
	pcre *regex; /* compiled regular expression */
	/*
	 * get_pcre_extra() reads sd when owned is set and lsd otherwise, so
	 * only one member of this union is in use for a given object.
	 */
	union {
		pcre_extra *sd; /* pointer to extra compiled stuff */
		pcre_extra lsd; /* used to hold the mmap'd version */
	};
};
286
regex_prepare_data(struct regex_data ** regex,char const * pattern_string,struct regex_error_data * errordata)287 int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
288 struct regex_error_data *errordata)
289 {
290 memset(errordata, 0, sizeof(struct regex_error_data));
291
292 *regex = regex_data_create();
293 if (!(*regex))
294 return -1;
295
296 (*regex)->regex =
297 pcre_compile(pattern_string, PCRE_DOTALL, &errordata->error_buffer,
298 &errordata->error_offset, NULL);
299 if (!(*regex)->regex)
300 goto err;
301
302 (*regex)->owned = 1;
303
304 (*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer);
305 if (!(*regex)->sd && errordata->error_buffer)
306 goto err;
307
308 return 0;
309
310 err:
311 regex_data_free(*regex);
312 *regex = NULL;
313 return -1;
314 }
315
/* Return the version string reported by pcre_version(). */
char const *regex_version(void)
{
	char const *version = pcre_version();

	return version;
}
320
regex_load_mmap(struct mmap_area * mmap_area,struct regex_data ** regex,int unused,bool * regex_compiled)321 int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
322 int unused __attribute__((unused)), bool *regex_compiled)
323 {
324 int rc;
325 uint32_t entry_len;
326 size_t info_len;
327
328 rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
329 if (rc < 0 || !entry_len)
330 return -1;
331
332 *regex = regex_data_create();
333 if (!(*regex))
334 return -1;
335
336 (*regex)->owned = 0;
337 (*regex)->regex = (pcre *)mmap_area->next_addr;
338 rc = next_entry(NULL, mmap_area, entry_len);
339 if (rc < 0)
340 goto err;
341
342 /*
343 * Check that regex lengths match. pcre_fullinfo()
344 * also validates its magic number.
345 */
346 rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
347 if (rc < 0 || info_len != entry_len)
348 goto err;
349
350 rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
351 if (rc < 0)
352 goto err;
353
354 if (entry_len) {
355 (*regex)->lsd.study_data = (void *)mmap_area->next_addr;
356 (*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
357 rc = next_entry(NULL, mmap_area, entry_len);
358 if (rc < 0)
359 goto err;
360
361 /* Check that study data lengths match. */
362 rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
363 PCRE_INFO_STUDYSIZE, &info_len);
364 if (rc < 0 || info_len != entry_len)
365 goto err;
366 }
367
368 *regex_compiled = true;
369 return 0;
370
371 err:
372 regex_data_free(*regex);
373 *regex = NULL;
374 return -1;
375 }
376
get_pcre_extra(struct regex_data * regex)377 static inline pcre_extra *get_pcre_extra(struct regex_data *regex)
378 {
379 if (!regex) return NULL;
380 if (regex->owned) {
381 return regex->sd;
382 } else if (regex->lsd.study_data) {
383 return ®ex->lsd;
384 } else {
385 return NULL;
386 }
387 }
388
regex_writef(struct regex_data * regex,FILE * fp,int unused)389 int regex_writef(struct regex_data *regex, FILE *fp,
390 int unused __attribute__((unused)))
391 {
392 int rc;
393 size_t len;
394 uint32_t to_write;
395 size_t size;
396 pcre_extra *sd = get_pcre_extra(regex);
397
398 /* determine the size of the pcre data in bytes */
399 rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
400 if (rc < 0)
401 return -1;
402
403 /* write the number of bytes in the pcre data */
404 to_write = size;
405 len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
406 if (len != 1)
407 return -1;
408
409 /* write the actual pcre data as a char array */
410 len = fwrite(regex->regex, 1, to_write, fp);
411 if (len != to_write)
412 return -1;
413
414 if (sd) {
415 /* determine the size of the pcre study info */
416 rc =
417 pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE, &size);
418 if (rc < 0)
419 return -1;
420 } else
421 size = 0;
422
423 /* write the number of bytes in the pcre study data */
424 to_write = size;
425 len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
426 if (len != 1)
427 return -1;
428
429 if (sd) {
430 /* write the actual pcre study data as a char array */
431 len = fwrite(sd->study_data, 1, to_write, fp);
432 if (len != to_write)
433 return -1;
434 }
435
436 return 0;
437 }
438
regex_data_free(struct regex_data * regex)439 void regex_data_free(struct regex_data *regex)
440 {
441 if (regex) {
442 if (regex->owned) {
443 if (regex->regex)
444 pcre_free(regex->regex);
445 if (regex->sd)
446 pcre_free_study(regex->sd);
447 }
448 free(regex);
449 }
450 }
451
regex_match(struct regex_data * regex,char const * subject,int partial)452 int regex_match(struct regex_data *regex, char const *subject, int partial)
453 {
454 int rc;
455
456 rc = pcre_exec(regex->regex, get_pcre_extra(regex),
457 subject, strlen(subject), 0,
458 partial ? PCRE_PARTIAL_SOFT : 0, NULL, 0);
459 switch (rc) {
460 case 0:
461 return REGEX_MATCH;
462 case PCRE_ERROR_PARTIAL:
463 return REGEX_MATCH_PARTIAL;
464 case PCRE_ERROR_NOMATCH:
465 return REGEX_NO_MATCH;
466 default:
467 return REGEX_ERROR;
468 }
469 }
470
471 /*
472 * TODO Replace this compare function with something that actually compares the
473 * regular expressions.
474 * This compare function basically just compares the binary representations of
475 * the automatons, and because this representation contains pointers and
476 * metadata, it can only return a match if regex1 == regex2.
477 * Preferably, this function would be replaced with an algorithm that computes
478 * the equivalence of the automatons systematically.
479 */
regex_cmp(struct regex_data * regex1,struct regex_data * regex2)480 int regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
481 {
482 int rc;
483 size_t len1, len2;
484 rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
485 assert(rc == 0);
486 rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
487 assert(rc == 0);
488 if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
489 return SELABEL_INCOMPARABLE;
490
491 return SELABEL_EQUAL;
492 }
493
regex_data_create(void)494 struct regex_data *regex_data_create(void)
495 {
496 return (struct regex_data *)calloc(1, sizeof(struct regex_data));
497 }
498
499 #endif
500
regex_format_error(struct regex_error_data const * error_data,char * buffer,size_t buf_size)501 void regex_format_error(struct regex_error_data const *error_data, char *buffer,
502 size_t buf_size)
503 {
504 unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
505 char *ptr = &buffer[buf_size - the_end_length];
506 int rc = 0;
507 size_t pos = 0;
508 if (!buffer || !buf_size)
509 return;
510 rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
511 if (rc < 0)
512 /*
513 * If snprintf fails it constitutes a logical error that needs
514 * fixing.
515 */
516 abort();
517
518 pos += rc;
519 if (pos >= buf_size)
520 goto truncated;
521
522 /* Return early if there is no error to format */
523 #ifdef USE_PCRE2
524 if (!error_data->error_code) {
525 rc = snprintf(buffer + pos, buf_size - pos, "no error code");
526 if (rc < 0)
527 abort();
528 pos += rc;
529 if (pos >= buf_size)
530 goto truncated;
531 return;
532 }
533 #else
534 if (!error_data->error_buffer) {
535 rc = snprintf(buffer + pos, buf_size - pos, "empty error");
536 if (rc < 0)
537 abort();
538 pos += rc;
539 if (pos >= buf_size)
540 goto truncated;
541 return;
542 }
543 #endif
544
545 if (error_data->error_offset > 0) {
546 #ifdef USE_PCRE2
547 rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ",
548 error_data->error_offset);
549 #else
550 rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ",
551 error_data->error_offset);
552 #endif
553 if (rc < 0)
554 abort();
555 pos += rc;
556 if (pos >= buf_size)
557 goto truncated;
558 }
559
560 #ifdef USE_PCRE2
561 rc = pcre2_get_error_message(error_data->error_code,
562 (PCRE2_UCHAR *)(buffer + pos),
563 buf_size - pos);
564 if (rc == PCRE2_ERROR_NOMEMORY)
565 goto truncated;
566 #else
567 rc = snprintf(buffer + pos, buf_size - pos, "%s",
568 error_data->error_buffer);
569 if (rc < 0)
570 abort();
571
572 if ((size_t)rc < strlen(error_data->error_buffer))
573 goto truncated;
574 #endif
575
576 return;
577
578 truncated:
579 /* replace end of string with "..." to indicate that it was truncated */
580 switch (the_end_length) {
581 /* no break statements, fall-through is intended */
582 case 4:
583 *ptr++ = '.';
584 /* FALLTHRU */
585 case 3:
586 *ptr++ = '.';
587 /* FALLTHRU */
588 case 2:
589 *ptr++ = '.';
590 /* FALLTHRU */
591 case 1:
592 *ptr++ = '\0';
593 /* FALLTHRU */
594 default:
595 break;
596 }
597 return;
598 }
599