1 // -*- coding: utf-8 -*-
2 //
3 // Copyright (c) 2005 - 2010, Google Inc.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 // * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Author: Sanjay Ghemawat
33 //
34 // TODO: Test extractions for PartialMatch/Consume
35
36 #ifdef HAVE_CONFIG_H
37 #include "config.h"
38 #endif
39
#include <stdio.h>
#include <stdlib.h>      /* for exit and atoi */
#include <string.h>      /* for memset and strcmp */
#include <cassert>
#include <vector>
#include "pcrecpp.h"
45
46 using pcrecpp::StringPiece;
47 using pcrecpp::RE;
48 using pcrecpp::RE_Options;
49 using pcrecpp::Hex;
50 using pcrecpp::Octal;
51 using pcrecpp::CRadix;
52
53 static bool VERBOSE_TEST = false;
54
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
// On failure the file, line and the text of the condition are written to
// stderr and the process exits with status 1.
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ dies unless a == b.  Both operands are parenthesized so that an
// argument containing an operator of lower precedence than `==` (such as
// `x & mask`) is still compared as a whole expression.
#define CHECK_EQ(a, b)  CHECK((a) == (b))
68
Timing1(int num_iters)69 static void Timing1(int num_iters) {
70 // Same pattern lots of times
71 RE pattern("ruby:\\d+");
72 StringPiece p("ruby:1234");
73 for (int j = num_iters; j > 0; j--) {
74 CHECK(pattern.FullMatch(p));
75 }
76 }
77
Timing2(int num_iters)78 static void Timing2(int num_iters) {
79 // Same pattern lots of times
80 RE pattern("ruby:(\\d+)");
81 int i;
82 for (int j = num_iters; j > 0; j--) {
83 CHECK(pattern.FullMatch("ruby:1234", &i));
84 CHECK_EQ(i, 1234);
85 }
86 }
87
Timing3(int num_iters)88 static void Timing3(int num_iters) {
89 string text_string;
90 for (int j = num_iters; j > 0; j--) {
91 text_string += "this is another line\n";
92 }
93
94 RE line_matcher(".*\n");
95 string line;
96 StringPiece text(text_string);
97 int counter = 0;
98 while (line_matcher.Consume(&text)) {
99 counter++;
100 }
101 printf("Matched %d lines\n", counter);
102 }
103
#if 0  // change to "#if 1" if you have a way of defining VirtualProcessSize()

// Checks for memory leaks by compiling 100000 distinct patterns and
// comparing the process size halfway through with the size at the end.
// Fails if the process grew by 2% or more over the second half.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size so that the %llu format below is correct.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract in floating point: an unsigned subtraction would wrap around
  // to a huge value if the process happened to shrink.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);    // Allow < 2% growth
}

#endif
126
RadixTests()127 static void RadixTests() {
128 printf("Testing hex\n");
129
130 #define CHECK_HEX(type, value) \
131 do { \
132 type v; \
133 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
134 CHECK_EQ(v, 0x ## value); \
135 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
136 CHECK_EQ(v, 0x ## value); \
137 } while(0)
138
139 CHECK_HEX(short, 2bad);
140 CHECK_HEX(unsigned short, 2badU);
141 CHECK_HEX(int, dead);
142 CHECK_HEX(unsigned int, deadU);
143 CHECK_HEX(long, 7eadbeefL);
144 CHECK_HEX(unsigned long, deadbeefUL);
145 #ifdef HAVE_LONG_LONG
146 CHECK_HEX(long long, 12345678deadbeefLL);
147 #endif
148 #ifdef HAVE_UNSIGNED_LONG_LONG
149 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
150 #endif
151
152 #undef CHECK_HEX
153
154 printf("Testing octal\n");
155
156 #define CHECK_OCTAL(type, value) \
157 do { \
158 type v; \
159 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
160 CHECK_EQ(v, 0 ## value); \
161 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
162 CHECK_EQ(v, 0 ## value); \
163 } while(0)
164
165 CHECK_OCTAL(short, 77777);
166 CHECK_OCTAL(unsigned short, 177777U);
167 CHECK_OCTAL(int, 17777777777);
168 CHECK_OCTAL(unsigned int, 37777777777U);
169 CHECK_OCTAL(long, 17777777777L);
170 CHECK_OCTAL(unsigned long, 37777777777UL);
171 #ifdef HAVE_LONG_LONG
172 CHECK_OCTAL(long long, 777777777777777777777LL);
173 #endif
174 #ifdef HAVE_UNSIGNED_LONG_LONG
175 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
176 #endif
177
178 #undef CHECK_OCTAL
179
180 printf("Testing decimal\n");
181
182 #define CHECK_DECIMAL(type, value) \
183 do { \
184 type v; \
185 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
186 CHECK_EQ(v, value); \
187 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
188 CHECK_EQ(v, value); \
189 } while(0)
190
191 CHECK_DECIMAL(short, -1);
192 CHECK_DECIMAL(unsigned short, 9999);
193 CHECK_DECIMAL(int, -1000);
194 CHECK_DECIMAL(unsigned int, 12345U);
195 CHECK_DECIMAL(long, -10000000L);
196 CHECK_DECIMAL(unsigned long, 3083324652U);
197 #ifdef HAVE_LONG_LONG
198 CHECK_DECIMAL(long long, -100000000000000LL);
199 #endif
200 #ifdef HAVE_UNSIGNED_LONG_LONG
201 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
202 #endif
203
204 #undef CHECK_DECIMAL
205
206 }
207
TestReplace()208 static void TestReplace() {
209 printf("Testing Replace\n");
210
211 struct ReplaceTest {
212 const char *regexp;
213 const char *rewrite;
214 const char *original;
215 const char *single;
216 const char *global;
217 int global_count; // the expected return value from ReplaceAll
218 };
219 static const ReplaceTest tests[] = {
220 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
221 "\\2\\1ay",
222 "the quick brown fox jumps over the lazy dogs.",
223 "ethay quick brown fox jumps over the lazy dogs.",
224 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
225 9 },
226 { "\\w+",
227 "\\0-NOSPAM",
228 "paul.haahr@google.com",
229 "paul-NOSPAM.haahr@google.com",
230 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
231 4 },
232 { "^",
233 "(START)",
234 "foo",
235 "(START)foo",
236 "(START)foo",
237 1 },
238 { "^",
239 "(START)",
240 "",
241 "(START)",
242 "(START)",
243 1 },
244 { "$",
245 "(END)",
246 "",
247 "(END)",
248 "(END)",
249 1 },
250 { "b",
251 "bb",
252 "ababababab",
253 "abbabababab",
254 "abbabbabbabbabb",
255 5 },
256 { "b",
257 "bb",
258 "bbbbbb",
259 "bbbbbbb",
260 "bbbbbbbbbbbb",
261 6 },
262 { "b+",
263 "bb",
264 "bbbbbb",
265 "bb",
266 "bb",
267 1 },
268 { "b*",
269 "bb",
270 "bbbbbb",
271 "bb",
272 "bbbb",
273 2 },
274 { "b*",
275 "bb",
276 "aaaaa",
277 "bbaaaaa",
278 "bbabbabbabbabbabb",
279 6 },
280 { "b*",
281 "bb",
282 "aa\naa\n",
283 "bbaa\naa\n",
284 "bbabbabb\nbbabbabb\nbb",
285 7 },
286 { "b*",
287 "bb",
288 "aa\raa\r",
289 "bbaa\raa\r",
290 "bbabbabb\rbbabbabb\rbb",
291 7 },
292 { "b*",
293 "bb",
294 "aa\r\naa\r\n",
295 "bbaa\r\naa\r\n",
296 "bbabbabb\r\nbbabbabb\r\nbb",
297 7 },
298 // Check empty-string matching (it's tricky!)
299 { "aa|b*",
300 "@",
301 "aa",
302 "@",
303 "@@",
304 2 },
305 { "b*|aa",
306 "@",
307 "aa",
308 "@aa",
309 "@@@",
310 3 },
311 #ifdef SUPPORT_UTF8
312 { "b*",
313 "bb",
314 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
315 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
316 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
317 5 },
318 { "b*",
319 "bb",
320 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
321 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
322 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
323 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
324 9 },
325 #endif
326 { "", NULL, NULL, NULL, NULL, 0 }
327 };
328
329 #ifdef SUPPORT_UTF8
330 const bool support_utf8 = true;
331 #else
332 const bool support_utf8 = false;
333 #endif
334
335 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
336 RE re(t->regexp, RE_Options().set_newline_mode(PCRE2_NEWLINE_CRLF)
337 .set_utf(support_utf8));
338 assert(re.error().empty());
339 string one(t->original);
340 CHECK(re.Replace(t->rewrite, &one));
341 CHECK_EQ(one, t->single);
342 string all(t->original);
343 const int replace_count = re.GlobalReplace(t->rewrite, &all);
344 CHECK_EQ(all, t->global);
345 CHECK_EQ(replace_count, t->global_count);
346 }
347
348 // One final test: test \r\n replacement when we're not in CRLF mode
349 {
350 RE re("b*", RE_Options().set_newline_mode(PCRE2_NEWLINE_CR)
351 .set_utf(support_utf8));
352 assert(re.error().empty());
353 string all("aa\r\naa\r\n");
354 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
355 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
356 }
357 {
358 RE re("b*", RE_Options().set_newline_mode(PCRE2_NEWLINE_LF)
359 .set_utf(support_utf8));
360 assert(re.error().empty());
361 string all("aa\r\naa\r\n");
362 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
363 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
364 }
365 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
366 // Alas, the answer depends on how pcre was compiled.
367 }
368
TestExtract()369 static void TestExtract() {
370 printf("Testing Extract\n");
371
372 string s;
373
374 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
375 CHECK_EQ(s, "kremvax!boris");
376
377 // check the RE interface as well
378 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
379 CHECK_EQ(s, "'foo'");
380 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
381 CHECK_EQ(s, "'foo'");
382 }
383
TestConsume()384 static void TestConsume() {
385 printf("Testing Consume\n");
386
387 string word;
388
389 string s(" aaa b!@#$@#$cccc");
390 StringPiece input(s);
391
392 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
393 CHECK(r.Consume(&input, &word));
394 CHECK_EQ(word, "aaa");
395 CHECK(r.Consume(&input, &word));
396 CHECK_EQ(word, "b");
397 CHECK(! r.Consume(&input, &word));
398 }
399
TestFindAndConsume()400 static void TestFindAndConsume() {
401 printf("Testing FindAndConsume\n");
402
403 string word;
404
405 string s(" aaa b!@#$@#$cccc");
406 StringPiece input(s);
407
408 RE r("(\\w+)"); // matches a word
409 CHECK(r.FindAndConsume(&input, &word));
410 CHECK_EQ(word, "aaa");
411 CHECK(r.FindAndConsume(&input, &word));
412 CHECK_EQ(word, "b");
413 CHECK(r.FindAndConsume(&input, &word));
414 CHECK_EQ(word, "cccc");
415 CHECK(! r.FindAndConsume(&input, &word));
416 }
417
TestMatchNumberPeculiarity()418 static void TestMatchNumberPeculiarity() {
419 printf("Testing match-number peculiarity\n");
420
421 string word1;
422 string word2;
423 string word3;
424
425 RE r("(foo)|(bar)|(baz)");
426 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
427 CHECK_EQ(word1, "foo");
428 CHECK_EQ(word2, "");
429 CHECK_EQ(word3, "");
430 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
431 CHECK_EQ(word1, "");
432 CHECK_EQ(word2, "bar");
433 CHECK_EQ(word3, "");
434 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
435 CHECK_EQ(word1, "");
436 CHECK_EQ(word2, "");
437 CHECK_EQ(word3, "baz");
438 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
439
440 string a;
441 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
442 CHECK_EQ(a, "");
443 }
444
TestRecursion()445 static void TestRecursion() {
446 printf("Testing recursion\n");
447
448 // Get one string that passes (sometimes), one that never does.
449 string text_good("abcdefghijk");
450 string text_bad("acdefghijkl");
451
452 // According to pcretest, matching text_good against (\w+)*b
453 // requires match_limit of at least 8192, and match_recursion_limit
454 // of at least 37.
455
456 RE_Options options_ml;
457 options_ml.set_match_limit(8192);
458 RE re("(\\w+)*b", options_ml);
459 CHECK(re.PartialMatch(text_good) == true);
460 CHECK(re.PartialMatch(text_bad) == false);
461 CHECK(re.FullMatch(text_good) == false);
462 CHECK(re.FullMatch(text_bad) == false);
463
464 options_ml.set_match_limit(1024);
465 RE re2("(\\w+)*b", options_ml);
466 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
467 CHECK(re2.PartialMatch(text_bad) == false);
468 CHECK(re2.FullMatch(text_good) == false);
469 CHECK(re2.FullMatch(text_bad) == false);
470
471 RE_Options options_mlr;
472 options_mlr.set_match_limit_recursion(50);
473 RE re3("(\\w+)*b", options_mlr);
474 CHECK(re3.PartialMatch(text_good) == true);
475 CHECK(re3.PartialMatch(text_bad) == false);
476 CHECK(re3.FullMatch(text_good) == false);
477 CHECK(re3.FullMatch(text_bad) == false);
478
479 options_mlr.set_match_limit_recursion(10);
480 RE re4("(\\w+)*b", options_mlr);
481 CHECK(re4.PartialMatch(text_good) == false);
482 CHECK(re4.PartialMatch(text_bad) == false);
483 CHECK(re4.FullMatch(text_good) == false);
484 CHECK(re4.FullMatch(text_bad) == false);
485 }
486
487 // A meta-quoted string, interpreted as a pattern, should always match
488 // the original unquoted string.
TestQuoteMeta(string unquoted,RE_Options options=RE_Options ())489 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
490 string quoted = RE::QuoteMeta(unquoted);
491 RE re(quoted, options);
492 CHECK(re.FullMatch(unquoted));
493 }
494
495 // A string containing meaningful regexp characters, which is then meta-
496 // quoted, should not generally match a string the unquoted string does.
NegativeTestQuoteMeta(string unquoted,string should_not_match,RE_Options options=RE_Options ())497 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
498 RE_Options options = RE_Options()) {
499 string quoted = RE::QuoteMeta(unquoted);
500 RE re(quoted, options);
501 CHECK(!re.FullMatch(should_not_match));
502 }
503
504 // Tests that quoted meta characters match their original strings,
505 // and that a few things that shouldn't match indeed do not.
TestQuotaMetaSimple()506 static void TestQuotaMetaSimple() {
507 TestQuoteMeta("foo");
508 TestQuoteMeta("foo.bar");
509 TestQuoteMeta("foo\\.bar");
510 TestQuoteMeta("[1-9]");
511 TestQuoteMeta("1.5-2.0?");
512 TestQuoteMeta("\\d");
513 TestQuoteMeta("Who doesn't like ice cream?");
514 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
515 TestQuoteMeta("((?!)xxx).*yyy");
516 TestQuoteMeta("([");
517 TestQuoteMeta(string("foo\0bar", 7));
518 }
519
TestQuoteMetaSimpleNegative()520 static void TestQuoteMetaSimpleNegative() {
521 NegativeTestQuoteMeta("foo", "bar");
522 NegativeTestQuoteMeta("...", "bar");
523 NegativeTestQuoteMeta("\\.", ".");
524 NegativeTestQuoteMeta("\\.", "..");
525 NegativeTestQuoteMeta("(a)", "a");
526 NegativeTestQuoteMeta("(a|b)", "a");
527 NegativeTestQuoteMeta("(a|b)", "(a)");
528 NegativeTestQuoteMeta("(a|b)", "a|b");
529 NegativeTestQuoteMeta("[0-9]", "0");
530 NegativeTestQuoteMeta("[0-9]", "0-9");
531 NegativeTestQuoteMeta("[0-9]", "[9]");
532 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
533 }
534
TestQuoteMetaLatin1()535 static void TestQuoteMetaLatin1() {
536 TestQuoteMeta("3\xb2 = 9");
537 }
538
// QuoteMeta checks on multi-byte UTF-8 sequences with the UTF8 option set.
// Compiled to nothing unless SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8_mode = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_mode);
  TestQuoteMeta("xyz", utf8_mode);                 // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8_mode);            // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_mode);  // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8_mode);        // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_mode);    // 4-byte utf8 (music note)

  // Default options: interpreted as Latin-1, but should still work.
  TestQuoteMeta("27\xc2\xb0");

  // The candidate has a backslash in front of each byte of the 2-byte
  // degree symbol; the quoted pattern must not match it.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        utf8_mode);
#endif
}
553
TestQuoteMetaAll()554 static void TestQuoteMetaAll() {
555 printf("Testing QuoteMeta\n");
556 TestQuotaMetaSimple();
557 TestQuoteMetaSimpleNegative();
558 TestQuoteMetaLatin1();
559 TestQuoteMetaUtf8();
560 }
561
562 //
563 // Options tests contributed by
564 // Giuseppe Maxia, CTO, Stardata s.r.l.
565 // July 2005
566 //
GetOneOptionResult(const char * option_name,const char * regex,const char * str,RE_Options options,bool full,string expected)567 static void GetOneOptionResult(
568 const char *option_name,
569 const char *regex,
570 const char *str,
571 RE_Options options,
572 bool full,
573 string expected) {
574
575 printf("Testing Option <%s>\n", option_name);
576 if(VERBOSE_TEST)
577 printf("/%s/ finds \"%s\" within \"%s\" \n",
578 regex,
579 expected.c_str(),
580 str);
581 string captured("");
582 if (full)
583 RE(regex,options).FullMatch(str, &captured);
584 else
585 RE(regex,options).PartialMatch(str, &captured);
586 CHECK_EQ(captured, expected);
587 }
588
TestOneOption(const char * option_name,const char * regex,const char * str,RE_Options options,bool full,bool assertive=true)589 static void TestOneOption(
590 const char *option_name,
591 const char *regex,
592 const char *str,
593 RE_Options options,
594 bool full,
595 bool assertive = true) {
596
597 printf("Testing Option <%s>\n", option_name);
598 if (VERBOSE_TEST)
599 printf("'%s' %s /%s/ \n",
600 str,
601 (assertive? "matches" : "doesn't match"),
602 regex);
603 if (assertive) {
604 if (full)
605 CHECK(RE(regex,options).FullMatch(str));
606 else
607 CHECK(RE(regex,options).PartialMatch(str));
608 } else {
609 if (full)
610 CHECK(!RE(regex,options).FullMatch(str));
611 else
612 CHECK(!RE(regex,options).PartialMatch(str));
613 }
614 }
615
Test_CASELESS()616 static void Test_CASELESS() {
617 RE_Options options;
618 RE_Options options2;
619
620 options.set_caseless(true);
621 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
622 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
623 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
624
625 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
626 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
627 options.set_caseless(false);
628 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
629 }
630
Test_MULTILINE()631 static void Test_MULTILINE() {
632 RE_Options options;
633 RE_Options options2;
634 const char *str = "HELLO\n" "cruel\n" "world\n";
635
636 options.set_multiline(true);
637 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
638 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
639 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
640 options.set_multiline(false);
641 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
642 }
643
Test_DOTALL()644 static void Test_DOTALL() {
645 RE_Options options;
646 RE_Options options2;
647 const char *str = "HELLO\n" "cruel\n" "world";
648
649 options.set_dotall(true);
650 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
651 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
652 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
653 options.set_dotall(false);
654 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
655 }
656
Test_DOLLAR_ENDONLY()657 static void Test_DOLLAR_ENDONLY() {
658 RE_Options options;
659 RE_Options options2;
660 const char *str = "HELLO world\n";
661
662 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
663 options.set_dollar_endonly(true);
664 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
665 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
666 }
667
Test_EXTENDED()668 static void Test_EXTENDED() {
669 RE_Options options;
670 RE_Options options2;
671 const char *str = "HELLO world";
672
673 options.set_extended(true);
674 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
675 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
676 TestOneOption("EXTENDED (class)",
677 "^ HE L{2} O "
678 "\\s+ "
679 "\\w+ $ ",
680 str,
681 options,
682 false);
683
684 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
685 TestOneOption("EXTENDED (function)",
686 "^ HE L{2} O "
687 "\\s+ "
688 "\\w+ $ ",
689 str,
690 pcrecpp::EXTENDED(),
691 false);
692
693 options.set_extended(false);
694 TestOneOption("no EXTENDED", "HELLO world", str, options, false);
695 }
696
Test_NO_AUTO_CAPTURE()697 static void Test_NO_AUTO_CAPTURE() {
698 RE_Options options;
699 const char *str = "HELLO world";
700 string captured;
701
702 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
703 if (VERBOSE_TEST)
704 printf("parentheses capture text\n");
705 RE re("(world|universe)$", options);
706 CHECK(re.Extract("\\1", str , &captured));
707 CHECK_EQ(captured, "world");
708 options.set_no_auto_capture(true);
709 printf("testing Option <NO_AUTO_CAPTURE>\n");
710 if (VERBOSE_TEST)
711 printf("parentheses do not capture text\n");
712 re.Extract("\\1",str, &captured );
713 CHECK_EQ(captured, "world");
714 }
715
Test_UNGREEDY()716 static void Test_UNGREEDY() {
717 RE_Options options;
718 const char *str = "HELLO, 'this' is the 'world'";
719
720 options.set_ungreedy(true);
721 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
722 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
723 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
724
725 options.set_ungreedy(false);
726 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
727 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
728 }
729
Test_all_options()730 static void Test_all_options() {
731 const char *str = "HELLO\n" "cruel\n" "world";
732 RE_Options options;
733 options.set_all_options(PCRE2_CASELESS | PCRE2_DOTALL);
734
735 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
736 options.set_all_options(0);
737 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
738 options.set_all_options(PCRE2_MULTILINE | PCRE2_EXTENDED);
739
740 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
741 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
742 " ^ c r u e l $ ",
743 str,
744 RE_Options(PCRE2_MULTILINE | PCRE2_EXTENDED),
745 false);
746
747 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
748 " ^ c r u e l $ ",
749 str,
750 RE_Options()
751 .set_multiline(true)
752 .set_extended(true),
753 false);
754
755 options.set_all_options(0);
756 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
757
758 }
759
TestOptions()760 static void TestOptions() {
761 printf("Testing Options\n");
762 Test_CASELESS();
763 Test_MULTILINE();
764 Test_DOTALL();
765 Test_DOLLAR_ENDONLY();
766 Test_EXTENDED();
767 Test_NO_AUTO_CAPTURE();
768 Test_UNGREEDY();
769 Test_all_options();
770 }
771
TestConstructors()772 static void TestConstructors() {
773 printf("Testing constructors\n");
774
775 RE_Options options;
776 options.set_dotall(true);
777 const char *str = "HELLO\n" "cruel\n" "world";
778
779 RE orig("HELLO.*world", options);
780 CHECK(orig.FullMatch(str));
781
782 RE copy1(orig);
783 CHECK(copy1.FullMatch(str));
784
785 RE copy2("not a match");
786 CHECK(!copy2.FullMatch(str));
787 copy2 = copy1;
788 CHECK(copy2.FullMatch(str));
789 copy2 = orig;
790 CHECK(copy2.FullMatch(str));
791
792 // Make sure when we assign to ourselves, nothing bad happens
793 orig = orig;
794 copy1 = copy1;
795 copy2 = copy2;
796 CHECK(orig.FullMatch(str));
797 CHECK(copy1.FullMatch(str));
798 CHECK(copy2.FullMatch(str));
799 }
800
main(int argc,char ** argv)801 int main(int argc, char** argv) {
802 // Treat any flag as --help
803 if (argc > 1 && argv[1][0] == '-') {
804 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
805 " If 'timingX ###' is specified, run the given timing test\n"
806 " with the given number of iterations, rather than running\n"
807 " the default corectness test.\n", argv[0]);
808 return 0;
809 }
810
811 if (argc > 1) {
812 if ( argc == 2 || atoi(argv[2]) == 0) {
813 printf("timing mode needs a num-iters argument\n");
814 return 1;
815 }
816 if (!strcmp(argv[1], "timing1"))
817 Timing1(atoi(argv[2]));
818 else if (!strcmp(argv[1], "timing2"))
819 Timing2(atoi(argv[2]));
820 else if (!strcmp(argv[1], "timing3"))
821 Timing3(atoi(argv[2]));
822 else
823 printf("Unknown argument '%s'\n", argv[1]);
824 return 0;
825 }
826
827 printf("PCRE C++ wrapper tests\n");
828 printf("Testing FullMatch\n");
829
830 int i;
831 string s;
832
833 /***** FullMatch with no args *****/
834
835 CHECK(RE("h.*o").FullMatch("hello"));
836 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
837 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
838 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
839 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
840 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
841
842 /***** FullMatch with args *****/
843
844 // Zero-arg
845 CHECK(RE("\\d+").FullMatch("1001"));
846
847 // Single-arg
848 CHECK(RE("(\\d+)").FullMatch("1001", &i));
849 CHECK_EQ(i, 1001);
850 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
851 CHECK_EQ(i, -123);
852 CHECK(!RE("()\\d+").FullMatch("10", &i));
853 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
854 &i));
855
856 // Digits surrounding integer-arg
857 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
858 CHECK_EQ(i, 23);
859 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
860 CHECK_EQ(i, 1);
861 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
862 CHECK_EQ(i, -1);
863 CHECK(RE("(\\d)").PartialMatch("1234", &i));
864 CHECK_EQ(i, 1);
865 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
866 CHECK_EQ(i, -1);
867
868 // String-arg
869 CHECK(RE("h(.*)o").FullMatch("hello", &s));
870 CHECK_EQ(s, string("ell"));
871
872 // StringPiece-arg
873 StringPiece sp;
874 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
875 CHECK_EQ(sp.size(), 4);
876 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
877 CHECK_EQ(i, 1234);
878
879 // Multi-arg
880 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
881 CHECK_EQ(s, string("ruby"));
882 CHECK_EQ(i, 1234);
883
884 // Ignore non-void* NULL arg
885 CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
886 CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
887 CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
888 CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
889 #ifdef HAVE_LONG_LONG
890 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
891 #endif
892 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
893 CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
894
895 // Fail on non-void* NULL arg if the match doesn't parse for the given type.
896 CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
897 CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
898 CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
899 CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
900 CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
901
902 // Ignored arg
903 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
904 CHECK_EQ(s, string("ruby"));
905 CHECK_EQ(i, 1234);
906
907 // Type tests
908 {
909 char c;
910 CHECK(RE("(H)ello").FullMatch("Hello", &c));
911 CHECK_EQ(c, 'H');
912 }
913 {
914 unsigned char c;
915 CHECK(RE("(H)ello").FullMatch("Hello", &c));
916 CHECK_EQ(c, static_cast<unsigned char>('H'));
917 }
918 {
919 short v;
920 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
921 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
922 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
923 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
924 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
925 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
926 }
927 {
928 unsigned short v;
929 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
930 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
931 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
932 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
933 }
934 {
935 int v;
936 static const int max_value = 0x7fffffff;
937 static const int min_value = -max_value - 1;
938 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
939 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
940 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
941 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
942 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
943 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
944 }
945 {
946 unsigned int v;
947 static const unsigned int max_value = 0xfffffffful;
948 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
949 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
950 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
951 }
952 #ifdef HAVE_LONG_LONG
953 # if defined(__MINGW__) || defined(__MINGW32__)
954 # define LLD "%I64d"
955 # define LLU "%I64u"
956 # else
957 # define LLD "%lld"
958 # define LLU "%llu"
959 # endif
960 {
961 long long v;
962 static const long long max_value = 0x7fffffffffffffffLL;
963 static const long long min_value = -max_value - 1;
964 char buf[32]; // definitely big enough for a long long
965
966 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
967 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
968
969 sprintf(buf, LLD, max_value);
970 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
971
972 sprintf(buf, LLD, min_value);
973 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
974
975 sprintf(buf, LLD, max_value);
976 assert(buf[strlen(buf)-1] != '9');
977 buf[strlen(buf)-1]++;
978 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
979
980 sprintf(buf, LLD, min_value);
981 assert(buf[strlen(buf)-1] != '9');
982 buf[strlen(buf)-1]++;
983 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
984 }
985 #endif
986 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
987 {
988 unsigned long long v;
989 long long v2;
990 static const unsigned long long max_value = 0xffffffffffffffffULL;
991 char buf[32]; // definitely big enough for a unsigned long long
992
993 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
994 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
995
996 sprintf(buf, LLU, max_value);
997 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
998
999 assert(buf[strlen(buf)-1] != '9');
1000 buf[strlen(buf)-1]++;
1001 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
1002 }
1003 #endif
1004 {
1005 float v;
1006 CHECK(RE("(.*)").FullMatch("100", &v));
1007 CHECK(RE("(.*)").FullMatch("-100.", &v));
1008 CHECK(RE("(.*)").FullMatch("1e23", &v));
1009 }
1010 {
1011 double v;
1012 CHECK(RE("(.*)").FullMatch("100", &v));
1013 CHECK(RE("(.*)").FullMatch("-100.", &v));
1014 CHECK(RE("(.*)").FullMatch("1e23", &v));
1015 }
1016
1017 // Check that matching is fully anchored
1018 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
1019 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
1020 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1021 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1022
1023 // Braces
1024 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1025 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1026 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1027
1028 // Complicated RE
1029 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1030 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1031 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1032 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1033
1034 // Check full-match handling (needs '$' tacked on internally)
1035 CHECK(RE("fo|foo").FullMatch("fo"));
1036 CHECK(RE("fo|foo").FullMatch("foo"));
1037 CHECK(RE("fo|foo$").FullMatch("fo"));
1038 CHECK(RE("fo|foo$").FullMatch("foo"));
1039 CHECK(RE("foo$").FullMatch("foo"));
1040 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1041 CHECK(!RE("fo|bar").FullMatch("fox"));
1042
  // The following check is disabled via `if (false)`. Enable it if we
  // change the handling of '$' to prevent it from matching a trailing newline
1045 if (false) {
1046 // Check that we don't get bitten by pcre's special handling of a
1047 // '\n' at the end of the string matching '$'
1048 CHECK(!RE("foo$").PartialMatch("foo\n"));
1049 }
1050
1051 // Number of args
1052 int a[16];
1053 CHECK(RE("").FullMatch(""));
1054
1055 memset(a, 0, sizeof(0));
1056 CHECK(RE("(\\d){1}").FullMatch("1",
1057 &a[0]));
1058 CHECK_EQ(a[0], 1);
1059
1060 memset(a, 0, sizeof(0));
1061 CHECK(RE("(\\d)(\\d)").FullMatch("12",
1062 &a[0], &a[1]));
1063 CHECK_EQ(a[0], 1);
1064 CHECK_EQ(a[1], 2);
1065
1066 memset(a, 0, sizeof(0));
1067 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1068 &a[0], &a[1], &a[2]));
1069 CHECK_EQ(a[0], 1);
1070 CHECK_EQ(a[1], 2);
1071 CHECK_EQ(a[2], 3);
1072
1073 memset(a, 0, sizeof(0));
1074 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1075 &a[0], &a[1], &a[2], &a[3]));
1076 CHECK_EQ(a[0], 1);
1077 CHECK_EQ(a[1], 2);
1078 CHECK_EQ(a[2], 3);
1079 CHECK_EQ(a[3], 4);
1080
1081 memset(a, 0, sizeof(0));
1082 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1083 &a[0], &a[1], &a[2],
1084 &a[3], &a[4]));
1085 CHECK_EQ(a[0], 1);
1086 CHECK_EQ(a[1], 2);
1087 CHECK_EQ(a[2], 3);
1088 CHECK_EQ(a[3], 4);
1089 CHECK_EQ(a[4], 5);
1090
1091 memset(a, 0, sizeof(0));
1092 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1093 &a[0], &a[1], &a[2],
1094 &a[3], &a[4], &a[5]));
1095 CHECK_EQ(a[0], 1);
1096 CHECK_EQ(a[1], 2);
1097 CHECK_EQ(a[2], 3);
1098 CHECK_EQ(a[3], 4);
1099 CHECK_EQ(a[4], 5);
1100 CHECK_EQ(a[5], 6);
1101
1102 memset(a, 0, sizeof(0));
1103 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1104 &a[0], &a[1], &a[2], &a[3],
1105 &a[4], &a[5], &a[6]));
1106 CHECK_EQ(a[0], 1);
1107 CHECK_EQ(a[1], 2);
1108 CHECK_EQ(a[2], 3);
1109 CHECK_EQ(a[3], 4);
1110 CHECK_EQ(a[4], 5);
1111 CHECK_EQ(a[5], 6);
1112 CHECK_EQ(a[6], 7);
1113
1114 memset(a, 0, sizeof(0));
1115 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1116 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1117 "1234567890123456",
1118 &a[0], &a[1], &a[2], &a[3],
1119 &a[4], &a[5], &a[6], &a[7],
1120 &a[8], &a[9], &a[10], &a[11],
1121 &a[12], &a[13], &a[14], &a[15]));
1122 CHECK_EQ(a[0], 1);
1123 CHECK_EQ(a[1], 2);
1124 CHECK_EQ(a[2], 3);
1125 CHECK_EQ(a[3], 4);
1126 CHECK_EQ(a[4], 5);
1127 CHECK_EQ(a[5], 6);
1128 CHECK_EQ(a[6], 7);
1129 CHECK_EQ(a[7], 8);
1130 CHECK_EQ(a[8], 9);
1131 CHECK_EQ(a[9], 0);
1132 CHECK_EQ(a[10], 1);
1133 CHECK_EQ(a[11], 2);
1134 CHECK_EQ(a[12], 3);
1135 CHECK_EQ(a[13], 4);
1136 CHECK_EQ(a[14], 5);
1137 CHECK_EQ(a[15], 6);
1138
1139 /***** PartialMatch *****/
1140
1141 printf("Testing PartialMatch\n");
1142
1143 CHECK(RE("h.*o").PartialMatch("hello"));
1144 CHECK(RE("h.*o").PartialMatch("othello"));
1145 CHECK(RE("h.*o").PartialMatch("hello!"));
1146 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1147
1148 /***** other tests *****/
1149
1150 RadixTests();
1151 TestReplace();
1152 TestExtract();
1153 TestConsume();
1154 TestFindAndConsume();
1155 TestQuoteMetaAll();
1156 TestMatchNumberPeculiarity();
1157
1158 // Check the pattern() accessor
1159 {
1160 const string kPattern = "http://([^/]+)/.*";
1161 const RE re(kPattern);
1162 CHECK_EQ(kPattern, re.pattern());
1163 }
1164
1165 // Check RE error field.
1166 {
1167 RE re("foo");
1168 CHECK(re.error().empty()); // Must have no error
1169 }
1170
1171 #ifdef SUPPORT_UTF8
1172 // Check UTF-8 handling
1173 {
1174 printf("Testing UTF-8 handling\n");
1175
1176 // Three Japanese characters (nihongo)
1177 const unsigned char utf8_string[] = {
1178 0xe6, 0x97, 0xa5, // 65e5
      0xe6, 0x9c, 0xac, // 672c
1180 0xe8, 0xaa, 0x9e, // 8a9e
1181 0
1182 };
1183 const unsigned char utf8_pattern[] = {
1184 '.',
      0xe6, 0x9c, 0xac, // 672c
1186 '.',
1187 0
1188 };
1189
1190 // Both should match in either mode, bytes or UTF-8
1191 RE re_test1(".........");
1192 CHECK(re_test1.FullMatch(utf8_string));
1193 RE re_test2("...", pcrecpp::UTF8());
1194 CHECK(re_test2.FullMatch(utf8_string));
1195
1196 // Check that '.' matches one byte or UTF-8 character
1197 // according to the mode.
1198 string ss;
1199 RE re_test3("(.)");
1200 CHECK(re_test3.PartialMatch(utf8_string, &ss));
1201 CHECK_EQ(ss, string("\xe6"));
1202 RE re_test4("(.)", pcrecpp::UTF8());
1203 CHECK(re_test4.PartialMatch(utf8_string, &ss));
1204 CHECK_EQ(ss, string("\xe6\x97\xa5"));
1205
1206 // Check that string matches itself in either mode
1207 RE re_test5(utf8_string);
1208 CHECK(re_test5.FullMatch(utf8_string));
1209 RE re_test6(utf8_string, pcrecpp::UTF8());
1210 CHECK(re_test6.FullMatch(utf8_string));
1211
1212 // Check that pattern matches string only in UTF8 mode
1213 RE re_test7(utf8_pattern);
1214 CHECK(!re_test7.FullMatch(utf8_string));
1215 RE re_test8(utf8_pattern, pcrecpp::UTF8());
1216 CHECK(re_test8.FullMatch(utf8_string));
1217 }
1218
1219 // Check that ungreedy, UTF8 regular expressions don't match when they
1220 // oughtn't -- see bug 82246.
1221 {
1222 // This code always worked.
1223 const char* pattern = "\\w+X";
1224 const string target = "a aX";
1225 RE match_sentence(pattern);
1226 RE match_sentence_re(pattern, pcrecpp::UTF8());
1227
1228 CHECK(!match_sentence.FullMatch(target));
1229 CHECK(!match_sentence_re.FullMatch(target));
1230 }
1231
1232 {
1233 const char* pattern = "(?U)\\w+X";
1234 const string target = "a aX";
1235 RE match_sentence(pattern);
1236 RE match_sentence_re(pattern, pcrecpp::UTF8());
1237
1238 CHECK(!match_sentence.FullMatch(target));
1239 CHECK(!match_sentence_re.FullMatch(target));
1240 }
1241 #endif /* def SUPPORT_UTF8 */
1242
1243 printf("Testing error reporting\n");
1244
1245 { RE re("a\\1"); CHECK(!re.error().empty()); }
1246 {
1247 RE re("a[x");
1248 CHECK(!re.error().empty());
1249 }
1250 {
1251 RE re("a[z-a]");
1252 CHECK(!re.error().empty());
1253 }
1254 {
1255 RE re("a[[:foobar:]]");
1256 CHECK(!re.error().empty());
1257 }
1258 {
1259 RE re("a(b");
1260 CHECK(!re.error().empty());
1261 }
1262 {
1263 RE re("a\\");
1264 CHECK(!re.error().empty());
1265 }
1266
1267 // Test that recursion is stopped
1268 TestRecursion();
1269
1270 // Test Options
1271 if (getenv("VERBOSE_TEST") != NULL)
1272 VERBOSE_TEST = true;
1273 TestOptions();
1274
1275 // Test the constructors
1276 TestConstructors();
1277
1278 // Done
1279 printf("OK\n");
1280
1281 return 0;
1282 }
1283