1 // -*- coding: utf-8 -*-
2 //
3 // Copyright (c) 2005 - 2010, Google Inc.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 //     * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Author: Sanjay Ghemawat
33 //
34 // TODO: Test extractions for PartialMatch/Consume
35 
36 #ifdef HAVE_CONFIG_H
37 #include "config.h"
38 #endif
39 
40 #include <stdio.h>
41 #include <string.h>      /* for memset and strcmp */
42 #include <cassert>
43 #include <vector>
44 #include "pcrecpp.h"
45 
46 using pcrecpp::StringPiece;
47 using pcrecpp::RE;
48 using pcrecpp::RE_Options;
49 using pcrecpp::Hex;
50 using pcrecpp::Octal;
51 using pcrecpp::CRadix;
52 
53 static bool VERBOSE_TEST  = false;
54 
// CHECK terminates the process (exit status 1) with a diagnostic on stderr
// when `condition` evaluates to false.  It is *not* controlled by NDEBUG, so
// the condition is evaluated regardless of compilation mode.  Therefore, it
// is safe to put expressions with side effects inside it, e.g.:
//    CHECK_EQ(fp->Write(x), 4)
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ dies unless a == b.  Both operands are parenthesized so that an
// argument containing an operator of lower precedence than `==` (such as
// `x & 1`) is compared as a whole instead of being re-associated.
#define CHECK_EQ(a, b)   CHECK((a) == (b))
68 
Timing1(int num_iters)69 static void Timing1(int num_iters) {
70   // Same pattern lots of times
71   RE pattern("ruby:\\d+");
72   StringPiece p("ruby:1234");
73   for (int j = num_iters; j > 0; j--) {
74     CHECK(pattern.FullMatch(p));
75   }
76 }
77 
Timing2(int num_iters)78 static void Timing2(int num_iters) {
79   // Same pattern lots of times
80   RE pattern("ruby:(\\d+)");
81   int i;
82   for (int j = num_iters; j > 0; j--) {
83     CHECK(pattern.FullMatch("ruby:1234", &i));
84     CHECK_EQ(i, 1234);
85   }
86 }
87 
Timing3(int num_iters)88 static void Timing3(int num_iters) {
89   string text_string;
90   for (int j = num_iters; j > 0; j--) {
91     text_string += "this is another line\n";
92   }
93 
94   RE line_matcher(".*\n");
95   string line;
96   StringPiece text(text_string);
97   int counter = 0;
98   while (line_matcher.Consume(&text)) {
99     counter++;
100   }
101   printf("Matched %d lines\n", counter);
102 }
103 
104 #if 0  // uncomment this if you have a way of defining VirtualProcessSize()
105 
106 static void LeakTest() {
107   // Check for memory leaks
108   unsigned long long initial_size = 0;
109   for (int i = 0; i < 100000; i++) {
110     if (i == 50000) {
111       initial_size = VirtualProcessSize();
112       printf("Size after 50000: %llu\n", initial_size);
113     }
114     char buf[100];  // definitely big enough
115     sprintf(buf, "pat%09d", i);
116     RE newre(buf);
117   }
118   uint64 final_size = VirtualProcessSize();
119   printf("Size after 100000: %llu\n", final_size);
120   const double growth = double(final_size - initial_size) / final_size;
121   printf("Growth: %0.2f%%", growth * 100);
122   CHECK(growth < 0.02);       // Allow < 2% growth
123 }
124 
125 #endif
126 
RadixTests()127 static void RadixTests() {
128   printf("Testing hex\n");
129 
130 #define CHECK_HEX(type, value) \
131   do { \
132     type v; \
133     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
134     CHECK_EQ(v, 0x ## value); \
135     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
136     CHECK_EQ(v, 0x ## value); \
137   } while(0)
138 
139   CHECK_HEX(short,              2bad);
140   CHECK_HEX(unsigned short,     2badU);
141   CHECK_HEX(int,                dead);
142   CHECK_HEX(unsigned int,       deadU);
143   CHECK_HEX(long,               7eadbeefL);
144   CHECK_HEX(unsigned long,      deadbeefUL);
145 #ifdef HAVE_LONG_LONG
146   CHECK_HEX(long long,          12345678deadbeefLL);
147 #endif
148 #ifdef HAVE_UNSIGNED_LONG_LONG
149   CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
150 #endif
151 
152 #undef CHECK_HEX
153 
154   printf("Testing octal\n");
155 
156 #define CHECK_OCTAL(type, value) \
157   do { \
158     type v; \
159     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
160     CHECK_EQ(v, 0 ## value); \
161     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
162     CHECK_EQ(v, 0 ## value); \
163   } while(0)
164 
165   CHECK_OCTAL(short,              77777);
166   CHECK_OCTAL(unsigned short,     177777U);
167   CHECK_OCTAL(int,                17777777777);
168   CHECK_OCTAL(unsigned int,       37777777777U);
169   CHECK_OCTAL(long,               17777777777L);
170   CHECK_OCTAL(unsigned long,      37777777777UL);
171 #ifdef HAVE_LONG_LONG
172   CHECK_OCTAL(long long,          777777777777777777777LL);
173 #endif
174 #ifdef HAVE_UNSIGNED_LONG_LONG
175   CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
176 #endif
177 
178 #undef CHECK_OCTAL
179 
180   printf("Testing decimal\n");
181 
182 #define CHECK_DECIMAL(type, value) \
183   do { \
184     type v; \
185     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
186     CHECK_EQ(v, value); \
187     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
188     CHECK_EQ(v, value); \
189   } while(0)
190 
191   CHECK_DECIMAL(short,              -1);
192   CHECK_DECIMAL(unsigned short,     9999);
193   CHECK_DECIMAL(int,                -1000);
194   CHECK_DECIMAL(unsigned int,       12345U);
195   CHECK_DECIMAL(long,               -10000000L);
196   CHECK_DECIMAL(unsigned long,      3083324652U);
197 #ifdef HAVE_LONG_LONG
198   CHECK_DECIMAL(long long,          -100000000000000LL);
199 #endif
200 #ifdef HAVE_UNSIGNED_LONG_LONG
201   CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
202 #endif
203 
204 #undef CHECK_DECIMAL
205 
206 }
207 
TestReplace()208 static void TestReplace() {
209   printf("Testing Replace\n");
210 
211   struct ReplaceTest {
212     const char *regexp;
213     const char *rewrite;
214     const char *original;
215     const char *single;
216     const char *global;
217     int global_count;         // the expected return value from ReplaceAll
218   };
219   static const ReplaceTest tests[] = {
220     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
221       "\\2\\1ay",
222       "the quick brown fox jumps over the lazy dogs.",
223       "ethay quick brown fox jumps over the lazy dogs.",
224       "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
225       9 },
226     { "\\w+",
227       "\\0-NOSPAM",
228       "paul.haahr@google.com",
229       "paul-NOSPAM.haahr@google.com",
230       "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
231       4 },
232     { "^",
233       "(START)",
234       "foo",
235       "(START)foo",
236       "(START)foo",
237       1 },
238     { "^",
239       "(START)",
240       "",
241       "(START)",
242       "(START)",
243       1 },
244     { "$",
245       "(END)",
246       "",
247       "(END)",
248       "(END)",
249       1 },
250     { "b",
251       "bb",
252       "ababababab",
253       "abbabababab",
254       "abbabbabbabbabb",
255        5 },
256     { "b",
257       "bb",
258       "bbbbbb",
259       "bbbbbbb",
260       "bbbbbbbbbbbb",
261       6 },
262     { "b+",
263       "bb",
264       "bbbbbb",
265       "bb",
266       "bb",
267       1 },
268     { "b*",
269       "bb",
270       "bbbbbb",
271       "bb",
272       "bbbb",
273       2 },
274     { "b*",
275       "bb",
276       "aaaaa",
277       "bbaaaaa",
278       "bbabbabbabbabbabb",
279       6 },
280     { "b*",
281       "bb",
282       "aa\naa\n",
283       "bbaa\naa\n",
284       "bbabbabb\nbbabbabb\nbb",
285       7 },
286     { "b*",
287       "bb",
288       "aa\raa\r",
289       "bbaa\raa\r",
290       "bbabbabb\rbbabbabb\rbb",
291       7 },
292     { "b*",
293       "bb",
294       "aa\r\naa\r\n",
295       "bbaa\r\naa\r\n",
296       "bbabbabb\r\nbbabbabb\r\nbb",
297       7 },
298     // Check empty-string matching (it's tricky!)
299     { "aa|b*",
300       "@",
301       "aa",
302       "@",
303       "@@",
304       2 },
305     { "b*|aa",
306       "@",
307       "aa",
308       "@aa",
309       "@@@",
310       3 },
311 #ifdef SUPPORT_UTF8
312     { "b*",
313       "bb",
314       "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
315       "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
316       "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
317       5 },
318     { "b*",
319       "bb",
320       "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
321       "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
322       ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
323        "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
324       9 },
325 #endif
326     { "", NULL, NULL, NULL, NULL, 0 }
327   };
328 
329 #ifdef SUPPORT_UTF8
330   const bool support_utf8 = true;
331 #else
332   const bool support_utf8 = false;
333 #endif
334 
335   for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
336     RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
337     assert(re.error().empty());
338     string one(t->original);
339     CHECK(re.Replace(t->rewrite, &one));
340     CHECK_EQ(one, t->single);
341     string all(t->original);
342     const int replace_count = re.GlobalReplace(t->rewrite, &all);
343     CHECK_EQ(all, t->global);
344     CHECK_EQ(replace_count, t->global_count);
345   }
346 
347   // One final test: test \r\n replacement when we're not in CRLF mode
348   {
349     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
350     assert(re.error().empty());
351     string all("aa\r\naa\r\n");
352     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
353     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
354   }
355   {
356     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
357     assert(re.error().empty());
358     string all("aa\r\naa\r\n");
359     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
360     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
361   }
362   // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
363   //       Alas, the answer depends on how pcre was compiled.
364 }
365 
TestExtract()366 static void TestExtract() {
367   printf("Testing Extract\n");
368 
369   string s;
370 
371   CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
372   CHECK_EQ(s, "kremvax!boris");
373 
374   // check the RE interface as well
375   CHECK(RE(".*").Extract("'\\0'", "foo", &s));
376   CHECK_EQ(s, "'foo'");
377   CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
378   CHECK_EQ(s, "'foo'");
379 }
380 
TestConsume()381 static void TestConsume() {
382   printf("Testing Consume\n");
383 
384   string word;
385 
386   string s("   aaa b!@#$@#$cccc");
387   StringPiece input(s);
388 
389   RE r("\\s*(\\w+)");    // matches a word, possibly proceeded by whitespace
390   CHECK(r.Consume(&input, &word));
391   CHECK_EQ(word, "aaa");
392   CHECK(r.Consume(&input, &word));
393   CHECK_EQ(word, "b");
394   CHECK(! r.Consume(&input, &word));
395 }
396 
TestFindAndConsume()397 static void TestFindAndConsume() {
398   printf("Testing FindAndConsume\n");
399 
400   string word;
401 
402   string s("   aaa b!@#$@#$cccc");
403   StringPiece input(s);
404 
405   RE r("(\\w+)");      // matches a word
406   CHECK(r.FindAndConsume(&input, &word));
407   CHECK_EQ(word, "aaa");
408   CHECK(r.FindAndConsume(&input, &word));
409   CHECK_EQ(word, "b");
410   CHECK(r.FindAndConsume(&input, &word));
411   CHECK_EQ(word, "cccc");
412   CHECK(! r.FindAndConsume(&input, &word));
413 }
414 
TestMatchNumberPeculiarity()415 static void TestMatchNumberPeculiarity() {
416   printf("Testing match-number peculiarity\n");
417 
418   string word1;
419   string word2;
420   string word3;
421 
422   RE r("(foo)|(bar)|(baz)");
423   CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
424   CHECK_EQ(word1, "foo");
425   CHECK_EQ(word2, "");
426   CHECK_EQ(word3, "");
427   CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
428   CHECK_EQ(word1, "");
429   CHECK_EQ(word2, "bar");
430   CHECK_EQ(word3, "");
431   CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
432   CHECK_EQ(word1, "");
433   CHECK_EQ(word2, "");
434   CHECK_EQ(word3, "baz");
435   CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
436 
437   string a;
438   CHECK(RE("(foo)|hello").FullMatch("hello", &a));
439   CHECK_EQ(a, "");
440 }
441 
TestRecursion()442 static void TestRecursion() {
443   printf("Testing recursion\n");
444 
445   // Get one string that passes (sometimes), one that never does.
446   string text_good("abcdefghijk");
447   string text_bad("acdefghijkl");
448 
449   // According to pcretest, matching text_good against (\w+)*b
450   // requires match_limit of at least 8192, and match_recursion_limit
451   // of at least 37.
452 
453   RE_Options options_ml;
454   options_ml.set_match_limit(8192);
455   RE re("(\\w+)*b", options_ml);
456   CHECK(re.PartialMatch(text_good) == true);
457   CHECK(re.PartialMatch(text_bad) == false);
458   CHECK(re.FullMatch(text_good) == false);
459   CHECK(re.FullMatch(text_bad) == false);
460 
461   options_ml.set_match_limit(1024);
462   RE re2("(\\w+)*b", options_ml);
463   CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
464   CHECK(re2.PartialMatch(text_bad) == false);
465   CHECK(re2.FullMatch(text_good) == false);
466   CHECK(re2.FullMatch(text_bad) == false);
467 
468   RE_Options options_mlr;
469   options_mlr.set_match_limit_recursion(50);
470   RE re3("(\\w+)*b", options_mlr);
471   CHECK(re3.PartialMatch(text_good) == true);
472   CHECK(re3.PartialMatch(text_bad) == false);
473   CHECK(re3.FullMatch(text_good) == false);
474   CHECK(re3.FullMatch(text_bad) == false);
475 
476   options_mlr.set_match_limit_recursion(10);
477   RE re4("(\\w+)*b", options_mlr);
478   CHECK(re4.PartialMatch(text_good) == false);
479   CHECK(re4.PartialMatch(text_bad) == false);
480   CHECK(re4.FullMatch(text_good) == false);
481   CHECK(re4.FullMatch(text_bad) == false);
482 }
483 
484 // A meta-quoted string, interpreted as a pattern, should always match
485 // the original unquoted string.
TestQuoteMeta(string unquoted,RE_Options options=RE_Options ())486 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
487   string quoted = RE::QuoteMeta(unquoted);
488   RE re(quoted, options);
489   CHECK(re.FullMatch(unquoted));
490 }
491 
492 // A string containing meaningful regexp characters, which is then meta-
493 // quoted, should not generally match a string the unquoted string does.
NegativeTestQuoteMeta(string unquoted,string should_not_match,RE_Options options=RE_Options ())494 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
495                                   RE_Options options = RE_Options()) {
496   string quoted = RE::QuoteMeta(unquoted);
497   RE re(quoted, options);
498   CHECK(!re.FullMatch(should_not_match));
499 }
500 
501 // Tests that quoted meta characters match their original strings,
502 // and that a few things that shouldn't match indeed do not.
TestQuotaMetaSimple()503 static void TestQuotaMetaSimple() {
504   TestQuoteMeta("foo");
505   TestQuoteMeta("foo.bar");
506   TestQuoteMeta("foo\\.bar");
507   TestQuoteMeta("[1-9]");
508   TestQuoteMeta("1.5-2.0?");
509   TestQuoteMeta("\\d");
510   TestQuoteMeta("Who doesn't like ice cream?");
511   TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
512   TestQuoteMeta("((?!)xxx).*yyy");
513   TestQuoteMeta("([");
514   TestQuoteMeta(string("foo\0bar", 7));
515 }
516 
TestQuoteMetaSimpleNegative()517 static void TestQuoteMetaSimpleNegative() {
518   NegativeTestQuoteMeta("foo", "bar");
519   NegativeTestQuoteMeta("...", "bar");
520   NegativeTestQuoteMeta("\\.", ".");
521   NegativeTestQuoteMeta("\\.", "..");
522   NegativeTestQuoteMeta("(a)", "a");
523   NegativeTestQuoteMeta("(a|b)", "a");
524   NegativeTestQuoteMeta("(a|b)", "(a)");
525   NegativeTestQuoteMeta("(a|b)", "a|b");
526   NegativeTestQuoteMeta("[0-9]", "0");
527   NegativeTestQuoteMeta("[0-9]", "0-9");
528   NegativeTestQuoteMeta("[0-9]", "[9]");
529   NegativeTestQuoteMeta("((?!)xxx)", "xxx");
530 }
531 
TestQuoteMetaLatin1()532 static void TestQuoteMetaLatin1() {
533   TestQuoteMeta("3\xb2 = 9");
534 }
535 
// QuoteMeta checks on multi-byte UTF-8 sequences; compiled to a no-op unless
// SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8_opts = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8_opts);
  TestQuoteMeta("xyz", utf8_opts);                  // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8_opts);             // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8_opts);   // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8_opts);         // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8_opts);     // 4-byte utf8 (music note)

  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");

  // 2-byte utf (degree symbol) against a candidate that has a backslash in
  // front of each of its two bytes.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        utf8_opts);
#endif
}
550 
TestQuoteMetaAll()551 static void TestQuoteMetaAll() {
552   printf("Testing QuoteMeta\n");
553   TestQuotaMetaSimple();
554   TestQuoteMetaSimpleNegative();
555   TestQuoteMetaLatin1();
556   TestQuoteMetaUtf8();
557 }
558 
559 //
560 // Options tests contributed by
561 // Giuseppe Maxia, CTO, Stardata s.r.l.
562 // July 2005
563 //
GetOneOptionResult(const char * option_name,const char * regex,const char * str,RE_Options options,bool full,string expected)564 static void GetOneOptionResult(
565                 const char *option_name,
566                 const char *regex,
567                 const char *str,
568                 RE_Options options,
569                 bool full,
570                 string expected) {
571 
572   printf("Testing Option <%s>\n", option_name);
573   if(VERBOSE_TEST)
574     printf("/%s/ finds \"%s\" within \"%s\" \n",
575                     regex,
576                     expected.c_str(),
577                     str);
578   string captured("");
579   if (full)
580     RE(regex,options).FullMatch(str, &captured);
581   else
582     RE(regex,options).PartialMatch(str, &captured);
583   CHECK_EQ(captured, expected);
584 }
585 
TestOneOption(const char * option_name,const char * regex,const char * str,RE_Options options,bool full,bool assertive=true)586 static void TestOneOption(
587                 const char *option_name,
588                 const char *regex,
589                 const char *str,
590                 RE_Options options,
591                 bool full,
592                 bool assertive = true) {
593 
594   printf("Testing Option <%s>\n", option_name);
595   if (VERBOSE_TEST)
596     printf("'%s' %s /%s/ \n",
597                   str,
598                   (assertive? "matches" : "doesn't match"),
599                   regex);
600   if (assertive) {
601     if (full)
602       CHECK(RE(regex,options).FullMatch(str));
603     else
604       CHECK(RE(regex,options).PartialMatch(str));
605   } else {
606     if (full)
607       CHECK(!RE(regex,options).FullMatch(str));
608     else
609       CHECK(!RE(regex,options).PartialMatch(str));
610   }
611 }
612 
Test_CASELESS()613 static void Test_CASELESS() {
614   RE_Options options;
615   RE_Options options2;
616 
617   options.set_caseless(true);
618   TestOneOption("CASELESS (class)",  "HELLO",    "hello", options, false);
619   TestOneOption("CASELESS (class2)", "HELLO",    "hello", options2.set_caseless(true), false);
620   TestOneOption("CASELESS (class)",  "^[A-Z]+$", "Hello", options, false);
621 
622   TestOneOption("CASELESS (function)", "HELLO",    "hello", pcrecpp::CASELESS(), false);
623   TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
624   options.set_caseless(false);
625   TestOneOption("no CASELESS", "HELLO",    "hello", options, false, false);
626 }
627 
Test_MULTILINE()628 static void Test_MULTILINE() {
629   RE_Options options;
630   RE_Options options2;
631   const char *str = "HELLO\n" "cruel\n" "world\n";
632 
633   options.set_multiline(true);
634   TestOneOption("MULTILINE (class)",    "^cruel$", str, options, false);
635   TestOneOption("MULTILINE (class2)",   "^cruel$", str, options2.set_multiline(true), false);
636   TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
637   options.set_multiline(false);
638   TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
639 }
640 
Test_DOTALL()641 static void Test_DOTALL() {
642   RE_Options options;
643   RE_Options options2;
644   const char *str = "HELLO\n" "cruel\n" "world";
645 
646   options.set_dotall(true);
647   TestOneOption("DOTALL (class)",    "HELLO.*world", str, options, true);
648   TestOneOption("DOTALL (class2)",   "HELLO.*world", str, options2.set_dotall(true), true);
649   TestOneOption("DOTALL (function)",    "HELLO.*world", str, pcrecpp::DOTALL(), true);
650   options.set_dotall(false);
651   TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
652 }
653 
Test_DOLLAR_ENDONLY()654 static void Test_DOLLAR_ENDONLY() {
655   RE_Options options;
656   RE_Options options2;
657   const char *str = "HELLO world\n";
658 
659   TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
660   options.set_dollar_endonly(true);
661   TestOneOption("DOLLAR_ENDONLY 1",    "world$", str, options, false, false);
662   TestOneOption("DOLLAR_ENDONLY 2",    "world$", str, options2.set_dollar_endonly(true), false, false);
663 }
664 
Test_EXTRA()665 static void Test_EXTRA() {
666   RE_Options options;
667   const char *str = "HELLO";
668 
669   options.set_extra(true);
670   TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
671   TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
672   options.set_extra(false);
673   TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
674 }
675 
Test_EXTENDED()676 static void Test_EXTENDED() {
677   RE_Options options;
678   RE_Options options2;
679   const char *str = "HELLO world";
680 
681   options.set_extended(true);
682   TestOneOption("EXTENDED (class)",    "HELLO world", str, options, false, false);
683   TestOneOption("EXTENDED (class2)",   "HELLO world", str, options2.set_extended(true), false, false);
684   TestOneOption("EXTENDED (class)",
685                     "^ HE L{2} O "
686                     "\\s+        "
687                     "\\w+ $      ",
688                     str,
689                     options,
690                     false);
691 
692   TestOneOption("EXTENDED (function)",    "HELLO world", str, pcrecpp::EXTENDED(), false, false);
693   TestOneOption("EXTENDED (function)",
694                     "^ HE L{2} O "
695                     "\\s+        "
696                     "\\w+ $      ",
697                     str,
698                     pcrecpp::EXTENDED(),
699                     false);
700 
701   options.set_extended(false);
702   TestOneOption("no EXTENDED", "HELLO world", str, options, false);
703 }
704 
Test_NO_AUTO_CAPTURE()705 static void Test_NO_AUTO_CAPTURE() {
706   RE_Options options;
707   const char *str = "HELLO world";
708   string captured;
709 
710   printf("Testing Option <no NO_AUTO_CAPTURE>\n");
711   if (VERBOSE_TEST)
712     printf("parentheses capture text\n");
713   RE re("(world|universe)$", options);
714   CHECK(re.Extract("\\1", str , &captured));
715   CHECK_EQ(captured, "world");
716   options.set_no_auto_capture(true);
717   printf("testing Option <NO_AUTO_CAPTURE>\n");
718   if (VERBOSE_TEST)
719     printf("parentheses do not capture text\n");
720   re.Extract("\\1",str, &captured );
721   CHECK_EQ(captured, "world");
722 }
723 
Test_UNGREEDY()724 static void Test_UNGREEDY() {
725   RE_Options options;
726   const char *str = "HELLO, 'this' is the 'world'";
727 
728   options.set_ungreedy(true);
729   GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
730   GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
731   GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
732 
733   options.set_ungreedy(false);
734   GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
735   GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
736 }
737 
Test_all_options()738 static void Test_all_options() {
739   const char *str = "HELLO\n" "cruel\n" "world";
740   RE_Options options;
741   options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
742 
743   TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
744   options.set_all_options(0);
745   TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
746   options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
747 
748   TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
749   TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
750                   " ^ c r u e l $ ",
751                   str,
752                   RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
753                   false);
754 
755   TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
756                   " ^ c r u e l $ ",
757                   str,
758                   RE_Options()
759                        .set_multiline(true)
760                        .set_extended(true),
761                   false);
762 
763   options.set_all_options(0);
764   TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
765 
766 }
767 
TestOptions()768 static void TestOptions() {
769   printf("Testing Options\n");
770   Test_CASELESS();
771   Test_MULTILINE();
772   Test_DOTALL();
773   Test_DOLLAR_ENDONLY();
774   Test_EXTENDED();
775   Test_NO_AUTO_CAPTURE();
776   Test_UNGREEDY();
777   Test_EXTRA();
778   Test_all_options();
779 }
780 
TestConstructors()781 static void TestConstructors() {
782   printf("Testing constructors\n");
783 
784   RE_Options options;
785   options.set_dotall(true);
786   const char *str = "HELLO\n" "cruel\n" "world";
787 
788   RE orig("HELLO.*world", options);
789   CHECK(orig.FullMatch(str));
790 
791   RE copy1(orig);
792   CHECK(copy1.FullMatch(str));
793 
794   RE copy2("not a match");
795   CHECK(!copy2.FullMatch(str));
796   copy2 = copy1;
797   CHECK(copy2.FullMatch(str));
798   copy2 = orig;
799   CHECK(copy2.FullMatch(str));
800 
801   // Make sure when we assign to ourselves, nothing bad happens
802   orig = orig;
803   copy1 = copy1;
804   copy2 = copy2;
805   CHECK(orig.FullMatch(str));
806   CHECK(copy1.FullMatch(str));
807   CHECK(copy2.FullMatch(str));
808 }
809 
main(int argc,char ** argv)810 int main(int argc, char** argv) {
811   // Treat any flag as --help
812   if (argc > 1 && argv[1][0] == '-') {
813     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
814            "       If 'timingX ###' is specified, run the given timing test\n"
815            "       with the given number of iterations, rather than running\n"
816            "       the default corectness test.\n", argv[0]);
817     return 0;
818   }
819 
820   if (argc > 1) {
821     if ( argc == 2 || atoi(argv[2]) == 0) {
822       printf("timing mode needs a num-iters argument\n");
823       return 1;
824     }
825     if (!strcmp(argv[1], "timing1"))
826       Timing1(atoi(argv[2]));
827     else if (!strcmp(argv[1], "timing2"))
828       Timing2(atoi(argv[2]));
829     else if (!strcmp(argv[1], "timing3"))
830       Timing3(atoi(argv[2]));
831     else
832       printf("Unknown argument '%s'\n", argv[1]);
833     return 0;
834   }
835 
836   printf("PCRE C++ wrapper tests\n");
837   printf("Testing FullMatch\n");
838 
839   int i;
840   string s;
841 
842   /***** FullMatch with no args *****/
843 
844   CHECK(RE("h.*o").FullMatch("hello"));
845   CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
846   CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
847   CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
848   CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
849   CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
850 
851   /***** FullMatch with args *****/
852 
853   // Zero-arg
854   CHECK(RE("\\d+").FullMatch("1001"));
855 
856   // Single-arg
857   CHECK(RE("(\\d+)").FullMatch("1001",   &i));
858   CHECK_EQ(i, 1001);
859   CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
860   CHECK_EQ(i, -123);
861   CHECK(!RE("()\\d+").FullMatch("10", &i));
862   CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
863                                 &i));
864 
865   // Digits surrounding integer-arg
866   CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
867   CHECK_EQ(i, 23);
868   CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
869   CHECK_EQ(i, 1);
870   CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
871   CHECK_EQ(i, -1);
872   CHECK(RE("(\\d)").PartialMatch("1234", &i));
873   CHECK_EQ(i, 1);
874   CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
875   CHECK_EQ(i, -1);
876 
877   // String-arg
878   CHECK(RE("h(.*)o").FullMatch("hello", &s));
879   CHECK_EQ(s, string("ell"));
880 
881   // StringPiece-arg
882   StringPiece sp;
883   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
884   CHECK_EQ(sp.size(), 4);
885   CHECK(memcmp(sp.data(), "ruby", 4) == 0);
886   CHECK_EQ(i, 1234);
887 
888   // Multi-arg
889   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
890   CHECK_EQ(s, string("ruby"));
891   CHECK_EQ(i, 1234);
892 
893   // Ignore non-void* NULL arg
894   CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
895   CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
896   CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
897   CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
898 #ifdef HAVE_LONG_LONG
899   CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
900 #endif
901   CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
902   CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
903 
904   // Fail on non-void* NULL arg if the match doesn't parse for the given type.
905   CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
906   CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
907   CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
908   CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
909   CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
910 
911   // Ignored arg
912   CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
913   CHECK_EQ(s, string("ruby"));
914   CHECK_EQ(i, 1234);
915 
916   // Type tests
917   {
918     char c;
919     CHECK(RE("(H)ello").FullMatch("Hello", &c));
920     CHECK_EQ(c, 'H');
921   }
922   {
923     unsigned char c;
924     CHECK(RE("(H)ello").FullMatch("Hello", &c));
925     CHECK_EQ(c, static_cast<unsigned char>('H'));
926   }
927   {
928     short v;
929     CHECK(RE("(-?\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
930     CHECK(RE("(-?\\d+)").FullMatch("-100",    &v));    CHECK_EQ(v, -100);
931     CHECK(RE("(-?\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
932     CHECK(RE("(-?\\d+)").FullMatch("-32768",  &v));    CHECK_EQ(v, -32768);
933     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
934     CHECK(!RE("(-?\\d+)").FullMatch("32768",  &v));
935   }
936   {
937     unsigned short v;
938     CHECK(RE("(\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
939     CHECK(RE("(\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
940     CHECK(RE("(\\d+)").FullMatch("65535",   &v));    CHECK_EQ(v, 65535);
941     CHECK(!RE("(\\d+)").FullMatch("65536",  &v));
942   }
943   {
944     int v;
945     static const int max_value = 0x7fffffff;
946     static const int min_value = -max_value - 1;
947     CHECK(RE("(-?\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
948     CHECK(RE("(-?\\d+)").FullMatch("-100",        &v)); CHECK_EQ(v, -100);
949     CHECK(RE("(-?\\d+)").FullMatch("2147483647",  &v)); CHECK_EQ(v, max_value);
950     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
951     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
952     CHECK(!RE("(-?\\d+)").FullMatch("2147483648",  &v));
953   }
954   {
955     unsigned int v;
956     static const unsigned int max_value = 0xfffffffful;
957     CHECK(RE("(\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
958     CHECK(RE("(\\d+)").FullMatch("4294967295",  &v)); CHECK_EQ(v, max_value);
959     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
960   }
961 #ifdef HAVE_LONG_LONG
962 # if defined(__MINGW__) || defined(__MINGW32__)
963 #   define LLD "%I64d"
964 #   define LLU "%I64u"
965 # else
966 #   define LLD "%lld"
967 #   define LLU "%llu"
968 # endif
969   {
970     long long v;
971     static const long long max_value = 0x7fffffffffffffffLL;
972     static const long long min_value = -max_value - 1;
973     char buf[32];  // definitely big enough for a long long
974 
975     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
976     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
977 
978     sprintf(buf, LLD, max_value);
979     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
980 
981     sprintf(buf, LLD, min_value);
982     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
983 
984     sprintf(buf, LLD, max_value);
985     assert(buf[strlen(buf)-1] != '9');
986     buf[strlen(buf)-1]++;
987     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
988 
989     sprintf(buf, LLD, min_value);
990     assert(buf[strlen(buf)-1] != '9');
991     buf[strlen(buf)-1]++;
992     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
993   }
994 #endif
995 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
996   {
997     unsigned long long v;
998     long long v2;
999     static const unsigned long long max_value = 0xffffffffffffffffULL;
1000     char buf[32];  // definitely big enough for a unsigned long long
1001 
1002     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
1003     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
1004 
1005     sprintf(buf, LLU, max_value);
1006     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
1007 
1008     assert(buf[strlen(buf)-1] != '9');
1009     buf[strlen(buf)-1]++;
1010     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
1011   }
1012 #endif
1013   {
1014     float v;
1015     CHECK(RE("(.*)").FullMatch("100", &v));
1016     CHECK(RE("(.*)").FullMatch("-100.", &v));
1017     CHECK(RE("(.*)").FullMatch("1e23", &v));
1018   }
1019   {
1020     double v;
1021     CHECK(RE("(.*)").FullMatch("100", &v));
1022     CHECK(RE("(.*)").FullMatch("-100.", &v));
1023     CHECK(RE("(.*)").FullMatch("1e23", &v));
1024   }
1025 
1026   // Check that matching is fully anchored
1027   CHECK(!RE("(\\d+)").FullMatch("x1001",  &i));
1028   CHECK(!RE("(\\d+)").FullMatch("1001x",  &i));
1029   CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1030   CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1031 
1032   // Braces
1033   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1034   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1035   CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1036 
1037   // Complicated RE
1038   CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1039   CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1040   CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1041   CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1042 
1043   // Check full-match handling (needs '$' tacked on internally)
1044   CHECK(RE("fo|foo").FullMatch("fo"));
1045   CHECK(RE("fo|foo").FullMatch("foo"));
1046   CHECK(RE("fo|foo$").FullMatch("fo"));
1047   CHECK(RE("fo|foo$").FullMatch("foo"));
1048   CHECK(RE("foo$").FullMatch("foo"));
1049   CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1050   CHECK(!RE("fo|bar").FullMatch("fox"));
1051 
1052   // Uncomment the following if we change the handling of '$' to
1053   // prevent it from matching a trailing newline
1054   if (false) {
1055     // Check that we don't get bitten by pcre's special handling of a
1056     // '\n' at the end of the string matching '$'
1057     CHECK(!RE("foo$").PartialMatch("foo\n"));
1058   }
1059 
1060   // Number of args
1061   int a[16];
1062   CHECK(RE("").FullMatch(""));
1063 
1064   memset(a, 0, sizeof(0));
1065   CHECK(RE("(\\d){1}").FullMatch("1",
1066                                  &a[0]));
1067   CHECK_EQ(a[0], 1);
1068 
1069   memset(a, 0, sizeof(0));
1070   CHECK(RE("(\\d)(\\d)").FullMatch("12",
1071                                    &a[0],  &a[1]));
1072   CHECK_EQ(a[0], 1);
1073   CHECK_EQ(a[1], 2);
1074 
1075   memset(a, 0, sizeof(0));
1076   CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1077                                         &a[0],  &a[1],  &a[2]));
1078   CHECK_EQ(a[0], 1);
1079   CHECK_EQ(a[1], 2);
1080   CHECK_EQ(a[2], 3);
1081 
1082   memset(a, 0, sizeof(0));
1083   CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1084                                              &a[0],  &a[1],  &a[2],  &a[3]));
1085   CHECK_EQ(a[0], 1);
1086   CHECK_EQ(a[1], 2);
1087   CHECK_EQ(a[2], 3);
1088   CHECK_EQ(a[3], 4);
1089 
1090   memset(a, 0, sizeof(0));
1091   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1092                                                   &a[0],  &a[1],  &a[2],
1093                                                   &a[3],  &a[4]));
1094   CHECK_EQ(a[0], 1);
1095   CHECK_EQ(a[1], 2);
1096   CHECK_EQ(a[2], 3);
1097   CHECK_EQ(a[3], 4);
1098   CHECK_EQ(a[4], 5);
1099 
1100   memset(a, 0, sizeof(0));
1101   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1102                                                        &a[0],  &a[1],  &a[2],
1103                                                        &a[3],  &a[4],  &a[5]));
1104   CHECK_EQ(a[0], 1);
1105   CHECK_EQ(a[1], 2);
1106   CHECK_EQ(a[2], 3);
1107   CHECK_EQ(a[3], 4);
1108   CHECK_EQ(a[4], 5);
1109   CHECK_EQ(a[5], 6);
1110 
1111   memset(a, 0, sizeof(0));
1112   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1113                                                             &a[0],  &a[1],  &a[2],  &a[3],
1114                                                             &a[4],  &a[5],  &a[6]));
1115   CHECK_EQ(a[0], 1);
1116   CHECK_EQ(a[1], 2);
1117   CHECK_EQ(a[2], 3);
1118   CHECK_EQ(a[3], 4);
1119   CHECK_EQ(a[4], 5);
1120   CHECK_EQ(a[5], 6);
1121   CHECK_EQ(a[6], 7);
1122 
1123   memset(a, 0, sizeof(0));
1124   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1125            "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1126                "1234567890123456",
1127                &a[0],  &a[1],  &a[2],  &a[3],
1128                &a[4],  &a[5],  &a[6],  &a[7],
1129                &a[8],  &a[9],  &a[10], &a[11],
1130                &a[12], &a[13], &a[14], &a[15]));
1131   CHECK_EQ(a[0], 1);
1132   CHECK_EQ(a[1], 2);
1133   CHECK_EQ(a[2], 3);
1134   CHECK_EQ(a[3], 4);
1135   CHECK_EQ(a[4], 5);
1136   CHECK_EQ(a[5], 6);
1137   CHECK_EQ(a[6], 7);
1138   CHECK_EQ(a[7], 8);
1139   CHECK_EQ(a[8], 9);
1140   CHECK_EQ(a[9], 0);
1141   CHECK_EQ(a[10], 1);
1142   CHECK_EQ(a[11], 2);
1143   CHECK_EQ(a[12], 3);
1144   CHECK_EQ(a[13], 4);
1145   CHECK_EQ(a[14], 5);
1146   CHECK_EQ(a[15], 6);
1147 
1148   /***** PartialMatch *****/
1149 
1150   printf("Testing PartialMatch\n");
1151 
1152   CHECK(RE("h.*o").PartialMatch("hello"));
1153   CHECK(RE("h.*o").PartialMatch("othello"));
1154   CHECK(RE("h.*o").PartialMatch("hello!"));
1155   CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1156 
1157   /***** other tests *****/
1158 
1159   RadixTests();
1160   TestReplace();
1161   TestExtract();
1162   TestConsume();
1163   TestFindAndConsume();
1164   TestQuoteMetaAll();
1165   TestMatchNumberPeculiarity();
1166 
1167   // Check the pattern() accessor
1168   {
1169     const string kPattern = "http://([^/]+)/.*";
1170     const RE re(kPattern);
1171     CHECK_EQ(kPattern, re.pattern());
1172   }
1173 
1174   // Check RE error field.
1175   {
1176     RE re("foo");
1177     CHECK(re.error().empty());  // Must have no error
1178   }
1179 
1180 #ifdef SUPPORT_UTF8
  // Check UTF-8 handling: the same byte string is matched by patterns
  // compiled without options (byte mode) and with pcrecpp::UTF8().
  {
    printf("Testing UTF-8 handling\n");

    // Three Japanese characters (nihongo), each encoded as three UTF-8
    // bytes, followed by a terminating 0.
    const unsigned char utf8_string[] = {
         0xe6, 0x97, 0xa5, // U+65E5
         0xe6, 0x9c, 0xac, // U+672C
         0xe8, 0xaa, 0x9e, // U+8A9E
         0
    };
    // '.', then the middle character of utf8_string, then '.'.
    const unsigned char utf8_pattern[] = {
         '.',
         0xe6, 0x9c, 0xac, // U+672C
         '.',
         0
    };

    // Nine '.' cover the string byte by byte in byte mode; three '.' cover
    // it character by character in UTF-8 mode.
    RE re_test1(".........");
    CHECK(re_test1.FullMatch(utf8_string));
    RE re_test2("...", pcrecpp::UTF8());
    CHECK(re_test2.FullMatch(utf8_string));

    // Check that '.' matches one byte or UTF-8 character
    // according to the mode.
    string ss;
    RE re_test3("(.)");
    CHECK(re_test3.PartialMatch(utf8_string, &ss));
    CHECK_EQ(ss, string("\xe6"));
    RE re_test4("(.)", pcrecpp::UTF8());
    CHECK(re_test4.PartialMatch(utf8_string, &ss));
    CHECK_EQ(ss, string("\xe6\x97\xa5"));

    // Check that string matches itself in either mode
    RE re_test5(utf8_string);
    CHECK(re_test5.FullMatch(utf8_string));
    RE re_test6(utf8_string, pcrecpp::UTF8());
    CHECK(re_test6.FullMatch(utf8_string));

    // Check that pattern matches string only in UTF8 mode: in byte mode
    // each '.' consumes a single byte, so the 5-byte pattern cannot span
    // the 9-byte string.
    RE re_test7(utf8_pattern);
    CHECK(!re_test7.FullMatch(utf8_string));
    RE re_test8(utf8_pattern, pcrecpp::UTF8());
    CHECK(re_test8.FullMatch(utf8_string));
  }
1227 
1228   // Check that ungreedy, UTF8 regular expressions don't match when they
1229   // oughtn't -- see bug 82246.
1230   {
1231     // This code always worked.
1232     const char* pattern = "\\w+X";
1233     const string target = "a aX";
1234     RE match_sentence(pattern);
1235     RE match_sentence_re(pattern, pcrecpp::UTF8());
1236 
1237     CHECK(!match_sentence.FullMatch(target));
1238     CHECK(!match_sentence_re.FullMatch(target));
1239   }
1240 
1241   {
1242     const char* pattern = "(?U)\\w+X";
1243     const string target = "a aX";
1244     RE match_sentence(pattern);
1245     RE match_sentence_re(pattern, pcrecpp::UTF8());
1246 
1247     CHECK(!match_sentence.FullMatch(target));
1248     CHECK(!match_sentence_re.FullMatch(target));
1249   }
1250 #endif  /* def SUPPORT_UTF8 */
1251 
1252   printf("Testing error reporting\n");
1253 
1254   { RE re("a\\1"); CHECK(!re.error().empty()); }
1255   {
1256     RE re("a[x");
1257     CHECK(!re.error().empty());
1258   }
1259   {
1260     RE re("a[z-a]");
1261     CHECK(!re.error().empty());
1262   }
1263   {
1264     RE re("a[[:foobar:]]");
1265     CHECK(!re.error().empty());
1266   }
1267   {
1268     RE re("a(b");
1269     CHECK(!re.error().empty());
1270   }
1271   {
1272     RE re("a\\");
1273     CHECK(!re.error().empty());
1274   }
1275 
1276   // Test that recursion is stopped
1277   TestRecursion();
1278 
1279   // Test Options
1280   if (getenv("VERBOSE_TEST") != NULL)
1281     VERBOSE_TEST  = true;
1282   TestOptions();
1283 
1284   // Test the constructors
1285   TestConstructors();
1286 
1287   // Done
1288   printf("OK\n");
1289 
1290   return 0;
1291 }
1292