1 // -*- coding: utf-8 -*-
2 //
3 // Copyright (c) 2005 - 2010, Google Inc.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following disclaimer
14 // in the documentation and/or other materials provided with the
15 // distribution.
16 //     * Neither the name of Google Inc. nor the names of its
17 // contributors may be used to endorse or promote products derived from
18 // this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Author: Sanjay Ghemawat
33 //
34 // TODO: Test extractions for PartialMatch/Consume
35 
36 #ifdef HAVE_CONFIG_H
37 #include "config.h"
38 #endif
39 
#include <stdio.h>
#include <stdlib.h>      /* for exit and atoi */
#include <string.h>      /* for memset and strcmp */
#include <cassert>
#include <vector>
#include "pcrecpp.h"
45 
46 using pcrecpp::StringPiece;
47 using pcrecpp::RE;
48 using pcrecpp::RE_Options;
49 using pcrecpp::Hex;
50 using pcrecpp::Octal;
51 using pcrecpp::CRadix;
52 
53 static bool VERBOSE_TEST  = false;
54 
// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
//
// On failure the source location and the text of the condition are written
// to stderr and the process exits with status 1.
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ(a, b) dies unless a == b.  Both operands are parenthesized so
// that an argument containing an operator that binds more loosely than ==
// (for example `x & mask` or `c ? p : q`) is compared as a whole instead of
// being re-associated with the comparison.
#define CHECK_EQ(a, b)   CHECK((a) == (b))
68 
Timing1(int num_iters)69 static void Timing1(int num_iters) {
70   // Same pattern lots of times
71   RE pattern("ruby:\\d+");
72   StringPiece p("ruby:1234");
73   for (int j = num_iters; j > 0; j--) {
74     CHECK(pattern.FullMatch(p));
75   }
76 }
77 
Timing2(int num_iters)78 static void Timing2(int num_iters) {
79   // Same pattern lots of times
80   RE pattern("ruby:(\\d+)");
81   int i;
82   for (int j = num_iters; j > 0; j--) {
83     CHECK(pattern.FullMatch("ruby:1234", &i));
84     CHECK_EQ(i, 1234);
85   }
86 }
87 
Timing3(int num_iters)88 static void Timing3(int num_iters) {
89   string text_string;
90   for (int j = num_iters; j > 0; j--) {
91     text_string += "this is another line\n";
92   }
93 
94   RE line_matcher(".*\n");
95   string line;
96   StringPiece text(text_string);
97   int counter = 0;
98   while (line_matcher.Consume(&text)) {
99     counter++;
100   }
101   printf("Matched %d lines\n", counter);
102 }
103 
104 #if 0  // uncomment this if you have a way of defining VirtualProcessSize()
105 
106 static void LeakTest() {
107   // Check for memory leaks
108   unsigned long long initial_size = 0;
109   for (int i = 0; i < 100000; i++) {
110     if (i == 50000) {
111       initial_size = VirtualProcessSize();
112       printf("Size after 50000: %llu\n", initial_size);
113     }
114     char buf[100];  // definitely big enough
115     sprintf(buf, "pat%09d", i);
116     RE newre(buf);
117   }
118   uint64 final_size = VirtualProcessSize();
119   printf("Size after 100000: %llu\n", final_size);
120   const double growth = double(final_size - initial_size) / final_size;
121   printf("Growth: %0.2f%%", growth * 100);
122   CHECK(growth < 0.02);       // Allow < 2% growth
123 }
124 
125 #endif
126 
RadixTests()127 static void RadixTests() {
128   printf("Testing hex\n");
129 
130 #define CHECK_HEX(type, value) \
131   do { \
132     type v; \
133     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
134     CHECK_EQ(v, 0x ## value); \
135     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
136     CHECK_EQ(v, 0x ## value); \
137   } while(0)
138 
139   CHECK_HEX(short,              2bad);
140   CHECK_HEX(unsigned short,     2badU);
141   CHECK_HEX(int,                dead);
142   CHECK_HEX(unsigned int,       deadU);
143   CHECK_HEX(long,               7eadbeefL);
144   CHECK_HEX(unsigned long,      deadbeefUL);
145 #ifdef HAVE_LONG_LONG
146   CHECK_HEX(long long,          12345678deadbeefLL);
147 #endif
148 #ifdef HAVE_UNSIGNED_LONG_LONG
149   CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
150 #endif
151 
152 #undef CHECK_HEX
153 
154   printf("Testing octal\n");
155 
156 #define CHECK_OCTAL(type, value) \
157   do { \
158     type v; \
159     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
160     CHECK_EQ(v, 0 ## value); \
161     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
162     CHECK_EQ(v, 0 ## value); \
163   } while(0)
164 
165   CHECK_OCTAL(short,              77777);
166   CHECK_OCTAL(unsigned short,     177777U);
167   CHECK_OCTAL(int,                17777777777);
168   CHECK_OCTAL(unsigned int,       37777777777U);
169   CHECK_OCTAL(long,               17777777777L);
170   CHECK_OCTAL(unsigned long,      37777777777UL);
171 #ifdef HAVE_LONG_LONG
172   CHECK_OCTAL(long long,          777777777777777777777LL);
173 #endif
174 #ifdef HAVE_UNSIGNED_LONG_LONG
175   CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
176 #endif
177 
178 #undef CHECK_OCTAL
179 
180   printf("Testing decimal\n");
181 
182 #define CHECK_DECIMAL(type, value) \
183   do { \
184     type v; \
185     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
186     CHECK_EQ(v, value); \
187     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
188     CHECK_EQ(v, value); \
189   } while(0)
190 
191   CHECK_DECIMAL(short,              -1);
192   CHECK_DECIMAL(unsigned short,     9999);
193   CHECK_DECIMAL(int,                -1000);
194   CHECK_DECIMAL(unsigned int,       12345U);
195   CHECK_DECIMAL(long,               -10000000L);
196   CHECK_DECIMAL(unsigned long,      3083324652U);
197 #ifdef HAVE_LONG_LONG
198   CHECK_DECIMAL(long long,          -100000000000000LL);
199 #endif
200 #ifdef HAVE_UNSIGNED_LONG_LONG
201   CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
202 #endif
203 
204 #undef CHECK_DECIMAL
205 
206 }
207 
TestReplace()208 static void TestReplace() {
209   printf("Testing Replace\n");
210 
211   struct ReplaceTest {
212     const char *regexp;
213     const char *rewrite;
214     const char *original;
215     const char *single;
216     const char *global;
217     int global_count;         // the expected return value from ReplaceAll
218   };
219   static const ReplaceTest tests[] = {
220     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
221       "\\2\\1ay",
222       "the quick brown fox jumps over the lazy dogs.",
223       "ethay quick brown fox jumps over the lazy dogs.",
224       "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
225       9 },
226     { "\\w+",
227       "\\0-NOSPAM",
228       "paul.haahr@google.com",
229       "paul-NOSPAM.haahr@google.com",
230       "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
231       4 },
232     { "^",
233       "(START)",
234       "foo",
235       "(START)foo",
236       "(START)foo",
237       1 },
238     { "^",
239       "(START)",
240       "",
241       "(START)",
242       "(START)",
243       1 },
244     { "$",
245       "(END)",
246       "",
247       "(END)",
248       "(END)",
249       1 },
250     { "b",
251       "bb",
252       "ababababab",
253       "abbabababab",
254       "abbabbabbabbabb",
255        5 },
256     { "b",
257       "bb",
258       "bbbbbb",
259       "bbbbbbb",
260       "bbbbbbbbbbbb",
261       6 },
262     { "b+",
263       "bb",
264       "bbbbbb",
265       "bb",
266       "bb",
267       1 },
268     { "b*",
269       "bb",
270       "bbbbbb",
271       "bb",
272       "bbbb",
273       2 },
274     { "b*",
275       "bb",
276       "aaaaa",
277       "bbaaaaa",
278       "bbabbabbabbabbabb",
279       6 },
280     { "b*",
281       "bb",
282       "aa\naa\n",
283       "bbaa\naa\n",
284       "bbabbabb\nbbabbabb\nbb",
285       7 },
286     { "b*",
287       "bb",
288       "aa\raa\r",
289       "bbaa\raa\r",
290       "bbabbabb\rbbabbabb\rbb",
291       7 },
292     { "b*",
293       "bb",
294       "aa\r\naa\r\n",
295       "bbaa\r\naa\r\n",
296       "bbabbabb\r\nbbabbabb\r\nbb",
297       7 },
298     // Check empty-string matching (it's tricky!)
299     { "aa|b*",
300       "@",
301       "aa",
302       "@",
303       "@@",
304       2 },
305     { "b*|aa",
306       "@",
307       "aa",
308       "@aa",
309       "@@@",
310       3 },
311 #ifdef SUPPORT_UTF8
312     { "b*",
313       "bb",
314       "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
315       "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
316       "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
317       5 },
318     { "b*",
319       "bb",
320       "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
321       "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
322       ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
323        "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
324       9 },
325 #endif
326     { "", NULL, NULL, NULL, NULL, 0 }
327   };
328 
329 #ifdef SUPPORT_UTF8
330   const bool support_utf8 = true;
331 #else
332   const bool support_utf8 = false;
333 #endif
334 
335   for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
336     RE re(t->regexp, RE_Options().set_newline_mode(PCRE2_NEWLINE_CRLF)
337                                  .set_utf(support_utf8));
338     assert(re.error().empty());
339     string one(t->original);
340     CHECK(re.Replace(t->rewrite, &one));
341     CHECK_EQ(one, t->single);
342     string all(t->original);
343     const int replace_count = re.GlobalReplace(t->rewrite, &all);
344     CHECK_EQ(all, t->global);
345     CHECK_EQ(replace_count, t->global_count);
346   }
347 
348   // One final test: test \r\n replacement when we're not in CRLF mode
349   {
350     RE re("b*", RE_Options().set_newline_mode(PCRE2_NEWLINE_CR)
351                             .set_utf(support_utf8));
352     assert(re.error().empty());
353     string all("aa\r\naa\r\n");
354     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
355     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
356   }
357   {
358     RE re("b*", RE_Options().set_newline_mode(PCRE2_NEWLINE_LF)
359                             .set_utf(support_utf8));
360     assert(re.error().empty());
361     string all("aa\r\naa\r\n");
362     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
363     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
364   }
365   // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
366   //       Alas, the answer depends on how pcre was compiled.
367 }
368 
TestExtract()369 static void TestExtract() {
370   printf("Testing Extract\n");
371 
372   string s;
373 
374   CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
375   CHECK_EQ(s, "kremvax!boris");
376 
377   // check the RE interface as well
378   CHECK(RE(".*").Extract("'\\0'", "foo", &s));
379   CHECK_EQ(s, "'foo'");
380   CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
381   CHECK_EQ(s, "'foo'");
382 }
383 
TestConsume()384 static void TestConsume() {
385   printf("Testing Consume\n");
386 
387   string word;
388 
389   string s("   aaa b!@#$@#$cccc");
390   StringPiece input(s);
391 
392   RE r("\\s*(\\w+)");    // matches a word, possibly proceeded by whitespace
393   CHECK(r.Consume(&input, &word));
394   CHECK_EQ(word, "aaa");
395   CHECK(r.Consume(&input, &word));
396   CHECK_EQ(word, "b");
397   CHECK(! r.Consume(&input, &word));
398 }
399 
TestFindAndConsume()400 static void TestFindAndConsume() {
401   printf("Testing FindAndConsume\n");
402 
403   string word;
404 
405   string s("   aaa b!@#$@#$cccc");
406   StringPiece input(s);
407 
408   RE r("(\\w+)");      // matches a word
409   CHECK(r.FindAndConsume(&input, &word));
410   CHECK_EQ(word, "aaa");
411   CHECK(r.FindAndConsume(&input, &word));
412   CHECK_EQ(word, "b");
413   CHECK(r.FindAndConsume(&input, &word));
414   CHECK_EQ(word, "cccc");
415   CHECK(! r.FindAndConsume(&input, &word));
416 }
417 
TestMatchNumberPeculiarity()418 static void TestMatchNumberPeculiarity() {
419   printf("Testing match-number peculiarity\n");
420 
421   string word1;
422   string word2;
423   string word3;
424 
425   RE r("(foo)|(bar)|(baz)");
426   CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
427   CHECK_EQ(word1, "foo");
428   CHECK_EQ(word2, "");
429   CHECK_EQ(word3, "");
430   CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
431   CHECK_EQ(word1, "");
432   CHECK_EQ(word2, "bar");
433   CHECK_EQ(word3, "");
434   CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
435   CHECK_EQ(word1, "");
436   CHECK_EQ(word2, "");
437   CHECK_EQ(word3, "baz");
438   CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
439 
440   string a;
441   CHECK(RE("(foo)|hello").FullMatch("hello", &a));
442   CHECK_EQ(a, "");
443 }
444 
TestRecursion()445 static void TestRecursion() {
446   printf("Testing recursion\n");
447 
448   // Get one string that passes (sometimes), one that never does.
449   string text_good("abcdefghijk");
450   string text_bad("acdefghijkl");
451 
452   // According to pcretest, matching text_good against (\w+)*b
453   // requires match_limit of at least 8192, and match_recursion_limit
454   // of at least 37.
455 
456   RE_Options options_ml;
457   options_ml.set_match_limit(8192);
458   RE re("(\\w+)*b", options_ml);
459   CHECK(re.PartialMatch(text_good) == true);
460   CHECK(re.PartialMatch(text_bad) == false);
461   CHECK(re.FullMatch(text_good) == false);
462   CHECK(re.FullMatch(text_bad) == false);
463 
464   options_ml.set_match_limit(1024);
465   RE re2("(\\w+)*b", options_ml);
466   CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
467   CHECK(re2.PartialMatch(text_bad) == false);
468   CHECK(re2.FullMatch(text_good) == false);
469   CHECK(re2.FullMatch(text_bad) == false);
470 
471   RE_Options options_mlr;
472   options_mlr.set_match_limit_recursion(50);
473   RE re3("(\\w+)*b", options_mlr);
474   CHECK(re3.PartialMatch(text_good) == true);
475   CHECK(re3.PartialMatch(text_bad) == false);
476   CHECK(re3.FullMatch(text_good) == false);
477   CHECK(re3.FullMatch(text_bad) == false);
478 
479   options_mlr.set_match_limit_recursion(10);
480   RE re4("(\\w+)*b", options_mlr);
481   CHECK(re4.PartialMatch(text_good) == false);
482   CHECK(re4.PartialMatch(text_bad) == false);
483   CHECK(re4.FullMatch(text_good) == false);
484   CHECK(re4.FullMatch(text_bad) == false);
485 }
486 
487 // A meta-quoted string, interpreted as a pattern, should always match
488 // the original unquoted string.
TestQuoteMeta(string unquoted,RE_Options options=RE_Options ())489 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
490   string quoted = RE::QuoteMeta(unquoted);
491   RE re(quoted, options);
492   CHECK(re.FullMatch(unquoted));
493 }
494 
495 // A string containing meaningful regexp characters, which is then meta-
496 // quoted, should not generally match a string the unquoted string does.
NegativeTestQuoteMeta(string unquoted,string should_not_match,RE_Options options=RE_Options ())497 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
498                                   RE_Options options = RE_Options()) {
499   string quoted = RE::QuoteMeta(unquoted);
500   RE re(quoted, options);
501   CHECK(!re.FullMatch(should_not_match));
502 }
503 
504 // Tests that quoted meta characters match their original strings,
505 // and that a few things that shouldn't match indeed do not.
TestQuotaMetaSimple()506 static void TestQuotaMetaSimple() {
507   TestQuoteMeta("foo");
508   TestQuoteMeta("foo.bar");
509   TestQuoteMeta("foo\\.bar");
510   TestQuoteMeta("[1-9]");
511   TestQuoteMeta("1.5-2.0?");
512   TestQuoteMeta("\\d");
513   TestQuoteMeta("Who doesn't like ice cream?");
514   TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
515   TestQuoteMeta("((?!)xxx).*yyy");
516   TestQuoteMeta("([");
517   TestQuoteMeta(string("foo\0bar", 7));
518 }
519 
TestQuoteMetaSimpleNegative()520 static void TestQuoteMetaSimpleNegative() {
521   NegativeTestQuoteMeta("foo", "bar");
522   NegativeTestQuoteMeta("...", "bar");
523   NegativeTestQuoteMeta("\\.", ".");
524   NegativeTestQuoteMeta("\\.", "..");
525   NegativeTestQuoteMeta("(a)", "a");
526   NegativeTestQuoteMeta("(a|b)", "a");
527   NegativeTestQuoteMeta("(a|b)", "(a)");
528   NegativeTestQuoteMeta("(a|b)", "a|b");
529   NegativeTestQuoteMeta("[0-9]", "0");
530   NegativeTestQuoteMeta("[0-9]", "0-9");
531   NegativeTestQuoteMeta("[0-9]", "[9]");
532   NegativeTestQuoteMeta("((?!)xxx)", "xxx");
533 }
534 
TestQuoteMetaLatin1()535 static void TestQuoteMetaLatin1() {
536   TestQuoteMeta("3\xb2 = 9");
537 }
538 
// QuoteMeta round-trips for UTF-8 input.  Does nothing unless SUPPORT_UTF8
// is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  // Inputs that must match themselves when compiled with the UTF8() options.
  static const char *const kUtf8Inputs[] = {
    "Pl\xc3\xa1\x63ido Domingo",
    "xyz",                    // No fancy utf8
    "\xc2\xb0",               // 2-byte utf8 (degree symbol)
    "27\xc2\xb0 degrees",     // As a middle character
    "\xe2\x80\xb3",           // 3-byte utf8 (double prime)
    "\xf0\x9d\x85\x9f"        // 4-byte utf8 (music note)
  };
  const size_t input_count = sizeof(kUtf8Inputs) / sizeof(kUtf8Inputs[0]);
  for (size_t n = 0; n < input_count; ++n) {
    TestQuoteMeta(kUtf8Inputs[n], pcrecpp::UTF8());
  }

  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");

  // The bytes of a 2-byte utf8 character (degree symbol) must not end up
  // matching a string in which each byte is preceded by a backslash.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        pcrecpp::UTF8());
#endif
}
553 
TestQuoteMetaAll()554 static void TestQuoteMetaAll() {
555   printf("Testing QuoteMeta\n");
556   TestQuotaMetaSimple();
557   TestQuoteMetaSimpleNegative();
558   TestQuoteMetaLatin1();
559   TestQuoteMetaUtf8();
560 }
561 
562 //
563 // Options tests contributed by
564 // Giuseppe Maxia, CTO, Stardata s.r.l.
565 // July 2005
566 //
GetOneOptionResult(const char * option_name,const char * regex,const char * str,RE_Options options,bool full,string expected)567 static void GetOneOptionResult(
568                 const char *option_name,
569                 const char *regex,
570                 const char *str,
571                 RE_Options options,
572                 bool full,
573                 string expected) {
574 
575   printf("Testing Option <%s>\n", option_name);
576   if(VERBOSE_TEST)
577     printf("/%s/ finds \"%s\" within \"%s\" \n",
578                     regex,
579                     expected.c_str(),
580                     str);
581   string captured("");
582   if (full)
583     RE(regex,options).FullMatch(str, &captured);
584   else
585     RE(regex,options).PartialMatch(str, &captured);
586   CHECK_EQ(captured, expected);
587 }
588 
TestOneOption(const char * option_name,const char * regex,const char * str,RE_Options options,bool full,bool assertive=true)589 static void TestOneOption(
590                 const char *option_name,
591                 const char *regex,
592                 const char *str,
593                 RE_Options options,
594                 bool full,
595                 bool assertive = true) {
596 
597   printf("Testing Option <%s>\n", option_name);
598   if (VERBOSE_TEST)
599     printf("'%s' %s /%s/ \n",
600                   str,
601                   (assertive? "matches" : "doesn't match"),
602                   regex);
603   if (assertive) {
604     if (full)
605       CHECK(RE(regex,options).FullMatch(str));
606     else
607       CHECK(RE(regex,options).PartialMatch(str));
608   } else {
609     if (full)
610       CHECK(!RE(regex,options).FullMatch(str));
611     else
612       CHECK(!RE(regex,options).PartialMatch(str));
613   }
614 }
615 
Test_CASELESS()616 static void Test_CASELESS() {
617   RE_Options options;
618   RE_Options options2;
619 
620   options.set_caseless(true);
621   TestOneOption("CASELESS (class)",  "HELLO",    "hello", options, false);
622   TestOneOption("CASELESS (class2)", "HELLO",    "hello", options2.set_caseless(true), false);
623   TestOneOption("CASELESS (class)",  "^[A-Z]+$", "Hello", options, false);
624 
625   TestOneOption("CASELESS (function)", "HELLO",    "hello", pcrecpp::CASELESS(), false);
626   TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
627   options.set_caseless(false);
628   TestOneOption("no CASELESS", "HELLO",    "hello", options, false, false);
629 }
630 
Test_MULTILINE()631 static void Test_MULTILINE() {
632   RE_Options options;
633   RE_Options options2;
634   const char *str = "HELLO\n" "cruel\n" "world\n";
635 
636   options.set_multiline(true);
637   TestOneOption("MULTILINE (class)",    "^cruel$", str, options, false);
638   TestOneOption("MULTILINE (class2)",   "^cruel$", str, options2.set_multiline(true), false);
639   TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
640   options.set_multiline(false);
641   TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
642 }
643 
Test_DOTALL()644 static void Test_DOTALL() {
645   RE_Options options;
646   RE_Options options2;
647   const char *str = "HELLO\n" "cruel\n" "world";
648 
649   options.set_dotall(true);
650   TestOneOption("DOTALL (class)",    "HELLO.*world", str, options, true);
651   TestOneOption("DOTALL (class2)",   "HELLO.*world", str, options2.set_dotall(true), true);
652   TestOneOption("DOTALL (function)",    "HELLO.*world", str, pcrecpp::DOTALL(), true);
653   options.set_dotall(false);
654   TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
655 }
656 
Test_DOLLAR_ENDONLY()657 static void Test_DOLLAR_ENDONLY() {
658   RE_Options options;
659   RE_Options options2;
660   const char *str = "HELLO world\n";
661 
662   TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
663   options.set_dollar_endonly(true);
664   TestOneOption("DOLLAR_ENDONLY 1",    "world$", str, options, false, false);
665   TestOneOption("DOLLAR_ENDONLY 2",    "world$", str, options2.set_dollar_endonly(true), false, false);
666 }
667 
Test_EXTENDED()668 static void Test_EXTENDED() {
669   RE_Options options;
670   RE_Options options2;
671   const char *str = "HELLO world";
672 
673   options.set_extended(true);
674   TestOneOption("EXTENDED (class)",    "HELLO world", str, options, false, false);
675   TestOneOption("EXTENDED (class2)",   "HELLO world", str, options2.set_extended(true), false, false);
676   TestOneOption("EXTENDED (class)",
677                     "^ HE L{2} O "
678                     "\\s+        "
679                     "\\w+ $      ",
680                     str,
681                     options,
682                     false);
683 
684   TestOneOption("EXTENDED (function)",    "HELLO world", str, pcrecpp::EXTENDED(), false, false);
685   TestOneOption("EXTENDED (function)",
686                     "^ HE L{2} O "
687                     "\\s+        "
688                     "\\w+ $      ",
689                     str,
690                     pcrecpp::EXTENDED(),
691                     false);
692 
693   options.set_extended(false);
694   TestOneOption("no EXTENDED", "HELLO world", str, options, false);
695 }
696 
Test_NO_AUTO_CAPTURE()697 static void Test_NO_AUTO_CAPTURE() {
698   RE_Options options;
699   const char *str = "HELLO world";
700   string captured;
701 
702   printf("Testing Option <no NO_AUTO_CAPTURE>\n");
703   if (VERBOSE_TEST)
704     printf("parentheses capture text\n");
705   RE re("(world|universe)$", options);
706   CHECK(re.Extract("\\1", str , &captured));
707   CHECK_EQ(captured, "world");
708   options.set_no_auto_capture(true);
709   printf("testing Option <NO_AUTO_CAPTURE>\n");
710   if (VERBOSE_TEST)
711     printf("parentheses do not capture text\n");
712   re.Extract("\\1",str, &captured );
713   CHECK_EQ(captured, "world");
714 }
715 
Test_UNGREEDY()716 static void Test_UNGREEDY() {
717   RE_Options options;
718   const char *str = "HELLO, 'this' is the 'world'";
719 
720   options.set_ungreedy(true);
721   GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
722   GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
723   GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
724 
725   options.set_ungreedy(false);
726   GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
727   GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
728 }
729 
Test_all_options()730 static void Test_all_options() {
731   const char *str = "HELLO\n" "cruel\n" "world";
732   RE_Options options;
733   options.set_all_options(PCRE2_CASELESS | PCRE2_DOTALL);
734 
735   TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
736   options.set_all_options(0);
737   TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
738   options.set_all_options(PCRE2_MULTILINE | PCRE2_EXTENDED);
739 
740   TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
741   TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
742                   " ^ c r u e l $ ",
743                   str,
744                   RE_Options(PCRE2_MULTILINE | PCRE2_EXTENDED),
745                   false);
746 
747   TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
748                   " ^ c r u e l $ ",
749                   str,
750                   RE_Options()
751                        .set_multiline(true)
752                        .set_extended(true),
753                   false);
754 
755   options.set_all_options(0);
756   TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
757 
758 }
759 
TestOptions()760 static void TestOptions() {
761   printf("Testing Options\n");
762   Test_CASELESS();
763   Test_MULTILINE();
764   Test_DOTALL();
765   Test_DOLLAR_ENDONLY();
766   Test_EXTENDED();
767   Test_NO_AUTO_CAPTURE();
768   Test_UNGREEDY();
769   Test_all_options();
770 }
771 
TestConstructors()772 static void TestConstructors() {
773   printf("Testing constructors\n");
774 
775   RE_Options options;
776   options.set_dotall(true);
777   const char *str = "HELLO\n" "cruel\n" "world";
778 
779   RE orig("HELLO.*world", options);
780   CHECK(orig.FullMatch(str));
781 
782   RE copy1(orig);
783   CHECK(copy1.FullMatch(str));
784 
785   RE copy2("not a match");
786   CHECK(!copy2.FullMatch(str));
787   copy2 = copy1;
788   CHECK(copy2.FullMatch(str));
789   copy2 = orig;
790   CHECK(copy2.FullMatch(str));
791 
792   // Make sure when we assign to ourselves, nothing bad happens
793   orig = orig;
794   copy1 = copy1;
795   copy2 = copy2;
796   CHECK(orig.FullMatch(str));
797   CHECK(copy1.FullMatch(str));
798   CHECK(copy2.FullMatch(str));
799 }
800 
main(int argc,char ** argv)801 int main(int argc, char** argv) {
802   // Treat any flag as --help
803   if (argc > 1 && argv[1][0] == '-') {
804     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
805            "       If 'timingX ###' is specified, run the given timing test\n"
806            "       with the given number of iterations, rather than running\n"
807            "       the default corectness test.\n", argv[0]);
808     return 0;
809   }
810 
811   if (argc > 1) {
812     if ( argc == 2 || atoi(argv[2]) == 0) {
813       printf("timing mode needs a num-iters argument\n");
814       return 1;
815     }
816     if (!strcmp(argv[1], "timing1"))
817       Timing1(atoi(argv[2]));
818     else if (!strcmp(argv[1], "timing2"))
819       Timing2(atoi(argv[2]));
820     else if (!strcmp(argv[1], "timing3"))
821       Timing3(atoi(argv[2]));
822     else
823       printf("Unknown argument '%s'\n", argv[1]);
824     return 0;
825   }
826 
827   printf("PCRE C++ wrapper tests\n");
828   printf("Testing FullMatch\n");
829 
830   int i;
831   string s;
832 
833   /***** FullMatch with no args *****/
834 
835   CHECK(RE("h.*o").FullMatch("hello"));
836   CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
837   CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
838   CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
839   CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
840   CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
841 
842   /***** FullMatch with args *****/
843 
844   // Zero-arg
845   CHECK(RE("\\d+").FullMatch("1001"));
846 
847   // Single-arg
848   CHECK(RE("(\\d+)").FullMatch("1001",   &i));
849   CHECK_EQ(i, 1001);
850   CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
851   CHECK_EQ(i, -123);
852   CHECK(!RE("()\\d+").FullMatch("10", &i));
853   CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
854                                 &i));
855 
856   // Digits surrounding integer-arg
857   CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
858   CHECK_EQ(i, 23);
859   CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
860   CHECK_EQ(i, 1);
861   CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
862   CHECK_EQ(i, -1);
863   CHECK(RE("(\\d)").PartialMatch("1234", &i));
864   CHECK_EQ(i, 1);
865   CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
866   CHECK_EQ(i, -1);
867 
868   // String-arg
869   CHECK(RE("h(.*)o").FullMatch("hello", &s));
870   CHECK_EQ(s, string("ell"));
871 
872   // StringPiece-arg
873   StringPiece sp;
874   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
875   CHECK_EQ(sp.size(), 4);
876   CHECK(memcmp(sp.data(), "ruby", 4) == 0);
877   CHECK_EQ(i, 1234);
878 
879   // Multi-arg
880   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
881   CHECK_EQ(s, string("ruby"));
882   CHECK_EQ(i, 1234);
883 
884   // Ignore non-void* NULL arg
885   CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
886   CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
887   CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
888   CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
889 #ifdef HAVE_LONG_LONG
890   CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
891 #endif
892   CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
893   CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
894 
895   // Fail on non-void* NULL arg if the match doesn't parse for the given type.
896   CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
897   CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
898   CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
899   CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
900   CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
901 
902   // Ignored arg
903   CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
904   CHECK_EQ(s, string("ruby"));
905   CHECK_EQ(i, 1234);
906 
907   // Type tests
908   {
909     char c;
910     CHECK(RE("(H)ello").FullMatch("Hello", &c));
911     CHECK_EQ(c, 'H');
912   }
913   {
914     unsigned char c;
915     CHECK(RE("(H)ello").FullMatch("Hello", &c));
916     CHECK_EQ(c, static_cast<unsigned char>('H'));
917   }
918   {
919     short v;
920     CHECK(RE("(-?\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
921     CHECK(RE("(-?\\d+)").FullMatch("-100",    &v));    CHECK_EQ(v, -100);
922     CHECK(RE("(-?\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
923     CHECK(RE("(-?\\d+)").FullMatch("-32768",  &v));    CHECK_EQ(v, -32768);
924     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
925     CHECK(!RE("(-?\\d+)").FullMatch("32768",  &v));
926   }
927   {
928     unsigned short v;
929     CHECK(RE("(\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
930     CHECK(RE("(\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
931     CHECK(RE("(\\d+)").FullMatch("65535",   &v));    CHECK_EQ(v, 65535);
932     CHECK(!RE("(\\d+)").FullMatch("65536",  &v));
933   }
934   {
935     int v;
936     static const int max_value = 0x7fffffff;
937     static const int min_value = -max_value - 1;
938     CHECK(RE("(-?\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
939     CHECK(RE("(-?\\d+)").FullMatch("-100",        &v)); CHECK_EQ(v, -100);
940     CHECK(RE("(-?\\d+)").FullMatch("2147483647",  &v)); CHECK_EQ(v, max_value);
941     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
942     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
943     CHECK(!RE("(-?\\d+)").FullMatch("2147483648",  &v));
944   }
945   {
946     unsigned int v;
947     static const unsigned int max_value = 0xfffffffful;
948     CHECK(RE("(\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
949     CHECK(RE("(\\d+)").FullMatch("4294967295",  &v)); CHECK_EQ(v, max_value);
950     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
951   }
952 #ifdef HAVE_LONG_LONG
953 # if defined(__MINGW__) || defined(__MINGW32__)
954 #   define LLD "%I64d"
955 #   define LLU "%I64u"
956 # else
957 #   define LLD "%lld"
958 #   define LLU "%llu"
959 # endif
960   {
961     long long v;
962     static const long long max_value = 0x7fffffffffffffffLL;
963     static const long long min_value = -max_value - 1;
964     char buf[32];  // definitely big enough for a long long
965 
966     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
967     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
968 
969     sprintf(buf, LLD, max_value);
970     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
971 
972     sprintf(buf, LLD, min_value);
973     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
974 
975     sprintf(buf, LLD, max_value);
976     assert(buf[strlen(buf)-1] != '9');
977     buf[strlen(buf)-1]++;
978     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
979 
980     sprintf(buf, LLD, min_value);
981     assert(buf[strlen(buf)-1] != '9');
982     buf[strlen(buf)-1]++;
983     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
984   }
985 #endif
986 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
987   {
988     unsigned long long v;
989     long long v2;
990     static const unsigned long long max_value = 0xffffffffffffffffULL;
    char buf[32];  // definitely big enough for an unsigned long long
992 
993     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
994     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
995 
996     sprintf(buf, LLU, max_value);
997     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
998 
999     assert(buf[strlen(buf)-1] != '9');
1000     buf[strlen(buf)-1]++;
1001     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
1002   }
1003 #endif
1004   {
1005     float v;
1006     CHECK(RE("(.*)").FullMatch("100", &v));
1007     CHECK(RE("(.*)").FullMatch("-100.", &v));
1008     CHECK(RE("(.*)").FullMatch("1e23", &v));
1009   }
1010   {
1011     double v;
1012     CHECK(RE("(.*)").FullMatch("100", &v));
1013     CHECK(RE("(.*)").FullMatch("-100.", &v));
1014     CHECK(RE("(.*)").FullMatch("1e23", &v));
1015   }
1016 
1017   // Check that matching is fully anchored
1018   CHECK(!RE("(\\d+)").FullMatch("x1001",  &i));
1019   CHECK(!RE("(\\d+)").FullMatch("1001x",  &i));
1020   CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1021   CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1022 
1023   // Braces
1024   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1025   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1026   CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1027 
1028   // Complicated RE
1029   CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1030   CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1031   CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1032   CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1033 
1034   // Check full-match handling (needs '$' tacked on internally)
1035   CHECK(RE("fo|foo").FullMatch("fo"));
1036   CHECK(RE("fo|foo").FullMatch("foo"));
1037   CHECK(RE("fo|foo$").FullMatch("fo"));
1038   CHECK(RE("fo|foo$").FullMatch("foo"));
1039   CHECK(RE("foo$").FullMatch("foo"));
1040   CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1041   CHECK(!RE("fo|bar").FullMatch("fox"));
1042 
1043   // Uncomment the following if we change the handling of '$' to
1044   // prevent it from matching a trailing newline
1045   if (false) {
1046     // Check that we don't get bitten by pcre's special handling of a
1047     // '\n' at the end of the string matching '$'
1048     CHECK(!RE("foo$").PartialMatch("foo\n"));
1049   }
1050 
1051   // Number of args
1052   int a[16];
1053   CHECK(RE("").FullMatch(""));
1054 
1055   memset(a, 0, sizeof(0));
1056   CHECK(RE("(\\d){1}").FullMatch("1",
1057                                  &a[0]));
1058   CHECK_EQ(a[0], 1);
1059 
1060   memset(a, 0, sizeof(0));
1061   CHECK(RE("(\\d)(\\d)").FullMatch("12",
1062                                    &a[0],  &a[1]));
1063   CHECK_EQ(a[0], 1);
1064   CHECK_EQ(a[1], 2);
1065 
1066   memset(a, 0, sizeof(0));
1067   CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1068                                         &a[0],  &a[1],  &a[2]));
1069   CHECK_EQ(a[0], 1);
1070   CHECK_EQ(a[1], 2);
1071   CHECK_EQ(a[2], 3);
1072 
1073   memset(a, 0, sizeof(0));
1074   CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1075                                              &a[0],  &a[1],  &a[2],  &a[3]));
1076   CHECK_EQ(a[0], 1);
1077   CHECK_EQ(a[1], 2);
1078   CHECK_EQ(a[2], 3);
1079   CHECK_EQ(a[3], 4);
1080 
1081   memset(a, 0, sizeof(0));
1082   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1083                                                   &a[0],  &a[1],  &a[2],
1084                                                   &a[3],  &a[4]));
1085   CHECK_EQ(a[0], 1);
1086   CHECK_EQ(a[1], 2);
1087   CHECK_EQ(a[2], 3);
1088   CHECK_EQ(a[3], 4);
1089   CHECK_EQ(a[4], 5);
1090 
1091   memset(a, 0, sizeof(0));
1092   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1093                                                        &a[0],  &a[1],  &a[2],
1094                                                        &a[3],  &a[4],  &a[5]));
1095   CHECK_EQ(a[0], 1);
1096   CHECK_EQ(a[1], 2);
1097   CHECK_EQ(a[2], 3);
1098   CHECK_EQ(a[3], 4);
1099   CHECK_EQ(a[4], 5);
1100   CHECK_EQ(a[5], 6);
1101 
1102   memset(a, 0, sizeof(0));
1103   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1104                                                             &a[0],  &a[1],  &a[2],  &a[3],
1105                                                             &a[4],  &a[5],  &a[6]));
1106   CHECK_EQ(a[0], 1);
1107   CHECK_EQ(a[1], 2);
1108   CHECK_EQ(a[2], 3);
1109   CHECK_EQ(a[3], 4);
1110   CHECK_EQ(a[4], 5);
1111   CHECK_EQ(a[5], 6);
1112   CHECK_EQ(a[6], 7);
1113 
1114   memset(a, 0, sizeof(0));
1115   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1116            "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1117                "1234567890123456",
1118                &a[0],  &a[1],  &a[2],  &a[3],
1119                &a[4],  &a[5],  &a[6],  &a[7],
1120                &a[8],  &a[9],  &a[10], &a[11],
1121                &a[12], &a[13], &a[14], &a[15]));
1122   CHECK_EQ(a[0], 1);
1123   CHECK_EQ(a[1], 2);
1124   CHECK_EQ(a[2], 3);
1125   CHECK_EQ(a[3], 4);
1126   CHECK_EQ(a[4], 5);
1127   CHECK_EQ(a[5], 6);
1128   CHECK_EQ(a[6], 7);
1129   CHECK_EQ(a[7], 8);
1130   CHECK_EQ(a[8], 9);
1131   CHECK_EQ(a[9], 0);
1132   CHECK_EQ(a[10], 1);
1133   CHECK_EQ(a[11], 2);
1134   CHECK_EQ(a[12], 3);
1135   CHECK_EQ(a[13], 4);
1136   CHECK_EQ(a[14], 5);
1137   CHECK_EQ(a[15], 6);
1138 
1139   /***** PartialMatch *****/
1140 
1141   printf("Testing PartialMatch\n");
1142 
1143   CHECK(RE("h.*o").PartialMatch("hello"));
1144   CHECK(RE("h.*o").PartialMatch("othello"));
1145   CHECK(RE("h.*o").PartialMatch("hello!"));
1146   CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1147 
1148   /***** other tests *****/
1149 
1150   RadixTests();
1151   TestReplace();
1152   TestExtract();
1153   TestConsume();
1154   TestFindAndConsume();
1155   TestQuoteMetaAll();
1156   TestMatchNumberPeculiarity();
1157 
1158   // Check the pattern() accessor
1159   {
1160     const string kPattern = "http://([^/]+)/.*";
1161     const RE re(kPattern);
1162     CHECK_EQ(kPattern, re.pattern());
1163   }
1164 
1165   // Check RE error field.
1166   {
1167     RE re("foo");
1168     CHECK(re.error().empty());  // Must have no error
1169   }
1170 
1171 #ifdef SUPPORT_UTF8
1172   // Check UTF-8 handling
1173   {
1174     printf("Testing UTF-8 handling\n");
1175 
1176     // Three Japanese characters (nihongo)
1177     const unsigned char utf8_string[] = {
1178          0xe6, 0x97, 0xa5, // 65e5
         0xe6, 0x9c, 0xac, // 672c
1180          0xe8, 0xaa, 0x9e, // 8a9e
1181          0
1182     };
1183     const unsigned char utf8_pattern[] = {
1184          '.',
         0xe6, 0x9c, 0xac, // 672c
1186          '.',
1187          0
1188     };
1189 
1190     // Both should match in either mode, bytes or UTF-8
1191     RE re_test1(".........");
1192     CHECK(re_test1.FullMatch(utf8_string));
1193     RE re_test2("...", pcrecpp::UTF8());
1194     CHECK(re_test2.FullMatch(utf8_string));
1195 
1196     // Check that '.' matches one byte or UTF-8 character
1197     // according to the mode.
1198     string ss;
1199     RE re_test3("(.)");
1200     CHECK(re_test3.PartialMatch(utf8_string, &ss));
1201     CHECK_EQ(ss, string("\xe6"));
1202     RE re_test4("(.)", pcrecpp::UTF8());
1203     CHECK(re_test4.PartialMatch(utf8_string, &ss));
1204     CHECK_EQ(ss, string("\xe6\x97\xa5"));
1205 
1206     // Check that string matches itself in either mode
1207     RE re_test5(utf8_string);
1208     CHECK(re_test5.FullMatch(utf8_string));
1209     RE re_test6(utf8_string, pcrecpp::UTF8());
1210     CHECK(re_test6.FullMatch(utf8_string));
1211 
1212     // Check that pattern matches string only in UTF8 mode
1213     RE re_test7(utf8_pattern);
1214     CHECK(!re_test7.FullMatch(utf8_string));
1215     RE re_test8(utf8_pattern, pcrecpp::UTF8());
1216     CHECK(re_test8.FullMatch(utf8_string));
1217   }
1218 
1219   // Check that ungreedy, UTF8 regular expressions don't match when they
1220   // oughtn't -- see bug 82246.
1221   {
1222     // This code always worked.
1223     const char* pattern = "\\w+X";
1224     const string target = "a aX";
1225     RE match_sentence(pattern);
1226     RE match_sentence_re(pattern, pcrecpp::UTF8());
1227 
1228     CHECK(!match_sentence.FullMatch(target));
1229     CHECK(!match_sentence_re.FullMatch(target));
1230   }
1231 
1232   {
1233     const char* pattern = "(?U)\\w+X";
1234     const string target = "a aX";
1235     RE match_sentence(pattern);
1236     RE match_sentence_re(pattern, pcrecpp::UTF8());
1237 
1238     CHECK(!match_sentence.FullMatch(target));
1239     CHECK(!match_sentence_re.FullMatch(target));
1240   }
1241 #endif  /* def SUPPORT_UTF8 */
1242 
1243   printf("Testing error reporting\n");
1244 
1245   { RE re("a\\1"); CHECK(!re.error().empty()); }
1246   {
1247     RE re("a[x");
1248     CHECK(!re.error().empty());
1249   }
1250   {
1251     RE re("a[z-a]");
1252     CHECK(!re.error().empty());
1253   }
1254   {
1255     RE re("a[[:foobar:]]");
1256     CHECK(!re.error().empty());
1257   }
1258   {
1259     RE re("a(b");
1260     CHECK(!re.error().empty());
1261   }
1262   {
1263     RE re("a\\");
1264     CHECK(!re.error().empty());
1265   }
1266 
1267   // Test that recursion is stopped
1268   TestRecursion();
1269 
1270   // Test Options
1271   if (getenv("VERBOSE_TEST") != NULL)
1272     VERBOSE_TEST  = true;
1273   TestOptions();
1274 
1275   // Test the constructors
1276   TestConstructors();
1277 
1278   // Done
1279   printf("OK\n");
1280 
1281   return 0;
1282 }
1283