1 // Copyright 2003-2009 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Regular expression interface RE2.
6 //
7 // Originally the PCRE C++ wrapper, but adapted to use
8 // the new automata-based regular expression engines.
9 
10 #include "re2/re2.h"
11 
12 #include <assert.h>
13 #include <ctype.h>
14 #include <errno.h>
15 #ifdef _MSC_VER
16 #include <intrin.h>
17 #endif
18 #include <stdint.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <algorithm>
22 #include <atomic>
23 #include <iterator>
24 #include <mutex>
25 #include <string>
26 #include <utility>
27 #include <vector>
28 
29 #include "util/util.h"
30 #include "util/logging.h"
31 #include "util/strutil.h"
32 #include "util/utf.h"
33 #include "re2/prog.h"
34 #include "re2/regexp.h"
35 #include "re2/sparse_array.h"
36 
37 namespace re2 {
38 
39 // Maximum number of args we can set
40 static const int kMaxArgs = 16;
41 static const int kVecSize = 1+kMaxArgs;
42 
43 const int RE2::Options::kDefaultMaxMem;  // initialized in re2.h
44 
Options(RE2::CannedOptions opt)45 RE2::Options::Options(RE2::CannedOptions opt)
46   : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
47     posix_syntax_(opt == RE2::POSIX),
48     longest_match_(opt == RE2::POSIX),
49     log_errors_(opt != RE2::Quiet),
50     max_mem_(kDefaultMaxMem),
51     literal_(false),
52     never_nl_(false),
53     dot_nl_(false),
54     never_capture_(false),
55     case_sensitive_(true),
56     perl_classes_(false),
57     word_boundary_(false),
58     one_line_(false) {
59 }
60 
61 // static empty objects for use as const references.
62 // To avoid global constructors, allocated in RE2::Init().
63 static const std::string* empty_string;
64 static const std::map<std::string, int>* empty_named_groups;
65 static const std::map<int, std::string>* empty_group_names;
66 
67 // Converts from Regexp error code to RE2 error code.
68 // Maybe some day they will diverge.  In any event, this
69 // hides the existence of Regexp from RE2 users.
RegexpErrorToRE2(re2::RegexpStatusCode code)70 static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
71   switch (code) {
72     case re2::kRegexpSuccess:
73       return RE2::NoError;
74     case re2::kRegexpInternalError:
75       return RE2::ErrorInternal;
76     case re2::kRegexpBadEscape:
77       return RE2::ErrorBadEscape;
78     case re2::kRegexpBadCharClass:
79       return RE2::ErrorBadCharClass;
80     case re2::kRegexpBadCharRange:
81       return RE2::ErrorBadCharRange;
82     case re2::kRegexpMissingBracket:
83       return RE2::ErrorMissingBracket;
84     case re2::kRegexpMissingParen:
85       return RE2::ErrorMissingParen;
86     case re2::kRegexpTrailingBackslash:
87       return RE2::ErrorTrailingBackslash;
88     case re2::kRegexpRepeatArgument:
89       return RE2::ErrorRepeatArgument;
90     case re2::kRegexpRepeatSize:
91       return RE2::ErrorRepeatSize;
92     case re2::kRegexpRepeatOp:
93       return RE2::ErrorRepeatOp;
94     case re2::kRegexpBadPerlOp:
95       return RE2::ErrorBadPerlOp;
96     case re2::kRegexpBadUTF8:
97       return RE2::ErrorBadUTF8;
98     case re2::kRegexpBadNamedCapture:
99       return RE2::ErrorBadNamedCapture;
100   }
101   return RE2::ErrorInternal;
102 }
103 
trunc(const StringPiece & pattern)104 static std::string trunc(const StringPiece& pattern) {
105   if (pattern.size() < 100)
106     return std::string(pattern);
107   return std::string(pattern.substr(0, 100)) + "...";
108 }
109 
110 
RE2(const char * pattern)111 RE2::RE2(const char* pattern) {
112   Init(pattern, DefaultOptions);
113 }
114 
RE2(const std::string & pattern)115 RE2::RE2(const std::string& pattern) {
116   Init(pattern, DefaultOptions);
117 }
118 
RE2(const StringPiece & pattern)119 RE2::RE2(const StringPiece& pattern) {
120   Init(pattern, DefaultOptions);
121 }
122 
RE2(const StringPiece & pattern,const Options & options)123 RE2::RE2(const StringPiece& pattern, const Options& options) {
124   Init(pattern, options);
125 }
126 
ParseFlags() const127 int RE2::Options::ParseFlags() const {
128   int flags = Regexp::ClassNL;
129   switch (encoding()) {
130     default:
131       if (log_errors())
132         LOG(ERROR) << "Unknown encoding " << encoding();
133       break;
134     case RE2::Options::EncodingUTF8:
135       break;
136     case RE2::Options::EncodingLatin1:
137       flags |= Regexp::Latin1;
138       break;
139   }
140 
141   if (!posix_syntax())
142     flags |= Regexp::LikePerl;
143 
144   if (literal())
145     flags |= Regexp::Literal;
146 
147   if (never_nl())
148     flags |= Regexp::NeverNL;
149 
150   if (dot_nl())
151     flags |= Regexp::DotNL;
152 
153   if (never_capture())
154     flags |= Regexp::NeverCapture;
155 
156   if (!case_sensitive())
157     flags |= Regexp::FoldCase;
158 
159   if (perl_classes())
160     flags |= Regexp::PerlClasses;
161 
162   if (word_boundary())
163     flags |= Regexp::PerlB;
164 
165   if (one_line())
166     flags |= Regexp::OneLine;
167 
168   return flags;
169 }
170 
Init(const StringPiece & pattern,const Options & options)171 void RE2::Init(const StringPiece& pattern, const Options& options) {
172   static std::once_flag empty_once;
173   std::call_once(empty_once, []() {
174     empty_string = new std::string;
175     empty_named_groups = new std::map<std::string, int>;
176     empty_group_names = new std::map<int, std::string>;
177   });
178 
179   pattern_.assign(pattern.data(), pattern.size());
180   options_.Copy(options);
181   entire_regexp_ = NULL;
182   error_ = empty_string;
183   error_code_ = NoError;
184   error_arg_.clear();
185   prefix_.clear();
186   prefix_foldcase_ = false;
187   suffix_regexp_ = NULL;
188   prog_ = NULL;
189   num_captures_ = -1;
190   is_one_pass_ = false;
191 
192   rprog_ = NULL;
193   named_groups_ = NULL;
194   group_names_ = NULL;
195 
196   RegexpStatus status;
197   entire_regexp_ = Regexp::Parse(
198     pattern_,
199     static_cast<Regexp::ParseFlags>(options_.ParseFlags()),
200     &status);
201   if (entire_regexp_ == NULL) {
202     if (options_.log_errors()) {
203       LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': "
204                  << status.Text();
205     }
206     error_ = new std::string(status.Text());
207     error_code_ = RegexpErrorToRE2(status.code());
208     error_arg_ = std::string(status.error_arg());
209     return;
210   }
211 
212   re2::Regexp* suffix;
213   if (entire_regexp_->RequiredPrefix(&prefix_, &prefix_foldcase_, &suffix))
214     suffix_regexp_ = suffix;
215   else
216     suffix_regexp_ = entire_regexp_->Incref();
217 
218   // Two thirds of the memory goes to the forward Prog,
219   // one third to the reverse prog, because the forward
220   // Prog has two DFAs but the reverse prog has one.
221   prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3);
222   if (prog_ == NULL) {
223     if (options_.log_errors())
224       LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'";
225     error_ = new std::string("pattern too large - compile failed");
226     error_code_ = RE2::ErrorPatternTooLarge;
227     return;
228   }
229 
230   // We used to compute this lazily, but it's used during the
231   // typical control flow for a match call, so we now compute
232   // it eagerly, which avoids the overhead of std::once_flag.
233   num_captures_ = suffix_regexp_->NumCaptures();
234 
235   // Could delay this until the first match call that
236   // cares about submatch information, but the one-pass
237   // machine's memory gets cut from the DFA memory budget,
238   // and that is harder to do if the DFA has already
239   // been built.
240   is_one_pass_ = prog_->IsOnePass();
241 }
242 
243 // Returns rprog_, computing it if needed.
ReverseProg() const244 re2::Prog* RE2::ReverseProg() const {
245   std::call_once(rprog_once_, [](const RE2* re) {
246     re->rprog_ =
247         re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3);
248     if (re->rprog_ == NULL) {
249       if (re->options_.log_errors())
250         LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'";
251       // We no longer touch error_ and error_code_ because failing to compile
252       // the reverse Prog is not a showstopper: falling back to NFA execution
253       // is fine. More importantly, an RE2 object is supposed to be logically
254       // immutable: whatever ok() would have returned after Init() completed,
255       // it should continue to return that no matter what ReverseProg() does.
256     }
257   }, this);
258   return rprog_;
259 }
260 
~RE2()261 RE2::~RE2() {
262   if (suffix_regexp_)
263     suffix_regexp_->Decref();
264   if (entire_regexp_)
265     entire_regexp_->Decref();
266   delete prog_;
267   delete rprog_;
268   if (error_ != empty_string)
269     delete error_;
270   if (named_groups_ != NULL && named_groups_ != empty_named_groups)
271     delete named_groups_;
272   if (group_names_ != NULL &&  group_names_ != empty_group_names)
273     delete group_names_;
274 }
275 
ProgramSize() const276 int RE2::ProgramSize() const {
277   if (prog_ == NULL)
278     return -1;
279   return prog_->size();
280 }
281 
ReverseProgramSize() const282 int RE2::ReverseProgramSize() const {
283   if (prog_ == NULL)
284     return -1;
285   Prog* prog = ReverseProg();
286   if (prog == NULL)
287     return -1;
288   return prog->size();
289 }
290 
291 // Finds the most significant non-zero bit in n.
FindMSBSet(uint32_t n)292 static int FindMSBSet(uint32_t n) {
293   DCHECK_NE(n, 0);
294 #if defined(__GNUC__)
295   return 31 ^ __builtin_clz(n);
296 #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
297   unsigned long c;
298   _BitScanReverse(&c, n);
299   return static_cast<int>(c);
300 #else
301   int c = 0;
302   for (int shift = 1 << 4; shift != 0; shift >>= 1) {
303     uint32_t word = n >> shift;
304     if (word != 0) {
305       n = word;
306       c += shift;
307     }
308   }
309   return c;
310 #endif
311 }
312 
Fanout(Prog * prog,std::vector<int> * histogram)313 static int Fanout(Prog* prog, std::vector<int>* histogram) {
314   SparseArray<int> fanout(prog->size());
315   prog->Fanout(&fanout);
316   int data[32] = {};
317   int size = 0;
318   for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
319     if (i->value() == 0)
320       continue;
321     uint32_t value = i->value();
322     int bucket = FindMSBSet(value);
323     bucket += value & (value-1) ? 1 : 0;
324     ++data[bucket];
325     size = std::max(size, bucket+1);
326   }
327   if (histogram != NULL)
328     histogram->assign(data, data+size);
329   return size-1;
330 }
331 
ProgramFanout(std::vector<int> * histogram) const332 int RE2::ProgramFanout(std::vector<int>* histogram) const {
333   if (prog_ == NULL)
334     return -1;
335   return Fanout(prog_, histogram);
336 }
337 
ReverseProgramFanout(std::vector<int> * histogram) const338 int RE2::ReverseProgramFanout(std::vector<int>* histogram) const {
339   if (prog_ == NULL)
340     return -1;
341   Prog* prog = ReverseProg();
342   if (prog == NULL)
343     return -1;
344   return Fanout(prog, histogram);
345 }
346 
347 // Returns named_groups_, computing it if needed.
NamedCapturingGroups() const348 const std::map<std::string, int>& RE2::NamedCapturingGroups() const {
349   std::call_once(named_groups_once_, [](const RE2* re) {
350     if (re->suffix_regexp_ != NULL)
351       re->named_groups_ = re->suffix_regexp_->NamedCaptures();
352     if (re->named_groups_ == NULL)
353       re->named_groups_ = empty_named_groups;
354   }, this);
355   return *named_groups_;
356 }
357 
358 // Returns group_names_, computing it if needed.
CapturingGroupNames() const359 const std::map<int, std::string>& RE2::CapturingGroupNames() const {
360   std::call_once(group_names_once_, [](const RE2* re) {
361     if (re->suffix_regexp_ != NULL)
362       re->group_names_ = re->suffix_regexp_->CaptureNames();
363     if (re->group_names_ == NULL)
364       re->group_names_ = empty_group_names;
365   }, this);
366   return *group_names_;
367 }
368 
369 /***** Convenience interfaces *****/
370 
FullMatchN(const StringPiece & text,const RE2 & re,const Arg * const args[],int n)371 bool RE2::FullMatchN(const StringPiece& text, const RE2& re,
372                      const Arg* const args[], int n) {
373   return re.DoMatch(text, ANCHOR_BOTH, NULL, args, n);
374 }
375 
PartialMatchN(const StringPiece & text,const RE2 & re,const Arg * const args[],int n)376 bool RE2::PartialMatchN(const StringPiece& text, const RE2& re,
377                         const Arg* const args[], int n) {
378   return re.DoMatch(text, UNANCHORED, NULL, args, n);
379 }
380 
ConsumeN(StringPiece * input,const RE2 & re,const Arg * const args[],int n)381 bool RE2::ConsumeN(StringPiece* input, const RE2& re,
382                    const Arg* const args[], int n) {
383   size_t consumed;
384   if (re.DoMatch(*input, ANCHOR_START, &consumed, args, n)) {
385     input->remove_prefix(consumed);
386     return true;
387   } else {
388     return false;
389   }
390 }
391 
FindAndConsumeN(StringPiece * input,const RE2 & re,const Arg * const args[],int n)392 bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re,
393                           const Arg* const args[], int n) {
394   size_t consumed;
395   if (re.DoMatch(*input, UNANCHORED, &consumed, args, n)) {
396     input->remove_prefix(consumed);
397     return true;
398   } else {
399     return false;
400   }
401 }
402 
Replace(std::string * str,const RE2 & re,const StringPiece & rewrite)403 bool RE2::Replace(std::string* str,
404                   const RE2& re,
405                   const StringPiece& rewrite) {
406   StringPiece vec[kVecSize];
407   int nvec = 1 + MaxSubmatch(rewrite);
408   if (nvec > 1 + re.NumberOfCapturingGroups())
409     return false;
410   if (nvec > static_cast<int>(arraysize(vec)))
411     return false;
412   if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
413     return false;
414 
415   std::string s;
416   if (!re.Rewrite(&s, rewrite, vec, nvec))
417     return false;
418 
419   assert(vec[0].data() >= str->data());
420   assert(vec[0].data() + vec[0].size() <= str->data() + str->size());
421   str->replace(vec[0].data() - str->data(), vec[0].size(), s);
422   return true;
423 }
424 
GlobalReplace(std::string * str,const RE2 & re,const StringPiece & rewrite)425 int RE2::GlobalReplace(std::string* str,
426                        const RE2& re,
427                        const StringPiece& rewrite) {
428   StringPiece vec[kVecSize];
429   int nvec = 1 + MaxSubmatch(rewrite);
430   if (nvec > 1 + re.NumberOfCapturingGroups())
431     return false;
432   if (nvec > static_cast<int>(arraysize(vec)))
433     return false;
434 
435   const char* p = str->data();
436   const char* ep = p + str->size();
437   const char* lastend = NULL;
438   std::string out;
439   int count = 0;
440 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
441   // Iterate just once when fuzzing. Otherwise, we easily get bogged down
442   // and coverage is unlikely to improve despite significant expense.
443   while (p == str->data()) {
444 #else
445   while (p <= ep) {
446 #endif
447     if (!re.Match(*str, static_cast<size_t>(p - str->data()),
448                   str->size(), UNANCHORED, vec, nvec))
449       break;
450     if (p < vec[0].data())
451       out.append(p, vec[0].data() - p);
452     if (vec[0].data() == lastend && vec[0].empty()) {
453       // Disallow empty match at end of last match: skip ahead.
454       //
455       // fullrune() takes int, not ptrdiff_t. However, it just looks
456       // at the leading byte and treats any length >= 4 the same.
457       if (re.options().encoding() == RE2::Options::EncodingUTF8 &&
458           fullrune(p, static_cast<int>(std::min(ptrdiff_t{4}, ep - p)))) {
459         // re is in UTF-8 mode and there is enough left of str
460         // to allow us to advance by up to UTFmax bytes.
461         Rune r;
462         int n = chartorune(&r, p);
463         // Some copies of chartorune have a bug that accepts
464         // encodings of values in (10FFFF, 1FFFFF] as valid.
465         if (r > Runemax) {
466           n = 1;
467           r = Runeerror;
468         }
469         if (!(n == 1 && r == Runeerror)) {  // no decoding error
470           out.append(p, n);
471           p += n;
472           continue;
473         }
474       }
475       // Most likely, re is in Latin-1 mode. If it is in UTF-8 mode,
476       // we fell through from above and the GIGO principle applies.
477       if (p < ep)
478         out.append(p, 1);
479       p++;
480       continue;
481     }
482     re.Rewrite(&out, rewrite, vec, nvec);
483     p = vec[0].data() + vec[0].size();
484     lastend = p;
485     count++;
486   }
487 
488   if (count == 0)
489     return 0;
490 
491   if (p < ep)
492     out.append(p, ep - p);
493   using std::swap;
494   swap(out, *str);
495   return count;
496 }
497 
498 bool RE2::Extract(const StringPiece& text,
499                   const RE2& re,
500                   const StringPiece& rewrite,
501                   std::string* out) {
502   StringPiece vec[kVecSize];
503   int nvec = 1 + MaxSubmatch(rewrite);
504   if (nvec > 1 + re.NumberOfCapturingGroups())
505     return false;
506   if (nvec > static_cast<int>(arraysize(vec)))
507     return false;
508   if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec))
509     return false;
510 
511   out->clear();
512   return re.Rewrite(out, rewrite, vec, nvec);
513 }
514 
515 std::string RE2::QuoteMeta(const StringPiece& unquoted) {
516   std::string result;
517   result.reserve(unquoted.size() << 1);
518 
519   // Escape any ascii character not in [A-Za-z_0-9].
520   //
521   // Note that it's legal to escape a character even if it has no
522   // special meaning in a regular expression -- so this function does
523   // that.  (This also makes it identical to the perl function of the
524   // same name except for the null-character special case;
525   // see `perldoc -f quotemeta`.)
526   for (size_t ii = 0; ii < unquoted.size(); ++ii) {
527     // Note that using 'isalnum' here raises the benchmark time from
528     // 32ns to 58ns:
529     if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
530         (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
531         (unquoted[ii] < '0' || unquoted[ii] > '9') &&
532         unquoted[ii] != '_' &&
533         // If this is the part of a UTF8 or Latin1 character, we need
534         // to copy this byte without escaping.  Experimentally this is
535         // what works correctly with the regexp library.
536         !(unquoted[ii] & 128)) {
537       if (unquoted[ii] == '\0') {  // Special handling for null chars.
538         // Note that this special handling is not strictly required for RE2,
539         // but this quoting is required for other regexp libraries such as
540         // PCRE.
541         // Can't use "\\0" since the next character might be a digit.
542         result += "\\x00";
543         continue;
544       }
545       result += '\\';
546     }
547     result += unquoted[ii];
548   }
549 
550   return result;
551 }
552 
553 bool RE2::PossibleMatchRange(std::string* min, std::string* max,
554                              int maxlen) const {
555   if (prog_ == NULL)
556     return false;
557 
558   int n = static_cast<int>(prefix_.size());
559   if (n > maxlen)
560     n = maxlen;
561 
562   // Determine initial min max from prefix_ literal.
563   *min = prefix_.substr(0, n);
564   *max = prefix_.substr(0, n);
565   if (prefix_foldcase_) {
566     // prefix is ASCII lowercase; change *min to uppercase.
567     for (int i = 0; i < n; i++) {
568       char& c = (*min)[i];
569       if ('a' <= c && c <= 'z')
570         c += 'A' - 'a';
571     }
572   }
573 
574   // Add to prefix min max using PossibleMatchRange on regexp.
575   std::string dmin, dmax;
576   maxlen -= n;
577   if (maxlen > 0 && prog_->PossibleMatchRange(&dmin, &dmax, maxlen)) {
578     min->append(dmin);
579     max->append(dmax);
580   } else if (!max->empty()) {
581     // prog_->PossibleMatchRange has failed us,
582     // but we still have useful information from prefix_.
583     // Round up *max to allow any possible suffix.
584     PrefixSuccessor(max);
585   } else {
586     // Nothing useful.
587     *min = "";
588     *max = "";
589     return false;
590   }
591 
592   return true;
593 }
594 
595 // Avoid possible locale nonsense in standard strcasecmp.
596 // The string a is known to be all lowercase.
597 static int ascii_strcasecmp(const char* a, const char* b, size_t len) {
598   const char* ae = a + len;
599 
600   for (; a < ae; a++, b++) {
601     uint8_t x = *a;
602     uint8_t y = *b;
603     if ('A' <= y && y <= 'Z')
604       y += 'a' - 'A';
605     if (x != y)
606       return x - y;
607   }
608   return 0;
609 }
610 
611 
612 /***** Actual matching and rewriting code *****/
613 
614 bool RE2::Match(const StringPiece& text,
615                 size_t startpos,
616                 size_t endpos,
617                 Anchor re_anchor,
618                 StringPiece* submatch,
619                 int nsubmatch) const {
620   if (!ok()) {
621     if (options_.log_errors())
622       LOG(ERROR) << "Invalid RE2: " << *error_;
623     return false;
624   }
625 
626   if (startpos > endpos || endpos > text.size()) {
627     if (options_.log_errors())
628       LOG(ERROR) << "RE2: invalid startpos, endpos pair. ["
629                  << "startpos: " << startpos << ", "
630                  << "endpos: " << endpos << ", "
631                  << "text size: " << text.size() << "]";
632     return false;
633   }
634 
635   StringPiece subtext = text;
636   subtext.remove_prefix(startpos);
637   subtext.remove_suffix(text.size() - endpos);
638 
639   // Use DFAs to find exact location of match, filter out non-matches.
640 
641   // Don't ask for the location if we won't use it.
642   // SearchDFA can do extra optimizations in that case.
643   StringPiece match;
644   StringPiece* matchp = &match;
645   if (nsubmatch == 0)
646     matchp = NULL;
647 
648   int ncap = 1 + NumberOfCapturingGroups();
649   if (ncap > nsubmatch)
650     ncap = nsubmatch;
651 
652   // If the regexp is anchored explicitly, must not be in middle of text.
653   if (prog_->anchor_start() && startpos != 0)
654     return false;
655   if (prog_->anchor_end() && endpos != text.size())
656     return false;
657 
658   // If the regexp is anchored explicitly, update re_anchor
659   // so that we can potentially fall into a faster case below.
660   if (prog_->anchor_start() && prog_->anchor_end())
661     re_anchor = ANCHOR_BOTH;
662   else if (prog_->anchor_start() && re_anchor != ANCHOR_BOTH)
663     re_anchor = ANCHOR_START;
664 
665   // Check for the required prefix, if any.
666   size_t prefixlen = 0;
667   if (!prefix_.empty()) {
668     if (startpos != 0)
669       return false;
670     prefixlen = prefix_.size();
671     if (prefixlen > subtext.size())
672       return false;
673     if (prefix_foldcase_) {
674       if (ascii_strcasecmp(&prefix_[0], subtext.data(), prefixlen) != 0)
675         return false;
676     } else {
677       if (memcmp(&prefix_[0], subtext.data(), prefixlen) != 0)
678         return false;
679     }
680     subtext.remove_prefix(prefixlen);
681     // If there is a required prefix, the anchor must be at least ANCHOR_START.
682     if (re_anchor != ANCHOR_BOTH)
683       re_anchor = ANCHOR_START;
684   }
685 
686   Prog::Anchor anchor = Prog::kUnanchored;
687   Prog::MatchKind kind = Prog::kFirstMatch;
688   if (options_.longest_match())
689     kind = Prog::kLongestMatch;
690 
691   bool can_one_pass = (is_one_pass_ && ncap <= Prog::kMaxOnePassCapture);
692 
693   // BitState allocates a bitmap of size prog_->list_count() * text.size().
694   // It also allocates a stack of 3-word structures which could potentially
695   // grow as large as prog_->list_count() * text.size(), but in practice is
696   // much smaller.
697   const int kMaxBitStateBitmapSize = 256*1024;  // bitmap size <= max (bits)
698   bool can_bit_state = prog_->CanBitState();
699   size_t bit_state_text_max = kMaxBitStateBitmapSize / prog_->list_count();
700 
701 #ifdef RE2_HAVE_THREAD_LOCAL
702   hooks::context = this;
703 #endif
704   bool dfa_failed = false;
705   bool skipped_test = false;
706   switch (re_anchor) {
707     default:
708       LOG(DFATAL) << "Unexpected re_anchor value: " << re_anchor;
709       return false;
710 
711     case UNANCHORED: {
712       if (prog_->anchor_end()) {
713         // This is a very special case: we don't need the forward DFA because
714         // we already know where the match must end! Instead, the reverse DFA
715         // can say whether there is a match and (optionally) where it starts.
716         Prog* prog = ReverseProg();
717         if (prog == NULL) {
718           // Fall back to NFA below.
719           skipped_test = true;
720           break;
721         }
722         if (!prog->SearchDFA(subtext, text, Prog::kAnchored,
723                              Prog::kLongestMatch, matchp, &dfa_failed, NULL)) {
724           if (dfa_failed) {
725             if (options_.log_errors())
726               LOG(ERROR) << "DFA out of memory: "
727                          << "pattern length " << pattern_.size() << ", "
728                          << "program size " << prog->size() << ", "
729                          << "list count " << prog->list_count() << ", "
730                          << "bytemap range " << prog->bytemap_range();
731             // Fall back to NFA below.
732             skipped_test = true;
733             break;
734           }
735           return false;
736         }
737         if (matchp == NULL)  // Matched.  Don't care where.
738           return true;
739         break;
740       }
741 
742       if (!prog_->SearchDFA(subtext, text, anchor, kind,
743                             matchp, &dfa_failed, NULL)) {
744         if (dfa_failed) {
745           if (options_.log_errors())
746             LOG(ERROR) << "DFA out of memory: "
747                        << "pattern length " << pattern_.size() << ", "
748                        << "program size " << prog_->size() << ", "
749                        << "list count " << prog_->list_count() << ", "
750                        << "bytemap range " << prog_->bytemap_range();
751           // Fall back to NFA below.
752           skipped_test = true;
753           break;
754         }
755         return false;
756       }
757       if (matchp == NULL)  // Matched.  Don't care where.
758         return true;
759       // SearchDFA set match.end() but didn't know where the
760       // match started.  Run the regexp backward from match.end()
761       // to find the longest possible match -- that's where it started.
762       Prog* prog = ReverseProg();
763       if (prog == NULL) {
764         // Fall back to NFA below.
765         skipped_test = true;
766         break;
767       }
768       if (!prog->SearchDFA(match, text, Prog::kAnchored,
769                            Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
770         if (dfa_failed) {
771           if (options_.log_errors())
772             LOG(ERROR) << "DFA out of memory: "
773                        << "pattern length " << pattern_.size() << ", "
774                        << "program size " << prog->size() << ", "
775                        << "list count " << prog->list_count() << ", "
776                        << "bytemap range " << prog->bytemap_range();
777           // Fall back to NFA below.
778           skipped_test = true;
779           break;
780         }
781         if (options_.log_errors())
782           LOG(ERROR) << "SearchDFA inconsistency";
783         return false;
784       }
785       break;
786     }
787 
788     case ANCHOR_BOTH:
789     case ANCHOR_START:
790       if (re_anchor == ANCHOR_BOTH)
791         kind = Prog::kFullMatch;
792       anchor = Prog::kAnchored;
793 
794       // If only a small amount of text and need submatch
795       // information anyway and we're going to use OnePass or BitState
796       // to get it, we might as well not even bother with the DFA:
797       // OnePass or BitState will be fast enough.
798       // On tiny texts, OnePass outruns even the DFA, and
799       // it doesn't have the shared state and occasional mutex that
800       // the DFA does.
801       if (can_one_pass && text.size() <= 4096 &&
802           (ncap > 1 || text.size() <= 8)) {
803         skipped_test = true;
804         break;
805       }
806       if (can_bit_state && text.size() <= bit_state_text_max && ncap > 1) {
807         skipped_test = true;
808         break;
809       }
810       if (!prog_->SearchDFA(subtext, text, anchor, kind,
811                             &match, &dfa_failed, NULL)) {
812         if (dfa_failed) {
813           if (options_.log_errors())
814             LOG(ERROR) << "DFA out of memory: "
815                        << "pattern length " << pattern_.size() << ", "
816                        << "program size " << prog_->size() << ", "
817                        << "list count " << prog_->list_count() << ", "
818                        << "bytemap range " << prog_->bytemap_range();
819           // Fall back to NFA below.
820           skipped_test = true;
821           break;
822         }
823         return false;
824       }
825       break;
826   }
827 
828   if (!skipped_test && ncap <= 1) {
829     // We know exactly where it matches.  That's enough.
830     if (ncap == 1)
831       submatch[0] = match;
832   } else {
833     StringPiece subtext1;
834     if (skipped_test) {
835       // DFA ran out of memory or was skipped:
836       // need to search in entire original text.
837       subtext1 = subtext;
838     } else {
839       // DFA found the exact match location:
840       // let NFA run an anchored, full match search
841       // to find submatch locations.
842       subtext1 = match;
843       anchor = Prog::kAnchored;
844       kind = Prog::kFullMatch;
845     }
846 
847     if (can_one_pass && anchor != Prog::kUnanchored) {
848       if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) {
849         if (!skipped_test && options_.log_errors())
850           LOG(ERROR) << "SearchOnePass inconsistency";
851         return false;
852       }
853     } else if (can_bit_state && subtext1.size() <= bit_state_text_max) {
854       if (!prog_->SearchBitState(subtext1, text, anchor,
855                                  kind, submatch, ncap)) {
856         if (!skipped_test && options_.log_errors())
857           LOG(ERROR) << "SearchBitState inconsistency";
858         return false;
859       }
860     } else {
861       if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) {
862         if (!skipped_test && options_.log_errors())
863           LOG(ERROR) << "SearchNFA inconsistency";
864         return false;
865       }
866     }
867   }
868 
869   // Adjust overall match for required prefix that we stripped off.
870   if (prefixlen > 0 && nsubmatch > 0)
871     submatch[0] = StringPiece(submatch[0].data() - prefixlen,
872                               submatch[0].size() + prefixlen);
873 
874   // Zero submatches that don't exist in the regexp.
875   for (int i = ncap; i < nsubmatch; i++)
876     submatch[i] = StringPiece();
877   return true;
878 }
879 
880 // Internal matcher - like Match() but takes Args not StringPieces.
881 bool RE2::DoMatch(const StringPiece& text,
882                   Anchor re_anchor,
883                   size_t* consumed,
884                   const Arg* const* args,
885                   int n) const {
886   if (!ok()) {
887     if (options_.log_errors())
888       LOG(ERROR) << "Invalid RE2: " << *error_;
889     return false;
890   }
891 
892   if (NumberOfCapturingGroups() < n) {
893     // RE has fewer capturing groups than number of Arg pointers passed in.
894     return false;
895   }
896 
897   // Count number of capture groups needed.
898   int nvec;
899   if (n == 0 && consumed == NULL)
900     nvec = 0;
901   else
902     nvec = n+1;
903 
904   StringPiece* vec;
905   StringPiece stkvec[kVecSize];
906   StringPiece* heapvec = NULL;
907 
908   if (nvec <= static_cast<int>(arraysize(stkvec))) {
909     vec = stkvec;
910   } else {
911     vec = new StringPiece[nvec];
912     heapvec = vec;
913   }
914 
915   if (!Match(text, 0, text.size(), re_anchor, vec, nvec)) {
916     delete[] heapvec;
917     return false;
918   }
919 
920   if (consumed != NULL)
921     *consumed = static_cast<size_t>(vec[0].end() - text.begin());
922 
923   if (n == 0 || args == NULL) {
924     // We are not interested in results
925     delete[] heapvec;
926     return true;
927   }
928 
929   // If we got here, we must have matched the whole pattern.
930   for (int i = 0; i < n; i++) {
931     const StringPiece& s = vec[i+1];
932     if (!args[i]->Parse(s.data(), s.size())) {
933       // TODO: Should we indicate what the error was?
934       delete[] heapvec;
935       return false;
936     }
937   }
938 
939   delete[] heapvec;
940   return true;
941 }
942 
943 // Checks that the rewrite string is well-formed with respect to this
944 // regular expression.
945 bool RE2::CheckRewriteString(const StringPiece& rewrite,
946                              std::string* error) const {
947   int max_token = -1;
948   for (const char *s = rewrite.data(), *end = s + rewrite.size();
949        s < end; s++) {
950     int c = *s;
951     if (c != '\\') {
952       continue;
953     }
954     if (++s == end) {
955       *error = "Rewrite schema error: '\\' not allowed at end.";
956       return false;
957     }
958     c = *s;
959     if (c == '\\') {
960       continue;
961     }
962     if (!isdigit(c)) {
963       *error = "Rewrite schema error: "
964                "'\\' must be followed by a digit or '\\'.";
965       return false;
966     }
967     int n = (c - '0');
968     if (max_token < n) {
969       max_token = n;
970     }
971   }
972 
973   if (max_token > NumberOfCapturingGroups()) {
974     *error = StringPrintf(
975         "Rewrite schema requests %d matches, but the regexp only has %d "
976         "parenthesized subexpressions.",
977         max_token, NumberOfCapturingGroups());
978     return false;
979   }
980   return true;
981 }
982 
983 // Returns the maximum submatch needed for the rewrite to be done by Replace().
984 // E.g. if rewrite == "foo \\2,\\1", returns 2.
985 int RE2::MaxSubmatch(const StringPiece& rewrite) {
986   int max = 0;
987   for (const char *s = rewrite.data(), *end = s + rewrite.size();
988        s < end; s++) {
989     if (*s == '\\') {
990       s++;
991       int c = (s < end) ? *s : -1;
992       if (isdigit(c)) {
993         int n = (c - '0');
994         if (n > max)
995           max = n;
996       }
997     }
998   }
999   return max;
1000 }
1001 
1002 // Append the "rewrite" string, with backslash subsitutions from "vec",
1003 // to string "out".
1004 bool RE2::Rewrite(std::string* out,
1005                   const StringPiece& rewrite,
1006                   const StringPiece* vec,
1007                   int veclen) const {
1008   for (const char *s = rewrite.data(), *end = s + rewrite.size();
1009        s < end; s++) {
1010     if (*s != '\\') {
1011       out->push_back(*s);
1012       continue;
1013     }
1014     s++;
1015     int c = (s < end) ? *s : -1;
1016     if (isdigit(c)) {
1017       int n = (c - '0');
1018       if (n >= veclen) {
1019         if (options_.log_errors()) {
1020           LOG(ERROR) << "invalid substitution \\" << n
1021                      << " from " << veclen << " groups";
1022         }
1023         return false;
1024       }
1025       StringPiece snip = vec[n];
1026       if (!snip.empty())
1027         out->append(snip.data(), snip.size());
1028     } else if (c == '\\') {
1029       out->push_back('\\');
1030     } else {
1031       if (options_.log_errors())
1032         LOG(ERROR) << "invalid rewrite pattern: " << rewrite.data();
1033       return false;
1034     }
1035   }
1036   return true;
1037 }
1038 
1039 /***** Parsers for various types *****/
1040 
1041 bool RE2::Arg::parse_null(const char* str, size_t n, void* dest) {
1042   // We fail if somebody asked us to store into a non-NULL void* pointer
1043   return (dest == NULL);
1044 }
1045 
1046 bool RE2::Arg::parse_string(const char* str, size_t n, void* dest) {
1047   if (dest == NULL) return true;
1048   reinterpret_cast<std::string*>(dest)->assign(str, n);
1049   return true;
1050 }
1051 
1052 bool RE2::Arg::parse_stringpiece(const char* str, size_t n, void* dest) {
1053   if (dest == NULL) return true;
1054   *(reinterpret_cast<StringPiece*>(dest)) = StringPiece(str, n);
1055   return true;
1056 }
1057 
1058 bool RE2::Arg::parse_char(const char* str, size_t n, void* dest) {
1059   if (n != 1) return false;
1060   if (dest == NULL) return true;
1061   *(reinterpret_cast<char*>(dest)) = str[0];
1062   return true;
1063 }
1064 
1065 bool RE2::Arg::parse_schar(const char* str, size_t n, void* dest) {
1066   if (n != 1) return false;
1067   if (dest == NULL) return true;
1068   *(reinterpret_cast<signed char*>(dest)) = str[0];
1069   return true;
1070 }
1071 
1072 bool RE2::Arg::parse_uchar(const char* str, size_t n, void* dest) {
1073   if (n != 1) return false;
1074   if (dest == NULL) return true;
1075   *(reinterpret_cast<unsigned char*>(dest)) = str[0];
1076   return true;
1077 }
1078 
1079 // Largest number spec that we are willing to parse
1080 static const int kMaxNumberLength = 32;
1081 
1082 // REQUIRES "buf" must have length at least nbuf.
1083 // Copies "str" into "buf" and null-terminates.
1084 // Overwrites *np with the new length.
1085 static const char* TerminateNumber(char* buf, size_t nbuf, const char* str,
1086                                    size_t* np, bool accept_spaces) {
1087   size_t n = *np;
1088   if (n == 0) return "";
1089   if (n > 0 && isspace(*str)) {
1090     // We are less forgiving than the strtoxxx() routines and do not
1091     // allow leading spaces. We do allow leading spaces for floats.
1092     if (!accept_spaces) {
1093       return "";
1094     }
1095     while (n > 0 && isspace(*str)) {
1096       n--;
1097       str++;
1098     }
1099   }
1100 
1101   // Although buf has a fixed maximum size, we can still handle
1102   // arbitrarily large integers correctly by omitting leading zeros.
1103   // (Numbers that are still too long will be out of range.)
1104   // Before deciding whether str is too long,
1105   // remove leading zeros with s/000+/00/.
1106   // Leaving the leading two zeros in place means that
1107   // we don't change 0000x123 (invalid) into 0x123 (valid).
1108   // Skip over leading - before replacing.
1109   bool neg = false;
1110   if (n >= 1 && str[0] == '-') {
1111     neg = true;
1112     n--;
1113     str++;
1114   }
1115 
1116   if (n >= 3 && str[0] == '0' && str[1] == '0') {
1117     while (n >= 3 && str[2] == '0') {
1118       n--;
1119       str++;
1120     }
1121   }
1122 
1123   if (neg) {  // make room in buf for -
1124     n++;
1125     str--;
1126   }
1127 
1128   if (n > nbuf-1) return "";
1129 
1130   memmove(buf, str, n);
1131   if (neg) {
1132     buf[0] = '-';
1133   }
1134   buf[n] = '\0';
1135   *np = n;
1136   return buf;
1137 }
1138 
1139 bool RE2::Arg::parse_long_radix(const char* str,
1140                                 size_t n,
1141                                 void* dest,
1142                                 int radix) {
1143   if (n == 0) return false;
1144   char buf[kMaxNumberLength+1];
1145   str = TerminateNumber(buf, sizeof buf, str, &n, false);
1146   char* end;
1147   errno = 0;
1148   long r = strtol(str, &end, radix);
1149   if (end != str + n) return false;   // Leftover junk
1150   if (errno) return false;
1151   if (dest == NULL) return true;
1152   *(reinterpret_cast<long*>(dest)) = r;
1153   return true;
1154 }
1155 
1156 bool RE2::Arg::parse_ulong_radix(const char* str,
1157                                  size_t n,
1158                                  void* dest,
1159                                  int radix) {
1160   if (n == 0) return false;
1161   char buf[kMaxNumberLength+1];
1162   str = TerminateNumber(buf, sizeof buf, str, &n, false);
1163   if (str[0] == '-') {
1164     // strtoul() will silently accept negative numbers and parse
1165     // them.  This module is more strict and treats them as errors.
1166     return false;
1167   }
1168 
1169   char* end;
1170   errno = 0;
1171   unsigned long r = strtoul(str, &end, radix);
1172   if (end != str + n) return false;   // Leftover junk
1173   if (errno) return false;
1174   if (dest == NULL) return true;
1175   *(reinterpret_cast<unsigned long*>(dest)) = r;
1176   return true;
1177 }
1178 
1179 bool RE2::Arg::parse_short_radix(const char* str,
1180                                  size_t n,
1181                                  void* dest,
1182                                  int radix) {
1183   long r;
1184   if (!parse_long_radix(str, n, &r, radix)) return false;  // Could not parse
1185   if ((short)r != r) return false;                         // Out of range
1186   if (dest == NULL) return true;
1187   *(reinterpret_cast<short*>(dest)) = (short)r;
1188   return true;
1189 }
1190 
1191 bool RE2::Arg::parse_ushort_radix(const char* str,
1192                                   size_t n,
1193                                   void* dest,
1194                                   int radix) {
1195   unsigned long r;
1196   if (!parse_ulong_radix(str, n, &r, radix)) return false;  // Could not parse
1197   if ((unsigned short)r != r) return false;                 // Out of range
1198   if (dest == NULL) return true;
1199   *(reinterpret_cast<unsigned short*>(dest)) = (unsigned short)r;
1200   return true;
1201 }
1202 
1203 bool RE2::Arg::parse_int_radix(const char* str,
1204                                size_t n,
1205                                void* dest,
1206                                int radix) {
1207   long r;
1208   if (!parse_long_radix(str, n, &r, radix)) return false;  // Could not parse
1209   if ((int)r != r) return false;                           // Out of range
1210   if (dest == NULL) return true;
1211   *(reinterpret_cast<int*>(dest)) = (int)r;
1212   return true;
1213 }
1214 
1215 bool RE2::Arg::parse_uint_radix(const char* str,
1216                                 size_t n,
1217                                 void* dest,
1218                                 int radix) {
1219   unsigned long r;
1220   if (!parse_ulong_radix(str, n, &r, radix)) return false;  // Could not parse
1221   if ((unsigned int)r != r) return false;                   // Out of range
1222   if (dest == NULL) return true;
1223   *(reinterpret_cast<unsigned int*>(dest)) = (unsigned int)r;
1224   return true;
1225 }
1226 
1227 bool RE2::Arg::parse_longlong_radix(const char* str,
1228                                     size_t n,
1229                                     void* dest,
1230                                     int radix) {
1231   if (n == 0) return false;
1232   char buf[kMaxNumberLength+1];
1233   str = TerminateNumber(buf, sizeof buf, str, &n, false);
1234   char* end;
1235   errno = 0;
1236   long long r = strtoll(str, &end, radix);
1237   if (end != str + n) return false;   // Leftover junk
1238   if (errno) return false;
1239   if (dest == NULL) return true;
1240   *(reinterpret_cast<long long*>(dest)) = r;
1241   return true;
1242 }
1243 
1244 bool RE2::Arg::parse_ulonglong_radix(const char* str,
1245                                      size_t n,
1246                                      void* dest,
1247                                      int radix) {
1248   if (n == 0) return false;
1249   char buf[kMaxNumberLength+1];
1250   str = TerminateNumber(buf, sizeof buf, str, &n, false);
1251   if (str[0] == '-') {
1252     // strtoull() will silently accept negative numbers and parse
1253     // them.  This module is more strict and treats them as errors.
1254     return false;
1255   }
1256   char* end;
1257   errno = 0;
1258   unsigned long long r = strtoull(str, &end, radix);
1259   if (end != str + n) return false;   // Leftover junk
1260   if (errno) return false;
1261   if (dest == NULL) return true;
1262   *(reinterpret_cast<unsigned long long*>(dest)) = r;
1263   return true;
1264 }
1265 
1266 static bool parse_double_float(const char* str, size_t n, bool isfloat,
1267                                void* dest) {
1268   if (n == 0) return false;
1269   static const int kMaxLength = 200;
1270   char buf[kMaxLength+1];
1271   str = TerminateNumber(buf, sizeof buf, str, &n, true);
1272   char* end;
1273   errno = 0;
1274   double r;
1275   if (isfloat) {
1276     r = strtof(str, &end);
1277   } else {
1278     r = strtod(str, &end);
1279   }
1280   if (end != str + n) return false;   // Leftover junk
1281   if (errno) return false;
1282   if (dest == NULL) return true;
1283   if (isfloat) {
1284     *(reinterpret_cast<float*>(dest)) = (float)r;
1285   } else {
1286     *(reinterpret_cast<double*>(dest)) = r;
1287   }
1288   return true;
1289 }
1290 
1291 bool RE2::Arg::parse_double(const char* str, size_t n, void* dest) {
1292   return parse_double_float(str, n, false, dest);
1293 }
1294 
1295 bool RE2::Arg::parse_float(const char* str, size_t n, void* dest) {
1296   return parse_double_float(str, n, true, dest);
1297 }
1298 
1299 #define DEFINE_INTEGER_PARSER(name)                                            \
1300   bool RE2::Arg::parse_##name(const char* str, size_t n, void* dest) {         \
1301     return parse_##name##_radix(str, n, dest, 10);                             \
1302   }                                                                            \
1303   bool RE2::Arg::parse_##name##_hex(const char* str, size_t n, void* dest) {   \
1304     return parse_##name##_radix(str, n, dest, 16);                             \
1305   }                                                                            \
1306   bool RE2::Arg::parse_##name##_octal(const char* str, size_t n, void* dest) { \
1307     return parse_##name##_radix(str, n, dest, 8);                              \
1308   }                                                                            \
1309   bool RE2::Arg::parse_##name##_cradix(const char* str, size_t n,              \
1310                                        void* dest) {                           \
1311     return parse_##name##_radix(str, n, dest, 0);                              \
1312   }
1313 
1314 DEFINE_INTEGER_PARSER(short)
1315 DEFINE_INTEGER_PARSER(ushort)
1316 DEFINE_INTEGER_PARSER(int)
1317 DEFINE_INTEGER_PARSER(uint)
1318 DEFINE_INTEGER_PARSER(long)
1319 DEFINE_INTEGER_PARSER(ulong)
1320 DEFINE_INTEGER_PARSER(longlong)
1321 DEFINE_INTEGER_PARSER(ulonglong)
1322 
1323 #undef DEFINE_INTEGER_PARSER
1324 
1325 namespace hooks {
1326 
1327 #ifdef RE2_HAVE_THREAD_LOCAL
1328 thread_local const RE2* context = NULL;
1329 #endif
1330 
1331 template <typename T>
1332 union Hook {
1333   void Store(T* cb) { cb_.store(cb, std::memory_order_release); }
1334   T* Load() const { return cb_.load(std::memory_order_acquire); }
1335 
1336 #if !defined(__clang__) && defined(_MSC_VER)
1337   // Citing https://github.com/protocolbuffers/protobuf/pull/4777 as precedent,
1338   // this is a gross hack to make std::atomic<T*> constant-initialized on MSVC.
1339   static_assert(ATOMIC_POINTER_LOCK_FREE == 2,
1340                 "std::atomic<T*> must be always lock-free");
1341   T* cb_for_constinit_;
1342 #endif
1343 
1344   std::atomic<T*> cb_;
1345 };
1346 
1347 template <typename T>
1348 static void DoNothing(const T&) {}
1349 
1350 #define DEFINE_HOOK(type, name)                                       \
1351   static Hook<type##Callback> name##_hook = {{&DoNothing<type>}};     \
1352   void Set##type##Hook(type##Callback* cb) { name##_hook.Store(cb); } \
1353   type##Callback* Get##type##Hook() { return name##_hook.Load(); }
1354 
1355 DEFINE_HOOK(DFAStateCacheReset, dfa_state_cache_reset)
1356 DEFINE_HOOK(DFASearchFailure, dfa_search_failure)
1357 
1358 #undef DEFINE_HOOK
1359 
1360 }  // namespace hooks
1361 
1362 }  // namespace re2
1363