1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 //
4 //  file:  repattrn.cpp
5 //
6 /*
7 ***************************************************************************
8 *   Copyright (C) 2002-2016 International Business Machines Corporation
9 *   and others. All rights reserved.
10 ***************************************************************************
11 */
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
16 
17 #include "unicode/regex.h"
18 #include "unicode/uclean.h"
19 #include "cmemory.h"
20 #include "cstr.h"
21 #include "uassert.h"
22 #include "uhash.h"
23 #include "uvector.h"
24 #include "uvectr32.h"
25 #include "uvectr64.h"
26 #include "regexcmp.h"
27 #include "regeximp.h"
28 #include "regexst.h"
29 
30 U_NAMESPACE_BEGIN
31 
32 //--------------------------------------------------------------------------
33 //
34 //    RegexPattern    Default Constructor
35 //
36 //--------------------------------------------------------------------------
RegexPattern()37 RegexPattern::RegexPattern() {
38     // Init all of this instances data.
39     init();
40 }
41 
42 
43 //--------------------------------------------------------------------------
44 //
45 //   Copy Constructor        Note:  This is a rather inefficient implementation,
46 //                                  but it probably doesn't matter.
47 //
48 //--------------------------------------------------------------------------
RegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
    // Bring this object to the default state first, then delegate the
    // deep copy to the assignment operator so the copy logic exists once.
    init();
    operator=(other);
}
53 
54 
55 
56 //--------------------------------------------------------------------------
57 //
58 //    Assignment Operator
59 //
60 //--------------------------------------------------------------------------
operator =(const RegexPattern & other)61 RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
62     if (this == &other) {
63         // Source and destination are the same.  Don't do anything.
64         return *this;
65     }
66 
67     // Clean out any previous contents of object being assigned to.
68     zap();
69 
70     // Give target object a default initialization
71     init();
72 
73     // Copy simple fields
74     fDeferredStatus   = other.fDeferredStatus;
75 
76     if (U_FAILURE(fDeferredStatus)) {
77         return *this;
78     }
79 
80     if (other.fPatternString == NULL) {
81         fPatternString = NULL;
82         fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
83     } else {
84         fPatternString = new UnicodeString(*(other.fPatternString));
85         if (fPatternString == NULL) {
86             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
87         } else {
88             fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus);
89         }
90     }
91     if (U_FAILURE(fDeferredStatus)) {
92         return *this;
93     }
94 
95     fFlags            = other.fFlags;
96     fLiteralText      = other.fLiteralText;
97     fMinMatchLen      = other.fMinMatchLen;
98     fFrameSize        = other.fFrameSize;
99     fDataSize         = other.fDataSize;
100     fStaticSets       = other.fStaticSets;
101     fStaticSets8      = other.fStaticSets8;
102 
103     fStartType        = other.fStartType;
104     fInitialStringIdx = other.fInitialStringIdx;
105     fInitialStringLen = other.fInitialStringLen;
106     *fInitialChars    = *other.fInitialChars;
107     fInitialChar      = other.fInitialChar;
108     *fInitialChars8   = *other.fInitialChars8;
109     fNeedsAltInput    = other.fNeedsAltInput;
110 
111     //  Copy the pattern.  It's just values, nothing deep to copy.
112     fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
113     fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
114 
115     //  Copy the Unicode Sets.
116     //    Could be made more efficient if the sets were reference counted and shared,
117     //    but I doubt that pattern copying will be particularly common.
118     //    Note:  init() already added an empty element zero to fSets
119     int32_t i;
120     int32_t  numSets = other.fSets->size();
121     fSets8 = new Regex8BitSet[numSets];
122     if (fSets8 == NULL) {
123     	fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
124     	return *this;
125     }
126     for (i=1; i<numSets; i++) {
127         if (U_FAILURE(fDeferredStatus)) {
128             return *this;
129         }
130         UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
131         UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
132         if (newSet == NULL) {
133             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
134             break;
135         }
136         fSets->addElement(newSet, fDeferredStatus);
137         fSets8[i] = other.fSets8[i];
138     }
139 
140     // Copy the named capture group hash map.
141     int32_t hashPos = UHASH_FIRST;
142     while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
143         if (U_FAILURE(fDeferredStatus)) {
144             break;
145         }
146         const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer;
147         UnicodeString *key = new UnicodeString(*name);
148         int32_t val = hashEl->value.integer;
149         if (key == NULL) {
150             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
151         } else {
152             uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
153         }
154     }
155     return *this;
156 }
157 
158 
159 //--------------------------------------------------------------------------
160 //
161 //    init        Shared initialization for use by constructors.
162 //                Bring an uninitialized RegexPattern up to a default state.
163 //
164 //--------------------------------------------------------------------------
init()165 void RegexPattern::init() {
166     fFlags            = 0;
167     fCompiledPat      = 0;
168     fLiteralText.remove();
169     fSets             = NULL;
170     fSets8            = NULL;
171     fDeferredStatus   = U_ZERO_ERROR;
172     fMinMatchLen      = 0;
173     fFrameSize        = 0;
174     fDataSize         = 0;
175     fGroupMap         = NULL;
176     fStaticSets       = NULL;
177     fStaticSets8      = NULL;
178     fStartType        = START_NO_INFO;
179     fInitialStringIdx = 0;
180     fInitialStringLen = 0;
181     fInitialChars     = NULL;
182     fInitialChar      = 0;
183     fInitialChars8    = NULL;
184     fNeedsAltInput    = FALSE;
185     fNamedCaptureMap  = NULL;
186 
187     fPattern          = NULL; // will be set later
188     fPatternString    = NULL; // may be set later
189     fCompiledPat      = new UVector64(fDeferredStatus);
190     fGroupMap         = new UVector32(fDeferredStatus);
191     fSets             = new UVector(fDeferredStatus);
192     fInitialChars     = new UnicodeSet;
193     fInitialChars8    = new Regex8BitSet;
194     fNamedCaptureMap  = uhash_open(uhash_hashUnicodeString,     // Key hash function
195                                    uhash_compareUnicodeString,  // Key comparator function
196                                    uhash_compareLong,           // Value comparator function
197                                    &fDeferredStatus);
198     if (U_FAILURE(fDeferredStatus)) {
199         return;
200     }
201     if (fCompiledPat == NULL  || fGroupMap == NULL || fSets == NULL ||
202             fInitialChars == NULL || fInitialChars8 == NULL || fNamedCaptureMap == NULL) {
203         fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
204         return;
205     }
206 
207     // Slot zero of the vector of sets is reserved.  Fill it here.
208     fSets->addElement((int32_t)0, fDeferredStatus);
209 
210     // fNamedCaptureMap owns its key strings, type (UnicodeString *)
211     uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
212 }
213 
214 
215 //--------------------------------------------------------------------------
216 //
217 //   zap            Delete everything owned by this RegexPattern.
218 //
219 //--------------------------------------------------------------------------
zap()220 void RegexPattern::zap() {
221     delete fCompiledPat;
222     fCompiledPat = NULL;
223     int i;
224     for (i=1; i<fSets->size(); i++) {
225         UnicodeSet *s;
226         s = (UnicodeSet *)fSets->elementAt(i);
227         if (s != NULL) {
228             delete s;
229         }
230     }
231     delete fSets;
232     fSets = NULL;
233     delete[] fSets8;
234     fSets8 = NULL;
235     delete fGroupMap;
236     fGroupMap = NULL;
237     delete fInitialChars;
238     fInitialChars = NULL;
239     delete fInitialChars8;
240     fInitialChars8 = NULL;
241     if (fPattern != NULL) {
242         utext_close(fPattern);
243         fPattern = NULL;
244     }
245     if (fPatternString != NULL) {
246         delete fPatternString;
247         fPatternString = NULL;
248     }
249     uhash_close(fNamedCaptureMap);
250     fNamedCaptureMap = NULL;
251 }
252 
253 
254 //--------------------------------------------------------------------------
255 //
256 //   Destructor
257 //
258 //--------------------------------------------------------------------------
~RegexPattern()259 RegexPattern::~RegexPattern() {
260     zap();
261 }
262 
263 
264 //--------------------------------------------------------------------------
265 //
266 //   Clone
267 //
268 //--------------------------------------------------------------------------
clone() const269 RegexPattern  *RegexPattern::clone() const {
270     RegexPattern  *copy = new RegexPattern(*this);
271     return copy;
272 }
273 
274 
275 //--------------------------------------------------------------------------
276 //
//   operator ==   (comparison)    Consider two patterns to be == if the
278 //                                 pattern strings and the flags are the same.
279 //                                 Note that pattern strings with the same
280 //                                 characters can still be considered different.
281 //
282 //--------------------------------------------------------------------------
operator ==(const RegexPattern & other) const283 UBool   RegexPattern::operator ==(const RegexPattern &other) const {
284     if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
285         if (this->fPatternString != NULL && other.fPatternString != NULL) {
286             return *(this->fPatternString) == *(other.fPatternString);
287         } else if (this->fPattern == NULL) {
288             if (other.fPattern == NULL) {
289                 return TRUE;
290             }
291         } else if (other.fPattern != NULL) {
292             UTEXT_SETNATIVEINDEX(this->fPattern, 0);
293             UTEXT_SETNATIVEINDEX(other.fPattern, 0);
294             return utext_equals(this->fPattern, other.fPattern);
295         }
296     }
297     return FALSE;
298 }
299 
300 //---------------------------------------------------------------------
301 //
302 //   compile
303 //
304 //---------------------------------------------------------------------
305 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UParseError & pe,UErrorCode & status)306 RegexPattern::compile(const UnicodeString &regex,
307                       uint32_t             flags,
308                       UParseError          &pe,
309                       UErrorCode           &status)
310 {
311     if (U_FAILURE(status)) {
312         return NULL;
313     }
314 
315     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
316     UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
317     UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
318 
319     if ((flags & ~allFlags) != 0) {
320         status = U_REGEX_INVALID_FLAG;
321         return NULL;
322     }
323 
324     if ((flags & UREGEX_CANON_EQ) != 0) {
325         status = U_REGEX_UNIMPLEMENTED;
326         return NULL;
327     }
328 
329     RegexPattern *This = new RegexPattern;
330     if (This == NULL) {
331         status = U_MEMORY_ALLOCATION_ERROR;
332         return NULL;
333     }
334     if (U_FAILURE(This->fDeferredStatus)) {
335         status = This->fDeferredStatus;
336         delete This;
337         return NULL;
338     }
339     This->fFlags = flags;
340 
341     RegexCompile     compiler(This, status);
342     compiler.compile(regex, pe, status);
343 
344     if (U_FAILURE(status)) {
345         delete This;
346         This = NULL;
347     }
348 
349     return This;
350 }
351 
352 
353 //
354 //   compile, UText mode
355 //
356 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UParseError & pe,UErrorCode & status)357 RegexPattern::compile(UText                *regex,
358                       uint32_t             flags,
359                       UParseError          &pe,
360                       UErrorCode           &status)
361 {
362     if (U_FAILURE(status)) {
363         return NULL;
364     }
365 
366     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
367                               UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
368                               UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
369 
370     if ((flags & ~allFlags) != 0) {
371         status = U_REGEX_INVALID_FLAG;
372         return NULL;
373     }
374 
375     if ((flags & UREGEX_CANON_EQ) != 0) {
376         status = U_REGEX_UNIMPLEMENTED;
377         return NULL;
378     }
379 
380     RegexPattern *This = new RegexPattern;
381     if (This == NULL) {
382         status = U_MEMORY_ALLOCATION_ERROR;
383         return NULL;
384     }
385     if (U_FAILURE(This->fDeferredStatus)) {
386         status = This->fDeferredStatus;
387         delete This;
388         return NULL;
389     }
390     This->fFlags = flags;
391 
392     RegexCompile     compiler(This, status);
393     compiler.compile(regex, pe, status);
394 
395     if (U_FAILURE(status)) {
396         delete This;
397         This = NULL;
398     }
399 
400     return This;
401 }
402 
403 //
404 //   compile with default flags.
405 //
406 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,UParseError & pe,UErrorCode & err)407 RegexPattern::compile(const UnicodeString &regex,
408                       UParseError         &pe,
409                       UErrorCode          &err)
410 {
411     return compile(regex, 0, pe, err);
412 }
413 
414 
415 //
416 //   compile with default flags, UText mode
417 //
418 RegexPattern * U_EXPORT2
compile(UText * regex,UParseError & pe,UErrorCode & err)419 RegexPattern::compile(UText               *regex,
420                       UParseError         &pe,
421                       UErrorCode          &err)
422 {
423     return compile(regex, 0, pe, err);
424 }
425 
426 
427 //
428 //   compile with no UParseErr parameter.
429 //
430 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UErrorCode & err)431 RegexPattern::compile(const UnicodeString &regex,
432                       uint32_t             flags,
433                       UErrorCode          &err)
434 {
435     UParseError pe;
436     return compile(regex, flags, pe, err);
437 }
438 
439 
440 //
441 //   compile with no UParseErr parameter, UText mode
442 //
443 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UErrorCode & err)444 RegexPattern::compile(UText                *regex,
445                       uint32_t             flags,
446                       UErrorCode           &err)
447 {
448     UParseError pe;
449     return compile(regex, flags, pe, err);
450 }
451 
452 
453 //---------------------------------------------------------------------
454 //
455 //   flags
456 //
457 //---------------------------------------------------------------------
flags() const458 uint32_t RegexPattern::flags() const {
459     return fFlags;
460 }
461 
462 
463 //---------------------------------------------------------------------
464 //
465 //   matcher(UnicodeString, err)
466 //
467 //---------------------------------------------------------------------
matcher(const UnicodeString & input,UErrorCode & status) const468 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
469                                     UErrorCode          &status)  const {
470     RegexMatcher    *retMatcher = matcher(status);
471     if (retMatcher != NULL) {
472         retMatcher->fDeferredStatus = status;
473         retMatcher->reset(input);
474     }
475     return retMatcher;
476 }
477 
478 
479 //---------------------------------------------------------------------
480 //
481 //   matcher(status)
482 //
483 //---------------------------------------------------------------------
matcher(UErrorCode & status) const484 RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
485     RegexMatcher    *retMatcher = NULL;
486 
487     if (U_FAILURE(status)) {
488         return NULL;
489     }
490     if (U_FAILURE(fDeferredStatus)) {
491         status = fDeferredStatus;
492         return NULL;
493     }
494 
495     retMatcher = new RegexMatcher(this);
496     if (retMatcher == NULL) {
497         status = U_MEMORY_ALLOCATION_ERROR;
498         return NULL;
499     }
500     return retMatcher;
501 }
502 
503 
504 
505 //---------------------------------------------------------------------
506 //
507 //   matches        Convenience function to test for a match, starting
508 //                  with a pattern string and a data string.
509 //
510 //---------------------------------------------------------------------
matches(const UnicodeString & regex,const UnicodeString & input,UParseError & pe,UErrorCode & status)511 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
512               const UnicodeString   &input,
513                     UParseError     &pe,
514                     UErrorCode      &status) {
515 
516     if (U_FAILURE(status)) {return FALSE;}
517 
518     UBool         retVal;
519     RegexPattern *pat     = NULL;
520     RegexMatcher *matcher = NULL;
521 
522     pat     = RegexPattern::compile(regex, 0, pe, status);
523     matcher = pat->matcher(input, status);
524     retVal  = matcher->matches(status);
525 
526     delete matcher;
527     delete pat;
528     return retVal;
529 }
530 
531 
532 //
533 //   matches, UText mode
534 //
matches(UText * regex,UText * input,UParseError & pe,UErrorCode & status)535 UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
536                     UText           *input,
537                     UParseError     &pe,
538                     UErrorCode      &status) {
539 
540     if (U_FAILURE(status)) {return FALSE;}
541 
542     UBool         retVal  = FALSE;
543     RegexPattern *pat     = NULL;
544     RegexMatcher *matcher = NULL;
545 
546     pat     = RegexPattern::compile(regex, 0, pe, status);
547     matcher = pat->matcher(status);
548     if (U_SUCCESS(status)) {
549         matcher->reset(input);
550         retVal  = matcher->matches(status);
551     }
552 
553     delete matcher;
554     delete pat;
555     return retVal;
556 }
557 
558 
559 
560 
561 
562 //---------------------------------------------------------------------
563 //
564 //   pattern
565 //
566 //---------------------------------------------------------------------
pattern() const567 UnicodeString RegexPattern::pattern() const {
568     if (fPatternString != NULL) {
569         return *fPatternString;
570     } else if (fPattern == NULL) {
571         return UnicodeString();
572     } else {
573         UErrorCode status = U_ZERO_ERROR;
574         int64_t nativeLen = utext_nativeLength(fPattern);
575         int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
576         UnicodeString result;
577 
578         status = U_ZERO_ERROR;
579         UChar *resultChars = result.getBuffer(len16);
580         utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
581         result.releaseBuffer(len16);
582 
583         return result;
584     }
585 }
586 
587 
588 
589 
590 //---------------------------------------------------------------------
591 //
592 //   patternText
593 //
594 //---------------------------------------------------------------------
patternText(UErrorCode & status) const595 UText *RegexPattern::patternText(UErrorCode      &status) const {
596     if (U_FAILURE(status)) {return NULL;}
597     status = U_ZERO_ERROR;
598 
599     if (fPattern != NULL) {
600         return fPattern;
601     } else {
602         RegexStaticSets::initGlobals(&status);
603         return RegexStaticSets::gStaticSets->fEmptyText;
604     }
605 }
606 
607 
608 //--------------------------------------------------------------------------------
609 //
610 //  groupNumberFromName()
611 //
612 //--------------------------------------------------------------------------------
groupNumberFromName(const UnicodeString & groupName,UErrorCode & status) const613 int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
614     if (U_FAILURE(status)) {
615         return 0;
616     }
617 
618     // No need to explicitly check for syntactically valid names.
619     // Invalid ones will never be in the map, and the lookup will fail.
620 
621     int32_t number = uhash_geti(fNamedCaptureMap, &groupName);
622     if (number == 0) {
623         status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
624     }
625     return number;
626 }
627 
groupNumberFromName(const char * groupName,int32_t nameLength,UErrorCode & status) const628 int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
629     if (U_FAILURE(status)) {
630         return 0;
631     }
632     UnicodeString name(groupName, nameLength, US_INV);
633     return groupNumberFromName(name, status);
634 }
635 
636 
637 //---------------------------------------------------------------------
638 //
639 //   split
640 //
641 //---------------------------------------------------------------------
split(const UnicodeString & input,UnicodeString dest[],int32_t destCapacity,UErrorCode & status) const642 int32_t  RegexPattern::split(const UnicodeString &input,
643         UnicodeString    dest[],
644         int32_t          destCapacity,
645         UErrorCode      &status) const
646 {
647     if (U_FAILURE(status)) {
648         return 0;
649     };
650 
651     RegexMatcher  m(this);
652     int32_t r = 0;
653     // Check m's status to make sure all is ok.
654     if (U_SUCCESS(m.fDeferredStatus)) {
655     	r = m.split(input, dest, destCapacity, status);
656     }
657     return r;
658 }
659 
660 //
661 //   split, UText mode
662 //
split(UText * input,UText * dest[],int32_t destCapacity,UErrorCode & status) const663 int32_t  RegexPattern::split(UText *input,
664         UText           *dest[],
665         int32_t          destCapacity,
666         UErrorCode      &status) const
667 {
668     if (U_FAILURE(status)) {
669         return 0;
670     };
671 
672     RegexMatcher  m(this);
673     int32_t r = 0;
674     // Check m's status to make sure all is ok.
675     if (U_SUCCESS(m.fDeferredStatus)) {
676     	r = m.split(input, dest, destCapacity, status);
677     }
678     return r;
679 }
680 
681 
682 //---------------------------------------------------------------------
683 //
684 //   dump    Output the compiled form of the pattern.
685 //           Debugging function only.
686 //
687 //---------------------------------------------------------------------
dumpOp(int32_t index) const688 void   RegexPattern::dumpOp(int32_t index) const {
689     (void)index;  // Suppress warnings in non-debug build.
690 #if defined(REGEX_DEBUG)
691     static const char * const opNames[] = {URX_OPCODE_NAMES};
692     int32_t op          = fCompiledPat->elementAti(index);
693     int32_t val         = URX_VAL(op);
694     int32_t type        = URX_TYPE(op);
695     int32_t pinnedType  = type;
696     if ((uint32_t)pinnedType >= UPRV_LENGTHOF(opNames)) {
697         pinnedType = 0;
698     }
699 
700     printf("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]);
701     switch (type) {
702     case URX_NOP:
703     case URX_DOTANY:
704     case URX_DOTANY_ALL:
705     case URX_FAIL:
706     case URX_CARET:
707     case URX_DOLLAR:
708     case URX_BACKSLASH_G:
709     case URX_BACKSLASH_X:
710     case URX_END:
711     case URX_DOLLAR_M:
712     case URX_CARET_M:
713         // Types with no operand field of interest.
714         break;
715 
716     case URX_RESERVED_OP:
717     case URX_START_CAPTURE:
718     case URX_END_CAPTURE:
719     case URX_STATE_SAVE:
720     case URX_JMP:
721     case URX_JMP_SAV:
722     case URX_JMP_SAV_X:
723     case URX_BACKSLASH_B:
724     case URX_BACKSLASH_BU:
725     case URX_BACKSLASH_D:
726     case URX_BACKSLASH_Z:
727     case URX_STRING_LEN:
728     case URX_CTR_INIT:
729     case URX_CTR_INIT_NG:
730     case URX_CTR_LOOP:
731     case URX_CTR_LOOP_NG:
732     case URX_RELOC_OPRND:
733     case URX_STO_SP:
734     case URX_LD_SP:
735     case URX_BACKREF:
736     case URX_STO_INP_LOC:
737     case URX_JMPX:
738     case URX_LA_START:
739     case URX_LA_END:
740     case URX_BACKREF_I:
741     case URX_LB_START:
742     case URX_LB_CONT:
743     case URX_LB_END:
744     case URX_LBN_CONT:
745     case URX_LBN_END:
746     case URX_LOOP_C:
747     case URX_LOOP_DOT_I:
748     case URX_BACKSLASH_H:
749     case URX_BACKSLASH_R:
750     case URX_BACKSLASH_V:
751         // types with an integer operand field.
752         printf("%d", val);
753         break;
754 
755     case URX_ONECHAR:
756     case URX_ONECHAR_I:
757         if (val < 0x20) {
758             printf("%#x", val);
759         } else {
760             printf("'%s'", CStr(UnicodeString(val))());
761         }
762         break;
763 
764     case URX_STRING:
765     case URX_STRING_I:
766         {
767             int32_t lengthOp       = fCompiledPat->elementAti(index+1);
768             U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
769             int32_t length = URX_VAL(lengthOp);
770             UnicodeString str(fLiteralText, val, length);
771             printf("%s", CStr(str)());
772         }
773         break;
774 
775     case URX_SETREF:
776     case URX_LOOP_SR_I:
777         {
778             UnicodeString s;
779             UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
780             set->toPattern(s, TRUE);
781             printf("%s", CStr(s)());
782         }
783         break;
784 
785     case URX_STATIC_SETREF:
786     case URX_STAT_SETREF_N:
787         {
788             UnicodeString s;
789             if (val & URX_NEG_SET) {
790                 printf("NOT ");
791                 val &= ~URX_NEG_SET;
792             }
793             UnicodeSet *set = fStaticSets[val];
794             set->toPattern(s, TRUE);
795             printf("%s", CStr(s)());
796         }
797         break;
798 
799 
800     default:
801         printf("??????");
802         break;
803     }
804     printf("\n");
805 #endif
806 }
807 
808 
dumpPattern() const809 void RegexPattern::dumpPattern() const {
810 #if defined(REGEX_DEBUG)
811     int      index;
812 
813     UnicodeString patStr;
814     for (UChar32 c = utext_next32From(fPattern, 0); c != U_SENTINEL; c = utext_next32(fPattern)) {
815         patStr.append(c);
816     }
817     printf("Original Pattern:  \"%s\"\n", CStr(patStr)());
818     printf("   Min Match Length:  %d\n", fMinMatchLen);
819     printf("   Match Start Type:  %s\n", START_OF_MATCH_STR(fStartType));
820     if (fStartType == START_STRING) {
821         UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen);
822         printf("   Initial match string: \"%s\"\n", CStr(initialString)());
823     } else if (fStartType == START_SET) {
824         UnicodeString s;
825         fInitialChars->toPattern(s, TRUE);
826         printf("    Match First Chars: %s\n", CStr(s)());
827 
828     } else if (fStartType == START_CHAR) {
829         printf("    First char of Match: ");
830         if (fInitialChar > 0x20) {
831                 printf("'%s'\n", CStr(UnicodeString(fInitialChar))());
832             } else {
833                 printf("%#x\n", fInitialChar);
834             }
835     }
836 
837     printf("Named Capture Groups:\n");
838     if (uhash_count(fNamedCaptureMap) == 0) {
839         printf("   None\n");
840     } else {
841         int32_t pos = UHASH_FIRST;
842         const UHashElement *el = NULL;
843         while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
844             const UnicodeString *name = (const UnicodeString *)el->key.pointer;
845             int32_t number = el->value.integer;
846             printf("   %d\t%s\n", number, CStr(*name)());
847         }
848     }
849 
850     printf("\nIndex   Binary     Type             Operand\n" \
851            "-------------------------------------------\n");
852     for (index = 0; index<fCompiledPat->size(); index++) {
853         dumpOp(index);
854     }
855     printf("\n\n");
856 #endif
857 }
858 
859 
860 
861 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
862 
863 U_NAMESPACE_END
864 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
865