1 //
2 //   Copyright (C) 2012 International Business Machines Corporation
3 //   and others. All rights reserved.
4 //
5 //   file:  regeximp.cpp
6 //
7 //           ICU Regular Expressions,
8 //             miscellaneous implementation functions.
9 //
10 
11 #include "unicode/utypes.h"
12 
13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
14 #include "regeximp.h"
15 #include "unicode/utf16.h"
16 
17 U_NAMESPACE_BEGIN
18 
CaseFoldingUTextIterator(UText & text)19 CaseFoldingUTextIterator::CaseFoldingUTextIterator(UText &text) :
20    fUText(text), fcsp(NULL), fFoldChars(NULL), fFoldLength(0) {
21    fcsp = ucase_getSingleton();
22 }
23 
~CaseFoldingUTextIterator()24 CaseFoldingUTextIterator::~CaseFoldingUTextIterator() {}
25 
next()26 UChar32 CaseFoldingUTextIterator::next() {
27     UChar32  foldedC;
28     UChar32  originalC;
29     if (fFoldChars == NULL) {
30         // We are not in a string folding of an earlier character.
31         // Start handling the next char from the input UText.
32         originalC = UTEXT_NEXT32(&fUText);
33         if (originalC == U_SENTINEL) {
34             return originalC;
35         }
36         fFoldLength = ucase_toFullFolding(fcsp, originalC, &fFoldChars, U_FOLD_CASE_DEFAULT);
37         if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) {
38             // input code point folds to a single code point, possibly itself.
39             // See comment in ucase.h for explanation of return values from ucase_toFullFoldings.
40             if (fFoldLength < 0) {
41                 fFoldLength = ~fFoldLength;
42             }
43             foldedC = (UChar32)fFoldLength;
44             fFoldChars = NULL;
45             return foldedC;
46         }
47         // String foldings fall through here.
48         fFoldIndex = 0;
49     }
50 
51     U16_NEXT(fFoldChars, fFoldIndex, fFoldLength, foldedC);
52     if (fFoldIndex >= fFoldLength) {
53         fFoldChars = NULL;
54     }
55     return foldedC;
56 }
57 
58 
inExpansion()59 UBool CaseFoldingUTextIterator::inExpansion() {
60     return fFoldChars != NULL;
61 }
62 
63 
64 
CaseFoldingUCharIterator(const UChar * chars,int64_t start,int64_t limit)65 CaseFoldingUCharIterator::CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limit) :
66    fChars(chars), fIndex(start), fLimit(limit), fcsp(NULL), fFoldChars(NULL), fFoldLength(0) {
67    fcsp = ucase_getSingleton();
68 }
69 
70 
~CaseFoldingUCharIterator()71 CaseFoldingUCharIterator::~CaseFoldingUCharIterator() {}
72 
73 
next()74 UChar32 CaseFoldingUCharIterator::next() {
75     UChar32  foldedC;
76     UChar32  originalC;
77     if (fFoldChars == NULL) {
78         // We are not in a string folding of an earlier character.
79         // Start handling the next char from the input UText.
80         if (fIndex >= fLimit) {
81             return U_SENTINEL;
82         }
83         U16_NEXT(fChars, fIndex, fLimit, originalC);
84 
85         fFoldLength = ucase_toFullFolding(fcsp, originalC, &fFoldChars, U_FOLD_CASE_DEFAULT);
86         if (fFoldLength >= UCASE_MAX_STRING_LENGTH || fFoldLength < 0) {
87             // input code point folds to a single code point, possibly itself.
88             // See comment in ucase.h for explanation of return values from ucase_toFullFoldings.
89             if (fFoldLength < 0) {
90                 fFoldLength = ~fFoldLength;
91             }
92             foldedC = (UChar32)fFoldLength;
93             fFoldChars = NULL;
94             return foldedC;
95         }
96         // String foldings fall through here.
97         fFoldIndex = 0;
98     }
99 
100     U16_NEXT(fFoldChars, fFoldIndex, fFoldLength, foldedC);
101     if (fFoldIndex >= fFoldLength) {
102         fFoldChars = NULL;
103     }
104     return foldedC;
105 }
106 
107 
inExpansion()108 UBool CaseFoldingUCharIterator::inExpansion() {
109     return fFoldChars != NULL;
110 }
111 
getIndex()112 int64_t CaseFoldingUCharIterator::getIndex() {
113     return fIndex;
114 }
115 
116 
117 U_NAMESPACE_END
118 
119 #endif
120 
121