1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2001-2011, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  casetrn.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2004sep03
14 *   created by: Markus W. Scherer
15 *
16 *   Implementation class for lower-/upper-/title-casing transliterators.
17 */
18 
19 #include "unicode/utypes.h"
20 
21 #if !UCONFIG_NO_TRANSLITERATION
22 
23 #include "unicode/uchar.h"
24 #include "unicode/ustring.h"
25 #include "unicode/utf.h"
26 #include "unicode/utf16.h"
27 #include "tolowtrn.h"
28 #include "ucase.h"
29 #include "cpputils.h"
30 
31 /* case context iterator using a Replaceable */
32 U_CFUNC UChar32 U_CALLCONV
utrans_rep_caseContextIterator(void * context,int8_t dir)33 utrans_rep_caseContextIterator(void *context, int8_t dir)
34 {
35     U_NAMESPACE_USE
36 
37     UCaseContext *csc=(UCaseContext *)context;
38     Replaceable *rep=(Replaceable *)csc->p;
39     UChar32 c;
40 
41     if(dir<0) {
42         /* reset for backward iteration */
43         csc->index=csc->cpStart;
44         csc->dir=dir;
45     } else if(dir>0) {
46         /* reset for forward iteration */
47         csc->index=csc->cpLimit;
48         csc->dir=dir;
49     } else {
50         /* continue current iteration direction */
51         dir=csc->dir;
52     }
53 
54     // automatically adjust start and limit if the Replaceable disagrees
55     // with the original values
56     if(dir<0) {
57         if(csc->start<csc->index) {
58             c=rep->char32At(csc->index-1);
59             if(c<0) {
60                 csc->start=csc->index;
61             } else {
62                 csc->index-=U16_LENGTH(c);
63                 return c;
64             }
65         }
66     } else {
67         // detect, and store in csc->b1, if we hit the limit
68         if(csc->index<csc->limit) {
69             c=rep->char32At(csc->index);
70             if(c<0) {
71                 csc->limit=csc->index;
72                 csc->b1=TRUE;
73             } else {
74                 csc->index+=U16_LENGTH(c);
75                 return c;
76             }
77         } else {
78             csc->b1=TRUE;
79         }
80     }
81     return U_SENTINEL;
82 }
83 
84 U_NAMESPACE_BEGIN
85 
// RTTI boilerplate for CaseMapTransliterator, using the "abstract" variant of
// the macro. The macro must be invoked exactly once for the class.
UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(CaseMapTransliterator)
87 
88 /**
89  * Constructs a transliterator.
90  */
91 CaseMapTransliterator::CaseMapTransliterator(const UnicodeString &id, UCaseMapFull *map) :
92     Transliterator(id, 0),
93     fCsp(ucase_getSingleton()),
94     fMap(map)
95 {
96     // TODO test incremental mode with context-sensitive text (e.g. greek sigma)
97     // TODO need to call setMaximumContextLength()?!
98 }
99 
100 /**
101  * Destructor.
102  */
~CaseMapTransliterator()103 CaseMapTransliterator::~CaseMapTransliterator() {
104 }
105 
106 /**
107  * Copy constructor.
108  */
CaseMapTransliterator(const CaseMapTransliterator & o)109 CaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) :
110     Transliterator(o),
111     fCsp(o.fCsp), fMap(o.fMap)
112 {
113 }
114 
115 /**
116  * Assignment operator.
117  */
118 /*CaseMapTransliterator& CaseMapTransliterator::operator=(const CaseMapTransliterator& o) {
119     Transliterator::operator=(o);
120     fCsp = o.fCsp;
121     fMap = o.fMap;
122     return *this;
123 }*/
124 
125 /**
126  * Transliterator API.
127  */
128 /*Transliterator* CaseMapTransliterator::clone(void) const {
129     return new CaseMapTransliterator(*this);
130 }*/
131 
132 /**
133  * Implements {@link Transliterator#handleTransliterate}.
134  */
handleTransliterate(Replaceable & text,UTransPosition & offsets,UBool isIncremental) const135 void CaseMapTransliterator::handleTransliterate(Replaceable& text,
136                                  UTransPosition& offsets,
137                                  UBool isIncremental) const
138 {
139     if (offsets.start >= offsets.limit) {
140         return;
141     }
142 
143     UCaseContext csc;
144     uprv_memset(&csc, 0, sizeof(csc));
145     csc.p = &text;
146     csc.start = offsets.contextStart;
147     csc.limit = offsets.contextLimit;
148 
149     UnicodeString tmp;
150     const UChar *s;
151     UChar32 c;
152     int32_t textPos, delta, result, locCache=0;
153 
154     for(textPos=offsets.start; textPos<offsets.limit;) {
155         csc.cpStart=textPos;
156         c=text.char32At(textPos);
157         csc.cpLimit=textPos+=U16_LENGTH(c);
158 
159         result=fMap(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
160 
161         if(csc.b1 && isIncremental) {
162             // fMap() tried to look beyond the context limit
163             // wait for more input
164             offsets.start=csc.cpStart;
165             return;
166         }
167 
168         if(result>=0) {
169             // replace the current code point with its full case mapping result
170             // see UCASE_MAX_STRING_LENGTH
171             if(result<=UCASE_MAX_STRING_LENGTH) {
172                 // string s[result]
173                 tmp.setTo(FALSE, s, result);
174                 delta=result-U16_LENGTH(c);
175             } else {
176                 // single code point
177                 tmp.setTo(result);
178                 delta=tmp.length()-U16_LENGTH(c);
179             }
180             text.handleReplaceBetween(csc.cpStart, textPos, tmp);
181             if(delta!=0) {
182                 textPos+=delta;
183                 csc.limit=offsets.contextLimit+=delta;
184                 offsets.limit+=delta;
185             }
186         }
187     }
188     offsets.start=textPos;
189 }
190 
191 U_NAMESPACE_END
192 
193 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
194