• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2005-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  ucasemap.cpp
11 *   encoding:   US-ASCII
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2005may06
16 *   created by: Markus W. Scherer
17 *
18 *   Case mapping service object and functions using it.
19 */
20 
21 #include "unicode/utypes.h"
22 #include "unicode/brkiter.h"
23 #include "unicode/ubrk.h"
24 #include "unicode/uloc.h"
25 #include "unicode/ustring.h"
26 #include "unicode/ucasemap.h"
27 #if !UCONFIG_NO_BREAK_ITERATION
28 #include "unicode/utext.h"
29 #endif
30 #include "unicode/utf.h"
31 #include "unicode/utf8.h"
32 #include "unicode/utf16.h"
33 #include "cmemory.h"
34 #include "cstring.h"
35 #include "ucase.h"
36 #include "ustr_imp.h"
37 
38 U_NAMESPACE_USE
39 
40 /* UCaseMap service object -------------------------------------------------- */
41 
42 U_CAPI UCaseMap * U_EXPORT2
ucasemap_open(const char * locale,uint32_t options,UErrorCode * pErrorCode)43 ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) {
44     UCaseMap *csm;
45 
46     if(U_FAILURE(*pErrorCode)) {
47         return NULL;
48     }
49 
50     csm=(UCaseMap *)uprv_malloc(sizeof(UCaseMap));
51     if(csm==NULL) {
52         return NULL;
53     }
54     uprv_memset(csm, 0, sizeof(UCaseMap));
55 
56     csm->csp=ucase_getSingleton();
57     ucasemap_setLocale(csm, locale, pErrorCode);
58     if(U_FAILURE(*pErrorCode)) {
59         uprv_free(csm);
60         return NULL;
61     }
62 
63     csm->options=options;
64     return csm;
65 }
66 
67 U_CAPI void U_EXPORT2
ucasemap_close(UCaseMap * csm)68 ucasemap_close(UCaseMap *csm) {
69     if(csm!=NULL) {
70 #if !UCONFIG_NO_BREAK_ITERATION
71         // Do not call ubrk_close() so that we do not depend on all of the BreakIterator code.
72         delete reinterpret_cast<BreakIterator *>(csm->iter);
73 #endif
74         uprv_free(csm);
75     }
76 }
77 
78 U_CAPI const char * U_EXPORT2
ucasemap_getLocale(const UCaseMap * csm)79 ucasemap_getLocale(const UCaseMap *csm) {
80     return csm->locale;
81 }
82 
83 U_CAPI uint32_t U_EXPORT2
ucasemap_getOptions(const UCaseMap * csm)84 ucasemap_getOptions(const UCaseMap *csm) {
85     return csm->options;
86 }
87 
88 U_CAPI void U_EXPORT2
ucasemap_setLocale(UCaseMap * csm,const char * locale,UErrorCode * pErrorCode)89 ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
90     int32_t length;
91 
92     if(U_FAILURE(*pErrorCode)) {
93         return;
94     }
95 
96     length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
97     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) {
98         *pErrorCode=U_ZERO_ERROR;
99         /* we only really need the language code for case mappings */
100         length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
101     }
102     if(length==sizeof(csm->locale)) {
103         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
104     }
105     csm->locCache=0;
106     if(U_SUCCESS(*pErrorCode)) {
107         ucase_getCaseLocale(csm->locale, &csm->locCache);
108     } else {
109         csm->locale[0]=0;
110     }
111 }
112 
113 U_CAPI void U_EXPORT2
ucasemap_setOptions(UCaseMap * csm,uint32_t options,UErrorCode *)114 ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode * /*pErrorCode*/) {
115     csm->options=options;
116 }
117 
118 /* UTF-8 string case mappings ----------------------------------------------- */
119 
120 /* TODO(markus): Move to a new, separate utf8case.c file. */
121 
122 /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
123 static inline int32_t
appendResult(uint8_t * dest,int32_t destIndex,int32_t destCapacity,int32_t result,const UChar * s)124 appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
125              int32_t result, const UChar *s) {
126     UChar32 c;
127     int32_t length;
128     UErrorCode errorCode;
129 
130     /* decode the result */
131     if(result<0) {
132         /* (not) original code point */
133         c=~result;
134         length=U8_LENGTH(c);
135     } else if(result<=UCASE_MAX_STRING_LENGTH) {
136         c=U_SENTINEL;
137         length=result;
138     } else {
139         c=result;
140         length=U8_LENGTH(c);
141     }
142     if(length>(INT32_MAX-destIndex)) {
143         return -1;  // integer overflow
144     }
145 
146     if(destIndex<destCapacity) {
147         /* append the result */
148         if(c>=0) {
149             /* code point */
150             UBool isError=FALSE;
151             U8_APPEND(dest, destIndex, destCapacity, c, isError);
152             if(isError) {
153                 /* overflow, nothing written */
154                 destIndex+=length;
155             }
156         } else {
157             /* string */
158             int32_t destLength;
159             errorCode=U_ZERO_ERROR;
160             u_strToUTF8(
161                 (char *)(dest+destIndex), destCapacity-destIndex, &destLength,
162                 s, length,
163                 &errorCode);
164             if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
165                 return -1;
166             }
167             if(destLength>(INT32_MAX-destIndex)) {
168                 return -1;  // integer overflow
169             }
170             destIndex+=destLength;
171             /* we might have an overflow, but we know the actual length */
172         }
173     } else {
174         /* preflight */
175         if(c>=0) {
176             destIndex+=length;
177         } else {
178             int32_t destLength;
179             errorCode=U_ZERO_ERROR;
180             u_strToUTF8(
181                 NULL, 0, &destLength,
182                 s, length,
183                 &errorCode);
184             if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
185                 return -1;
186             }
187             if(destLength>(INT32_MAX-destIndex)) {
188                 return -1;  // integer overflow
189             }
190             destIndex+=destLength;
191         }
192     }
193     return destIndex;
194 }
195 
196 static inline int32_t
appendUChar(uint8_t * dest,int32_t destIndex,int32_t destCapacity,UChar c)197 appendUChar(uint8_t *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
198     int32_t length=U8_LENGTH(c);
199     if(length>(INT32_MAX-destIndex)) {
200         return -1;  // integer overflow
201     }
202     int32_t limit=destIndex+length;
203     if(limit<=destCapacity) {
204         U8_APPEND_UNSAFE(dest, destIndex, c);
205     }
206     return limit;
207 }
208 
209 static inline int32_t
appendString(uint8_t * dest,int32_t destIndex,int32_t destCapacity,const uint8_t * s,int32_t length)210 appendString(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
211              const uint8_t *s, int32_t length) {
212     if(length>0) {
213         if(length>(INT32_MAX-destIndex)) {
214             return -1;  // integer overflow
215         }
216         if((destIndex+length)<=destCapacity) {
217             uprv_memcpy(dest+destIndex, s, length);
218         }
219         destIndex+=length;
220     }
221     return destIndex;
222 }
223 
224 static UChar32 U_CALLCONV
utf8_caseContextIterator(void * context,int8_t dir)225 utf8_caseContextIterator(void *context, int8_t dir) {
226     UCaseContext *csc=(UCaseContext *)context;
227     UChar32 c;
228 
229     if(dir<0) {
230         /* reset for backward iteration */
231         csc->index=csc->cpStart;
232         csc->dir=dir;
233     } else if(dir>0) {
234         /* reset for forward iteration */
235         csc->index=csc->cpLimit;
236         csc->dir=dir;
237     } else {
238         /* continue current iteration direction */
239         dir=csc->dir;
240     }
241 
242     if(dir<0) {
243         if(csc->start<csc->index) {
244             U8_PREV((const uint8_t *)csc->p, csc->start, csc->index, c);
245             return c;
246         }
247     } else {
248         if(csc->index<csc->limit) {
249             U8_NEXT((const uint8_t *)csc->p, csc->index, csc->limit, c);
250             return c;
251         }
252     }
253     return U_SENTINEL;
254 }
255 
256 /*
257  * Case-maps [srcStart..srcLimit[ but takes
258  * context [0..srcLength[ into account.
259  */
260 static int32_t
_caseMap(const UCaseMap * csm,UCaseMapFull * map,uint8_t * dest,int32_t destCapacity,const uint8_t * src,UCaseContext * csc,int32_t srcStart,int32_t srcLimit,UErrorCode * pErrorCode)261 _caseMap(const UCaseMap *csm, UCaseMapFull *map,
262          uint8_t *dest, int32_t destCapacity,
263          const uint8_t *src, UCaseContext *csc,
264          int32_t srcStart, int32_t srcLimit,
265          UErrorCode *pErrorCode) {
266     const UChar *s = NULL;
267     UChar32 c, c2 = 0;
268     int32_t srcIndex, destIndex;
269     int32_t locCache;
270 
271     locCache=csm->locCache;
272 
273     /* case mapping loop */
274     srcIndex=srcStart;
275     destIndex=0;
276     while(srcIndex<srcLimit) {
277         csc->cpStart=srcIndex;
278         U8_NEXT(src, srcIndex, srcLimit, c);
279         csc->cpLimit=srcIndex;
280         if(c<0) {
281             // Malformed UTF-8.
282             destIndex=appendString(dest, destIndex, destCapacity, src+csc->cpStart, srcIndex-csc->cpStart);
283             if(destIndex<0) {
284                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
285                 return 0;
286             }
287             continue;
288         }
289         c=map(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &locCache);
290         if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
291             /* fast path version of appendResult() for ASCII results */
292             dest[destIndex++]=(uint8_t)c2;
293         } else {
294             destIndex=appendResult(dest, destIndex, destCapacity, c, s);
295             if(destIndex<0) {
296                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
297                 return 0;
298             }
299         }
300     }
301 
302     if(destIndex>destCapacity) {
303         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
304     }
305     return destIndex;
306 }
307 
308 #if !UCONFIG_NO_BREAK_ITERATION
309 
310 U_CFUNC int32_t U_CALLCONV
ucasemap_internalUTF8ToTitle(const UCaseMap * csm,uint8_t * dest,int32_t destCapacity,const uint8_t * src,int32_t srcLength,UErrorCode * pErrorCode)311 ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
312          uint8_t *dest, int32_t destCapacity,
313          const uint8_t *src, int32_t srcLength,
314          UErrorCode *pErrorCode) {
315     const UChar *s;
316     UChar32 c;
317     int32_t prev, titleStart, titleLimit, idx, destIndex;
318     UBool isFirstIndex;
319 
320     if(U_FAILURE(*pErrorCode)) {
321         return 0;
322     }
323 
324     // Use the C++ abstract base class to minimize dependencies.
325     // TODO: Change UCaseMap.iter to store a BreakIterator directly.
326     BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter);
327 
328     /* set up local variables */
329     int32_t locCache=csm->locCache;
330     UCaseContext csc=UCASECONTEXT_INITIALIZER;
331     csc.p=(void *)src;
332     csc.limit=srcLength;
333     destIndex=0;
334     prev=0;
335     isFirstIndex=TRUE;
336 
337     /* titlecasing loop */
338     while(prev<srcLength) {
339         /* find next index where to titlecase */
340         if(isFirstIndex) {
341             isFirstIndex=FALSE;
342             idx=bi->first();
343         } else {
344             idx=bi->next();
345         }
346         if(idx==UBRK_DONE || idx>srcLength) {
347             idx=srcLength;
348         }
349 
350         /*
351          * Unicode 4 & 5 section 3.13 Default Case Operations:
352          *
353          * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
354          * #29, "Text Boundaries." Between each pair of word boundaries, find the first
355          * cased character F. If F exists, map F to default_title(F); then map each
356          * subsequent character C to default_lower(C).
357          *
358          * In this implementation, segment [prev..index[ into 3 parts:
359          * a) uncased characters (copy as-is) [prev..titleStart[
360          * b) first case letter (titlecase)         [titleStart..titleLimit[
361          * c) subsequent characters (lowercase)                 [titleLimit..index[
362          */
363         if(prev<idx) {
364             /* find and copy uncased characters [prev..titleStart[ */
365             titleStart=titleLimit=prev;
366             U8_NEXT(src, titleLimit, idx, c);
367             if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) {
368                 /* Adjust the titlecasing index (titleStart) to the next cased character. */
369                 for(;;) {
370                     titleStart=titleLimit;
371                     if(titleLimit==idx) {
372                         /*
373                          * only uncased characters in [prev..index[
374                          * stop with titleStart==titleLimit==index
375                          */
376                         break;
377                     }
378                     U8_NEXT(src, titleLimit, idx, c);
379                     if(UCASE_NONE!=ucase_getType(csm->csp, c)) {
380                         break; /* cased letter at [titleStart..titleLimit[ */
381                     }
382                 }
383                 destIndex=appendString(dest, destIndex, destCapacity, src+prev, titleStart-prev);
384                 if(destIndex<0) {
385                     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
386                     return 0;
387                 }
388             }
389 
390             if(titleStart<titleLimit) {
391                 /* titlecase c which is from [titleStart..titleLimit[ */
392                 if(c>=0) {
393                     csc.cpStart=titleStart;
394                     csc.cpLimit=titleLimit;
395                     c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache);
396                     destIndex=appendResult(dest, destIndex, destCapacity, c, s);
397                 } else {
398                     // Malformed UTF-8.
399                     destIndex=appendString(dest, destIndex, destCapacity, src+titleStart, titleLimit-titleStart);
400                 }
401                 if(destIndex<0) {
402                     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
403                     return 0;
404                 }
405 
406                 /* Special case Dutch IJ titlecasing */
407                 if (titleStart+1 < idx &&
408                         ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_DUTCH &&
409                         (src[titleStart] == 0x0049 || src[titleStart] == 0x0069) &&
410                         (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) {
411                     destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
412                     titleLimit++;
413                 }
414                 /* lowercase [titleLimit..index[ */
415                 if(titleLimit<idx) {
416                     if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) {
417                         /* Normal operation: Lowercase the rest of the word. */
418                         destIndex+=
419                             _caseMap(
420                                 csm, ucase_toFullLower,
421                                 dest+destIndex, destCapacity-destIndex,
422                                 src, &csc,
423                                 titleLimit, idx,
424                                 pErrorCode);
425                         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
426                             *pErrorCode=U_ZERO_ERROR;
427                         }
428                         if(U_FAILURE(*pErrorCode)) {
429                             return destIndex;
430                         }
431                     } else {
432                         /* Optionally just copy the rest of the word unchanged. */
433                         destIndex=appendString(dest, destIndex, destCapacity, src+titleLimit, idx-titleLimit);
434                         if(destIndex<0) {
435                             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
436                             return 0;
437                         }
438                     }
439                 }
440             }
441         }
442 
443         prev=idx;
444     }
445 
446     if(destIndex>destCapacity) {
447         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
448     }
449     return destIndex;
450 }
451 
452 #endif
453 
454 U_NAMESPACE_BEGIN
455 namespace GreekUpper {
456 
isFollowedByCasedLetter(const UCaseProps * csp,const uint8_t * s,int32_t i,int32_t length)457 UBool isFollowedByCasedLetter(const UCaseProps *csp, const uint8_t *s, int32_t i, int32_t length) {
458     while (i < length) {
459         UChar32 c;
460         U8_NEXT(s, i, length, c);
461         int32_t type = ucase_getTypeOrIgnorable(csp, c);
462         if ((type & UCASE_IGNORABLE) != 0) {
463             // Case-ignorable, continue with the loop.
464         } else if (type != UCASE_NONE) {
465             return TRUE;  // Followed by cased letter.
466         } else {
467             return FALSE;  // Uncased and not case-ignorable.
468         }
469     }
470     return FALSE;  // Not followed by cased letter.
471 }
472 
473 // Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
toUpper(const UCaseMap * csm,uint8_t * dest,int32_t destCapacity,const uint8_t * src,int32_t srcLength,UErrorCode * pErrorCode)474 int32_t toUpper(const UCaseMap *csm,
475                 uint8_t *dest, int32_t destCapacity,
476                 const uint8_t *src, int32_t srcLength,
477                 UErrorCode *pErrorCode) {
478     int32_t locCache = UCASE_LOC_GREEK;
479     int32_t destIndex=0;
480     uint32_t state = 0;
481     for (int32_t i = 0; i < srcLength;) {
482         int32_t nextIndex = i;
483         UChar32 c;
484         U8_NEXT(src, nextIndex, srcLength, c);
485         uint32_t nextState = 0;
486         int32_t type = ucase_getTypeOrIgnorable(csm->csp, c);
487         if ((type & UCASE_IGNORABLE) != 0) {
488             // c is case-ignorable
489             nextState |= (state & AFTER_CASED);
490         } else if (type != UCASE_NONE) {
491             // c is cased
492             nextState |= AFTER_CASED;
493         }
494         uint32_t data = getLetterData(c);
495         if (data > 0) {
496             uint32_t upper = data & UPPER_MASK;
497             // Add a dialytika to this iota or ypsilon vowel
498             // if we removed a tonos from the previous vowel,
499             // and that previous vowel did not also have (or gain) a dialytika.
500             // Adding one only to the final vowel in a longer sequence
501             // (which does not occur in normal writing) would require lookahead.
502             // Set the same flag as for preserving an existing dialytika.
503             if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
504                     (upper == 0x399 || upper == 0x3A5)) {
505                 data |= HAS_DIALYTIKA;
506             }
507             int32_t numYpogegrammeni = 0;  // Map each one to a trailing, spacing, capital iota.
508             if ((data & HAS_YPOGEGRAMMENI) != 0) {
509                 numYpogegrammeni = 1;
510             }
511             // Skip combining diacritics after this Greek letter.
512             int32_t nextNextIndex = nextIndex;
513             while (nextIndex < srcLength) {
514                 UChar32 c2;
515                 U8_NEXT(src, nextNextIndex, srcLength, c2);
516                 uint32_t diacriticData = getDiacriticData(c2);
517                 if (diacriticData != 0) {
518                     data |= diacriticData;
519                     if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
520                         ++numYpogegrammeni;
521                     }
522                     nextIndex = nextNextIndex;
523                 } else {
524                     break;  // not a Greek diacritic
525                 }
526             }
527             if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
528                 nextState |= AFTER_VOWEL_WITH_ACCENT;
529             }
530             // Map according to Greek rules.
531             UBool addTonos = FALSE;
532             if (upper == 0x397 &&
533                     (data & HAS_ACCENT) != 0 &&
534                     numYpogegrammeni == 0 &&
535                     (state & AFTER_CASED) == 0 &&
536                     !isFollowedByCasedLetter(csm->csp, src, nextIndex, srcLength)) {
537                 // Keep disjunctive "or" with (only) a tonos.
538                 // We use the same "word boundary" conditions as for the Final_Sigma test.
539                 if (i == nextIndex) {
540                     upper = 0x389;  // Preserve the precomposed form.
541                 } else {
542                     addTonos = TRUE;
543                 }
544             } else if ((data & HAS_DIALYTIKA) != 0) {
545                 // Preserve a vowel with dialytika in precomposed form if it exists.
546                 if (upper == 0x399) {
547                     upper = 0x3AA;
548                     data &= ~HAS_EITHER_DIALYTIKA;
549                 } else if (upper == 0x3A5) {
550                     upper = 0x3AB;
551                     data &= ~HAS_EITHER_DIALYTIKA;
552                 }
553             }
554             destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper);
555             if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
556                 destIndex=appendUChar(dest, destIndex, destCapacity, 0x308);  // restore or add a dialytika
557             }
558             if (destIndex >= 0 && addTonos) {
559                 destIndex=appendUChar(dest, destIndex, destCapacity, 0x301);
560             }
561             while (destIndex >= 0 && numYpogegrammeni > 0) {
562                 destIndex=appendUChar(dest, destIndex, destCapacity, 0x399);
563                 --numYpogegrammeni;
564             }
565             if(destIndex<0) {
566                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
567                 return 0;
568             }
569         } else if(c>=0) {
570             const UChar *s;
571             UChar32 c2 = 0;
572             c=ucase_toFullUpper(csm->csp, c, NULL, NULL, &s, csm->locale, &locCache);
573             if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
574                 /* fast path version of appendResult() for ASCII results */
575                 dest[destIndex++]=(uint8_t)c2;
576             } else {
577                 destIndex=appendResult(dest, destIndex, destCapacity, c, s);
578                 if(destIndex<0) {
579                     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
580                     return 0;
581                 }
582             }
583         } else {
584             // Malformed UTF-8.
585             destIndex=appendString(dest, destIndex, destCapacity, src+i, nextIndex-i);
586             if(destIndex<0) {
587                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
588                 return 0;
589             }
590         }
591         i = nextIndex;
592         state = nextState;
593     }
594 
595     if(destIndex>destCapacity) {
596         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
597     }
598     return destIndex;
599 }
600 
601 }  // namespace GreekUpper
602 U_NAMESPACE_END
603 
604 static int32_t U_CALLCONV
ucasemap_internalUTF8ToLower(const UCaseMap * csm,uint8_t * dest,int32_t destCapacity,const uint8_t * src,int32_t srcLength,UErrorCode * pErrorCode)605 ucasemap_internalUTF8ToLower(const UCaseMap *csm,
606                              uint8_t *dest, int32_t destCapacity,
607                              const uint8_t *src, int32_t srcLength,
608                              UErrorCode *pErrorCode) {
609     UCaseContext csc=UCASECONTEXT_INITIALIZER;
610     csc.p=(void *)src;
611     csc.limit=srcLength;
612     return _caseMap(
613         csm, ucase_toFullLower,
614         dest, destCapacity,
615         src, &csc, 0, srcLength,
616         pErrorCode);
617 }
618 
619 static int32_t U_CALLCONV
ucasemap_internalUTF8ToUpper(const UCaseMap * csm,uint8_t * dest,int32_t destCapacity,const uint8_t * src,int32_t srcLength,UErrorCode * pErrorCode)620 ucasemap_internalUTF8ToUpper(const UCaseMap *csm,
621                              uint8_t *dest, int32_t destCapacity,
622                              const uint8_t *src, int32_t srcLength,
623                              UErrorCode *pErrorCode) {
624     int32_t locCache = csm->locCache;
625     if (ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_GREEK) {
626         return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, pErrorCode);
627     }
628     UCaseContext csc=UCASECONTEXT_INITIALIZER;
629     csc.p=(void *)src;
630     csc.limit=srcLength;
631     return _caseMap(
632         csm, ucase_toFullUpper,
633         dest, destCapacity,
634         src, &csc, 0, srcLength,
635         pErrorCode);
636 }
637 
638 static int32_t
utf8_foldCase(const UCaseProps * csp,uint8_t * dest,int32_t destCapacity,const uint8_t * src,int32_t srcLength,uint32_t options,UErrorCode * pErrorCode)639 utf8_foldCase(const UCaseProps *csp,
640               uint8_t *dest, int32_t destCapacity,
641               const uint8_t *src, int32_t srcLength,
642               uint32_t options,
643               UErrorCode *pErrorCode) {
644     int32_t srcIndex, destIndex;
645 
646     const UChar *s;
647     UChar32 c, c2;
648     int32_t start;
649 
650     /* case mapping loop */
651     srcIndex=destIndex=0;
652     while(srcIndex<srcLength) {
653         start=srcIndex;
654         U8_NEXT(src, srcIndex, srcLength, c);
655         if(c<0) {
656             // Malformed UTF-8.
657             destIndex=appendString(dest, destIndex, destCapacity, src+start, srcIndex-start);
658             if(destIndex<0) {
659                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
660                 return 0;
661             }
662             continue;
663         }
664         c=ucase_toFullFolding(csp, c, &s, options);
665         if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
666             /* fast path version of appendResult() for ASCII results */
667             dest[destIndex++]=(uint8_t)c2;
668         } else {
669             destIndex=appendResult(dest, destIndex, destCapacity, c, s);
670             if(destIndex<0) {
671                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
672                 return 0;
673             }
674         }
675     }
676 
677     if(destIndex>destCapacity) {
678         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
679     }
680     return destIndex;
681 }
682 
683 static int32_t U_CALLCONV
ucasemap_internalUTF8Fold(const UCaseMap * csm,uint8_t * dest,int32_t destCapacity,const uint8_t * src,int32_t srcLength,UErrorCode * pErrorCode)684 ucasemap_internalUTF8Fold(const UCaseMap *csm,
685                           uint8_t *dest, int32_t destCapacity,
686                           const uint8_t *src, int32_t srcLength,
687                           UErrorCode *pErrorCode) {
688     return utf8_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode);
689 }
690 
691 U_CFUNC int32_t
ucasemap_mapUTF8(const UCaseMap * csm,uint8_t * dest,int32_t destCapacity,const uint8_t * src,int32_t srcLength,UTF8CaseMapper * stringCaseMapper,UErrorCode * pErrorCode)692 ucasemap_mapUTF8(const UCaseMap *csm,
693                  uint8_t *dest, int32_t destCapacity,
694                  const uint8_t *src, int32_t srcLength,
695                  UTF8CaseMapper *stringCaseMapper,
696                  UErrorCode *pErrorCode) {
697     int32_t destLength;
698 
699     /* check argument values */
700     if(U_FAILURE(*pErrorCode)) {
701         return 0;
702     }
703     if( destCapacity<0 ||
704         (dest==NULL && destCapacity>0) ||
705         src==NULL ||
706         srcLength<-1
707     ) {
708         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
709         return 0;
710     }
711 
712     /* get the string length */
713     if(srcLength==-1) {
714         srcLength=(int32_t)uprv_strlen((const char *)src);
715     }
716 
717     /* check for overlapping source and destination */
718     if( dest!=NULL &&
719         ((src>=dest && src<(dest+destCapacity)) ||
720          (dest>=src && dest<(src+srcLength)))
721     ) {
722         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
723         return 0;
724     }
725 
726     destLength=stringCaseMapper(csm, dest, destCapacity, src, srcLength, pErrorCode);
727     return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode);
728 }
729 
730 /* public API functions */
731 
732 U_CAPI int32_t U_EXPORT2
ucasemap_utf8ToLower(const UCaseMap * csm,char * dest,int32_t destCapacity,const char * src,int32_t srcLength,UErrorCode * pErrorCode)733 ucasemap_utf8ToLower(const UCaseMap *csm,
734                      char *dest, int32_t destCapacity,
735                      const char *src, int32_t srcLength,
736                      UErrorCode *pErrorCode) {
737     return ucasemap_mapUTF8(csm,
738                    (uint8_t *)dest, destCapacity,
739                    (const uint8_t *)src, srcLength,
740                    ucasemap_internalUTF8ToLower, pErrorCode);
741 }
742 
743 U_CAPI int32_t U_EXPORT2
ucasemap_utf8ToUpper(const UCaseMap * csm,char * dest,int32_t destCapacity,const char * src,int32_t srcLength,UErrorCode * pErrorCode)744 ucasemap_utf8ToUpper(const UCaseMap *csm,
745                      char *dest, int32_t destCapacity,
746                      const char *src, int32_t srcLength,
747                      UErrorCode *pErrorCode) {
748     return ucasemap_mapUTF8(csm,
749                    (uint8_t *)dest, destCapacity,
750                    (const uint8_t *)src, srcLength,
751                    ucasemap_internalUTF8ToUpper, pErrorCode);
752 }
753 
754 U_CAPI int32_t U_EXPORT2
ucasemap_utf8FoldCase(const UCaseMap * csm,char * dest,int32_t destCapacity,const char * src,int32_t srcLength,UErrorCode * pErrorCode)755 ucasemap_utf8FoldCase(const UCaseMap *csm,
756                       char *dest, int32_t destCapacity,
757                       const char *src, int32_t srcLength,
758                       UErrorCode *pErrorCode) {
759     return ucasemap_mapUTF8(csm,
760                    (uint8_t *)dest, destCapacity,
761                    (const uint8_t *)src, srcLength,
762                    ucasemap_internalUTF8Fold, pErrorCode);
763 }
764