1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *  * Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  *  * Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in
12  *    the documentation and/or other materials provided with the
13  *    distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <wctype.h>
30 
31 #include <ctype.h>
32 #include <errno.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <wchar.h>
36 
37 #include "private/icu.h"
38 
// Numeric identifiers for the character classes handled by wctype() and
// iswctype(). wctype() returns the index of the matching name in its name
// table as the class value, so the order of these enumerators has to stay in
// step with that table. WC_TYPE_INVALID (0) is what an unknown name maps to,
// and WC_TYPE_MAX is the number of entries, not a class of its own.
enum {
  WC_TYPE_INVALID = 0,
  WC_TYPE_ALNUM = 1,
  WC_TYPE_ALPHA = 2,
  WC_TYPE_BLANK = 3,
  WC_TYPE_CNTRL = 4,
  WC_TYPE_DIGIT = 5,
  WC_TYPE_GRAPH = 6,
  WC_TYPE_LOWER = 7,
  WC_TYPE_PRINT = 8,
  WC_TYPE_PUNCT = 9,
  WC_TYPE_SPACE = 10,
  WC_TYPE_UPPER = 11,
  WC_TYPE_XDIGIT = 12,
  WC_TYPE_MAX = 13
};
55 
iswalnum(wint_t wc)56 int iswalnum(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_ALNUM, isalnum); }
iswalpha(wint_t wc)57 int iswalpha(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_ALPHABETIC, isalpha); }
iswblank(wint_t wc)58 int iswblank(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_BLANK, isblank); }
iswgraph(wint_t wc)59 int iswgraph(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_GRAPH, isgraph); }
iswlower(wint_t wc)60 int iswlower(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_LOWERCASE, islower); }
iswprint(wint_t wc)61 int iswprint(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_PRINT, isprint); }
iswspace(wint_t wc)62 int iswspace(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_WHITE_SPACE, isspace); }
iswupper(wint_t wc)63 int iswupper(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_UPPERCASE, isupper); }
iswxdigit(wint_t wc)64 int iswxdigit(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_XDIGIT, isxdigit); }
65 
iswcntrl(wint_t wc)66 int iswcntrl(wint_t wc) {
67   typedef int8_t (*FnT)(UChar32);
68   static auto u_charType = reinterpret_cast<FnT>(__find_icu_symbol("u_charType"));
69   return u_charType ? (u_charType(wc) == U_CONTROL_CHAR) : iscntrl(wc);
70 }
71 
iswdigit(wint_t wc)72 int iswdigit(wint_t wc) {
73   typedef UBool (*FnT)(UChar32);
74   static auto u_isdigit = reinterpret_cast<FnT>(__find_icu_symbol("u_isdigit"));
75   return u_isdigit ? u_isdigit(wc) : isdigit(wc);
76 }
77 
iswpunct(wint_t wc)78 int iswpunct(wint_t wc) {
79   typedef UBool (*FnT)(UChar32);
80   static auto u_ispunct = reinterpret_cast<FnT>(__find_icu_symbol("u_ispunct"));
81   return u_ispunct ? u_ispunct(wc) : ispunct(wc);
82 }
83 
// Locale-taking variants of the classification predicates. The locale_t
// argument is accepted but ignored: each function simply forwards the
// character to the corresponding non-locale predicate.

int iswalnum_l(wint_t c, locale_t) {
  return iswalnum(c);
}

int iswalpha_l(wint_t c, locale_t) {
  return iswalpha(c);
}

int iswblank_l(wint_t c, locale_t) {
  return iswblank(c);
}

int iswcntrl_l(wint_t c, locale_t) {
  return iswcntrl(c);
}

int iswdigit_l(wint_t c, locale_t) {
  return iswdigit(c);
}

int iswgraph_l(wint_t c, locale_t) {
  return iswgraph(c);
}

int iswlower_l(wint_t c, locale_t) {
  return iswlower(c);
}

int iswprint_l(wint_t c, locale_t) {
  return iswprint(c);
}

int iswpunct_l(wint_t c, locale_t) {
  return iswpunct(c);
}

int iswspace_l(wint_t c, locale_t) {
  return iswspace(c);
}

int iswupper_l(wint_t c, locale_t) {
  return iswupper(c);
}

int iswxdigit_l(wint_t c, locale_t) {
  return iswxdigit(c);
}
96 
iswctype(wint_t wc,wctype_t char_class)97 int iswctype(wint_t wc, wctype_t char_class) {
98   switch (char_class) {
99     case WC_TYPE_ALNUM: return iswalnum(wc);
100     case WC_TYPE_ALPHA: return iswalpha(wc);
101     case WC_TYPE_BLANK: return iswblank(wc);
102     case WC_TYPE_CNTRL: return iswcntrl(wc);
103     case WC_TYPE_DIGIT: return iswdigit(wc);
104     case WC_TYPE_GRAPH: return iswgraph(wc);
105     case WC_TYPE_LOWER: return iswlower(wc);
106     case WC_TYPE_PRINT: return iswprint(wc);
107     case WC_TYPE_PUNCT: return iswpunct(wc);
108     case WC_TYPE_SPACE: return iswspace(wc);
109     case WC_TYPE_UPPER: return iswupper(wc);
110     case WC_TYPE_XDIGIT: return iswxdigit(wc);
111     default: return 0;
112   }
113 }
114 
// Locale-taking variant of iswctype(). The locale_t argument is ignored and
// the call is forwarded unchanged.
int iswctype_l(wint_t wc, wctype_t char_class, locale_t) {
  const int matches = iswctype(wc, char_class);
  return matches;
}
118 
towlower(wint_t wc)119 wint_t towlower(wint_t wc) {
120   if (wc < 0x80) {
121     if (wc >= 'A' && wc <= 'Z') return wc | 0x20;
122     return wc;
123   }
124 
125   typedef UChar32 (*FnT)(UChar32);
126   static auto u_tolower = reinterpret_cast<FnT>(__find_icu_symbol("u_tolower"));
127   return u_tolower ? u_tolower(wc) : tolower(wc);
128 }
129 
towupper(wint_t wc)130 wint_t towupper(wint_t wc) {
131   if (wc < 0x80) {
132     // Using EOR rather than AND makes no difference on arm, but saves an
133     // instruction on arm64.
134     if (wc >= 'a' && wc <= 'z') return wc ^ 0x20;
135     return wc;
136   }
137 
138   typedef UChar32 (*FnT)(UChar32);
139   static auto u_toupper = reinterpret_cast<FnT>(__find_icu_symbol("u_toupper"));
140   return u_toupper ? u_toupper(wc) : toupper(wc);
141 }
142 
// Locale-taking variants of the case-mapping functions. The locale_t argument
// is ignored; the character is forwarded to towupper()/towlower().

wint_t towupper_l(wint_t c, locale_t) {
  return towupper(c);
}

wint_t towlower_l(wint_t c, locale_t) {
  return towlower(c);
}
145 
wctype(const char * property)146 wctype_t wctype(const char* property) {
147   static const char* const  properties[WC_TYPE_MAX] = {
148     "<invalid>",
149     "alnum", "alpha", "blank", "cntrl", "digit", "graph",
150     "lower", "print", "punct", "space", "upper", "xdigit"
151   };
152   for (size_t i = 0; i < WC_TYPE_MAX; ++i) {
153     if (!strcmp(properties[i], property)) {
154       return static_cast<wctype_t>(i);
155     }
156   }
157   return static_cast<wctype_t>(0);
158 }
159 
// Locale-taking variant of wctype(). The locale_t argument is ignored and the
// lookup is forwarded unchanged.
wctype_t wctype_l(const char* property, locale_t) {
  const wctype_t char_class = wctype(property);
  return char_class;
}
163 
// Handles that wctrans() hands out and towctrans() recognises. Within this
// file they are only ever compared for equality, so the values 1 and 2 merely
// need to be distinct from each other and from the null handle that wctrans()
// returns for an unknown name.
static wctrans_t wctrans_tolower = reinterpret_cast<wctrans_t>(1);
static wctrans_t wctrans_toupper = reinterpret_cast<wctrans_t>(2);
166 
wctrans(const char * name)167 wctrans_t wctrans(const char* name) {
168   if (strcmp(name, "tolower") == 0) return wctrans_tolower;
169   if (strcmp(name, "toupper") == 0) return wctrans_toupper;
170   return nullptr;
171 }
172 
// Locale-taking variant of wctrans(). The locale_t argument is ignored and
// the lookup is forwarded unchanged.
wctrans_t wctrans_l(const char* name, locale_t) {
  const wctrans_t mapping = wctrans(name);
  return mapping;
}
176 
towctrans(wint_t c,wctrans_t t)177 wint_t towctrans(wint_t c, wctrans_t t) {
178   if (t == wctrans_tolower) return towlower(c);
179   if (t == wctrans_toupper) return towupper(c);
180   errno = EINVAL;
181   return 0;
182 }
183 
// Locale-taking variant of towctrans(). The locale_t argument is ignored and
// the call is forwarded unchanged.
wint_t towctrans_l(wint_t c, wctrans_t t, locale_t) {
  const wint_t mapped = towctrans(c, t);
  return mapped;
}
187