1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1997-2011, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * File CSTRING.C
10 *
11 * @author Helena Shih
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 6/18/98 hshih Created
17 * 09/08/98 stephen Added include for ctype, for Mac Port
18 * 11/15/99 helena Integrated S/390 IEEE changes.
19 ******************************************************************************
20 */
21
22
23
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include "unicode/utypes.h"
27 #include "cmemory.h"
28 #include "cstring.h"
29 #include "uassert.h"
30
31 /*
32 * We hardcode case conversion for invariant characters to match our expectation
33 * and the compiler execution charset.
34 * This prevents problems on systems
35 * - with non-default casing behavior, like Turkish system locales where
36 * tolower('I') maps to dotless i and toupper('i') maps to dotted I
37 * - where there are no lowercase Latin characters at all, or using different
38 * codes (some old EBCDIC codepages)
39 *
40 * This works because the compiler usually runs on a platform where the execution
41 * charset includes all of the invariant characters at their expected
42 * code positions, so that the char * string literals in ICU code match
43 * the char literals here.
44 *
45 * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
46 * and the set of uppercase Latin letters is discontiguous as well.
47 */
48
49 U_CAPI UBool U_EXPORT2
uprv_isASCIILetter(char c)50 uprv_isASCIILetter(char c) {
51 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
52 return
53 ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
54 ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
55 #else
56 return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
57 #endif
58 }
59
60 U_CAPI char U_EXPORT2
uprv_toupper(char c)61 uprv_toupper(char c) {
62 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
63 if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
64 c=(char)(c+('A'-'a'));
65 }
66 #else
67 if('a'<=c && c<='z') {
68 c=(char)(c+('A'-'a'));
69 }
70 #endif
71 return c;
72 }
73
74
#if 0
/*
 * Disabled: cstring.h maps uprv_tolower() onto either uprv_asciitolower()
 * or uprv_ebcdictolower(), which keeps the amount of code that needs test
 * coverage small.
 *
 * This generic definition is kept for reference. Its #else branch relies
 * only on character literals, so it is likely to work for most charset
 * families, not just ASCII and EBCDIC.
 */
U_CAPI char U_EXPORT2
uprv_tolower(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    const UBool isUpper =
        (UBool)((c>='A' && c<='I') || (c>='J' && c<='R') || (c>='S' && c<='Z'));
#else
    const UBool isUpper = (UBool)(c>='A' && c<='Z');
#endif

    /* 'a'-'A' is the constant distance between the two cases */
    return isUpper ? (char)(c+('a'-'A')) : c;
}
#endif
99
100 U_CAPI char U_EXPORT2
uprv_asciitolower(char c)101 uprv_asciitolower(char c) {
102 if(0x41<=c && c<=0x5a) {
103 c=(char)(c+0x20);
104 }
105 return c;
106 }
107
108 U_CAPI char U_EXPORT2
uprv_ebcdictolower(char c)109 uprv_ebcdictolower(char c) {
110 if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
111 (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
112 (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
113 ) {
114 c=(char)(c-0x40);
115 }
116 return c;
117 }
118
119
120 U_CAPI char* U_EXPORT2
T_CString_toLowerCase(char * str)121 T_CString_toLowerCase(char* str)
122 {
123 char* origPtr = str;
124
125 if (str) {
126 do
127 *str = (char)uprv_tolower(*str);
128 while (*(str++));
129 }
130
131 return origPtr;
132 }
133
134 U_CAPI char* U_EXPORT2
T_CString_toUpperCase(char * str)135 T_CString_toUpperCase(char* str)
136 {
137 char* origPtr = str;
138
139 if (str) {
140 do
141 *str = (char)uprv_toupper(*str);
142 while (*(str++));
143 }
144
145 return origPtr;
146 }
147
148 /*
149 * Takes a int32_t and fills in a char* string with that number "radix"-based.
150 * Does not handle negative values (makes an empty string for them).
151 * Writes at most 12 chars ("-2147483647" plus NUL).
152 * Returns the length of the string (not including the NUL).
153 */
154 U_CAPI int32_t U_EXPORT2
T_CString_integerToString(char * buffer,int32_t v,int32_t radix)155 T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
156 {
157 char tbuf[30];
158 int32_t tbx = sizeof(tbuf);
159 uint8_t digit;
160 int32_t length = 0;
161 uint32_t uval;
162
163 U_ASSERT(radix>=2 && radix<=16);
164 uval = (uint32_t) v;
165 if(v<0 && radix == 10) {
166 /* Only in base 10 do we conside numbers to be signed. */
167 uval = (uint32_t)(-v);
168 buffer[length++] = '-';
169 }
170
171 tbx = sizeof(tbuf)-1;
172 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
173 do {
174 digit = (uint8_t)(uval % radix);
175 tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
176 uval = uval / radix;
177 } while (uval != 0);
178
179 /* copy converted number into user buffer */
180 uprv_strcpy(buffer+length, tbuf+tbx);
181 length += sizeof(tbuf) - tbx -1;
182 return length;
183 }
184
185
186
187 /*
188 * Takes a int64_t and fills in a char* string with that number "radix"-based.
189 * Writes at most 21: chars ("-9223372036854775807" plus NUL).
190 * Returns the length of the string, not including the terminating NULL.
191 */
192 U_CAPI int32_t U_EXPORT2
T_CString_int64ToString(char * buffer,int64_t v,uint32_t radix)193 T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
194 {
195 char tbuf[30];
196 int32_t tbx = sizeof(tbuf);
197 uint8_t digit;
198 int32_t length = 0;
199 uint64_t uval;
200
201 U_ASSERT(radix>=2 && radix<=16);
202 uval = (uint64_t) v;
203 if(v<0 && radix == 10) {
204 /* Only in base 10 do we conside numbers to be signed. */
205 uval = (uint64_t)(-v);
206 buffer[length++] = '-';
207 }
208
209 tbx = sizeof(tbuf)-1;
210 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
211 do {
212 digit = (uint8_t)(uval % radix);
213 tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
214 uval = uval / radix;
215 } while (uval != 0);
216
217 /* copy converted number into user buffer */
218 uprv_strcpy(buffer+length, tbuf+tbx);
219 length += sizeof(tbuf) - tbx -1;
220 return length;
221 }
222
223
224 U_CAPI int32_t U_EXPORT2
T_CString_stringToInteger(const char * integerString,int32_t radix)225 T_CString_stringToInteger(const char *integerString, int32_t radix)
226 {
227 char *end;
228 return uprv_strtoul(integerString, &end, radix);
229
230 }
231
232 U_CAPI int U_EXPORT2
uprv_stricmp(const char * str1,const char * str2)233 uprv_stricmp(const char *str1, const char *str2) {
234 if(str1==NULL) {
235 if(str2==NULL) {
236 return 0;
237 } else {
238 return -1;
239 }
240 } else if(str2==NULL) {
241 return 1;
242 } else {
243 /* compare non-NULL strings lexically with lowercase */
244 int rc;
245 unsigned char c1, c2;
246
247 for(;;) {
248 c1=(unsigned char)*str1;
249 c2=(unsigned char)*str2;
250 if(c1==0) {
251 if(c2==0) {
252 return 0;
253 } else {
254 return -1;
255 }
256 } else if(c2==0) {
257 return 1;
258 } else {
259 /* compare non-zero characters with lowercase */
260 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
261 if(rc!=0) {
262 return rc;
263 }
264 }
265 ++str1;
266 ++str2;
267 }
268 }
269 }
270
271 U_CAPI int U_EXPORT2
uprv_strnicmp(const char * str1,const char * str2,uint32_t n)272 uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
273 if(str1==NULL) {
274 if(str2==NULL) {
275 return 0;
276 } else {
277 return -1;
278 }
279 } else if(str2==NULL) {
280 return 1;
281 } else {
282 /* compare non-NULL strings lexically with lowercase */
283 int rc;
284 unsigned char c1, c2;
285
286 for(; n--;) {
287 c1=(unsigned char)*str1;
288 c2=(unsigned char)*str2;
289 if(c1==0) {
290 if(c2==0) {
291 return 0;
292 } else {
293 return -1;
294 }
295 } else if(c2==0) {
296 return 1;
297 } else {
298 /* compare non-zero characters with lowercase */
299 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
300 if(rc!=0) {
301 return rc;
302 }
303 }
304 ++str1;
305 ++str2;
306 }
307 }
308
309 return 0;
310 }
311
312 U_CAPI char* U_EXPORT2
uprv_strdup(const char * src)313 uprv_strdup(const char *src) {
314 size_t len = uprv_strlen(src) + 1;
315 char *dup = (char *) uprv_malloc(len);
316
317 if (dup) {
318 uprv_memcpy(dup, src, len);
319 }
320
321 return dup;
322 }
323
324 U_CAPI char* U_EXPORT2
uprv_strndup(const char * src,int32_t n)325 uprv_strndup(const char *src, int32_t n) {
326 char *dup;
327
328 if(n < 0) {
329 dup = uprv_strdup(src);
330 } else {
331 dup = (char*)uprv_malloc(n+1);
332 if (dup) {
333 uprv_memcpy(dup, src, n);
334 dup[n] = 0;
335 }
336 }
337
338 return dup;
339 }
340