1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 *   Copyright (C) 1997-2011, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 ******************************************************************************
10 *
11 * File CSTRING.C
12 *
13 * @author       Helena Shih
14 *
15 * Modification History:
16 *
17 *   Date        Name        Description
18 *   6/18/98     hshih       Created
19 *   09/08/98    stephen     Added include for ctype, for Mac Port
20 *   11/15/99    helena      Integrated S/390 IEEE changes.
21 ******************************************************************************
22 */
23 
24 
25 
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include "unicode/utypes.h"
29 #include "cmemory.h"
30 #include "cstring.h"
31 #include "uassert.h"
32 
33 /*
34  * We hardcode case conversion for invariant characters to match our expectation
35  * and the compiler execution charset.
36  * This prevents problems on systems
37  * - with non-default casing behavior, like Turkish system locales where
38  *   tolower('I') maps to dotless i and toupper('i') maps to dotted I
39  * - where there are no lowercase Latin characters at all, or using different
40  *   codes (some old EBCDIC codepages)
41  *
42  * This works because the compiler usually runs on a platform where the execution
43  * charset includes all of the invariant characters at their expected
44  * code positions, so that the char * string literals in ICU code match
45  * the char literals here.
46  *
47  * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
48  * and the set of uppercase Latin letters is discontiguous as well.
49  */
50 
/*
 * Reports whether c is a basic Latin letter (a-z or A-Z) in the
 * compiler's execution charset.
 * On EBCDIC platforms each case is tested as three separate runs
 * (a-i, j-r, s-z) because the letters are not contiguous there.
 */
U_CAPI UBool U_EXPORT2
uprv_isASCIILetter(char c) {
    UBool isLower;
    UBool isUpper;

#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    isLower = (c>='a' && c<='i') || (c>='j' && c<='r') || (c>='s' && c<='z');
    isUpper = (c>='A' && c<='I') || (c>='J' && c<='R') || (c>='S' && c<='Z');
#else
    isLower = (c>='a' && c<='z');
    isUpper = (c>='A' && c<='Z');
#endif
    return isLower || isUpper;
}
61 
/*
 * Maps a lowercase basic Latin letter to its uppercase counterpart and
 * returns every other character unchanged.
 * The conversion is hardcoded (no locale involved); on EBCDIC platforms
 * the lowercase letters are recognized as three separate runs.
 */
U_CAPI char U_EXPORT2
uprv_toupper(char c) {
    int isLowercase;

#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    isLowercase = (c>='a' && c<='i') || (c>='j' && c<='r') || (c>='s' && c<='z');
#else
    isLowercase = (c>='a' && c<='z');
#endif
    /* 'A'-'a' is the fixed distance between the two cases in the execution charset. */
    return isLowercase ? (char)(c+('A'-'a')) : c;
}
75 
76 
#if 0
/*
 * Compiled out (#if 0) because cstring.h defines uprv_tolower() to be
 * the same as either uprv_asciitolower() or uprv_ebcdictolower()
 * to reduce the amount of code to cover with tests.
 * The definition is kept here for reference only.
 *
 * Note that this uprv_tolower() definition is likely to work for most
 * charset families, not just ASCII and EBCDIC, because its #else branch
 * is written generically.
 */
U_CAPI char U_EXPORT2
uprv_tolower(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    /* EBCDIC: the uppercase letters form three separate runs. */
    if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
        c=(char)(c+('a'-'A'));
    }
#else
    /* Generic: uppercase letters are assumed to be one contiguous run. */
    if('A'<=c && c<='Z') {
        c=(char)(c+('a'-'A'));
    }
#endif
    return c;
}
#endif
101 
/*
 * Lowercases a character by ASCII code value, independent of the
 * compiler's execution charset: codes 0x41..0x5a ('A'..'Z' in ASCII)
 * are shifted up by 0x20; all other values are returned unchanged.
 */
U_CAPI char U_EXPORT2
uprv_asciitolower(char c) {
    return (c>=0x41 && c<=0x5a) ? (char)(c+0x20) : c;
}
109 
/*
 * Lowercases a character by EBCDIC code value, independent of the
 * compiler's execution charset: bytes in 0xc1..0xc9, 0xd1..0xd9 and
 * 0xe2..0xe9 (the EBCDIC uppercase runs A-I, J-R, S-Z) are shifted
 * down by 0x40; all other values are returned unchanged.
 */
U_CAPI char U_EXPORT2
uprv_ebcdictolower(char c) {
    /* Compare as an unsigned byte: these codes are above 0x7f. */
    const uint8_t b = (uint8_t)c;

    if((b>=0xc1 && b<=0xc9) || (b>=0xd1 && b<=0xd9) || (b>=0xe2 && b<=0xe9)) {
        return (char)(c-0x40);
    }
    return c;
}
120 
121 
122 U_CAPI char* U_EXPORT2
T_CString_toLowerCase(char * str)123 T_CString_toLowerCase(char* str)
124 {
125     char* origPtr = str;
126 
127     if (str) {
128         do
129             *str = (char)uprv_tolower(*str);
130         while (*(str++));
131     }
132 
133     return origPtr;
134 }
135 
136 U_CAPI char* U_EXPORT2
T_CString_toUpperCase(char * str)137 T_CString_toUpperCase(char* str)
138 {
139     char* origPtr = str;
140 
141     if (str) {
142         do
143             *str = (char)uprv_toupper(*str);
144         while (*(str++));
145     }
146 
147     return origPtr;
148 }
149 
150 /*
151  * Takes a int32_t and fills in  a char* string with that number "radix"-based.
152  * Does not handle negative values (makes an empty string for them).
153  * Writes at most 12 chars ("-2147483647" plus NUL).
154  * Returns the length of the string (not including the NUL).
155  */
156 U_CAPI int32_t U_EXPORT2
T_CString_integerToString(char * buffer,int32_t v,int32_t radix)157 T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
158 {
159     char      tbuf[30];
160     int32_t   tbx    = sizeof(tbuf);
161     uint8_t   digit;
162     int32_t   length = 0;
163     uint32_t  uval;
164 
165     U_ASSERT(radix>=2 && radix<=16);
166     uval = (uint32_t) v;
167     if(v<0 && radix == 10) {
168         /* Only in base 10 do we conside numbers to be signed. */
169         uval = (uint32_t)(-v);
170         buffer[length++] = '-';
171     }
172 
173     tbx = sizeof(tbuf)-1;
174     tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
175     do {
176         digit = (uint8_t)(uval % radix);
177         tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
178         uval  = uval / radix;
179     } while (uval != 0);
180 
181     /* copy converted number into user buffer  */
182     uprv_strcpy(buffer+length, tbuf+tbx);
183     length += sizeof(tbuf) - tbx -1;
184     return length;
185 }
186 
187 
188 
189 /*
190  * Takes a int64_t and fills in  a char* string with that number "radix"-based.
191  * Writes at most 21: chars ("-9223372036854775807" plus NUL).
192  * Returns the length of the string, not including the terminating NULL.
193  */
194 U_CAPI int32_t U_EXPORT2
T_CString_int64ToString(char * buffer,int64_t v,uint32_t radix)195 T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
196 {
197     char      tbuf[30];
198     int32_t   tbx    = sizeof(tbuf);
199     uint8_t   digit;
200     int32_t   length = 0;
201     uint64_t  uval;
202 
203     U_ASSERT(radix>=2 && radix<=16);
204     uval = (uint64_t) v;
205     if(v<0 && radix == 10) {
206         /* Only in base 10 do we conside numbers to be signed. */
207         uval = (uint64_t)(-v);
208         buffer[length++] = '-';
209     }
210 
211     tbx = sizeof(tbuf)-1;
212     tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
213     do {
214         digit = (uint8_t)(uval % radix);
215         tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
216         uval  = uval / radix;
217     } while (uval != 0);
218 
219     /* copy converted number into user buffer  */
220     uprv_strcpy(buffer+length, tbuf+tbx);
221     length += sizeof(tbuf) - tbx -1;
222     return length;
223 }
224 
225 
226 U_CAPI int32_t U_EXPORT2
T_CString_stringToInteger(const char * integerString,int32_t radix)227 T_CString_stringToInteger(const char *integerString, int32_t radix)
228 {
229     char *end;
230     return uprv_strtoul(integerString, &end, radix);
231 
232 }
233 
234 U_CAPI int U_EXPORT2
uprv_stricmp(const char * str1,const char * str2)235 uprv_stricmp(const char *str1, const char *str2) {
236     if(str1==NULL) {
237         if(str2==NULL) {
238             return 0;
239         } else {
240             return -1;
241         }
242     } else if(str2==NULL) {
243         return 1;
244     } else {
245         /* compare non-NULL strings lexically with lowercase */
246         int rc;
247         unsigned char c1, c2;
248 
249         for(;;) {
250             c1=(unsigned char)*str1;
251             c2=(unsigned char)*str2;
252             if(c1==0) {
253                 if(c2==0) {
254                     return 0;
255                 } else {
256                     return -1;
257                 }
258             } else if(c2==0) {
259                 return 1;
260             } else {
261                 /* compare non-zero characters with lowercase */
262                 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
263                 if(rc!=0) {
264                     return rc;
265                 }
266             }
267             ++str1;
268             ++str2;
269         }
270     }
271 }
272 
273 U_CAPI int U_EXPORT2
uprv_strnicmp(const char * str1,const char * str2,uint32_t n)274 uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
275     if(str1==NULL) {
276         if(str2==NULL) {
277             return 0;
278         } else {
279             return -1;
280         }
281     } else if(str2==NULL) {
282         return 1;
283     } else {
284         /* compare non-NULL strings lexically with lowercase */
285         int rc;
286         unsigned char c1, c2;
287 
288         for(; n--;) {
289             c1=(unsigned char)*str1;
290             c2=(unsigned char)*str2;
291             if(c1==0) {
292                 if(c2==0) {
293                     return 0;
294                 } else {
295                     return -1;
296                 }
297             } else if(c2==0) {
298                 return 1;
299             } else {
300                 /* compare non-zero characters with lowercase */
301                 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
302                 if(rc!=0) {
303                     return rc;
304                 }
305             }
306             ++str1;
307             ++str2;
308         }
309     }
310 
311     return 0;
312 }
313 
314 U_CAPI char* U_EXPORT2
uprv_strdup(const char * src)315 uprv_strdup(const char *src) {
316     size_t len = uprv_strlen(src) + 1;
317     char *dup = (char *) uprv_malloc(len);
318 
319     if (dup) {
320         uprv_memcpy(dup, src, len);
321     }
322 
323     return dup;
324 }
325 
326 U_CAPI char* U_EXPORT2
uprv_strndup(const char * src,int32_t n)327 uprv_strndup(const char *src, int32_t n) {
328     char *dup;
329 
330     if(n < 0) {
331         dup = uprv_strdup(src);
332     } else {
333         dup = (char*)uprv_malloc(n+1);
334         if (dup) {
335             uprv_memcpy(dup, src, n);
336             dup[n] = 0;
337         }
338     }
339 
340     return dup;
341 }
342