1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1999-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: uinvchar.c
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:2
12 *
13 * created on: 2004sep14
14 * created by: Markus W. Scherer
15 *
16 * Functions for handling invariant characters, moved here from putil.c
17 * for better modularization.
18 */
19
20 #include "unicode/utypes.h"
21 #include "unicode/ustring.h"
22 #include "udataswp.h"
23 #include "cstring.h"
24 #include "cmemory.h"
25 #include "uassert.h"
26 #include "uinvchar.h"
27
28 /* invariant-character handling --------------------------------------------- */
29
30 /*
31 * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h)
32 * appropriately for most EBCDIC codepages.
33 *
34 * They currently also map most other ASCII graphic characters,
35 * appropriately for codepages 37 and 1047.
36 * Exceptions: The characters for []^ have different codes in 37 & 1047.
37 * Both versions are mapped to ASCII.
38 *
39 * ASCII 37 1047
40 * [ 5B BA AD
41 * ] 5D BB BD
42 * ^ 5E B0 5F
43 *
44 * There are no mappings for variant characters from Unicode to EBCDIC.
45 *
46 * Currently, C0 control codes are also included in these maps.
47 * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other
48 * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A),
49 * but there is no mapping for ASCII LF back to EBCDIC.
50 *
51 * ASCII EBCDIC S/390-OE
52 * LF 0A 25 15
53 * NEL 85 15 25
54 *
55 * The maps below explicitly exclude the variant
56 * control and graphical characters that are in ASCII-based
57 * codepages at 0x80 and above.
58 * "No mapping" is expressed by mapping to a 00 byte.
59 *
60 * These tables do not establish a converter or a codepage.
61 */
62
/*
 * Maps an EBCDIC byte (the array index) to an ASCII code.
 * A 00 entry at a non-zero index means "no mapping".
 * Each row holds 16 consecutive entries; the trailing comment on a row
 * gives the range of EBCDIC codes (hex) that the row covers.
 */
static const uint8_t asciiFromEbcdic[256]={
    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 00..0f */
    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f, /* 10..1f */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07, /* 20..2f */
    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a, /* 30..3f */

    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, /* 40..4f */
    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, /* 50..5f */
    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, /* 60..6f */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, /* 70..7f */

    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 80..8f */
    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 90..9f */
    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00, /* a0..af */
    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00, /* b0..bf */

    0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* c0..cf */
    0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* d0..df */
    0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* e0..ef */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  /* f0..ff */
};
84
/*
 * Maps an ASCII code (the array index) to an EBCDIC byte.
 * A 00 entry at a non-zero index means "no mapping"; this is the case for
 * every index from 80 to ff and for several entries below 80.
 * The trailing comment on a row gives the range of ASCII codes (hex)
 * that the row covers.
 */
static const uint8_t ebcdicFromAscii[256]={
    0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 00..0f */
    0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, /* 10..1f */
    0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61, /* 20..2f */
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f, /* 30..3f */

    0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, /* 40..4f */
    0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d, /* 50..5f */
    0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, /* 60..6f */
    0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07, /* 70..7f */

    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 80..8f */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 90..9f */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* a0..af */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* b0..bf */

    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* c0..cf */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* d0..df */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* e0..ef */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  /* f0..ff */
};
106
/*
 * Same as asciiFromEbcdic[] except that the EBCDIC uppercase letters
 * (c1..c9, d1..d9, e2..e9) map to ASCII lowercase letters (61..7a).
 * All other entries, including the one for e0 (backslash, 5c), are
 * identical to asciiFromEbcdic[]; a 00 entry at a non-zero index means
 * "no mapping".
 * The trailing comment on a row gives the range of EBCDIC codes (hex)
 * that the row covers.
 */
static const uint8_t lowercaseAsciiFromEbcdic[256]={
    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 00..0f */
    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f, /* 10..1f */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07, /* 20..2f */
    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a, /* 30..3f */

    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, /* 40..4f */
    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, /* 50..5f */
    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, /* 60..6f */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, /* 70..7f */

    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 80..8f */
    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 90..9f */
    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00, /* a0..af */
    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00, /* b0..bf */

    0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* c0..cf */
    0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* d0..df */
    0x5c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* e0..ef */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  /* f0..ff */
};
129
/*
 * Bit sets indicating which characters of the ASCII repertoire
 * (by ASCII/Unicode code) are "invariant".
 * See utypes.h for more details.
 *
 * Word i covers the codes i*0x20..i*0x20+0x1f; bit n of word i is set
 * if the code i*0x20+n is invariant.
 *
 * The following codes are not marked as invariant:
 *   0a      <line feed>
 *
 *   21 '!'  <exclamation mark>
 *   23 '#'  <number sign>
 *   24 '$'  <dollar sign>
 *
 *   40 '@'  <commercial at>
 *
 *   5b '['  <left bracket>
 *   5c '\'  <backslash>
 *   5d ']'  <right bracket>
 *   5e '^'  <circumflex>
 *
 *   60 '`'  <grave accent>
 *
 *   7b '{'  <left brace>
 *   7c '|'  <vertical line>
 *   7d '}'  <right brace>
 *   7e '~'  <tilde>
 */
static const uint32_t invariantChars[4]={
    0xfffffbffU, /* 00..1f: all except 0a */
    0xffffffe5U, /* 20..3f: all except 21 23 24 */
    0x87fffffeU, /* 40..5f: all except 40 5b 5c 5d 5e */
    0x87fffffeU  /* 60..7f: all except 60 7b 7c 7d 7e */
};
161
/*
 * Test unsigned types (or values known to be non-negative) for invariant
 * characters; tests ASCII-family character values.
 * Values above 0x7f are never invariant; for all others, bit (c&0x1f) of
 * invariantChars[c>>5] decides.
 * Note: the argument is evaluated more than once, so it must not have
 * side effects.
 */
#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0)

/*
 * Test signed types for invariant characters: rejects negative values
 * first, then applies UCHAR_IS_INVARIANT().
 * Note: the argument is evaluated more than once.
 */
#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c))
170
/*
 * CHAR_TO_UCHAR() maps a platform-charset byte to its ASCII/Unicode code,
 * UCHAR_TO_CHAR() maps an ASCII/Unicode code to a platform-charset byte.
 * On ASCII-family platforms both are the identity; the argument is
 * parenthesized so that the expansion is safe inside larger expressions.
 * On EBCDIC-family platforms the argument is used as an index into the
 * 256-entry conversion tables and must therefore be in the range 0..255.
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#define CHAR_TO_UCHAR(c) (c)
#define UCHAR_TO_CHAR(c) (c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#define CHAR_TO_UCHAR(u) asciiFromEbcdic[u]
#define UCHAR_TO_CHAR(u) ebcdicFromAscii[u]
#else
#   error U_CHARSET_FAMILY is not valid
#endif
180
181
182 U_CAPI void U_EXPORT2
u_charsToUChars(const char * cs,UChar * us,int32_t length)183 u_charsToUChars(const char *cs, UChar *us, int32_t length) {
184 UChar u;
185 uint8_t c;
186
187 /*
188 * Allow the entire ASCII repertoire to be mapped _to_ Unicode.
189 * For EBCDIC systems, this works for characters with codes from
190 * codepages 37 and 1047 or compatible.
191 */
192 while(length>0) {
193 c=(uint8_t)(*cs++);
194 u=(UChar)CHAR_TO_UCHAR(c);
195 U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */
196 *us++=u;
197 --length;
198 }
199 }
200
201 U_CAPI void U_EXPORT2
u_UCharsToChars(const UChar * us,char * cs,int32_t length)202 u_UCharsToChars(const UChar *us, char *cs, int32_t length) {
203 UChar u;
204
205 while(length>0) {
206 u=*us++;
207 if(!UCHAR_IS_INVARIANT(u)) {
208 U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */
209 u=0;
210 }
211 *cs++=(char)UCHAR_TO_CHAR(u);
212 --length;
213 }
214 }
215
216 U_CAPI UBool U_EXPORT2
uprv_isInvariantString(const char * s,int32_t length)217 uprv_isInvariantString(const char *s, int32_t length) {
218 uint8_t c;
219
220 for(;;) {
221 if(length<0) {
222 /* NUL-terminated */
223 c=(uint8_t)*s++;
224 if(c==0) {
225 break;
226 }
227 } else {
228 /* count length */
229 if(length==0) {
230 break;
231 }
232 --length;
233 c=(uint8_t)*s++;
234 if(c==0) {
235 continue; /* NUL is invariant */
236 }
237 }
238 /* c!=0 now, one branch below checks c==0 for variant characters */
239
240 /*
241 * no assertions here because these functions are legitimately called
242 * for strings with variant characters
243 */
244 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
245 if(!UCHAR_IS_INVARIANT(c)) {
246 return FALSE; /* found a variant char */
247 }
248 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
249 c=CHAR_TO_UCHAR(c);
250 if(c==0 || !UCHAR_IS_INVARIANT(c)) {
251 return FALSE; /* found a variant char */
252 }
253 #else
254 # error U_CHARSET_FAMILY is not valid
255 #endif
256 }
257 return TRUE;
258 }
259
260 U_CAPI UBool U_EXPORT2
uprv_isInvariantUString(const UChar * s,int32_t length)261 uprv_isInvariantUString(const UChar *s, int32_t length) {
262 UChar c;
263
264 for(;;) {
265 if(length<0) {
266 /* NUL-terminated */
267 c=*s++;
268 if(c==0) {
269 break;
270 }
271 } else {
272 /* count length */
273 if(length==0) {
274 break;
275 }
276 --length;
277 c=*s++;
278 }
279
280 /*
281 * no assertions here because these functions are legitimately called
282 * for strings with variant characters
283 */
284 if(!UCHAR_IS_INVARIANT(c)) {
285 return FALSE; /* found a variant char */
286 }
287 }
288 return TRUE;
289 }
290
291 /* UDataSwapFn implementations used in udataswp.c ------- */
292
293 /* convert ASCII to EBCDIC and verify that all characters are invariant */
294 U_CAPI int32_t U_EXPORT2
uprv_ebcdicFromAscii(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)295 uprv_ebcdicFromAscii(const UDataSwapper *ds,
296 const void *inData, int32_t length, void *outData,
297 UErrorCode *pErrorCode) {
298 const uint8_t *s;
299 uint8_t *t;
300 uint8_t c;
301
302 int32_t count;
303
304 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
305 return 0;
306 }
307 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
308 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
309 return 0;
310 }
311
312 /* setup and swapping */
313 s=(const uint8_t *)inData;
314 t=(uint8_t *)outData;
315 count=length;
316 while(count>0) {
317 c=*s++;
318 if(!UCHAR_IS_INVARIANT(c)) {
319 udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
320 length, length-count);
321 *pErrorCode=U_INVALID_CHAR_FOUND;
322 return 0;
323 }
324 *t++=ebcdicFromAscii[c];
325 --count;
326 }
327
328 return length;
329 }
330
331 /* this function only checks and copies ASCII strings without conversion */
332 U_CFUNC int32_t
uprv_copyAscii(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)333 uprv_copyAscii(const UDataSwapper *ds,
334 const void *inData, int32_t length, void *outData,
335 UErrorCode *pErrorCode) {
336 const uint8_t *s;
337 uint8_t c;
338
339 int32_t count;
340
341 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
342 return 0;
343 }
344 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
345 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
346 return 0;
347 }
348
349 /* setup and checking */
350 s=(const uint8_t *)inData;
351 count=length;
352 while(count>0) {
353 c=*s++;
354 if(!UCHAR_IS_INVARIANT(c)) {
355 udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n",
356 length, length-count);
357 *pErrorCode=U_INVALID_CHAR_FOUND;
358 return 0;
359 }
360 --count;
361 }
362
363 if(length>0 && inData!=outData) {
364 uprv_memcpy(outData, inData, length);
365 }
366
367 return length;
368 }
369
370 /* convert EBCDIC to ASCII and verify that all characters are invariant */
371 U_CFUNC int32_t
uprv_asciiFromEbcdic(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)372 uprv_asciiFromEbcdic(const UDataSwapper *ds,
373 const void *inData, int32_t length, void *outData,
374 UErrorCode *pErrorCode) {
375 const uint8_t *s;
376 uint8_t *t;
377 uint8_t c;
378
379 int32_t count;
380
381 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
382 return 0;
383 }
384 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
385 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
386 return 0;
387 }
388
389 /* setup and swapping */
390 s=(const uint8_t *)inData;
391 t=(uint8_t *)outData;
392 count=length;
393 while(count>0) {
394 c=*s++;
395 if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
396 udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n",
397 length, length-count);
398 *pErrorCode=U_INVALID_CHAR_FOUND;
399 return 0;
400 }
401 *t++=c;
402 --count;
403 }
404
405 return length;
406 }
407
408 /* this function only checks and copies EBCDIC strings without conversion */
409 U_CFUNC int32_t
uprv_copyEbcdic(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)410 uprv_copyEbcdic(const UDataSwapper *ds,
411 const void *inData, int32_t length, void *outData,
412 UErrorCode *pErrorCode) {
413 const uint8_t *s;
414 uint8_t c;
415
416 int32_t count;
417
418 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
419 return 0;
420 }
421 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
422 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
423 return 0;
424 }
425
426 /* setup and checking */
427 s=(const uint8_t *)inData;
428 count=length;
429 while(count>0) {
430 c=*s++;
431 if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
432 udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n",
433 length, length-count);
434 *pErrorCode=U_INVALID_CHAR_FOUND;
435 return 0;
436 }
437 --count;
438 }
439
440 if(length>0 && inData!=outData) {
441 uprv_memcpy(outData, inData, length);
442 }
443
444 return length;
445 }
446
447 /* compare invariant strings; variant characters compare less than others and unlike each other */
448 U_CFUNC int32_t
uprv_compareInvAscii(const UDataSwapper * ds,const char * outString,int32_t outLength,const UChar * localString,int32_t localLength)449 uprv_compareInvAscii(const UDataSwapper *ds,
450 const char *outString, int32_t outLength,
451 const UChar *localString, int32_t localLength) {
452 int32_t minLength;
453 UChar32 c1, c2;
454 uint8_t c;
455
456 if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
457 return 0;
458 }
459
460 if(outLength<0) {
461 outLength=(int32_t)uprv_strlen(outString);
462 }
463 if(localLength<0) {
464 localLength=u_strlen(localString);
465 }
466
467 minLength= outLength<localLength ? outLength : localLength;
468
469 while(minLength>0) {
470 c=(uint8_t)*outString++;
471 if(UCHAR_IS_INVARIANT(c)) {
472 c1=c;
473 } else {
474 c1=-1;
475 }
476
477 c2=*localString++;
478 if(!UCHAR_IS_INVARIANT(c2)) {
479 c2=-2;
480 }
481
482 if((c1-=c2)!=0) {
483 return c1;
484 }
485
486 --minLength;
487 }
488
489 /* strings start with same prefix, compare lengths */
490 return outLength-localLength;
491 }
492
493 U_CFUNC int32_t
uprv_compareInvEbcdic(const UDataSwapper * ds,const char * outString,int32_t outLength,const UChar * localString,int32_t localLength)494 uprv_compareInvEbcdic(const UDataSwapper *ds,
495 const char *outString, int32_t outLength,
496 const UChar *localString, int32_t localLength) {
497 int32_t minLength;
498 UChar32 c1, c2;
499 uint8_t c;
500
501 if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
502 return 0;
503 }
504
505 if(outLength<0) {
506 outLength=(int32_t)uprv_strlen(outString);
507 }
508 if(localLength<0) {
509 localLength=u_strlen(localString);
510 }
511
512 minLength= outLength<localLength ? outLength : localLength;
513
514 while(minLength>0) {
515 c=(uint8_t)*outString++;
516 if(c==0) {
517 c1=0;
518 } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) {
519 /* c1 is set */
520 } else {
521 c1=-1;
522 }
523
524 c2=*localString++;
525 if(!UCHAR_IS_INVARIANT(c2)) {
526 c2=-2;
527 }
528
529 if((c1-=c2)!=0) {
530 return c1;
531 }
532
533 --minLength;
534 }
535
536 /* strings start with same prefix, compare lengths */
537 return outLength-localLength;
538 }
539
540 U_CAPI int32_t U_EXPORT2
uprv_compareInvEbcdicAsAscii(const char * s1,const char * s2)541 uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
542 int32_t c1, c2;
543
544 for(;; ++s1, ++s2) {
545 c1=(uint8_t)*s1;
546 c2=(uint8_t)*s2;
547 if(c1!=c2) {
548 if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) {
549 c1=-(int32_t)(uint8_t)*s1;
550 }
551 if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) {
552 c2=-(int32_t)(uint8_t)*s2;
553 }
554 return c1-c2;
555 } else if(c1==0) {
556 return 0;
557 }
558 }
559 }
560
561 U_CAPI char U_EXPORT2
uprv_ebcdicToLowercaseAscii(char c)562 uprv_ebcdicToLowercaseAscii(char c) {
563 return (char)lowercaseAsciiFromEbcdic[(uint8_t)c];
564 }
565
566 U_INTERNAL uint8_t* U_EXPORT2
uprv_aestrncpy(uint8_t * dst,const uint8_t * src,int32_t n)567 uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
568 {
569 uint8_t *orig_dst = dst;
570
571 if(n==-1) {
572 n = uprv_strlen((const char*)src)+1; /* copy NUL */
573 }
574 /* copy non-null */
575 while(*src && n>0) {
576 *(dst++) = asciiFromEbcdic[*(src++)];
577 n--;
578 }
579 /* pad */
580 while(n>0) {
581 *(dst++) = 0;
582 n--;
583 }
584 return orig_dst;
585 }
586
587 U_INTERNAL uint8_t* U_EXPORT2
uprv_eastrncpy(uint8_t * dst,const uint8_t * src,int32_t n)588 uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
589 {
590 uint8_t *orig_dst = dst;
591
592 if(n==-1) {
593 n = uprv_strlen((const char*)src)+1; /* copy NUL */
594 }
595 /* copy non-null */
596 while(*src && n>0) {
597 char ch = ebcdicFromAscii[*(src++)];
598 if(ch == 0) {
599 ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */
600 }
601 *(dst++) = ch;
602 n--;
603 }
604 /* pad */
605 while(n>0) {
606 *(dst++) = 0;
607 n--;
608 }
609 return orig_dst;
610 }
611
612