1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1998-2015, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * ucnv.c:
10 * Implements APIs for the ICU's codeset conversion library;
11 * mostly calls through internal functions;
12 * created by Bertrand A. Damiba
13 *
14 * Modification History:
15 *
16 * Date Name Description
17 * 04/04/99 helena Fixed internal header inclusion.
18 * 05/09/00 helena Added implementation to handle fallback mappings.
19 * 06/20/2000 helena OS/400 port changes; mostly typecast.
20 */
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_CONVERSION
25
26 #include "unicode/ustring.h"
27 #include "unicode/ucnv.h"
28 #include "unicode/ucnv_err.h"
29 #include "unicode/uset.h"
30 #include "unicode/utf.h"
31 #include "unicode/utf16.h"
32 #include "putilimp.h"
33 #include "cmemory.h"
34 #include "cstring.h"
35 #include "uassert.h"
36 #include "utracimp.h"
37 #include "ustr_imp.h"
38 #include "ucnv_imp.h"
39 #include "ucnv_cnv.h"
40 #include "ucnv_bld.h"
41
42 /* size of intermediate and preflighting buffers in ucnv_convert() */
43 #define CHUNK_SIZE 1024
44
45 typedef struct UAmbiguousConverter {
46 const char *name;
47 const UChar variant5c;
48 } UAmbiguousConverter;
49
50 static const UAmbiguousConverter ambiguousConverters[]={
51 { "ibm-897_P100-1995", 0xa5 },
52 { "ibm-942_P120-1999", 0xa5 },
53 { "ibm-943_P130-1999", 0xa5 },
54 { "ibm-946_P100-1995", 0xa5 },
55 { "ibm-33722_P120-1999", 0xa5 },
56 { "ibm-1041_P100-1995", 0xa5 },
57 /*{ "ibm-54191_P100-2006", 0xa5 },*/
58 /*{ "ibm-62383_P100-2007", 0xa5 },*/
59 /*{ "ibm-891_P100-1995", 0x20a9 },*/
60 { "ibm-944_P100-1995", 0x20a9 },
61 { "ibm-949_P110-1999", 0x20a9 },
62 { "ibm-1363_P110-1997", 0x20a9 },
63 { "ISO_2022,locale=ko,version=0", 0x20a9 },
64 { "ibm-1088_P100-1995", 0x20a9 }
65 };
66
67 /*Calls through createConverter */
68 U_CAPI UConverter* U_EXPORT2
ucnv_open(const char * name,UErrorCode * err)69 ucnv_open (const char *name,
70 UErrorCode * err)
71 {
72 UConverter *r;
73
74 if (err == NULL || U_FAILURE (*err)) {
75 return NULL;
76 }
77
78 r = ucnv_createConverter(NULL, name, err);
79 return r;
80 }
81
82 U_CAPI UConverter* U_EXPORT2
ucnv_openPackage(const char * packageName,const char * converterName,UErrorCode * err)83 ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err)
84 {
85 return ucnv_createConverterFromPackage(packageName, converterName, err);
86 }
87
88 /*Extracts the UChar* to a char* and calls through createConverter */
89 U_CAPI UConverter* U_EXPORT2
ucnv_openU(const UChar * name,UErrorCode * err)90 ucnv_openU (const UChar * name,
91 UErrorCode * err)
92 {
93 char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];
94
95 if (err == NULL || U_FAILURE(*err))
96 return NULL;
97 if (name == NULL)
98 return ucnv_open (NULL, err);
99 if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH)
100 {
101 *err = U_ILLEGAL_ARGUMENT_ERROR;
102 return NULL;
103 }
104 return ucnv_open(u_austrcpy(asciiName, name), err);
105 }
106
107 /* Copy the string that is represented by the UConverterPlatform enum
108 * @param platformString An output buffer
109 * @param platform An enum representing a platform
110 * @return the length of the copied string.
111 */
112 static int32_t
ucnv_copyPlatformString(char * platformString,UConverterPlatform pltfrm)113 ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
114 {
115 switch (pltfrm)
116 {
117 case UCNV_IBM:
118 uprv_strcpy(platformString, "ibm-");
119 return 4;
120 case UCNV_UNKNOWN:
121 break;
122 }
123
124 /* default to empty string */
125 *platformString = 0;
126 return 0;
127 }
128
129 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
130 *through createConverter*/
131 U_CAPI UConverter* U_EXPORT2
ucnv_openCCSID(int32_t codepage,UConverterPlatform platform,UErrorCode * err)132 ucnv_openCCSID (int32_t codepage,
133 UConverterPlatform platform,
134 UErrorCode * err)
135 {
136 char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
137 int32_t myNameLen;
138
139 if (err == NULL || U_FAILURE (*err))
140 return NULL;
141
142 /* ucnv_copyPlatformString could return "ibm-" or "cp" */
143 myNameLen = ucnv_copyPlatformString(myName, platform);
144 T_CString_integerToString(myName + myNameLen, codepage, 10);
145
146 return ucnv_createConverter(NULL, myName, err);
147 }
148
149 /* Creating a temporary stack-based object that can be used in one thread,
150 and created from a converter that is shared across threads.
151 */
152
153 U_CAPI UConverter* U_EXPORT2
ucnv_safeClone(const UConverter * cnv,void * stackBuffer,int32_t * pBufferSize,UErrorCode * status)154 ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
155 {
156 UConverter *localConverter, *allocatedConverter;
157 int32_t stackBufferSize;
158 int32_t bufferSizeNeeded;
159 char *stackBufferChars = (char *)stackBuffer;
160 UErrorCode cbErr;
161 UConverterToUnicodeArgs toUArgs = {
162 sizeof(UConverterToUnicodeArgs),
163 TRUE,
164 NULL,
165 NULL,
166 NULL,
167 NULL,
168 NULL,
169 NULL
170 };
171 UConverterFromUnicodeArgs fromUArgs = {
172 sizeof(UConverterFromUnicodeArgs),
173 TRUE,
174 NULL,
175 NULL,
176 NULL,
177 NULL,
178 NULL,
179 NULL
180 };
181
182 UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);
183
184 if (status == NULL || U_FAILURE(*status)){
185 UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
186 return NULL;
187 }
188
189 if (cnv == NULL) {
190 *status = U_ILLEGAL_ARGUMENT_ERROR;
191 UTRACE_EXIT_STATUS(*status);
192 return NULL;
193 }
194
195 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
196 ucnv_getName(cnv, status), cnv, stackBuffer);
197
198 if (cnv->sharedData->impl->safeClone != NULL) {
199 /* call the custom safeClone function for sizing */
200 bufferSizeNeeded = 0;
201 cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
202 if (U_FAILURE(*status)) {
203 UTRACE_EXIT_STATUS(*status);
204 return NULL;
205 }
206 }
207 else
208 {
209 /* inherent sizing */
210 bufferSizeNeeded = sizeof(UConverter);
211 }
212
213 if (pBufferSize == NULL) {
214 stackBufferSize = 1;
215 pBufferSize = &stackBufferSize;
216 } else {
217 stackBufferSize = *pBufferSize;
218 if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
219 *pBufferSize = bufferSizeNeeded;
220 UTRACE_EXIT_VALUE(bufferSizeNeeded);
221 return NULL;
222 }
223 }
224
225
226 /* Pointers on 64-bit platforms need to be aligned
227 * on a 64-bit boundary in memory.
228 */
229 if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
230 int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
231 if(stackBufferSize > offsetUp) {
232 stackBufferSize -= offsetUp;
233 stackBufferChars += offsetUp;
234 } else {
235 /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
236 stackBufferSize = 1;
237 }
238 }
239
240 stackBuffer = (void *)stackBufferChars;
241
242 /* Now, see if we must allocate any memory */
243 if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL)
244 {
245 /* allocate one here...*/
246 localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);
247
248 if(localConverter == NULL) {
249 *status = U_MEMORY_ALLOCATION_ERROR;
250 UTRACE_EXIT_STATUS(*status);
251 return NULL;
252 }
253 *status = U_SAFECLONE_ALLOCATED_WARNING;
254
255 /* record the fact that memory was allocated */
256 *pBufferSize = bufferSizeNeeded;
257 } else {
258 /* just use the stack buffer */
259 localConverter = (UConverter*) stackBuffer;
260 allocatedConverter = NULL;
261 }
262
263 uprv_memset(localConverter, 0, bufferSizeNeeded);
264
265 /* Copy initial state */
266 uprv_memcpy(localConverter, cnv, sizeof(UConverter));
267 localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
268
269 /* copy the substitution string */
270 if (cnv->subChars == (uint8_t *)cnv->subUChars) {
271 localConverter->subChars = (uint8_t *)localConverter->subUChars;
272 } else {
273 localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
274 if (localConverter->subChars == NULL) {
275 uprv_free(allocatedConverter);
276 UTRACE_EXIT_STATUS(*status);
277 return NULL;
278 }
279 uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
280 }
281
282 /* now either call the safeclone fcn or not */
283 if (cnv->sharedData->impl->safeClone != NULL) {
284 /* call the custom safeClone function */
285 localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);
286 }
287
288 if(localConverter==NULL || U_FAILURE(*status)) {
289 if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {
290 uprv_free(allocatedConverter->subChars);
291 }
292 uprv_free(allocatedConverter);
293 UTRACE_EXIT_STATUS(*status);
294 return NULL;
295 }
296
297 /* increment refcount of shared data if needed */
298 if (cnv->sharedData->isReferenceCounted) {
299 ucnv_incrementRefCount(cnv->sharedData);
300 }
301
302 if(localConverter == (UConverter*)stackBuffer) {
303 /* we're using user provided data - set to not destroy */
304 localConverter->isCopyLocal = TRUE;
305 }
306
307 /* allow callback functions to handle any memory allocation */
308 toUArgs.converter = fromUArgs.converter = localConverter;
309 cbErr = U_ZERO_ERROR;
310 cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr);
311 cbErr = U_ZERO_ERROR;
312 cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);
313
314 UTRACE_EXIT_PTR_STATUS(localConverter, *status);
315 return localConverter;
316 }
317
318
319
320 /*Decreases the reference counter in the shared immutable section of the object
321 *and frees the mutable part*/
322
323 U_CAPI void U_EXPORT2
ucnv_close(UConverter * converter)324 ucnv_close (UConverter * converter)
325 {
326 UErrorCode errorCode = U_ZERO_ERROR;
327
328 UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);
329
330 if (converter == NULL)
331 {
332 UTRACE_EXIT();
333 return;
334 }
335
336 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
337 ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);
338
339 /* In order to speed up the close, only call the callbacks when they have been changed.
340 This performance check will only work when the callbacks are set within a shared library
341 or from user code that statically links this code. */
342 /* first, notify the callback functions that the converter is closed */
343 if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
344 UConverterToUnicodeArgs toUArgs = {
345 sizeof(UConverterToUnicodeArgs),
346 TRUE,
347 NULL,
348 NULL,
349 NULL,
350 NULL,
351 NULL,
352 NULL
353 };
354
355 toUArgs.converter = converter;
356 errorCode = U_ZERO_ERROR;
357 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
358 }
359 if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
360 UConverterFromUnicodeArgs fromUArgs = {
361 sizeof(UConverterFromUnicodeArgs),
362 TRUE,
363 NULL,
364 NULL,
365 NULL,
366 NULL,
367 NULL,
368 NULL
369 };
370 fromUArgs.converter = converter;
371 errorCode = U_ZERO_ERROR;
372 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
373 }
374
375 if (converter->sharedData->impl->close != NULL) {
376 converter->sharedData->impl->close(converter);
377 }
378
379 if (converter->subChars != (uint8_t *)converter->subUChars) {
380 uprv_free(converter->subChars);
381 }
382
383 if (converter->sharedData->isReferenceCounted) {
384 ucnv_unloadSharedDataIfReady(converter->sharedData);
385 }
386
387 if(!converter->isCopyLocal){
388 uprv_free(converter);
389 }
390
391 UTRACE_EXIT();
392 }
393
394 /*returns a single Name from the list, will return NULL if out of bounds
395 */
396 U_CAPI const char* U_EXPORT2
ucnv_getAvailableName(int32_t n)397 ucnv_getAvailableName (int32_t n)
398 {
399 if (0 <= n && n <= 0xffff) {
400 UErrorCode err = U_ZERO_ERROR;
401 const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err);
402 if (U_SUCCESS(err)) {
403 return name;
404 }
405 }
406 return NULL;
407 }
408
409 U_CAPI int32_t U_EXPORT2
ucnv_countAvailable()410 ucnv_countAvailable ()
411 {
412 UErrorCode err = U_ZERO_ERROR;
413 return ucnv_bld_countAvailableConverters(&err);
414 }
415
416 U_CAPI void U_EXPORT2
ucnv_getSubstChars(const UConverter * converter,char * mySubChar,int8_t * len,UErrorCode * err)417 ucnv_getSubstChars (const UConverter * converter,
418 char *mySubChar,
419 int8_t * len,
420 UErrorCode * err)
421 {
422 if (U_FAILURE (*err))
423 return;
424
425 if (converter->subCharLen <= 0) {
426 /* Unicode string or empty string from ucnv_setSubstString(). */
427 *len = 0;
428 return;
429 }
430
431 if (*len < converter->subCharLen) /*not enough space in subChars */
432 {
433 *err = U_INDEX_OUTOFBOUNDS_ERROR;
434 return;
435 }
436
437 uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */
438 *len = converter->subCharLen; /*store # of bytes copied to buffer */
439 }
440
441 U_CAPI void U_EXPORT2
ucnv_setSubstChars(UConverter * converter,const char * mySubChar,int8_t len,UErrorCode * err)442 ucnv_setSubstChars (UConverter * converter,
443 const char *mySubChar,
444 int8_t len,
445 UErrorCode * err)
446 {
447 if (U_FAILURE (*err))
448 return;
449
450 /*Makes sure that the subChar is within the codepages char length boundaries */
451 if ((len > converter->sharedData->staticData->maxBytesPerChar)
452 || (len < converter->sharedData->staticData->minBytesPerChar))
453 {
454 *err = U_ILLEGAL_ARGUMENT_ERROR;
455 return;
456 }
457
458 uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */
459 converter->subCharLen = len; /*sets the new len */
460
461 /*
462 * There is currently (2001Feb) no separate API to set/get subChar1.
463 * In order to always have subChar written after it is explicitly set,
464 * we set subChar1 to 0.
465 */
466 converter->subChar1 = 0;
467
468 return;
469 }
470
471 U_CAPI void U_EXPORT2
ucnv_setSubstString(UConverter * cnv,const UChar * s,int32_t length,UErrorCode * err)472 ucnv_setSubstString(UConverter *cnv,
473 const UChar *s,
474 int32_t length,
475 UErrorCode *err) {
476 UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1];
477 char chars[UCNV_ERROR_BUFFER_LENGTH];
478
479 UConverter *clone;
480 uint8_t *subChars;
481 int32_t cloneSize, length8;
482
483 /* Let the following functions check all arguments. */
484 cloneSize = sizeof(cloneBuffer);
485 clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
486 ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);
487 length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
488 ucnv_close(clone);
489 if (U_FAILURE(*err)) {
490 return;
491 }
492
493 if (cnv->sharedData->impl->writeSub == NULL
494 #if !UCONFIG_NO_LEGACY_CONVERSION
495 || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
496 ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
497 #endif
498 ) {
499 /* The converter is not stateful. Store the charset bytes as a fixed string. */
500 subChars = (uint8_t *)chars;
501 } else {
502 /*
503 * The converter has a non-default writeSub() function, indicating
504 * that it is stateful.
505 * Store the Unicode string for on-the-fly conversion for correct
506 * state handling.
507 */
508 if (length > UCNV_ERROR_BUFFER_LENGTH) {
509 /*
510 * Should not occur. The converter should output at least one byte
511 * per UChar, which means that ucnv_fromUChars() should catch all
512 * overflows.
513 */
514 *err = U_BUFFER_OVERFLOW_ERROR;
515 return;
516 }
517 subChars = (uint8_t *)s;
518 if (length < 0) {
519 length = u_strlen(s);
520 }
521 length8 = length * U_SIZEOF_UCHAR;
522 }
523
524 /*
525 * For storing the substitution string, select either the small buffer inside
526 * UConverter or allocate a subChars buffer.
527 */
528 if (length8 > UCNV_MAX_SUBCHAR_LEN) {
529 /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
530 if (cnv->subChars == (uint8_t *)cnv->subUChars) {
531 /* Allocate a new buffer for the string. */
532 cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
533 if (cnv->subChars == NULL) {
534 cnv->subChars = (uint8_t *)cnv->subUChars;
535 *err = U_MEMORY_ALLOCATION_ERROR;
536 return;
537 }
538 uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
539 }
540 }
541
542 /* Copy the substitution string into the UConverter or its subChars buffer. */
543 if (length8 == 0) {
544 cnv->subCharLen = 0;
545 } else {
546 uprv_memcpy(cnv->subChars, subChars, length8);
547 if (subChars == (uint8_t *)chars) {
548 cnv->subCharLen = (int8_t)length8;
549 } else /* subChars == s */ {
550 cnv->subCharLen = (int8_t)-length;
551 }
552 }
553
554 /* See comment in ucnv_setSubstChars(). */
555 cnv->subChar1 = 0;
556 }
557
558 /*resets the internal states of a converter
559 *goal : have the same behaviour than a freshly created converter
560 */
_reset(UConverter * converter,UConverterResetChoice choice,UBool callCallback)561 static void _reset(UConverter *converter, UConverterResetChoice choice,
562 UBool callCallback) {
563 if(converter == NULL) {
564 return;
565 }
566
567 if(callCallback) {
568 /* first, notify the callback functions that the converter is reset */
569 UErrorCode errorCode;
570
571 if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
572 UConverterToUnicodeArgs toUArgs = {
573 sizeof(UConverterToUnicodeArgs),
574 TRUE,
575 NULL,
576 NULL,
577 NULL,
578 NULL,
579 NULL,
580 NULL
581 };
582 toUArgs.converter = converter;
583 errorCode = U_ZERO_ERROR;
584 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
585 }
586 if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
587 UConverterFromUnicodeArgs fromUArgs = {
588 sizeof(UConverterFromUnicodeArgs),
589 TRUE,
590 NULL,
591 NULL,
592 NULL,
593 NULL,
594 NULL,
595 NULL
596 };
597 fromUArgs.converter = converter;
598 errorCode = U_ZERO_ERROR;
599 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
600 }
601 }
602
603 /* now reset the converter itself */
604 if(choice<=UCNV_RESET_TO_UNICODE) {
605 converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
606 converter->mode = 0;
607 converter->toULength = 0;
608 converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
609 converter->preToULength = 0;
610 }
611 if(choice!=UCNV_RESET_TO_UNICODE) {
612 converter->fromUnicodeStatus = 0;
613 converter->fromUChar32 = 0;
614 converter->invalidUCharLength = converter->charErrorBufferLength = 0;
615 converter->preFromUFirstCP = U_SENTINEL;
616 converter->preFromULength = 0;
617 }
618
619 if (converter->sharedData->impl->reset != NULL) {
620 /* call the custom reset function */
621 converter->sharedData->impl->reset(converter, choice);
622 }
623 }
624
625 U_CAPI void U_EXPORT2
ucnv_reset(UConverter * converter)626 ucnv_reset(UConverter *converter)
627 {
628 _reset(converter, UCNV_RESET_BOTH, TRUE);
629 }
630
631 U_CAPI void U_EXPORT2
ucnv_resetToUnicode(UConverter * converter)632 ucnv_resetToUnicode(UConverter *converter)
633 {
634 _reset(converter, UCNV_RESET_TO_UNICODE, TRUE);
635 }
636
637 U_CAPI void U_EXPORT2
ucnv_resetFromUnicode(UConverter * converter)638 ucnv_resetFromUnicode(UConverter *converter)
639 {
640 _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE);
641 }
642
643 U_CAPI int8_t U_EXPORT2
ucnv_getMaxCharSize(const UConverter * converter)644 ucnv_getMaxCharSize (const UConverter * converter)
645 {
646 return converter->maxBytesPerUChar;
647 }
648
649
650 U_CAPI int8_t U_EXPORT2
ucnv_getMinCharSize(const UConverter * converter)651 ucnv_getMinCharSize (const UConverter * converter)
652 {
653 return converter->sharedData->staticData->minBytesPerChar;
654 }
655
656 U_CAPI const char* U_EXPORT2
ucnv_getName(const UConverter * converter,UErrorCode * err)657 ucnv_getName (const UConverter * converter, UErrorCode * err)
658
659 {
660 if (U_FAILURE (*err))
661 return NULL;
662 if(converter->sharedData->impl->getName){
663 const char* temp= converter->sharedData->impl->getName(converter);
664 if(temp)
665 return temp;
666 }
667 return converter->sharedData->staticData->name;
668 }
669
670 U_CAPI int32_t U_EXPORT2
ucnv_getCCSID(const UConverter * converter,UErrorCode * err)671 ucnv_getCCSID(const UConverter * converter,
672 UErrorCode * err)
673 {
674 int32_t ccsid;
675 if (U_FAILURE (*err))
676 return -1;
677
678 ccsid = converter->sharedData->staticData->codepage;
679 if (ccsid == 0) {
680 /* Rare case. This is for cases like gb18030,
681 which doesn't have an IBM canonical name, but does have an IBM alias. */
682 const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
683 if (U_SUCCESS(*err) && standardName) {
684 const char *ccsidStr = uprv_strchr(standardName, '-');
685 if (ccsidStr) {
686 ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */
687 }
688 }
689 }
690 return ccsid;
691 }
692
693
694 U_CAPI UConverterPlatform U_EXPORT2
ucnv_getPlatform(const UConverter * converter,UErrorCode * err)695 ucnv_getPlatform (const UConverter * converter,
696 UErrorCode * err)
697 {
698 if (U_FAILURE (*err))
699 return UCNV_UNKNOWN;
700
701 return (UConverterPlatform)converter->sharedData->staticData->platform;
702 }
703
704 U_CAPI void U_EXPORT2
ucnv_getToUCallBack(const UConverter * converter,UConverterToUCallback * action,const void ** context)705 ucnv_getToUCallBack (const UConverter * converter,
706 UConverterToUCallback *action,
707 const void **context)
708 {
709 *action = converter->fromCharErrorBehaviour;
710 *context = converter->toUContext;
711 }
712
713 U_CAPI void U_EXPORT2
ucnv_getFromUCallBack(const UConverter * converter,UConverterFromUCallback * action,const void ** context)714 ucnv_getFromUCallBack (const UConverter * converter,
715 UConverterFromUCallback *action,
716 const void **context)
717 {
718 *action = converter->fromUCharErrorBehaviour;
719 *context = converter->fromUContext;
720 }
721
722 U_CAPI void U_EXPORT2
ucnv_setToUCallBack(UConverter * converter,UConverterToUCallback newAction,const void * newContext,UConverterToUCallback * oldAction,const void ** oldContext,UErrorCode * err)723 ucnv_setToUCallBack (UConverter * converter,
724 UConverterToUCallback newAction,
725 const void* newContext,
726 UConverterToUCallback *oldAction,
727 const void** oldContext,
728 UErrorCode * err)
729 {
730 if (U_FAILURE (*err))
731 return;
732 if (oldAction) *oldAction = converter->fromCharErrorBehaviour;
733 converter->fromCharErrorBehaviour = newAction;
734 if (oldContext) *oldContext = converter->toUContext;
735 converter->toUContext = newContext;
736 }
737
738 U_CAPI void U_EXPORT2
ucnv_setFromUCallBack(UConverter * converter,UConverterFromUCallback newAction,const void * newContext,UConverterFromUCallback * oldAction,const void ** oldContext,UErrorCode * err)739 ucnv_setFromUCallBack (UConverter * converter,
740 UConverterFromUCallback newAction,
741 const void* newContext,
742 UConverterFromUCallback *oldAction,
743 const void** oldContext,
744 UErrorCode * err)
745 {
746 if (U_FAILURE (*err))
747 return;
748 if (oldAction) *oldAction = converter->fromUCharErrorBehaviour;
749 converter->fromUCharErrorBehaviour = newAction;
750 if (oldContext) *oldContext = converter->fromUContext;
751 converter->fromUContext = newContext;
752 }
753
/*
 * Adjusts an array of offsets in place.
 *
 * The shift applied is sourceIndex-errorInputLength when sourceIndex>=0,
 * and -1 otherwise.
 *   shift == 0: the array is left unchanged
 *   shift  > 0: the shift is added to every offset that is >= 0;
 *               negative offsets are left alone
 *   shift  < 0: every offset is set to -1 (offsets are not handled, or the
 *               error input sequence started in a previous buffer)
 *
 * @param offsets          the offsets to adjust
 * @param length           number of entries in offsets
 * @param sourceIndex      source index to add, or negative if the conversion
 *                         function does not handle offsets
 * @param errorInputLength length of the input sequence that caused an
 *                         error, or 0
 */
static void
_updateOffsets(int32_t *offsets, int32_t length,
               int32_t sourceIndex, int32_t errorInputLength) {
    int32_t shift = sourceIndex>=0 ? sourceIndex-errorInputLength : -1;
    int32_t i;

    if(shift==0) {
        /* most common case, nothing to do */
        return;
    }

    if(shift>0) {
        for(i=0; i<length; ++i) {
            if(offsets[i]>=0) {
                offsets[i]+=shift;
            }
        }
    } else {
        for(i=0; i<length; ++i) {
            offsets[i]=-1;
        }
    }
}
798
799 /* ucnv_fromUnicode --------------------------------------------------------- */
800
801 /*
802 * Implementation note for m:n conversions
803 *
804 * While collecting source units to find the longest match for m:n conversion,
805 * some source units may need to be stored for a partial match.
806 * When a second buffer does not yield a match on all of the previously stored
807 * source units, then they must be "replayed", i.e., fed back into the converter.
808 *
809 * The code relies on the fact that replaying will not nest -
810 * converting a replay buffer will not result in a replay.
811 * This is because a replay is necessary only after the _continuation_ of a
812 * partial match failed, but a replay buffer is converted as a whole.
813 * It may result in some of its units being stored again for a partial match,
814 * but there will not be a continuation _during_ the replay which could fail.
815 *
816 * It is conceivable that a callback function could call the converter
817 * recursively in a way that causes another replay to be stored, but that
818 * would be an error in the callback function.
819 * Such violations will cause assertion failures in a debug build,
820 * and wrong output, but they will not cause a crash.
821 */
822
823 static void
_fromUnicodeWithCallback(UConverterFromUnicodeArgs * pArgs,UErrorCode * err)824 _fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
825 UConverterFromUnicode fromUnicode;
826 UConverter *cnv;
827 const UChar *s;
828 char *t;
829 int32_t *offsets;
830 int32_t sourceIndex;
831 int32_t errorInputLength;
832 UBool converterSawEndOfInput, calledCallback;
833
834 /* variables for m:n conversion */
835 UChar replay[UCNV_EXT_MAX_UCHARS];
836 const UChar *realSource, *realSourceLimit;
837 int32_t realSourceIndex;
838 UBool realFlush;
839
840 cnv=pArgs->converter;
841 s=pArgs->source;
842 t=pArgs->target;
843 offsets=pArgs->offsets;
844
845 /* get the converter implementation function */
846 sourceIndex=0;
847 if(offsets==NULL) {
848 fromUnicode=cnv->sharedData->impl->fromUnicode;
849 } else {
850 fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
851 if(fromUnicode==NULL) {
852 /* there is no WithOffsets implementation */
853 fromUnicode=cnv->sharedData->impl->fromUnicode;
854 /* we will write -1 for each offset */
855 sourceIndex=-1;
856 }
857 }
858
859 if(cnv->preFromULength>=0) {
860 /* normal mode */
861 realSource=NULL;
862
863 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
864 realSourceLimit=NULL;
865 realFlush=FALSE;
866 realSourceIndex=0;
867 } else {
868 /*
869 * Previous m:n conversion stored source units from a partial match
870 * and failed to consume all of them.
871 * We need to "replay" them from a temporary buffer and convert them first.
872 */
873 realSource=pArgs->source;
874 realSourceLimit=pArgs->sourceLimit;
875 realFlush=pArgs->flush;
876 realSourceIndex=sourceIndex;
877
878 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
879 pArgs->source=replay;
880 pArgs->sourceLimit=replay-cnv->preFromULength;
881 pArgs->flush=FALSE;
882 sourceIndex=-1;
883
884 cnv->preFromULength=0;
885 }
886
887 /*
888 * loop for conversion and error handling
889 *
890 * loop {
891 * convert
892 * loop {
893 * update offsets
894 * handle end of input
895 * handle errors/call callback
896 * }
897 * }
898 */
899 for(;;) {
900 if(U_SUCCESS(*err)) {
901 /* convert */
902 fromUnicode(pArgs, err);
903
904 /*
905 * set a flag for whether the converter
906 * successfully processed the end of the input
907 *
908 * need not check cnv->preFromULength==0 because a replay (<0) will cause
909 * s<sourceLimit before converterSawEndOfInput is checked
910 */
911 converterSawEndOfInput=
912 (UBool)(U_SUCCESS(*err) &&
913 pArgs->flush && pArgs->source==pArgs->sourceLimit &&
914 cnv->fromUChar32==0);
915 } else {
916 /* handle error from ucnv_convertEx() */
917 converterSawEndOfInput=FALSE;
918 }
919
920 /* no callback called yet for this iteration */
921 calledCallback=FALSE;
922
923 /* no sourceIndex adjustment for conversion, only for callback output */
924 errorInputLength=0;
925
926 /*
927 * loop for offsets and error handling
928 *
929 * iterates at most 3 times:
930 * 1. to clean up after the conversion function
931 * 2. after the callback
932 * 3. after the callback again if there was truncated input
933 */
934 for(;;) {
935 /* update offsets if we write any */
936 if(offsets!=NULL) {
937 int32_t length=(int32_t)(pArgs->target-t);
938 if(length>0) {
939 _updateOffsets(offsets, length, sourceIndex, errorInputLength);
940
941 /*
942 * if a converter handles offsets and updates the offsets
943 * pointer at the end, then pArgs->offset should not change
944 * here;
945 * however, some converters do not handle offsets at all
946 * (sourceIndex<0) or may not update the offsets pointer
947 */
948 pArgs->offsets=offsets+=length;
949 }
950
951 if(sourceIndex>=0) {
952 sourceIndex+=(int32_t)(pArgs->source-s);
953 }
954 }
955
956 if(cnv->preFromULength<0) {
957 /*
958 * switch the source to new replay units (cannot occur while replaying)
959 * after offset handling and before end-of-input and callback handling
960 */
961 if(realSource==NULL) {
962 realSource=pArgs->source;
963 realSourceLimit=pArgs->sourceLimit;
964 realFlush=pArgs->flush;
965 realSourceIndex=sourceIndex;
966
967 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
968 pArgs->source=replay;
969 pArgs->sourceLimit=replay-cnv->preFromULength;
970 pArgs->flush=FALSE;
971 if((sourceIndex+=cnv->preFromULength)<0) {
972 sourceIndex=-1;
973 }
974
975 cnv->preFromULength=0;
976 } else {
977 /* see implementation note before _fromUnicodeWithCallback() */
978 U_ASSERT(realSource==NULL);
979 *err=U_INTERNAL_PROGRAM_ERROR;
980 }
981 }
982
983 /* update pointers */
984 s=pArgs->source;
985 t=pArgs->target;
986
987 if(U_SUCCESS(*err)) {
988 if(s<pArgs->sourceLimit) {
989 /*
990 * continue with the conversion loop while there is still input left
991 * (continue converting by breaking out of only the inner loop)
992 */
993 break;
994 } else if(realSource!=NULL) {
995 /* switch back from replaying to the real source and continue */
996 pArgs->source=realSource;
997 pArgs->sourceLimit=realSourceLimit;
998 pArgs->flush=realFlush;
999 sourceIndex=realSourceIndex;
1000
1001 realSource=NULL;
1002 break;
1003 } else if(pArgs->flush && cnv->fromUChar32!=0) {
1004 /*
1005 * the entire input stream is consumed
1006 * and there is a partial, truncated input sequence left
1007 */
1008
1009 /* inject an error and continue with callback handling */
1010 *err=U_TRUNCATED_CHAR_FOUND;
1011 calledCallback=FALSE; /* new error condition */
1012 } else {
1013 /* input consumed */
1014 if(pArgs->flush) {
1015 /*
1016 * return to the conversion loop once more if the flush
1017 * flag is set and the conversion function has not
1018 * successfully processed the end of the input yet
1019 *
1020 * (continue converting by breaking out of only the inner loop)
1021 */
1022 if(!converterSawEndOfInput) {
1023 break;
1024 }
1025
1026 /* reset the converter without calling the callback function */
1027 _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
1028 }
1029
1030 /* done successfully */
1031 return;
1032 }
1033 }
1034
1035 /* U_FAILURE(*err) */
1036 {
1037 UErrorCode e;
1038
1039 if( calledCallback ||
1040 (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
1041 (e!=U_INVALID_CHAR_FOUND &&
1042 e!=U_ILLEGAL_CHAR_FOUND &&
1043 e!=U_TRUNCATED_CHAR_FOUND)
1044 ) {
1045 /*
1046 * the callback did not or cannot resolve the error:
1047 * set output pointers and return
1048 *
1049 * the check for buffer overflow is redundant but it is
1050 * a high-runner case and hopefully documents the intent
1051 * well
1052 *
1053 * if we were replaying, then the replay buffer must be
1054 * copied back into the UConverter
1055 * and the real arguments must be restored
1056 */
1057 if(realSource!=NULL) {
1058 int32_t length;
1059
1060 U_ASSERT(cnv->preFromULength==0);
1061
1062 length=(int32_t)(pArgs->sourceLimit-pArgs->source);
1063 if(length>0) {
1064 uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
1065 cnv->preFromULength=(int8_t)-length;
1066 }
1067
1068 pArgs->source=realSource;
1069 pArgs->sourceLimit=realSourceLimit;
1070 pArgs->flush=realFlush;
1071 }
1072
1073 return;
1074 }
1075 }
1076
1077 /* callback handling */
1078 {
1079 UChar32 codePoint;
1080
1081 /* get and write the code point */
1082 codePoint=cnv->fromUChar32;
1083 errorInputLength=0;
1084 U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
1085 cnv->invalidUCharLength=(int8_t)errorInputLength;
1086
1087 /* set the converter state to deal with the next character */
1088 cnv->fromUChar32=0;
1089
1090 /* call the callback function */
1091 cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
1092 cnv->invalidUCharBuffer, errorInputLength, codePoint,
1093 *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
1094 err);
1095 }
1096
1097 /*
1098 * loop back to the offset handling
1099 *
1100 * this flag will indicate after offset handling
1101 * that a callback was called;
1102 * if the callback did not resolve the error, then we return
1103 */
1104 calledCallback=TRUE;
1105 }
1106 }
1107 }
1108
1109 /*
1110 * Output the fromUnicode overflow buffer.
1111 * Call this function if(cnv->charErrorBufferLength>0).
1112 * @return TRUE if overflow
1113 */
1114 static UBool
ucnv_outputOverflowFromUnicode(UConverter * cnv,char ** target,const char * targetLimit,int32_t ** pOffsets,UErrorCode * err)1115 ucnv_outputOverflowFromUnicode(UConverter *cnv,
1116 char **target, const char *targetLimit,
1117 int32_t **pOffsets,
1118 UErrorCode *err) {
1119 int32_t *offsets;
1120 char *overflow, *t;
1121 int32_t i, length;
1122
1123 t=*target;
1124 if(pOffsets!=NULL) {
1125 offsets=*pOffsets;
1126 } else {
1127 offsets=NULL;
1128 }
1129
1130 overflow=(char *)cnv->charErrorBuffer;
1131 length=cnv->charErrorBufferLength;
1132 i=0;
1133 while(i<length) {
1134 if(t==targetLimit) {
1135 /* the overflow buffer contains too much, keep the rest */
1136 int32_t j=0;
1137
1138 do {
1139 overflow[j++]=overflow[i++];
1140 } while(i<length);
1141
1142 cnv->charErrorBufferLength=(int8_t)j;
1143 *target=t;
1144 if(offsets!=NULL) {
1145 *pOffsets=offsets;
1146 }
1147 *err=U_BUFFER_OVERFLOW_ERROR;
1148 return TRUE;
1149 }
1150
1151 /* copy the overflow contents to the target */
1152 *t++=overflow[i++];
1153 if(offsets!=NULL) {
1154 *offsets++=-1; /* no source index available for old output */
1155 }
1156 }
1157
1158 /* the overflow buffer is completely copied to the target */
1159 cnv->charErrorBufferLength=0;
1160 *target=t;
1161 if(offsets!=NULL) {
1162 *pOffsets=offsets;
1163 }
1164 return FALSE;
1165 }
1166
1167 U_CAPI void U_EXPORT2
ucnv_fromUnicode(UConverter * cnv,char ** target,const char * targetLimit,const UChar ** source,const UChar * sourceLimit,int32_t * offsets,UBool flush,UErrorCode * err)1168 ucnv_fromUnicode(UConverter *cnv,
1169 char **target, const char *targetLimit,
1170 const UChar **source, const UChar *sourceLimit,
1171 int32_t *offsets,
1172 UBool flush,
1173 UErrorCode *err) {
1174 UConverterFromUnicodeArgs args;
1175 const UChar *s;
1176 char *t;
1177
1178 /* check parameters */
1179 if(err==NULL || U_FAILURE(*err)) {
1180 return;
1181 }
1182
1183 if(cnv==NULL || target==NULL || source==NULL) {
1184 *err=U_ILLEGAL_ARGUMENT_ERROR;
1185 return;
1186 }
1187
1188 s=*source;
1189 t=*target;
1190
1191 if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) {
1192 /*
1193 Prevent code from going into an infinite loop in case we do hit this
1194 limit. The limit pointer is expected to be on a UChar * boundary.
1195 This also prevents the next argument check from failing.
1196 */
1197 sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1);
1198 }
1199
1200 /*
1201 * All these conditions should never happen.
1202 *
1203 * 1) Make sure that the limits are >= to the address source or target
1204 *
1205 * 2) Make sure that the buffer sizes do not exceed the number range for
1206 * int32_t because some functions use the size (in units or bytes)
1207 * rather than comparing pointers, and because offsets are int32_t values.
1208 *
1209 * size_t is guaranteed to be unsigned and large enough for the job.
1210 *
1211 * Return with an error instead of adjusting the limits because we would
1212 * not be able to maintain the semantics that either the source must be
1213 * consumed or the target filled (unless an error occurs).
1214 * An adjustment would be targetLimit=t+0x7fffffff; for example.
1215 *
1216 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
1217 * to a char * pointer and provide an incomplete UChar code unit.
1218 */
1219 if (sourceLimit<s || targetLimit<t ||
1220 ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) ||
1221 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) ||
1222 (((const char *)sourceLimit-(const char *)s) & 1) != 0)
1223 {
1224 *err=U_ILLEGAL_ARGUMENT_ERROR;
1225 return;
1226 }
1227
1228 /* output the target overflow buffer */
1229 if( cnv->charErrorBufferLength>0 &&
1230 ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)
1231 ) {
1232 /* U_BUFFER_OVERFLOW_ERROR */
1233 return;
1234 }
1235 /* *target may have moved, therefore stop using t */
1236
1237 if(!flush && s==sourceLimit && cnv->preFromULength>=0) {
1238 /* the overflow buffer is emptied and there is no new input: we are done */
1239 return;
1240 }
1241
1242 /*
1243 * Do not simply return with a buffer overflow error if
1244 * !flush && t==targetLimit
1245 * because it is possible that the source will not generate any output.
1246 * For example, the skip callback may be called;
1247 * it does not output anything.
1248 */
1249
1250 /* prepare the converter arguments */
1251 args.converter=cnv;
1252 args.flush=flush;
1253 args.offsets=offsets;
1254 args.source=s;
1255 args.sourceLimit=sourceLimit;
1256 args.target=*target;
1257 args.targetLimit=targetLimit;
1258 args.size=sizeof(args);
1259
1260 _fromUnicodeWithCallback(&args, err);
1261
1262 *source=args.source;
1263 *target=args.target;
1264 }
1265
1266 /* ucnv_toUnicode() --------------------------------------------------------- */
1267
1268 static void
_toUnicodeWithCallback(UConverterToUnicodeArgs * pArgs,UErrorCode * err)1269 _toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
1270 UConverterToUnicode toUnicode;
1271 UConverter *cnv;
1272 const char *s;
1273 UChar *t;
1274 int32_t *offsets;
1275 int32_t sourceIndex;
1276 int32_t errorInputLength;
1277 UBool converterSawEndOfInput, calledCallback;
1278
1279 /* variables for m:n conversion */
1280 char replay[UCNV_EXT_MAX_BYTES];
1281 const char *realSource, *realSourceLimit;
1282 int32_t realSourceIndex;
1283 UBool realFlush;
1284
1285 cnv=pArgs->converter;
1286 s=pArgs->source;
1287 t=pArgs->target;
1288 offsets=pArgs->offsets;
1289
1290 /* get the converter implementation function */
1291 sourceIndex=0;
1292 if(offsets==NULL) {
1293 toUnicode=cnv->sharedData->impl->toUnicode;
1294 } else {
1295 toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
1296 if(toUnicode==NULL) {
1297 /* there is no WithOffsets implementation */
1298 toUnicode=cnv->sharedData->impl->toUnicode;
1299 /* we will write -1 for each offset */
1300 sourceIndex=-1;
1301 }
1302 }
1303
1304 if(cnv->preToULength>=0) {
1305 /* normal mode */
1306 realSource=NULL;
1307
1308 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
1309 realSourceLimit=NULL;
1310 realFlush=FALSE;
1311 realSourceIndex=0;
1312 } else {
1313 /*
1314 * Previous m:n conversion stored source units from a partial match
1315 * and failed to consume all of them.
1316 * We need to "replay" them from a temporary buffer and convert them first.
1317 */
1318 realSource=pArgs->source;
1319 realSourceLimit=pArgs->sourceLimit;
1320 realFlush=pArgs->flush;
1321 realSourceIndex=sourceIndex;
1322
1323 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1324 pArgs->source=replay;
1325 pArgs->sourceLimit=replay-cnv->preToULength;
1326 pArgs->flush=FALSE;
1327 sourceIndex=-1;
1328
1329 cnv->preToULength=0;
1330 }
1331
1332 /*
1333 * loop for conversion and error handling
1334 *
1335 * loop {
1336 * convert
1337 * loop {
1338 * update offsets
1339 * handle end of input
1340 * handle errors/call callback
1341 * }
1342 * }
1343 */
1344 for(;;) {
1345 if(U_SUCCESS(*err)) {
1346 /* convert */
1347 toUnicode(pArgs, err);
1348
1349 /*
1350 * set a flag for whether the converter
1351 * successfully processed the end of the input
1352 *
1353 * need not check cnv->preToULength==0 because a replay (<0) will cause
1354 * s<sourceLimit before converterSawEndOfInput is checked
1355 */
1356 converterSawEndOfInput=
1357 (UBool)(U_SUCCESS(*err) &&
1358 pArgs->flush && pArgs->source==pArgs->sourceLimit &&
1359 cnv->toULength==0);
1360 } else {
1361 /* handle error from getNextUChar() or ucnv_convertEx() */
1362 converterSawEndOfInput=FALSE;
1363 }
1364
1365 /* no callback called yet for this iteration */
1366 calledCallback=FALSE;
1367
1368 /* no sourceIndex adjustment for conversion, only for callback output */
1369 errorInputLength=0;
1370
1371 /*
1372 * loop for offsets and error handling
1373 *
1374 * iterates at most 3 times:
1375 * 1. to clean up after the conversion function
1376 * 2. after the callback
1377 * 3. after the callback again if there was truncated input
1378 */
1379 for(;;) {
1380 /* update offsets if we write any */
1381 if(offsets!=NULL) {
1382 int32_t length=(int32_t)(pArgs->target-t);
1383 if(length>0) {
1384 _updateOffsets(offsets, length, sourceIndex, errorInputLength);
1385
1386 /*
1387 * if a converter handles offsets and updates the offsets
1388 * pointer at the end, then pArgs->offset should not change
1389 * here;
1390 * however, some converters do not handle offsets at all
1391 * (sourceIndex<0) or may not update the offsets pointer
1392 */
1393 pArgs->offsets=offsets+=length;
1394 }
1395
1396 if(sourceIndex>=0) {
1397 sourceIndex+=(int32_t)(pArgs->source-s);
1398 }
1399 }
1400
1401 if(cnv->preToULength<0) {
1402 /*
1403 * switch the source to new replay units (cannot occur while replaying)
1404 * after offset handling and before end-of-input and callback handling
1405 */
1406 if(realSource==NULL) {
1407 realSource=pArgs->source;
1408 realSourceLimit=pArgs->sourceLimit;
1409 realFlush=pArgs->flush;
1410 realSourceIndex=sourceIndex;
1411
1412 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1413 pArgs->source=replay;
1414 pArgs->sourceLimit=replay-cnv->preToULength;
1415 pArgs->flush=FALSE;
1416 if((sourceIndex+=cnv->preToULength)<0) {
1417 sourceIndex=-1;
1418 }
1419
1420 cnv->preToULength=0;
1421 } else {
1422 /* see implementation note before _fromUnicodeWithCallback() */
1423 U_ASSERT(realSource==NULL);
1424 *err=U_INTERNAL_PROGRAM_ERROR;
1425 }
1426 }
1427
1428 /* update pointers */
1429 s=pArgs->source;
1430 t=pArgs->target;
1431
1432 if(U_SUCCESS(*err)) {
1433 if(s<pArgs->sourceLimit) {
1434 /*
1435 * continue with the conversion loop while there is still input left
1436 * (continue converting by breaking out of only the inner loop)
1437 */
1438 break;
1439 } else if(realSource!=NULL) {
1440 /* switch back from replaying to the real source and continue */
1441 pArgs->source=realSource;
1442 pArgs->sourceLimit=realSourceLimit;
1443 pArgs->flush=realFlush;
1444 sourceIndex=realSourceIndex;
1445
1446 realSource=NULL;
1447 break;
1448 } else if(pArgs->flush && cnv->toULength>0) {
1449 /*
1450 * the entire input stream is consumed
1451 * and there is a partial, truncated input sequence left
1452 */
1453
1454 /* inject an error and continue with callback handling */
1455 *err=U_TRUNCATED_CHAR_FOUND;
1456 calledCallback=FALSE; /* new error condition */
1457 } else {
1458 /* input consumed */
1459 if(pArgs->flush) {
1460 /*
1461 * return to the conversion loop once more if the flush
1462 * flag is set and the conversion function has not
1463 * successfully processed the end of the input yet
1464 *
1465 * (continue converting by breaking out of only the inner loop)
1466 */
1467 if(!converterSawEndOfInput) {
1468 break;
1469 }
1470
1471 /* reset the converter without calling the callback function */
1472 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1473 }
1474
1475 /* done successfully */
1476 return;
1477 }
1478 }
1479
1480 /* U_FAILURE(*err) */
1481 {
1482 UErrorCode e;
1483
1484 if( calledCallback ||
1485 (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
1486 (e!=U_INVALID_CHAR_FOUND &&
1487 e!=U_ILLEGAL_CHAR_FOUND &&
1488 e!=U_TRUNCATED_CHAR_FOUND &&
1489 e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
1490 e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
1491 ) {
1492 /*
1493 * the callback did not or cannot resolve the error:
1494 * set output pointers and return
1495 *
1496 * the check for buffer overflow is redundant but it is
1497 * a high-runner case and hopefully documents the intent
1498 * well
1499 *
1500 * if we were replaying, then the replay buffer must be
1501 * copied back into the UConverter
1502 * and the real arguments must be restored
1503 */
1504 if(realSource!=NULL) {
1505 int32_t length;
1506
1507 U_ASSERT(cnv->preToULength==0);
1508
1509 length=(int32_t)(pArgs->sourceLimit-pArgs->source);
1510 if(length>0) {
1511 uprv_memcpy(cnv->preToU, pArgs->source, length);
1512 cnv->preToULength=(int8_t)-length;
1513 }
1514
1515 pArgs->source=realSource;
1516 pArgs->sourceLimit=realSourceLimit;
1517 pArgs->flush=realFlush;
1518 }
1519
1520 return;
1521 }
1522 }
1523
1524 /* copy toUBytes[] to invalidCharBuffer[] */
1525 errorInputLength=cnv->invalidCharLength=cnv->toULength;
1526 if(errorInputLength>0) {
1527 uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
1528 }
1529
1530 /* set the converter state to deal with the next character */
1531 cnv->toULength=0;
1532
1533 /* call the callback function */
1534 if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
1535 cnv->toUCallbackReason = UCNV_UNASSIGNED;
1536 }
1537 cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
1538 cnv->invalidCharBuffer, errorInputLength,
1539 cnv->toUCallbackReason,
1540 err);
1541 cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */
1542
1543 /*
1544 * loop back to the offset handling
1545 *
1546 * this flag will indicate after offset handling
1547 * that a callback was called;
1548 * if the callback did not resolve the error, then we return
1549 */
1550 calledCallback=TRUE;
1551 }
1552 }
1553 }
1554
1555 /*
1556 * Output the toUnicode overflow buffer.
1557 * Call this function if(cnv->UCharErrorBufferLength>0).
1558 * @return TRUE if overflow
1559 */
1560 static UBool
ucnv_outputOverflowToUnicode(UConverter * cnv,UChar ** target,const UChar * targetLimit,int32_t ** pOffsets,UErrorCode * err)1561 ucnv_outputOverflowToUnicode(UConverter *cnv,
1562 UChar **target, const UChar *targetLimit,
1563 int32_t **pOffsets,
1564 UErrorCode *err) {
1565 int32_t *offsets;
1566 UChar *overflow, *t;
1567 int32_t i, length;
1568
1569 t=*target;
1570 if(pOffsets!=NULL) {
1571 offsets=*pOffsets;
1572 } else {
1573 offsets=NULL;
1574 }
1575
1576 overflow=cnv->UCharErrorBuffer;
1577 length=cnv->UCharErrorBufferLength;
1578 i=0;
1579 while(i<length) {
1580 if(t==targetLimit) {
1581 /* the overflow buffer contains too much, keep the rest */
1582 int32_t j=0;
1583
1584 do {
1585 overflow[j++]=overflow[i++];
1586 } while(i<length);
1587
1588 cnv->UCharErrorBufferLength=(int8_t)j;
1589 *target=t;
1590 if(offsets!=NULL) {
1591 *pOffsets=offsets;
1592 }
1593 *err=U_BUFFER_OVERFLOW_ERROR;
1594 return TRUE;
1595 }
1596
1597 /* copy the overflow contents to the target */
1598 *t++=overflow[i++];
1599 if(offsets!=NULL) {
1600 *offsets++=-1; /* no source index available for old output */
1601 }
1602 }
1603
1604 /* the overflow buffer is completely copied to the target */
1605 cnv->UCharErrorBufferLength=0;
1606 *target=t;
1607 if(offsets!=NULL) {
1608 *pOffsets=offsets;
1609 }
1610 return FALSE;
1611 }
1612
1613 U_CAPI void U_EXPORT2
ucnv_toUnicode(UConverter * cnv,UChar ** target,const UChar * targetLimit,const char ** source,const char * sourceLimit,int32_t * offsets,UBool flush,UErrorCode * err)1614 ucnv_toUnicode(UConverter *cnv,
1615 UChar **target, const UChar *targetLimit,
1616 const char **source, const char *sourceLimit,
1617 int32_t *offsets,
1618 UBool flush,
1619 UErrorCode *err) {
1620 UConverterToUnicodeArgs args;
1621 const char *s;
1622 UChar *t;
1623
1624 /* check parameters */
1625 if(err==NULL || U_FAILURE(*err)) {
1626 return;
1627 }
1628
1629 if(cnv==NULL || target==NULL || source==NULL) {
1630 *err=U_ILLEGAL_ARGUMENT_ERROR;
1631 return;
1632 }
1633
1634 s=*source;
1635 t=*target;
1636
1637 if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) {
1638 /*
1639 Prevent code from going into an infinite loop in case we do hit this
1640 limit. The limit pointer is expected to be on a UChar * boundary.
1641 This also prevents the next argument check from failing.
1642 */
1643 targetLimit = (const UChar *)(((const char *)targetLimit) - 1);
1644 }
1645
1646 /*
1647 * All these conditions should never happen.
1648 *
1649 * 1) Make sure that the limits are >= to the address source or target
1650 *
1651 * 2) Make sure that the buffer sizes do not exceed the number range for
1652 * int32_t because some functions use the size (in units or bytes)
1653 * rather than comparing pointers, and because offsets are int32_t values.
1654 *
1655 * size_t is guaranteed to be unsigned and large enough for the job.
1656 *
1657 * Return with an error instead of adjusting the limits because we would
1658 * not be able to maintain the semantics that either the source must be
1659 * consumed or the target filled (unless an error occurs).
1660 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1661 *
1662 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
1663 * to a char * pointer and provide an incomplete UChar code unit.
1664 */
1665 if (sourceLimit<s || targetLimit<t ||
1666 ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
1667 ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) ||
1668 (((const char *)targetLimit-(const char *)t) & 1) != 0
1669 ) {
1670 *err=U_ILLEGAL_ARGUMENT_ERROR;
1671 return;
1672 }
1673
1674 /* output the target overflow buffer */
1675 if( cnv->UCharErrorBufferLength>0 &&
1676 ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)
1677 ) {
1678 /* U_BUFFER_OVERFLOW_ERROR */
1679 return;
1680 }
1681 /* *target may have moved, therefore stop using t */
1682
1683 if(!flush && s==sourceLimit && cnv->preToULength>=0) {
1684 /* the overflow buffer is emptied and there is no new input: we are done */
1685 return;
1686 }
1687
1688 /*
1689 * Do not simply return with a buffer overflow error if
1690 * !flush && t==targetLimit
1691 * because it is possible that the source will not generate any output.
1692 * For example, the skip callback may be called;
1693 * it does not output anything.
1694 */
1695
1696 /* prepare the converter arguments */
1697 args.converter=cnv;
1698 args.flush=flush;
1699 args.offsets=offsets;
1700 args.source=s;
1701 args.sourceLimit=sourceLimit;
1702 args.target=*target;
1703 args.targetLimit=targetLimit;
1704 args.size=sizeof(args);
1705
1706 _toUnicodeWithCallback(&args, err);
1707
1708 *source=args.source;
1709 *target=args.target;
1710 }
1711
1712 /* ucnv_to/fromUChars() ----------------------------------------------------- */
1713
1714 U_CAPI int32_t U_EXPORT2
ucnv_fromUChars(UConverter * cnv,char * dest,int32_t destCapacity,const UChar * src,int32_t srcLength,UErrorCode * pErrorCode)1715 ucnv_fromUChars(UConverter *cnv,
1716 char *dest, int32_t destCapacity,
1717 const UChar *src, int32_t srcLength,
1718 UErrorCode *pErrorCode) {
1719 const UChar *srcLimit;
1720 char *originalDest, *destLimit;
1721 int32_t destLength;
1722
1723 /* check arguments */
1724 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1725 return 0;
1726 }
1727
1728 if( cnv==NULL ||
1729 destCapacity<0 || (destCapacity>0 && dest==NULL) ||
1730 srcLength<-1 || (srcLength!=0 && src==NULL)
1731 ) {
1732 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1733 return 0;
1734 }
1735
1736 /* initialize */
1737 ucnv_resetFromUnicode(cnv);
1738 originalDest=dest;
1739 if(srcLength==-1) {
1740 srcLength=u_strlen(src);
1741 }
1742 if(srcLength>0) {
1743 srcLimit=src+srcLength;
1744 destLimit=dest+destCapacity;
1745
1746 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1747 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
1748 destLimit=(char *)U_MAX_PTR(dest);
1749 }
1750
1751 /* perform the conversion */
1752 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1753 destLength=(int32_t)(dest-originalDest);
1754
1755 /* if an overflow occurs, then get the preflighting length */
1756 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1757 char buffer[1024];
1758
1759 destLimit=buffer+sizeof(buffer);
1760 do {
1761 dest=buffer;
1762 *pErrorCode=U_ZERO_ERROR;
1763 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1764 destLength+=(int32_t)(dest-buffer);
1765 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
1766 }
1767 } else {
1768 destLength=0;
1769 }
1770
1771 return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode);
1772 }
1773
1774 U_CAPI int32_t U_EXPORT2
ucnv_toUChars(UConverter * cnv,UChar * dest,int32_t destCapacity,const char * src,int32_t srcLength,UErrorCode * pErrorCode)1775 ucnv_toUChars(UConverter *cnv,
1776 UChar *dest, int32_t destCapacity,
1777 const char *src, int32_t srcLength,
1778 UErrorCode *pErrorCode) {
1779 const char *srcLimit;
1780 UChar *originalDest, *destLimit;
1781 int32_t destLength;
1782
1783 /* check arguments */
1784 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1785 return 0;
1786 }
1787
1788 if( cnv==NULL ||
1789 destCapacity<0 || (destCapacity>0 && dest==NULL) ||
1790 srcLength<-1 || (srcLength!=0 && src==NULL))
1791 {
1792 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1793 return 0;
1794 }
1795
1796 /* initialize */
1797 ucnv_resetToUnicode(cnv);
1798 originalDest=dest;
1799 if(srcLength==-1) {
1800 srcLength=(int32_t)uprv_strlen(src);
1801 }
1802 if(srcLength>0) {
1803 srcLimit=src+srcLength;
1804 destLimit=dest+destCapacity;
1805
1806 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1807 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
1808 destLimit=(UChar *)U_MAX_PTR(dest);
1809 }
1810
1811 /* perform the conversion */
1812 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1813 destLength=(int32_t)(dest-originalDest);
1814
1815 /* if an overflow occurs, then get the preflighting length */
1816 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR)
1817 {
1818 UChar buffer[1024];
1819
1820 destLimit=buffer+sizeof(buffer)/U_SIZEOF_UCHAR;
1821 do {
1822 dest=buffer;
1823 *pErrorCode=U_ZERO_ERROR;
1824 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1825 destLength+=(int32_t)(dest-buffer);
1826 }
1827 while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
1828 }
1829 } else {
1830 destLength=0;
1831 }
1832
1833 return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode);
1834 }
1835
1836 /* ucnv_getNextUChar() ------------------------------------------------------ */
1837
1838 U_CAPI UChar32 U_EXPORT2
ucnv_getNextUChar(UConverter * cnv,const char ** source,const char * sourceLimit,UErrorCode * err)1839 ucnv_getNextUChar(UConverter *cnv,
1840 const char **source, const char *sourceLimit,
1841 UErrorCode *err) {
1842 UConverterToUnicodeArgs args;
1843 UChar buffer[U16_MAX_LENGTH];
1844 const char *s;
1845 UChar32 c;
1846 int32_t i, length;
1847
1848 /* check parameters */
1849 if(err==NULL || U_FAILURE(*err)) {
1850 return 0xffff;
1851 }
1852
1853 if(cnv==NULL || source==NULL) {
1854 *err=U_ILLEGAL_ARGUMENT_ERROR;
1855 return 0xffff;
1856 }
1857
1858 s=*source;
1859 if(sourceLimit<s) {
1860 *err=U_ILLEGAL_ARGUMENT_ERROR;
1861 return 0xffff;
1862 }
1863
1864 /*
1865 * Make sure that the buffer sizes do not exceed the number range for
1866 * int32_t because some functions use the size (in units or bytes)
1867 * rather than comparing pointers, and because offsets are int32_t values.
1868 *
1869 * size_t is guaranteed to be unsigned and large enough for the job.
1870 *
1871 * Return with an error instead of adjusting the limits because we would
1872 * not be able to maintain the semantics that either the source must be
1873 * consumed or the target filled (unless an error occurs).
1874 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1875 */
1876 if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
1877 *err=U_ILLEGAL_ARGUMENT_ERROR;
1878 return 0xffff;
1879 }
1880
1881 c=U_SENTINEL;
1882
1883 /* flush the target overflow buffer */
1884 if(cnv->UCharErrorBufferLength>0) {
1885 UChar *overflow;
1886
1887 overflow=cnv->UCharErrorBuffer;
1888 i=0;
1889 length=cnv->UCharErrorBufferLength;
1890 U16_NEXT(overflow, i, length, c);
1891
1892 /* move the remaining overflow contents up to the beginning */
1893 if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
1894 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
1895 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
1896 }
1897
1898 if(!U16_IS_LEAD(c) || i<length) {
1899 return c;
1900 }
1901 /*
1902 * Continue if the overflow buffer contained only a lead surrogate,
1903 * in case the converter outputs single surrogates from complete
1904 * input sequences.
1905 */
1906 }
1907
1908 /*
1909 * flush==TRUE is implied for ucnv_getNextUChar()
1910 *
1911 * do not simply return even if s==sourceLimit because the converter may
1912 * not have seen flush==TRUE before
1913 */
1914
1915 /* prepare the converter arguments */
1916 args.converter=cnv;
1917 args.flush=TRUE;
1918 args.offsets=NULL;
1919 args.source=s;
1920 args.sourceLimit=sourceLimit;
1921 args.target=buffer;
1922 args.targetLimit=buffer+1;
1923 args.size=sizeof(args);
1924
1925 if(c<0) {
1926 /*
1927 * call the native getNextUChar() implementation if we are
1928 * at a character boundary (toULength==0)
1929 *
1930 * unlike with _toUnicode(), getNextUChar() implementations must set
1931 * U_TRUNCATED_CHAR_FOUND for truncated input,
1932 * in addition to setting toULength/toUBytes[]
1933 */
1934 if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
1935 c=cnv->sharedData->impl->getNextUChar(&args, err);
1936 *source=s=args.source;
1937 if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
1938 /* reset the converter without calling the callback function */
1939 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1940 return 0xffff; /* no output */
1941 } else if(U_SUCCESS(*err) && c>=0) {
1942 return c;
1943 /*
1944 * else fall through to use _toUnicode() because
1945 * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
1946 * U_FAILURE: call _toUnicode() for callback handling (do not output c)
1947 */
1948 }
1949 }
1950
1951 /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
1952 _toUnicodeWithCallback(&args, err);
1953
1954 if(*err==U_BUFFER_OVERFLOW_ERROR) {
1955 *err=U_ZERO_ERROR;
1956 }
1957
1958 i=0;
1959 length=(int32_t)(args.target-buffer);
1960 } else {
1961 /* write the lead surrogate from the overflow buffer */
1962 buffer[0]=(UChar)c;
1963 args.target=buffer+1;
1964 i=0;
1965 length=1;
1966 }
1967
1968 /* buffer contents starts at i and ends before length */
1969
1970 if(U_FAILURE(*err)) {
1971 c=0xffff; /* no output */
1972 } else if(length==0) {
1973 /* no input or only state changes */
1974 *err=U_INDEX_OUTOFBOUNDS_ERROR;
1975 /* no need to reset explicitly because _toUnicodeWithCallback() did it */
1976 c=0xffff; /* no output */
1977 } else {
1978 c=buffer[0];
1979 i=1;
1980 if(!U16_IS_LEAD(c)) {
1981 /* consume c=buffer[0], done */
1982 } else {
1983 /* got a lead surrogate, see if a trail surrogate follows */
1984 UChar c2;
1985
1986 if(cnv->UCharErrorBufferLength>0) {
1987 /* got overflow output from the conversion */
1988 if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
1989 /* got a trail surrogate, too */
1990 c=U16_GET_SUPPLEMENTARY(c, c2);
1991
1992 /* move the remaining overflow contents up to the beginning */
1993 if((--cnv->UCharErrorBufferLength)>0) {
1994 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
1995 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
1996 }
1997 } else {
1998 /* c is an unpaired lead surrogate, just return it */
1999 }
2000 } else if(args.source<sourceLimit) {
2001 /* convert once more, to buffer[1] */
2002 args.targetLimit=buffer+2;
2003 _toUnicodeWithCallback(&args, err);
2004 if(*err==U_BUFFER_OVERFLOW_ERROR) {
2005 *err=U_ZERO_ERROR;
2006 }
2007
2008 length=(int32_t)(args.target-buffer);
2009 if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
2010 /* got a trail surrogate, too */
2011 c=U16_GET_SUPPLEMENTARY(c, c2);
2012 i=2;
2013 }
2014 }
2015 }
2016 }
2017
2018 /*
2019 * move leftover output from buffer[i..length[
2020 * into the beginning of the overflow buffer
2021 */
2022 if(i<length) {
2023 /* move further overflow back */
2024 int32_t delta=length-i;
2025 if((length=cnv->UCharErrorBufferLength)>0) {
2026 uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
2027 length*U_SIZEOF_UCHAR);
2028 }
2029 cnv->UCharErrorBufferLength=(int8_t)(length+delta);
2030
2031 cnv->UCharErrorBuffer[0]=buffer[i++];
2032 if(delta>1) {
2033 cnv->UCharErrorBuffer[1]=buffer[i];
2034 }
2035 }
2036
2037 *source=args.source;
2038 return c;
2039 }
2040
2041 /* ucnv_convert() and siblings ---------------------------------------------- */
2042
/*
 * Converts bytes at *source (read with sourceCnv) into bytes at *target
 * (written with targetCnv), going through a pivot buffer of UChars unless
 * one of the converter implementations offers a direct fromUTF8/toUTF8
 * function.
 *
 * Arguments, as validated and used below:
 * - targetCnv, sourceCnv: must not be NULL.
 * - target, *target, targetLimit: must not be NULL; targetLimit must not
 *   precede *target. *target is advanced past the bytes written.
 * - source, *source: must not be NULL. *source is advanced to where input
 *   processing stopped.
 * - sourceLimit: may be NULL, in which case the source is treated as a
 *   single-byte-NUL-terminated string; otherwise must not precede *source.
 * - pivotStart: may be NULL only when flush is TRUE; an internal stack
 *   buffer of CHUNK_SIZE UChars is used then. Otherwise pivotStart,
 *   pivotSource, *pivotSource, pivotTarget, *pivotTarget and pivotLimit
 *   must all be non-NULL and pivotStart must be below pivotLimit.
 * - reset: if TRUE, both converters are reset and the pivot pointers are
 *   rewound to pivotStart before converting; if FALSE, pending output in
 *   the targetCnv overflow buffer is written out first.
 * - flush: passed to the toUnicode side; the fromUnicode side is flushed
 *   once the end of the input has been processed. With flush and success,
 *   the output is NUL-terminated if there is room, otherwise
 *   U_STRING_NOT_TERMINATED_WARNING is set.
 * - pErrorCode: must be non-NULL and not already a failure, otherwise the
 *   function returns immediately. Argument problems are reported as
 *   U_ILLEGAL_ARGUMENT_ERROR.
 */
U_CAPI void U_EXPORT2
ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
               char **target, const char *targetLimit,
               const char **source, const char *sourceLimit,
               UChar *pivotStart, UChar **pivotSource,
               UChar **pivotTarget, const UChar *pivotLimit,
               UBool reset, UBool flush,
               UErrorCode *pErrorCode) {
    UChar pivotBuffer[CHUNK_SIZE];
    const UChar *myPivotSource;
    UChar *myPivotTarget;
    const char *s;
    char *t;

    UConverterToUnicodeArgs toUArgs;
    UConverterFromUnicodeArgs fromUArgs;
    UConverterConvert convert;

    /* error checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    if( targetCnv==NULL || sourceCnv==NULL ||
        source==NULL || *source==NULL ||
        target==NULL || *target==NULL || targetLimit==NULL
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    s=*source;
    t=*target;
    if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /*
     * Make sure that the buffer sizes do not exceed the number range for
     * int32_t. See ucnv_toUnicode() for a more detailed comment.
     */
    if(
        (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) ||
        ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    if(pivotStart==NULL) {
        if(!flush) {
            /* streaming conversion requires an explicit pivot buffer */
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }

        /* use the stack pivot buffer */
        myPivotSource=myPivotTarget=pivotStart=pivotBuffer;
        pivotSource=(UChar **)&myPivotSource;
        pivotTarget=&myPivotTarget;
        pivotLimit=pivotBuffer+CHUNK_SIZE;
    } else if(  pivotStart>=pivotLimit ||
                pivotSource==NULL || *pivotSource==NULL ||
                pivotTarget==NULL || *pivotTarget==NULL ||
                pivotLimit==NULL
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    if(sourceLimit==NULL) {
        /* get limit of single-byte-NUL-terminated source string */
        sourceLimit=uprv_strchr(*source, 0);
    }

    if(reset) {
        ucnv_resetToUnicode(sourceCnv);
        ucnv_resetFromUnicode(targetCnv);
        *pivotSource=*pivotTarget=pivotStart;
    } else if(targetCnv->charErrorBufferLength>0) {
        /* output the targetCnv overflow buffer */
        if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {
            /* U_BUFFER_OVERFLOW_ERROR */
            return;
        }
        /* *target has moved, therefore stop using t */

        if( !flush &&
            targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
            sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit
        ) {
            /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
            return;
        }
    }

    /* Is direct-UTF-8 conversion available? */
    if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
        targetCnv->sharedData->impl->fromUTF8!=NULL
    ) {
        convert=targetCnv->sharedData->impl->fromUTF8;
    } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
               sourceCnv->sharedData->impl->toUTF8!=NULL
    ) {
        convert=sourceCnv->sharedData->impl->toUTF8;
    } else {
        convert=NULL;
    }

    /*
     * If direct-UTF-8 conversion is available, then we use a smaller
     * pivot buffer for error handling and partial matches
     * so that we quickly return to direct conversion.
     *
     * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
     *
     * We could reduce the pivot buffer size further, at the cost of
     * buffer overflows from callbacks.
     * The pivot buffer should not be smaller than the maximum number of
     * fromUnicode extension table input UChars
     * (for m:n conversion, see
     * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
     * or 2 for surrogate pairs.
     *
     * Too small a buffer can cause thrashing between pivoting and direct
     * conversion, with function call overhead outweighing the benefits
     * of direct conversion.
     */
    if(convert!=NULL && (pivotLimit-pivotStart)>32) {
        pivotLimit=pivotStart+32;
    }

    /* prepare the converter arguments */
    fromUArgs.converter=targetCnv;
    fromUArgs.flush=FALSE;
    fromUArgs.offsets=NULL;
    fromUArgs.target=*target;
    fromUArgs.targetLimit=targetLimit;
    fromUArgs.size=sizeof(fromUArgs);

    toUArgs.converter=sourceCnv;
    toUArgs.flush=flush;
    toUArgs.offsets=NULL;
    toUArgs.source=s;
    toUArgs.sourceLimit=sourceLimit;
    toUArgs.targetLimit=pivotLimit;
    toUArgs.size=sizeof(toUArgs);

    /*
     * TODO: Consider separating this function into two functions,
     * extracting exactly the conversion loop,
     * for readability and to reduce the set of visible variables.
     *
     * Otherwise stop using s and t from here on.
     */
    s=t=NULL;

    /*
     * conversion loop
     *
     * The sequence of steps in the loop may appear backward,
     * but the principle is simple:
     * In the chain of
     *   source - sourceCnv overflow - pivot - targetCnv overflow - target
     * empty out later buffers before refilling them from earlier ones.
     *
     * The targetCnv overflow buffer is flushed out only once before the loop.
     */
    for(;;) {
        /*
         * if(pivot not empty or error or replay or flush fromUnicode) {
         *   fromUnicode(pivot -> target);
         * }
         *
         * For pivoting conversion; and for direct conversion for
         * error callback handling and flushing the replay buffer.
         */
        if( *pivotSource<*pivotTarget ||
            U_FAILURE(*pErrorCode) ||
            targetCnv->preFromULength<0 ||
            fromUArgs.flush
        ) {
            fromUArgs.source=*pivotSource;
            fromUArgs.sourceLimit=*pivotTarget;
            _fromUnicodeWithCallback(&fromUArgs, pErrorCode);
            if(U_FAILURE(*pErrorCode)) {
                /* target overflow, or conversion error */
                *pivotSource=(UChar *)fromUArgs.source;
                break;
            }

            /*
             * _fromUnicodeWithCallback() must have consumed the pivot contents
             * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
             */
        }

        /* The pivot buffer is empty; reset it so we start at pivotStart. */
        *pivotSource=*pivotTarget=pivotStart;

        /*
         * if(sourceCnv overflow buffer not empty) {
         *     move(sourceCnv overflow buffer -> pivot);
         *     continue;
         * }
         */
        /* output the sourceCnv overflow buffer */
        if(sourceCnv->UCharErrorBufferLength>0) {
            if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {
                /* U_BUFFER_OVERFLOW_ERROR */
                *pErrorCode=U_ZERO_ERROR;
            }
            continue;
        }

        /*
         * check for end of input and break if done
         *
         * Checking both flush and fromUArgs.flush ensures that the converters
         * have been called with the flush flag set if the ucnv_convertEx()
         * caller set it.
         */
        if( toUArgs.source==sourceLimit &&
            sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
            (!flush || fromUArgs.flush)
        ) {
            /* done successfully */
            break;
        }

        /*
         * use direct conversion if available
         * but not if continuing a partial match
         * or flushing the toUnicode replay buffer
         */
        if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {
            if(*pErrorCode==U_USING_DEFAULT_WARNING) {
                /* remove a warning that may be set by this function */
                *pErrorCode=U_ZERO_ERROR;
            }
            convert(&fromUArgs, &toUArgs, pErrorCode);
            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
                break;
            } else if(U_FAILURE(*pErrorCode)) {
                if(sourceCnv->toULength>0) {
                    /*
                     * Fall through to calling _toUnicodeWithCallback()
                     * for callback handling.
                     *
                     * The pivot buffer will be reset with
                     *   *pivotSource=*pivotTarget=pivotStart;
                     * which indicates a toUnicode error to the caller
                     * (*pivotSource==pivotStart shows no pivot UChars consumed).
                     */
                } else {
                    /*
                     * Indicate a fromUnicode error to the caller
                     * (*pivotSource>pivotStart shows some pivot UChars consumed).
                     */
                    *pivotSource=*pivotTarget=pivotStart+1;
                    /*
                     * Loop around to calling _fromUnicodeWithCallback()
                     * for callback handling.
                     */
                    continue;
                }
            } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {
                /*
                 * No error, but the implementation requested to temporarily
                 * fall back to pivoting.
                 */
                *pErrorCode=U_ZERO_ERROR;
            /*
             * The following else branches are almost identical to the end-of-input
             * handling in _toUnicodeWithCallback().
             * Avoid calling it just for the end of input.
             */
            } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */
                /*
                 * the entire input stream is consumed
                 * and there is a partial, truncated input sequence left
                 */

                /* inject an error and continue with callback handling */
                *pErrorCode=U_TRUNCATED_CHAR_FOUND;
            } else {
                /* input consumed */
                if(flush) {
                    /* reset the converters without calling the callback functions */
                    _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);
                    _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);
                }

                /* done successfully */
                break;
            }
        }

        /*
         * toUnicode(source -> pivot);
         *
         * For pivoting conversion; and for direct conversion for
         * error callback handling, continuing partial matches
         * and flushing the replay buffer.
         *
         * The pivot buffer is empty and reset.
         */
        toUArgs.target=pivotStart; /* ==*pivotTarget */
        /* toUArgs.targetLimit=pivotLimit; already set before the loop */
        _toUnicodeWithCallback(&toUArgs, pErrorCode);
        *pivotTarget=toUArgs.target;
        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
            /* pivot overflow: continue with the conversion loop */
            *pErrorCode=U_ZERO_ERROR;
        } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {
            /* conversion error, or there was nothing left to convert */
            break;
        }
        /*
         * else:
         * _toUnicodeWithCallback() wrote into the pivot buffer,
         * continue with fromUnicode conversion.
         *
         * Set the fromUnicode flush flag if we flush and if toUnicode has
         * processed the end of the input.
         */
        if( flush && toUArgs.source==sourceLimit &&
            sourceCnv->preToULength>=0 &&
            sourceCnv->UCharErrorBufferLength==0
        ) {
            fromUArgs.flush=TRUE;
        }
    }

    /*
     * The conversion loop is exited when one of the following is true:
     * - the entire source text has been converted successfully to the target buffer
     * - a target buffer overflow occurred
     * - a conversion error occurred
     */

    /* report back to the caller how far input and output progressed */
    *source=toUArgs.source;
    *target=fromUArgs.target;

    /* terminate the target buffer if possible */
    if(flush && U_SUCCESS(*pErrorCode)) {
        if(*target!=targetLimit) {
            **target=0;
            if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
                *pErrorCode=U_ZERO_ERROR;
            }
        } else {
            *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
        }
    }
}
2400
2401 /* internal implementation of ucnv_convert() etc. with preflighting */
2402 static int32_t
ucnv_internalConvert(UConverter * outConverter,UConverter * inConverter,char * target,int32_t targetCapacity,const char * source,int32_t sourceLength,UErrorCode * pErrorCode)2403 ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
2404 char *target, int32_t targetCapacity,
2405 const char *source, int32_t sourceLength,
2406 UErrorCode *pErrorCode) {
2407 UChar pivotBuffer[CHUNK_SIZE];
2408 UChar *pivot, *pivot2;
2409
2410 char *myTarget;
2411 const char *sourceLimit;
2412 const char *targetLimit;
2413 int32_t targetLength=0;
2414
2415 /* set up */
2416 if(sourceLength<0) {
2417 sourceLimit=uprv_strchr(source, 0);
2418 } else {
2419 sourceLimit=source+sourceLength;
2420 }
2421
2422 /* if there is no input data, we're done */
2423 if(source==sourceLimit) {
2424 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2425 }
2426
2427 pivot=pivot2=pivotBuffer;
2428 myTarget=target;
2429 targetLength=0;
2430
2431 if(targetCapacity>0) {
2432 /* perform real conversion */
2433 targetLimit=target+targetCapacity;
2434 ucnv_convertEx(outConverter, inConverter,
2435 &myTarget, targetLimit,
2436 &source, sourceLimit,
2437 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
2438 FALSE,
2439 TRUE,
2440 pErrorCode);
2441 targetLength=(int32_t)(myTarget-target);
2442 }
2443
2444 /*
2445 * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
2446 * to it but continue the conversion in order to store in targetCapacity
2447 * the number of bytes that was required.
2448 */
2449 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0)
2450 {
2451 char targetBuffer[CHUNK_SIZE];
2452
2453 targetLimit=targetBuffer+CHUNK_SIZE;
2454 do {
2455 *pErrorCode=U_ZERO_ERROR;
2456 myTarget=targetBuffer;
2457 ucnv_convertEx(outConverter, inConverter,
2458 &myTarget, targetLimit,
2459 &source, sourceLimit,
2460 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
2461 FALSE,
2462 TRUE,
2463 pErrorCode);
2464 targetLength+=(int32_t)(myTarget-targetBuffer);
2465 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
2466
2467 /* done with preflighting, set warnings and errors as appropriate */
2468 return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);
2469 }
2470
2471 /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
2472 return targetLength;
2473 }
2474
2475 U_CAPI int32_t U_EXPORT2
ucnv_convert(const char * toConverterName,const char * fromConverterName,char * target,int32_t targetCapacity,const char * source,int32_t sourceLength,UErrorCode * pErrorCode)2476 ucnv_convert(const char *toConverterName, const char *fromConverterName,
2477 char *target, int32_t targetCapacity,
2478 const char *source, int32_t sourceLength,
2479 UErrorCode *pErrorCode) {
2480 UConverter in, out; /* stack-allocated */
2481 UConverter *inConverter, *outConverter;
2482 int32_t targetLength;
2483
2484 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2485 return 0;
2486 }
2487
2488 if( source==NULL || sourceLength<-1 ||
2489 targetCapacity<0 || (targetCapacity>0 && target==NULL)
2490 ) {
2491 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2492 return 0;
2493 }
2494
2495 /* if there is no input data, we're done */
2496 if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2497 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2498 }
2499
2500 /* create the converters */
2501 inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode);
2502 if(U_FAILURE(*pErrorCode)) {
2503 return 0;
2504 }
2505
2506 outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);
2507 if(U_FAILURE(*pErrorCode)) {
2508 ucnv_close(inConverter);
2509 return 0;
2510 }
2511
2512 targetLength=ucnv_internalConvert(outConverter, inConverter,
2513 target, targetCapacity,
2514 source, sourceLength,
2515 pErrorCode);
2516
2517 ucnv_close(inConverter);
2518 ucnv_close(outConverter);
2519
2520 return targetLength;
2521 }
2522
2523 /* @internal */
2524 static int32_t
ucnv_convertAlgorithmic(UBool convertToAlgorithmic,UConverterType algorithmicType,UConverter * cnv,char * target,int32_t targetCapacity,const char * source,int32_t sourceLength,UErrorCode * pErrorCode)2525 ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
2526 UConverterType algorithmicType,
2527 UConverter *cnv,
2528 char *target, int32_t targetCapacity,
2529 const char *source, int32_t sourceLength,
2530 UErrorCode *pErrorCode) {
2531 UConverter algoConverterStatic; /* stack-allocated */
2532 UConverter *algoConverter, *to, *from;
2533 int32_t targetLength;
2534
2535 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2536 return 0;
2537 }
2538
2539 if( cnv==NULL || source==NULL || sourceLength<-1 ||
2540 targetCapacity<0 || (targetCapacity>0 && target==NULL)
2541 ) {
2542 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2543 return 0;
2544 }
2545
2546 /* if there is no input data, we're done */
2547 if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2548 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2549 }
2550
2551 /* create the algorithmic converter */
2552 algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType,
2553 "", 0, pErrorCode);
2554 if(U_FAILURE(*pErrorCode)) {
2555 return 0;
2556 }
2557
2558 /* reset the other converter */
2559 if(convertToAlgorithmic) {
2560 /* cnv->Unicode->algo */
2561 ucnv_resetToUnicode(cnv);
2562 to=algoConverter;
2563 from=cnv;
2564 } else {
2565 /* algo->Unicode->cnv */
2566 ucnv_resetFromUnicode(cnv);
2567 from=algoConverter;
2568 to=cnv;
2569 }
2570
2571 targetLength=ucnv_internalConvert(to, from,
2572 target, targetCapacity,
2573 source, sourceLength,
2574 pErrorCode);
2575
2576 ucnv_close(algoConverter);
2577
2578 return targetLength;
2579 }
2580
2581 U_CAPI int32_t U_EXPORT2
ucnv_toAlgorithmic(UConverterType algorithmicType,UConverter * cnv,char * target,int32_t targetCapacity,const char * source,int32_t sourceLength,UErrorCode * pErrorCode)2582 ucnv_toAlgorithmic(UConverterType algorithmicType,
2583 UConverter *cnv,
2584 char *target, int32_t targetCapacity,
2585 const char *source, int32_t sourceLength,
2586 UErrorCode *pErrorCode) {
2587 return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,
2588 target, targetCapacity,
2589 source, sourceLength,
2590 pErrorCode);
2591 }
2592
2593 U_CAPI int32_t U_EXPORT2
ucnv_fromAlgorithmic(UConverter * cnv,UConverterType algorithmicType,char * target,int32_t targetCapacity,const char * source,int32_t sourceLength,UErrorCode * pErrorCode)2594 ucnv_fromAlgorithmic(UConverter *cnv,
2595 UConverterType algorithmicType,
2596 char *target, int32_t targetCapacity,
2597 const char *source, int32_t sourceLength,
2598 UErrorCode *pErrorCode) {
2599 return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,
2600 target, targetCapacity,
2601 source, sourceLength,
2602 pErrorCode);
2603 }
2604
2605 U_CAPI UConverterType U_EXPORT2
ucnv_getType(const UConverter * converter)2606 ucnv_getType(const UConverter* converter)
2607 {
2608 int8_t type = converter->sharedData->staticData->conversionType;
2609 #if !UCONFIG_NO_LEGACY_CONVERSION
2610 if(type == UCNV_MBCS) {
2611 return ucnv_MBCSGetType(converter);
2612 }
2613 #endif
2614 return (UConverterType)type;
2615 }
2616
2617 U_CAPI void U_EXPORT2
ucnv_getStarters(const UConverter * converter,UBool starters[256],UErrorCode * err)2618 ucnv_getStarters(const UConverter* converter,
2619 UBool starters[256],
2620 UErrorCode* err)
2621 {
2622 if (err == NULL || U_FAILURE(*err)) {
2623 return;
2624 }
2625
2626 if(converter->sharedData->impl->getStarters != NULL) {
2627 converter->sharedData->impl->getStarters(converter, starters, err);
2628 } else {
2629 *err = U_ILLEGAL_ARGUMENT_ERROR;
2630 }
2631 }
2632
ucnv_getAmbiguous(const UConverter * cnv)2633 static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)
2634 {
2635 UErrorCode errorCode;
2636 const char *name;
2637 int32_t i;
2638
2639 if(cnv==NULL) {
2640 return NULL;
2641 }
2642
2643 errorCode=U_ZERO_ERROR;
2644 name=ucnv_getName(cnv, &errorCode);
2645 if(U_FAILURE(errorCode)) {
2646 return NULL;
2647 }
2648
2649 for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i)
2650 {
2651 if(0==uprv_strcmp(name, ambiguousConverters[i].name))
2652 {
2653 return ambiguousConverters+i;
2654 }
2655 }
2656
2657 return NULL;
2658 }
2659
2660 U_CAPI void U_EXPORT2
ucnv_fixFileSeparator(const UConverter * cnv,UChar * source,int32_t sourceLength)2661 ucnv_fixFileSeparator(const UConverter *cnv,
2662 UChar* source,
2663 int32_t sourceLength) {
2664 const UAmbiguousConverter *a;
2665 int32_t i;
2666 UChar variant5c;
2667
2668 if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL)
2669 {
2670 return;
2671 }
2672
2673 variant5c=a->variant5c;
2674 for(i=0; i<sourceLength; ++i) {
2675 if(source[i]==variant5c) {
2676 source[i]=0x5c;
2677 }
2678 }
2679 }
2680
2681 U_CAPI UBool U_EXPORT2
ucnv_isAmbiguous(const UConverter * cnv)2682 ucnv_isAmbiguous(const UConverter *cnv) {
2683 return (UBool)(ucnv_getAmbiguous(cnv)!=NULL);
2684 }
2685
2686 U_CAPI void U_EXPORT2
ucnv_setFallback(UConverter * cnv,UBool usesFallback)2687 ucnv_setFallback(UConverter *cnv, UBool usesFallback)
2688 {
2689 cnv->useFallback = usesFallback;
2690 }
2691
2692 U_CAPI UBool U_EXPORT2
ucnv_usesFallback(const UConverter * cnv)2693 ucnv_usesFallback(const UConverter *cnv)
2694 {
2695 return cnv->useFallback;
2696 }
2697
2698 U_CAPI void U_EXPORT2
ucnv_getInvalidChars(const UConverter * converter,char * errBytes,int8_t * len,UErrorCode * err)2699 ucnv_getInvalidChars (const UConverter * converter,
2700 char *errBytes,
2701 int8_t * len,
2702 UErrorCode * err)
2703 {
2704 if (err == NULL || U_FAILURE(*err))
2705 {
2706 return;
2707 }
2708 if (len == NULL || errBytes == NULL || converter == NULL)
2709 {
2710 *err = U_ILLEGAL_ARGUMENT_ERROR;
2711 return;
2712 }
2713 if (*len < converter->invalidCharLength)
2714 {
2715 *err = U_INDEX_OUTOFBOUNDS_ERROR;
2716 return;
2717 }
2718 if ((*len = converter->invalidCharLength) > 0)
2719 {
2720 uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);
2721 }
2722 }
2723
2724 U_CAPI void U_EXPORT2
ucnv_getInvalidUChars(const UConverter * converter,UChar * errChars,int8_t * len,UErrorCode * err)2725 ucnv_getInvalidUChars (const UConverter * converter,
2726 UChar *errChars,
2727 int8_t * len,
2728 UErrorCode * err)
2729 {
2730 if (err == NULL || U_FAILURE(*err))
2731 {
2732 return;
2733 }
2734 if (len == NULL || errChars == NULL || converter == NULL)
2735 {
2736 *err = U_ILLEGAL_ARGUMENT_ERROR;
2737 return;
2738 }
2739 if (*len < converter->invalidUCharLength)
2740 {
2741 *err = U_INDEX_OUTOFBOUNDS_ERROR;
2742 return;
2743 }
2744 if ((*len = converter->invalidUCharLength) > 0)
2745 {
2746 uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len));
2747 }
2748 }
2749
2750 #define SIG_MAX_LEN 5
2751
2752 U_CAPI const char* U_EXPORT2
ucnv_detectUnicodeSignature(const char * source,int32_t sourceLength,int32_t * signatureLength,UErrorCode * pErrorCode)2753 ucnv_detectUnicodeSignature( const char* source,
2754 int32_t sourceLength,
2755 int32_t* signatureLength,
2756 UErrorCode* pErrorCode) {
2757 int32_t dummy;
2758
2759 /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN
2760 * bytes we don't misdetect something
2761 */
2762 char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
2763 int i = 0;
2764
2765 if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){
2766 return NULL;
2767 }
2768
2769 if(source == NULL || sourceLength < -1){
2770 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
2771 return NULL;
2772 }
2773
2774 if(signatureLength == NULL) {
2775 signatureLength = &dummy;
2776 }
2777
2778 if(sourceLength==-1){
2779 sourceLength=(int32_t)uprv_strlen(source);
2780 }
2781
2782
2783 while(i<sourceLength&& i<SIG_MAX_LEN){
2784 start[i]=source[i];
2785 i++;
2786 }
2787
2788 if(start[0] == '\xFE' && start[1] == '\xFF') {
2789 *signatureLength=2;
2790 return "UTF-16BE";
2791 } else if(start[0] == '\xFF' && start[1] == '\xFE') {
2792 if(start[2] == '\x00' && start[3] =='\x00') {
2793 *signatureLength=4;
2794 return "UTF-32LE";
2795 } else {
2796 *signatureLength=2;
2797 return "UTF-16LE";
2798 }
2799 } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {
2800 *signatureLength=3;
2801 return "UTF-8";
2802 } else if(start[0] == '\x00' && start[1] == '\x00' &&
2803 start[2] == '\xFE' && start[3]=='\xFF') {
2804 *signatureLength=4;
2805 return "UTF-32BE";
2806 } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') {
2807 *signatureLength=3;
2808 return "SCSU";
2809 } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') {
2810 *signatureLength=3;
2811 return "BOCU-1";
2812 } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') {
2813 /*
2814 * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/
2815 * depending on the second UTF-16 code unit.
2816 * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
2817 * if it occurs.
2818 *
2819 * So far we have +/v
2820 */
2821 if(start[3] == '\x38' && start[4] == '\x2D') {
2822 /* 5 bytes +/v8- */
2823 *signatureLength=5;
2824 return "UTF-7";
2825 } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') {
2826 /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */
2827 *signatureLength=4;
2828 return "UTF-7";
2829 }
2830 }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){
2831 *signatureLength=4;
2832 return "UTF-EBCDIC";
2833 }
2834
2835
2836 /* no known Unicode signature byte sequence recognized */
2837 *signatureLength=0;
2838 return NULL;
2839 }
2840
2841 U_CAPI int32_t U_EXPORT2
ucnv_fromUCountPending(const UConverter * cnv,UErrorCode * status)2842 ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status)
2843 {
2844 if(status == NULL || U_FAILURE(*status)){
2845 return -1;
2846 }
2847 if(cnv == NULL){
2848 *status = U_ILLEGAL_ARGUMENT_ERROR;
2849 return -1;
2850 }
2851
2852 if(cnv->preFromUFirstCP >= 0){
2853 return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ;
2854 }else if(cnv->preFromULength < 0){
2855 return -cnv->preFromULength ;
2856 }else if(cnv->fromUChar32 > 0){
2857 return 1;
2858 }
2859 return 0;
2860
2861 }
2862
2863 U_CAPI int32_t U_EXPORT2
ucnv_toUCountPending(const UConverter * cnv,UErrorCode * status)2864 ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){
2865
2866 if(status == NULL || U_FAILURE(*status)){
2867 return -1;
2868 }
2869 if(cnv == NULL){
2870 *status = U_ILLEGAL_ARGUMENT_ERROR;
2871 return -1;
2872 }
2873
2874 if(cnv->preToULength > 0){
2875 return cnv->preToULength ;
2876 }else if(cnv->preToULength < 0){
2877 return -cnv->preToULength;
2878 }else if(cnv->toULength > 0){
2879 return cnv->toULength;
2880 }
2881 return 0;
2882 }
2883
2884 U_CAPI UBool U_EXPORT2
ucnv_isFixedWidth(UConverter * cnv,UErrorCode * status)2885 ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){
2886 if (U_FAILURE(*status)) {
2887 return FALSE;
2888 }
2889
2890 if (cnv == NULL) {
2891 *status = U_ILLEGAL_ARGUMENT_ERROR;
2892 return FALSE;
2893 }
2894
2895 switch (ucnv_getType(cnv)) {
2896 case UCNV_SBCS:
2897 case UCNV_DBCS:
2898 case UCNV_UTF32_BigEndian:
2899 case UCNV_UTF32_LittleEndian:
2900 case UCNV_UTF32:
2901 case UCNV_US_ASCII:
2902 return TRUE;
2903 default:
2904 return FALSE;
2905 }
2906 }
2907 #endif
2908
2909 /*
2910 * Hey, Emacs, please set the following:
2911 *
2912 * Local Variables:
2913 * indent-tabs-mode: nil
2914 * End:
2915 *
2916 */
2917