/*
**********************************************************************
*   Copyright (C) 2000-2006, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
 *  ucnv_cb.c:
 *  External APIs for the ICU's codeset conversion library
 *  Helena Shih
 *
 * Modification History:
 *
 *   Date        Name        Description
 *   7/28/2000   srl         Implementation
 */
15 
/**
 * @name Character Conversion C API
 *
 */
20 
21 #include "unicode/utypes.h"
22 
23 #if !UCONFIG_NO_CONVERSION
24 
25 #include "unicode/ucnv_cb.h"
26 #include "ucnv_bld.h"
27 #include "ucnv_cnv.h"
28 #include "cmemory.h"
29 
/* The offsets need to be updated whenever the target moves. */
/* Note: Recursion may occur in the callback functions. Be sure to update the
offsets correctly if you do not use the ucnv_cbXXX functions. Avoid using the
same callback twice within one call stack if that makes the recursion hard to
reason about. */
34 U_CAPI void  U_EXPORT2
ucnv_cbFromUWriteBytes(UConverterFromUnicodeArgs * args,const char * source,int32_t length,int32_t offsetIndex,UErrorCode * err)35 ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
36                        const char* source,
37                        int32_t length,
38                        int32_t offsetIndex,
39                        UErrorCode * err)
40 {
41     if(U_FAILURE(*err)) {
42         return;
43     }
44 
45     ucnv_fromUWriteBytes(
46         args->converter,
47         source, length,
48         &args->target, args->targetLimit,
49         &args->offsets, offsetIndex,
50         err);
51 }
52 
53 U_CAPI void  U_EXPORT2
ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs * args,const UChar ** source,const UChar * sourceLimit,int32_t offsetIndex,UErrorCode * err)54 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
55                              const UChar** source,
56                              const UChar*  sourceLimit,
57                              int32_t offsetIndex,
58                              UErrorCode * err)
59 {
60     /*
61     This is a fun one.  Recursion can occur - we're basically going to
62     just retry shoving data through the same converter. Note, if you got
63     here through some kind of invalid sequence, you maybe should emit a
64     reset sequence of some kind and/or call ucnv_reset().  Since this
65     IS an actual conversion, take care that you've changed the callback
66     or the data, or you'll get an infinite loop.
67 
68     Please set the err value to something reasonable before calling
69     into this.
70     */
71 
72     char *oldTarget;
73 
74     if(U_FAILURE(*err))
75     {
76         return;
77     }
78 
79     oldTarget = args->target;
80 
81     ucnv_fromUnicode(args->converter,
82         &args->target,
83         args->targetLimit,
84         source,
85         sourceLimit,
86         NULL, /* no offsets */
87         FALSE, /* no flush */
88         err);
89 
90     if(args->offsets)
91     {
92         while (args->target != oldTarget)  /* if it moved at all.. */
93         {
94             *(args->offsets)++ = offsetIndex;
95             oldTarget++;
96         }
97     }
98 
99     /*
100     Note, if you did something like used a Stop subcallback, things would get interesting.
101     In fact, here's where we want to return the partially consumed in-source!
102     */
103     if(*err == U_BUFFER_OVERFLOW_ERROR)
104     /* && (*source < sourceLimit && args->target >= args->targetLimit)
105     -- S. Hrcek */
106     {
107         /* Overflowed the target.  Now, we'll write into the charErrorBuffer.
108         It's a fixed size. If we overflow it... Hmm */
109         char *newTarget;
110         const char *newTargetLimit;
111         UErrorCode err2 = U_ZERO_ERROR;
112 
113         int8_t errBuffLen;
114 
115         errBuffLen  = args->converter->charErrorBufferLength;
116 
117         /* start the new target at the first free slot in the errbuff.. */
118         newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);
119 
120         newTargetLimit = (char *)(args->converter->charErrorBuffer +
121             sizeof(args->converter->charErrorBuffer));
122 
123         if(newTarget >= newTargetLimit)
124         {
125             *err = U_INTERNAL_PROGRAM_ERROR;
126             return;
127         }
128 
129         /* We're going to tell the converter that the errbuff len is empty.
130         This prevents the existing errbuff from being 'flushed' out onto
131         itself.  If the errbuff is needed by the converter this time,
132         we're hosed - we're out of space! */
133 
134         args->converter->charErrorBufferLength = 0;
135 
136         ucnv_fromUnicode(args->converter,
137                          &newTarget,
138                          newTargetLimit,
139                          source,
140                          sourceLimit,
141                          NULL,
142                          FALSE,
143                          &err2);
144 
145         /* We can go ahead and overwrite the  length here. We know just how
146         to recalculate it. */
147 
148         args->converter->charErrorBufferLength = (int8_t)(
149             newTarget - (char*)args->converter->charErrorBuffer);
150 
151         if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
152         {
153             /* now we're REALLY in trouble.
154             Internal program error - callback shouldn't have written this much
155             data!
156             */
157             *err = U_INTERNAL_PROGRAM_ERROR;
158             return;
159         }
160         /*else {*/
161             /* sub errs could be invalid/truncated/illegal chars or w/e.
162             These might want to be passed on up.. But the problem is, we already
163             need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these
164             other errs.. */
165 
166             /*
167             if(U_FAILURE(err2))
168             ??
169             */
170         /*}*/
171     }
172 }
173 
174 U_CAPI void  U_EXPORT2
ucnv_cbFromUWriteSub(UConverterFromUnicodeArgs * args,int32_t offsetIndex,UErrorCode * err)175 ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
176                            int32_t offsetIndex,
177                            UErrorCode * err)
178 {
179     UConverter *converter;
180     int32_t length;
181 
182     if(U_FAILURE(*err)) {
183         return;
184     }
185     converter = args->converter;
186     length = converter->subCharLen;
187 
188     if(length == 0) {
189         return;
190     }
191 
192     if(length < 0) {
193         /*
194          * Write/convert the substitution string. Its real length is -length.
195          * Unlike the escape callback, we need not change the converter's
196          * callback function because ucnv_setSubstString() verified that
197          * the string can be converted, so we will not get a conversion error
198          * and will not recurse.
199          * At worst we should get a U_BUFFER_OVERFLOW_ERROR.
200          */
201         const UChar *source = (const UChar *)converter->subChars;
202         ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err);
203         return;
204     }
205 
206     if(converter->sharedData->impl->writeSub!=NULL) {
207         converter->sharedData->impl->writeSub(args, offsetIndex, err);
208     }
209     else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) {
210         /*
211         TODO: Is this untestable because the MBCS converter has a writeSub function to call
212         and the other converters don't use subChar1?
213         */
214         ucnv_cbFromUWriteBytes(args,
215                                (const char *)&converter->subChar1, 1,
216                                offsetIndex, err);
217     }
218     else {
219         ucnv_cbFromUWriteBytes(args,
220                                (const char *)converter->subChars, length,
221                                offsetIndex, err);
222     }
223 }
224 
225 U_CAPI void  U_EXPORT2
ucnv_cbToUWriteUChars(UConverterToUnicodeArgs * args,const UChar * source,int32_t length,int32_t offsetIndex,UErrorCode * err)226 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
227                             const UChar* source,
228                             int32_t length,
229                             int32_t offsetIndex,
230                             UErrorCode * err)
231 {
232     if(U_FAILURE(*err)) {
233         return;
234     }
235 
236     ucnv_toUWriteUChars(
237         args->converter,
238         source, length,
239         &args->target, args->targetLimit,
240         &args->offsets, offsetIndex,
241         err);
242 }
243 
244 U_CAPI void  U_EXPORT2
ucnv_cbToUWriteSub(UConverterToUnicodeArgs * args,int32_t offsetIndex,UErrorCode * err)245 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
246                          int32_t offsetIndex,
247                        UErrorCode * err)
248 {
249     static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD;
250 
251     /* could optimize this case, just one uchar */
252     if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) {
253         ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err);
254     } else {
255         ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err);
256     }
257 }
258 
259 #endif
260