1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2000-2006, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8  *  ucnv_cb.c:
9  *  External APIs for the ICU's codeset conversion library
10  *  Helena Shih
11  *
12  * Modification History:
13  *
14  *   Date        Name        Description
15  *   7/28/2000   srl         Implementation
16  */
17 
18 /**
19  * @name Character Conversion C API
20  *
21  */
22 
23 #include "unicode/utypes.h"
24 
25 #if !UCONFIG_NO_CONVERSION
26 
27 #include "unicode/ucnv_cb.h"
28 #include "ucnv_bld.h"
29 #include "ucnv_cnv.h"
30 #include "cmemory.h"
31 
/*
 * The offsets need to be updated whenever the target moves.
 * Note: Recursion may occur in the callback functions. If you do not use the
 * ucnv_cbXXX functions, be sure to update the offsets correctly yourself.
 * Make sure you do not use the same callback again within the same call
 * stack if such complexity arises.
 */
36 U_CAPI void  U_EXPORT2
ucnv_cbFromUWriteBytes(UConverterFromUnicodeArgs * args,const char * source,int32_t length,int32_t offsetIndex,UErrorCode * err)37 ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
38                        const char* source,
39                        int32_t length,
40                        int32_t offsetIndex,
41                        UErrorCode * err)
42 {
43     if(U_FAILURE(*err)) {
44         return;
45     }
46 
47     ucnv_fromUWriteBytes(
48         args->converter,
49         source, length,
50         &args->target, args->targetLimit,
51         &args->offsets, offsetIndex,
52         err);
53 }
54 
55 U_CAPI void  U_EXPORT2
ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs * args,const UChar ** source,const UChar * sourceLimit,int32_t offsetIndex,UErrorCode * err)56 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
57                              const UChar** source,
58                              const UChar*  sourceLimit,
59                              int32_t offsetIndex,
60                              UErrorCode * err)
61 {
62     /*
63     This is a fun one.  Recursion can occur - we're basically going to
64     just retry shoving data through the same converter. Note, if you got
65     here through some kind of invalid sequence, you maybe should emit a
66     reset sequence of some kind and/or call ucnv_reset().  Since this
67     IS an actual conversion, take care that you've changed the callback
68     or the data, or you'll get an infinite loop.
69 
70     Please set the err value to something reasonable before calling
71     into this.
72     */
73 
74     char *oldTarget;
75 
76     if(U_FAILURE(*err))
77     {
78         return;
79     }
80 
81     oldTarget = args->target;
82 
83     ucnv_fromUnicode(args->converter,
84         &args->target,
85         args->targetLimit,
86         source,
87         sourceLimit,
88         NULL, /* no offsets */
89         FALSE, /* no flush */
90         err);
91 
92     if(args->offsets)
93     {
94         while (args->target != oldTarget)  /* if it moved at all.. */
95         {
96             *(args->offsets)++ = offsetIndex;
97             oldTarget++;
98         }
99     }
100 
101     /*
102     Note, if you did something like used a Stop subcallback, things would get interesting.
103     In fact, here's where we want to return the partially consumed in-source!
104     */
105     if(*err == U_BUFFER_OVERFLOW_ERROR)
106     /* && (*source < sourceLimit && args->target >= args->targetLimit)
107     -- S. Hrcek */
108     {
109         /* Overflowed the target.  Now, we'll write into the charErrorBuffer.
110         It's a fixed size. If we overflow it... Hmm */
111         char *newTarget;
112         const char *newTargetLimit;
113         UErrorCode err2 = U_ZERO_ERROR;
114 
115         int8_t errBuffLen;
116 
117         errBuffLen  = args->converter->charErrorBufferLength;
118 
119         /* start the new target at the first free slot in the errbuff.. */
120         newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);
121 
122         newTargetLimit = (char *)(args->converter->charErrorBuffer +
123             sizeof(args->converter->charErrorBuffer));
124 
125         if(newTarget >= newTargetLimit)
126         {
127             *err = U_INTERNAL_PROGRAM_ERROR;
128             return;
129         }
130 
131         /* We're going to tell the converter that the errbuff len is empty.
132         This prevents the existing errbuff from being 'flushed' out onto
133         itself.  If the errbuff is needed by the converter this time,
134         we're hosed - we're out of space! */
135 
136         args->converter->charErrorBufferLength = 0;
137 
138         ucnv_fromUnicode(args->converter,
139                          &newTarget,
140                          newTargetLimit,
141                          source,
142                          sourceLimit,
143                          NULL,
144                          FALSE,
145                          &err2);
146 
147         /* We can go ahead and overwrite the  length here. We know just how
148         to recalculate it. */
149 
150         args->converter->charErrorBufferLength = (int8_t)(
151             newTarget - (char*)args->converter->charErrorBuffer);
152 
153         if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
154         {
155             /* now we're REALLY in trouble.
156             Internal program error - callback shouldn't have written this much
157             data!
158             */
159             *err = U_INTERNAL_PROGRAM_ERROR;
160             return;
161         }
162         /*else {*/
163             /* sub errs could be invalid/truncated/illegal chars or w/e.
164             These might want to be passed on up.. But the problem is, we already
165             need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these
166             other errs.. */
167 
168             /*
169             if(U_FAILURE(err2))
170             ??
171             */
172         /*}*/
173     }
174 }
175 
176 U_CAPI void  U_EXPORT2
ucnv_cbFromUWriteSub(UConverterFromUnicodeArgs * args,int32_t offsetIndex,UErrorCode * err)177 ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
178                            int32_t offsetIndex,
179                            UErrorCode * err)
180 {
181     UConverter *converter;
182     int32_t length;
183 
184     if(U_FAILURE(*err)) {
185         return;
186     }
187     converter = args->converter;
188     length = converter->subCharLen;
189 
190     if(length == 0) {
191         return;
192     }
193 
194     if(length < 0) {
195         /*
196          * Write/convert the substitution string. Its real length is -length.
197          * Unlike the escape callback, we need not change the converter's
198          * callback function because ucnv_setSubstString() verified that
199          * the string can be converted, so we will not get a conversion error
200          * and will not recurse.
201          * At worst we should get a U_BUFFER_OVERFLOW_ERROR.
202          */
203         const UChar *source = (const UChar *)converter->subChars;
204         ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err);
205         return;
206     }
207 
208     if(converter->sharedData->impl->writeSub!=NULL) {
209         converter->sharedData->impl->writeSub(args, offsetIndex, err);
210     }
211     else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) {
212         /*
213         TODO: Is this untestable because the MBCS converter has a writeSub function to call
214         and the other converters don't use subChar1?
215         */
216         ucnv_cbFromUWriteBytes(args,
217                                (const char *)&converter->subChar1, 1,
218                                offsetIndex, err);
219     }
220     else {
221         ucnv_cbFromUWriteBytes(args,
222                                (const char *)converter->subChars, length,
223                                offsetIndex, err);
224     }
225 }
226 
227 U_CAPI void  U_EXPORT2
ucnv_cbToUWriteUChars(UConverterToUnicodeArgs * args,const UChar * source,int32_t length,int32_t offsetIndex,UErrorCode * err)228 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
229                             const UChar* source,
230                             int32_t length,
231                             int32_t offsetIndex,
232                             UErrorCode * err)
233 {
234     if(U_FAILURE(*err)) {
235         return;
236     }
237 
238     ucnv_toUWriteUChars(
239         args->converter,
240         source, length,
241         &args->target, args->targetLimit,
242         &args->offsets, offsetIndex,
243         err);
244 }
245 
246 U_CAPI void  U_EXPORT2
ucnv_cbToUWriteSub(UConverterToUnicodeArgs * args,int32_t offsetIndex,UErrorCode * err)247 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
248                          int32_t offsetIndex,
249                        UErrorCode * err)
250 {
251     static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD;
252 
253     /* could optimize this case, just one uchar */
254     if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) {
255         ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err);
256     } else {
257         ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err);
258     }
259 }
260 
261 #endif
262