1 /**
2 ***     Transcoding support and wrappers.
3 ***
4 ***     See Copyright for the status of this software.
5 ***
6 ***     Author: Patrick Monnerat <pm@datasphere.ch>, DATASPHERE S.A.
7 **/
8 
9 #define IN_LIBXML
10 #include "libxml.h"
11 
12 #include <sys/types.h>
13 #include <iconv.h>
14 #include "libxml/xmlmemory.h"
15 #include "libxml/dict.h"
16 #include "transcode.h"
17 
18 
19 /**
20 ***     Destroy a dictionary and mark as destroyed.
21 **/
22 
23 void
xmlZapDict(xmlDictPtr * dict)24 xmlZapDict(xmlDictPtr * dict)
25 
26 {
27         if (dict && *dict) {
28                 xmlDictFree(*dict);
29                 *dict = (xmlDictPtr) NULL;
30                 }
31 }
32 
33 
34 /**
35 ***     Support for inline conversion from/to UTF-8.
36 ***     This is targetted to function parameter encoding conversion.
37 ***     Method is:
38 ***     -       Convert string from/to UTF-8.
39 ***     -       Keep it in a dictionary.
40 ***     -       Free original string if a release procedure is provided.
41 ***     Can also be called without dictionary to convert a string from/to UTF-8
42 ***             into xmlMalloc'ed dynamic storage.
43 **/
44 
45 const char *
xmlTranscodeResult(const xmlChar * s,const char * encoding,xmlDictPtr * dict,void (* freeproc)(const void *))46 xmlTranscodeResult(const xmlChar * s, const char * encoding,
47                         xmlDictPtr * dict, void (*freeproc)(const void *))
48 
49 {
50         size_t l;
51         iconv_t cd;
52         char * srcp;
53         char * dstp;
54         size_t srcc;
55         size_t dstc;
56         char * ts;
57         const char * ret;
58         int err;
59         static const int nullstring[] = { 0 };
60 
61         /* Convert from UTF-8. */
62 
63         if (!s)
64                 return (const char *) NULL;
65 
66         ret = (const char *) NULL;
67         ts = (char *) NULL;
68         err = 0;
69         l = xmlStrlen(s);
70 
71         if (!l && dict)
72                 ret = (const char *) nullstring;
73         else {
74                 if (dict && !*dict)
75                         err = !(*dict = xmlDictCreate());
76 
77                 if (!err)
78                         err = !(ts = xmlMalloc(4 * l + 4));
79 
80                 dstp = ts;
81                 dstc = 4 * l;
82 
83                 if (!err && l) {
84                         if (!encoding)
85                                 encoding = "ibm-0";     /* Job's encoding. */
86 
87                         cd = iconv_open(encoding, "UTF-8");
88 
89                         if (cd == (iconv_t) -1)
90                                 err = 1;
91                         else {
92                                 srcp = (char *) s;
93                                 srcc = l;
94                                 srcc = iconv(cd, &srcp, &srcc, &dstp, &dstc);
95                                 iconv_close(cd);
96                                 err = srcc == (size_t) -1;
97                                 }
98                         }
99 
100                 if (!err) {
101                         dstp[0] = dstp[1] = dstp[2] = dstp[3] = '\0';
102 
103                         if (!dict) {
104                                 if (dstc)
105                                         ts = xmlRealloc(ts, (dstp - ts) + 4);
106 
107                                 ret = (const char *) ts;
108                                 ts = (char *) NULL;
109                                 }
110                         else
111                                 ret = (char *) xmlDictLookup(*dict,
112                                     (xmlChar *) ts, dstp - ts + 1);
113                         }
114                 }
115 
116         if (ts)
117                 xmlFree(ts);
118 
119         if (freeproc)
120                 (*freeproc)(s);
121 
122         return ret;
123 }
124 
125 
126 /**
127 ***     Support for inline conversion to UTF-8.
128 ***     Method is:
129 ***     -       Convert string to UTF-8.
130 ***     -       Keep it in a dictionary.
131 ***     Can also be called without dictionary to convert a string to UTF-8 into
132 ***             xmlMalloc'ed dynamic storage.
133 **/
134 
135 static const xmlChar *
inTranscode(const char * s,size_t l,const char * encoding,xmlDictPtr * dict)136 inTranscode(const char * s, size_t l, const char * encoding, xmlDictPtr * dict)
137 
138 {
139         iconv_t cd;
140         char * srcp;
141         char * dstp;
142         size_t srcc;
143         size_t dstc;
144         xmlChar * ts;
145         const xmlChar * ret;
146         static const xmlChar nullstring[] = { 0 };
147 
148         if (!l && dict)
149                 return nullstring;
150 
151         if (dict && !*dict)
152                 if (!(*dict = xmlDictCreate()))
153                         return (const xmlChar *) NULL;
154 
155         ts = (xmlChar *) xmlMalloc(6 * l + 1);
156 
157         if (!ts)
158                 return (const xmlChar *) NULL;
159 
160         dstp = (char *) ts;
161         dstc = 6 * l;
162 
163         if (l) {
164                 if (!encoding)
165                         encoding = "ibm-0";     /* Use job's encoding. */
166 
167                 cd = iconv_open("UTF-8", encoding);
168 
169                 if (cd == (iconv_t) -1) {
170                         xmlFree((char *) ts);
171                         return (const xmlChar *) NULL;
172                         }
173 
174                 srcp = (char *) s;
175                 srcc = l;
176                 srcc = iconv(cd, &srcp, &srcc, &dstp, &dstc);
177                 iconv_close(cd);
178 
179                 if (srcc == (size_t) -1) {
180                         xmlFree((char *) ts);
181                         return (const xmlChar *) NULL;
182                         }
183                 }
184 
185         *dstp = '\0';
186 
187         if (!dict) {
188                 if (dstc)
189                         ts = xmlRealloc(ts, (dstp - ts) + 1);
190 
191                 return ts;
192                 }
193 
194         ret = xmlDictLookup(*dict, ts, dstp - ts + 1);
195         xmlFree((char *) ts);
196         return ret;
197 }
198 
199 
200 /**
201 ***     Input 8-bit character string parameter.
202 **/
203 
204 const xmlChar *
xmlTranscodeString(const char * s,const char * encoding,xmlDictPtr * dict)205 xmlTranscodeString(const char * s, const char * encoding, xmlDictPtr * dict)
206 
207 {
208         if (!s)
209                 return (const xmlChar *) NULL;
210 
211         return inTranscode(s, xmlStrlen(s), encoding, dict);
212 }
213 
214 
215 /**
216 ***     Input 16-bit character string parameter.
217 **/
218 
219 const xmlChar *
xmlTranscodeWString(const char * s,const char * encoding,xmlDictPtr * dict)220 xmlTranscodeWString(const char * s, const char * encoding, xmlDictPtr * dict)
221 
222 {
223         size_t i;
224 
225         if (!s)
226                 return (const xmlChar *) NULL;
227 
228         for (i = 0; s[i] && s[i + 1]; i += 2)
229                 ;
230 
231         return inTranscode(s, i, encoding, dict);
232 }
233 
234 
235 /**
236 ***     Input 32-bit character string parameter.
237 **/
238 
239 const xmlChar *
xmlTranscodeHString(const char * s,const char * encoding,xmlDictPtr * dict)240 xmlTranscodeHString(const char * s, const char * encoding, xmlDictPtr * dict)
241 
242 {
243         size_t i;
244 
245         if (!s)
246                 return (const xmlChar *) NULL;
247 
248         for (i = 0; s[i] && s[i + 1] && s[i + 2] && s[i + 3]; i += 4)
249                 ;
250 
251         return inTranscode(s, i, encoding, dict);
252 }
253 
254 
255 /**
256 ***     vasprintf() implementation with result transcoding.
257 **/
258 
259 const char *
xmlVasprintf(xmlDictPtr * dict,const char * encoding,const xmlChar * fmt,va_list args)260 xmlVasprintf(xmlDictPtr * dict, const char * encoding,
261                                         const xmlChar * fmt, va_list args)
262 
263 {
264         char * s = NULL;
265 
266         vasprintf(&s, fmt, args);
267         return xmlTranscodeResult((const xmlChar *) s, encoding, dict, free);
268 }
269