1      * Summary: interface for the encoding conversion functions
2      * Description: interface for the encoding conversion functions needed for
3      *              XML basic encoding and iconv() support.
4      *
5      * Related specs are
6      * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
7      * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8      * [ISO-8859-1]   ISO Latin-1 characters codes.
9      * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10      *                Worldwide Character Encoding -- Version 1.0", Addison-
11      *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12      *                described in Unicode Technical Report #4.
13      * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14      *                Information Interchange, ANSI X3.4-1986.
15      *
16      * Copy: See Copyright for the status of this software.
17      *
18      * Author: Patrick Monnerat <pm@datasphere.ch>, DATASPHERE S.A.
19
20      /if not defined(XML_CHAR_ENCODING_H__)
21      /define XML_CHAR_ENCODING_H__
22
23      /include "libxmlrpg/xmlversion"
24      /include "libxmlrpg/xmlTypesC"
25
26      * xmlCharEncoding:
27      *
28      * Predefined values for some standard encodings.
29      * Libxml does not do beforehand translation on UTF8 and ISOLatinX.
30      * It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default.
31      *
32      * Anything else would have to be translated to UTF8 before being
33      * given to the parser itself. The BOM for UTF16 and the encoding
34      * declaration are looked at and a converter is looked for at that
35      * point. If not found the parser stops here as asked by the XML REC. A
36      * converter can be registered by the user using
37      * xmlRegisterCharEncodingHandler but the current form doesn't allow
38      * stateful transcoding (a serious problem agreed !). If iconv has been
39      * found it will be used automatically and allow stateful transcoding,
40      * the simplest is then to be sure to enable iconv and to provide iconv
41      * libs for the encoding support needed.
42      *
43      * Note that the generic "UTF-16" is not a predefined value.  Instead, only
44      * the specific UTF-16LE and UTF-16BE are present.
45
      * xmlCharEncoding is declared like(xmlCenum) and based on the pointer
      * named ######typedef######. Within this file it is only referenced
      * through LIKE, as the type of parameters and return values.
      * The named constants below enumerate the encoding identifiers
      * (-1 through 22); the text at the far right of each name line is the
      * fixed-form trailing comment describing the encoding.
46     d xmlCharEncoding...
47     d                 s                   based(######typedef######)
48     d                                     like(xmlCenum)
49     d  XML_CHAR_ENCODING_ERROR...                                              No encoding detected
50     d                 c                   -1
51     d  XML_CHAR_ENCODING_NONE...                                               No encoding detected
52     d                 c                   0
53     d  XML_CHAR_ENCODING_UTF8...                                               UTF-8
54     d                 c                   1
55     d  XML_CHAR_ENCODING_UTF16LE...                                            UTF-16 little endian
56     d                 c                   2
57     d  XML_CHAR_ENCODING_UTF16BE...                                            UTF-16 big endian
58     d                 c                   3
59     d  XML_CHAR_ENCODING_UCS4LE...                                             UCS-4 little endian
60     d                 c                   4
61     d  XML_CHAR_ENCODING_UCS4BE...                                             UCS-4 big endian
62     d                 c                   5
63     d  XML_CHAR_ENCODING_EBCDIC...                                             EBCDIC uh!
64     d                 c                   6
65     d  XML_CHAR_ENCODING_UCS4_2143...                                          UCS-4 unusual order
66     d                 c                   7
67     d  XML_CHAR_ENCODING_UCS4_3412...                                          UCS-4 unusual order
68     d                 c                   8
69     d  XML_CHAR_ENCODING_UCS2...                                               UCS-2
70     d                 c                   9
71     d  XML_CHAR_ENCODING_8859_1...                                             ISO-8859-1 ISOLatin1
72     d                 c                   10
73     d  XML_CHAR_ENCODING_8859_2...                                             ISO-8859-2 ISOLatin2
74     d                 c                   11
75     d  XML_CHAR_ENCODING_8859_3...                                             ISO-8859-3
76     d                 c                   12
77     d  XML_CHAR_ENCODING_8859_4...                                             ISO-8859-4
78     d                 c                   13
79     d  XML_CHAR_ENCODING_8859_5...                                             ISO-8859-5
80     d                 c                   14
81     d  XML_CHAR_ENCODING_8859_6...                                             ISO-8859-6
82     d                 c                   15
83     d  XML_CHAR_ENCODING_8859_7...                                             ISO-8859-7
84     d                 c                   16
85     d  XML_CHAR_ENCODING_8859_8...                                             ISO-8859-8
86     d                 c                   17
87     d  XML_CHAR_ENCODING_8859_9...                                             ISO-8859-9
88     d                 c                   18
89     d  XML_CHAR_ENCODING_2022_JP...                                            ISO-2022-JP
90     d                 c                   19
91     d  XML_CHAR_ENCODING_SHIFT_JIS...                                          Shift_JIS
92     d                 c                   20
93     d  XML_CHAR_ENCODING_EUC_JP...                                             EUC-JP
94     d                 c                   21
95     d  XML_CHAR_ENCODING_ASCII...                                              Pure ASCII
96     d                 c                   22
97
98      * xmlCharEncodingInputFunc:
99      * @out:  a pointer to an array of bytes to store the UTF-8 result
100      * @outlen:  the length of @out
101      * @in:  a pointer to an array of chars in the original encoding
102      * @inlen:  the length of @in
103      *
104      * Take a block of chars in the original encoding and try to convert
105      * it to an UTF-8 block of chars out.
106      *
107      * Returns the number of bytes written, -1 if lack of space, or -2
108      *     if the transcoding failed.
109      * The value of @inlen after return is the number of octets consumed
110      *     if the return value is positive, else unpredictable.
111      * The value of @outlen after return is the number of octets produced.
112
      * Declared as a procedure pointer (procptr) based on the
      * ######typedef###### pointer. Within this file it is referenced
      * through LIKE for the `input' subfield and the `input' parameter.
113     d xmlCharEncodingInputFunc...
114     d                 s               *   based(######typedef######)
115     d                                     procptr
116
117      * xmlCharEncodingOutputFunc:
118      * @out:  a pointer to an array of bytes to store the result
119      * @outlen:  the length of @out
120      * @in:  a pointer to an array of UTF-8 chars
121      * @inlen:  the length of @in
122      *
123      * Take a block of UTF-8 chars in and try to convert it to another
124      * encoding.
125      * Note: a first call designed to produce heading info is called with
126      * in = NULL. If stateful this should also initialize the encoder state.
127      *
128      * Returns the number of bytes written, -1 if lack of space, or -2
129      *     if the transcoding failed.
130      * The value of @inlen after return is the number of octets consumed
131      *     if the return value is positive, else unpredictable.
132      * The value of @outlen after return is the number of octets produced.
133
      * Declared as a procedure pointer (procptr) based on the
      * ######typedef###### pointer. Within this file it is referenced
      * through LIKE for the `output' subfield and the `output' parameter.
134     d xmlCharEncodingOutputFunc...
135     d                 s               *   based(######typedef######)
136     d                                     procptr
137
138      * Block defining the handlers for non UTF-8 encodings.
139      * With iconv or ICU support enabled, each adds two extra fields.
140
      * uconv_t is compiled only when LIBXML_ICU_ENABLED is defined.
      * It is an aligned, qualified data structure holding two pointers;
      * the trailing comments give the C type of each one.
141      /if defined(LIBXML_ICU_ENABLED)
142     d uconv_t         ds                  based(######typedef######)
143     d                                     align qualified
144     d  uconv                          *                                        UConverter *
145     d  utf8                           *                                        UConverter *
146      /endif
147
      * Pointer declaration referenced through LIKE by the prototypes in
      * this file. It is also named as the basing pointer of the
      * xmlCharEncodingHandler data structure.
148     d xmlCharEncodingHandlerPtr...
149     d                 s               *   based(######typedef######)
150
      * Handler record, based on xmlCharEncodingHandlerPtr, with aligned
      * and qualified subfields. name, input and output are always present.
      * The iconv_in/iconv_out and uconv_in/uconv_out pointer pairs exist
      * only when the matching /if condition is defined, so the layout of
      * the structure depends on those compile conditions.
151     d xmlCharEncodingHandler...
152     d                 ds                  based(xmlCharEncodingHandlerPtr)
153     d                                     align qualified
154     d  name                           *                                        char *
155     d  input                              like(xmlCharEncodingInputFunc)
156     d  output                             like(xmlCharEncodingOutputFunc)
157      *
158      /if defined(LIBXML_ICONV_ENABLED)
159     d  iconv_in                       *                                        iconv_t
160     d  iconv_out                      *                                        iconv_t
161      /endif                                                                    LIBXML_ICONV_ENABLED
162      *
163      /if defined(LIBXML_ICU_ENABLED)
164     d  uconv_in                       *                                        uconv_t *
165     d  uconv_out                      *                                        uconv_t *
166      /endif                                                                    LIBXML_ICU_ENABLED
167
168      /include "libxmlrpg/tree"
169
170      * Interfaces for encoding handlers.
171
      * Prototype bound to the external procedure named in extproc().
      * It declares no parameters and no return value.
172     d xmlInitCharEncodingHandlers...
173     d                 pr                  extproc(
174     d                                      'xmlInitCharEncodingHandlers')
175
      * Prototype bound to the external procedure named in extproc().
      * It declares no parameters and no return value.
176     d xmlCleanupCharEncodingHandlers...
177     d                 pr                  extproc(
178     d                                      'xmlCleanupCharEncodingHandlers')
179
      * Prototype bound to the external procedure named in extproc().
      * handler is a handler pointer passed by value; no return value is
      * declared.
180     d xmlRegisterCharEncodingHandler...
181     d                 pr                  extproc(
182     d                                      'xmlRegisterCharEncodingHandler')
183     d  handler                            value like(xmlCharEncodingHandlerPtr)
184
      * Prototype returning a handler pointer (xmlCharEncodingHandlerPtr).
      * enc is an xmlCharEncoding value passed by value.
185     d xmlGetCharEncodingHandler...
186     d                 pr                  extproc('xmlGetCharEncodingHandler')
187     d                                     like(xmlCharEncodingHandlerPtr)
188     d  enc                                value like(xmlCharEncoding)
189
      * Prototype returning a handler pointer (xmlCharEncodingHandlerPtr).
      * name is a pointer passed by value; options(*string) also lets the
      * caller pass a character value, which is then supplied to the
      * procedure as a null-terminated string.
190     d xmlFindCharEncodingHandler...
191     d                 pr                  extproc('xmlFindCharEncodingHandler')
192     d                                     like(xmlCharEncodingHandlerPtr)
193     d  name                           *   value options(*string)               const char *
194
      * Prototype returning a handler pointer (xmlCharEncodingHandlerPtr).
      * name is a string pointer passed by value (options(*string));
      * input and output are procedure pointers passed by value.
195     d xmlNewCharEncodingHandler...
196     d                 pr                  extproc('xmlNewCharEncodingHandler')
197     d                                     like(xmlCharEncodingHandlerPtr)
198     d  name                           *   value options(*string)               const char *
199     d  input                              value like(xmlCharEncodingInputFunc)
200     d  output                             value like(xmlCharEncodingOutputFunc)
201
202      * Interfaces for encoding names and aliases.
203
      * Prototype returning an xmlCint.
      * name and alias are string pointers passed by value
      * (options(*string)).
204     d xmlAddEncodingAlias...
205     d                 pr                  extproc('xmlAddEncodingAlias')
206     d                                     like(xmlCint)
207     d  name                           *   value options(*string)               const char *
208     d  alias                          *   value options(*string)               const char *
209
      * Prototype returning an xmlCint.
      * alias is a string pointer passed by value (options(*string)).
210     d xmlDelEncodingAlias...
211     d                 pr                  extproc('xmlDelEncodingAlias')
212     d                                     like(xmlCint)
213     d  alias                          *   value options(*string)               const char *
214
      * Prototype returning a pointer (C type `const char *' according to
      * the trailing comment).
      * alias is a string pointer passed by value (options(*string)).
215     d xmlGetEncodingAlias...
216     d                 pr              *   extproc('xmlGetEncodingAlias')       const char *
217     d  alias                          *   value options(*string)               const char *
218
      * Prototype bound to the external procedure named in extproc().
      * It declares no parameters and no return value.
219     d xmlCleanupEncodingAliases...
220     d                 pr                  extproc('xmlCleanupEncodingAliases')
221
      * Prototype returning an xmlCharEncoding value.
      * name is a string pointer passed by value (options(*string)).
222     d xmlParseCharEncoding...
223     d                 pr                  extproc('xmlParseCharEncoding')
224     d                                     like(xmlCharEncoding)
225     d  name                           *   value options(*string)               const char *
226
      * Prototype returning a pointer (C type `const char *' according to
      * the trailing comment).
      * enc is an xmlCharEncoding value passed by value.
227     d xmlGetCharEncodingName...
228     d                 pr              *   extproc('xmlGetCharEncodingName')    const char *
229     d  enc                                value like(xmlCharEncoding)
230
231      * Interfaces directly used by the parsers.
232
      * Prototype returning an xmlCharEncoding value.
      * in is a pointer passed by value (options(*string) also accepts a
      * character value); len is an xmlCint passed by value.
233     d xmlDetectCharEncoding...
234     d                 pr                  extproc('xmlDetectCharEncoding')
235     d                                     like(xmlCharEncoding)
236     d  in                             *   value options(*string)               const unsigned char*
237     d  len                                value like(xmlCint)
238
      * Prototype returning an xmlCint.
      * handler is declared likeds() without VALUE, so the caller passes an
      * xmlCharEncodingHandler structure by reference rather than a handler
      * pointer. out and in are xmlBufferPtr values passed by value.
239     d xmlCharEncOutFunc...
240     d                 pr                  extproc('xmlCharEncOutFunc')
241     d                                     like(xmlCint)
242     d  handler                            likeds(xmlCharEncodingHandler)
243     d  out                                value like(xmlBufferPtr)
244     d  in                                 value like(xmlBufferPtr)
245
      * Prototype returning an xmlCint.
      * handler is declared likeds() without VALUE, so the caller passes an
      * xmlCharEncodingHandler structure by reference rather than a handler
      * pointer. out and in are xmlBufferPtr values passed by value.
246     d xmlCharEncInFunc...
247     d                 pr                  extproc('xmlCharEncInFunc')
248     d                                     like(xmlCint)
249     d  handler                            likeds(xmlCharEncodingHandler)
250     d  out                                value like(xmlBufferPtr)
251     d  in                                 value like(xmlBufferPtr)
252
      * Prototype returning an xmlCint.
      * handler is declared likeds() without VALUE, so the caller passes an
      * xmlCharEncodingHandler structure by reference rather than a handler
      * pointer. out and in are xmlBufferPtr values passed by value.
253     d xmlCharEncFirstLine...
254     d                 pr                  extproc('xmlCharEncFirstLine')
255     d                                     like(xmlCint)
256     d  handler                            likeds(xmlCharEncodingHandler)
257     d  out                                value like(xmlBufferPtr)
258     d  in                                 value like(xmlBufferPtr)
259
      * Prototype returning an xmlCint.
      * handler is declared likeds() without VALUE, so the caller passes an
      * xmlCharEncodingHandler structure by reference rather than a handler
      * pointer.
260     d xmlCharEncCloseFunc...
261     d                 pr                  extproc('xmlCharEncCloseFunc')
262     d                                     like(xmlCint)
263     d  handler                            likeds(xmlCharEncodingHandler)
264
265      * Export a few useful functions
266
      * Prototype compiled only when LIBXML_OUTPUT_ENABLED is defined;
      * returns an xmlCint.
      * out is declared with length 65535 and passed by reference;
      * options(*varsize) lets the caller pass a field of another length.
      * outlen and inlen carry no VALUE keyword and are therefore passed by
      * reference. in is a pointer passed by value (options(*string)).
267      /if defined(LIBXML_OUTPUT_ENABLED)
268     d UTF8Toisolat1   pr                  extproc('UTF8Toisolat1')
269     d                                     like(xmlCint)
270     d  out                       65535    options(*varsize)                    unsigned char (*)
271     d  outlen                             like(xmlCint)
272     d  in                             *   value options(*string)               const unsigned char*
273     d  inlen                              like(xmlCint)
274
275      /endif                                                                    LIBXML_OUTPUT_ENABLD
276
      * Prototype returning an xmlCint.
      * out is declared with length 65535 and passed by reference;
      * options(*varsize) lets the caller pass a field of another length.
      * outlen and inlen carry no VALUE keyword and are therefore passed by
      * reference. in is a pointer passed by value (options(*string)).
277     d isolat1ToUTF8   pr                  extproc('isolat1ToUTF8')
278     d                                     like(xmlCint)
279     d  out                       65535    options(*varsize)                    unsigned char (*)
280     d  outlen                             like(xmlCint)
281     d  in                             *   value options(*string)               const unsigned char*
282     d  inlen                              like(xmlCint)
283
284      /endif                                                                    XML_CHAR_ENCODING_H
285