1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2008-2011, International Business Machines
7 *   Corporation, Google and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 */
11 /*
12  * Author : eldawy@google.com (Mohamed Eldawy)
13  * ucnvsel.h
14  *
15  * Purpose: To generate a list of encodings capable of handling
16  * a given Unicode text
17  *
18  * Started 09-April-2008
19  */
20 
21 #ifndef __ICU_UCNV_SEL_H__
22 #define __ICU_UCNV_SEL_H__
23 
24 #include "unicode/utypes.h"
25 
26 #if !UCONFIG_NO_CONVERSION
27 
28 #include "unicode/uset.h"
29 #include "unicode/utf16.h"
30 #include "unicode/uenum.h"
31 #include "unicode/ucnv.h"
32 #include "unicode/localpointer.h"
33 
34 /**
35  * \file
36  *
37  * A converter selector is built with a set of encoding/charset names
38  * and, given an input string, returns the set of names of the
39  * corresponding converters which can convert the string.
40  *
41  * A converter selector can be serialized into a buffer and reopened
42  * from the serialized form.
43  */
44 
45 /**
46  * @{
47  * The selector data structure (only declared here, so opaque to users of this header).
48  */
49 struct UConverterSelector;
50 typedef struct UConverterSelector UConverterSelector;
51 /** @} */
52 
53 /**
54  * Open a selector.
55  * If converterListSize is 0, build for all available converters.
56  * If excludedCodePoints is NULL, don't exclude any code points.
57  *
58  * @param converterList an array of pointers to the encoding names to consider.
59  *                      Can be NULL if converterListSize==0.
60  *                      The list and the names will be cloned, and the caller
61  *                      retains ownership of the original.
62  * @param converterListSize number of encodings in the above list.
63  *                          If 0, builds a selector for all available converters.
64  * @param excludedCodePoints a set of code points to be excluded from consideration.
65  *                           That is, excluded code points in a string do not change
66  *                           the selection result. (They might be handled by a callback.)
67  *                           Use NULL to exclude nothing.
68  * @param whichSet which converter set to use. This determines whether
69  *                 to consider only roundtrip mappings or also fallbacks.
70  * @param status an in/out ICU UErrorCode
71  * @return the new selector (to be closed with ucnvsel_close())
72  *
73  * @stable ICU 4.2
74  */
75 U_STABLE UConverterSelector* U_EXPORT2
76 ucnvsel_open(const char* const*  converterList, int32_t converterListSize,
77              const USet* excludedCodePoints,
78              const UConverterUnicodeSet whichSet, UErrorCode* status);
79 
80 /**
81  * Closes a selector.
82  * If any Enumerations were returned by ucnvsel_select*, they become invalid.
83  * They can be closed before or after calling ucnvsel_close,
84  * but should never be used after the selector is closed.
85  *
86  * @see ucnvsel_selectForString
87  * @see ucnvsel_selectForUTF8
88  *
89  * @param sel selector to close
90  *
91  * @stable ICU 4.2
92  */
93 U_STABLE void U_EXPORT2
94 ucnvsel_close(UConverterSelector *sel);
95 
96 #if U_SHOW_CPLUSPLUS_API
97 
98 U_NAMESPACE_BEGIN
99 
100 /**
101  * \class LocalUConverterSelectorPointer
102  * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close().
103  * For most methods see the LocalPointerBase base class.
104  *
105  * @see LocalPointerBase
106  * @see LocalPointer
107  * @stable ICU 4.4
108  */
109 U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);
110 
111 U_NAMESPACE_END
112 
113 #endif  /* U_SHOW_CPLUSPLUS_API */
114 
115 /**
116  * Open a selector from its serialized form.
117  * The buffer must remain valid and unchanged for the lifetime of the selector.
118  * This is much faster than creating a selector from scratch.
119  * Using a serialized form from a different machine (endianness/charset) is supported.
120  *
121  * @param buffer pointer to the serialized form of a converter selector
122  *               (as written by ucnvsel_serialize()); must be 32-bit-aligned
123  * @param length the capacity of this buffer (can be equal to or larger than
124  *               the actual data length)
125  * @param status an in/out ICU UErrorCode
126  * @return the new selector (to be closed with ucnvsel_close())
127  *
128  * @stable ICU 4.2
129  */
130 U_STABLE UConverterSelector* U_EXPORT2
131 ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
132 
133 /**
134  * Serialize a selector into a linear buffer.
135  * The serialized form is portable to different machines.
136  *
137  * @param sel selector to serialize
138  * @param buffer pointer to 32-bit-aligned memory to be filled with the
139  *               serialized form of this converter selector
140  * @param bufferCapacity the capacity of this buffer
141  * @param status an in/out ICU UErrorCode
142  * @return the buffer capacity required to hold the serialized data (even if the call
143  *         fails with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
144  *
145  * @stable ICU 4.2
146  */
147 U_STABLE int32_t U_EXPORT2
148 ucnvsel_serialize(const UConverterSelector* sel,
149                   void* buffer, int32_t bufferCapacity, UErrorCode* status);
150 
151 /**
152  * Select converters that can map all characters in a UTF-16 string,
153  * ignoring the excluded code points.
154  *
155  * @param sel a selector
156  * @param s UTF-16 string
157  * @param length length of the string, or -1 if NUL-terminated
158  * @param status an in/out ICU UErrorCode
159  * @return an enumeration containing encoding names. The names and their order
160  *         will be the same as supplied when building the selector.
161  *         The enumeration must not be used after the selector is closed.
162  *
163  * @stable ICU 4.2
164  */
165 U_STABLE UEnumeration * U_EXPORT2
166 ucnvsel_selectForString(const UConverterSelector* sel,
167                         const UChar *s, int32_t length, UErrorCode *status);
168 
169 /**
170  * Select converters that can map all characters in a UTF-8 string,
171  * ignoring the excluded code points.
172  *
173  * @param sel a selector
174  * @param s UTF-8 string
175  * @param length length of the string, or -1 if NUL-terminated
176  * @param status an in/out ICU UErrorCode
177  * @return an enumeration containing encoding names. The names and their order
178  *         will be the same as supplied when building the selector.
179  *         The enumeration must not be used after the selector is closed.
180  *
181  * @stable ICU 4.2
182  */
183 U_STABLE UEnumeration * U_EXPORT2
184 ucnvsel_selectForUTF8(const UConverterSelector* sel,
185                       const char *s, int32_t length, UErrorCode *status);
186 
187 #endif  /* !UCONFIG_NO_CONVERSION */
188 
189 #endif  /* __ICU_UCNV_SEL_H__ */
190