1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2008-2011, International Business Machines
5 *   Corporation, Google and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 */
9 /*
10  * Author : eldawy@google.com (Mohamed Eldawy)
11  * ucnvsel.h
12  *
13  * Purpose: To generate a list of encodings capable of handling
14  * a given Unicode text
15  *
16  * Started 09-April-2008
17  */
18 
19 #ifndef __ICU_UCNV_SEL_H__
20 #define __ICU_UCNV_SEL_H__
21 
22 #include "unicode/utypes.h"
23 
24 #if !UCONFIG_NO_CONVERSION
25 
26 #include "unicode/uset.h"
27 #include "unicode/utf16.h"
28 #include "unicode/uenum.h"
29 #include "unicode/ucnv.h"
30 #include "unicode/localpointer.h"
31 
32 /**
33  * \file
34  *
35  * A converter selector is built with a set of encoding/charset names
36  * and given an input string returns the set of names of the
37  * corresponding converters which can convert the string.
38  *
39  * A converter selector can be serialized into a buffer and reopened
40  * from the serialized form.
41  */
42 
43 /**
44  * @{
45  * The selector data structure (only forward-declared in this header).
46  */
47 struct UConverterSelector;
48 typedef struct UConverterSelector UConverterSelector;
49 /** @} */
50 
51 /**
52  * Open a selector.
53  * If converterListSize is 0, build for all available converters.
54  * If excludedCodePoints is NULL, don't exclude any code points.
55  *
56  * @param converterList a pointer to the encoding names to build the selector for.
57  *                      Can be NULL if converterListSize==0.
58  *                      The list and the names will be cloned, and the caller
59  *                      retains ownership of the original.
60  * @param converterListSize number of encodings in the above list.
61  *                          If 0, builds a selector for all available converters.
62  * @param excludedCodePoints a set of code points to be excluded from consideration.
63  *                           That is, excluded code points in a string do not change
64  *                           the selection result. (They might be handled by a callback.)
65  *                           Use NULL to exclude nothing.
66  * @param whichSet which converter set to use; this determines whether
67  *                 to consider only roundtrip mappings or also fallbacks.
68  * @param status an in/out ICU UErrorCode
69  * @return the new selector; release it with ucnvsel_close()
70  *
71  * @stable ICU 4.2
72  */
73 U_STABLE UConverterSelector* U_EXPORT2
74 ucnvsel_open(const char* const*  converterList, int32_t converterListSize,
75              const USet* excludedCodePoints,
76              const UConverterUnicodeSet whichSet, UErrorCode* status);
77 
78 /**
79  * Closes a selector.
80  * If any Enumerations were returned by ucnvsel_select*, they become invalid.
81  * They can be closed before or after calling ucnvsel_close,
82  * but should never be used after the selector is closed.
83  *
84  * @see ucnvsel_selectForString
85  * @see ucnvsel_selectForUTF8
86  *
87  * @param sel selector to close
88  *
89  * @stable ICU 4.2
90  */
91 U_STABLE void U_EXPORT2
92 ucnvsel_close(UConverterSelector *sel);
93 
94 #if U_SHOW_CPLUSPLUS_API
95 
96 U_NAMESPACE_BEGIN
97 
98 /**
99  * \class LocalUConverterSelectorPointer
100  * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close().
101  * For most methods see the LocalPointerBase base class.
102  *
103  * @see LocalPointerBase
104  * @see LocalPointer
105  * @stable ICU 4.4
106  */
107 U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);
108 
109 U_NAMESPACE_END
110 
111 #endif  /* U_SHOW_CPLUSPLUS_API */
112 
113 /**
114  * Open a selector from its serialized form (see ucnvsel_serialize()).
115  * The buffer must remain valid and unchanged for the lifetime of the selector.
116  * This is much faster than creating a selector from scratch.
117  * Using a serialized form from a different machine (endianness/charset) is supported.
118  *
119  * @param buffer pointer to the serialized form of a converter selector;
120  *               must be 32-bit-aligned
121  * @param length the capacity of this buffer (can be equal to or larger than
122  *               the actual data length)
123  * @param status an in/out ICU UErrorCode
124  * @return the new selector; release it with ucnvsel_close()
125  *
126  * @stable ICU 4.2
127  */
128 U_STABLE UConverterSelector* U_EXPORT2
129 ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
130 
131 /**
132  * Serialize a selector into a linear buffer (reopen with ucnvsel_openFromSerialized()).
133  * The serialized form is portable to different machines.
134  *
135  * @param sel selector to serialize
136  * @param buffer pointer to 32-bit-aligned memory to be filled with the
137  *               serialized form of this converter selector
138  * @param bufferCapacity the capacity of this buffer
139  * @param status an in/out ICU UErrorCode
140  * @return the required buffer capacity to hold the serialized data (even if the call
141  *         fails with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
142  *
143  * @stable ICU 4.2
144  */
145 U_STABLE int32_t U_EXPORT2
146 ucnvsel_serialize(const UConverterSelector* sel,
147                   void* buffer, int32_t bufferCapacity, UErrorCode* status);
148 
149 /**
150  * Select converters that can map all characters in a UTF-16 string,
151  * ignoring the excluded code points.
152  *
153  * @param sel a selector
154  * @param s UTF-16 string
155  * @param length length of the string, or -1 if NUL-terminated
156  * @param status an in/out ICU UErrorCode
157  * @return an enumeration containing encoding names (invalid once sel is closed).
158  *         The returned encoding names and their order will be the same as
159  *         supplied when building the selector.
160  *
161  * @stable ICU 4.2
162  */
163 U_STABLE UEnumeration * U_EXPORT2
164 ucnvsel_selectForString(const UConverterSelector* sel,
165                         const UChar *s, int32_t length, UErrorCode *status);
166 
167 /**
168  * Select converters that can map all characters in a UTF-8 string,
169  * ignoring the excluded code points.
170  *
171  * @param sel a selector
172  * @param s UTF-8 string
173  * @param length length of the string, or -1 if NUL-terminated
174  * @param status an in/out ICU UErrorCode
175  * @return an enumeration containing encoding names (invalid once sel is closed).
176  *         The returned encoding names and their order will be the same as
177  *         supplied when building the selector.
178  *
179  * @stable ICU 4.2
180  */
181 U_STABLE UEnumeration * U_EXPORT2
182 ucnvsel_selectForUTF8(const UConverterSelector* sel,
183                       const char *s, int32_t length, UErrorCode *status);
184 
185 #endif  /* !UCONFIG_NO_CONVERSION */
186 
187 #endif  /* __ICU_UCNV_SEL_H__ */
188