1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 1998-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *
11 * File ucbuf.h
12 *
13 * Modification History:
14 *
15 *   Date        Name        Description
16 *   05/10/01    Ram         Creation.
17 *
18 * This API reads in files and returns UChars
19 *******************************************************************************
20 */
21 
22 #include "unicode/localpointer.h"
23 #include "unicode/ucnv.h"
24 #include "filestrm.h"
25 
26 #if !UCONFIG_NO_CONVERSION
27 
28 #ifndef UCBUF_H
29 #define UCBUF_H 1
30 
31 typedef struct UCHARBUF UCHARBUF; /* opaque handle: struct UCHARBUF is only forward-declared in this header */
32 /**
33  * End-of-file sentinel value (all 32 bits set).
34  */
35 #define U_EOF 0xFFFFFFFF
36 /**
37  * Error sentinel value, used when an escape sequence cannot be unescaped.
38  */
39 #define U_ERR 0xFFFFFFFE
40 
41 typedef struct ULine ULine; /* pairs a UChar pointer with an int32_t length */
42 
43 struct  ULine {
44     UChar     *name; /* NOTE(review): presumably the UChar text of the line - confirm against users */
45     int32_t   len;   /* NOTE(review): presumably the length of name in UChars - confirm */
46 };
47 
48 /**
49  * Opens a UCHARBUF for the given file name and code page for conversion
50  * @param fileName  Name of the file to open.
51  * @param codepage  The encoding of the file stream to convert to Unicode.
52  *                  If *codepage is NULL on input the API will try to autodetect
53  *                  popular Unicode encodings
54  * @param showWarning Flag to print out warnings to STDOUT
55  * @param buffered  If TRUE performs a buffered read of the input file. If FALSE reads
56  *                  the whole file into memory and converts it.
57  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
58  *        indicates a failure on entry, the function will immediately return.
59  *        On exit the value will indicate the success of the operation.
60  * @return pointer to the newly opened UCHARBUF; release it with ucbuf_close()
61  */
62 U_CAPI UCHARBUF* U_EXPORT2
63 ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err);
64 
65 /**
66  * Gets a UTF-16 code unit at the current position from the converted buffer
67  * and increments the current position
68  * @param buf Pointer to UCHARBUF structure
69  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value indicates a failure
70  *        on entry, the function will immediately return. On exit it indicates the success of the operation.
71  * @return The UTF-16 code unit read. NOTE(review): presumably U_EOF at end of input - confirm.
72  */
73 U_CAPI int32_t U_EXPORT2
74 ucbuf_getc(UCHARBUF* buf,UErrorCode* err);
75 
76 /**
77  * Gets a UTF-32 code point at the current position from the converted buffer
78  * and increments the current position
79  * @param buf Pointer to UCHARBUF structure
80  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value indicates a failure
81  *        on entry, the function will immediately return. On exit it indicates the success of the operation.
82  * @return The UTF-32 code point read. NOTE(review): presumably U_EOF at end of input - confirm.
83  */
84 U_CAPI int32_t U_EXPORT2
85 ucbuf_getc32(UCHARBUF* buf,UErrorCode* err);
86 
87 /**
88  * Gets a UTF-16 code unit at the current position from the converted buffer after
89  * unescaping and increments the current position. If the escape sequence is for a UTF-32
90  * code point (\\Uxxxxxxxx) then a UTF-32 code point is returned
91  * @param buf Pointer to UCHARBUF structure
92  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value indicates a failure
93  *        on entry, the function will immediately return. On exit it indicates the success of the operation.
94  * @return The unescaped code unit or code point. NOTE(review): presumably U_ERR for a bad escape - confirm.
95  */
96 U_CAPI int32_t U_EXPORT2
97 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err);
98 
99 /**
100  * Gets a pointer to the current position in the internal buffer and the length of the line.
101  * It is imperative to make a copy of the returned buffer before performing operations on it.
102  * @param buf Pointer to UCHARBUF structure
103  * @param len Output param to receive the length of the returned buffer up to the end of the line
104  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
105  *        indicates a failure on entry, the function will immediately return.
106  *        On exit the value will indicate the success of the operation.
107  *        Error: U_TRUNCATED_CHAR_FOUND
108  * @return Pointer to the internal buffer, NULL if EOF
109  */
110 U_CAPI const UChar* U_EXPORT2
111 ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err);
112 
113 
114 /**
115  * Resets the buffers and the underlying file stream.
116  * @param buf Pointer to the UCHARBUF structure to rewind
117  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
118  *        indicates a failure on entry, the function will immediately return.
119  *        On exit the value will indicate the success of the operation.
120  */
121 U_CAPI void U_EXPORT2
122 ucbuf_rewind(UCHARBUF* buf,UErrorCode* err);
123 
124 /**
125  * Returns a pointer to the internal converted buffer
126  * @param buf Pointer to UCHARBUF structure
127  * @param len Pointer to int32_t to receive the length of the buffer
128  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
129  *        indicates a failure on entry, the function will immediately return.
130  *        On exit the value will indicate the success of the operation.
131  * @return Pointer to the internal UChar buffer
132  */
133 U_CAPI const UChar* U_EXPORT2
134 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err);
135 
136 /**
137  * Closes the UCHARBUF structure members and cleans up the malloc'ed memory
138  * @param buf Pointer to the UCHARBUF structure to close
139  */
140 U_CAPI void U_EXPORT2
141 ucbuf_close(UCHARBUF* buf);
142 
143 #if U_SHOW_CPLUSPLUS_API
144 
145 U_NAMESPACE_BEGIN
146 
147 /**
148  * \class LocalUCHARBUFPointer
149  * "Smart pointer" class, closes a UCHARBUF via ucbuf_close().
150  * For most methods see the LocalPointerBase base class.
151  *
152  * @see LocalPointerBase
153  * @see LocalPointer
154  */
155 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCHARBUFPointer, UCHARBUF, ucbuf_close);
156 
157 U_NAMESPACE_END
158 
159 #endif /* U_SHOW_CPLUSPLUS_API */
160 
161 /** Rewinds the buffer by one codepoint. Does not rewind over escaped characters.
162  * @param ungetChar NOTE(review): presumably the character just read that is being pushed back - confirm.
163  * @param buf Pointer to UCHARBUF structure */
164 U_CAPI void U_EXPORT2
165 ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf);
166 
167 
168 /**
169  * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
170  * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
171  * the converter to correct state for converting the rest of the stream. So the UConverter parameter
172  * is necessary.
173  * If the charset was autodetected, the caller must close both the input FileStream
174  * and the converter.
175  *
176  * @param fileName The file name to be opened and encoding autodetected
177  * @param cp Output param to receive the detected encoding
178  * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
179  * @param signatureLength Output param. NOTE(review): presumably the length of the detected signature - confirm.
180  * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value indicates a failure
181  *        on entry, the function will immediately return. On exit it indicates the success of the operation.
182  * @return The input FileStream if its charset was autodetected; NULL otherwise.
183  */
184 U_CAPI FileStream * U_EXPORT2
185 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv,
186 int32_t* signatureLength, UErrorCode* status);
187 
188 /**
189  * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
190  * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
191  * the converter to correct state for converting the rest of the stream. So the UConverter parameter
192  * is necessary.
193  * If the charset was autodetected, the caller must close the converter.
194  *
195  * @param in The file stream whose encoding is to be detected
196  * @param cp Output param to receive the detected encoding
197  * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
198  * @param signatureLength Output param. NOTE(review): presumably the length of the detected signature - confirm.
199  * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value indicates a failure
200  *        on entry, the function will immediately return. On exit it indicates the success of the operation.
201  * @return Boolean whether the Unicode charset was autodetected.
202  */
203 
204 U_CAPI UBool U_EXPORT2
205 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status);
206 
207 /**
208  * Returns the approximate size in UChars required for converting the file to UChars
209  * @param buf Pointer to UCHARBUF structure */
210 U_CAPI int32_t U_EXPORT2
211 ucbuf_size(UCHARBUF* buf);
212 
213 U_CAPI const char* U_EXPORT2 /* NOTE(review): undocumented; presumably combines inputDir and fileName into target - confirm */
214 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status); /* NOTE(review): presumably *len is the capacity/length of target - confirm against the implementation */
215 
216 #endif
217 #endif
218 
219