1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 1998-2008, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File ucbuf.h
10 *
11 * Modification History:
12 *
13 *   Date        Name        Description
14 *   05/10/01    Ram         Creation.
15 *
16 * This API reads in files and returns UChars
17 *******************************************************************************
18 */
19 
20 #include "unicode/ucnv.h"
21 #include "filestrm.h"
22 
23 #if !UCONFIG_NO_CONVERSION
24 
25 #ifndef UCBUF_H
26 #define UCBUF_H 1
27 
28 typedef struct UCHARBUF UCHARBUF; /* forward declaration only; struct UCHARBUF is not defined in the visible part of this header */
29 /**
30  * End of file sentinel value (all 32 bits set). NOTE(review): presumably returned by the ucbuf_getc* functions at end of input -- confirm.
31  */
32 #define U_EOF 0xFFFFFFFF
33 /**
34  * Error sentinel value, used when an escape sequence cannot be unescaped.
35  */
36 #define U_ERR 0xFFFFFFFE
37 
38 typedef struct ULine ULine;
39 /* ULine pairs a UChar pointer with an int32_t length. */
40 struct  ULine {
41     UChar     *name; /* NOTE(review): presumably the text of the line -- confirm against users of ULine */
42     int32_t   len;   /* NOTE(review): presumably the length of name in UChars -- confirm */
43 };
44 
45 /**
46  * Opens the UCHARBUF for the given file and code page for conversion
47  * @param fileName  Name of the file to open.
48  * @param codepage  The encoding of the file stream to convert to Unicode.
49  *                  If *codepage is NULL on input the API will try to autodetect
50  *                  popular Unicode encodings
51  * @param showWarning Flag to print out warnings to STDOUT
52  * @param buffered  If TRUE performs a buffered read of the input file. If FALSE reads
53  *                  the whole file into memory and converts it.
54  * @param err Pointer to a valid <code>UErrorCode</code> value. If this value
55  *        indicates a failure on entry, the function will immediately return.
56  *        On exit the value will indicate the success of the operation.
57  * @return Pointer to the newly opened UCHARBUF (see ucbuf_close() for cleanup)
58  */
59 U_CAPI UCHARBUF* U_EXPORT2
60 ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err);
61 
62 /**
63  * Gets a UTF-16 code unit at the current position from the converted buffer
64  * and increments the current position
65  * @param buf Pointer to UCHARBUF structure
66  * @param err Pointer to a valid <code>UErrorCode</code> value. If it indicates a failure on entry,
67  *        the function will immediately return. On exit it will indicate the success of the operation.
68  * @return The UTF-16 code unit read. NOTE(review): presumably U_EOF at end of input -- confirm.
69  */
70 U_CAPI int32_t U_EXPORT2
71 ucbuf_getc(UCHARBUF* buf,UErrorCode* err);
72 
73 /**
74  * Gets a UTF-32 code point at the current position from the converted buffer
75  * and increments the current position
76  * @param buf Pointer to UCHARBUF structure
77  * @param err Pointer to a valid <code>UErrorCode</code> value. If it indicates a failure on entry,
78  *        the function will immediately return. On exit it will indicate the success of the operation.
79  * @return The UTF-32 code point read. NOTE(review): presumably U_EOF at end of input -- confirm.
80  */
81 U_CAPI int32_t U_EXPORT2
82 ucbuf_getc32(UCHARBUF* buf,UErrorCode* err);
83 
84 /**
85  * Gets a UTF-16 code unit at the current position from the converted buffer after
86  * unescaping and increments the current position. If the escape sequence is for a UTF-32
87  * code point (\\Uxxxxxxxx) then a UTF-32 code point is returned
88  * @param buf Pointer to UCHARBUF structure
89  * @param err Pointer to a valid <code>UErrorCode</code> value. If it indicates a failure on entry,
90  *        the function will immediately return. On exit it will indicate the success of the operation.
91  * @return The unescaped code unit or code point. NOTE(review): presumably U_ERR if unescaping fails -- confirm.
92  */
93 U_CAPI int32_t U_EXPORT2
94 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err);
95 
96 /**
97  * Gets a pointer to the current position in the internal buffer and the length of the line.
98  * It is imperative to make a copy of the returned buffer before performing operations on it.
99  * @param buf Pointer to UCHARBUF structure
100  * @param len Output param to receive the length of the returned buffer up to the end of the line
101  * @param err Pointer to a valid <code>UErrorCode</code> value. If this value
102  *        indicates a failure on entry, the function will immediately return.
103  *        On exit the value will indicate the success of the operation.
104  *        Error: U_TRUNCATED_CHAR_FOUND
105  * @return Pointer to the internal buffer, NULL if EOF
106  */
107 U_CAPI const UChar* U_EXPORT2
108 ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err);
109 
110 
111 /**
112  * Resets the buffers and the underlying file stream.
113  * @param buf Pointer to UCHARBUF structure
114  * @param err Pointer to a valid <code>UErrorCode</code> value. If this value
115  *        indicates a failure on entry, the function will immediately return.
116  *        On exit the value will indicate the success of the operation.
117  */
118 U_CAPI void U_EXPORT2
119 ucbuf_rewind(UCHARBUF* buf,UErrorCode* err);
120 
121 /**
122  * Returns a pointer to the internal converted buffer
123  * @param buf Pointer to UCHARBUF structure
124  * @param len Pointer to int32_t to receive the length of the buffer
125  * @param err Pointer to a valid <code>UErrorCode</code> value. If this value
126  *        indicates a failure on entry, the function will immediately return.
127  *        On exit the value will indicate the success of the operation.
128  * @return Pointer to the internal UChar buffer
129  */
130 U_CAPI const UChar* U_EXPORT2
131 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err);
132 
133 /**
134  * Closes the members of the UCHARBUF structure and cleans up the malloc'ed memory
135  * @param buf Pointer to the UCHARBUF structure to close
136  */
137 U_CAPI void U_EXPORT2
138 ucbuf_close(UCHARBUF* buf);
139 
140 /**
141  * Rewinds the buffer buf by one code point. Does not rewind over escaped characters. NOTE(review): ungetChar is presumably the code point just read -- confirm.
142  */
143 U_CAPI void U_EXPORT2
144 ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf);
145 
146 
147 /**
148  * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
149  * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
150  * the converter to correct state for converting the rest of the stream. So the UConverter parameter
151  * is necessary.
152  * If the charset was autodetected, the caller must close both the input FileStream
153  * and the converter.
154  *
155  * @param fileName The name of the file to be opened and whose encoding is to be autodetected
156  * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
157  * @param cp Output param to receive the detected encoding
158  * @param signatureLength Output param. NOTE(review): presumably the byte length of the detected signature -- confirm.
159  * @param status Pointer to a valid <code>UErrorCode</code> value. If it indicates a failure on entry,
160  *        the function will immediately return. On exit it will indicate the success of the operation.
161  * @return The input FileStream if its charset was autodetected; NULL otherwise.
162  */
163 U_CAPI FileStream * U_EXPORT2
164 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv,
165 int32_t* signatureLength, UErrorCode* status);
166 
167 /**
168  * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
169  * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
170  * the converter to correct state for converting the rest of the stream. So the UConverter parameter
171  * is necessary.
172  * If the charset was autodetected, the caller must close the converter.
173  *
174  * @param in The file stream whose encoding is to be detected
175  * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
176  * @param cp Output param to receive the detected encoding
177  * @param signatureLength Output param. NOTE(review): presumably the byte length of the detected signature -- confirm.
178  * @param status Pointer to a valid <code>UErrorCode</code> value. If it indicates a failure on entry,
179  *        the function will immediately return. On exit it will indicate the success of the operation.
180  * @return Boolean whether the Unicode charset was autodetected.
181  */
182 
183 U_CAPI UBool U_EXPORT2
184 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status);
185 
186 /**
187  * Returns the approximate size in UChars required for converting the file to UChars (buf: pointer to UCHARBUF structure)
188  */
189 U_CAPI int32_t U_EXPORT2
190 ucbuf_size(UCHARBUF* buf);
191 /* NOTE(review): undocumented in this header; presumably resolves fileName against inputDir into target, with len describing target's size -- confirm in the implementation. */
192 U_CAPI const char* U_EXPORT2
193 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status);
194 
195 #endif
196 #endif
197 
198