1 /*
2 **********************************************************************
3 * Copyright (c) 2004,2011 International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 * Author: Alan Liu
7 * Created: March 19 2004
8 * Since: ICU 3.0
9 **********************************************************************
10 */
11 #include "textfile.h"
12 #include "cmemory.h"
13 #include "cstring.h"
14 #include "intltest.h"
15 #include "util.h"
16 
17 // If the symbol CCP is defined, then the 'name' and 'encoding'
18 // constructor parameters are copied.  Otherwise they are aliased.
19 // #define CCP
20 
TextFile(const char * _name,const char * _encoding,UErrorCode & ec)21 TextFile::TextFile(const char* _name, const char* _encoding, UErrorCode& ec) :
22     file(0),
23     name(0), encoding(0),
24     buffer(0),
25     capacity(0),
26     lineNo(0)
27 {
28     if (U_FAILURE(ec) || _name == 0 || _encoding == 0) {
29         if (U_SUCCESS(ec)) {
30             ec = U_ILLEGAL_ARGUMENT_ERROR;
31         }
32         return;
33     }
34 
35 #ifdef CCP
36     name = uprv_malloc(uprv_strlen(_name) + 1);
37     encoding = uprv_malloc(uprv_strlen(_encoding) + 1);
38     if (name == 0 || encoding == 0) {
39         ec = U_MEMORY_ALLOCATION_ERROR;
40         return;
41     }
42     uprv_strcpy(name, _name);
43     uprv_strcpy(encoding, _encoding);
44 #else
45     name = (char*) _name;
46     encoding = (char*) _encoding;
47 #endif
48 
49     const char* testDir = IntlTest::getSourceTestData(ec);
50     if (U_FAILURE(ec)) {
51         return;
52     }
53     if (!ensureCapacity((int32_t)(uprv_strlen(testDir) + uprv_strlen(name) + 1))) {
54         ec = U_MEMORY_ALLOCATION_ERROR;
55         return;
56     }
57     uprv_strcpy(buffer, testDir);
58     uprv_strcat(buffer, name);
59 
60     file = T_FileStream_open(buffer, "rb");
61     if (file == 0) {
62         ec = U_ILLEGAL_ARGUMENT_ERROR;
63         return;
64     }
65 }
66 
~TextFile()67 TextFile::~TextFile() {
68     if (file != 0) T_FileStream_close(file);
69     if (buffer != 0) uprv_free(buffer);
70 #ifdef CCP
71     uprv_free(name);
72     uprv_free(encoding);
73 #endif
74 }
75 
readLine(UnicodeString & line,UErrorCode & ec)76 UBool TextFile::readLine(UnicodeString& line, UErrorCode& ec) {
77     if (T_FileStream_eof(file)) {
78         return FALSE;
79     }
80     // Note: 'buffer' may change after ensureCapacity() is called,
81     // so don't use
82     //   p=buffer; *p++=c;
83     // but rather
84     //   i=; buffer[i++]=c;
85     int32_t n = 0;
86     for (;;) {
87         int c = T_FileStream_getc(file); // sic: int, not int32_t
88         if (c < 0 || c == 0xD || c == 0xA) {
89             // consume 0xA following 0xD
90             if (c == 0xD) {
91                 c = T_FileStream_getc(file);
92                 if (c != 0xA && c >= 0) {
93                     T_FileStream_ungetc(c, file);
94                 }
95             }
96             break;
97         }
98         if (!setBuffer(n++, c, ec)) return FALSE;
99     }
100     if (!setBuffer(n++, 0, ec)) return FALSE;
101     UnicodeString str(buffer, encoding);
102     // Remove BOM in first line, if present
103     if (lineNo == 0 && str[0] == 0xFEFF) {
104         str.remove(0, 1);
105     }
106     ++lineNo;
107     line = str.unescape();
108     return TRUE;
109 }
110 
readLineSkippingComments(UnicodeString & line,UErrorCode & ec,UBool trim)111 UBool TextFile::readLineSkippingComments(UnicodeString& line, UErrorCode& ec,
112                                          UBool trim) {
113     for (;;) {
114         if (!readLine(line, ec)) return FALSE;
115         // Skip over white space
116         int32_t pos = 0;
117         ICU_Utility::skipWhitespace(line, pos, TRUE);
118         // Ignore blank lines and comment lines
119         if (pos == line.length() || line.charAt(pos) == 0x23/*'#'*/) {
120             continue;
121         }
122         // Process line
123         if (trim) line.remove(0, pos);
124         return TRUE;
125     }
126 }
127 
128 /**
129  * Set buffer[index] to c, growing buffer if necessary. Return TRUE if
130  * successful.
131  */
setBuffer(int32_t index,char c,UErrorCode & ec)132 UBool TextFile::setBuffer(int32_t index, char c, UErrorCode& ec) {
133     if (capacity <= index) {
134         if (!ensureCapacity(index+1)) {
135             ec = U_MEMORY_ALLOCATION_ERROR;
136             return FALSE;
137         }
138     }
139     buffer[index] = c;
140     return TRUE;
141 }
142 
143 /**
144  * Make sure that 'buffer' has at least 'mincapacity' bytes.
145  * Return TRUE upon success. Upon return, 'buffer' may change
146  * value. In any case, previous contents are preserved.
147  */
148  #define LOWEST_MIN_CAPACITY 64
ensureCapacity(int32_t mincapacity)149 UBool TextFile::ensureCapacity(int32_t mincapacity) {
150     if (capacity >= mincapacity) {
151         return TRUE;
152     }
153 
154     // Grow by factor of 2 to prevent frequent allocation
155     // Note: 'capacity' may be 0
156     int32_t i = (capacity < LOWEST_MIN_CAPACITY)? LOWEST_MIN_CAPACITY: capacity;
157     while (i < mincapacity) {
158         i <<= 1;
159         if (i < 0) {
160             i = 0x7FFFFFFF;
161             break;
162         }
163     }
164     mincapacity = i;
165 
166     // Simple realloc() no good; contents not preserved
167     // Note: 'buffer' may be 0
168     char* newbuffer = (char*) uprv_malloc(mincapacity);
169     if (newbuffer == 0) {
170         return FALSE;
171     }
172     if (buffer != 0) {
173         uprv_strncpy(newbuffer, buffer, capacity);
174         uprv_free(buffer);
175     }
176     buffer = newbuffer;
177     capacity = mincapacity;
178     return TRUE;
179 }
180 
181