1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (c) 2004,2011 International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 * Author: Alan Liu
9 * Created: March 19 2004
10 * Since: ICU 3.0
11 **********************************************************************
12 */
13 #include "textfile.h"
14 #include "cmemory.h"
15 #include "cstring.h"
16 #include "intltest.h"
17 #include "util.h"
18 
19 // If the symbol CCP is defined, then the 'name' and 'encoding'
20 // constructor parameters are copied.  Otherwise they are aliased.
21 // #define CCP
22 
TextFile(const char * _name,const char * _encoding,UErrorCode & ec)23 TextFile::TextFile(const char* _name, const char* _encoding, UErrorCode& ec) :
24     file(0),
25     name(0), encoding(0),
26     buffer(0),
27     capacity(0),
28     lineNo(0)
29 {
30     if (U_FAILURE(ec) || _name == 0 || _encoding == 0) {
31         if (U_SUCCESS(ec)) {
32             ec = U_ILLEGAL_ARGUMENT_ERROR;
33         }
34         return;
35     }
36 
37 #ifdef CCP
38     name = uprv_malloc(uprv_strlen(_name) + 1);
39     encoding = uprv_malloc(uprv_strlen(_encoding) + 1);
40     if (name == 0 || encoding == 0) {
41         ec = U_MEMORY_ALLOCATION_ERROR;
42         return;
43     }
44     uprv_strcpy(name, _name);
45     uprv_strcpy(encoding, _encoding);
46 #else
47     name = (char*) _name;
48     encoding = (char*) _encoding;
49 #endif
50 
51     const char* testDir = IntlTest::getSourceTestData(ec);
52     if (U_FAILURE(ec)) {
53         return;
54     }
55     if (!ensureCapacity((int32_t)(uprv_strlen(testDir) + uprv_strlen(name) + 1))) {
56         ec = U_MEMORY_ALLOCATION_ERROR;
57         return;
58     }
59     uprv_strcpy(buffer, testDir);
60     uprv_strcat(buffer, name);
61 
62     file = T_FileStream_open(buffer, "rb");
63     if (file == 0) {
64         ec = U_ILLEGAL_ARGUMENT_ERROR;
65         return;
66     }
67 }
68 
~TextFile()69 TextFile::~TextFile() {
70     if (file != 0) T_FileStream_close(file);
71     if (buffer != 0) uprv_free(buffer);
72 #ifdef CCP
73     uprv_free(name);
74     uprv_free(encoding);
75 #endif
76 }
77 
readLine(UnicodeString & line,UErrorCode & ec)78 UBool TextFile::readLine(UnicodeString& line, UErrorCode& ec) {
79     if (T_FileStream_eof(file)) {
80         return FALSE;
81     }
82     // Note: 'buffer' may change after ensureCapacity() is called,
83     // so don't use
84     //   p=buffer; *p++=c;
85     // but rather
86     //   i=; buffer[i++]=c;
87     int32_t n = 0;
88     for (;;) {
89         int c = T_FileStream_getc(file); // sic: int, not int32_t
90         if (c < 0 || c == 0xD || c == 0xA) {
91             // consume 0xA following 0xD
92             if (c == 0xD) {
93                 c = T_FileStream_getc(file);
94                 if (c != 0xA && c >= 0) {
95                     T_FileStream_ungetc(c, file);
96                 }
97             }
98             break;
99         }
100         if (!setBuffer(n++, c, ec)) return FALSE;
101     }
102     if (!setBuffer(n++, 0, ec)) return FALSE;
103     UnicodeString str(buffer, encoding);
104     // Remove BOM in first line, if present
105     if (lineNo == 0 && str[0] == 0xFEFF) {
106         str.remove(0, 1);
107     }
108     ++lineNo;
109     line = str.unescape();
110     return TRUE;
111 }
112 
readLineSkippingComments(UnicodeString & line,UErrorCode & ec,UBool trim)113 UBool TextFile::readLineSkippingComments(UnicodeString& line, UErrorCode& ec,
114                                          UBool trim) {
115     for (;;) {
116         if (!readLine(line, ec)) return FALSE;
117         // Skip over white space
118         int32_t pos = 0;
119         ICU_Utility::skipWhitespace(line, pos, TRUE);
120         // Ignore blank lines and comment lines
121         if (pos == line.length() || line.charAt(pos) == 0x23/*'#'*/) {
122             continue;
123         }
124         // Process line
125         if (trim) line.remove(0, pos);
126         return TRUE;
127     }
128 }
129 
130 /**
131  * Set buffer[index] to c, growing buffer if necessary. Return TRUE if
132  * successful.
133  */
setBuffer(int32_t index,char c,UErrorCode & ec)134 UBool TextFile::setBuffer(int32_t index, char c, UErrorCode& ec) {
135     if (capacity <= index) {
136         if (!ensureCapacity(index+1)) {
137             ec = U_MEMORY_ALLOCATION_ERROR;
138             return FALSE;
139         }
140     }
141     buffer[index] = c;
142     return TRUE;
143 }
144 
145 /**
146  * Make sure that 'buffer' has at least 'mincapacity' bytes.
147  * Return TRUE upon success. Upon return, 'buffer' may change
148  * value. In any case, previous contents are preserved.
149  */
150  #define LOWEST_MIN_CAPACITY 64
ensureCapacity(int32_t mincapacity)151 UBool TextFile::ensureCapacity(int32_t mincapacity) {
152     if (capacity >= mincapacity) {
153         return TRUE;
154     }
155 
156     // Grow by factor of 2 to prevent frequent allocation
157     // Note: 'capacity' may be 0
158     int32_t i = (capacity < LOWEST_MIN_CAPACITY)? LOWEST_MIN_CAPACITY: capacity;
159     while (i < mincapacity) {
160         i <<= 1;
161         if (i < 0) {
162             i = 0x7FFFFFFF;
163             break;
164         }
165     }
166     mincapacity = i;
167 
168     // Simple realloc() no good; contents not preserved
169     // Note: 'buffer' may be 0
170     char* newbuffer = (char*) uprv_malloc(mincapacity);
171     if (newbuffer == 0) {
172         return FALSE;
173     }
174     if (buffer != 0) {
175         uprv_strncpy(newbuffer, buffer, capacity);
176         uprv_free(buffer);
177     }
178     buffer = newbuffer;
179     capacity = mincapacity;
180     return TRUE;
181 }
182 
183