1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 1997-2013, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 *
9 * File resbund.cpp
10 *
11 * Modification History:
12 *
13 *   Date        Name        Description
14 *   02/05/97    aliu        Fixed bug in chopLocale.  Added scanForLocaleInFile
15 *                           based on code taken from scanForLocale.  Added
16 *                           constructor which attempts to read resource bundle
17 *                           from a specific file, without searching other files.
18 *   02/11/97    aliu        Added UErrorCode return values to constructors. Fixed
19 *                           infinite loops in scanForFile and scanForLocale.
20 *                           Modified getRawResourceData to not delete storage in
21 *                           localeData and resourceData which it doesn't own.
22 *                           Added Mac compatibility #ifdefs for tellp() and
23 *                           ios::nocreate.
24 *   03/04/97    aliu        Modified to use ExpandingDataSink objects instead of
25 *                           the highly inefficient ostrstream objects.
26 *   03/13/97    aliu        Rewrote to load in entire resource bundle and store
27 *                           it as a Hashtable of ResourceBundleData objects.
28 *                           Added state table to govern parsing of files.
29 *                           Modified to load locale index out of new file distinct
30 *                           from default.txt.
31 *   03/25/97    aliu        Modified to support 2-d arrays, needed for timezone data.
32 *                           Added support for custom file suffixes.  Again, needed
33 *                           to support timezone data.  Improved error handling to
34 *                           detect duplicate tags and subtags.
35 *   04/07/97    aliu        Fixed bug in getHashtableForLocale().  Fixed handling
36 *                           of failing UErrorCode values on entry to API methods.
37 *                           Fixed bugs in getArrayItem() for negative indices.
38 *   04/29/97    aliu        Update to use new Hashtable deletion protocol.
39 *   05/06/97    aliu        Flattened kTransitionTable for HP compiler.
40 *                           Fixed usage of CharString.
41 * 06/11/99      stephen     Removed parsing of .txt files.
42 *                           Reworked to use new binary format.
43 *                           Cleaned up.
44 * 06/14/99      stephen     Removed methods taking a filename suffix.
45 * 06/22/99      stephen     Added missing T_FileStream_close in parse()
46 * 11/09/99      weiv        Added getLocale(), rewritten constructForLocale()
47 * March 2000    weiv        complete overhaul.
48 ******************************************************************************
49 */
50 
51 #include "unicode/utypes.h"
52 #include "unicode/resbund.h"
53 
54 #include "mutex.h"
55 #include "uassert.h"
56 #include "umutex.h"
57 
58 #include "uresimp.h"
59 
60 U_NAMESPACE_BEGIN
61 
62 /*-----------------------------------------------------------------------------
63  * Implementation Notes
64  *
65  * Resource bundles are read in once, and thereafter cached.
66  * ResourceBundle statically keeps track of which files have been
67  * read, so we are guaranteed that each file is read at most once.
68  * Resource bundles can be loaded from different data directories and
69  * will be treated as distinct, even if they are for the same locale.
70  *
71  * Resource bundles are lightweight objects, which have pointers to
72  * one or more shared Hashtable objects containing all the data.
73  * Copying would be cheap, but there is no copy constructor, since
74  * there wasn't one in the original API.
75  *
76  * The ResourceBundle parsing mechanism is implemented as a transition
77  * network, for easy maintenance and modification.  The network is
78  * implemented as a matrix (instead of in code) to make this even
79  * easier.  The matrix contains Transition objects.  Each Transition
80  * object describes a destination node and an action to take before
81  * moving to the destination node.  The source node is encoded by the
82  * index of the object in the array that contains it.  The pieces
83  * needed to understand the transition network are the enums for node
84  * IDs and actions, the parse() method, which walks through the
85  * network and implements the actions, and the network itself.  The
86  * network guarantees certain conditions, for example, that a new
87  * resource will not be closed until one has been opened first; or
88  * that data will not be stored into a TaggedList until a TaggedList
89  * has been created.  Nonetheless, the code in parse() does some
90  * consistency checks as it runs the network, and fails with an
91  * U_INTERNAL_PROGRAM_ERROR if one of these checks fails.  If the input
92  * data has a bad format, an U_INVALID_FORMAT_ERROR is returned.  If you
93  * see an U_INTERNAL_PROGRAM_ERROR the transition matrix has a bug in
94  * it.
95  *
96  * Old functionality of multiple locales in a single file is still
97  * supported.  For this reason, LOCALE names override FILE names.  If
98  * data for en_US is located in the en.txt file, once it is loaded,
99  * the code will not care where it came from (other than remembering
100  * which directory it came from).  However, if there is an en_US
101  * resource in en_US.txt, that will take precedence.  There is no
102  * limit to the number or type of resources that can be stored in a
103  * file, however, files are only searched in a specific way.  If
104  * en_US_CA is requested, then first en_US_CA.txt is searched, then
105  * en_US.txt, then en.txt, then default.txt.  So it only makes sense
106  * to put certain locales in certain files.  In this example, it would
107  * be logical to put en_US_CA, en_US, and en into the en.txt file,
108  * since they would be found there if asked for.  The extreme example
109  * is to place all locale resources into default.txt, which should
110  * also work.
111  *
112  * Inheritance is implemented.  For example, xx_YY_zz inherits as
113  * follows: xx_YY_zz, xx_YY, xx, default.  Inheritance is implemented
114  * as an array of hashtables.  There will be from 1 to 4 hashtables in
115  * the array.
116  *
117  * Fallback files are implemented.  The fallback pattern is Language
118  * Country Variant (LCV) -> LC -> L.  Fallback is first done for the
119  * requested locale.  Then it is done for the default locale, as
120  * returned by Locale::getDefault().  Then the special file
121  * default.txt is searched for the default locale.  The overall FILE
122  * fallback path is LCV -> LC -> L -> dLCV -> dLC -> dL -> default.
123  *
124  * Note that although file name searching includes the default locale,
125  * once a ResourceBundle object is constructed, the inheritance path
126  * no longer includes the default locale.  The path is LCV -> LC -> L
127  * -> default.
128  *
129  * File parsing is lazy.  Nothing is parsed unless it is called for by
130  * someone.  So when a ResourceBundle for xx_YY_zz is constructed,
131  * only that locale is parsed (along with anything else in the same
132  * file).  Later, if the FooBar tag is asked for, and if it isn't
133  * found in xx_YY_zz, then xx_YY.txt will be parsed and checked, and
134  * so forth, until the chain is exhausted or the tag is found.
135  *
 * Thread-safety is implemented around caches, both the cache that
 * stores all the resource data, and the cache that stores flags
 * indicating whether or not a file has been visited.  These caches
 * delete their storage at static cleanup time, when the process
 * quits.
141  *
142  * ResourceBundle supports TableCollation as a special case.  This
143  * involves having special ResourceBundle objects which DO own their
144  * data, since we don't want large collation rule strings in the
145  * ResourceBundle cache (these are already cached in the
146  * TableCollation cache).  TableCollation files (.ctx files) have the
147  * same format as normal resource data files, with a different
148  * interpretation, from the standpoint of ResourceBundle.  .ctx files
149  * are loaded into otherwise ordinary ResourceBundle objects.  They
150  * don't inherit (that's implemented by TableCollation) and they own
151  * their data (as mentioned above).  However, they still support
152  * possible multiple locales in a single .ctx file.  (This is in
153  * practice a bad idea, since you only want the one locale you're
154  * looking for, and only one tag will be present
155  * ("CollationElements"), so you don't need an inheritance chain of
156  * multiple locales.)  Up to 4 locale resources will be loaded from a
157  * .ctx file; everything after the first 4 is ignored (parsed and
158  * deleted).  (Normal .txt files have no limit.)  Instead of being
159  * loaded into the cache, and then looked up as needed, the locale
160  * resources are read straight into the ResourceBundle object.
161  *
162  * The Index, which used to reside in default.txt, has been moved to a
163  * new file, index.txt.  This file contains a slightly modified format
164  * with the addition of the "InstalledLocales" tag; it looks like:
165  *
166  * Index {
167  *   InstalledLocales {
168  *     ar
169  *     ..
170  *     zh_TW
171  *   }
172  * }
173  */
174 //-----------------------------------------------------------------------------
175 
// Emits the RTTI implementation for ResourceBundle via ICU's helper macro.
// NOTE(review): presumably this defines getStaticClassID()/getDynamicClassID();
// confirm against the macro definition in the UObject headers.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ResourceBundle)
177 
178 ResourceBundle::ResourceBundle(UErrorCode &err)
179                                 :UObject(), fLocale(NULL)
180 {
181     fResource = ures_open(0, Locale::getDefault().getName(), &err);
182 }
183 
184 ResourceBundle::ResourceBundle(const ResourceBundle &other)
185                               :UObject(other), fLocale(NULL)
186 {
187     UErrorCode status = U_ZERO_ERROR;
188 
189     if (other.fResource) {
190         fResource = ures_copyResb(0, other.fResource, &status);
191     } else {
192         /* Copying a bad resource bundle */
193         fResource = NULL;
194     }
195 }
196 
197 ResourceBundle::ResourceBundle(UResourceBundle *res, UErrorCode& err)
198                                :UObject(), fLocale(NULL)
199 {
200     if (res) {
201         fResource = ures_copyResb(0, res, &err);
202     } else {
203         /* Copying a bad resource bundle */
204         fResource = NULL;
205     }
206 }
207 
208 ResourceBundle::ResourceBundle(const char* path, const Locale& locale, UErrorCode& err)
209                                :UObject(), fLocale(NULL)
210 {
211     fResource = ures_open(path, locale.getName(), &err);
212 }
213 
214 
215 ResourceBundle& ResourceBundle::operator=(const ResourceBundle& other)
216 {
217     if(this == &other) {
218         return *this;
219     }
220     if(fResource != 0) {
221         ures_close(fResource);
222         fResource = NULL;
223     }
224     if (fLocale != NULL) {
225         delete fLocale;
226         fLocale = NULL;
227     }
228     UErrorCode status = U_ZERO_ERROR;
229     if (other.fResource) {
230         fResource = ures_copyResb(0, other.fResource, &status);
231     } else {
232         /* Copying a bad resource bundle */
233         fResource = NULL;
234     }
235     return *this;
236 }
237 
238 ResourceBundle::~ResourceBundle()
239 {
240     if(fResource != 0) {
241         ures_close(fResource);
242     }
243     if(fLocale != NULL) {
244       delete(fLocale);
245     }
246 }
247 
248 ResourceBundle *
249 ResourceBundle::clone() const {
250     return new ResourceBundle(*this);
251 }
252 
253 UnicodeString ResourceBundle::getString(UErrorCode& status) const {
254     int32_t len = 0;
255     const UChar *r = ures_getString(fResource, &len, &status);
256     return UnicodeString(TRUE, r, len);
257 }
258 
259 const uint8_t *ResourceBundle::getBinary(int32_t& len, UErrorCode& status) const {
260     return ures_getBinary(fResource, &len, &status);
261 }
262 
263 const int32_t *ResourceBundle::getIntVector(int32_t& len, UErrorCode& status) const {
264     return ures_getIntVector(fResource, &len, &status);
265 }
266 
267 uint32_t ResourceBundle::getUInt(UErrorCode& status) const {
268     return ures_getUInt(fResource, &status);
269 }
270 
271 int32_t ResourceBundle::getInt(UErrorCode& status) const {
272     return ures_getInt(fResource, &status);
273 }
274 
275 const char *ResourceBundle::getName(void) const {
276     return ures_getName(fResource);
277 }
278 
279 const char *ResourceBundle::getKey(void) const {
280     return ures_getKey(fResource);
281 }
282 
283 UResType ResourceBundle::getType(void) const {
284     return ures_getType(fResource);
285 }
286 
287 int32_t ResourceBundle::getSize(void) const {
288     return ures_getSize(fResource);
289 }
290 
291 UBool ResourceBundle::hasNext(void) const {
292     return ures_hasNext(fResource);
293 }
294 
295 void ResourceBundle::resetIterator(void) {
296     ures_resetIterator(fResource);
297 }
298 
299 ResourceBundle ResourceBundle::getNext(UErrorCode& status) {
300     UResourceBundle r;
301 
302     ures_initStackObject(&r);
303     ures_getNextResource(fResource, &r, &status);
304     ResourceBundle res(&r, status);
305     if (U_SUCCESS(status)) {
306         ures_close(&r);
307     }
308     return res;
309 }
310 
311 UnicodeString ResourceBundle::getNextString(UErrorCode& status) {
312     int32_t len = 0;
313     const UChar* r = ures_getNextString(fResource, &len, 0, &status);
314     return UnicodeString(TRUE, r, len);
315 }
316 
317 UnicodeString ResourceBundle::getNextString(const char ** key, UErrorCode& status) {
318     int32_t len = 0;
319     const UChar* r = ures_getNextString(fResource, &len, key, &status);
320     return UnicodeString(TRUE, r, len);
321 }
322 
323 ResourceBundle ResourceBundle::get(int32_t indexR, UErrorCode& status) const {
324     UResourceBundle r;
325 
326     ures_initStackObject(&r);
327     ures_getByIndex(fResource, indexR, &r, &status);
328     ResourceBundle res(&r, status);
329     if (U_SUCCESS(status)) {
330         ures_close(&r);
331     }
332     return res;
333 }
334 
335 UnicodeString ResourceBundle::getStringEx(int32_t indexS, UErrorCode& status) const {
336     int32_t len = 0;
337     const UChar* r = ures_getStringByIndex(fResource, indexS, &len, &status);
338     return UnicodeString(TRUE, r, len);
339 }
340 
341 ResourceBundle ResourceBundle::get(const char* key, UErrorCode& status) const {
342     UResourceBundle r;
343 
344     ures_initStackObject(&r);
345     ures_getByKey(fResource, key, &r, &status);
346     ResourceBundle res(&r, status);
347     if (U_SUCCESS(status)) {
348         ures_close(&r);
349     }
350     return res;
351 }
352 
353 ResourceBundle ResourceBundle::getWithFallback(const char* key, UErrorCode& status){
354     UResourceBundle r;
355     ures_initStackObject(&r);
356     ures_getByKeyWithFallback(fResource, key, &r, &status);
357     ResourceBundle res(&r, status);
358     if(U_SUCCESS(status)){
359         ures_close(&r);
360     }
361     return res;
362 }
363 UnicodeString ResourceBundle::getStringEx(const char* key, UErrorCode& status) const {
364     int32_t len = 0;
365     const UChar* r = ures_getStringByKey(fResource, key, &len, &status);
366     return UnicodeString(TRUE, r, len);
367 }
368 
369 const char*
370 ResourceBundle::getVersionNumber()  const
371 {
372     return ures_getVersionNumberInternal(fResource);
373 }
374 
375 void ResourceBundle::getVersion(UVersionInfo versionInfo) const {
376     ures_getVersion(fResource, versionInfo);
377 }
378 
379 static UMutex gLocaleLock = U_MUTEX_INITIALIZER;
380 const Locale &ResourceBundle::getLocale(void) const {
381     Mutex lock(&gLocaleLock);
382     if (fLocale != NULL) {
383         return *fLocale;
384     }
385     UErrorCode status = U_ZERO_ERROR;
386     const char *localeName = ures_getLocaleInternal(fResource, &status);
387     ResourceBundle *ncThis = const_cast<ResourceBundle *>(this);
388     ncThis->fLocale = new Locale(localeName);
389     return ncThis->fLocale != NULL ? *ncThis->fLocale : Locale::getDefault();
390 }
391 
392 const Locale ResourceBundle::getLocale(ULocDataLocaleType type, UErrorCode &status) const
393 {
394   return ures_getLocaleByType(fResource, type, &status);
395 }
396 
397 U_NAMESPACE_END
398 //eof
399