1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 *   Copyright (C) 1999-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 ******************************************************************************
10 *   file name:  udata.cpp
11 *   encoding:   US-ASCII
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 1999oct25
16 *   created by: Markus W. Scherer
17 */
18 
19 #include "unicode/utypes.h"  /* U_PLATFORM etc. */
20 
21 #ifdef __GNUC__
22 /* if gcc
23 #define ATTRIBUTE_WEAK __attribute__ ((weak))
24 might have to #include some other header
25 */
26 #endif
27 
28 #include "unicode/putil.h"
29 #include "unicode/udata.h"
30 #include "unicode/uversion.h"
31 #include "charstr.h"
32 #include "cmemory.h"
33 #include "cstring.h"
34 #include "mutex.h"
35 #include "putilimp.h"
36 #include "uassert.h"
37 #include "ucln_cmn.h"
38 #include "ucmndata.h"
39 #include "udatamem.h"
40 #include "uhash.h"
41 #include "umapfile.h"
42 #include "umutex.h"
43 
44 /***********************************************************************
45 *
46 *   Notes on the organization of the ICU data implementation
47 *
48 *      All of the public API is defined in udata.h
49 *
50 *      The implementation is split into several files...
51 *
*         - udata.cpp  (this file) contains higher level code that knows about
53 *                     the search paths for locating data, caching opened data, etc.
54 *
55 *         - umapfile.c  contains the low level platform-specific code for actually loading
56 *                     (memory mapping, file reading, whatever) data into memory.
57 *
58 *         - ucmndata.c  deals with the tables of contents of ICU data items within
59 *                     an ICU common format data file.  The implementation includes
60 *                     an abstract interface and support for multiple TOC formats.
61 *                     All knowledge of any specific TOC format is encapsulated here.
62 *
63 *         - udatamem.c has code for managing UDataMemory structs.  These are little
64 *                     descriptor objects for blocks of memory holding ICU data of
65 *                     various types.
66 */
67 
68 /* configuration ---------------------------------------------------------- */
69 
70 /* If you are excruciatingly bored turn this on .. */
71 /* #define UDATA_DEBUG 1 */
72 
73 #if defined(UDATA_DEBUG)
74 #   include <stdio.h>
75 #endif
76 
77 U_NAMESPACE_USE
78 
79 /*
80  *  Forward declarations
81  */
82 static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err);
83 
84 /***********************************************************************
85 *
86 *    static (Global) data
87 *
88 ************************************************************************/
89 
90 /*
91  * Pointers to the common ICU data.
92  *
93  * We store multiple pointers to ICU data packages and iterate through them
94  * when looking for a data item.
95  *
96  * It is possible to combine this with dependency inversion:
97  * One or more data package libraries may export
98  * functions that each return a pointer to their piece of the ICU data,
99  * and this file would import them as weak functions, without a
100  * strong linker dependency from the common library on the data library.
101  *
102  * Then we can have applications depend on only that part of ICU's data
103  * that they really need, reducing the size of binaries that take advantage
104  * of this.
105  */
/* Slots are filled front to back by setCommonICUData() (it takes the first NULL slot). */
/* udata_cleanup() closes and clears entries up to the first NULL slot.                 */
static UDataMemory *gCommonICUDataArray[10] = { NULL };   // Access protected by icu global mutex.

/* Set to 1 by extendICUData() after its load attempt; reset to 0 by udata_cleanup(). */
static u_atomic_int32_t gHaveTriedToLoadCommonData = ATOMIC_INT32_T_INITIALIZER(0);  //  See extendICUData().

/* Created on first use by udata_initHashTable().  Keys are data file base names, */
/* values are DataCacheElement objects; closed by udata_cleanup().                */
static UHashtable  *gCommonDataCache = NULL;  /* Global hash table of opened ICU data files.  */
/* Guards the one-time creation of gCommonDataCache; reset by udata_cleanup(). */
static icu::UInitOnce gCommonDataCacheInitOnce = U_INITONCE_INITIALIZER;

static UDataFileAccess  gDataFileAccess = UDATA_DEFAULT_ACCESS;  // Access not synchronized.
                                                                 // Modifying is documented as thread-unsafe.
114                                                                  // Modifying is documented as thread-unsafe.
115 
116 static UBool U_CALLCONV
udata_cleanup(void)117 udata_cleanup(void)
118 {
119     int32_t i;
120 
121     if (gCommonDataCache) {             /* Delete the cache of user data mappings.  */
122         uhash_close(gCommonDataCache);  /*   Table owns the contents, and will delete them. */
123         gCommonDataCache = NULL;        /*   Cleanup is not thread safe.                */
124     }
125     gCommonDataCacheInitOnce.reset();
126 
127     for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray) && gCommonICUDataArray[i] != NULL; ++i) {
128         udata_close(gCommonICUDataArray[i]);
129         gCommonICUDataArray[i] = NULL;
130     }
131     gHaveTriedToLoadCommonData = 0;
132 
133     return TRUE;                   /* Everything was cleaned up */
134 }
135 
136 static UBool U_CALLCONV
findCommonICUDataByName(const char * inBasename,UErrorCode & err)137 findCommonICUDataByName(const char *inBasename, UErrorCode &err)
138 {
139     UBool found = FALSE;
140     int32_t i;
141 
142     UDataMemory  *pData = udata_findCachedData(inBasename, err);
143     if (U_FAILURE(err) || pData == NULL)
144         return FALSE;
145 
146     {
147         Mutex lock;
148         for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray); ++i) {
149             if ((gCommonICUDataArray[i] != NULL) && (gCommonICUDataArray[i]->pHeader == pData->pHeader)) {
150                 /* The data pointer is already in the array. */
151                 found = TRUE;
152                 break;
153             }
154         }
155     }
156     return found;
157 }
158 
159 
160 /*
161  * setCommonICUData.   Set a UDataMemory to be the global ICU Data
162  */
163 static UBool
setCommonICUData(UDataMemory * pData,UBool warn,UErrorCode * pErr)164 setCommonICUData(UDataMemory *pData,     /*  The new common data.  Belongs to caller, we copy it. */
165                  UBool       warn,       /*  If true, set USING_DEFAULT warning if ICUData was    */
166                                          /*    changed by another thread before we got to it.     */
167                  UErrorCode *pErr)
168 {
169     UDataMemory  *newCommonData = UDataMemory_createNewInstance(pErr);
170     int32_t i;
171     UBool didUpdate = FALSE;
172     if (U_FAILURE(*pErr)) {
173         return FALSE;
174     }
175 
176     /*  For the assignment, other threads must cleanly see either the old            */
177     /*    or the new, not some partially initialized new.  The old can not be        */
178     /*    deleted - someone may still have a pointer to it lying around in           */
179     /*    their locals.                                                              */
180     UDatamemory_assign(newCommonData, pData);
181     umtx_lock(NULL);
182     for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray); ++i) {
183         if (gCommonICUDataArray[i] == NULL) {
184             gCommonICUDataArray[i] = newCommonData;
185             didUpdate = TRUE;
186             break;
187         } else if (gCommonICUDataArray[i]->pHeader == pData->pHeader) {
188             /* The same data pointer is already in the array. */
189             break;
190         }
191     }
192     umtx_unlock(NULL);
193 
194     if (i == UPRV_LENGTHOF(gCommonICUDataArray) && warn) {
195         *pErr = U_USING_DEFAULT_WARNING;
196     }
197     if (didUpdate) {
198         ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup);
199     } else {
200         uprv_free(newCommonData);
201     }
202     return didUpdate;
203 }
204 
205 static UBool
setCommonICUDataPointer(const void * pData,UBool,UErrorCode * pErrorCode)206 setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCode) {
207     UDataMemory tData;
208     UDataMemory_init(&tData);
209     UDataMemory_setData(&tData, pData);
210     udata_checkCommonData(&tData, pErrorCode);
211     return setCommonICUData(&tData, FALSE, pErrorCode);
212 }
213 
214 static const char *
findBasename(const char * path)215 findBasename(const char *path) {
216     const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR);
217     if(basename==NULL) {
218         return path;
219     } else {
220         return basename+1;
221     }
222 }
223 
224 #ifdef UDATA_DEBUG
225 static const char *
packageNameFromPath(const char * path)226 packageNameFromPath(const char *path)
227 {
228     if((path == NULL) || (*path == 0)) {
229         return U_ICUDATA_NAME;
230     }
231 
232     path = findBasename(path);
233 
234     if((path == NULL) || (*path == 0)) {
235         return U_ICUDATA_NAME;
236     }
237 
238     return path;
239 }
240 #endif
241 
242 /*----------------------------------------------------------------------*
243  *                                                                      *
244  *   Cache for common data                                              *
245  *      Functions for looking up or adding entries to a cache of        *
246  *      data that has been previously opened.  Avoids a potentially     *
247  *      expensive operation of re-opening the data for subsequent       *
248  *      uses.                                                           *
249  *                                                                      *
250  *      Data remains cached for the duration of the process.            *
251  *                                                                      *
252  *----------------------------------------------------------------------*/
253 
/* One entry of the common data cache; stored as a value in gCommonDataCache. */
typedef struct DataCacheElement {
    char          *name;   /* Heap copy of the base name; also used as the hash key.  Freed by the deleter. */
    UDataMemory   *item;   /* Heap UDataMemory for the opened data.  Closed by the deleter.                 */
} DataCacheElement;
258 
259 
260 
261 /*
262  * Deleter function for DataCacheElements.
263  *         udata cleanup function closes the hash table; hash table in turn calls back to
264  *         here for each entry.
265  */
DataCacheElement_deleter(void * pDCEl)266 static void U_CALLCONV DataCacheElement_deleter(void *pDCEl) {
267     DataCacheElement *p = (DataCacheElement *)pDCEl;
268     udata_close(p->item);              /* unmaps storage */
269     uprv_free(p->name);                /* delete the hash key string. */
270     uprv_free(pDCEl);                  /* delete 'this'          */
271 }
272 
udata_initHashTable(UErrorCode & err)273 static void U_CALLCONV udata_initHashTable(UErrorCode &err) {
274     U_ASSERT(gCommonDataCache == NULL);
275     gCommonDataCache = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &err);
276     if (U_FAILURE(err)) {
277        return;
278     }
279     U_ASSERT(gCommonDataCache != NULL);
280     uhash_setValueDeleter(gCommonDataCache, DataCacheElement_deleter);
281     ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup);
282 }
283 
 /*   udata_getHashTable()
  *     Get the hash table used to store the data cache entries.
  *     Lazily create it if it doesn't yet exist.
  */
udata_getHashTable(UErrorCode & err)288 static UHashtable *udata_getHashTable(UErrorCode &err) {
289     umtx_initOnce(gCommonDataCacheInitOnce, &udata_initHashTable, err);
290     return gCommonDataCache;
291 }
292 
293 
294 
udata_findCachedData(const char * path,UErrorCode & err)295 static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err)
296 {
297     UHashtable        *htable;
298     UDataMemory       *retVal = NULL;
299     DataCacheElement  *el;
300     const char        *baseName;
301 
302     htable = udata_getHashTable(err);
303     if (U_FAILURE(err)) {
304         return NULL;
305     }
306 
307     baseName = findBasename(path);   /* Cache remembers only the base name, not the full path. */
308     umtx_lock(NULL);
309     el = (DataCacheElement *)uhash_get(htable, baseName);
310     umtx_unlock(NULL);
311     if (el != NULL) {
312         retVal = el->item;
313     }
314 #ifdef UDATA_DEBUG
315     fprintf(stderr, "Cache: [%s] -> %p\n", baseName, retVal);
316 #endif
317     return retVal;
318 }
319 
320 
udata_cacheDataItem(const char * path,UDataMemory * item,UErrorCode * pErr)321 static UDataMemory *udata_cacheDataItem(const char *path, UDataMemory *item, UErrorCode *pErr) {
322     DataCacheElement *newElement;
323     const char       *baseName;
324     int32_t           nameLen;
325     UHashtable       *htable;
326     DataCacheElement *oldValue = NULL;
327     UErrorCode        subErr = U_ZERO_ERROR;
328 
329     htable = udata_getHashTable(*pErr);
330     if (U_FAILURE(*pErr)) {
331         return NULL;
332     }
333 
334     /* Create a new DataCacheElement - the thingy we store in the hash table -
335      * and copy the supplied path and UDataMemoryItems into it.
336      */
337     newElement = (DataCacheElement *)uprv_malloc(sizeof(DataCacheElement));
338     if (newElement == NULL) {
339         *pErr = U_MEMORY_ALLOCATION_ERROR;
340         return NULL;
341     }
342     newElement->item = UDataMemory_createNewInstance(pErr);
343     if (U_FAILURE(*pErr)) {
344         uprv_free(newElement);
345         return NULL;
346     }
347     UDatamemory_assign(newElement->item, item);
348 
349     baseName = findBasename(path);
350     nameLen = (int32_t)uprv_strlen(baseName);
351     newElement->name = (char *)uprv_malloc(nameLen+1);
352     if (newElement->name == NULL) {
353         *pErr = U_MEMORY_ALLOCATION_ERROR;
354         uprv_free(newElement->item);
355         uprv_free(newElement);
356         return NULL;
357     }
358     uprv_strcpy(newElement->name, baseName);
359 
360     /* Stick the new DataCacheElement into the hash table.
361     */
362     umtx_lock(NULL);
363     oldValue = (DataCacheElement *)uhash_get(htable, path);
364     if (oldValue != NULL) {
365         subErr = U_USING_DEFAULT_WARNING;
366     }
367     else {
368         uhash_put(
369             htable,
370             newElement->name,               /* Key   */
371             newElement,                     /* Value */
372             &subErr);
373     }
374     umtx_unlock(NULL);
375 
376 #ifdef UDATA_DEBUG
377     fprintf(stderr, "Cache: [%s] <<< %p : %s. vFunc=%p\n", newElement->name,
378     newElement->item, u_errorName(subErr), newElement->item->vFuncs);
379 #endif
380 
381     if (subErr == U_USING_DEFAULT_WARNING || U_FAILURE(subErr)) {
382         *pErr = subErr; /* copy sub err unto fillin ONLY if something happens. */
383         uprv_free(newElement->name);
384         uprv_free(newElement->item);
385         uprv_free(newElement);
386         return oldValue ? oldValue->item : NULL;
387     }
388 
389     return newElement->item;
390 }
391 
/*----------------------------------------------------------------------*
393  *                                                                      *
394  *  Path management.  Could be shared with other tools/etc if need be   *
395  * later on.                                                            *
396  *                                                                      *
397  *----------------------------------------------------------------------*/
398 
399 U_NAMESPACE_BEGIN
400 
/**
 * Iterator over the candidate file paths at which a data item may be found.
 * Construct it with the search path, package and item names, then call next()
 * repeatedly until it returns NULL.
 */
class UDataPathIterator
{
public:
    UDataPathIterator(const char *path, const char *pkg,
                      const char *item, const char *suffix, UBool doCheckLastFour,
                      UErrorCode *pErrorCode);
    /** Returns the next candidate path (stored in pathBuffer), or NULL when done. */
    const char *next(UErrorCode *pErrorCode);

private:
    const char *path;                              /* working path (u_icudata_Dir) */
    const char *nextPath;                          /* path following this one */
    const char *basename;                          /* item's basename (icudt22e_mt.res)*/
    const char *suffix;                            /* item suffix; the constructor stores "" when NULL is passed */

    uint32_t    basenameLen;                       /* length of basename */

    CharString  itemPath;                          /* path passed in with item name */
    CharString  pathBuffer;                        /* output path for this it'ion */
    CharString  packageStub;                       /* example:  "/icudt28b". Will ignore that leaf in set paths. */

    UBool       checkLastFour;                     /* if TRUE then allow paths such as '/foo/myapp.dat'
                                                    * to match, checks last 4 chars of suffix with
                                                    * last 4 of path, then previous chars. */
};
425 
426 /**
427  * @param iter  The iterator to be initialized. Its current state does not matter.
428  * @param path  The full pathname to be iterated over.  If NULL, defaults to U_ICUDATA_NAME
429  * @param pkg   Package which is being searched for, ex "icudt28l".  Will ignore leave directories such as /icudt28l
430  * @param item  Item to be searched for.  Can include full path, such as /a/b/foo.dat
431  * @param suffix  Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly.
432  *               Ex:   'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2.
433  *                     '/blarg/stuff.dat' would also be found.
434  */
UDataPathIterator(const char * inPath,const char * pkg,const char * item,const char * inSuffix,UBool doCheckLastFour,UErrorCode * pErrorCode)435 UDataPathIterator::UDataPathIterator(const char *inPath, const char *pkg,
436                                      const char *item, const char *inSuffix, UBool doCheckLastFour,
437                                      UErrorCode *pErrorCode)
438 {
439 #ifdef UDATA_DEBUG
440         fprintf(stderr, "SUFFIX1=%s PATH=%s\n", inSuffix, inPath);
441 #endif
442     /** Path **/
443     if(inPath == NULL) {
444         path = u_getDataDirectory();
445     } else {
446         path = inPath;
447     }
448 
449     /** Package **/
450     if(pkg != NULL) {
451       packageStub.append(U_FILE_SEP_CHAR, *pErrorCode).append(pkg, *pErrorCode);
452 #ifdef UDATA_DEBUG
453       fprintf(stderr, "STUB=%s [%d]\n", packageStub.data(), packageStub.length());
454 #endif
455     }
456 
457     /** Item **/
458     basename = findBasename(item);
459     basenameLen = (int32_t)uprv_strlen(basename);
460 
461     /** Item path **/
462     if(basename == item) {
463         nextPath = path;
464     } else {
465         itemPath.append(item, (int32_t)(basename-item), *pErrorCode);
466         nextPath = itemPath.data();
467     }
468 #ifdef UDATA_DEBUG
469     fprintf(stderr, "SUFFIX=%s [%p]\n", inSuffix, inSuffix);
470 #endif
471 
472     /** Suffix  **/
473     if(inSuffix != NULL) {
474         suffix = inSuffix;
475     } else {
476         suffix = "";
477     }
478 
479     checkLastFour = doCheckLastFour;
480 
481     /* pathBuffer will hold the output path strings returned by this iterator */
482 
483 #ifdef UDATA_DEBUG
484     fprintf(stderr, "%p: init %s -> [path=%s], [base=%s], [suff=%s], [itempath=%s], [nextpath=%s], [checklast4=%s]\n",
485             iter,
486             item,
487             path,
488             basename,
489             suffix,
490             itemPath.data(),
491             nextPath,
492             checkLastFour?"TRUE":"false");
493 #endif
494 }
495 
496 /**
497  * Get the next path on the list.
498  *
499  * @param iter The Iter to be used
500  * @param len  If set, pointer to the length of the returned path, for convenience.
501  * @return Pointer to the next path segment, or NULL if there are no more.
502  */
next(UErrorCode * pErrorCode)503 const char *UDataPathIterator::next(UErrorCode *pErrorCode)
504 {
505     if(U_FAILURE(*pErrorCode)) {
506         return NULL;
507     }
508 
509     const char *currentPath = NULL;
510     int32_t     pathLen = 0;
511     const char *pathBasename;
512 
513     do
514     {
515         if( nextPath == NULL ) {
516             break;
517         }
518         currentPath = nextPath;
519 
520         if(nextPath == itemPath.data()) { /* we were processing item's path. */
521             nextPath = path; /* start with regular path next tm. */
522             pathLen = (int32_t)uprv_strlen(currentPath);
523         } else {
524             /* fix up next for next time */
525             nextPath = uprv_strchr(currentPath, U_PATH_SEP_CHAR);
526             if(nextPath == NULL) {
527                 /* segment: entire path */
528                 pathLen = (int32_t)uprv_strlen(currentPath);
529             } else {
530                 /* segment: until next segment */
531                 pathLen = (int32_t)(nextPath - currentPath);
532                 /* skip divider */
533                 nextPath ++;
534             }
535         }
536 
537         if(pathLen == 0) {
538             continue;
539         }
540 
541 #ifdef UDATA_DEBUG
542         fprintf(stderr, "rest of path (IDD) = %s\n", currentPath);
543         fprintf(stderr, "                     ");
544         {
545             uint32_t qqq;
546             for(qqq=0;qqq<pathLen;qqq++)
547             {
548                 fprintf(stderr, " ");
549             }
550 
551             fprintf(stderr, "^\n");
552         }
553 #endif
554         pathBuffer.clear().append(currentPath, pathLen, *pErrorCode);
555 
556         /* check for .dat files */
557         pathBasename = findBasename(pathBuffer.data());
558 
559         if(checkLastFour == TRUE &&
560            (pathLen>=4) &&
561            uprv_strncmp(pathBuffer.data() +(pathLen-4), suffix, 4)==0 && /* suffix matches */
562            uprv_strncmp(findBasename(pathBuffer.data()), basename, basenameLen)==0  && /* base matches */
563            uprv_strlen(pathBasename)==(basenameLen+4)) { /* base+suffix = full len */
564 
565 #ifdef UDATA_DEBUG
566             fprintf(stderr, "Have %s file on the path: %s\n", suffix, pathBuffer.data());
567 #endif
568             /* do nothing */
569         }
570         else
571         {       /* regular dir path */
572             if(pathBuffer[pathLen-1] != U_FILE_SEP_CHAR) {
573                 if((pathLen>=4) &&
574                    uprv_strncmp(pathBuffer.data()+(pathLen-4), ".dat", 4) == 0)
575                 {
576 #ifdef UDATA_DEBUG
577                     fprintf(stderr, "skipping non-directory .dat file %s\n", pathBuffer.data());
578 #endif
579                     continue;
580                 }
581 
582                 /* Check if it is a directory with the same name as our package */
583                 if(!packageStub.isEmpty() &&
584                    (pathLen > packageStub.length()) &&
585                    !uprv_strcmp(pathBuffer.data() + pathLen - packageStub.length(), packageStub.data())) {
586 #ifdef UDATA_DEBUG
587                   fprintf(stderr, "Found stub %s (will add package %s of len %d)\n", packageStub.data(), basename, basenameLen);
588 #endif
589                   pathBuffer.truncate(pathLen - packageStub.length());
590                 }
591                 pathBuffer.append(U_FILE_SEP_CHAR, *pErrorCode);
592             }
593 
594             /* + basename */
595             pathBuffer.append(packageStub.data()+1, packageStub.length()-1, *pErrorCode);
596 
597             if(*suffix)  /* tack on suffix */
598             {
599                 pathBuffer.append(suffix, *pErrorCode);
600             }
601         }
602 
603 #ifdef UDATA_DEBUG
604         fprintf(stderr, " -->  %s\n", pathBuffer.data());
605 #endif
606 
607         return pathBuffer.data();
608 
609     } while(path);
610 
611     /* fell way off the end */
612     return NULL;
613 }
614 
615 U_NAMESPACE_END
616 
617 /* ==================================================================================*/
618 
619 
620 /*----------------------------------------------------------------------*
621  *                                                                      *
622  *  Add a static reference to the common data  library                  *
623  *   Unless overridden by an explicit udata_setCommonData, this will be *
624  *      our common data.                                                *
625  *                                                                      *
626  *----------------------------------------------------------------------*/
627 extern "C" const DataHeader U_DATA_API U_ICUDATA_ENTRY_POINT;
628 
629 /*
630  * This would be a good place for weak-linkage declarations of
631  * partial-data-library access functions where each returns a pointer
632  * to its data package, if it is linked in.
633  */
634 /*
635 extern const void *uprv_getICUData_collation(void) ATTRIBUTE_WEAK;
636 extern const void *uprv_getICUData_conversion(void) ATTRIBUTE_WEAK;
637 */
638 
639 /*----------------------------------------------------------------------*
640  *                                                                      *
641  *   openCommonData   Attempt to open a common format (.dat) file       *
642  *                    Map it into memory (if it's not there already)    *
643  *                    and return a UDataMemory object for it.           *
644  *                                                                      *
645  *                    If the requested data is already open and cached  *
646  *                       just return the cached UDataMem object.        *
647  *                                                                      *
648  *----------------------------------------------------------------------*/
649 static UDataMemory *
openCommonData(const char * path,int32_t commonDataIndex,UErrorCode * pErrorCode)650 openCommonData(const char *path,          /*  Path from OpenChoice?          */
651                int32_t commonDataIndex,   /*  ICU Data (index >= 0) if path == NULL */
652                UErrorCode *pErrorCode)
653 {
654     UDataMemory tData;
655     const char *pathBuffer;
656     const char *inBasename;
657 
658     if (U_FAILURE(*pErrorCode)) {
659         return NULL;
660     }
661 
662     UDataMemory_init(&tData);
663 
664     /* ??????? TODO revisit this */
665     if (commonDataIndex >= 0) {
666         /* "mini-cache" for common ICU data */
667         if(commonDataIndex >= UPRV_LENGTHOF(gCommonICUDataArray)) {
668             return NULL;
669         }
670         {
671             Mutex lock;
672             if(gCommonICUDataArray[commonDataIndex] != NULL) {
673                 return gCommonICUDataArray[commonDataIndex];
674             }
675             int32_t i;
676             for(i = 0; i < commonDataIndex; ++i) {
677                 if(gCommonICUDataArray[i]->pHeader == &U_ICUDATA_ENTRY_POINT) {
678                     /* The linked-in data is already in the list. */
679                     return NULL;
680                 }
681             }
682         }
683 
684         /* Add the linked-in data to the list. */
685         /*
686          * This is where we would check and call weakly linked partial-data-library
687          * access functions.
688          */
689         /*
690         if (uprv_getICUData_collation) {
691             setCommonICUDataPointer(uprv_getICUData_collation(), FALSE, pErrorCode);
692         }
693         if (uprv_getICUData_conversion) {
694             setCommonICUDataPointer(uprv_getICUData_conversion(), FALSE, pErrorCode);
695         }
696         */
697         setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT, FALSE, pErrorCode);
698         {
699             Mutex lock;
700             return gCommonICUDataArray[commonDataIndex];
701         }
702     }
703 
704 
705     /* request is NOT for ICU Data.  */
706 
707     /* Find the base name portion of the supplied path.   */
708     /*   inBasename will be left pointing somewhere within the original path string.      */
709     inBasename = findBasename(path);
710 #ifdef UDATA_DEBUG
711     fprintf(stderr, "inBasename = %s\n", inBasename);
712 #endif
713 
714     if(*inBasename==0) {
715         /* no basename.     This will happen if the original path was a directory name,   */
716         /*    like  "a/b/c/".   (Fallback to separate files will still work.)             */
717 #ifdef UDATA_DEBUG
718         fprintf(stderr, "ocd: no basename in %s, bailing.\n", path);
719 #endif
720         if (U_SUCCESS(*pErrorCode)) {
721             *pErrorCode=U_FILE_ACCESS_ERROR;
722         }
723         return NULL;
724     }
725 
726    /* Is the requested common data file already open and cached?                     */
727    /*   Note that the cache is keyed by the base name only.  The rest of the path,   */
728    /*     if any, is not considered.                                                 */
729     UDataMemory  *dataToReturn = udata_findCachedData(inBasename, *pErrorCode);
730     if (dataToReturn != NULL || U_FAILURE(*pErrorCode)) {
731         return dataToReturn;
732     }
733 
734     /* Requested item is not in the cache.
735      * Hunt it down, trying all the path locations
736      */
737 
738     UDataPathIterator iter(u_getDataDirectory(), inBasename, path, ".dat", TRUE, pErrorCode);
739 
740     while((UDataMemory_isLoaded(&tData)==FALSE) && (pathBuffer = iter.next(pErrorCode)) != NULL)
741     {
742 #ifdef UDATA_DEBUG
743         fprintf(stderr, "ocd: trying path %s - ", pathBuffer);
744 #endif
745         uprv_mapFile(&tData, pathBuffer);
746 #ifdef UDATA_DEBUG
747         fprintf(stderr, "%s\n", UDataMemory_isLoaded(&tData)?"LOADED":"not loaded");
748 #endif
749     }
750 
751 #if defined(OS390_STUBDATA) && defined(OS390BATCH)
752     if (!UDataMemory_isLoaded(&tData)) {
753         char ourPathBuffer[1024];
754         /* One more chance, for extendCommonData() */
755         uprv_strncpy(ourPathBuffer, path, 1019);
756         ourPathBuffer[1019]=0;
757         uprv_strcat(ourPathBuffer, ".dat");
758         uprv_mapFile(&tData, ourPathBuffer);
759     }
760 #endif
761 
762     if (U_FAILURE(*pErrorCode)) {
763         return NULL;
764     }
765     if (!UDataMemory_isLoaded(&tData)) {
766         /* no common data */
767         *pErrorCode=U_FILE_ACCESS_ERROR;
768         return NULL;
769     }
770 
771     /* we have mapped a file, check its header */
772     udata_checkCommonData(&tData, pErrorCode);
773 
774 
775     /* Cache the UDataMemory struct for this .dat file,
776      *   so we won't need to hunt it down and map it again next time
777      *   something is needed from it.                */
778     return udata_cacheDataItem(inBasename, &tData, pErrorCode);
779 }
780 
781 
782 /*----------------------------------------------------------------------*
783  *                                                                      *
784  *   extendICUData   If the full set of ICU data was not loaded at      *
785  *                   program startup, load it now.  This function will  *
786  *                   be called when the lookup of an ICU data item in   *
787  *                   the common ICU data fails.                         *
788  *                                                                      *
789  *                   return true if new data is loaded, false otherwise.*
790  *                                                                      *
791  *----------------------------------------------------------------------*/
extendICUData(UErrorCode * pErr)792 static UBool extendICUData(UErrorCode *pErr)
793 {
794     UDataMemory   *pData;
795     UDataMemory   copyPData;
796     UBool         didUpdate = FALSE;
797 
798     /*
799      * There is a chance for a race condition here.
800      * Normally, ICU data is loaded from a DLL or via mmap() and
801      * setCommonICUData() will detect if the same address is set twice.
802      * If ICU is built with data loading via fread() then the address will
803      * be different each time the common data is loaded and we may add
804      * multiple copies of the data.
805      * In this case, use a mutex to prevent the race.
806      * Use a specific mutex to avoid nested locks of the global mutex.
807      */
808 #if MAP_IMPLEMENTATION==MAP_STDIO
809     static UMutex extendICUDataMutex = U_MUTEX_INITIALIZER;
810     umtx_lock(&extendICUDataMutex);
811 #endif
812     if(!umtx_loadAcquire(gHaveTriedToLoadCommonData)) {
813         /* See if we can explicitly open a .dat file for the ICUData. */
814         pData = openCommonData(
815                    U_ICUDATA_NAME,            /*  "icudt20l" , for example.          */
816                    -1,                        /*  Pretend we're not opening ICUData  */
817                    pErr);
818 
819         /* How about if there is no pData, eh... */
820 
821        UDataMemory_init(&copyPData);
822        if(pData != NULL) {
823           UDatamemory_assign(&copyPData, pData);
824           copyPData.map = 0;              /* The mapping for this data is owned by the hash table */
825           copyPData.mapAddr = 0;          /*   which will unmap it when ICU is shut down.         */
826                                           /* CommonICUData is also unmapped when ICU is shut down.*/
827                                           /* To avoid unmapping the data twice, zero out the map  */
828                                           /*   fields in the UDataMemory that we're assigning     */
829                                           /*   to CommonICUData.                                  */
830 
831           didUpdate = /* no longer using this result */
832               setCommonICUData(&copyPData,/*  The new common data.                                */
833                        FALSE,             /*  No warnings if write didn't happen                  */
834                        pErr);             /*  setCommonICUData honors errors; NOP if error set    */
835         }
836 
837         umtx_storeRelease(gHaveTriedToLoadCommonData, 1);
838     }
839 
840     didUpdate = findCommonICUDataByName(U_ICUDATA_NAME, *pErr);  /* Return 'true' when a racing writes out the extended                 */
841                                                           /* data after another thread has failed to see it (in openCommonData), so     */
842                                                           /* extended data can be examined.                                             */
843                                                           /* Also handles a race through here before gHaveTriedToLoadCommonData is set. */
844 
845 #if MAP_IMPLEMENTATION==MAP_STDIO
846     umtx_unlock(&extendICUDataMutex);
847 #endif
848     return didUpdate;               /* Return true if ICUData pointer was updated.   */
849                                     /*   (Could potentialy have been done by another thread racing */
850                                     /*   us through here, but that's fine, we still return true    */
851                                     /*   so that current thread will also examine extended data.   */
852 }
853 
854 /*----------------------------------------------------------------------*
855  *                                                                      *
856  *   udata_setCommonData                                                *
857  *                                                                      *
858  *----------------------------------------------------------------------*/
859 U_CAPI void U_EXPORT2
udata_setCommonData(const void * data,UErrorCode * pErrorCode)860 udata_setCommonData(const void *data, UErrorCode *pErrorCode) {
861     UDataMemory dataMemory;
862 
863     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
864         return;
865     }
866 
867     if(data==NULL) {
868         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
869         return;
870     }
871 
872     /* set the data pointer and test for validity */
873     UDataMemory_init(&dataMemory);
874     UDataMemory_setData(&dataMemory, data);
875     udata_checkCommonData(&dataMemory, pErrorCode);
876     if (U_FAILURE(*pErrorCode)) {return;}
877 
878     /* we have good data */
879     /* Set it up as the ICU Common Data.  */
880     setCommonICUData(&dataMemory, TRUE, pErrorCode);
881 }
882 
883 /*---------------------------------------------------------------------------
884  *
885  *  udata_setAppData
886  *
887  *---------------------------------------------------------------------------- */
888 U_CAPI void U_EXPORT2
udata_setAppData(const char * path,const void * data,UErrorCode * err)889 udata_setAppData(const char *path, const void *data, UErrorCode *err)
890 {
891     UDataMemory     udm;
892 
893     if(err==NULL || U_FAILURE(*err)) {
894         return;
895     }
896     if(data==NULL) {
897         *err=U_ILLEGAL_ARGUMENT_ERROR;
898         return;
899     }
900 
901     UDataMemory_init(&udm);
902     UDataMemory_setData(&udm, data);
903     udata_checkCommonData(&udm, err);
904     udata_cacheDataItem(path, &udm, err);
905 }
906 
907 /*----------------------------------------------------------------------------*
908  *                                                                            *
909  *  checkDataItem     Given a freshly located/loaded data item, either        *
910  *                    an entry in a common file or a separately loaded file,  *
911  *                    sanity check its header, and see if the data is         *
912  *                    acceptable to the app.                                  *
913  *                    If the data is good, create and return a UDataMemory    *
914  *                    object that can be returned to the application.         *
915  *                    Return NULL on any sort of failure.                     *
916  *                                                                            *
917  *----------------------------------------------------------------------------*/
918 static UDataMemory *
checkDataItem(const DataHeader * pHeader,UDataMemoryIsAcceptable * isAcceptable,void * context,const char * type,const char * name,UErrorCode * nonFatalErr,UErrorCode * fatalErr)919 checkDataItem
920 (
921  const DataHeader         *pHeader,         /* The data item to be checked.                */
922  UDataMemoryIsAcceptable  *isAcceptable,    /* App's call-back function                    */
923  void                     *context,         /*   pass-thru param for above.                */
924  const char               *type,            /*   pass-thru param for above.                */
925  const char               *name,            /*   pass-thru param for above.                */
926  UErrorCode               *nonFatalErr,     /* Error code if this data was not acceptable  */
927                                             /*   but openChoice should continue with       */
928                                             /*   trying to get data from fallback path.    */
929  UErrorCode               *fatalErr         /* Bad error, caller should return immediately */
930  )
931 {
932     UDataMemory  *rDataMem = NULL;          /* the new UDataMemory, to be returned.        */
933 
934     if (U_FAILURE(*fatalErr)) {
935         return NULL;
936     }
937 
938     if(pHeader->dataHeader.magic1==0xda &&
939         pHeader->dataHeader.magic2==0x27 &&
940         (isAcceptable==NULL || isAcceptable(context, type, name, &pHeader->info))
941     ) {
942         rDataMem=UDataMemory_createNewInstance(fatalErr);
943         if (U_FAILURE(*fatalErr)) {
944             return NULL;
945         }
946         rDataMem->pHeader = pHeader;
947     } else {
948         /* the data is not acceptable, look further */
949         /* If we eventually find something good, this errorcode will be */
950         /*    cleared out.                                              */
951         *nonFatalErr=U_INVALID_FORMAT_ERROR;
952     }
953     return rDataMem;
954 }
955 
956 /**
957  * @return 0 if not loaded, 1 if loaded or err
958  */
doLoadFromIndividualFiles(const char * pkgName,const char * dataPath,const char * tocEntryPathSuffix,const char * path,const char * type,const char * name,UDataMemoryIsAcceptable * isAcceptable,void * context,UErrorCode * subErrorCode,UErrorCode * pErrorCode)959 static UDataMemory *doLoadFromIndividualFiles(const char *pkgName,
960         const char *dataPath, const char *tocEntryPathSuffix,
961             /* following arguments are the same as doOpenChoice itself */
962             const char *path, const char *type, const char *name,
963              UDataMemoryIsAcceptable *isAcceptable, void *context,
964              UErrorCode *subErrorCode,
965              UErrorCode *pErrorCode)
966 {
967     const char         *pathBuffer;
968     UDataMemory         dataMemory;
969     UDataMemory *pEntryData;
970 
971     /* look in ind. files: package\nam.typ  ========================= */
972     /* init path iterator for individual files */
973     UDataPathIterator iter(dataPath, pkgName, path, tocEntryPathSuffix, FALSE, pErrorCode);
974 
975     while((pathBuffer = iter.next(pErrorCode)))
976     {
977 #ifdef UDATA_DEBUG
978         fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer);
979 #endif
980         if(uprv_mapFile(&dataMemory, pathBuffer))
981         {
982             pEntryData = checkDataItem(dataMemory.pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
983             if (pEntryData != NULL) {
984                 /* Data is good.
985                 *  Hand off ownership of the backing memory to the user's UDataMemory.
986                 *  and return it.   */
987                 pEntryData->mapAddr = dataMemory.mapAddr;
988                 pEntryData->map     = dataMemory.map;
989 
990 #ifdef UDATA_DEBUG
991                 fprintf(stderr, "** Mapped file: %s\n", pathBuffer);
992 #endif
993                 return pEntryData;
994             }
995 
996             /* the data is not acceptable, or some error occured.  Either way, unmap the memory */
997             udata_close(&dataMemory);
998 
999             /* If we had a nasty error, bail out completely.  */
1000             if (U_FAILURE(*pErrorCode)) {
1001                 return NULL;
1002             }
1003 
1004             /* Otherwise remember that we found data but didn't like it for some reason  */
1005             *subErrorCode=U_INVALID_FORMAT_ERROR;
1006         }
1007 #ifdef UDATA_DEBUG
1008         fprintf(stderr, "%s\n", UDataMemory_isLoaded(&dataMemory)?"LOADED":"not loaded");
1009 #endif
1010     }
1011     return NULL;
1012 }
1013 
1014 /**
1015  * @return 0 if not loaded, 1 if loaded or err
1016  */
doLoadFromCommonData(UBool isICUData,const char *,const char *,const char *,const char * tocEntryName,const char * path,const char * type,const char * name,UDataMemoryIsAcceptable * isAcceptable,void * context,UErrorCode * subErrorCode,UErrorCode * pErrorCode)1017 static UDataMemory *doLoadFromCommonData(UBool isICUData, const char * /*pkgName*/,
1018         const char * /*dataPath*/, const char * /*tocEntryPathSuffix*/, const char *tocEntryName,
1019             /* following arguments are the same as doOpenChoice itself */
1020             const char *path, const char *type, const char *name,
1021              UDataMemoryIsAcceptable *isAcceptable, void *context,
1022              UErrorCode *subErrorCode,
1023              UErrorCode *pErrorCode)
1024 {
1025     UDataMemory        *pEntryData;
1026     const DataHeader   *pHeader;
1027     UDataMemory        *pCommonData;
1028     int32_t            commonDataIndex;
1029     UBool              checkedExtendedICUData = FALSE;
1030     /* try to get common data.  The loop is for platforms such as the 390 that do
1031      *  not initially load the full set of ICU data.  If the lookup of an ICU data item
1032      *  fails, the full (but slower to load) set is loaded, the and the loop repeats,
1033      *  trying the lookup again.  Once the full set of ICU data is loaded, the loop wont
1034      *  repeat because the full set will be checked the first time through.
1035      *
1036      *  The loop also handles the fallback to a .dat file if the application linked
1037      *   to the stub data library rather than a real library.
1038      */
1039     for (commonDataIndex = isICUData ? 0 : -1;;) {
1040         pCommonData=openCommonData(path, commonDataIndex, subErrorCode); /** search for pkg **/
1041 
1042         if(U_SUCCESS(*subErrorCode) && pCommonData!=NULL) {
1043             int32_t length;
1044 
1045             /* look up the data piece in the common data */
1046             pHeader=pCommonData->vFuncs->Lookup(pCommonData, tocEntryName, &length, subErrorCode);
1047 #ifdef UDATA_DEBUG
1048             fprintf(stderr, "%s: pHeader=%p - %s\n", tocEntryName, pHeader, u_errorName(*subErrorCode));
1049 #endif
1050 
1051             if(pHeader!=NULL) {
1052                 pEntryData = checkDataItem(pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
1053 #ifdef UDATA_DEBUG
1054                 fprintf(stderr, "pEntryData=%p\n", pEntryData);
1055 #endif
1056                 if (U_FAILURE(*pErrorCode)) {
1057                     return NULL;
1058                 }
1059                 if (pEntryData != NULL) {
1060                     pEntryData->length = length;
1061                     return pEntryData;
1062                 }
1063             }
1064         }
1065         /* Data wasn't found.  If we were looking for an ICUData item and there is
1066          * more data available, load it and try again,
1067          * otherwise break out of this loop. */
1068         if (!isICUData) {
1069             return NULL;
1070         } else if (pCommonData != NULL) {
1071             ++commonDataIndex;  /* try the next data package */
1072         } else if ((!checkedExtendedICUData) && extendICUData(subErrorCode)) {
1073             checkedExtendedICUData = TRUE;
1074             /* try this data package slot again: it changed from NULL to non-NULL */
1075         } else {
1076             return NULL;
1077         }
1078     }
1079 }
1080 
1081 /*
1082  * Identify the Time Zone resources that are subject to special override data loading.
1083  */
isTimeZoneFile(const char * name,const char * type)1084 static UBool isTimeZoneFile(const char *name, const char *type) {
1085     return ((uprv_strcmp(type, "res") == 0) &&
1086             (uprv_strcmp(name, "zoneinfo64") == 0 ||
1087              uprv_strcmp(name, "timezoneTypes") == 0 ||
1088              uprv_strcmp(name, "windowsZones") == 0 ||
1089              uprv_strcmp(name, "metaZones") == 0));
1090 }
1091 
1092 /*
1093  *  A note on the ownership of Mapped Memory
1094  *
1095  *  For common format files, ownership resides with the UDataMemory object
1096  *    that lives in the cache of opened common data.  These UDataMemorys are private
1097  *    to the udata implementation, and are never seen directly by users.
1098  *
 *    The UDataMemory objects returned to users will have the address of some desired
 *    data within the mapped region, but they won't have the mapping info itself, and thus
 *    won't cause anything to be removed from memory when they are closed.
1102  *
1103  *  For individual data files, the UDataMemory returned to the user holds the
1104  *  information necessary to unmap the data on close.  If the user independently
1105  *  opens the same data file twice, two completely independent mappings will be made.
1106  *  (There is no cache of opened data items from individual files, only a cache of
1107  *   opened Common Data files, that is, files containing a collection of data items.)
1108  *
1109  *  For common data passed in from the user via udata_setAppData() or
1110  *  udata_setCommonData(), ownership remains with the user.
1111  *
1112  *  UDataMemory objects themselves, as opposed to the memory they describe,
1113  *  can be anywhere - heap, stack/local or global.
1114  *  They have a flag to indicate when they're heap allocated and thus
1115  *  must be deleted when closed.
1116  */
1117 
1118 
1119 /*----------------------------------------------------------------------------*
1120  *                                                                            *
1121  * main data loading functions                                                *
1122  *                                                                            *
1123  *----------------------------------------------------------------------------*/
1124 static UDataMemory *
doOpenChoice(const char * path,const char * type,const char * name,UDataMemoryIsAcceptable * isAcceptable,void * context,UErrorCode * pErrorCode)1125 doOpenChoice(const char *path, const char *type, const char *name,
1126              UDataMemoryIsAcceptable *isAcceptable, void *context,
1127              UErrorCode *pErrorCode)
1128 {
1129     UDataMemory         *retVal = NULL;
1130 
1131     const char         *dataPath;
1132 
1133     int32_t             tocEntrySuffixIndex;
1134     const char         *tocEntryPathSuffix;
1135     UErrorCode          subErrorCode=U_ZERO_ERROR;
1136     const char         *treeChar;
1137 
1138     UBool               isICUData = FALSE;
1139 
1140 
1141     /* Is this path ICU data? */
1142     if(path == NULL ||
1143        !strcmp(path, U_ICUDATA_ALIAS) ||  /* "ICUDATA" */
1144        !uprv_strncmp(path, U_ICUDATA_NAME U_TREE_SEPARATOR_STRING, /* "icudt26e-" */
1145                      uprv_strlen(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING)) ||
1146        !uprv_strncmp(path, U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING, /* "ICUDATA-" */
1147                      uprv_strlen(U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING))) {
1148       isICUData = TRUE;
1149     }
1150 
1151 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)  /* Windows:  try "foo\bar" and "foo/bar" */
1152     /* remap from alternate path char to the main one */
1153     CharString altSepPath;
1154     if(path) {
1155         if(uprv_strchr(path,U_FILE_ALT_SEP_CHAR) != NULL) {
1156             altSepPath.append(path, *pErrorCode);
1157             char *p;
1158             while((p=uprv_strchr(altSepPath.data(), U_FILE_ALT_SEP_CHAR))) {
1159                 *p = U_FILE_SEP_CHAR;
1160             }
1161 #if defined (UDATA_DEBUG)
1162             fprintf(stderr, "Changed path from [%s] to [%s]\n", path, altSepPath.s);
1163 #endif
1164             path = altSepPath.data();
1165         }
1166     }
1167 #endif
1168 
1169     CharString tocEntryName; /* entry name in tree format. ex:  'icudt28b/coll/ar.res' */
1170     CharString tocEntryPath; /* entry name in path format. ex:  'icudt28b\\coll\\ar.res' */
1171 
1172     CharString pkgName;
1173     CharString treeName;
1174 
1175     /* ======= Set up strings */
1176     if(path==NULL) {
1177         pkgName.append(U_ICUDATA_NAME, *pErrorCode);
1178     } else {
1179         const char *pkg;
1180         const char *first;
1181         pkg = uprv_strrchr(path, U_FILE_SEP_CHAR);
1182         first = uprv_strchr(path, U_FILE_SEP_CHAR);
1183         if(uprv_pathIsAbsolute(path) || (pkg != first)) { /* more than one slash in the path- not a tree name */
1184             /* see if this is an /absolute/path/to/package  path */
1185             if(pkg) {
1186                 pkgName.append(pkg+1, *pErrorCode);
1187             } else {
1188                 pkgName.append(path, *pErrorCode);
1189             }
1190         } else {
1191             treeChar = uprv_strchr(path, U_TREE_SEPARATOR);
1192             if(treeChar) {
1193                 treeName.append(treeChar+1, *pErrorCode); /* following '-' */
1194                 if(isICUData) {
1195                     pkgName.append(U_ICUDATA_NAME, *pErrorCode);
1196                 } else {
1197                     pkgName.append(path, (int32_t)(treeChar-path), *pErrorCode);
1198                     if (first == NULL) {
1199                         /*
1200                         This user data has no path, but there is a tree name.
1201                         Look up the correct path from the data cache later.
1202                         */
1203                         path = pkgName.data();
1204                     }
1205                 }
1206             } else {
1207                 if(isICUData) {
1208                     pkgName.append(U_ICUDATA_NAME, *pErrorCode);
1209                 } else {
1210                     pkgName.append(path, *pErrorCode);
1211                 }
1212             }
1213         }
1214     }
1215 
1216 #ifdef UDATA_DEBUG
1217     fprintf(stderr, " P=%s T=%s\n", pkgName.data(), treeName.data());
1218 #endif
1219 
1220     /* setting up the entry name and file name
1221      * Make up a full name by appending the type to the supplied
1222      *  name, assuming that a type was supplied.
1223      */
1224 
1225     /* prepend the package */
1226     tocEntryName.append(pkgName, *pErrorCode);
1227     tocEntryPath.append(pkgName, *pErrorCode);
1228     tocEntrySuffixIndex = tocEntryName.length();
1229 
1230     if(!treeName.isEmpty()) {
1231         tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode);
1232         tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode);
1233     }
1234 
1235     tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(name, *pErrorCode);
1236     tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(name, *pErrorCode);
1237     if(type!=NULL && *type!=0) {
1238         tocEntryName.append(".", *pErrorCode).append(type, *pErrorCode);
1239         tocEntryPath.append(".", *pErrorCode).append(type, *pErrorCode);
1240     }
1241     tocEntryPathSuffix = tocEntryPath.data()+tocEntrySuffixIndex; /* suffix starts here */
1242 
1243 #ifdef UDATA_DEBUG
1244     fprintf(stderr, " tocEntryName = %s\n", tocEntryName.data());
1245     fprintf(stderr, " tocEntryPath = %s\n", tocEntryName.data());
1246 #endif
1247 
1248     if(path == NULL) {
1249         path = COMMON_DATA_NAME; /* "icudt26e" */
1250     }
1251 
1252     /************************ Begin loop looking for ind. files ***************/
1253 #ifdef UDATA_DEBUG
1254     fprintf(stderr, "IND: inBasename = %s, pkg=%s\n", "(n/a)", packageNameFromPath(path));
1255 #endif
1256 
1257     /* End of dealing with a null basename */
1258     dataPath = u_getDataDirectory();
1259 
1260     /****    Time zone individual files override  */
1261     if (isICUData && isTimeZoneFile(name, type)) {
1262         const char *tzFilesDir = u_getTimeZoneFilesDirectory(pErrorCode);
1263         if (tzFilesDir[0] != 0) {
1264 #ifdef UDATA_DEBUG
1265             fprintf(stderr, "Trying Time Zone Files directory = %s\n", tzFilesDir);
1266 #endif
1267             retVal = doLoadFromIndividualFiles(/* pkgName.data() */ "", tzFilesDir, tocEntryPathSuffix,
1268                             /* path */ "", type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1269             if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
1270                 return retVal;
1271             }
1272         }
1273     }
1274 
1275     /****    COMMON PACKAGE  - only if packages are first. */
1276     if(gDataFileAccess == UDATA_PACKAGES_FIRST) {
1277 #ifdef UDATA_DEBUG
1278         fprintf(stderr, "Trying packages (UDATA_PACKAGES_FIRST)\n");
1279 #endif
1280         /* #2 */
1281         retVal = doLoadFromCommonData(isICUData,
1282                             pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(),
1283                             path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1284         if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
1285             return retVal;
1286         }
1287     }
1288 
1289     /****    INDIVIDUAL FILES  */
1290     if((gDataFileAccess==UDATA_PACKAGES_FIRST) ||
1291        (gDataFileAccess==UDATA_FILES_FIRST)) {
1292 #ifdef UDATA_DEBUG
1293         fprintf(stderr, "Trying individual files\n");
1294 #endif
1295         /* Check to make sure that there is a dataPath to iterate over */
1296         if ((dataPath && *dataPath) || !isICUData) {
1297             retVal = doLoadFromIndividualFiles(pkgName.data(), dataPath, tocEntryPathSuffix,
1298                             path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1299             if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
1300                 return retVal;
1301             }
1302         }
1303     }
1304 
1305     /****    COMMON PACKAGE  */
1306     if((gDataFileAccess==UDATA_ONLY_PACKAGES) ||
1307        (gDataFileAccess==UDATA_FILES_FIRST)) {
1308 #ifdef UDATA_DEBUG
1309         fprintf(stderr, "Trying packages (UDATA_ONLY_PACKAGES || UDATA_FILES_FIRST)\n");
1310 #endif
1311         retVal = doLoadFromCommonData(isICUData,
1312                             pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(),
1313                             path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1314         if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
1315             return retVal;
1316         }
1317     }
1318 
1319     /* Load from DLL.  If we haven't attempted package load, we also haven't had any chance to
1320         try a DLL (static or setCommonData/etc)  load.
1321          If we ever have a "UDATA_ONLY_FILES", add it to the or list here.  */
1322     if(gDataFileAccess==UDATA_NO_FILES) {
1323 #ifdef UDATA_DEBUG
1324         fprintf(stderr, "Trying common data (UDATA_NO_FILES)\n");
1325 #endif
1326         retVal = doLoadFromCommonData(isICUData,
1327                             pkgName.data(), "", tocEntryPathSuffix, tocEntryName.data(),
1328                             path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1329         if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
1330             return retVal;
1331         }
1332     }
1333 
1334     /* data not found */
1335     if(U_SUCCESS(*pErrorCode)) {
1336         if(U_SUCCESS(subErrorCode)) {
1337             /* file not found */
1338             *pErrorCode=U_FILE_ACCESS_ERROR;
1339         } else {
1340             /* entry point not found or rejected */
1341             *pErrorCode=subErrorCode;
1342         }
1343     }
1344     return retVal;
1345 }
1346 
1347 
1348 
1349 /* API ---------------------------------------------------------------------- */
1350 
1351 U_CAPI UDataMemory * U_EXPORT2
udata_open(const char * path,const char * type,const char * name,UErrorCode * pErrorCode)1352 udata_open(const char *path, const char *type, const char *name,
1353            UErrorCode *pErrorCode) {
1354 #ifdef UDATA_DEBUG
1355   fprintf(stderr, "udata_open(): Opening: %s : %s . %s\n", (path?path:"NULL"), name, type);
1356     fflush(stderr);
1357 #endif
1358 
1359     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1360         return NULL;
1361     } else if(name==NULL || *name==0) {
1362         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1363         return NULL;
1364     } else {
1365         return doOpenChoice(path, type, name, NULL, NULL, pErrorCode);
1366     }
1367 }
1368 
1369 
1370 
1371 U_CAPI UDataMemory * U_EXPORT2
udata_openChoice(const char * path,const char * type,const char * name,UDataMemoryIsAcceptable * isAcceptable,void * context,UErrorCode * pErrorCode)1372 udata_openChoice(const char *path, const char *type, const char *name,
1373                  UDataMemoryIsAcceptable *isAcceptable, void *context,
1374                  UErrorCode *pErrorCode) {
1375 #ifdef UDATA_DEBUG
1376   fprintf(stderr, "udata_openChoice(): Opening: %s : %s . %s\n", (path?path:"NULL"), name, type);
1377 #endif
1378 
1379     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1380         return NULL;
1381     } else if(name==NULL || *name==0 || isAcceptable==NULL) {
1382         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1383         return NULL;
1384     } else {
1385         return doOpenChoice(path, type, name, isAcceptable, context, pErrorCode);
1386     }
1387 }
1388 
1389 
1390 
1391 U_CAPI void U_EXPORT2
udata_getInfo(UDataMemory * pData,UDataInfo * pInfo)1392 udata_getInfo(UDataMemory *pData, UDataInfo *pInfo) {
1393     if(pInfo!=NULL) {
1394         if(pData!=NULL && pData->pHeader!=NULL) {
1395             const UDataInfo *info=&pData->pHeader->info;
1396             uint16_t dataInfoSize=udata_getInfoSize(info);
1397             if(pInfo->size>dataInfoSize) {
1398                 pInfo->size=dataInfoSize;
1399             }
1400             uprv_memcpy((uint16_t *)pInfo+1, (const uint16_t *)info+1, pInfo->size-2);
1401             if(info->isBigEndian!=U_IS_BIG_ENDIAN) {
1402                 /* opposite endianness */
1403                 uint16_t x=info->reservedWord;
1404                 pInfo->reservedWord=(uint16_t)((x<<8)|(x>>8));
1405             }
1406         } else {
1407             pInfo->size=0;
1408         }
1409     }
1410 }
1411 
1412 
udata_setFileAccess(UDataFileAccess access,UErrorCode *)1413 U_CAPI void U_EXPORT2 udata_setFileAccess(UDataFileAccess access, UErrorCode * /*status*/)
1414 {
1415     // Note: this function is documented as not thread safe.
1416     gDataFileAccess = access;
1417 }
1418