1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 *   Copyright (C) 1999-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 ******************************************************************************
10 *   file name:  udata.cpp
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 1999oct25
16 *   created by: Markus W. Scherer
17 */
18 
19 #include "unicode/utypes.h"  /* U_PLATFORM etc. */
20 
21 #ifdef __GNUC__
22 /* if gcc
23 #define ATTRIBUTE_WEAK __attribute__ ((weak))
24 might have to #include some other header
25 */
26 #endif
27 
28 #include "unicode/putil.h"
29 #include "unicode/udata.h"
30 #include "unicode/uversion.h"
31 #include "charstr.h"
32 #include "cmemory.h"
33 #include "cstring.h"
34 #include "mutex.h"
35 #include "putilimp.h"
36 #include "restrace.h"
37 #include "uassert.h"
38 #include "ucln_cmn.h"
39 #include "ucmndata.h"
40 #include "udatamem.h"
41 #include "uhash.h"
42 #include "umapfile.h"
43 #include "umutex.h"
44 
45 /***********************************************************************
46 *
47 *   Notes on the organization of the ICU data implementation
48 *
49 *      All of the public API is defined in udata.h
50 *
51 *      The implementation is split into several files...
52 *
*         - udata.cpp  (this file) contains higher level code that knows about
*                     the search paths for locating data, caching opened data, etc.
55 *
56 *         - umapfile.c  contains the low level platform-specific code for actually loading
57 *                     (memory mapping, file reading, whatever) data into memory.
58 *
59 *         - ucmndata.c  deals with the tables of contents of ICU data items within
60 *                     an ICU common format data file.  The implementation includes
61 *                     an abstract interface and support for multiple TOC formats.
62 *                     All knowledge of any specific TOC format is encapsulated here.
63 *
64 *         - udatamem.c has code for managing UDataMemory structs.  These are little
65 *                     descriptor objects for blocks of memory holding ICU data of
66 *                     various types.
67 */
68 
69 /* configuration ---------------------------------------------------------- */
70 
71 /* If you are excruciatingly bored turn this on .. */
72 /* #define UDATA_DEBUG 1 */
73 
74 #if defined(UDATA_DEBUG)
75 #   include <stdio.h>
76 #endif
77 
78 U_NAMESPACE_USE
79 
80 /*
81  *  Forward declarations
82  */
83 static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err);
84 
85 /***********************************************************************
86 *
87 *    static (Global) data
88 *
89 ************************************************************************/
90 
91 /*
92  * Pointers to the common ICU data.
93  *
94  * We store multiple pointers to ICU data packages and iterate through them
95  * when looking for a data item.
96  *
97  * It is possible to combine this with dependency inversion:
98  * One or more data package libraries may export
99  * functions that each return a pointer to their piece of the ICU data,
100  * and this file would import them as weak functions, without a
101  * strong linker dependency from the common library on the data library.
102  *
103  * Then we can have applications depend on only that part of ICU's data
104  * that they really need, reducing the size of binaries that take advantage
105  * of this.
106  */
107 static UDataMemory *gCommonICUDataArray[10] = { NULL };   // Access protected by icu global mutex.
108 
109 static u_atomic_int32_t gHaveTriedToLoadCommonData = ATOMIC_INT32_T_INITIALIZER(0);  //  See extendICUData().
110 
111 static UHashtable  *gCommonDataCache = NULL;  /* Global hash table of opened ICU data files.  */
112 static icu::UInitOnce gCommonDataCacheInitOnce = U_INITONCE_INITIALIZER;
113 
114 // Android-changed: On Android, use our patched version of openCommonData() to load the data,
115 //   and do not try to load ICU data from other files.
116 #if U_PLATFORM == U_PF_ANDROID
117 static UDataFileAccess  gDataFileAccess = UDATA_NO_FILES;
118 #elif !defined(ICU_DATA_DIR_WINDOWS)
119 static UDataFileAccess  gDataFileAccess = UDATA_DEFAULT_ACCESS;  // Access not synchronized.
120                                                                  // Modifying is documented as thread-unsafe.
121 #else
122 // If we are using the Windows data directory, then look in one spot only.
123 static UDataFileAccess  gDataFileAccess = UDATA_NO_FILES;
124 #endif
125 
// BEGIN Android-added: Include android/host-linux-specific headers and variables.
// AOSP_ICU_INIT is switched on only when ANDROID is defined (i.e. the AOSP build system,
// e.g. Soong, is used rather than the normal GNU make used by ./updateicudata.py)
// and the target is Android or host linux.
#if defined(ANDROID) && (U_PLATFORM == U_PF_ANDROID || U_PLATFORM == U_PF_LINUX)
  #define AOSP_ICU_INIT 1
#endif

#ifdef AOSP_ICU_INIT
  #include "androidicuinit/android_icu_init.h"
  static icu::UInitOnce gAospInitOnce = U_INITONCE_INITIALIZER;
#endif
// END Android-added: Include android/host-linux-specific headers and variables.
138 
139 
140 static UBool U_CALLCONV
udata_cleanup(void)141 udata_cleanup(void)
142 {
143     int32_t i;
144 
145     if (gCommonDataCache) {             /* Delete the cache of user data mappings.  */
146         uhash_close(gCommonDataCache);  /*   Table owns the contents, and will delete them. */
147         gCommonDataCache = NULL;        /*   Cleanup is not thread safe.                */
148     }
149     gCommonDataCacheInitOnce.reset();
150 
151     for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray) && gCommonICUDataArray[i] != NULL; ++i) {
152         udata_close(gCommonICUDataArray[i]);
153         gCommonICUDataArray[i] = NULL;
154     }
155     gHaveTriedToLoadCommonData = 0;
156 
157 // BEGIN Android-added: Use specialized libandroidicuinit to unload the data on Android/ART host.
158 #ifdef AOSP_ICU_INIT
159     android_icu_cleanup();
160     gAospInitOnce.reset();
161 #endif
162 // END Android-added: Use specialized libandroidicuinit to unload the data on Android/ART host.
163 
164     return TRUE;                   /* Everything was cleaned up */
165 }
166 
167 static UBool U_CALLCONV
findCommonICUDataByName(const char * inBasename,UErrorCode & err)168 findCommonICUDataByName(const char *inBasename, UErrorCode &err)
169 {
170     UBool found = FALSE;
171     int32_t i;
172 
173     UDataMemory  *pData = udata_findCachedData(inBasename, err);
174     if (U_FAILURE(err) || pData == NULL)
175         return FALSE;
176 
177     {
178         Mutex lock;
179         for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray); ++i) {
180             if ((gCommonICUDataArray[i] != NULL) && (gCommonICUDataArray[i]->pHeader == pData->pHeader)) {
181                 /* The data pointer is already in the array. */
182                 found = TRUE;
183                 break;
184             }
185         }
186     }
187     return found;
188 }
189 
190 
191 /*
192  * setCommonICUData.   Set a UDataMemory to be the global ICU Data
193  */
194 static UBool
setCommonICUData(UDataMemory * pData,UBool warn,UErrorCode * pErr)195 setCommonICUData(UDataMemory *pData,     /*  The new common data.  Belongs to caller, we copy it. */
196                  UBool       warn,       /*  If true, set USING_DEFAULT warning if ICUData was    */
197                                          /*    changed by another thread before we got to it.     */
198                  UErrorCode *pErr)
199 {
200     UDataMemory  *newCommonData = UDataMemory_createNewInstance(pErr);
201     int32_t i;
202     UBool didUpdate = FALSE;
203     if (U_FAILURE(*pErr)) {
204         return FALSE;
205     }
206 
207     /*  For the assignment, other threads must cleanly see either the old            */
208     /*    or the new, not some partially initialized new.  The old can not be        */
209     /*    deleted - someone may still have a pointer to it lying around in           */
210     /*    their locals.                                                              */
211     UDatamemory_assign(newCommonData, pData);
212     umtx_lock(NULL);
213     for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray); ++i) {
214         if (gCommonICUDataArray[i] == NULL) {
215             gCommonICUDataArray[i] = newCommonData;
216             didUpdate = TRUE;
217             break;
218         } else if (gCommonICUDataArray[i]->pHeader == pData->pHeader) {
219             /* The same data pointer is already in the array. */
220             break;
221         }
222     }
223     umtx_unlock(NULL);
224 
225     if (i == UPRV_LENGTHOF(gCommonICUDataArray) && warn) {
226         *pErr = U_USING_DEFAULT_WARNING;
227     }
228     if (didUpdate) {
229         ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup);
230     } else {
231         uprv_free(newCommonData);
232     }
233     return didUpdate;
234 }
235 
236 #if !defined(ICU_DATA_DIR_WINDOWS)
237 
238 static UBool
setCommonICUDataPointer(const void * pData,UBool,UErrorCode * pErrorCode)239 setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCode) {
240     UDataMemory tData;
241     UDataMemory_init(&tData);
242     UDataMemory_setData(&tData, pData);
243     udata_checkCommonData(&tData, pErrorCode);
244     return setCommonICUData(&tData, FALSE, pErrorCode);
245 }
246 
247 #endif
248 
249 static const char *
findBasename(const char * path)250 findBasename(const char *path) {
251     const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR);
252     if(basename==NULL) {
253         return path;
254     } else {
255         return basename+1;
256     }
257 }
258 
259 #ifdef UDATA_DEBUG
260 static const char *
packageNameFromPath(const char * path)261 packageNameFromPath(const char *path)
262 {
263     if((path == NULL) || (*path == 0)) {
264         return U_ICUDATA_NAME;
265     }
266 
267     path = findBasename(path);
268 
269     if((path == NULL) || (*path == 0)) {
270         return U_ICUDATA_NAME;
271     }
272 
273     return path;
274 }
275 #endif
276 
277 /*----------------------------------------------------------------------*
278  *                                                                      *
279  *   Cache for common data                                              *
280  *      Functions for looking up or adding entries to a cache of        *
281  *      data that has been previously opened.  Avoids a potentially     *
282  *      expensive operation of re-opening the data for subsequent       *
283  *      uses.                                                           *
284  *                                                                      *
285  *      Data remains cached for the duration of the process.            *
286  *                                                                      *
287  *----------------------------------------------------------------------*/
288 
289 typedef struct DataCacheElement {
290     char          *name;
291     UDataMemory   *item;
292 } DataCacheElement;
293 
294 
295 
296 /*
297  * Deleter function for DataCacheElements.
298  *         udata cleanup function closes the hash table; hash table in turn calls back to
299  *         here for each entry.
300  */
DataCacheElement_deleter(void * pDCEl)301 static void U_CALLCONV DataCacheElement_deleter(void *pDCEl) {
302     DataCacheElement *p = (DataCacheElement *)pDCEl;
303     udata_close(p->item);              /* unmaps storage */
304     uprv_free(p->name);                /* delete the hash key string. */
305     uprv_free(pDCEl);                  /* delete 'this'          */
306 }
307 
udata_initHashTable(UErrorCode & err)308 static void U_CALLCONV udata_initHashTable(UErrorCode &err) {
309     U_ASSERT(gCommonDataCache == NULL);
310     gCommonDataCache = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &err);
311     if (U_FAILURE(err)) {
312        return;
313     }
314     U_ASSERT(gCommonDataCache != NULL);
315     uhash_setValueDeleter(gCommonDataCache, DataCacheElement_deleter);
316     ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup);
317 }
318 
319  /*   udata_getCacheHashTable()
320   *     Get the hash table used to store the data cache entries.
321   *     Lazy create it if it doesn't yet exist.
322   */
udata_getHashTable(UErrorCode & err)323 static UHashtable *udata_getHashTable(UErrorCode &err) {
324     umtx_initOnce(gCommonDataCacheInitOnce, &udata_initHashTable, err);
325     return gCommonDataCache;
326 }
327 
328 
329 
udata_findCachedData(const char * path,UErrorCode & err)330 static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err)
331 {
332     UHashtable        *htable;
333     UDataMemory       *retVal = NULL;
334     DataCacheElement  *el;
335     const char        *baseName;
336 
337     htable = udata_getHashTable(err);
338     if (U_FAILURE(err)) {
339         return NULL;
340     }
341 
342     baseName = findBasename(path);   /* Cache remembers only the base name, not the full path. */
343     umtx_lock(NULL);
344     el = (DataCacheElement *)uhash_get(htable, baseName);
345     umtx_unlock(NULL);
346     if (el != NULL) {
347         retVal = el->item;
348     }
349 #ifdef UDATA_DEBUG
350     fprintf(stderr, "Cache: [%s] -> %p\n", baseName, (void*) retVal);
351 #endif
352     return retVal;
353 }
354 
355 
udata_cacheDataItem(const char * path,UDataMemory * item,UErrorCode * pErr)356 static UDataMemory *udata_cacheDataItem(const char *path, UDataMemory *item, UErrorCode *pErr) {
357     DataCacheElement *newElement;
358     const char       *baseName;
359     int32_t           nameLen;
360     UHashtable       *htable;
361     DataCacheElement *oldValue = NULL;
362     UErrorCode        subErr = U_ZERO_ERROR;
363 
364     htable = udata_getHashTable(*pErr);
365     if (U_FAILURE(*pErr)) {
366         return NULL;
367     }
368 
369     /* Create a new DataCacheElement - the thingy we store in the hash table -
370      * and copy the supplied path and UDataMemoryItems into it.
371      */
372     newElement = (DataCacheElement *)uprv_malloc(sizeof(DataCacheElement));
373     if (newElement == NULL) {
374         *pErr = U_MEMORY_ALLOCATION_ERROR;
375         return NULL;
376     }
377     newElement->item = UDataMemory_createNewInstance(pErr);
378     if (U_FAILURE(*pErr)) {
379         uprv_free(newElement);
380         return NULL;
381     }
382     UDatamemory_assign(newElement->item, item);
383 
384     baseName = findBasename(path);
385     nameLen = (int32_t)uprv_strlen(baseName);
386     newElement->name = (char *)uprv_malloc(nameLen+1);
387     if (newElement->name == NULL) {
388         *pErr = U_MEMORY_ALLOCATION_ERROR;
389         uprv_free(newElement->item);
390         uprv_free(newElement);
391         return NULL;
392     }
393     uprv_strcpy(newElement->name, baseName);
394 
395     /* Stick the new DataCacheElement into the hash table.
396     */
397     umtx_lock(NULL);
398     oldValue = (DataCacheElement *)uhash_get(htable, path);
399     if (oldValue != NULL) {
400         subErr = U_USING_DEFAULT_WARNING;
401     }
402     else {
403         uhash_put(
404             htable,
405             newElement->name,               /* Key   */
406             newElement,                     /* Value */
407             &subErr);
408     }
409     umtx_unlock(NULL);
410 
411 #ifdef UDATA_DEBUG
412     fprintf(stderr, "Cache: [%s] <<< %p : %s. vFunc=%p\n", newElement->name,
413     (void*) newElement->item, u_errorName(subErr), (void*) newElement->item->vFuncs);
414 #endif
415 
416     if (subErr == U_USING_DEFAULT_WARNING || U_FAILURE(subErr)) {
417         *pErr = subErr; /* copy sub err unto fillin ONLY if something happens. */
418         uprv_free(newElement->name);
419         uprv_free(newElement->item);
420         uprv_free(newElement);
421         return oldValue ? oldValue->item : NULL;
422     }
423 
424     return newElement->item;
425 }
426 
427 /*----------------------------------------------------------------------*==============
428  *                                                                      *
429  *  Path management.  Could be shared with other tools/etc if need be   *
430  * later on.                                                            *
431  *                                                                      *
432  *----------------------------------------------------------------------*/
433 
434 U_NAMESPACE_BEGIN
435 
436 class UDataPathIterator
437 {
438 public:
439     UDataPathIterator(const char *path, const char *pkg,
440                       const char *item, const char *suffix, UBool doCheckLastFour,
441                       UErrorCode *pErrorCode);
442     const char *next(UErrorCode *pErrorCode);
443 
444 private:
445     const char *path;                              /* working path (u_icudata_Dir) */
446     const char *nextPath;                          /* path following this one */
447     const char *basename;                          /* item's basename (icudt22e_mt.res)*/
448 
449     StringPiece suffix;                            /* item suffix (can be null) */
450 
451     uint32_t    basenameLen;                       /* length of basename */
452 
453     CharString  itemPath;                          /* path passed in with item name */
454     CharString  pathBuffer;                        /* output path for this it'ion */
455     CharString  packageStub;                       /* example:  "/icudt28b". Will ignore that leaf in set paths. */
456 
457     UBool       checkLastFour;                     /* if TRUE then allow paths such as '/foo/myapp.dat'
458                                                     * to match, checks last 4 chars of suffix with
459                                                     * last 4 of path, then previous chars. */
460 };
461 
462 /**
463  * @param iter    The iterator to be initialized. Its current state does not matter.
464  * @param inPath  The full pathname to be iterated over.  If NULL, defaults to U_ICUDATA_NAME
465  * @param pkg     Package which is being searched for, ex "icudt28l".  Will ignore leaf directories such as /icudt28l
466  * @param item    Item to be searched for.  Can include full path, such as /a/b/foo.dat
467  * @param inSuffix  Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly.
468  *             Ex:   'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2.
469  *                   '/blarg/stuff.dat' would also be found.
470  *  Note: inSuffix may also be the 'item' being searched for as well, (ex: "ibm-5348_P100-1997.cnv"), in which case
471  *        the 'item' parameter is often the same as pkg. (Though sometimes might have a tree part as well, ex: "icudt62l-curr").
472  */
UDataPathIterator(const char * inPath,const char * pkg,const char * item,const char * inSuffix,UBool doCheckLastFour,UErrorCode * pErrorCode)473 UDataPathIterator::UDataPathIterator(const char *inPath, const char *pkg,
474                                      const char *item, const char *inSuffix, UBool doCheckLastFour,
475                                      UErrorCode *pErrorCode)
476 {
477 #ifdef UDATA_DEBUG
478         fprintf(stderr, "SUFFIX1=%s PATH=%s\n", inSuffix, inPath);
479 #endif
480     /** Path **/
481     if(inPath == NULL) {
482         path = u_getDataDirectory();
483     } else {
484         path = inPath;
485     }
486 
487     /** Package **/
488     if(pkg != NULL) {
489       packageStub.append(U_FILE_SEP_CHAR, *pErrorCode).append(pkg, *pErrorCode);
490 #ifdef UDATA_DEBUG
491       fprintf(stderr, "STUB=%s [%d]\n", packageStub.data(), packageStub.length());
492 #endif
493     }
494 
495     /** Item **/
496     basename = findBasename(item);
497     basenameLen = (int32_t)uprv_strlen(basename);
498 
499     /** Item path **/
500     if(basename == item) {
501         nextPath = path;
502     } else {
503         itemPath.append(item, (int32_t)(basename-item), *pErrorCode);
504         nextPath = itemPath.data();
505     }
506 #ifdef UDATA_DEBUG
507     fprintf(stderr, "SUFFIX=%s [%p]\n", inSuffix, (void*) inSuffix);
508 #endif
509 
510     /** Suffix  **/
511     if(inSuffix != NULL) {
512         suffix = inSuffix;
513     } else {
514         suffix = "";
515     }
516 
517     checkLastFour = doCheckLastFour;
518 
519     /* pathBuffer will hold the output path strings returned by this iterator */
520 
521 #ifdef UDATA_DEBUG
522     fprintf(stderr, "0: init %s -> [path=%s], [base=%s], [suff=%s], [itempath=%s], [nextpath=%s], [checklast4=%s]\n",
523             item,
524             path,
525             basename,
526             suffix.data(),
527             itemPath.data(),
528             nextPath,
529             checkLastFour?"TRUE":"false");
530 #endif
531 }
532 
533 /**
534  * Get the next path on the list.
535  *
536  * @param iter The Iter to be used
537  * @param len  If set, pointer to the length of the returned path, for convenience.
538  * @return Pointer to the next path segment, or NULL if there are no more.
539  */
next(UErrorCode * pErrorCode)540 const char *UDataPathIterator::next(UErrorCode *pErrorCode)
541 {
542     if(U_FAILURE(*pErrorCode)) {
543         return NULL;
544     }
545 
546     const char *currentPath = NULL;
547     int32_t     pathLen = 0;
548     const char *pathBasename;
549 
550     do
551     {
552         if( nextPath == NULL ) {
553             break;
554         }
555         currentPath = nextPath;
556 
557         if(nextPath == itemPath.data()) { /* we were processing item's path. */
558             nextPath = path; /* start with regular path next tm. */
559             pathLen = (int32_t)uprv_strlen(currentPath);
560         } else {
561             /* fix up next for next time */
562             nextPath = uprv_strchr(currentPath, U_PATH_SEP_CHAR);
563             if(nextPath == NULL) {
564                 /* segment: entire path */
565                 pathLen = (int32_t)uprv_strlen(currentPath);
566             } else {
567                 /* segment: until next segment */
568                 pathLen = (int32_t)(nextPath - currentPath);
569                 /* skip divider */
570                 nextPath ++;
571             }
572         }
573 
574         if(pathLen == 0) {
575             continue;
576         }
577 
578 #ifdef UDATA_DEBUG
579         fprintf(stderr, "rest of path (IDD) = %s\n", currentPath);
580         fprintf(stderr, "                     ");
581         {
582             int32_t qqq;
583             for(qqq=0;qqq<pathLen;qqq++)
584             {
585                 fprintf(stderr, " ");
586             }
587 
588             fprintf(stderr, "^\n");
589         }
590 #endif
591         pathBuffer.clear().append(currentPath, pathLen, *pErrorCode);
592 
593         /* check for .dat files */
594         pathBasename = findBasename(pathBuffer.data());
595 
596         if(checkLastFour == TRUE &&
597            (pathLen>=4) &&
598            uprv_strncmp(pathBuffer.data() +(pathLen-4), suffix.data(), 4)==0 && /* suffix matches */
599            uprv_strncmp(findBasename(pathBuffer.data()), basename, basenameLen)==0  && /* base matches */
600            uprv_strlen(pathBasename)==(basenameLen+4)) { /* base+suffix = full len */
601 
602 #ifdef UDATA_DEBUG
603             fprintf(stderr, "Have %s file on the path: %s\n", suffix.data(), pathBuffer.data());
604 #endif
605             /* do nothing */
606         }
607         else
608         {       /* regular dir path */
609             if(pathBuffer[pathLen-1] != U_FILE_SEP_CHAR) {
610                 if((pathLen>=4) &&
611                    uprv_strncmp(pathBuffer.data()+(pathLen-4), ".dat", 4) == 0)
612                 {
613 #ifdef UDATA_DEBUG
614                     fprintf(stderr, "skipping non-directory .dat file %s\n", pathBuffer.data());
615 #endif
616                     continue;
617                 }
618 
619                 /* Check if it is a directory with the same name as our package */
620                 if(!packageStub.isEmpty() &&
621                    (pathLen > packageStub.length()) &&
622                    !uprv_strcmp(pathBuffer.data() + pathLen - packageStub.length(), packageStub.data())) {
623 #ifdef UDATA_DEBUG
624                   fprintf(stderr, "Found stub %s (will add package %s of len %d)\n", packageStub.data(), basename, basenameLen);
625 #endif
626                   pathBuffer.truncate(pathLen - packageStub.length());
627                 }
628                 pathBuffer.append(U_FILE_SEP_CHAR, *pErrorCode);
629             }
630 
631             /* + basename */
632             pathBuffer.append(packageStub.data()+1, packageStub.length()-1, *pErrorCode);
633 
634             if (!suffix.empty())  /* tack on suffix */
635             {
636                 if (suffix.length() > 4) {
637                     // If the suffix is actually an item ("ibm-5348_P100-1997.cnv") and not an extension (".res")
638                     // then we need to ensure that the path ends with a separator.
639                     pathBuffer.ensureEndsWithFileSeparator(*pErrorCode);
640                 }
641                 pathBuffer.append(suffix, *pErrorCode);
642             }
643         }
644 
645 #ifdef UDATA_DEBUG
646         fprintf(stderr, " -->  %s\n", pathBuffer.data());
647 #endif
648 
649         return pathBuffer.data();
650 
651     } while(path);
652 
653     /* fell way off the end */
654     return NULL;
655 }
656 
657 U_NAMESPACE_END
658 
659 /* ==================================================================================*/
660 
661 
662 /*----------------------------------------------------------------------*
663  *                                                                      *
664  *  Add a static reference to the common data library                   *
665  *   Unless overridden by an explicit udata_setCommonData, this will be *
666  *      our common data.                                                *
667  *                                                                      *
668  *----------------------------------------------------------------------*/
669 #if !defined(ICU_DATA_DIR_WINDOWS)
670 // When using the Windows system data, we expect only a single data file.
671 extern "C" const DataHeader U_DATA_API U_ICUDATA_ENTRY_POINT;
672 #endif
673 
674 /*
675  * This would be a good place for weak-linkage declarations of
676  * partial-data-library access functions where each returns a pointer
677  * to its data package, if it is linked in.
678  */
679 /*
680 extern const void *uprv_getICUData_collation(void) ATTRIBUTE_WEAK;
681 extern const void *uprv_getICUData_conversion(void) ATTRIBUTE_WEAK;
682 */
683 
684 /*----------------------------------------------------------------------*
685  *                                                                      *
686  *   openCommonData   Attempt to open a common format (.dat) file       *
687  *                    Map it into memory (if it's not there already)    *
688  *                    and return a UDataMemory object for it.           *
689  *                                                                      *
690  *                    If the requested data is already open and cached  *
691  *                       just return the cached UDataMem object.        *
692  *                                                                      *
693  *----------------------------------------------------------------------*/
694 static UDataMemory *
openCommonData(const char * path,int32_t commonDataIndex,UErrorCode * pErrorCode)695 openCommonData(const char *path,          /*  Path from OpenChoice?          */
696                int32_t commonDataIndex,   /*  ICU Data (index >= 0) if path == NULL */
697                UErrorCode *pErrorCode)
698 {
699     UDataMemory tData;
700     const char *pathBuffer;
701     const char *inBasename;
702 
703     if (U_FAILURE(*pErrorCode)) {
704         return NULL;
705     }
706 
707     UDataMemory_init(&tData);
708 
709     /* ??????? TODO revisit this */
710     if (commonDataIndex >= 0) {
711         /* "mini-cache" for common ICU data */
712         if(commonDataIndex >= UPRV_LENGTHOF(gCommonICUDataArray)) {
713             return NULL;
714         }
715         {
716             Mutex lock;
717             if(gCommonICUDataArray[commonDataIndex] != NULL) {
718                 return gCommonICUDataArray[commonDataIndex];
719             }
720 #if !defined(ICU_DATA_DIR_WINDOWS)
721 // When using the Windows system data, we expect only a single data file.
722             int32_t i;
723             for(i = 0; i < commonDataIndex; ++i) {
724                 if(gCommonICUDataArray[i]->pHeader == &U_ICUDATA_ENTRY_POINT) {
725                     /* The linked-in data is already in the list. */
726                     return NULL;
727                 }
728             }
729 #endif
730         }
731 
732 // BEGIN Android-added: Use specialized libandroidicuinit to load the data on Android/ART host.
733 #ifdef AOSP_ICU_INIT // Do nothing on other platforms, e.g. Windows
734         // android_icu_init() is only called once.
735         umtx_initOnce(gAospInitOnce, &android_icu_init);
736 #endif // AOSP_ICU_INIT
737 // END Android-added: Use specialized libandroidicuinit to load the data on Android/ART host.
738 
739         /* Add the linked-in data to the list. */
740         /*
741          * This is where we would check and call weakly linked partial-data-library
742          * access functions.
743          */
744         /*
745         if (uprv_getICUData_collation) {
746             setCommonICUDataPointer(uprv_getICUData_collation(), FALSE, pErrorCode);
747         }
748         if (uprv_getICUData_conversion) {
749             setCommonICUDataPointer(uprv_getICUData_conversion(), FALSE, pErrorCode);
750         }
751         */
752 #if !defined(ICU_DATA_DIR_WINDOWS)
753 // When using the Windows system data, we expect only a single data file.
754         setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT, FALSE, pErrorCode);
755         {
756             Mutex lock;
757             return gCommonICUDataArray[commonDataIndex];
758         }
759 #endif
760     }
761 
762 
763     /* request is NOT for ICU Data.  */
764 
765     /* Find the base name portion of the supplied path.   */
766     /*   inBasename will be left pointing somewhere within the original path string.      */
767     inBasename = findBasename(path);
768 #ifdef UDATA_DEBUG
769     fprintf(stderr, "inBasename = %s\n", inBasename);
770 #endif
771 
772     if(*inBasename==0) {
773         /* no basename.     This will happen if the original path was a directory name,   */
774         /*    like  "a/b/c/".   (Fallback to separate files will still work.)             */
775 #ifdef UDATA_DEBUG
776         fprintf(stderr, "ocd: no basename in %s, bailing.\n", path);
777 #endif
778         if (U_SUCCESS(*pErrorCode)) {
779             *pErrorCode=U_FILE_ACCESS_ERROR;
780         }
781         return NULL;
782     }
783 
784    /* Is the requested common data file already open and cached?                     */
785    /*   Note that the cache is keyed by the base name only.  The rest of the path,   */
786    /*     if any, is not considered.                                                 */
787     UDataMemory  *dataToReturn = udata_findCachedData(inBasename, *pErrorCode);
788     if (dataToReturn != NULL || U_FAILURE(*pErrorCode)) {
789         return dataToReturn;
790     }
791 
792     /* Requested item is not in the cache.
793      * Hunt it down, trying all the path locations
794      */
795 
796     UDataPathIterator iter(u_getDataDirectory(), inBasename, path, ".dat", TRUE, pErrorCode);
797 
798     while ((UDataMemory_isLoaded(&tData)==FALSE) && (pathBuffer = iter.next(pErrorCode)) != NULL)
799     {
800 #ifdef UDATA_DEBUG
801         fprintf(stderr, "ocd: trying path %s - ", pathBuffer);
802 #endif
803         uprv_mapFile(&tData, pathBuffer, pErrorCode);
804 #ifdef UDATA_DEBUG
805         fprintf(stderr, "%s\n", UDataMemory_isLoaded(&tData)?"LOADED":"not loaded");
806 #endif
807     }
808     if (U_FAILURE(*pErrorCode)) {
809         return NULL;
810     }
811 
812 #if defined(OS390_STUBDATA) && defined(OS390BATCH)
813     if (!UDataMemory_isLoaded(&tData)) {
814         char ourPathBuffer[1024];
815         /* One more chance, for extendCommonData() */
816         uprv_strncpy(ourPathBuffer, path, 1019);
817         ourPathBuffer[1019]=0;
818         uprv_strcat(ourPathBuffer, ".dat");
819         uprv_mapFile(&tData, ourPathBuffer, pErrorCode);
820     }
821 #endif
822 
823     if (U_FAILURE(*pErrorCode)) {
824         return NULL;
825     }
826     if (!UDataMemory_isLoaded(&tData)) {
827         /* no common data */
828         *pErrorCode=U_FILE_ACCESS_ERROR;
829         return NULL;
830     }
831 
832     /* we have mapped a file, check its header */
833     udata_checkCommonData(&tData, pErrorCode);
834 
835 
836     /* Cache the UDataMemory struct for this .dat file,
837      *   so we won't need to hunt it down and map it again next time
838      *   something is needed from it.                */
839     return udata_cacheDataItem(inBasename, &tData, pErrorCode);
840 }
841 
842 
843 /*----------------------------------------------------------------------*
844  *                                                                      *
845  *   extendICUData   If the full set of ICU data was not loaded at      *
846  *                   program startup, load it now.  This function will  *
847  *                   be called when the lookup of an ICU data item in   *
848  *                   the common ICU data fails.                         *
849  *                                                                      *
850  *                   return true if new data is loaded, false otherwise.*
851  *                                                                      *
852  *----------------------------------------------------------------------*/
extendICUData(UErrorCode * pErr)853 static UBool extendICUData(UErrorCode *pErr)
854 {
855     UDataMemory   *pData;
856     UDataMemory   copyPData;
857     UBool         didUpdate = FALSE;
858 
859     /*
860      * There is a chance for a race condition here.
861      * Normally, ICU data is loaded from a DLL or via mmap() and
862      * setCommonICUData() will detect if the same address is set twice.
863      * If ICU is built with data loading via fread() then the address will
864      * be different each time the common data is loaded and we may add
865      * multiple copies of the data.
866      * In this case, use a mutex to prevent the race.
867      * Use a specific mutex to avoid nested locks of the global mutex.
868      */
869 #if MAP_IMPLEMENTATION==MAP_STDIO
870     static UMutex extendICUDataMutex;
871     umtx_lock(&extendICUDataMutex);
872 #endif
873     if(!umtx_loadAcquire(gHaveTriedToLoadCommonData)) {
874         /* See if we can explicitly open a .dat file for the ICUData. */
875         pData = openCommonData(
876                    U_ICUDATA_NAME,            /*  "icudt20l" , for example.          */
877                    -1,                        /*  Pretend we're not opening ICUData  */
878                    pErr);
879 
880         /* How about if there is no pData, eh... */
881 
882        UDataMemory_init(&copyPData);
883        if(pData != NULL) {
884           UDatamemory_assign(&copyPData, pData);
885           copyPData.map = 0;              /* The mapping for this data is owned by the hash table */
886           copyPData.mapAddr = 0;          /*   which will unmap it when ICU is shut down.         */
887                                           /* CommonICUData is also unmapped when ICU is shut down.*/
888                                           /* To avoid unmapping the data twice, zero out the map  */
889                                           /*   fields in the UDataMemory that we're assigning     */
890                                           /*   to CommonICUData.                                  */
891 
892           didUpdate = /* no longer using this result */
893               setCommonICUData(&copyPData,/*  The new common data.                                */
894                        FALSE,             /*  No warnings if write didn't happen                  */
895                        pErr);             /*  setCommonICUData honors errors; NOP if error set    */
896         }
897 
898         umtx_storeRelease(gHaveTriedToLoadCommonData, 1);
899     }
900 
901     didUpdate = findCommonICUDataByName(U_ICUDATA_NAME, *pErr);  /* Return 'true' when a racing writes out the extended                 */
902                                                           /* data after another thread has failed to see it (in openCommonData), so     */
903                                                           /* extended data can be examined.                                             */
904                                                           /* Also handles a race through here before gHaveTriedToLoadCommonData is set. */
905 
906 #if MAP_IMPLEMENTATION==MAP_STDIO
907     umtx_unlock(&extendICUDataMutex);
908 #endif
909     return didUpdate;               /* Return true if ICUData pointer was updated.   */
910                                     /*   (Could potentially have been done by another thread racing */
911                                     /*   us through here, but that's fine, we still return true    */
912                                     /*   so that current thread will also examine extended data.   */
913 }
914 
915 /*----------------------------------------------------------------------*
916  *                                                                      *
917  *   udata_setCommonData                                                *
918  *                                                                      *
919  *----------------------------------------------------------------------*/
920 U_CAPI void U_EXPORT2
udata_setCommonData(const void * data,UErrorCode * pErrorCode)921 udata_setCommonData(const void *data, UErrorCode *pErrorCode) {
922     UDataMemory dataMemory;
923 
924     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
925         return;
926     }
927 
928     if(data==NULL) {
929         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
930         return;
931     }
932 
933     /* set the data pointer and test for validity */
934     UDataMemory_init(&dataMemory);
935     UDataMemory_setData(&dataMemory, data);
936     udata_checkCommonData(&dataMemory, pErrorCode);
937     if (U_FAILURE(*pErrorCode)) {return;}
938 
939     /* we have good data */
940     /* Set it up as the ICU Common Data.  */
941     setCommonICUData(&dataMemory, TRUE, pErrorCode);
942 }
943 
944 /*---------------------------------------------------------------------------
945  *
946  *  udata_setAppData
947  *
948  *---------------------------------------------------------------------------- */
949 U_CAPI void U_EXPORT2
udata_setAppData(const char * path,const void * data,UErrorCode * err)950 udata_setAppData(const char *path, const void *data, UErrorCode *err)
951 {
952     UDataMemory     udm;
953 
954     if(err==NULL || U_FAILURE(*err)) {
955         return;
956     }
957     if(data==NULL) {
958         *err=U_ILLEGAL_ARGUMENT_ERROR;
959         return;
960     }
961 
962     UDataMemory_init(&udm);
963     UDataMemory_setData(&udm, data);
964     udata_checkCommonData(&udm, err);
965     udata_cacheDataItem(path, &udm, err);
966 }
967 
968 /*----------------------------------------------------------------------------*
969  *                                                                            *
970  *  checkDataItem     Given a freshly located/loaded data item, either        *
971  *                    an entry in a common file or a separately loaded file,  *
972  *                    sanity check its header, and see if the data is         *
973  *                    acceptable to the app.                                  *
974  *                    If the data is good, create and return a UDataMemory    *
975  *                    object that can be returned to the application.         *
976  *                    Return NULL on any sort of failure.                     *
977  *                                                                            *
978  *----------------------------------------------------------------------------*/
979 static UDataMemory *
checkDataItem(const DataHeader * pHeader,UDataMemoryIsAcceptable * isAcceptable,void * context,const char * type,const char * name,UErrorCode * nonFatalErr,UErrorCode * fatalErr)980 checkDataItem
981 (
982  const DataHeader         *pHeader,         /* The data item to be checked.                */
983  UDataMemoryIsAcceptable  *isAcceptable,    /* App's call-back function                    */
984  void                     *context,         /*   pass-thru param for above.                */
985  const char               *type,            /*   pass-thru param for above.                */
986  const char               *name,            /*   pass-thru param for above.                */
987  UErrorCode               *nonFatalErr,     /* Error code if this data was not acceptable  */
988                                             /*   but openChoice should continue with       */
989                                             /*   trying to get data from fallback path.    */
990  UErrorCode               *fatalErr         /* Bad error, caller should return immediately */
991  )
992 {
993     UDataMemory  *rDataMem = NULL;          /* the new UDataMemory, to be returned.        */
994 
995     if (U_FAILURE(*fatalErr)) {
996         return NULL;
997     }
998 
999     if(pHeader->dataHeader.magic1==0xda &&
1000         pHeader->dataHeader.magic2==0x27 &&
1001         (isAcceptable==NULL || isAcceptable(context, type, name, &pHeader->info))
1002     ) {
1003         rDataMem=UDataMemory_createNewInstance(fatalErr);
1004         if (U_FAILURE(*fatalErr)) {
1005             return NULL;
1006         }
1007         rDataMem->pHeader = pHeader;
1008     } else {
1009         /* the data is not acceptable, look further */
1010         /* If we eventually find something good, this errorcode will be */
1011         /*    cleared out.                                              */
1012         *nonFatalErr=U_INVALID_FORMAT_ERROR;
1013     }
1014     return rDataMem;
1015 }
1016 
1017 /**
1018  * @return 0 if not loaded, 1 if loaded or err
1019  */
doLoadFromIndividualFiles(const char * pkgName,const char * dataPath,const char * tocEntryPathSuffix,const char * path,const char * type,const char * name,UDataMemoryIsAcceptable * isAcceptable,void * context,UErrorCode * subErrorCode,UErrorCode * pErrorCode)1020 static UDataMemory *doLoadFromIndividualFiles(const char *pkgName,
1021         const char *dataPath, const char *tocEntryPathSuffix,
1022             /* following arguments are the same as doOpenChoice itself */
1023             const char *path, const char *type, const char *name,
1024              UDataMemoryIsAcceptable *isAcceptable, void *context,
1025              UErrorCode *subErrorCode,
1026              UErrorCode *pErrorCode)
1027 {
1028     const char         *pathBuffer;
1029     UDataMemory         dataMemory;
1030     UDataMemory *pEntryData;
1031 
1032     /* look in ind. files: package\nam.typ  ========================= */
1033     /* init path iterator for individual files */
1034     UDataPathIterator iter(dataPath, pkgName, path, tocEntryPathSuffix, FALSE, pErrorCode);
1035 
1036     while ((pathBuffer = iter.next(pErrorCode)) != NULL)
1037     {
1038 #ifdef UDATA_DEBUG
1039         fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer);
1040 #endif
1041         if (uprv_mapFile(&dataMemory, pathBuffer, pErrorCode))
1042         {
1043             pEntryData = checkDataItem(dataMemory.pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
1044             if (pEntryData != NULL) {
1045                 /* Data is good.
1046                 *  Hand off ownership of the backing memory to the user's UDataMemory.
1047                 *  and return it.   */
1048                 pEntryData->mapAddr = dataMemory.mapAddr;
1049                 pEntryData->map     = dataMemory.map;
1050 
1051 #ifdef UDATA_DEBUG
1052                 fprintf(stderr, "** Mapped file: %s\n", pathBuffer);
1053 #endif
1054                 return pEntryData;
1055             }
1056 
1057             /* the data is not acceptable, or some error occurred.  Either way, unmap the memory */
1058             udata_close(&dataMemory);
1059 
1060             /* If we had a nasty error, bail out completely.  */
1061             if (U_FAILURE(*pErrorCode)) {
1062                 return NULL;
1063             }
1064 
1065             /* Otherwise remember that we found data but didn't like it for some reason  */
1066             *subErrorCode=U_INVALID_FORMAT_ERROR;
1067         }
1068 #ifdef UDATA_DEBUG
1069         fprintf(stderr, "%s\n", UDataMemory_isLoaded(&dataMemory)?"LOADED":"not loaded");
1070 #endif
1071     }
1072     return NULL;
1073 }
1074 
/**
 * Try to load a data item out of common data (a package of data items).
 *
 * For ICU data (isICUData is true) the common data slots are tried in order,
 * starting at index 0; for application data the package is opened by path
 * (index -1) and only that one package is consulted.
 *
 * @param tocEntryName  name of the item to look up in the package's table of contents.
 * @param subErrorCode  non-fatal error code; receives errors from opening the
 *                      common data, from the lookup, and from a rejected item.
 * @param pErrorCode    fatal error code; set to U_MEMORY_ALLOCATION_ERROR when
 *                      subErrorCode reports out-of-memory.
 * @return the UDataMemory for the accepted item, with its length filled in
 *         from the lookup, or NULL if the item was not found, was rejected,
 *         or an error occurred.
 */
static UDataMemory *doLoadFromCommonData(UBool isICUData, const char * /*pkgName*/,
        const char * /*dataPath*/, const char * /*tocEntryPathSuffix*/, const char *tocEntryName,
            /* following arguments are the same as doOpenChoice itself */
            const char *path, const char *type, const char *name,
             UDataMemoryIsAcceptable *isAcceptable, void *context,
             UErrorCode *subErrorCode,
             UErrorCode *pErrorCode)
{
    UDataMemory        *pEntryData;
    const DataHeader   *pHeader;
    UDataMemory        *pCommonData;
    int32_t            commonDataIndex;
    UBool              checkedExtendedICUData = FALSE;   /* extendICUData() is attempted at most once */
    /* Try to get common data.  The loop is for platforms such as the 390 that do
     *  not initially load the full set of ICU data.  If the lookup of an ICU data item
     *  fails, the full (but slower to load) set is loaded, and then the loop repeats,
     *  trying the lookup again.  Once the full set of ICU data is loaded, the loop won't
     *  repeat because the full set will be checked the first time through.
     *
     *  The loop also handles the fallback to a .dat file if the application linked
     *   to the stub data library rather than a real library.
     */
    for (commonDataIndex = isICUData ? 0 : -1;;) {
        pCommonData=openCommonData(path, commonDataIndex, subErrorCode); /** search for pkg **/

        if(U_SUCCESS(*subErrorCode) && pCommonData!=NULL) {
            int32_t length;

            /* look up the data piece in the common data */
            pHeader=pCommonData->vFuncs->Lookup(pCommonData, tocEntryName, &length, subErrorCode);
#ifdef UDATA_DEBUG
            fprintf(stderr, "%s: pHeader=%p - %s\n", tocEntryName, (void*) pHeader, u_errorName(*subErrorCode));
#endif

            if(pHeader!=NULL) {
                /* The entry exists; check its header and ask the call-back whether it is acceptable. */
                pEntryData = checkDataItem(pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
#ifdef UDATA_DEBUG
                fprintf(stderr, "pEntryData=%p\n", (void*) pEntryData);
#endif
                if (U_FAILURE(*pErrorCode)) {
                    return NULL;
                }
                if (pEntryData != NULL) {
                    /* Accepted: record the item length reported by the lookup. */
                    pEntryData->length = length;
                    return pEntryData;
                }
            }
        }
        // If we failed due to being out-of-memory, then stop early and report the error.
        if (*subErrorCode == U_MEMORY_ALLOCATION_ERROR) {
            *pErrorCode = *subErrorCode;
            return NULL;
        }
        /* Data wasn't found.  If we were looking for an ICUData item and there is
         * more data available, load it and try again,
         * otherwise break out of this loop. */
        if (!isICUData) {
            return NULL;
        } else if (pCommonData != NULL) {
            ++commonDataIndex;  /* try the next data package */
        } else if ((!checkedExtendedICUData) && extendICUData(subErrorCode)) {
            checkedExtendedICUData = TRUE;
            /* try this data package slot again: it changed from NULL to non-NULL */
        } else {
            /* No package in this slot and no further data to load: give up. */
            return NULL;
        }
    }
}
1146 
1147 /*
1148  * Identify the Time Zone resources that are subject to special override data loading.
1149  */
isTimeZoneFile(const char * name,const char * type)1150 static UBool isTimeZoneFile(const char *name, const char *type) {
1151     return ((uprv_strcmp(type, "res") == 0) &&
1152             (uprv_strcmp(name, "zoneinfo64") == 0 ||
1153              uprv_strcmp(name, "timezoneTypes") == 0 ||
1154              uprv_strcmp(name, "windowsZones") == 0 ||
1155              uprv_strcmp(name, "metaZones") == 0));
1156 }
1157 
1158 /*
1159  *  A note on the ownership of Mapped Memory
1160  *
1161  *  For common format files, ownership resides with the UDataMemory object
1162  *    that lives in the cache of opened common data.  These UDataMemorys are private
1163  *    to the udata implementation, and are never seen directly by users.
1164  *
1165  *    The UDataMemory objects returned to users will have the address of some desired
 *    data within the mapped region, but they won't have the mapping info itself, and thus
1167  *    won't cause anything to be removed from memory when they are closed.
1168  *
1169  *  For individual data files, the UDataMemory returned to the user holds the
1170  *  information necessary to unmap the data on close.  If the user independently
1171  *  opens the same data file twice, two completely independent mappings will be made.
1172  *  (There is no cache of opened data items from individual files, only a cache of
1173  *   opened Common Data files, that is, files containing a collection of data items.)
1174  *
1175  *  For common data passed in from the user via udata_setAppData() or
1176  *  udata_setCommonData(), ownership remains with the user.
1177  *
1178  *  UDataMemory objects themselves, as opposed to the memory they describe,
1179  *  can be anywhere - heap, stack/local or global.
1180  *  They have a flag to indicate when they're heap allocated and thus
1181  *  must be deleted when closed.
1182  */
1183 
1184 
1185 /*----------------------------------------------------------------------------*
1186  *                                                                            *
1187  * main data loading functions                                                *
1188  *                                                                            *
1189  *----------------------------------------------------------------------------*/
1190 static UDataMemory *
doOpenChoice(const char * path,const char * type,const char * name,UDataMemoryIsAcceptable * isAcceptable,void * context,UErrorCode * pErrorCode)1191 doOpenChoice(const char *path, const char *type, const char *name,
1192              UDataMemoryIsAcceptable *isAcceptable, void *context,
1193              UErrorCode *pErrorCode)
1194 {
1195     UDataMemory         *retVal = NULL;
1196 
1197     const char         *dataPath;
1198 
1199     int32_t             tocEntrySuffixIndex;
1200     const char         *tocEntryPathSuffix;
1201     UErrorCode          subErrorCode=U_ZERO_ERROR;
1202     const char         *treeChar;
1203 
1204     UBool               isICUData = FALSE;
1205 
1206 
1207     FileTracer::traceOpen(path, type, name);
1208 
1209 
1210     /* Is this path ICU data? */
1211     if(path == NULL ||
1212        !strcmp(path, U_ICUDATA_ALIAS) ||  /* "ICUDATA" */
1213        !uprv_strncmp(path, U_ICUDATA_NAME U_TREE_SEPARATOR_STRING, /* "icudt26e-" */
1214                      uprv_strlen(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING)) ||
1215        !uprv_strncmp(path, U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING, /* "ICUDATA-" */
1216                      uprv_strlen(U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING))) {
1217       isICUData = TRUE;
1218     }
1219 
1220 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)  /* Windows:  try "foo\bar" and "foo/bar" */
1221     /* remap from alternate path char to the main one */
1222     CharString altSepPath;
1223     if(path) {
1224         if(uprv_strchr(path,U_FILE_ALT_SEP_CHAR) != NULL) {
1225             altSepPath.append(path, *pErrorCode);
1226             char *p;
1227             while ((p = uprv_strchr(altSepPath.data(), U_FILE_ALT_SEP_CHAR)) != NULL) {
1228                 *p = U_FILE_SEP_CHAR;
1229             }
1230 #if defined (UDATA_DEBUG)
1231             fprintf(stderr, "Changed path from [%s] to [%s]\n", path, altSepPath.s);
1232 #endif
1233             path = altSepPath.data();
1234         }
1235     }
1236 #endif
1237 
1238     CharString tocEntryName; /* entry name in tree format. ex:  'icudt28b/coll/ar.res' */
1239     CharString tocEntryPath; /* entry name in path format. ex:  'icudt28b\\coll\\ar.res' */
1240 
1241     CharString pkgName;
1242     CharString treeName;
1243 
1244     /* ======= Set up strings */
1245     if(path==NULL) {
1246         pkgName.append(U_ICUDATA_NAME, *pErrorCode);
1247     } else {
1248         const char *pkg;
1249         const char *first;
1250         pkg = uprv_strrchr(path, U_FILE_SEP_CHAR);
1251         first = uprv_strchr(path, U_FILE_SEP_CHAR);
1252         if(uprv_pathIsAbsolute(path) || (pkg != first)) { /* more than one slash in the path- not a tree name */
1253             /* see if this is an /absolute/path/to/package  path */
1254             if(pkg) {
1255                 pkgName.append(pkg+1, *pErrorCode);
1256             } else {
1257                 pkgName.append(path, *pErrorCode);
1258             }
1259         } else {
1260             treeChar = uprv_strchr(path, U_TREE_SEPARATOR);
1261             if(treeChar) {
1262                 treeName.append(treeChar+1, *pErrorCode); /* following '-' */
1263                 if(isICUData) {
1264                     pkgName.append(U_ICUDATA_NAME, *pErrorCode);
1265                 } else {
1266                     pkgName.append(path, (int32_t)(treeChar-path), *pErrorCode);
1267                     if (first == NULL) {
1268                         /*
1269                         This user data has no path, but there is a tree name.
1270                         Look up the correct path from the data cache later.
1271                         */
1272                         path = pkgName.data();
1273                     }
1274                 }
1275             } else {
1276                 if(isICUData) {
1277                     pkgName.append(U_ICUDATA_NAME, *pErrorCode);
1278                 } else {
1279                     pkgName.append(path, *pErrorCode);
1280                 }
1281             }
1282         }
1283     }
1284 
1285 #ifdef UDATA_DEBUG
1286     fprintf(stderr, " P=%s T=%s\n", pkgName.data(), treeName.data());
1287 #endif
1288 
1289     /* setting up the entry name and file name
1290      * Make up a full name by appending the type to the supplied
1291      *  name, assuming that a type was supplied.
1292      */
1293 
1294     /* prepend the package */
1295     tocEntryName.append(pkgName, *pErrorCode);
1296     tocEntryPath.append(pkgName, *pErrorCode);
1297     tocEntrySuffixIndex = tocEntryName.length();
1298 
1299     if(!treeName.isEmpty()) {
1300         tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode);
1301         tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode);
1302     }
1303 
1304     tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(name, *pErrorCode);
1305     tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(name, *pErrorCode);
1306     if(type!=NULL && *type!=0) {
1307         tocEntryName.append(".", *pErrorCode).append(type, *pErrorCode);
1308         tocEntryPath.append(".", *pErrorCode).append(type, *pErrorCode);
1309     }
1310     // The +1 is for the U_FILE_SEP_CHAR that is always appended above.
1311     tocEntryPathSuffix = tocEntryPath.data() + tocEntrySuffixIndex + 1; /* suffix starts here */
1312 
1313 #ifdef UDATA_DEBUG
1314     fprintf(stderr, " tocEntryName = %s\n", tocEntryName.data());
1315     fprintf(stderr, " tocEntryPath = %s\n", tocEntryName.data());
1316 #endif
1317 
1318 #if !defined(ICU_DATA_DIR_WINDOWS)
1319     if(path == NULL) {
1320         path = COMMON_DATA_NAME; /* "icudt26e" */
1321     }
1322 #else
1323     // When using the Windows system data, we expects only a single data file.
1324     path = COMMON_DATA_NAME; /* "icudt26e" */
1325 #endif
1326 
1327     /************************ Begin loop looking for ind. files ***************/
1328 #ifdef UDATA_DEBUG
1329     fprintf(stderr, "IND: inBasename = %s, pkg=%s\n", "(n/a)", packageNameFromPath(path));
1330 #endif
1331 
1332     /* End of dealing with a null basename */
1333     dataPath = u_getDataDirectory();
1334 
1335     /****    Time zone individual files override  */
1336     if (isICUData && isTimeZoneFile(name, type)) {
1337         const char *tzFilesDir = u_getTimeZoneFilesDirectory(pErrorCode);
1338         if (tzFilesDir[0] != 0) {
1339 #ifdef UDATA_DEBUG
1340             fprintf(stderr, "Trying Time Zone Files directory = %s\n", tzFilesDir);
1341 #endif
1342             retVal = doLoadFromIndividualFiles(/* pkgName.data() */ "", tzFilesDir, tocEntryPathSuffix,
1343                             /* path */ "", type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1344             if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
1345                 return retVal;
1346             }
1347         }
1348     }
1349 
1350     /****    COMMON PACKAGE  - only if packages are first. */
1351     if(gDataFileAccess == UDATA_PACKAGES_FIRST) {
1352 #ifdef UDATA_DEBUG
1353         fprintf(stderr, "Trying packages (UDATA_PACKAGES_FIRST)\n");
1354 #endif
1355         /* #2 */
1356         retVal = doLoadFromCommonData(isICUData,
1357                             pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(),
1358                             path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1359         if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
1360             return retVal;
1361         }
1362     }
1363 
1364     /****    INDIVIDUAL FILES  */
1365     if((gDataFileAccess==UDATA_PACKAGES_FIRST) ||
1366        (gDataFileAccess==UDATA_FILES_FIRST)) {
1367 #ifdef UDATA_DEBUG
1368         fprintf(stderr, "Trying individual files\n");
1369 #endif
1370         /* Check to make sure that there is a dataPath to iterate over */
1371         if ((dataPath && *dataPath) || !isICUData) {
1372             retVal = doLoadFromIndividualFiles(pkgName.data(), dataPath, tocEntryPathSuffix,
1373                             path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1374             if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
1375                 return retVal;
1376             }
1377         }
1378     }
1379 
1380     /****    COMMON PACKAGE  */
1381     if((gDataFileAccess==UDATA_ONLY_PACKAGES) ||
1382        (gDataFileAccess==UDATA_FILES_FIRST)) {
1383 #ifdef UDATA_DEBUG
1384         fprintf(stderr, "Trying packages (UDATA_ONLY_PACKAGES || UDATA_FILES_FIRST)\n");
1385 #endif
1386         retVal = doLoadFromCommonData(isICUData,
1387                             pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(),
1388                             path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1389         if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
1390             return retVal;
1391         }
1392     }
1393 
1394     /* Load from DLL.  If we haven't attempted package load, we also haven't had any chance to
1395         try a DLL (static or setCommonData/etc)  load.
1396          If we ever have a "UDATA_ONLY_FILES", add it to the or list here.  */
1397     if(gDataFileAccess==UDATA_NO_FILES) {
1398 #ifdef UDATA_DEBUG
1399         fprintf(stderr, "Trying common data (UDATA_NO_FILES)\n");
1400 #endif
1401         retVal = doLoadFromCommonData(isICUData,
1402                             pkgName.data(), "", tocEntryPathSuffix, tocEntryName.data(),
1403                             path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1404         if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
1405             return retVal;
1406         }
1407     }
1408 
1409     /* data not found */
1410     if(U_SUCCESS(*pErrorCode)) {
1411         if(U_SUCCESS(subErrorCode)) {
1412             /* file not found */
1413             *pErrorCode=U_FILE_ACCESS_ERROR;
1414         } else {
1415             /* entry point not found or rejected */
1416             *pErrorCode=subErrorCode;
1417         }
1418     }
1419     return retVal;
1420 }
1421 
1422 
1423 
1424 /* API ---------------------------------------------------------------------- */
1425 
1426 U_CAPI UDataMemory * U_EXPORT2
udata_open(const char * path,const char * type,const char * name,UErrorCode * pErrorCode)1427 udata_open(const char *path, const char *type, const char *name,
1428            UErrorCode *pErrorCode) {
1429 #ifdef UDATA_DEBUG
1430   fprintf(stderr, "udata_open(): Opening: %s : %s . %s\n", (path?path:"NULL"), name, type);
1431     fflush(stderr);
1432 #endif
1433 
1434     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1435         return NULL;
1436     } else if(name==NULL || *name==0) {
1437         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1438         return NULL;
1439     } else {
1440         return doOpenChoice(path, type, name, NULL, NULL, pErrorCode);
1441     }
1442 }
1443 
1444 
1445 
1446 U_CAPI UDataMemory * U_EXPORT2
udata_openChoice(const char * path,const char * type,const char * name,UDataMemoryIsAcceptable * isAcceptable,void * context,UErrorCode * pErrorCode)1447 udata_openChoice(const char *path, const char *type, const char *name,
1448                  UDataMemoryIsAcceptable *isAcceptable, void *context,
1449                  UErrorCode *pErrorCode) {
1450 #ifdef UDATA_DEBUG
1451   fprintf(stderr, "udata_openChoice(): Opening: %s : %s . %s\n", (path?path:"NULL"), name, type);
1452 #endif
1453 
1454     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1455         return NULL;
1456     } else if(name==NULL || *name==0 || isAcceptable==NULL) {
1457         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1458         return NULL;
1459     } else {
1460         return doOpenChoice(path, type, name, isAcceptable, context, pErrorCode);
1461     }
1462 }
1463 
1464 
1465 
1466 U_CAPI void U_EXPORT2
udata_getInfo(UDataMemory * pData,UDataInfo * pInfo)1467 udata_getInfo(UDataMemory *pData, UDataInfo *pInfo) {
1468     if(pInfo!=NULL) {
1469         if(pData!=NULL && pData->pHeader!=NULL) {
1470             const UDataInfo *info=&pData->pHeader->info;
1471             uint16_t dataInfoSize=udata_getInfoSize(info);
1472             if(pInfo->size>dataInfoSize) {
1473                 pInfo->size=dataInfoSize;
1474             }
1475             uprv_memcpy((uint16_t *)pInfo+1, (const uint16_t *)info+1, pInfo->size-2);
1476             if(info->isBigEndian!=U_IS_BIG_ENDIAN) {
1477                 /* opposite endianness */
1478                 uint16_t x=info->reservedWord;
1479                 pInfo->reservedWord=(uint16_t)((x<<8)|(x>>8));
1480             }
1481         } else {
1482             pInfo->size=0;
1483         }
1484     }
1485 }
1486 
1487 
udata_setFileAccess(UDataFileAccess access,UErrorCode *)1488 U_CAPI void U_EXPORT2 udata_setFileAccess(UDataFileAccess access, UErrorCode * /*status*/)
1489 {
1490     // Note: this function is documented as not thread safe.
1491     gDataFileAccess = access;
1492 }
1493