1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 * Copyright (C) 1997-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ******************************************************************************
10 *
11 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
12 *
13 * Date Name Description
14 * 04/14/97 aliu Creation.
15 * 04/24/97 aliu Added getDefaultDataDirectory() and
16 * getDefaultLocaleID().
17 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
18 * for assumed case. Non-UNIX platforms must be
19 * special-cased. Rewrote numeric methods dealing
20 * with NaN and Infinity to be platform independent
21 * over all IEEE 754 platforms.
22 * 05/13/97 aliu Restored sign of timezone
23 * (semantics are hours West of GMT)
24 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
25 * nextDouble..
26 * 07/22/98 stephen Added remainder, max, min, trunc
27 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
28 * 08/24/98 stephen Added longBitsFromDouble
29 * 09/08/98 stephen Minor changes for Mac Port
30 * 03/02/99 stephen Removed openFile(). Added AS400 support.
31 * Fixed EBCDIC tables
32 * 04/15/99 stephen Converted to C.
33 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
34 * 08/04/99 jeffrey R. Added OS/2 changes
35 * 11/15/99 helena Integrated S/390 IEEE support.
36 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
37 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
38 * 01/03/08 Steven L. Fake Time Support
39 ******************************************************************************
40 */
41
42 // Defines _XOPEN_SOURCE for access to POSIX functions.
43 // Must be before any other #includes.
44 #include "uposixdefs.h"
45
46 /* include ICU headers */
47 #include "unicode/utypes.h"
48 #include "unicode/putil.h"
49 #include "unicode/ustring.h"
50 #include "putilimp.h"
51 #include "uassert.h"
52 #include "umutex.h"
53 #include "cmemory.h"
54 #include "cstring.h"
55 #include "locmap.h"
56 #include "ucln_cmn.h"
57 #include "charstr.h"
58
59 /* Include standard headers. */
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <string.h>
63 #include <math.h>
64 #include <locale.h>
65 #include <float.h>
66
67 #ifndef U_COMMON_IMPLEMENTATION
68 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
69 #endif
70
71
72 /* include system headers */
73 #if U_PLATFORM_USES_ONLY_WIN32_API
74 /*
75 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
76 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
77 * to use native APIs as much as possible?
78 */
79 # define WIN32_LEAN_AND_MEAN
80 # define VC_EXTRALEAN
81 # define NOUSER
82 # define NOSERVICE
83 # define NOIME
84 # define NOMCX
85 # include <windows.h>
86 # include "wintz.h"
87 #elif U_PLATFORM == U_PF_OS400
88 # include <float.h>
89 # include <qusec.h> /* error code structure */
90 # include <qusrjobi.h>
91 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
92 # include <mih/testptr.h> /* For uprv_maximumPtr */
93 #elif U_PLATFORM == U_PF_OS390
94 # include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
95 #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
96 # include <limits.h>
97 # include <unistd.h>
98 # if U_PLATFORM == U_PF_SOLARIS
99 # ifndef _XPG4_2
100 # define _XPG4_2
101 # endif
102 # endif
103 #elif U_PLATFORM == U_PF_QNX
104 # include <sys/neutrino.h>
105 #endif
106
107 #if (U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(__STRICT_ANSI__)
108 /* tzset isn't defined in strict ANSI on Cygwin and MinGW. */
109 #undef __STRICT_ANSI__
110 #endif
111
/*
 * Cygwin with GCC requires time.h to be included after strict ANSI mode has been disabled above.
 */
115 #include <time.h>
116
117 #if !U_PLATFORM_USES_ONLY_WIN32_API
118 #include <sys/time.h>
119 #endif
120
121 /*
122 * Only include langinfo.h if we have a way to get the codeset. If we later
123 * depend on more feature, we can test on U_HAVE_NL_LANGINFO.
124 *
125 */
126
127 #if U_HAVE_NL_LANGINFO_CODESET
128 #include <langinfo.h>
129 #endif
130
/**
 * Simple things (presence of functions, etc) should just go in configure.in and be added to
 * icucfg.h via autoheader.
 *
 * The HAVE_* macros below are given defaults here: on POSIX platforms each one
 * defaults to 1 unless it is already defined (OS/400 forces the dl* macros to 0);
 * on every other platform all three are forced to 0.
 */
#if U_PLATFORM_IMPLEMENTS_POSIX
#   if U_PLATFORM == U_PF_OS400
#    define HAVE_DLFCN_H 0
#    define HAVE_DLOPEN 0
#   else
#   ifndef HAVE_DLFCN_H
#    define HAVE_DLFCN_H 1
#   endif
#   ifndef HAVE_DLOPEN
#    define HAVE_DLOPEN 1
#   endif
#   endif
#   ifndef HAVE_GETTIMEOFDAY
#    define HAVE_GETTIMEOFDAY 1
#   endif
#else
#   define HAVE_DLFCN_H 0
#   define HAVE_DLOPEN 0
#   define HAVE_GETTIMEOFDAY 0
#endif
155
156 U_NAMESPACE_USE
157
/* Define the extension for data files, again... */
#define DATA_TYPE "dat"

/* Leave this copyright notice here! */
/* File-scope array initialized from U_COPYRIGHT_STRING. */
static const char copyright[] = U_COPYRIGHT_STRING;
163
164 /* floating point implementations ------------------------------------------- */
165
/* We return QNAN rather than SNAN*/
/* Mask selecting the most significant bit of a 32-bit word. */
#define SIGN 0x80000000U

/* Make it easy to define certain types of constants */
/* Overlays a 64-bit integer and a double so that a double constant can be
   spelled as a raw bit pattern and read back through d64. */
typedef union {
    int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
    double d64;
} BitPatternConversion;
/* Bit pattern 0x7FF8000000000000; uprv_getNaN() returns its d64 view. */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
/* Bit pattern 0x7FF0000000000000; uprv_getInfinity() returns its d64 view. */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
176
177 /*---------------------------------------------------------------------------
178 Platform utilities
179 Our general strategy is to assume we're on a POSIX platform. Platforms which
180 are non-POSIX must declare themselves so. The default POSIX implementation
181 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
182 functions).
183 ---------------------------------------------------------------------------*/
184
/* U_POSIX_LOCALE is defined to 1 everywhere except on Win32-only platforms
   and OS/400, where it is explicitly left undefined. */
#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
#   undef U_POSIX_LOCALE
#else
#   define U_POSIX_LOCALE 1
#endif
190
191 /*
192 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
193 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
194 */
195 #if !IEEE_754
/*
 * Returns a pointer into the storage of *d.
 * Big-endian builds get the address of the first byte; all other builds get
 * the address n bytes before the end of the double.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char *)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
205
/*
 * Returns a pointer into the storage of *d.
 * Big-endian builds get the address n bytes before the end of the double;
 * all other builds get the address of the first byte.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char *)d;
#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
215 #endif /* !IEEE_754 */
216
217 #if IEEE_754
218 static UBool
u_signBit(double d)219 u_signBit(double d) {
220 uint8_t hiByte;
221 #if U_IS_BIG_ENDIAN
222 hiByte = *(uint8_t *)&d;
223 #else
224 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
225 #endif
226 return (hiByte & 0x80) != 0;
227 }
228 #endif
229
230
231
232 #if defined (U_DEBUG_FAKETIME)
233 /* Override the clock to test things without having to move the system clock.
234 * Assumes POSIX gettimeofday() will function
235 */
236 UDate fakeClock_t0 = 0; /** Time to start the clock from **/
237 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
238 UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
239 static UMutex fakeClockMutex = U_MUTEX_INTIALIZER;
240
getUTCtime_real()241 static UDate getUTCtime_real() {
242 struct timeval posixTime;
243 gettimeofday(&posixTime, NULL);
244 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
245 }
246
getUTCtime_fake()247 static UDate getUTCtime_fake() {
248 umtx_lock(&fakeClockMutex);
249 if(!fakeClock_set) {
250 UDate real = getUTCtime_real();
251 const char *fake_start = getenv("U_FAKETIME_START");
252 if((fake_start!=NULL) && (fake_start[0]!=0)) {
253 sscanf(fake_start,"%lf",&fakeClock_t0);
254 fakeClock_dt = fakeClock_t0 - real;
255 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
256 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
257 fakeClock_t0, fake_start, fakeClock_dt, real);
258 } else {
259 fakeClock_dt = 0;
260 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
261 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
262 }
263 fakeClock_set = TRUE;
264 }
265 umtx_unlock(&fakeClockMutex);
266
267 return getUTCtime_real() + fakeClock_dt;
268 }
269 #endif
270
271 #if U_PLATFORM_USES_ONLY_WIN32_API
/* Overlays a FILETIME with a 64-bit integer so the value filled in by
   GetSystemTimeAsFileTime() can be used in integer arithmetic. */
typedef union {
    int64_t int64;
    FILETIME fileTime;
} FileTimeConversion;   /* This is like a ULARGE_INTEGER */

/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
#define EPOCH_BIAS  INT64_C(116444736000000000)
/* Count of 100-nanosecond units in one millisecond. */
#define HECTONANOSECOND_PER_MILLISECOND   10000
280
281 #endif
282
283 /*---------------------------------------------------------------------------
284 Universal Implementations
285 These are designed to work on all platforms. Try these, and if they
286 don't work on your platform, then special case your platform with new
287 implementations.
288 ---------------------------------------------------------------------------*/
289
290 U_CAPI UDate U_EXPORT2
uprv_getUTCtime()291 uprv_getUTCtime()
292 {
293 #if defined(U_DEBUG_FAKETIME)
294 return getUTCtime_fake(); /* Hook for overriding the clock */
295 #else
296 return uprv_getRawUTCtime();
297 #endif
298 }
299
300 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
301 U_CAPI UDate U_EXPORT2
uprv_getRawUTCtime()302 uprv_getRawUTCtime()
303 {
304 #if U_PLATFORM_USES_ONLY_WIN32_API
305
306 FileTimeConversion winTime;
307 GetSystemTimeAsFileTime(&winTime.fileTime);
308 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
309 #else
310
311 #if HAVE_GETTIMEOFDAY
312 struct timeval posixTime;
313 gettimeofday(&posixTime, NULL);
314 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
315 #else
316 time_t epochtime;
317 time(&epochtime);
318 return (UDate)epochtime * U_MILLIS_PER_SECOND;
319 #endif
320
321 #endif
322 }
323
324 /*-----------------------------------------------------------------------------
325 IEEE 754
326 These methods detect and return NaN and infinity values for doubles
327 conforming to IEEE 754. Platforms which support this standard include X86,
328 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
329 If this doesn't work on your platform, you have non-IEEE floating-point, and
330 will need to code your own versions. A naive implementation is to return 0.0
331 for getNaN and getInfinity, and false for isNaN and isInfinite.
332 ---------------------------------------------------------------------------*/
333
334 U_CAPI UBool U_EXPORT2
uprv_isNaN(double number)335 uprv_isNaN(double number)
336 {
337 #if IEEE_754
338 BitPatternConversion convertedNumber;
339 convertedNumber.d64 = number;
340 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
341 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
342
343 #elif U_PLATFORM == U_PF_OS390
344 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
345 sizeof(uint32_t));
346 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
347 sizeof(uint32_t));
348
349 return ((highBits & 0x7F080000L) == 0x7F080000L) &&
350 (lowBits == 0x00000000L);
351
352 #else
353 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
354 /* you'll need to replace this default implementation with what's correct*/
355 /* for your platform.*/
356 return number != number;
357 #endif
358 }
359
360 U_CAPI UBool U_EXPORT2
uprv_isInfinite(double number)361 uprv_isInfinite(double number)
362 {
363 #if IEEE_754
364 BitPatternConversion convertedNumber;
365 convertedNumber.d64 = number;
366 /* Infinity is exactly 0x7FF0000000000000U. */
367 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
368 #elif U_PLATFORM == U_PF_OS390
369 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
370 sizeof(uint32_t));
371 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
372 sizeof(uint32_t));
373
374 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
375
376 #else
377 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
378 /* value, you'll need to replace this default implementation with what's*/
379 /* correct for your platform.*/
380 return number == (2.0 * number);
381 #endif
382 }
383
384 U_CAPI UBool U_EXPORT2
uprv_isPositiveInfinity(double number)385 uprv_isPositiveInfinity(double number)
386 {
387 #if IEEE_754 || U_PLATFORM == U_PF_OS390
388 return (UBool)(number > 0 && uprv_isInfinite(number));
389 #else
390 return uprv_isInfinite(number);
391 #endif
392 }
393
394 U_CAPI UBool U_EXPORT2
uprv_isNegativeInfinity(double number)395 uprv_isNegativeInfinity(double number)
396 {
397 #if IEEE_754 || U_PLATFORM == U_PF_OS390
398 return (UBool)(number < 0 && uprv_isInfinite(number));
399
400 #else
401 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
402 sizeof(uint32_t));
403 return((highBits & SIGN) && uprv_isInfinite(number));
404
405 #endif
406 }
407
408 U_CAPI double U_EXPORT2
uprv_getNaN()409 uprv_getNaN()
410 {
411 #if IEEE_754 || U_PLATFORM == U_PF_OS390
412 return gNan.d64;
413 #else
414 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
415 /* you'll need to replace this default implementation with what's correct*/
416 /* for your platform.*/
417 return 0.0;
418 #endif
419 }
420
421 U_CAPI double U_EXPORT2
uprv_getInfinity()422 uprv_getInfinity()
423 {
424 #if IEEE_754 || U_PLATFORM == U_PF_OS390
425 return gInf.d64;
426 #else
427 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
428 /* value, you'll need to replace this default implementation with what's*/
429 /* correct for your platform.*/
430 return 0.0;
431 #endif
432 }
433
434 U_CAPI double U_EXPORT2
uprv_floor(double x)435 uprv_floor(double x)
436 {
437 return floor(x);
438 }
439
440 U_CAPI double U_EXPORT2
uprv_ceil(double x)441 uprv_ceil(double x)
442 {
443 return ceil(x);
444 }
445
446 U_CAPI double U_EXPORT2
uprv_round(double x)447 uprv_round(double x)
448 {
449 return uprv_floor(x + 0.5);
450 }
451
452 U_CAPI double U_EXPORT2
uprv_fabs(double x)453 uprv_fabs(double x)
454 {
455 return fabs(x);
456 }
457
458 U_CAPI double U_EXPORT2
uprv_modf(double x,double * y)459 uprv_modf(double x, double* y)
460 {
461 return modf(x, y);
462 }
463
464 U_CAPI double U_EXPORT2
uprv_fmod(double x,double y)465 uprv_fmod(double x, double y)
466 {
467 return fmod(x, y);
468 }
469
470 U_CAPI double U_EXPORT2
uprv_pow(double x,double y)471 uprv_pow(double x, double y)
472 {
473 /* This is declared as "double pow(double x, double y)" */
474 return pow(x, y);
475 }
476
477 U_CAPI double U_EXPORT2
uprv_pow10(int32_t x)478 uprv_pow10(int32_t x)
479 {
480 return pow(10.0, (double)x);
481 }
482
483 U_CAPI double U_EXPORT2
uprv_fmax(double x,double y)484 uprv_fmax(double x, double y)
485 {
486 #if IEEE_754
487 /* first handle NaN*/
488 if(uprv_isNaN(x) || uprv_isNaN(y))
489 return uprv_getNaN();
490
491 /* check for -0 and 0*/
492 if(x == 0.0 && y == 0.0 && u_signBit(x))
493 return y;
494
495 #endif
496
497 /* this should work for all flt point w/o NaN and Inf special cases */
498 return (x > y ? x : y);
499 }
500
501 U_CAPI double U_EXPORT2
uprv_fmin(double x,double y)502 uprv_fmin(double x, double y)
503 {
504 #if IEEE_754
505 /* first handle NaN*/
506 if(uprv_isNaN(x) || uprv_isNaN(y))
507 return uprv_getNaN();
508
509 /* check for -0 and 0*/
510 if(x == 0.0 && y == 0.0 && u_signBit(y))
511 return y;
512
513 #endif
514
515 /* this should work for all flt point w/o NaN and Inf special cases */
516 return (x > y ? y : x);
517 }
518
519 /**
520 * Truncates the given double.
521 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
522 * This is different than calling floor() or ceil():
523 * floor(3.3) = 3, floor(-3.3) = -4
524 * ceil(3.3) = 4, ceil(-3.3) = -3
525 */
526 U_CAPI double U_EXPORT2
uprv_trunc(double d)527 uprv_trunc(double d)
528 {
529 #if IEEE_754
530 /* handle error cases*/
531 if(uprv_isNaN(d))
532 return uprv_getNaN();
533 if(uprv_isInfinite(d))
534 return uprv_getInfinity();
535
536 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */
537 return ceil(d);
538 else
539 return floor(d);
540
541 #else
542 return d >= 0 ? floor(d) : ceil(d);
543
544 #endif
545 }
546
547 /**
548 * Return the largest positive number that can be represented by an integer
549 * type of arbitrary bit length.
550 */
551 U_CAPI double U_EXPORT2
uprv_maxMantissa(void)552 uprv_maxMantissa(void)
553 {
554 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
555 }
556
557 U_CAPI double U_EXPORT2
uprv_log(double d)558 uprv_log(double d)
559 {
560 return log(d);
561 }
562
563 U_CAPI void * U_EXPORT2
uprv_maximumPtr(void * base)564 uprv_maximumPtr(void * base)
565 {
566 #if U_PLATFORM == U_PF_OS400
567 /*
568 * With the provided function we should never be out of range of a given segment
569 * (a traditional/typical segment that is). Our segments have 5 bytes for the
570 * id and 3 bytes for the offset. The key is that the casting takes care of
571 * only retrieving the offset portion minus x1000. Hence, the smallest offset
572 * seen in a program is x001000 and when casted to an int would be 0.
573 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
574 *
575 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
576 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
577 * This function determines the activation based on the pointer that is passed in and
578 * calculates the appropriate maximum available size for
579 * each pointer type (TERASPACE and non-TERASPACE)
580 *
581 * Unlike other operating systems, the pointer model isn't determined at
582 * compile time on i5/OS.
583 */
584 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
585 /* if it is a TERASPACE pointer the max is 2GB - 4k */
586 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
587 }
588 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
589 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
590
591 #else
592 return U_MAX_PTR(base);
593 #endif
594 }
595
596 /*---------------------------------------------------------------------------
597 Platform-specific Implementations
598 Try these, and if they don't work on your platform, then special case your
599 platform with new implementations.
600 ---------------------------------------------------------------------------*/
601
602 /* Generic time zone layer -------------------------------------------------- */
603
604 /* Time zone utilities */
605 U_CAPI void U_EXPORT2
uprv_tzset()606 uprv_tzset()
607 {
608 #if defined(U_TZSET)
609 U_TZSET();
610 #else
611 /* no initialization*/
612 #endif
613 }
614
615 U_CAPI int32_t U_EXPORT2
uprv_timezone()616 uprv_timezone()
617 {
618 #ifdef U_TIMEZONE
619 return U_TIMEZONE;
620 #else
621 time_t t, t1, t2;
622 struct tm tmrec;
623 int32_t tdiff = 0;
624
625 time(&t);
626 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
627 #if U_PLATFORM != U_PF_IPHONE
628 UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
629 #endif
630 t1 = mktime(&tmrec); /* local time in seconds*/
631 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
632 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
633 tdiff = t2 - t1;
634
635 #if U_PLATFORM != U_PF_IPHONE
636 /* imitate NT behaviour, which returns same timezone offset to GMT for
637 winter and summer.
638 This does not work on all platforms. For instance, on glibc on Linux
639 and on Mac OS 10.5, tdiff calculated above remains the same
640 regardless of whether DST is in effect or not. iOS is another
641 platform where this does not work. Linux + glibc and Mac OS 10.5
642 have U_TIMEZONE defined so that this code is not reached.
643 */
644 if (dst_checked)
645 tdiff += 3600;
646 #endif
647 return tdiff;
648 #endif
649 }
650
/* Note that U_TZNAME does *not* have to be tzname, but if it is,
   some platforms need to have it declared here. */

/* The declaration is emitted only when U_TZNAME is defined and the platform
   is IRIX, Darwin-based, or Cygwin built without the Win32-only API. */
#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && !U_PLATFORM_USES_ONLY_WIN32_API))
/* RS6000 and others reject char **tzname. */
extern U_IMPORT char *U_TZNAME[];
#endif
658
#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
/* These platforms are likely to use Olson timezone IDs. */
#define CHECK_LOCALTIME_LINK 1
/* Per-platform locations of the default TZ file and the zoneinfo directory. */
#if U_PLATFORM_IS_DARWIN_BASED
/* NOTE(review): TZDIR (and presumably TZDEFAULT) are expected to come from <tzfile.h> here - confirm. */
#include <tzfile.h>
#define TZZONEINFO (TZDIR "/")
#elif U_PLATFORM == U_PF_SOLARIS
#define TZDEFAULT "/etc/localtime"
#define TZZONEINFO "/usr/share/lib/zoneinfo/"
#define TZZONEINFO2 "../usr/share/lib/zoneinfo/"
#define TZ_ENV_CHECK "localtime"
#else
#define TZDEFAULT "/etc/localtime"
#define TZZONEINFO "/usr/share/zoneinfo/"
#endif
#if U_HAVE_DIRENT_H
#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
   symlinked to /etc/localtime, which makes searchForTZFile return
   'localtime' when it's the first match. */
#define TZFILE_SKIP2 "localtime"
/* Enables the directory-search code guarded by SEARCH_TZFILE. */
#define SEARCH_TZFILE
#include <dirent.h> /* Needed to search through system timezone files */
#endif
/* NOTE(review): presumably these hold the detected time zone name; their uses are outside this section - confirm. */
static char gTimeZoneBuffer[PATH_MAX];
static char *gTimeZoneBufferPtr = NULL;
#endif
686
687 #if !U_PLATFORM_USES_ONLY_WIN32_API
688 #define isNonDigit(ch) (ch < '0' || '9' < ch)
isValidOlsonID(const char * id)689 static UBool isValidOlsonID(const char *id) {
690 int32_t idx = 0;
691
692 /* Determine if this is something like Iceland (Olson ID)
693 or AST4ADT (non-Olson ID) */
694 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
695 idx++;
696 }
697
698 /* If we went through the whole string, then it might be okay.
699 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
700 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
701 The rest of the time it could be an Olson ID. George */
702 return (UBool)(id[idx] == 0
703 || uprv_strcmp(id, "PST8PDT") == 0
704 || uprv_strcmp(id, "MST7MDT") == 0
705 || uprv_strcmp(id, "CST6CDT") == 0
706 || uprv_strcmp(id, "EST5EDT") == 0);
707 }
708
709 /* On some Unix-like OS, 'posix' subdirectory in
710 /usr/share/zoneinfo replicates the top-level contents. 'right'
711 subdirectory has the same set of files, but individual files
712 are different from those in the top-level directory or 'posix'
713 because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
714 has files for UTC.
715 When the first match for /etc/localtime is in either of them
716 (usually in posix because 'right' has different file contents),
717 or TZ environment variable points to one of them, createTimeZone
718 fails because, say, 'posix/America/New_York' is not an Olson
719 timezone id ('America/New_York' is). So, we have to skip
720 'posix/' and 'right/' at the beginning. */
skipZoneIDPrefix(const char ** id)721 static void skipZoneIDPrefix(const char** id) {
722 if (uprv_strncmp(*id, "posix/", 6) == 0
723 || uprv_strncmp(*id, "right/", 6) == 0)
724 {
725 *id += 6;
726 }
727 }
728 #endif
729
730 #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
731
/* Multiplies an hour count by 3600 and casts the product to int32_t.
   The argument is parenthesized so that compound expressions such as
   (a + b) are scaled as a whole. */
#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)((offset)*3600)

/* One row of the table that maps a UTC offset, a daylight type and a pair of
   abbreviated zone names to an Olson ID. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;
    int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/
    const char *stdID;
    const char *dstID;
    const char *olsonID;
} OffsetZoneMapping;

/* Values stored in OffsetZoneMapping.daylightType. */
enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 };
742
743 /*
744 This list tries to disambiguate a set of abbreviated timezone IDs and offsets
745 and maps it to an Olson ID.
746 Before adding anything to this list, take a look at
747 icu/source/tools/tzcode/tz.alias
748 Sometimes no daylight savings (0) is important to define due to aliases.
749 This list can be tested with icu/source/test/compat/tzone.pl
750 More values could be added to daylightType to increase precision.
751 */
752 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
753 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
754 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
755 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
756 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
757 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
758 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
759 {-36000, 2, "EST", "EST", "Australia/Sydney"},
760 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
761 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
762 {-34200, 2, "CST", "CST", "Australia/South"},
763 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
764 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
765 {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
766 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
767 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
768 {-28800, 2, "WST", "WST", "Australia/West"},
769 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
770 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
771 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
772 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
773 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
774 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
775 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
776 {-14400, 1, "AZT", "AZST", "Asia/Baku"},
777 {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
778 {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
779 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
780 {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
781 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
782 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
783 {-3600, 0, "CET", "WEST", "Africa/Algiers"},
784 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
785 {0, 1, "GMT", "IST", "Europe/Dublin"},
786 {0, 1, "GMT", "BST", "Europe/London"},
787 {0, 0, "WET", "WEST", "Africa/Casablanca"},
788 {0, 0, "WET", "WET", "Africa/El_Aaiun"},
789 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
790 {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
791 {10800, 1, "PMST", "PMDT", "America/Miquelon"},
792 {10800, 2, "UYT", "UYST", "America/Montevideo"},
793 {10800, 1, "WGT", "WGST", "America/Godthab"},
794 {10800, 2, "BRT", "BRST", "Brazil/East"},
795 {12600, 1, "NST", "NDT", "America/St_Johns"},
796 {14400, 1, "AST", "ADT", "Canada/Atlantic"},
797 {14400, 2, "AMT", "AMST", "America/Cuiaba"},
798 {14400, 2, "CLT", "CLST", "Chile/Continental"},
799 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
800 {14400, 2, "PYT", "PYST", "America/Asuncion"},
801 {18000, 1, "CST", "CDT", "America/Havana"},
802 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
803 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
804 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
805 {21600, 0, "CST", "CDT", "America/Guatemala"},
806 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
807 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
808 {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
809 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
810 {32400, 1, "AKST", "AKDT", "US/Alaska"},
811 {36000, 1, "HAST", "HADT", "US/Aleutian"}
812 };
813
814 /*#define DEBUG_TZNAME*/
815
remapShortTimeZone(const char * stdID,const char * dstID,int32_t daylightType,int32_t offset)816 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
817 {
818 int32_t idx;
819 #ifdef DEBUG_TZNAME
820 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
821 #endif
822 for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
823 {
824 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
825 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
826 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
827 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
828 {
829 return OFFSET_ZONE_MAPPINGS[idx].olsonID;
830 }
831 }
832 return NULL;
833 }
834 #endif
835
836 #ifdef SEARCH_TZFILE
/* Size in bytes of the chunk buffer compareBinaryFiles() reads candidate files into. */
#define MAX_READ_SIZE 512

/* State that compareBinaryFiles() keeps between calls about the default TZ file. */
typedef struct DefaultTZInfo {
    char* defaultTZBuffer;      /* contents of the default TZ file; allocated on first use */
    int64_t defaultTZFileSize;  /* size of the default TZ file; 0 means not measured yet */
    FILE* defaultTZFilePtr;     /* open handle on the default TZ file; NULL until first opened */
    UBool defaultTZstatus;      /* NOTE(review): not used in the visible code - meaning to be confirmed */
    int32_t defaultTZPosition;  /* compare offset into defaultTZBuffer; reset to 0 on every call */
} DefaultTZInfo;
846
847 /*
848 * This method compares the two files given to see if they are a match.
849 * It is currently use to compare two TZ files.
850 */
compareBinaryFiles(const char * defaultTZFileName,const char * TZFileName,DefaultTZInfo * tzInfo)851 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
852 FILE* file;
853 int64_t sizeFile;
854 int64_t sizeFileLeft;
855 int32_t sizeFileRead;
856 int32_t sizeFileToRead;
857 char bufferFile[MAX_READ_SIZE];
858 UBool result = TRUE;
859
860 if (tzInfo->defaultTZFilePtr == NULL) {
861 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
862 }
863 file = fopen(TZFileName, "r");
864
865 tzInfo->defaultTZPosition = 0; /* reset position to begin search */
866
867 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
868 /* First check that the file size are equal. */
869 if (tzInfo->defaultTZFileSize == 0) {
870 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
871 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
872 }
873 fseek(file, 0, SEEK_END);
874 sizeFile = ftell(file);
875 sizeFileLeft = sizeFile;
876
877 if (sizeFile != tzInfo->defaultTZFileSize) {
878 result = FALSE;
879 } else {
880 /* Store the data from the files in seperate buffers and
881 * compare each byte to determine equality.
882 */
883 if (tzInfo->defaultTZBuffer == NULL) {
884 rewind(tzInfo->defaultTZFilePtr);
885 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
886 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
887 }
888 rewind(file);
889 while(sizeFileLeft > 0) {
890 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
891 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
892
893 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
894 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
895 result = FALSE;
896 break;
897 }
898 sizeFileLeft -= sizeFileRead;
899 tzInfo->defaultTZPosition += sizeFileRead;
900 }
901 }
902 } else {
903 result = FALSE;
904 }
905
906 if (file != NULL) {
907 fclose(file);
908 }
909
910 return result;
911 }
912
913
/* dirent also lists two entries: "." and ".." that we can safely ignore. */
#define SKIP1 "."
#define SKIP2 ".."
/* Forward declaration; searchForTZFile() registers it through ucln_common_registerCleanup(). */
static UBool U_CALLCONV putil_cleanup(void);
/* Created by searchForTZFile() the first time it is needed. */
static CharString *gSearchTZFileResult   = NULL;
919
920 /*
921 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
922 * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
923 */
searchForTZFile(const char * path,DefaultTZInfo * tzInfo)924 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
925 DIR* dirp = opendir(path);
926 DIR* subDirp = NULL;
927 struct dirent* dirEntry = NULL;
928
929 char* result = NULL;
930 if (dirp == NULL) {
931 return result;
932 }
933
934 if (gSearchTZFileResult == NULL) {
935 gSearchTZFileResult = new CharString;
936 if (gSearchTZFileResult == NULL) {
937 return NULL;
938 }
939 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
940 }
941
942 /* Save the current path */
943 UErrorCode status = U_ZERO_ERROR;
944 CharString curpath(path, -1, status);
945 if (U_FAILURE(status)) {
946 return NULL;
947 }
948
949 /* Check each entry in the directory. */
950 while((dirEntry = readdir(dirp)) != NULL) {
951 const char* dirName = dirEntry->d_name;
952 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
953 /* Create a newpath with the new entry to test each entry in the directory. */
954 CharString newpath(curpath, status);
955 newpath.append(dirName, -1, status);
956 if (U_FAILURE(status)) {
957 return NULL;
958 }
959
960 if ((subDirp = opendir(newpath.data())) != NULL) {
961 /* If this new path is a directory, make a recursive call with the newpath. */
962 closedir(subDirp);
963 newpath.append('/', status);
964 if (U_FAILURE(status)) {
965 return NULL;
966 }
967 result = searchForTZFile(newpath.data(), tzInfo);
968 /*
969 Have to get out here. Otherwise, we'd keep looking
970 and return the first match in the top-level directory
971 if there's a match in the top-level. If not, this function
972 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
973 It worked without this in most cases because we have a fallback of calling
974 localtime_r to figure out the default timezone.
975 */
976 if (result != NULL)
977 break;
978 } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
979 if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) {
980 int32_t amountToSkip = sizeof(TZZONEINFO) - 1;
981 if (amountToSkip > newpath.length()) {
982 amountToSkip = newpath.length();
983 }
984 const char* zoneid = newpath.data() + amountToSkip;
985 skipZoneIDPrefix(&zoneid);
986 gSearchTZFileResult->clear();
987 gSearchTZFileResult->append(zoneid, -1, status);
988 if (U_FAILURE(status)) {
989 return NULL;
990 }
991 result = gSearchTZFileResult->data();
992 /* Get out after the first one found. */
993 break;
994 }
995 }
996 }
997 }
998 closedir(dirp);
999 return result;
1000 }
1001 #endif
1002
1003 U_CAPI void U_EXPORT2
uprv_tzname_clear_cache()1004 uprv_tzname_clear_cache()
1005 {
1006 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1007 gTimeZoneBufferPtr = NULL;
1008 #endif
1009 }
1010
/*
 * Returns a best-effort ID for the host's default time zone.
 *
 * Sources are tried in this order, each only where it is compiled in:
 *   1. Win32-only builds: uprv_detectWindowsTimeZone().
 *   2. Other builds (unless DEBUG_TZNAME is defined): the TZ environment
 *      variable, when isValidOlsonID() accepts it.
 *   3. With CHECK_LOCALTIME_LINK (and without DEBUG_SKIP_LOCALTIME_LINK):
 *      the target of the TZDEFAULT symlink when it lies under TZZONEINFO, or,
 *      when readlink() fails and SEARCH_TZFILE is defined, the result of
 *      searchForTZFile(). A successful answer is cached in gTimeZoneBufferPtr
 *      and returned directly by later calls.
 *   4. With U_TZNAME: the result of remapShortTimeZone(), else U_TZNAME[n].
 *   5. Otherwise the empty string.
 *
 * n  Index into U_TZNAME; only the U_TZNAME fallback reads it.
 *
 * On Win32-only builds the U_TZNAME fallback returns a uprv_strdup'ed copy
 * (see the comment at that return).
 */
U_CAPI const char* U_EXPORT2
uprv_tzname(int n)
{
    const char *tzid = NULL;
#if U_PLATFORM_USES_ONLY_WIN32_API
    tzid = uprv_detectWindowsTimeZone();

    if (tzid != NULL) {
        return tzid;
    }
#else

/*#if U_PLATFORM_IS_DARWIN_BASED
    int ret;

    tzid = getenv("TZFILE");
    if (tzid != NULL) {
        return tzid;
    }
#endif*/

/* This code can be temporarily disabled to test tzname resolution later on. */
#ifndef DEBUG_TZNAME
    tzid = getenv("TZ");
    if (tzid != NULL && isValidOlsonID(tzid)
#if U_PLATFORM == U_PF_SOLARIS
    /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
        && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
#endif
    ) {
        /* The colon forces tzset() to treat the remainder as zoneinfo path */
        if (tzid[0] == ':') {
            tzid++;
        }
        /* This might be a good Olson ID. */
        skipZoneIDPrefix(&tzid);
        return tzid;
    }
    /* else U_TZNAME will give a better result. */
#endif

#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
    /* Caller must handle threading issues */
    if (gTimeZoneBufferPtr == NULL) {
        /*
        This is a trick to look at the name of the link to get the Olson ID
        because the tzfile contents is underspecified.
        This isn't guaranteed to work because it may not be a symlink.
        */
        /* One byte is held back because readlink() does not NUL-terminate; the terminator is written below. */
        int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
        if (0 < ret) {
            int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
            gTimeZoneBuffer[ret] = 0;
            /* Accept the link only if it points below TZZONEINFO and the remainder is a valid ID. */
            if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
                && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
            {
                return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
            }
#if U_PLATFORM == U_PF_SOLARIS
            else
            {
                /* Solaris: retry with the alternate zoneinfo prefix. */
                tzZoneInfoLen = uprv_strlen(TZZONEINFO2);
                if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO2, tzZoneInfoLen) == 0
                                && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
                {
                    return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
                }
            }
#endif
        } else {
            /* readlink() failed or returned nothing: TZDEFAULT is presumably not a symlink. */
#if defined(SEARCH_TZFILE)
            DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
            if (tzInfo != NULL) {
                tzInfo->defaultTZBuffer = NULL;
                tzInfo->defaultTZFileSize = 0;
                tzInfo->defaultTZFilePtr = NULL;
                tzInfo->defaultTZstatus = FALSE;
                tzInfo->defaultTZPosition = 0;

                gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);

                /* Free previously allocated memory */
                if (tzInfo->defaultTZBuffer != NULL) {
                    uprv_free(tzInfo->defaultTZBuffer);
                }
                if (tzInfo->defaultTZFilePtr != NULL) {
                    fclose(tzInfo->defaultTZFilePtr);
                }
                uprv_free(tzInfo);
            }

            /* NOTE(review): a non-NULL but invalid search result stays cached in gTimeZoneBufferPtr
               and would be returned unvalidated by the else branch below on a later call. */
            if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
                return gTimeZoneBufferPtr;
            }
#endif
        }
    }
    else {
        /* A previous call already resolved the zone. */
        return gTimeZoneBufferPtr;
    }
#endif
#endif

#ifdef U_TZNAME
#if U_PLATFORM_USES_ONLY_WIN32_API
    /* The return value is free'd in timezone.cpp on Windows because
     * the other code path returns a pointer to a heap location. */
    return uprv_strdup(U_TZNAME[n]);
#else
    /*
    U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
    So we remap the abbreviation to an olson ID.

    Since Windows exposes a little more timezone information,
    we normally don't use this code on Windows because
    uprv_detectWindowsTimeZone should have already given the correct answer.
    */
    {
        struct tm juneSol, decemberSol;
        int daylightType;
        static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
        static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/

        /* This probing will tell us when daylight savings occurs. */
        localtime_r(&juneSolstice, &juneSol);
        localtime_r(&decemberSolstice, &decemberSol);
        if(decemberSol.tm_isdst > 0) {
            daylightType = U_DAYLIGHT_DECEMBER;
        } else if(juneSol.tm_isdst > 0) {
            daylightType = U_DAYLIGHT_JUNE;
        } else {
            daylightType = U_DAYLIGHT_NONE;
        }
        tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
        if (tzid != NULL) {
            return tzid;
        }
    }
    /* No remapping was found: hand back the raw platform abbreviation. */
    return U_TZNAME[n];
#endif
#else
    return "";
#endif
}
1155
/* Get and set the ICU data directory --------------------------------------- */

/* Ensures dataDirectoryInitFn() runs at most once from u_getDataDirectory(); reset in putil_cleanup(). */
static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER;
/* Current data directory: NULL until set, then either the literal "" or a uprv_malloc'ed copy (see u_setDataDirectory()). */
static char *gDataDirectory = NULL;

/* Ensures TimeZoneDataDirInitFn() runs at most once; reset in putil_cleanup(). */
UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER;
/* Time zone files directory; allocated in TimeZoneDataDirInitFn() and deleted in putil_cleanup(). */
static CharString *gTimeZoneFilesDirectory = NULL;

#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
/* Cached result of uprv_getDefaultLocaleID(); freed in putil_cleanup(). */
static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
#endif
1167
putil_cleanup(void)1168 static UBool U_CALLCONV putil_cleanup(void)
1169 {
1170 if (gDataDirectory && *gDataDirectory) {
1171 uprv_free(gDataDirectory);
1172 }
1173 gDataDirectory = NULL;
1174 gDataDirInitOnce.reset();
1175
1176 delete gTimeZoneFilesDirectory;
1177 gTimeZoneFilesDirectory = NULL;
1178 gTimeZoneFilesInitOnce.reset();
1179
1180 #ifdef SEARCH_TZFILE
1181 delete gSearchTZFileResult;
1182 gSearchTZFileResult = NULL;
1183 #endif
1184
1185 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1186 if (gCorrectedPOSIXLocale) {
1187 uprv_free(gCorrectedPOSIXLocale);
1188 gCorrectedPOSIXLocale = NULL;
1189 }
1190 #endif
1191 return TRUE;
1192 }
1193
1194 /*
1195 * Set the data directory.
1196 * Make a copy of the passed string, and set the global data dir to point to it.
1197 */
1198 U_CAPI void U_EXPORT2
u_setDataDirectory(const char * directory)1199 u_setDataDirectory(const char *directory) {
1200 char *newDataDir;
1201 int32_t length;
1202
1203 if(directory==NULL || *directory==0) {
1204 /* A small optimization to prevent the malloc and copy when the
1205 shared library is used, and this is a way to make sure that NULL
1206 is never returned.
1207 */
1208 newDataDir = (char *)"";
1209 }
1210 else {
1211 length=(int32_t)uprv_strlen(directory);
1212 newDataDir = (char *)uprv_malloc(length + 2);
1213 /* Exit out if newDataDir could not be created. */
1214 if (newDataDir == NULL) {
1215 return;
1216 }
1217 uprv_strcpy(newDataDir, directory);
1218
1219 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1220 {
1221 char *p;
1222 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1223 *p = U_FILE_SEP_CHAR;
1224 }
1225 }
1226 #endif
1227 }
1228
1229 if (gDataDirectory && *gDataDirectory) {
1230 uprv_free(gDataDirectory);
1231 }
1232 gDataDirectory = newDataDir;
1233 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1234 }
1235
1236 U_CAPI UBool U_EXPORT2
uprv_pathIsAbsolute(const char * path)1237 uprv_pathIsAbsolute(const char *path)
1238 {
1239 if(!path || !*path) {
1240 return FALSE;
1241 }
1242
1243 if(*path == U_FILE_SEP_CHAR) {
1244 return TRUE;
1245 }
1246
1247 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1248 if(*path == U_FILE_ALT_SEP_CHAR) {
1249 return TRUE;
1250 }
1251 #endif
1252
1253 #if U_PLATFORM_USES_ONLY_WIN32_API
1254 if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1255 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1256 path[1] == ':' ) {
1257 return TRUE;
1258 }
1259 #endif
1260
1261 return FALSE;
1262 }
1263
/* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
   until some client wrapper makefiles are updated.
   For iPhone simulator builds that have not defined the macro themselves,
   default it to "IPHONE_SIMULATOR_ROOT". dataDirectoryInitFn() reads the
   environment variable named by this macro. */
#if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR
# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
#  define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
# endif
#endif
1271
dataDirectoryInitFn()1272 static void U_CALLCONV dataDirectoryInitFn() {
1273 /* If we already have the directory, then return immediately. Will happen if user called
1274 * u_setDataDirectory().
1275 */
1276 if (gDataDirectory) {
1277 return;
1278 }
1279
1280 const char *path = NULL;
1281 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1282 char datadir_path_buffer[PATH_MAX];
1283 #endif
1284
1285 /*
1286 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1287 override ICU's data with the ICU_DATA environment variable. This prevents
1288 problems where multiple custom copies of ICU's specific version of data
1289 are installed on a system. Either the application must define the data
1290 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1291 ICU, set the data with udata_setCommonData or trust that all of the
1292 required data is contained in ICU's data library that contains
1293 the entry point defined by U_ICUDATA_ENTRY_POINT.
1294
1295 There may also be some platforms where environment variables
1296 are not allowed.
1297 */
1298 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1299 /* First try to get the environment variable */
1300 path=getenv("ICU_DATA");
1301 # endif
1302
1303 /* ICU_DATA_DIR may be set as a compile option.
1304 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1305 * and is used only when data is built in archive mode eliminating the need
1306 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1307 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1308 * set their own path.
1309 */
1310 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1311 if(path==NULL || *path==0) {
1312 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1313 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1314 # endif
1315 # ifdef ICU_DATA_DIR
1316 path=ICU_DATA_DIR;
1317 # else
1318 path=U_ICU_DATA_DEFAULT_DIR;
1319 # endif
1320 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1321 if (prefix != NULL) {
1322 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
1323 path=datadir_path_buffer;
1324 }
1325 # endif
1326 }
1327 #endif
1328
1329 if(path==NULL) {
1330 /* It looks really bad, set it to something. */
1331 path = "";
1332 }
1333
1334 u_setDataDirectory(path);
1335 return;
1336 }
1337
/*
 * Returns the current ICU data directory.
 * On first use the default is computed by dataDirectoryInitFn(), which runs
 * at most once under gDataDirInitOnce.
 */
U_CAPI const char * U_EXPORT2
u_getDataDirectory(void) {
    umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
    return gDataDirectory;
}
1343
setTimeZoneFilesDir(const char * path,UErrorCode & status)1344 static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
1345 if (U_FAILURE(status)) {
1346 return;
1347 }
1348 gTimeZoneFilesDirectory->clear();
1349 gTimeZoneFilesDirectory->append(path, status);
1350 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1351 char *p = gTimeZoneFilesDirectory->data();
1352 while (p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) {
1353 *p = U_FILE_SEP_CHAR;
1354 }
1355 #endif
1356 }
1357
1358 #define TO_STRING(x) TO_STRING_2(x)
1359 #define TO_STRING_2(x) #x
1360
TimeZoneDataDirInitFn(UErrorCode & status)1361 static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
1362 U_ASSERT(gTimeZoneFilesDirectory == NULL);
1363 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1364 gTimeZoneFilesDirectory = new CharString();
1365 if (gTimeZoneFilesDirectory == NULL) {
1366 status = U_MEMORY_ALLOCATION_ERROR;
1367 return;
1368 }
1369 const char *dir = getenv("ICU_TIMEZONE_FILES_DIR");
1370 #if defined(U_TIMEZONE_FILES_DIR)
1371 if (dir == NULL) {
1372 dir = TO_STRING(U_TIMEZONE_FILES_DIR);
1373 }
1374 #endif
1375 if (dir == NULL) {
1376 dir = "";
1377 }
1378 setTimeZoneFilesDir(dir, status);
1379 }
1380
1381
1382 U_CAPI const char * U_EXPORT2
u_getTimeZoneFilesDirectory(UErrorCode * status)1383 u_getTimeZoneFilesDirectory(UErrorCode *status) {
1384 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1385 return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : "";
1386 }
1387
/*
 * Sets the directory used for time zone files.
 *
 * path    New directory string; stored via setTimeZoneFilesDir().
 * status  In/out ICU error code; the directory is left as initialized if it
 *         holds a failure after the one-time initialization.
 */
U_CAPI void U_EXPORT2
u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
    /* Run the one-time initializer first so that gTimeZoneFilesDirectory exists. */
    umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
    setTimeZoneFilesDir(path, *status);

    // Note: this function does some extra churn, first setting based on the
    //       environment, then immediately replacing with the value passed in.
    //       The logic is simpler that way, and performance shouldn't be an issue.
}
1397
1398
1399 #if U_POSIX_LOCALE
1400 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1401 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1402 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1403 */
uprv_getPOSIXIDForCategory(int category)1404 static const char *uprv_getPOSIXIDForCategory(int category)
1405 {
1406 const char* posixID = NULL;
1407 if (category == LC_MESSAGES || category == LC_CTYPE) {
1408 /*
1409 * On Solaris two different calls to setlocale can result in
1410 * different values. Only get this value once.
1411 *
1412 * We must check this first because an application can set this.
1413 *
1414 * LC_ALL can't be used because it's platform dependent. The LANG
1415 * environment variable seems to affect LC_CTYPE variable by default.
1416 * Here is what setlocale(LC_ALL, NULL) can return.
1417 * HPUX can return 'C C C C C C C'
1418 * Solaris can return /en_US/C/C/C/C/C on the second try.
1419 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1420 *
1421 * The default codepage detection also needs to use LC_CTYPE.
1422 *
1423 * Do not call setlocale(LC_*, "")! Using an empty string instead
1424 * of NULL, will modify the libc behavior.
1425 */
1426 posixID = setlocale(category, NULL);
1427 if ((posixID == 0)
1428 || (uprv_strcmp("C", posixID) == 0)
1429 || (uprv_strcmp("POSIX", posixID) == 0))
1430 {
1431 /* Maybe we got some garbage. Try something more reasonable */
1432 posixID = getenv("LC_ALL");
1433 /* Solaris speaks POSIX - See IEEE Std 1003.1-2008
1434 * This is needed to properly handle empty env. variables
1435 */
1436 #if U_PLATFORM == U_PF_SOLARIS
1437 if ((posixID == 0) || (posixID[0] == '\0')) {
1438 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1439 if ((posixID == 0) || (posixID[0] == '\0')) {
1440 #else
1441 if (posixID == 0) {
1442 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1443 if (posixID == 0) {
1444 #endif
1445 posixID = getenv("LANG");
1446 }
1447 }
1448 }
1449 }
1450 if ((posixID==0)
1451 || (uprv_strcmp("C", posixID) == 0)
1452 || (uprv_strcmp("POSIX", posixID) == 0))
1453 {
1454 /* Nothing worked. Give it a nice POSIX default value. */
1455 posixID = "en_US_POSIX";
1456 }
1457 return posixID;
1458 }
1459
1460 /* Return just the POSIX id for the default locale, whatever happens to be in
1461 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1462 */
1463 static const char *uprv_getPOSIXIDForDefaultLocale(void)
1464 {
1465 static const char* posixID = NULL;
1466 if (posixID == 0) {
1467 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
1468 }
1469 return posixID;
1470 }
1471
1472 #if !U_CHARSET_IS_UTF8
1473 /* Return just the POSIX id for the default codepage, whatever happens to be in
1474 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1475 */
1476 static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1477 {
1478 static const char* posixID = NULL;
1479 if (posixID == 0) {
1480 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1481 }
1482 return posixID;
1483 }
1484 #endif
1485 #endif
1486
1487 /* NOTE: The caller should handle thread safety */
1488 U_CAPI const char* U_EXPORT2
1489 uprv_getDefaultLocaleID()
1490 {
1491 #if U_POSIX_LOCALE
1492 /*
1493 Note that: (a '!' means the ID is improper somehow)
1494 LC_ALL ----> default_loc codepage
1495 --------------------------------------------------------
1496 ab.CD ab CD
1497 ab@CD ab__CD -
1498 ab@CD.EF ab__CD EF
1499
1500 ab_CD.EF@GH ab_CD_GH EF
1501
1502 Some 'improper' ways to do the same as above:
1503 ! ab_CD@GH.EF ab_CD_GH EF
1504 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1505 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1506
1507 _CD@GH _CD_GH -
1508 _CD.EF@GH _CD_GH EF
1509
1510 The variant cannot have dots in it.
1511 The 'rightmost' variant (@xxx) wins.
1512 The leftmost codepage (.xxx) wins.
1513 */
1514 char *correctedPOSIXLocale = 0;
1515 const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1516 const char *p;
1517 const char *q;
1518 int32_t len;
1519
1520 /* Format: (no spaces)
1521 ll [ _CC ] [ . MM ] [ @ VV]
1522
1523 l = lang, C = ctry, M = charmap, V = variant
1524 */
1525
1526 if (gCorrectedPOSIXLocale != NULL) {
1527 return gCorrectedPOSIXLocale;
1528 }
1529
1530 if ((p = uprv_strchr(posixID, '.')) != NULL) {
1531 /* assume new locale can't be larger than old one? */
1532 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
1533 /* Exit on memory allocation error. */
1534 if (correctedPOSIXLocale == NULL) {
1535 return NULL;
1536 }
1537 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1538 correctedPOSIXLocale[p-posixID] = 0;
1539
1540 /* do not copy after the @ */
1541 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1542 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1543 }
1544 }
1545
1546 /* Note that we scan the *uncorrected* ID. */
1547 if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1548 if (correctedPOSIXLocale == NULL) {
1549 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
1550 /* Exit on memory allocation error. */
1551 if (correctedPOSIXLocale == NULL) {
1552 return NULL;
1553 }
1554 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1555 correctedPOSIXLocale[p-posixID] = 0;
1556 }
1557 p++;
1558
1559 /* Take care of any special cases here.. */
1560 if (!uprv_strcmp(p, "nynorsk")) {
1561 p = "NY";
1562 /* Don't worry about no__NY. In practice, it won't appear. */
1563 }
1564
1565 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1566 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1567 }
1568 else {
1569 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1570 }
1571
1572 if ((q = uprv_strchr(p, '.')) != NULL) {
1573 /* How big will the resulting string be? */
1574 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1575 uprv_strncat(correctedPOSIXLocale, p, q-p);
1576 correctedPOSIXLocale[len] = 0;
1577 }
1578 else {
1579 /* Anything following the @ sign */
1580 uprv_strcat(correctedPOSIXLocale, p);
1581 }
1582
1583 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1584 * How about 'russian' -> 'ru'?
1585 * Many of the other locales using ISO codes will be handled by the
1586 * canonicalization functions in uloc_getDefault.
1587 */
1588 }
1589
1590 /* Was a correction made? */
1591 if (correctedPOSIXLocale != NULL) {
1592 posixID = correctedPOSIXLocale;
1593 }
1594 else {
1595 /* copy it, just in case the original pointer goes away. See j2395 */
1596 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1597 /* Exit on memory allocation error. */
1598 if (correctedPOSIXLocale == NULL) {
1599 return NULL;
1600 }
1601 posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1602 }
1603
1604 if (gCorrectedPOSIXLocale == NULL) {
1605 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1606 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1607 correctedPOSIXLocale = NULL;
1608 }
1609
1610 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */
1611 uprv_free(correctedPOSIXLocale);
1612 }
1613
1614 return posixID;
1615
1616 #elif U_PLATFORM_USES_ONLY_WIN32_API
1617 #define POSIX_LOCALE_CAPACITY 64
1618 UErrorCode status = U_ZERO_ERROR;
1619 char *correctedPOSIXLocale = 0;
1620
1621 if (gCorrectedPOSIXLocale != NULL) {
1622 return gCorrectedPOSIXLocale;
1623 }
1624
1625 LCID id = GetThreadLocale();
1626 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
1627 if (correctedPOSIXLocale) {
1628 int32_t posixLen = uprv_convertToPosix(id, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
1629 if (U_SUCCESS(status)) {
1630 *(correctedPOSIXLocale + posixLen) = 0;
1631 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1632 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1633 } else {
1634 uprv_free(correctedPOSIXLocale);
1635 }
1636 }
1637
1638 if (gCorrectedPOSIXLocale == NULL) {
1639 return "en_US";
1640 }
1641 return gCorrectedPOSIXLocale;
1642
1643 #elif U_PLATFORM == U_PF_OS400
1644 /* locales are process scoped and are by definition thread safe */
1645 static char correctedLocale[64];
1646 const char *localeID = getenv("LC_ALL");
1647 char *p;
1648
1649 if (localeID == NULL)
1650 localeID = getenv("LANG");
1651 if (localeID == NULL)
1652 localeID = setlocale(LC_ALL, NULL);
1653 /* Make sure we have something... */
1654 if (localeID == NULL)
1655 return "en_US_POSIX";
1656
1657 /* Extract the locale name from the path. */
1658 if((p = uprv_strrchr(localeID, '/')) != NULL)
1659 {
1660 /* Increment p to start of locale name. */
1661 p++;
1662 localeID = p;
1663 }
1664
1665 /* Copy to work location. */
1666 uprv_strcpy(correctedLocale, localeID);
1667
1668 /* Strip off the '.locale' extension. */
1669 if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1670 *p = 0;
1671 }
1672
1673 /* Upper case the locale name. */
1674 T_CString_toUpperCase(correctedLocale);
1675
1676 /* See if we are using the POSIX locale. Any of the
1677 * following are equivalent and use the same QLGPGCMA
1678 * (POSIX) locale.
1679 * QLGPGCMA2 means UCS2
1680 * QLGPGCMA_4 means UTF-32
1681 * QLGPGCMA_8 means UTF-8
1682 */
1683 if ((uprv_strcmp("C", correctedLocale) == 0) ||
1684 (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1685 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1686 {
1687 uprv_strcpy(correctedLocale, "en_US_POSIX");
1688 }
1689 else
1690 {
1691 int16_t LocaleLen;
1692
1693 /* Lower case the lang portion. */
1694 for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1695 {
1696 *p = uprv_tolower(*p);
1697 }
1698
1699 /* Adjust for Euro. After '_E' add 'URO'. */
1700 LocaleLen = uprv_strlen(correctedLocale);
1701 if (correctedLocale[LocaleLen - 2] == '_' &&
1702 correctedLocale[LocaleLen - 1] == 'E')
1703 {
1704 uprv_strcat(correctedLocale, "URO");
1705 }
1706
1707 /* If using Lotus-based locale then convert to
1708 * equivalent non Lotus.
1709 */
1710 else if (correctedLocale[LocaleLen - 2] == '_' &&
1711 correctedLocale[LocaleLen - 1] == 'L')
1712 {
1713 correctedLocale[LocaleLen - 2] = 0;
1714 }
1715
1716 /* There are separate simplified and traditional
1717 * locales called zh_HK_S and zh_HK_T.
1718 */
1719 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1720 {
1721 uprv_strcpy(correctedLocale, "zh_HK");
1722 }
1723
1724 /* A special zh_CN_GBK locale...
1725 */
1726 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1727 {
1728 uprv_strcpy(correctedLocale, "zh_CN");
1729 }
1730
1731 }
1732
1733 return correctedLocale;
1734 #endif
1735
1736 }
1737
1738 #if !U_CHARSET_IS_UTF8
1739 #if U_POSIX_LOCALE
/*
Due to various platform differences, one platform may specify a charset,
when they really mean a different charset. Remap the names so that they are
compatible with ICU. Only conflicting/ambiguous aliases should be resolved
here. Before adding anything to this function, please consider adding unique
names to the ICU alias table in the data directory.

locale  Locale ID the charset name belongs to, or NULL. An empty string is
        treated like NULL. Only some platform branches consult it.
name    Charset name reported by the platform. NULL is returned unchanged.

Returns the remapped name (a string literal), the original name pointer when
no rule applies, or NULL when the resulting name is the empty string.
*/
static const char*
remapPlatformDependentCodepage(const char *locale, const char *name) {
    if (locale != NULL && *locale == 0) {
        /* Make sure that an empty locale is handled the same way. */
        locale = NULL;
    }
    if (name == NULL) {
        return NULL;
    }
#if U_PLATFORM == U_PF_AIX
    if (uprv_strcmp(name, "IBM-943") == 0) {
        /* Use the ASCII compatible ibm-943 */
        name = "Shift-JIS";
    }
    else if (uprv_strcmp(name, "IBM-1252") == 0) {
        /* Use the windows-1252 that contains the Euro */
        name = "IBM-5348";
    }
#elif U_PLATFORM == U_PF_SOLARIS
    if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
        /* Solaris underspecifies the "EUC" name. */
        if (uprv_strcmp(locale, "zh_CN") == 0) {
            name = "EUC-CN";
        }
        else if (uprv_strcmp(locale, "zh_TW") == 0) {
            name = "EUC-TW";
        }
        else if (uprv_strcmp(locale, "ko_KR") == 0) {
            name = "EUC-KR";
        }
    }
    else if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-954 is the best match.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
    else if (uprv_strcmp(name, "646") == 0) {
        /*
         * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
         * ISO-8859-1 instead of US-ASCII(646).
         */
        name = "ISO-8859-1";
    }
#elif U_PLATFORM_IS_DARWIN_BASED
    if (locale == NULL && *name == 0) {
        /*
        No locale was specified, and an empty name was passed in.
        This usually indicates that nl_langinfo didn't return valid information.
        Mac OS X uses UTF-8 by default (especially the locale data and console).
        */
        name = "UTF-8";
    }
    else if (uprv_strcmp(name, "CP949") == 0) {
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
        name = "EUC-KR";
    }
    else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
        /*
         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
         */
        name = "UTF-8";
    }
#elif U_PLATFORM == U_PF_BSD
    if (uprv_strcmp(name, "CP949") == 0) {
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
        name = "EUC-KR";
    }
#elif U_PLATFORM == U_PF_HPUX
    if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
        /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
        /* zh_TW.big5 is not the same charset as zh_HK.big5! */
        name = "hkbig5";
    }
    else if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
#elif U_PLATFORM == U_PF_LINUX
    if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
        /* Linux underspecifies the "EUC" name. */
        if (uprv_strcmp(locale, "korean") == 0) {
            name = "EUC-KR";
        }
        else if (uprv_strcmp(locale, "japanese") == 0) {
            /* See comment below about eucJP */
            name = "eucjis";
        }
    }
    else if (uprv_strcmp(name, "eucjp") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
    else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
            (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
        /*
         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
         */
        name = "UTF-8";
    }
    /*
     * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
     * it by falling back to 'US-ASCII' when NULL is returned from this
     * function. So, we don't have to worry about it here.
     */
#endif
    /* return NULL when "" is passed in */
    if (*name == 0) {
        name = NULL;
    }
    return name;
}
1868
1869 static const char*
1870 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
1871 {
1872 char localeBuf[100];
1873 const char *name = NULL;
1874 char *variant = NULL;
1875
1876 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
1877 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
1878 uprv_strncpy(localeBuf, localeName, localeCapacity);
1879 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
1880 name = uprv_strncpy(buffer, name+1, buffCapacity);
1881 buffer[buffCapacity-1] = 0; /* ensure NULL termination */
1882 if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) {
1883 *variant = 0;
1884 }
1885 name = remapPlatformDependentCodepage(localeBuf, name);
1886 }
1887 return name;
1888 }
1889 #endif
1890
1891 static const char*
1892 int_getDefaultCodepage()
1893 {
1894 #if U_PLATFORM == U_PF_OS400
1895 uint32_t ccsid = 37; /* Default to ibm-37 */
1896 static char codepage[64];
1897 Qwc_JOBI0400_t jobinfo;
1898 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
1899
1900 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
1901 "* ", " ", &error);
1902
1903 if (error.Bytes_Available == 0) {
1904 if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
1905 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
1906 }
1907 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
1908 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
1909 }
1910 /* else use the default */
1911 }
1912 sprintf(codepage,"ibm-%d", ccsid);
1913 return codepage;
1914
1915 #elif U_PLATFORM == U_PF_OS390
1916 static char codepage[64];
1917
1918 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
1919 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
1920 codepage[63] = 0; /* NULL terminate */
1921
1922 return codepage;
1923
1924 #elif U_PLATFORM_USES_ONLY_WIN32_API
1925 static char codepage[64];
1926 sprintf(codepage, "windows-%d", GetACP());
1927 return codepage;
1928
1929 #elif U_POSIX_LOCALE
1930 static char codesetName[100];
1931 const char *localeName = NULL;
1932 const char *name = NULL;
1933
1934 localeName = uprv_getPOSIXIDForDefaultCodepage();
1935 uprv_memset(codesetName, 0, sizeof(codesetName));
1936 /* On Solaris nl_langinfo returns C locale values unless setlocale
1937 * was called earlier.
1938 */
1939 #if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
1940 /* When available, check nl_langinfo first because it usually gives more
1941 useful names. It depends on LC_CTYPE.
1942 nl_langinfo may use the same buffer as setlocale. */
1943 {
1944 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
1945 #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
1946 /*
1947 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
1948 * instead of ASCII.
1949 */
1950 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
1951 codeset = remapPlatformDependentCodepage(localeName, codeset);
1952 } else
1953 #endif
1954 {
1955 codeset = remapPlatformDependentCodepage(NULL, codeset);
1956 }
1957
1958 if (codeset != NULL) {
1959 uprv_strncpy(codesetName, codeset, sizeof(codesetName));
1960 codesetName[sizeof(codesetName)-1] = 0;
1961 return codesetName;
1962 }
1963 }
1964 #endif
1965
1966 /* Use setlocale in a nice way, and then check some environment variables.
1967 Maybe the application used setlocale already.
1968 */
1969 uprv_memset(codesetName, 0, sizeof(codesetName));
1970 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
1971 if (name) {
1972 /* if we can find the codeset name from setlocale, return that. */
1973 return name;
1974 }
1975
1976 if (*codesetName == 0)
1977 {
1978 /* Everything failed. Return US ASCII (ISO 646). */
1979 (void)uprv_strcpy(codesetName, "US-ASCII");
1980 }
1981 return codesetName;
1982 #else
1983 return "US-ASCII";
1984 #endif
1985 }
1986
1987
1988 U_CAPI const char* U_EXPORT2
1989 uprv_getDefaultCodepage()
1990 {
1991 static char const *name = NULL;
1992 umtx_lock(NULL);
1993 if (name == NULL) {
1994 name = int_getDefaultCodepage();
1995 }
1996 umtx_unlock(NULL);
1997 return name;
1998 }
1999 #endif /* !U_CHARSET_IS_UTF8 */
2000
2001
2002 /* end of platform-specific implementation -------------- */
2003
2004 /* version handling --------------------------------------------------------- */
2005
2006 U_CAPI void U_EXPORT2
2007 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
2008 char *end;
2009 uint16_t part=0;
2010
2011 if(versionArray==NULL) {
2012 return;
2013 }
2014
2015 if(versionString!=NULL) {
2016 for(;;) {
2017 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
2018 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
2019 break;
2020 }
2021 versionString=end+1;
2022 }
2023 }
2024
2025 while(part<U_MAX_VERSION_LENGTH) {
2026 versionArray[part++]=0;
2027 }
2028 }
2029
2030 U_CAPI void U_EXPORT2
2031 u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2032 if(versionArray!=NULL && versionString!=NULL) {
2033 char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2034 int32_t len = u_strlen(versionString);
2035 if(len>U_MAX_VERSION_STRING_LENGTH) {
2036 len = U_MAX_VERSION_STRING_LENGTH;
2037 }
2038 u_UCharsToChars(versionString, versionChars, len);
2039 versionChars[len]=0;
2040 u_versionFromString(versionArray, versionChars);
2041 }
2042 }
2043
2044 U_CAPI void U_EXPORT2
2045 u_versionToString(const UVersionInfo versionArray, char *versionString) {
2046 uint16_t count, part;
2047 uint8_t field;
2048
2049 if(versionString==NULL) {
2050 return;
2051 }
2052
2053 if(versionArray==NULL) {
2054 versionString[0]=0;
2055 return;
2056 }
2057
2058 /* count how many fields need to be written */
2059 for(count=4; count>0 && versionArray[count-1]==0; --count) {
2060 }
2061
2062 if(count <= 1) {
2063 count = 2;
2064 }
2065
2066 /* write the first part */
2067 /* write the decimal field value */
2068 field=versionArray[0];
2069 if(field>=100) {
2070 *versionString++=(char)('0'+field/100);
2071 field%=100;
2072 }
2073 if(field>=10) {
2074 *versionString++=(char)('0'+field/10);
2075 field%=10;
2076 }
2077 *versionString++=(char)('0'+field);
2078
2079 /* write the following parts */
2080 for(part=1; part<count; ++part) {
2081 /* write a dot first */
2082 *versionString++=U_VERSION_DELIMITER;
2083
2084 /* write the decimal field value */
2085 field=versionArray[part];
2086 if(field>=100) {
2087 *versionString++=(char)('0'+field/100);
2088 field%=100;
2089 }
2090 if(field>=10) {
2091 *versionString++=(char)('0'+field/10);
2092 field%=10;
2093 }
2094 *versionString++=(char)('0'+field);
2095 }
2096
2097 /* NUL-terminate */
2098 *versionString=0;
2099 }
2100
2101 U_CAPI void U_EXPORT2
2102 u_getVersion(UVersionInfo versionArray) {
2103 (void)copyright; // Suppress unused variable warning from clang.
2104 u_versionFromString(versionArray, U_ICU_VERSION);
2105 }
2106
2107 /**
2108 * icucfg.h dependent code
2109 */
2110
2111 #if U_ENABLE_DYLOAD
2112
2113 #if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
2114
2115 #if HAVE_DLFCN_H
2116
2117 #ifdef __MVS__
2118 #ifndef __SUSV3
2119 #define __SUSV3 1
2120 #endif
2121 #endif
2122 #include <dlfcn.h>
2123 #endif
2124
2125 U_INTERNAL void * U_EXPORT2
2126 uprv_dl_open(const char *libName, UErrorCode *status) {
2127 void *ret = NULL;
2128 if(U_FAILURE(*status)) return ret;
2129 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2130 if(ret==NULL) {
2131 #ifdef U_TRACE_DYLOAD
2132 printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
2133 #endif
2134 *status = U_MISSING_RESOURCE_ERROR;
2135 }
2136 return ret;
2137 }
2138
2139 U_INTERNAL void U_EXPORT2
2140 uprv_dl_close(void *lib, UErrorCode *status) {
2141 if(U_FAILURE(*status)) return;
2142 dlclose(lib);
2143 }
2144
2145 U_INTERNAL UVoidFunction* U_EXPORT2
2146 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2147 union {
2148 UVoidFunction *fp;
2149 void *vp;
2150 } uret;
2151 uret.fp = NULL;
2152 if(U_FAILURE(*status)) return uret.fp;
2153 uret.vp = dlsym(lib, sym);
2154 if(uret.vp == NULL) {
2155 #ifdef U_TRACE_DYLOAD
2156 printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
2157 #endif
2158 *status = U_MISSING_RESOURCE_ERROR;
2159 }
2160 return uret.fp;
2161 }
2162
2163 #else
2164
2165 /* null (nonexistent) implementation. */
2166
2167 U_INTERNAL void * U_EXPORT2
2168 uprv_dl_open(const char *libName, UErrorCode *status) {
2169 if(U_FAILURE(*status)) return NULL;
2170 *status = U_UNSUPPORTED_ERROR;
2171 return NULL;
2172 }
2173
2174 U_INTERNAL void U_EXPORT2
2175 uprv_dl_close(void *lib, UErrorCode *status) {
2176 if(U_FAILURE(*status)) return;
2177 *status = U_UNSUPPORTED_ERROR;
2178 return;
2179 }
2180
2181
2182 U_INTERNAL UVoidFunction* U_EXPORT2
2183 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2184 if(U_SUCCESS(*status)) {
2185 *status = U_UNSUPPORTED_ERROR;
2186 }
2187 return (UVoidFunction*)NULL;
2188 }
2189
2190
2191
2192 #endif
2193
2194 #elif U_PLATFORM_USES_ONLY_WIN32_API
2195
2196 U_INTERNAL void * U_EXPORT2
2197 uprv_dl_open(const char *libName, UErrorCode *status) {
2198 HMODULE lib = NULL;
2199
2200 if(U_FAILURE(*status)) return NULL;
2201
2202 lib = LoadLibraryA(libName);
2203
2204 if(lib==NULL) {
2205 *status = U_MISSING_RESOURCE_ERROR;
2206 }
2207
2208 return (void*)lib;
2209 }
2210
2211 U_INTERNAL void U_EXPORT2
2212 uprv_dl_close(void *lib, UErrorCode *status) {
2213 HMODULE handle = (HMODULE)lib;
2214 if(U_FAILURE(*status)) return;
2215
2216 FreeLibrary(handle);
2217
2218 return;
2219 }
2220
2221
2222 U_INTERNAL UVoidFunction* U_EXPORT2
2223 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2224 HMODULE handle = (HMODULE)lib;
2225 UVoidFunction* addr = NULL;
2226
2227 if(U_FAILURE(*status) || lib==NULL) return NULL;
2228
2229 addr = (UVoidFunction*)GetProcAddress(handle, sym);
2230
2231 if(addr==NULL) {
2232 DWORD lastError = GetLastError();
2233 if(lastError == ERROR_PROC_NOT_FOUND) {
2234 *status = U_MISSING_RESOURCE_ERROR;
2235 } else {
2236 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2237 }
2238 }
2239
2240 return addr;
2241 }
2242
2243
2244 #else
2245
2246 /* No dynamic loading set. */
2247
2248 U_INTERNAL void * U_EXPORT2
2249 uprv_dl_open(const char *libName, UErrorCode *status) {
2250 (void)libName;
2251 if(U_FAILURE(*status)) return NULL;
2252 *status = U_UNSUPPORTED_ERROR;
2253 return NULL;
2254 }
2255
2256 U_INTERNAL void U_EXPORT2
2257 uprv_dl_close(void *lib, UErrorCode *status) {
2258 (void)lib;
2259 if(U_FAILURE(*status)) return;
2260 *status = U_UNSUPPORTED_ERROR;
2261 return;
2262 }
2263
2264
2265 U_INTERNAL UVoidFunction* U_EXPORT2
2266 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2267 (void)lib;
2268 (void)sym;
2269 if(U_SUCCESS(*status)) {
2270 *status = U_UNSUPPORTED_ERROR;
2271 }
2272 return (UVoidFunction*)NULL;
2273 }
2274
2275 #endif /* U_ENABLE_DYLOAD */
2276
2277 /*
2278 * Hey, Emacs, please set the following:
2279 *
2280 * Local Variables:
2281 * indent-tabs-mode: nil
2282 * End:
2283 *
2284 */
2285