"""Strptime-related classes and functions.

CLASSES:
    LocaleTime -- Discovers and stores locale-specific time information
    TimeRE -- Creates regexes for pattern matching a string of text containing
                time information

FUNCTIONS:
    _getlang -- Figure out what language is being used for the locale
    strptime -- Calculates the time struct represented by the passed-in string

"""
import time
import locale
import calendar
from re import compile as re_compile
from re import IGNORECASE
from re import escape as re_escape
from datetime import (date as datetime_date,
                      timedelta as datetime_timedelta,
                      timezone as datetime_timezone)
try:
    from _thread import allocate_lock as _thread_allocate_lock
except ImportError:
    from _dummy_thread import allocate_lock as _thread_allocate_lock

__all__ = []


def _getlang():
    """Return the current LC_TIME locale setting as reported by locale."""
    # Figure out what the current language is set to.
    return locale.getlocale(locale.LC_TIME)


class LocaleTime(object):
    """Stores and handles locale-specific information related to time.

    ATTRIBUTES:
        f_weekday -- full weekday names (7-item list)
        a_weekday -- abbreviated weekday names (7-item list)
        f_month -- full month names (13-item list; dummy value in [0], which
                    is added by code)
        a_month -- abbreviated month names (13-item list, dummy value in
                    [0], which is added by code)
        am_pm -- AM/PM representation (2-item list)
        LC_date_time -- format string for date/time representation (string)
        LC_date -- format string for date representation (string)
        LC_time -- format string for time representation (string)
        timezone -- daylight- and non-daylight-savings timezone representation
                    (2-item list of sets)
        lang -- Language used by instance (2-item tuple)
        tzname -- value of time.tzname captured when the instance was built
        daylight -- value of time.daylight captured when the instance was
                    built

    All stored names are lower-cased.
    """

    def __init__(self):
        """Set all attributes.

        Order of methods called matters for dependency reasons.

        The locale language is read at the outset and then checked again
        before exiting.  This is to make sure that the attributes were not set
        with a mix of information from more than one locale.  This would most
        likely happen when using threads where one thread calls a
        locale-dependent function while another thread changes the locale
        while the function in the other thread is still running.  Proper
        coding would call for locks to prevent changing the locale while
        locale-dependent code is running.  The check here is done in case
        someone does not think about doing this.

        Only other possible issue is if someone changed the timezone and did
        not call time.tzset().  That is an issue for the programmer, though,
        since changing the timezone is worthless without that call.

        Raises ValueError if the locale or the timezone information changed
        while the attributes were being computed.

        """
        self.lang = _getlang()
        self.__calc_weekday()
        self.__calc_month()
        self.__calc_am_pm()
        # __calc_date_time() reads self.timezone, so the timezone must be
        # computed first.
        self.__calc_timezone()
        self.__calc_date_time()
        if _getlang() != self.lang:
            raise ValueError("locale changed during initialization")
        if time.tzname != self.tzname or time.daylight != self.daylight:
            raise ValueError("timezone changed during initialization")

    def __pad(self, seq, front):
        # Add '' to seq to either the front (is True), else the back.
        # Not called elsewhere in this module.
        seq = list(seq)
        if front:
            seq.insert(0, '')
        else:
            seq.append('')
        return seq

    def __calc_weekday(self):
        # Set self.a_weekday and self.f_weekday using the calendar
        # module.
        a_weekday = [calendar.day_abbr[i].lower() for i in range(7)]
        f_weekday = [calendar.day_name[i].lower() for i in range(7)]
        self.a_weekday = a_weekday
        self.f_weekday = f_weekday

    def __calc_month(self):
        # Set self.f_month and self.a_month using the calendar module.
        a_month = [calendar.month_abbr[i].lower() for i in range(13)]
        f_month = [calendar.month_name[i].lower() for i in range(13)]
        self.a_month = a_month
        self.f_month = f_month

    def __calc_am_pm(self):
        # Set self.am_pm by using time.strftime().

        # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
        # magical; just happened to have used it everywhere else where a
        # static date was needed.
        am_pm = []
        for hour in (1, 22):
            time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0))
            am_pm.append(time.strftime("%p", time_tuple).lower())
        self.am_pm = am_pm

    def __calc_date_time(self):
        # Set self.LC_date_time, self.LC_date, & self.LC_time by using
        # time.strftime().

        # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
        # overloaded numbers is minimized.  The order in which searches for
        # values within the format string is very important; it eliminates
        # possible ambiguity for what something represents.
        time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0))
        date_time = [None, None, None]
        date_time[0] = time.strftime("%c", time_tuple).lower()
        date_time[1] = time.strftime("%x", time_tuple).lower()
        date_time[2] = time.strftime("%X", time_tuple).lower()
        replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'),
                    (self.f_month[3], '%B'), (self.a_weekday[2], '%a'),
                    (self.a_month[3], '%b'), (self.am_pm[1], '%p'),
                    ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
                    ('44', '%M'), ('55', '%S'), ('76', '%j'),
                    ('17', '%d'), ('03', '%m'), ('3', '%m'),
                    # '3' needed for when no leading zero.
                    ('2', '%w'), ('10', '%I')]
        replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone
                                                for tz in tz_values])
        for offset,directive in ((0,'%c'), (1,'%x'), (2,'%X')):
            current_format = date_time[offset]
            for old, new in replacement_pairs:
                # Must deal with possible lack of locale info
                # manifesting itself as the empty string (e.g., Swedish's
                # lack of AM/PM info) or a platform returning a tuple of empty
                # strings (e.g., MacOS 9 having timezone as ('','')).
                if old:
                    current_format = current_format.replace(old, new)
            # Probe with Sunday, 1999-01-03.  If the locale's format uses %W
            # (weeks start on Monday), that date falls in week 0 because it
            # occurs before the first Monday of the year, so '00' shows up in
            # the output.  Otherwise %U is assumed.
            time_tuple = time.struct_time((1999,1,3,1,1,1,6,3,0))
            if '00' in time.strftime(directive, time_tuple):
                U_W = '%W'
            else:
                U_W = '%U'
            # Any '11' left over after the replacements above is taken to be
            # the week number of the magic date.
            date_time[offset] = current_format.replace('11', U_W)
        self.LC_date_time = date_time[0]
        self.LC_date = date_time[1]
        self.LC_time = date_time[2]

    def __calc_timezone(self):
        # Set self.timezone by using time.tzname.
        # Do not worry about possibility of time.tzname[0] == time.tzname[1]
        # and time.daylight; handle that in strptime.
        try:
            time.tzset()
        except AttributeError:
            # time.tzset() is not available on this platform.
            pass
        self.tzname = time.tzname
        self.daylight = time.daylight
        no_saving = frozenset({"utc", "gmt", self.tzname[0].lower()})
        if self.daylight:
            has_saving = frozenset({self.tzname[1].lower()})
        else:
            has_saving = frozenset()
        self.timezone = (no_saving, has_saving)


class TimeRE(dict):
    """Handle conversion from format directives to regexes.

    Keys are single-character directive names (without the leading '%');
    values are regex fragments, most of them a named group carrying the
    directive's name.
    """

    def __init__(self, locale_time=None):
        """Create keys/values.

        Order of execution is important for dependency reasons.

        locale_time -- LocaleTime instance to take locale data from; a new
                       LocaleTime is created when this is omitted or falsy.

        """
        if locale_time:
            self.locale_time = locale_time
        else:
            self.locale_time = LocaleTime()
        base = super()
        base.__init__({
            # The " \d" part of the regex is to make %c from ANSI C work
            'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
            'f': r"(?P<f>[0-9]{1,6})",
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
            'G': r"(?P<G>\d\d\d\d)",
            'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
            'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            'u': r"(?P<u>[1-7])",
            'V': r"(?P<V>5[0-3]|0[1-9]|[1-4]\d|\d)",
            # W is set below by using 'U'
            'y': r"(?P<y>\d\d)",
            #XXX: Does 'Y' need to worry about having less or more than
            #     4 digits?
            'Y': r"(?P<Y>\d\d\d\d)",
            'z': r"(?P<z>[+-]\d\d[0-5]\d)",
            'A': self.__seqToRE(self.locale_time.f_weekday, 'A'),
            'a': self.__seqToRE(self.locale_time.a_weekday, 'a'),
            'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'),
            'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'),
            'p': self.__seqToRE(self.locale_time.am_pm, 'p'),
            'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone
                                        for tz in tz_names),
                                'Z'),
            '%': '%'})
        base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
        # The composite directives are built last because pattern() looks up
        # the simple directives stored above.
        base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
        base.__setitem__('x', self.pattern(self.locale_time.LC_date))
        base.__setitem__('X', self.pattern(self.locale_time.LC_time))

    def __seqToRE(self, to_convert, directive):
        """Convert a list to a regex string for matching a directive.

        Want possible matching values to be from longest to shortest.  This
        prevents the possibility of a match occurring for a value that is also
        a substring of a larger value that should have matched (e.g., 'abc'
        matching when 'abcdef' should have been the match).

        Returns '' when every value in to_convert is the empty string (or
        to_convert is empty); otherwise a named group "(?P<directive>...)"
        of the escaped alternatives.

        """
        to_convert = sorted(to_convert, key=len, reverse=True)
        for value in to_convert:
            if value != '':
                break
        else:
            return ''
        regex = '|'.join(re_escape(stuff) for stuff in to_convert)
        regex = '(?P<%s>%s' % (directive, regex)
        return '%s)' % regex

    def pattern(self, format):
        """Return regex pattern for the format string.

        Need to make sure that any characters that might be interpreted as
        regex syntax are escaped.

        Raises KeyError for a directive that is not a key of this mapping and
        IndexError when the format ends with a lone '%'.

        """
        processed_format = ''
        # The sub() call escapes all characters that might be misconstrued
        # as regex syntax.  Cannot use re.escape since we have to deal with
        # format directives (%m, etc.).
        regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
        format = regex_chars.sub(r"\\\1", format)
        # Any run of whitespace in the format matches any run of whitespace
        # in the data.
        whitespace_replacement = re_compile(r'\s+')
        format = whitespace_replacement.sub(r'\\s+', format)
        while '%' in format:
            directive_index = format.index('%')+1
            processed_format = "%s%s%s" % (processed_format,
                                           format[:directive_index-1],
                                           self[format[directive_index]])
            format = format[directive_index+1:]
        return "%s%s" % (processed_format, format)

    def compile(self, format):
        """Return a compiled re object for the format string."""
        return re_compile(self.pattern(format), IGNORECASE)

_cache_lock = _thread_allocate_lock()
# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock
# first!
_TimeRE_cache = TimeRE()
_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache
_regex_cache = {}

def _calc_julian_from_U_or_W(year, week_of_year, day_of_week, week_starts_Mon):
    """Calculate the Julian day based on the year, week of the year, and day of
    the week.

    day_of_week is numbered with Monday as 0.  week_starts_Mon is true when
    the week of the year assumes weeks start on Monday (%W) and false when
    they start on Sunday (%U).

    The result can be zero or negative when the requested day of week 0
    falls in the previous calendar year.
    """
    first_weekday = datetime_date(year, 1, 1).weekday()
    # If we are dealing with the %U directive (week starts on Sunday), it's
    # easier to just shift the view to Sunday being the first day of the
    # week.
    if not week_starts_Mon:
        first_weekday = (first_weekday + 1) % 7
        day_of_week = (day_of_week + 1) % 7
    # Need to watch out for a week 0 (when the first day of the year is not
    # the same as that specified by %U or %W).
    week_0_length = (7 - first_weekday) % 7
    if week_of_year == 0:
        return 1 + day_of_week - first_weekday
    else:
        days_to_week = week_0_length + (7 * (week_of_year - 1))
        return 1 + days_to_week + day_of_week


def _calc_julian_from_V(iso_year, iso_week, iso_weekday):
    """Calculate the Julian day based on the ISO 8601 year, week, and weekday.
    ISO weeks start on Mondays, with week 01 being the week containing 4 Jan.
    ISO week days range from 1 (Monday) to 7 (Sunday).

    Returns a (year, day_of_year) tuple; the year is iso_year - 1 when the
    date falls in the previous calendar year.
    """
    correction = datetime_date(iso_year, 1, 4).isoweekday() + 3
    ordinal = (iso_week * 7) + iso_weekday - correction
    # ordinal may be negative or 0 now, which means the date is in the previous
    # calendar year
    if ordinal < 1:
        ordinal += datetime_date(iso_year, 1, 1).toordinal()
        iso_year -= 1
        ordinal -= datetime_date(iso_year, 1, 1).toordinal()
    return iso_year, ordinal


def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Parse data_string according to format.

    Return a 2-tuple: an 11-item tuple
    (year, month, day, hour, minute, second, weekday, julian, tz, tzname,
    gmtoff) and an int containing the number of microseconds.

    weekday is numbered with Monday as 0; tz is -1 when unknown, otherwise
    the index into LocaleTime.timezone of the matched %Z name; tzname is the
    matched %Z text or None; gmtoff is the %z offset in seconds or None.

    Raises TypeError when either argument is not a str, and ValueError when
    the format is invalid, the data does not match it, unconverted data
    remains, or incompatible ISO directives are combined.
    """

    for index, arg in enumerate([data_string, format]):
        if not isinstance(arg, str):
            msg = "strptime() argument {} must be str, not {}"
            raise TypeError(msg.format(index, type(arg)))

    global _TimeRE_cache, _regex_cache
    with _cache_lock:
        locale_time = _TimeRE_cache.locale_time
        # Rebuild the directive table if the locale or timezone changed since
        # it was cached.
        if (_getlang() != locale_time.lang or
            time.tzname != locale_time.tzname or
            time.daylight != locale_time.daylight):
            _TimeRE_cache = TimeRE()
            _regex_cache.clear()
            locale_time = _TimeRE_cache.locale_time
        if len(_regex_cache) > _CACHE_MAX_SIZE:
            _regex_cache.clear()
        format_regex = _regex_cache.get(format)
        if not format_regex:
            try:
                format_regex = _TimeRE_cache.compile(format)
            # KeyError raised when a bad format is found; can be specified as
            # \\, in which case it was a stray % but with a space after it
            except KeyError as err:
                bad_directive = err.args[0]
                if bad_directive == "\\":
                    bad_directive = "%"
                del err
                raise ValueError("'%s' is a bad directive in format '%s'" %
                                    (bad_directive, format)) from None
            # IndexError only occurs when the format string is "%"
            except IndexError:
                raise ValueError("stray %% in format '%s'" % format) from None
            _regex_cache[format] = format_regex
    found = format_regex.match(data_string)
    if not found:
        raise ValueError("time data %r does not match format %r" %
                         (data_string, format))
    if len(data_string) != found.end():
        raise ValueError("unconverted data remains: %s" %
                          data_string[found.end():])

    iso_year = year = None
    month = day = 1
    hour = minute = second = fraction = 0
    # Default to -1 to signify that the value is not known; not critical to
    # have, though
    tz = -1
    tzoffset = None
    iso_week = week_of_year = None
    week_of_year_start = None
    # weekday and julian defaulted to None so as to signal need to calculate
    # values
    weekday = julian = None
    found_dict = found.groupdict()
    for group_key in found_dict:
        # Directives not explicitly handled below:
        #   c, x, X
        #      handled by making out of other directives
        #   U, W
        #      worthless without day of the week
        if group_key == 'y':
            year = int(found_dict['y'])
            # Open Group specification for strptime() states that a %y
            #value in the range of [00, 68] is in the century 2000, while
            #[69,99] is in the century 1900
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'G':
            iso_year = int(found_dict['G'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            month = locale_time.f_month.index(found_dict['B'].lower())
        elif group_key == 'b':
            month = locale_time.a_month.index(found_dict['b'].lower())
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            ampm = found_dict.get('p', '').lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ('', locale_time.am_pm[0]):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1]:
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'f':
            s = found_dict['f']
            # Pad to always return microseconds.
            s += "0" * (6 - len(s))
            fraction = int(s)
        elif group_key == 'A':
            weekday = locale_time.f_weekday.index(found_dict['A'].lower())
        elif group_key == 'a':
            weekday = locale_time.a_weekday.index(found_dict['a'].lower())
        elif group_key == 'w':
            # %w counts from Sunday == 0; convert to Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'u':
            # %u counts from Monday == 1; convert to Monday == 0.
            weekday = int(found_dict['u'])
            weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key in ('U', 'W'):
            week_of_year = int(found_dict[group_key])
            if group_key == 'U':
                # U starts week on Sunday.
                week_of_year_start = 6
            else:
                # W starts week on Monday.
                week_of_year_start = 0
        elif group_key == 'V':
            iso_week = int(found_dict['V'])
        elif group_key == 'z':
            # tzoffset is kept in minutes here; converted to seconds below.
            z = found_dict['z']
            tzoffset = int(z[1:3]) * 60 + int(z[3:5])
            if z.startswith("-"):
                tzoffset = -tzoffset
        elif group_key == 'Z':
            # Since -1 is default value only need to worry about setting tz if
            # it can be something other than -1.
            found_zone = found_dict['Z'].lower()
            for value, tz_values in enumerate(locale_time.timezone):
                if found_zone in tz_values:
                    # Deal with bad locale setup where timezone names are the
                    # same and yet time.daylight is true; too ambiguous to
                    # be able to tell what timezone has daylight savings
                    if (time.tzname[0] == time.tzname[1] and
                       time.daylight and found_zone not in ("utc", "gmt")):
                        break
                    else:
                        tz = value
                        break
    # Deal with the cases where ambiguities arise
    # don't assume default values for ISO week/year
    if year is None and iso_year is not None:
        if iso_week is None or weekday is None:
            raise ValueError("ISO year directive '%G' must be used with "
                             "the ISO week directive '%V' and a weekday "
                             "directive ('%A', '%a', '%w', or '%u').")
        if julian is not None:
            raise ValueError("Day of the year directive '%j' is not "
                             "compatible with ISO year directive '%G'. "
                             "Use '%Y' instead.")
    elif week_of_year is None and iso_week is not None:
        if weekday is None:
            raise ValueError("ISO week directive '%V' must be used with "
                             "the ISO year directive '%G' and a weekday "
                             "directive ('%A', '%a', '%w', or '%u').")
        else:
            raise ValueError("ISO week directive '%V' is incompatible with "
                             "the year directive '%Y'. Use the ISO year '%G' "
                             "instead.")

    leap_year_fix = False
    if year is None and month == 2 and day == 29:
        year = 1904  # 1904 is first leap year of 20th century
        leap_year_fix = True
    elif year is None:
        year = 1900


    # If we know the week of the year and what day of that week, we can figure
    # out the Julian day of the year.
    if julian is None and weekday is not None:
        if week_of_year is not None:
            week_starts_Mon = week_of_year_start == 0
            julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
                                                week_starts_Mon)
        elif iso_year is not None and iso_week is not None:
            year, julian = _calc_julian_from_V(iso_year, iso_week, weekday + 1)
        # A day in week 0 can land in the previous calendar year; move the
        # year back and re-express julian relative to that year.
        if julian is not None and julian <= 0:
            year -= 1
            yday = 366 if calendar.isleap(year) else 365
            julian += yday

    if julian is None:
        # Cannot pre-calculate datetime_date() since can change in Julian
        # calculation and thus could have different value for the day of
        # the week calculation.
        # Need to add 1 to result since first day of the year is 1, not 0.
        julian = datetime_date(year, month, day).toordinal() - \
                  datetime_date(year, 1, 1).toordinal() + 1
    else:  # Assume that if they bothered to include Julian day (or if it was
           # calculated above with year/week/weekday) it will be accurate.
        datetime_result = datetime_date.fromordinal(
                            (julian - 1) +
                            datetime_date(year, 1, 1).toordinal())
        year = datetime_result.year
        month = datetime_result.month
        day = datetime_result.day
    if weekday is None:
        weekday = datetime_date(year, month, day).weekday()
    # Add timezone info
    tzname = found_dict.get("Z")
    if tzoffset is not None:
        gmtoff = tzoffset * 60
    else:
        gmtoff = None

    if leap_year_fix:
        # the caller didn't supply a year but asked for Feb 29th. We couldn't
        # use the default of 1900 for computations. We set it back to ensure
        # that February 29th is smaller than March 1st.
        year = 1900

    return (year, month, day,
            hour, minute, second,
            weekday, julian, tz, tzname, gmtoff), fraction

def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Return a time struct based on the input string and the
    format string."""
    tt = _strptime(data_string, format)[0]
    return time.struct_time(tt[:time._STRUCT_TM_ITEMS])

def _strptime_datetime(cls, data_string, format="%a %b %d %H:%M:%S %Y"):
    """Return a class cls instance based on the input string and the
    format string.

    cls is called with (year, month, day, hour, minute, second, microsecond)
    and, when the input carried a %z offset, a trailing datetime.timezone.
    """
    tt, fraction = _strptime(data_string, format)
    tzname, gmtoff = tt[-2:]
    args = tt[:6] + (fraction,)
    if gmtoff is not None:
        tzdelta = datetime_timedelta(seconds=gmtoff)
        if tzname:
            tz = datetime_timezone(tzdelta, tzname)
        else:
            tz = datetime_timezone(tzdelta)
        args += (tz,)

    return cls(*args)