1# -*- coding: utf-8 -*- 2""" 3This module offers a generic date/time string parser which is able to parse 4most known formats to represent a date and/or time. 5 6This module attempts to be forgiving with regards to unlikely input formats, 7returning a datetime object even for dates which are ambiguous. If an element 8of a date/time stamp is omitted, the following rules are applied: 9 10- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour 11 on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is 12 specified. 13- If a time zone is omitted, a timezone-naive datetime is returned. 14 15If any other elements are missing, they are taken from the 16:class:`datetime.datetime` object passed to the parameter ``default``. If this 17results in a day number exceeding the valid number of days per month, the 18value falls back to the end of the month. 19 20Additional resources about date/time string formats can be found below: 21 22- `A summary of the international standard date and time notation 23 <http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_ 24- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_ 25- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_ 26- `CPAN ParseDate module 27 <http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_ 28- `Java SimpleDateFormat Class 29 <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_ 30""" 31from __future__ import unicode_literals 32 33import datetime 34import re 35import string 36import time 37import warnings 38 39from calendar import monthrange 40from io import StringIO 41 42import six 43from six import integer_types, text_type 44 45from decimal import Decimal 46 47from warnings import warn 48 49from .. import relativedelta 50from .. import tz 51 52__all__ = ["parse", "parserinfo"] 53 54 55# TODO: pandas.core.tools.datetimes imports this explicitly. 
# Might be worth making public and/or figuring out if there is something we
# can take off their plate.
class _timelex(object):
    """
    Lexer that breaks a date/time string (or character stream) into tokens.

    Instances are iterators: each step returns the next token produced by
    :meth:`get_token` and iteration stops once that method returns ``None``.
    """
    # Fractional seconds are sometimes split by a comma
    _split_decimal = re.compile("([.,])")

    def __init__(self, instream):
        """
        :param instream:
            Text, bytes, or an object with a ``read`` method. Bytes-like
            input is decoded; text is wrapped in a ``StringIO``.

        :raises TypeError:
            If ``instream`` is neither text nor an object with ``read``.
        """
        if six.PY2:
            # In Python 2, we can't duck type properly because unicode has
            # a 'decode' function, and we'd be double-decoding
            if isinstance(instream, (bytes, bytearray)):
                instream = instream.decode()
        else:
            if getattr(instream, 'decode', None) is not None:
                instream = instream.decode()

        if isinstance(instream, text_type):
            instream = StringIO(instream)
        elif getattr(instream, 'read', None) is None:
            raise TypeError('Parser must be a string or character stream, not '
                            '{itype}'.format(itype=instream.__class__.__name__))

        self.instream = instream
        # Characters read from the stream but not yet consumed by a token.
        self.charstack = []
        # Tokens already split off and waiting to be returned.
        self.tokenstack = []
        self.eof = False

    def get_token(self):
        """
        This function breaks the time string into lexical units (tokens), which
        can be parsed by the parser. Lexical units are demarcated by changes in
        the character set, so any continuous string of letters is considered
        one unit, any continuous string of numbers is considered one unit.

        The main complication arises from the fact that dots ('.') can be used
        both as separators (e.g. "Sep.20.2009") or decimal points (e.g.
        "4:30:21.447"). As such, it is necessary to read the full context of
        any dot-separated strings before breaking it into tokens; as such, this
        function maintains a "token stack", for when the ambiguous context
        demands that multiple tokens be parsed at once.

        :return:
            The next token, or ``None`` when the input is exhausted.
        """
        # Hand out tokens queued by an earlier split before reading more input.
        if self.tokenstack:
            return self.tokenstack.pop(0)

        seenletters = False
        token = None
        # Lexer state: None (no token yet), 'a' (word), '0' (number),
        # 'a.' / '0.' (word / number that has absorbed a dot or comma).
        state = None

        while not self.eof:
            # We only realize that we've reached the end of a token when we
            # find a character that's not part of the current token - since
            # that character may be part of the next token, it's stored in the
            # charstack.
            if self.charstack:
                nextchar = self.charstack.pop(0)
            else:
                nextchar = self.instream.read(1)
                # NUL characters are skipped entirely.
                while nextchar == '\x00':
                    nextchar = self.instream.read(1)

            if not nextchar:
                self.eof = True
                break
            elif not state:
                # First character of the token - determines if we're starting
                # to parse a word, a number or something else.
                token = nextchar
                if self.isword(nextchar):
                    state = 'a'
                elif self.isnum(nextchar):
                    state = '0'
                elif self.isspace(nextchar):
                    # Any whitespace character is emitted as a single space.
                    token = ' '
                    break  # emit token
                else:
                    break  # emit token
            elif state == 'a':
                # If we've already started reading a word, we keep reading
                # letters until we find something that's not part of a word.
                seenletters = True
                if self.isword(nextchar):
                    token += nextchar
                elif nextchar == '.':
                    token += nextchar
                    state = 'a.'
                else:
                    self.charstack.append(nextchar)
                    break  # emit token
            elif state == '0':
                # If we've already started reading a number, we keep reading
                # numbers until we find something that doesn't fit.
                if self.isnum(nextchar):
                    token += nextchar
                elif nextchar == '.' or (nextchar == ',' and len(token) >= 2):
                    token += nextchar
                    state = '0.'
                else:
                    self.charstack.append(nextchar)
                    break  # emit token
            elif state == 'a.':
                # If we've seen some letters and a dot separator, continue
                # parsing, and the tokens will be broken up later.
                seenletters = True
                if nextchar == '.' or self.isword(nextchar):
                    token += nextchar
                elif self.isnum(nextchar) and token[-1] == '.':
                    token += nextchar
                    state = '0.'
                else:
                    self.charstack.append(nextchar)
                    break  # emit token
            elif state == '0.':
                # If we've seen at least one dot separator, keep going, we'll
                # break up the tokens later.
                if nextchar == '.' or self.isnum(nextchar):
                    token += nextchar
                elif self.isword(nextchar) and token[-1] == '.':
                    token += nextchar
                    state = 'a.'
                else:
                    self.charstack.append(nextchar)
                    break  # emit token

        # A dotted token that contains letters, more than one dot, or a
        # trailing separator is not a decimal number: split it on '.'/','
        # and queue everything after the first piece.
        if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or
                                       token[-1] in '.,')):
            l = self._split_decimal.split(token)
            token = l[0]
            for tok in l[1:]:
                if tok:
                    self.tokenstack.append(tok)

        # A number whose only separator was a comma: normalise it to a dot.
        if state == '0.' and token.count('.') == 0:
            token = token.replace(',', '.')

        return token

    def __iter__(self):
        return self

    def __next__(self):
        token = self.get_token()
        if token is None:
            raise StopIteration

        return token

    def next(self):
        return self.__next__()  # Python 2.x support

    @classmethod
    def split(cls, s):
        """ Return the list of all tokens produced from ``s`` """
        return list(cls(s))

    @classmethod
    def isword(cls, nextchar):
        """ Whether or not the next character is part of a word """
        return nextchar.isalpha()

    @classmethod
    def isnum(cls, nextchar):
        """ Whether the next character is part of a number """
        return nextchar.isdigit()

    @classmethod
    def isspace(cls, nextchar):
        """ Whether the next character is whitespace """
        return nextchar.isspace()
or self.isword(nextchar): 160 token += nextchar 161 elif self.isnum(nextchar) and token[-1] == '.': 162 token += nextchar 163 state = '0.' 164 else: 165 self.charstack.append(nextchar) 166 break # emit token 167 elif state == '0.': 168 # If we've seen at least one dot separator, keep going, we'll 169 # break up the tokens later. 170 if nextchar == '.' or self.isnum(nextchar): 171 token += nextchar 172 elif self.isword(nextchar) and token[-1] == '.': 173 token += nextchar 174 state = 'a.' 175 else: 176 self.charstack.append(nextchar) 177 break # emit token 178 179 if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or 180 token[-1] in '.,')): 181 l = self._split_decimal.split(token) 182 token = l[0] 183 for tok in l[1:]: 184 if tok: 185 self.tokenstack.append(tok) 186 187 if state == '0.' and token.count('.') == 0: 188 token = token.replace(',', '.') 189 190 return token 191 192 def __iter__(self): 193 return self 194 195 def __next__(self): 196 token = self.get_token() 197 if token is None: 198 raise StopIteration 199 200 return token 201 202 def next(self): 203 return self.__next__() # Python 2.x support 204 205 @classmethod 206 def split(cls, s): 207 return list(cls(s)) 208 209 @classmethod 210 def isword(cls, nextchar): 211 """ Whether or not the next character is part of a word """ 212 return nextchar.isalpha() 213 214 @classmethod 215 def isnum(cls, nextchar): 216 """ Whether the next character is part of a number """ 217 return nextchar.isdigit() 218 219 @classmethod 220 def isspace(cls, nextchar): 221 """ Whether the next character is whitespace """ 222 return nextchar.isspace() 223 224 225class _resultbase(object): 226 227 def __init__(self): 228 for attr in self.__slots__: 229 setattr(self, attr, None) 230 231 def _repr(self, classname): 232 l = [] 233 for attr in self.__slots__: 234 value = getattr(self, attr) 235 if value is not None: 236 l.append("%s=%s" % (attr, repr(value))) 237 return "%s(%s)" % (classname, ", ".join(l)) 238 239 def 
class parserinfo(object):
    """
    Class which handles what inputs are accepted. Subclass this to customize
    the language and acceptable values for each parameter.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. Default is ``False``.

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        Default is ``False``.
    """

    # m from a.m/p.m, t from ISO T separator
    JUMP = [" ", ".", ",", ";", "-", "/", "'",
            "at", "on", "and", "ad", "m", "t", "of",
            "st", "nd", "rd", "th"]

    WEEKDAYS = [("Mon", "Monday"),
                ("Tue", "Tuesday"),     # TODO: "Tues"
                ("Wed", "Wednesday"),
                ("Thu", "Thursday"),    # TODO: "Thurs"
                ("Fri", "Friday"),
                ("Sat", "Saturday"),
                ("Sun", "Sunday")]
    MONTHS = [("Jan", "January"),
              ("Feb", "February"),      # TODO: "Febr"
              ("Mar", "March"),
              ("Apr", "April"),
              ("May", "May"),
              ("Jun", "June"),
              ("Jul", "July"),
              ("Aug", "August"),
              ("Sep", "Sept", "September"),
              ("Oct", "October"),
              ("Nov", "November"),
              ("Dec", "December")]
    HMS = [("h", "hour", "hours"),
           ("m", "minute", "minutes"),
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
    UTCZONE = ["UTC", "GMT", "Z", "z"]
    PERTAIN = ["of"]
    TZOFFSET = {}
    # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
    #              "Anno Domini", "Year of Our Lord"]

    def __init__(self, dayfirst=False, yearfirst=False):
        # Lower-cased lookup tables built from the class-level word lists.
        self._jump = self._convert(self.JUMP)
        self._weekdays = self._convert(self.WEEKDAYS)
        self._months = self._convert(self.MONTHS)
        self._hms = self._convert(self.HMS)
        self._ampm = self._convert(self.AMPM)
        self._utczone = self._convert(self.UTCZONE)
        self._pertain = self._convert(self.PERTAIN)

        self.dayfirst = dayfirst
        self.yearfirst = yearfirst

        # Reference year / century used to expand two-digit years.
        self._year = time.localtime().tm_year
        self._century = self._year // 100 * 100

    def _convert(self, lst):
        """
        Build a ``{lower-cased word: position}`` dict from ``lst``.

        Each entry is either a single string or a tuple of aliases; every
        alias of an entry maps to that entry's position in ``lst``.
        """
        dct = {}
        for i, entry in enumerate(lst):
            if isinstance(entry, tuple):
                for alias in entry:
                    dct[alias.lower()] = i
            else:
                dct[entry.lower()] = i
        return dct

    def jump(self, name):
        """ Whether ``name`` is one of the ``JUMP`` words (case-insensitive) """
        return name.lower() in self._jump

    def weekday(self, name):
        """ Position of ``name`` in ``WEEKDAYS`` (Monday is 0), or ``None`` """
        return self._weekdays.get(name.lower())

    def month(self, name):
        """ Month number (January is 1) for ``name``, or ``None`` """
        idx = self._months.get(name.lower())
        if idx is None:
            return None
        return idx + 1

    def hms(self, name):
        """ Position of ``name`` in ``HMS`` (0 hour, 1 minute, 2 second), or ``None`` """
        return self._hms.get(name.lower())

    def ampm(self, name):
        """ Position of ``name`` in ``AMPM`` (0 for AM, 1 for PM), or ``None`` """
        return self._ampm.get(name.lower())

    def pertain(self, name):
        """ Whether ``name`` is one of the ``PERTAIN`` words (case-insensitive) """
        return name.lower() in self._pertain

    def utczone(self, name):
        """ Whether ``name`` is one of the ``UTCZONE`` names (case-insensitive) """
        return name.lower() in self._utczone

    def tzoffset(self, name):
        """
        Return the offset registered for the time zone ``name``.

        :return:
            ``0`` for any ``UTCZONE`` name, otherwise the ``TZOFFSET`` entry
            for ``name``, or ``None`` if there is none.
        """
        # The keys of ``_utczone`` are lower-cased by ``_convert``, so the
        # membership test must be case-insensitive as well; testing the raw
        # name would never match "UTC", "GMT" or "Z".
        if self.utczone(name):
            return 0

        return self.TZOFFSET.get(name)

    def convertyear(self, year, century_specified=False):
        """
        Converts two-digit years to year within [-50, 49]
        range of self._year (current local time)
        """

        # Function contract is that the year is always positive
        assert year >= 0

        if year < 100 and not century_specified:
            # assume current century to start
            year += self._century

            if year >= self._year + 50:  # if too far in future
                year -= 100
            elif year < self._year - 50:  # if too far in past
                year += 100

        return year

    def validate(self, res):
        """
        Normalise the year and UTC time zone fields of ``res`` in place.

        :return:
            Always ``True``.
        """
        # move to info
        if res.year is not None:
            res.year = self.convertyear(res.year, res.century_specified)

        if ((res.tzoffset == 0 and not res.tzname) or
                (res.tzname == 'Z' or res.tzname == 'z')):
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
            res.tzoffset = 0
        return True
385 386 def validate(self, res): 387 # move to info 388 if res.year is not None: 389 res.year = self.convertyear(res.year, res.century_specified) 390 391 if ((res.tzoffset == 0 and not res.tzname) or 392 (res.tzname == 'Z' or res.tzname == 'z')): 393 res.tzname = "UTC" 394 res.tzoffset = 0 395 elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname): 396 res.tzoffset = 0 397 return True 398 399 400class _ymd(list): 401 def __init__(self, *args, **kwargs): 402 super(self.__class__, self).__init__(*args, **kwargs) 403 self.century_specified = False 404 self.dstridx = None 405 self.mstridx = None 406 self.ystridx = None 407 408 @property 409 def has_year(self): 410 return self.ystridx is not None 411 412 @property 413 def has_month(self): 414 return self.mstridx is not None 415 416 @property 417 def has_day(self): 418 return self.dstridx is not None 419 420 def could_be_day(self, value): 421 if self.has_day: 422 return False 423 elif not self.has_month: 424 return 1 <= value <= 31 425 elif not self.has_year: 426 # Be permissive, assume leapyear 427 month = self[self.mstridx] 428 return 1 <= value <= monthrange(2000, month)[1] 429 else: 430 month = self[self.mstridx] 431 year = self[self.ystridx] 432 return 1 <= value <= monthrange(year, month)[1] 433 434 def append(self, val, label=None): 435 if hasattr(val, '__len__'): 436 if val.isdigit() and len(val) > 2: 437 self.century_specified = True 438 if label not in [None, 'Y']: # pragma: no cover 439 raise ValueError(label) 440 label = 'Y' 441 elif val > 100: 442 self.century_specified = True 443 if label not in [None, 'Y']: # pragma: no cover 444 raise ValueError(label) 445 label = 'Y' 446 447 super(self.__class__, self).append(int(val)) 448 449 if label == 'M': 450 if self.has_month: 451 raise ValueError('Month is already set') 452 self.mstridx = len(self) - 1 453 elif label == 'D': 454 if self.has_day: 455 raise ValueError('Day is already set') 456 self.dstridx = len(self) - 1 457 elif label == 'Y': 458 if 
self.has_year: 459 raise ValueError('Year is already set') 460 self.ystridx = len(self) - 1 461 462 def _resolve_from_stridxs(self, strids): 463 """ 464 Try to resolve the identities of year/month/day elements using 465 ystridx, mstridx, and dstridx, if enough of these are specified. 466 """ 467 if len(self) == 3 and len(strids) == 2: 468 # we can back out the remaining stridx value 469 missing = [x for x in range(3) if x not in strids.values()] 470 key = [x for x in ['y', 'm', 'd'] if x not in strids] 471 assert len(missing) == len(key) == 1 472 key = key[0] 473 val = missing[0] 474 strids[key] = val 475 476 assert len(self) == len(strids) # otherwise this should not be called 477 out = {key: self[strids[key]] for key in strids} 478 return (out.get('y'), out.get('m'), out.get('d')) 479 480 def resolve_ymd(self, yearfirst, dayfirst): 481 len_ymd = len(self) 482 year, month, day = (None, None, None) 483 484 strids = (('y', self.ystridx), 485 ('m', self.mstridx), 486 ('d', self.dstridx)) 487 488 strids = {key: val for key, val in strids if val is not None} 489 if (len(self) == len(strids) > 0 or 490 (len(self) == 3 and len(strids) == 2)): 491 return self._resolve_from_stridxs(strids) 492 493 mstridx = self.mstridx 494 495 if len_ymd > 3: 496 raise ValueError("More than three YMD values") 497 elif len_ymd == 1 or (mstridx is not None and len_ymd == 2): 498 # One member, or two members with a month string 499 if mstridx is not None: 500 month = self[mstridx] 501 # since mstridx is 0 or 1, self[mstridx-1] always 502 # looks up the other element 503 other = self[mstridx - 1] 504 else: 505 other = self[0] 506 507 if len_ymd > 1 or mstridx is None: 508 if other > 31: 509 year = other 510 else: 511 day = other 512 513 elif len_ymd == 2: 514 # Two members with numbers 515 if self[0] > 31: 516 # 99-01 517 year, month = self 518 elif self[1] > 31: 519 # 01-99 520 month, year = self 521 elif dayfirst and self[1] <= 12: 522 # 13-01 523 day, month = self 524 else: 525 # 01-13 526 
month, day = self 527 528 elif len_ymd == 3: 529 # Three members 530 if mstridx == 0: 531 if self[1] > 31: 532 # Apr-2003-25 533 month, year, day = self 534 else: 535 month, day, year = self 536 elif mstridx == 1: 537 if self[0] > 31 or (yearfirst and self[2] <= 31): 538 # 99-Jan-01 539 year, month, day = self 540 else: 541 # 01-Jan-01 542 # Give precendence to day-first, since 543 # two-digit years is usually hand-written. 544 day, month, year = self 545 546 elif mstridx == 2: 547 # WTF!? 548 if self[1] > 31: 549 # 01-99-Jan 550 day, year, month = self 551 else: 552 # 99-01-Jan 553 year, day, month = self 554 555 else: 556 if (self[0] > 31 or 557 self.ystridx == 0 or 558 (yearfirst and self[1] <= 12 and self[2] <= 31)): 559 # 99-01-01 560 if dayfirst and self[2] <= 12: 561 year, day, month = self 562 else: 563 year, month, day = self 564 elif self[0] > 12 or (dayfirst and self[1] <= 12): 565 # 13-01-01 566 day, month, year = self 567 else: 568 # 01-13-01 569 month, day, year = self 570 571 return year, month, day 572 573 574class parser(object): 575 def __init__(self, info=None): 576 self.info = info or parserinfo() 577 578 def parse(self, timestr, default=None, 579 ignoretz=False, tzinfos=None, **kwargs): 580 """ 581 Parse the date/time string into a :class:`datetime.datetime` object. 582 583 :param timestr: 584 Any date/time string using the supported formats. 585 586 :param default: 587 The default datetime object, if this is a datetime object and not 588 ``None``, elements specified in ``timestr`` replace elements in the 589 default object. 590 591 :param ignoretz: 592 If set ``True``, time zones in parsed strings are ignored and a 593 naive :class:`datetime.datetime` object is returned. 594 595 :param tzinfos: 596 Additional time zone names / aliases which may be present in the 597 string. This argument maps time zone names (and optionally offsets 598 from those time zones) to time zones. 
This parameter can be a 599 dictionary with timezone aliases mapping time zone names to time 600 zones or a function taking two parameters (``tzname`` and 601 ``tzoffset``) and returning a time zone. 602 603 The timezones to which the names are mapped can be an integer 604 offset from UTC in seconds or a :class:`tzinfo` object. 605 606 .. doctest:: 607 :options: +NORMALIZE_WHITESPACE 608 609 >>> from dateutil.parser import parse 610 >>> from dateutil.tz import gettz 611 >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")} 612 >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos) 613 datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200)) 614 >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos) 615 datetime.datetime(2012, 1, 19, 17, 21, 616 tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago')) 617 618 This parameter is ignored if ``ignoretz`` is set. 619 620 :param \\*\\*kwargs: 621 Keyword arguments as passed to ``_parse()``. 622 623 :return: 624 Returns a :class:`datetime.datetime` object or, if the 625 ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the 626 first element being a :class:`datetime.datetime` object, the second 627 a tuple containing the fuzzy tokens. 628 629 :raises ValueError: 630 Raised for invalid or unknown string format, if the provided 631 :class:`tzinfo` is not in a valid format, or if an invalid date 632 would be created. 633 634 :raises TypeError: 635 Raised for non-string or character stream input. 636 637 :raises OverflowError: 638 Raised if the parsed date exceeds the largest valid C integer on 639 your system. 
640 """ 641 642 if default is None: 643 default = datetime.datetime.now().replace(hour=0, minute=0, 644 second=0, microsecond=0) 645 646 res, skipped_tokens = self._parse(timestr, **kwargs) 647 648 if res is None: 649 raise ValueError("Unknown string format:", timestr) 650 651 if len(res) == 0: 652 raise ValueError("String does not contain a date:", timestr) 653 654 ret = self._build_naive(res, default) 655 656 if not ignoretz: 657 ret = self._build_tzaware(ret, res, tzinfos) 658 659 if kwargs.get('fuzzy_with_tokens', False): 660 return ret, skipped_tokens 661 else: 662 return ret 663 664 class _result(_resultbase): 665 __slots__ = ["year", "month", "day", "weekday", 666 "hour", "minute", "second", "microsecond", 667 "tzname", "tzoffset", "ampm","any_unused_tokens"] 668 669 def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False, 670 fuzzy_with_tokens=False): 671 """ 672 Private method which performs the heavy lifting of parsing, called from 673 ``parse()``, which passes on its ``kwargs`` to this function. 674 675 :param timestr: 676 The string to parse. 677 678 :param dayfirst: 679 Whether to interpret the first value in an ambiguous 3-integer date 680 (e.g. 01/05/09) as the day (``True``) or month (``False``). If 681 ``yearfirst`` is set to ``True``, this distinguishes between YDM 682 and YMD. If set to ``None``, this value is retrieved from the 683 current :class:`parserinfo` object (which itself defaults to 684 ``False``). 685 686 :param yearfirst: 687 Whether to interpret the first value in an ambiguous 3-integer date 688 (e.g. 01/05/09) as the year. If ``True``, the first number is taken 689 to be the year, otherwise the last number is taken to be the year. 690 If this is set to ``None``, the value is retrieved from the current 691 :class:`parserinfo` object (which itself defaults to ``False``). 692 693 :param fuzzy: 694 Whether to allow fuzzy parsing, allowing for string like "Today is 695 January 1, 2047 at 8:21:00AM". 
696 697 :param fuzzy_with_tokens: 698 If ``True``, ``fuzzy`` is automatically set to True, and the parser 699 will return a tuple where the first element is the parsed 700 :class:`datetime.datetime` datetimestamp and the second element is 701 a tuple containing the portions of the string which were ignored: 702 703 .. doctest:: 704 705 >>> from dateutil.parser import parse 706 >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True) 707 (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at ')) 708 709 """ 710 if fuzzy_with_tokens: 711 fuzzy = True 712 713 info = self.info 714 715 if dayfirst is None: 716 dayfirst = info.dayfirst 717 718 if yearfirst is None: 719 yearfirst = info.yearfirst 720 721 res = self._result() 722 l = _timelex.split(timestr) # Splits the timestr into tokens 723 724 skipped_idxs = [] 725 726 # year/month/day list 727 ymd = _ymd() 728 729 len_l = len(l) 730 i = 0 731 try: 732 while i < len_l: 733 734 # Check if it's a number 735 value_repr = l[i] 736 try: 737 value = float(value_repr) 738 except ValueError: 739 value = None 740 741 if value is not None: 742 # Numeric token 743 i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy) 744 745 # Check weekday 746 elif info.weekday(l[i]) is not None: 747 value = info.weekday(l[i]) 748 res.weekday = value 749 750 # Check month name 751 elif info.month(l[i]) is not None: 752 value = info.month(l[i]) 753 ymd.append(value, 'M') 754 755 if i + 1 < len_l: 756 if l[i + 1] in ('-', '/'): 757 # Jan-01[-99] 758 sep = l[i + 1] 759 ymd.append(l[i + 2]) 760 761 if i + 3 < len_l and l[i + 3] == sep: 762 # Jan-01-99 763 ymd.append(l[i + 4]) 764 i += 2 765 766 i += 2 767 768 elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and 769 info.pertain(l[i + 2])): 770 # Jan of 01 771 # In this case, 01 is clearly year 772 if l[i + 4].isdigit(): 773 # Convert it here to become unambiguous 774 value = int(l[i + 4]) 775 year = str(info.convertyear(value)) 776 ymd.append(year, 'Y') 777 else: 
778 # Wrong guess 779 pass 780 # TODO: not hit in tests 781 i += 4 782 783 # Check am/pm 784 elif info.ampm(l[i]) is not None: 785 value = info.ampm(l[i]) 786 val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy) 787 788 if val_is_ampm: 789 res.hour = self._adjust_ampm(res.hour, value) 790 res.ampm = value 791 792 elif fuzzy: 793 skipped_idxs.append(i) 794 795 # Check for a timezone name 796 elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]): 797 res.tzname = l[i] 798 res.tzoffset = info.tzoffset(res.tzname) 799 800 # Check for something like GMT+3, or BRST+3. Notice 801 # that it doesn't mean "I am 3 hours after GMT", but 802 # "my time +3 is GMT". If found, we reverse the 803 # logic so that timezone parsing code will get it 804 # right. 805 if i + 1 < len_l and l[i + 1] in ('+', '-'): 806 l[i + 1] = ('+', '-')[l[i + 1] == '+'] 807 res.tzoffset = None 808 if info.utczone(res.tzname): 809 # With something like GMT+3, the timezone 810 # is *not* GMT. 811 res.tzname = None 812 813 # Check for a numbered timezone 814 elif res.hour is not None and l[i] in ('+', '-'): 815 signal = (-1, 1)[l[i] == '+'] 816 len_li = len(l[i + 1]) 817 818 # TODO: check that l[i + 1] is integer? 819 if len_li == 4: 820 # -0300 821 hour_offset = int(l[i + 1][:2]) 822 min_offset = int(l[i + 1][2:]) 823 elif i + 2 < len_l and l[i + 2] == ':': 824 # -03:00 825 hour_offset = int(l[i + 1]) 826 min_offset = int(l[i + 3]) # TODO: Check that l[i+3] is minute-like? 
827 i += 2 828 elif len_li <= 2: 829 # -[0]3 830 hour_offset = int(l[i + 1][:2]) 831 min_offset = 0 832 else: 833 raise ValueError(timestr) 834 835 res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60) 836 837 # Look for a timezone name between parenthesis 838 if (i + 5 < len_l and 839 info.jump(l[i + 2]) and l[i + 3] == '(' and 840 l[i + 5] == ')' and 841 3 <= len(l[i + 4]) and 842 self._could_be_tzname(res.hour, res.tzname, 843 None, l[i + 4])): 844 # -0300 (BRST) 845 res.tzname = l[i + 4] 846 i += 4 847 848 i += 1 849 850 # Check jumps 851 elif not (info.jump(l[i]) or fuzzy): 852 raise ValueError(timestr) 853 854 else: 855 skipped_idxs.append(i) 856 i += 1 857 858 # Process year/month/day 859 year, month, day = ymd.resolve_ymd(yearfirst, dayfirst) 860 861 res.century_specified = ymd.century_specified 862 res.year = year 863 res.month = month 864 res.day = day 865 866 except (IndexError, ValueError): 867 return None, None 868 869 if not info.validate(res): 870 return None, None 871 872 if fuzzy_with_tokens: 873 skipped_tokens = self._recombine_skipped(l, skipped_idxs) 874 return res, tuple(skipped_tokens) 875 else: 876 return res, None 877 878 def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy): 879 # Token is a number 880 value_repr = tokens[idx] 881 try: 882 value = self._to_decimal(value_repr) 883 except Exception as e: 884 six.raise_from(ValueError('Unknown numeric token'), e) 885 886 len_li = len(value_repr) 887 888 len_l = len(tokens) 889 890 if (len(ymd) == 3 and len_li in (2, 4) and 891 res.hour is None and 892 (idx + 1 >= len_l or 893 (tokens[idx + 1] != ':' and 894 info.hms(tokens[idx + 1]) is None))): 895 # 19990101T23[59] 896 s = tokens[idx] 897 res.hour = int(s[:2]) 898 899 if len_li == 4: 900 res.minute = int(s[2:]) 901 902 elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6): 903 # YYMMDD or HHMMSS[.ss] 904 s = tokens[idx] 905 906 if not ymd and '.' 
not in tokens[idx]: 907 ymd.append(s[:2]) 908 ymd.append(s[2:4]) 909 ymd.append(s[4:]) 910 else: 911 # 19990101T235959[.59] 912 913 # TODO: Check if res attributes already set. 914 res.hour = int(s[:2]) 915 res.minute = int(s[2:4]) 916 res.second, res.microsecond = self._parsems(s[4:]) 917 918 elif len_li in (8, 12, 14): 919 # YYYYMMDD 920 s = tokens[idx] 921 ymd.append(s[:4], 'Y') 922 ymd.append(s[4:6]) 923 ymd.append(s[6:8]) 924 925 if len_li > 8: 926 res.hour = int(s[8:10]) 927 res.minute = int(s[10:12]) 928 929 if len_li > 12: 930 res.second = int(s[12:]) 931 932 elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None: 933 # HH[ ]h or MM[ ]m or SS[.ss][ ]s 934 hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True) 935 (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx) 936 if hms is not None: 937 # TODO: checking that hour/minute/second are not 938 # already set? 939 self._assign_hms(res, value_repr, hms) 940 941 elif idx + 2 < len_l and tokens[idx + 1] == ':': 942 # HH:MM[:SS[.ss]] 943 res.hour = int(value) 944 value = self._to_decimal(tokens[idx + 2]) # TODO: try/except for this? 
945 (res.minute, res.second) = self._parse_min_sec(value) 946 947 if idx + 4 < len_l and tokens[idx + 3] == ':': 948 res.second, res.microsecond = self._parsems(tokens[idx + 4]) 949 950 idx += 2 951 952 idx += 2 953 954 elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'): 955 sep = tokens[idx + 1] 956 ymd.append(value_repr) 957 958 if idx + 2 < len_l and not info.jump(tokens[idx + 2]): 959 if tokens[idx + 2].isdigit(): 960 # 01-01[-01] 961 ymd.append(tokens[idx + 2]) 962 else: 963 # 01-Jan[-01] 964 value = info.month(tokens[idx + 2]) 965 966 if value is not None: 967 ymd.append(value, 'M') 968 else: 969 raise ValueError() 970 971 if idx + 3 < len_l and tokens[idx + 3] == sep: 972 # We have three members 973 value = info.month(tokens[idx + 4]) 974 975 if value is not None: 976 ymd.append(value, 'M') 977 else: 978 ymd.append(tokens[idx + 4]) 979 idx += 2 980 981 idx += 1 982 idx += 1 983 984 elif idx + 1 >= len_l or info.jump(tokens[idx + 1]): 985 if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None: 986 # 12 am 987 hour = int(value) 988 res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2])) 989 idx += 1 990 else: 991 # Year, month or day 992 ymd.append(value) 993 idx += 1 994 995 elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24): 996 # 12am 997 hour = int(value) 998 res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1])) 999 idx += 1 1000 1001 elif ymd.could_be_day(value): 1002 ymd.append(value) 1003 1004 elif not fuzzy: 1005 raise ValueError() 1006 1007 return idx 1008 1009 def _find_hms_idx(self, idx, tokens, info, allow_jump): 1010 len_l = len(tokens) 1011 1012 if idx+1 < len_l and info.hms(tokens[idx+1]) is not None: 1013 # There is an "h", "m", or "s" label following this token. We take 1014 # assign the upcoming label to the current token. 1015 # e.g. 
the "12" in 12h" 1016 hms_idx = idx + 1 1017 1018 elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and 1019 info.hms(tokens[idx+2]) is not None): 1020 # There is a space and then an "h", "m", or "s" label. 1021 # e.g. the "12" in "12 h" 1022 hms_idx = idx + 2 1023 1024 elif idx > 0 and info.hms(tokens[idx-1]) is not None: 1025 # There is a "h", "m", or "s" preceeding this token. Since neither 1026 # of the previous cases was hit, there is no label following this 1027 # token, so we use the previous label. 1028 # e.g. the "04" in "12h04" 1029 hms_idx = idx-1 1030 1031 elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and 1032 info.hms(tokens[idx-2]) is not None): 1033 # If we are looking at the final token, we allow for a 1034 # backward-looking check to skip over a space. 1035 # TODO: Are we sure this is the right condition here? 1036 hms_idx = idx - 2 1037 1038 else: 1039 hms_idx = None 1040 1041 return hms_idx 1042 1043 def _assign_hms(self, res, value_repr, hms): 1044 # See GH issue #427, fixing float rounding 1045 value = self._to_decimal(value_repr) 1046 1047 if hms == 0: 1048 # Hour 1049 res.hour = int(value) 1050 if value % 1: 1051 res.minute = int(60*(value % 1)) 1052 1053 elif hms == 1: 1054 (res.minute, res.second) = self._parse_min_sec(value) 1055 1056 elif hms == 2: 1057 (res.second, res.microsecond) = self._parsems(value_repr) 1058 1059 def _could_be_tzname(self, hour, tzname, tzoffset, token): 1060 return (hour is not None and 1061 tzname is None and 1062 tzoffset is None and 1063 len(token) <= 5 and 1064 (all(x in string.ascii_uppercase for x in token) 1065 or token in self.info.UTCZONE)) 1066 1067 def _ampm_valid(self, hour, ampm, fuzzy): 1068 """ 1069 For fuzzy parsing, 'a' or 'am' (both valid English words) 1070 may erroneously trigger the AM/PM flag. Deal with that 1071 here. 1072 """ 1073 val_is_ampm = True 1074 1075 # If there's already an AM/PM flag, this one isn't one. 
1076 if fuzzy and ampm is not None: 1077 val_is_ampm = False 1078 1079 # If AM/PM is found and hour is not, raise a ValueError 1080 if hour is None: 1081 if fuzzy: 1082 val_is_ampm = False 1083 else: 1084 raise ValueError('No hour specified with AM or PM flag.') 1085 elif not 0 <= hour <= 12: 1086 # If AM/PM is found, it's a 12 hour clock, so raise 1087 # an error for invalid range 1088 if fuzzy: 1089 val_is_ampm = False 1090 else: 1091 raise ValueError('Invalid hour specified for 12-hour clock.') 1092 1093 return val_is_ampm 1094 1095 def _adjust_ampm(self, hour, ampm): 1096 if hour < 12 and ampm == 1: 1097 hour += 12 1098 elif hour == 12 and ampm == 0: 1099 hour = 0 1100 return hour 1101 1102 def _parse_min_sec(self, value): 1103 # TODO: Every usage of this function sets res.second to the return 1104 # value. Are there any cases where second will be returned as None and 1105 # we *dont* want to set res.second = None? 1106 minute = int(value) 1107 second = None 1108 1109 sec_remainder = value % 1 1110 if sec_remainder: 1111 second = int(60 * sec_remainder) 1112 return (minute, second) 1113 1114 def _parsems(self, value): 1115 """Parse a I[.F] seconds value into (seconds, microseconds).""" 1116 if "." not in value: 1117 return int(value), 0 1118 else: 1119 i, f = value.split(".") 1120 return int(i), int(f.ljust(6, "0")[:6]) 1121 1122 def _parse_hms(self, idx, tokens, info, hms_idx): 1123 # TODO: Is this going to admit a lot of false-positives for when we 1124 # just happen to have digits and "h", "m" or "s" characters in non-date 1125 # text? I guess hex hashes won't have that problem, but there's plenty 1126 # of random junk out there. 1127 if hms_idx is None: 1128 hms = None 1129 new_idx = idx 1130 elif hms_idx > idx: 1131 hms = info.hms(tokens[hms_idx]) 1132 new_idx = hms_idx 1133 else: 1134 # Looking backwards, increment one. 
def _recombine_skipped(self, tokens, skipped_idxs):
    """Join runs of adjacent skipped tokens back into single strings.

    >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
    >>> skipped_idxs = [0, 1, 2, 5]
    >>> _recombine_skipped(tokens, skipped_idxs)
    ["foo bar", "baz"]

    :param tokens:
        The full token list.

    :param skipped_idxs:
        Indices into ``tokens`` that were skipped. They are processed in
        ascending order regardless of the order in which they are given.

    :return:
        A list of strings, one per run of consecutive indices.
    """
    skipped_tokens = []
    previous_idx = None
    # Adjacency is checked against the previous index of the *sorted*
    # sequence, so an unsorted ``skipped_idxs`` is still merged correctly.
    for idx in sorted(skipped_idxs):
        if previous_idx is not None and idx - 1 == previous_idx:
            skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
        else:
            skipped_tokens.append(tokens[idx])
        previous_idx = idx

    return skipped_tokens


def _build_tzinfo(self, tzinfos, tzname, tzoffset):
    """Resolve ``tzname`` / ``tzoffset`` to a tzinfo through ``tzinfos``.

    :param tzinfos:
        Either a callable invoked as ``tzinfos(tzname, tzoffset)`` or a
        mapping queried with ``tzinfos.get(tzname)``.

    :return:
        The looked-up value itself when it is a :class:`datetime.tzinfo`
        or ``None``; ``tz.tzstr(value)`` when it is a text string;
        ``tz.tzoffset(tzname, value)`` when it is an integer.

    :raises TypeError:
        If the looked-up value is none of the supported types.
    """
    if callable(tzinfos):
        tzdata = tzinfos(tzname, tzoffset)
    else:
        tzdata = tzinfos.get(tzname)

    # Handle the case where tzinfos is passed an option that returns None,
    # e.g. tzinfos = {'BRST': None}
    if isinstance(tzdata, datetime.tzinfo) or tzdata is None:
        tzinfo = tzdata
    elif isinstance(tzdata, text_type):
        tzinfo = tz.tzstr(tzdata)
    elif isinstance(tzdata, integer_types):
        tzinfo = tz.tzoffset(tzname, tzdata)
    else:
        # Without this branch ``tzinfo`` would be unbound and the return
        # below would fail with an unrelated UnboundLocalError.
        raise TypeError("Offset must be tzinfo subclass, tz string, "
                        "or int offset.")
    return tzinfo


def _build_tzaware(self, naive, res, tzinfos):
    """Attach the time zone information found in ``res`` to ``naive``.

    The first matching rule wins:

    1. ``tzinfos`` is callable, or is a non-empty container that contains
       ``res.tzname``: the zone comes from ``_build_tzinfo``.
    2. ``res.tzname`` is one of ``time.tzname``: ``tz.tzlocal()`` is used,
       replaced by ``tz.tzutc()`` if the resulting name does not match and
       ``res.tzname`` is listed in ``self.info.UTCZONE``.
    3. ``res.tzoffset`` is ``0``: ``tz.tzutc()``.
    4. ``res.tzoffset`` is non-zero: ``tz.tzoffset(res.tzname, offset)``.
    5. Neither a name nor an offset was found: ``naive`` is returned as is.
    6. A name was found but not understood: ``UnknownTimezoneWarning`` is
       issued and ``naive`` is returned as is.
    """
    if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)):
        tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset)
        aware = naive.replace(tzinfo=tzinfo)
        aware = self._assign_tzname(aware, res.tzname)

    elif res.tzname and res.tzname in time.tzname:
        aware = naive.replace(tzinfo=tz.tzlocal())

        # Handle ambiguous local datetime
        aware = self._assign_tzname(aware, res.tzname)

        # This is mostly relevant for winter GMT zones parsed in the UK
        if (aware.tzname() != res.tzname and
                res.tzname in self.info.UTCZONE):
            aware = aware.replace(tzinfo=tz.tzutc())

    elif res.tzoffset == 0:
        aware = naive.replace(tzinfo=tz.tzutc())

    elif res.tzoffset:
        aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))

    elif not res.tzname and not res.tzoffset:
        # i.e. no timezone information was found.
        aware = naive

    elif res.tzname:
        # tz-like string was parsed but we don't know what to do
        # with it
        warnings.warn("tzname {tzname} identified but not understood.  "
                      "Pass `tzinfos` argument in order to correctly "
                      "return a timezone-aware datetime.  In a future "
                      "version, this will raise an "
                      "exception.".format(tzname=res.tzname),
                      category=UnknownTimezoneWarning)
        aware = naive

    return aware


def _build_naive(self, res, default):
    """Combine the parsed fields in ``res`` with ``default``.

    Every date/time attribute of ``res`` that is not ``None`` replaces the
    corresponding field of ``default``.

    :param res:
        Result object exposing ``year``, ``month``, ``day``, ``hour``,
        ``minute``, ``second``, ``microsecond`` and ``weekday``.

    :param default:
        Object providing ``year``, ``month``, ``day`` and ``replace()``;
        it supplies every field missing from ``res``.

    :return:
        The value of ``default.replace(...)``, moved by a
        ``relativedelta(weekday=...)`` when a weekday but no day was
        parsed.
    """
    repl = {}
    for attr in ("year", "month", "day", "hour",
                 "minute", "second", "microsecond"):
        value = getattr(res, attr)
        if value is not None:
            repl[attr] = value

    if 'day' not in repl:
        # If the default day exceeds the last day of the month, fall back
        # to the end of the month.
        cyear = default.year if res.year is None else res.year
        cmonth = default.month if res.month is None else res.month
        cday = default.day if res.day is None else res.day

        last_day = monthrange(cyear, cmonth)[1]
        if cday > last_day:
            repl['day'] = last_day

    naive = default.replace(**repl)

    if res.weekday is not None and not res.day:
        naive = naive + relativedelta.relativedelta(weekday=res.weekday)

    return naive


def _assign_tzname(self, dt, tzname):
    """Return ``dt`` or its ``fold=1`` variant, whichever reports ``tzname``.

    If ``dt.tzname()`` already equals ``tzname``, or if the folded variant
    does not report ``tzname`` either, ``dt`` is returned unchanged.
    """
    if dt.tzname() != tzname:
        new_dt = tz.enfold(dt, fold=1)
        if new_dt.tzname() == tzname:
            return new_dt

    return dt
# Shared parser instance used by ``parse`` when no ``parserinfo`` is given.
DEFAULTPARSER = parser()


def parse(timestr, parserinfo=None, **kwargs):
    """

    Parse a string in one of the supported formats, using the
    ``parserinfo`` parameters.

    :param timestr:
        A string containing a date/time stamp.

    :param parserinfo:
        A :class:`parserinfo` object containing parameters for the parser.
        When this is ``None`` (or otherwise falsy), the shared module-level
        default parser is used instead of building a new one.

    Everything passed through ``**kwargs`` is forwarded to the parser's
    ``parse`` method. The following keyword arguments are recognized:

    :param default:
        The default datetime object, if this is a datetime object and not
        ``None``, elements specified in ``timestr`` replace elements in the
        default object.

    :param ignoretz:
        If set ``True``, time zones in parsed strings are ignored and a naive
        :class:`datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. This parameter can be a
        dictionary with timezone aliases mapping time zone names to time
        zones or a function taking two parameters (``tzname`` and
        ``tzoffset``) and returning a time zone.

        The timezones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM and
        YMD. If set to ``None``, this value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken to
        be the year, otherwise the last number is taken to be the year. If
        this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for string like "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        will return a tuple where the first element is the parsed
        :class:`datetime.datetime` datetimestamp and the second element is
        a tuple containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

    :raises ValueError:
        Raised for invalid or unknown string format, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date
        would be created.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    # A custom parserinfo needs its own parser; otherwise reuse the shared one.
    active_parser = parser(parserinfo) if parserinfo else DEFAULTPARSER
    return active_parser.parse(timestr, **kwargs)
if (i < len_l and (l[i] in ('+', '-') or l[i][0] in 1407 "0123456789")): 1408 if l[i] in ('+', '-'): 1409 # Yes, that's right. See the TZ variable 1410 # documentation. 1411 signal = (1, -1)[l[i] == '+'] 1412 used_idxs.append(i) 1413 i += 1 1414 else: 1415 signal = -1 1416 len_li = len(l[i]) 1417 if len_li == 4: 1418 # -0300 1419 setattr(res, offattr, (int(l[i][:2]) * 3600 + 1420 int(l[i][2:]) * 60) * signal) 1421 elif i + 1 < len_l and l[i + 1] == ':': 1422 # -03:00 1423 setattr(res, offattr, 1424 (int(l[i]) * 3600 + 1425 int(l[i + 2]) * 60) * signal) 1426 used_idxs.append(i) 1427 i += 2 1428 elif len_li <= 2: 1429 # -[0]3 1430 setattr(res, offattr, 1431 int(l[i][:2]) * 3600 * signal) 1432 else: 1433 return None 1434 used_idxs.append(i) 1435 i += 1 1436 if res.dstabbr: 1437 break 1438 else: 1439 break 1440 1441 1442 if i < len_l: 1443 for j in range(i, len_l): 1444 if l[j] == ';': 1445 l[j] = ',' 1446 1447 assert l[i] == ',' 1448 1449 i += 1 1450 1451 if i >= len_l: 1452 pass 1453 elif (8 <= l.count(',') <= 9 and 1454 not [y for x in l[i:] if x != ',' 1455 for y in x if y not in "0123456789+-"]): 1456 # GMT0BST,3,0,30,3600,10,0,26,7200[,3600] 1457 for x in (res.start, res.end): 1458 x.month = int(l[i]) 1459 used_idxs.append(i) 1460 i += 2 1461 if l[i] == '-': 1462 value = int(l[i + 1]) * -1 1463 used_idxs.append(i) 1464 i += 1 1465 else: 1466 value = int(l[i]) 1467 used_idxs.append(i) 1468 i += 2 1469 if value: 1470 x.week = value 1471 x.weekday = (int(l[i]) - 1) % 7 1472 else: 1473 x.day = int(l[i]) 1474 used_idxs.append(i) 1475 i += 2 1476 x.time = int(l[i]) 1477 used_idxs.append(i) 1478 i += 2 1479 if i < len_l: 1480 if l[i] in ('-', '+'): 1481 signal = (-1, 1)[l[i] == "+"] 1482 used_idxs.append(i) 1483 i += 1 1484 else: 1485 signal = 1 1486 used_idxs.append(i) 1487 res.dstoffset = (res.stdoffset + int(l[i]) * signal) 1488 1489 # This was a made-up format that is not in normal use 1490 warn(('Parsed time zone "%s"' % tzstr) + 1491 'is in a non-standard 
dateutil-specific format, which ' + 1492 'is now deprecated; support for parsing this format ' + 1493 'will be removed in future versions. It is recommended ' + 1494 'that you switch to a standard format like the GNU ' + 1495 'TZ variable format.', tz.DeprecatedTzFormatWarning) 1496 elif (l.count(',') == 2 and l[i:].count('/') <= 2 and 1497 not [y for x in l[i:] if x not in (',', '/', 'J', 'M', 1498 '.', '-', ':') 1499 for y in x if y not in "0123456789"]): 1500 for x in (res.start, res.end): 1501 if l[i] == 'J': 1502 # non-leap year day (1 based) 1503 used_idxs.append(i) 1504 i += 1 1505 x.jyday = int(l[i]) 1506 elif l[i] == 'M': 1507 # month[-.]week[-.]weekday 1508 used_idxs.append(i) 1509 i += 1 1510 x.month = int(l[i]) 1511 used_idxs.append(i) 1512 i += 1 1513 assert l[i] in ('-', '.') 1514 used_idxs.append(i) 1515 i += 1 1516 x.week = int(l[i]) 1517 if x.week == 5: 1518 x.week = -1 1519 used_idxs.append(i) 1520 i += 1 1521 assert l[i] in ('-', '.') 1522 used_idxs.append(i) 1523 i += 1 1524 x.weekday = (int(l[i]) - 1) % 7 1525 else: 1526 # year day (zero based) 1527 x.yday = int(l[i]) + 1 1528 1529 used_idxs.append(i) 1530 i += 1 1531 1532 if i < len_l and l[i] == '/': 1533 used_idxs.append(i) 1534 i += 1 1535 # start time 1536 len_li = len(l[i]) 1537 if len_li == 4: 1538 # -0300 1539 x.time = (int(l[i][:2]) * 3600 + 1540 int(l[i][2:]) * 60) 1541 elif i + 1 < len_l and l[i + 1] == ':': 1542 # -03:00 1543 x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60 1544 used_idxs.append(i) 1545 i += 2 1546 if i + 1 < len_l and l[i + 1] == ':': 1547 used_idxs.append(i) 1548 i += 2 1549 x.time += int(l[i]) 1550 elif len_li <= 2: 1551 # -[0]3 1552 x.time = (int(l[i][:2]) * 3600) 1553 else: 1554 return None 1555 used_idxs.append(i) 1556 i += 1 1557 1558 assert i == len_l or l[i] == ',' 1559 1560 i += 1 1561 1562 assert i >= len_l 1563 1564 except (IndexError, ValueError, AssertionError): 1565 return None 1566 1567 unused_idxs = set(range(len_l)).difference(used_idxs) 1568 
# Shared time zone string parser instance used by ``_parsetz``.
DEFAULTTZPARSER = _tzparser()


def _parsetz(tzstr):
    """Parse the time zone string ``tzstr`` with ``DEFAULTTZPARSER``.

    :return:
        Whatever ``DEFAULTTZPARSER.parse(tzstr)`` returns.
    """
    parsed = DEFAULTTZPARSER.parse(tzstr)
    return parsed


class UnknownTimezoneWarning(RuntimeWarning):
    """Warning category for a time zone the parser cannot turn into a tzinfo."""
# vim:ts=4:sw=4:et