"""Representing and manipulating email headers via custom objects.

This module provides an implementation of the HeaderRegistry API.
The implementation is designed to flexibly follow RFC5322 rules.

Eventually HeaderRegistry will be a public API, but it isn't yet,
and will probably change some before that happens.

"""
from types import MappingProxyType

from email import utils
from email import errors
from email import _header_value_parser as parser


class Address:

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.  In
        addition to specifying the username and domain separately, they may be
        specified together by using the addr_spec keyword *instead of* the
        username and domain keywords.  If an addr_spec string is specified it
        must be properly quoted according to RFC 5322 rules; an error will be
        raised if it is not.

        An Address object has display_name, username, domain, and addr_spec
        attributes, all of which are read-only.  The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        Raises TypeError if addr_spec is combined with username and/or domain,
        and ValueError if addr_spec cannot be fully parsed or if any of the
        supplied parts contains a CR or LF character.

        """
        # A CR or LF inside any part would let the caller smuggle extra
        # header lines into a message when the address is serialized.
        inputs = ''.join(
            filter(None, (display_name, username, domain, addr_spec)))
        if '\r' in inputs or '\n' in inputs:
            raise ValueError(
                "invalid arguments; address parts cannot contain CR or LF")

        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            a_s, rest = parser.get_addr_spec(addr_spec)
            if rest:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    a_s, addr_spec))
            if a_s.all_defects:
                # Defects are exception instances; surface the first one.
                raise a_s.all_defects[0]
            username = a_s.local_part
            domain = a_s.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        """The display name given at construction time."""
        return self._display_name

    @property
    def username(self):
        """The local part of the address."""
        return self._username

    @property
    def domain(self):
        """The domain part of the address."""
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.

        Returns '<>' when both the username and the domain are empty.
        """
        lp = self.username
        # Quote the local part if it contains any character that may not
        # appear in a dot-atom.
        if not parser.DOT_ATOM_ENDS.isdisjoint(lp):
            lp = parser.quote_string(lp)
        if self.domain:
            return lp + '@' + self.domain
        if not lp:
            return '<>'
        return lp

    def __repr__(self):
        return "{}(display_name={!r}, username={!r}, domain={!r})".format(
                        self.__class__.__name__,
                        self.display_name, self.username, self.domain)

    def __str__(self):
        """Return 'display_name <addr_spec>', or just addr_spec when there
        is no display name.  The display name is quoted if it contains any
        RFC 5322 special characters.
        """
        disp = self.display_name
        if not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        if disp:
            # An empty address is rendered as 'name <>' rather than
            # 'name <<>>'.
            addr_spec = '' if self.addr_spec == '<>' else self.addr_spec
            return "{} <{}>".format(disp, addr_spec)
        return self.addr_spec

    def __eq__(self, other):
        # Returning NotImplemented (rather than False) lets Python try the
        # reflected comparison on 'other' before settling on inequality.
        if not isinstance(other, Address):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)


class Group:

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        # Stored as a tuple so the read-only 'addresses' property cannot be
        # mutated through the returned object.
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        """The group's display name, or None for a bare address."""
        return self._display_name

    @property
    def addresses(self):
        """Tuple of the objects passed as 'addresses' at construction."""
        return self._addresses

    def __repr__(self):
        return "{}(display_name={!r}, addresses={!r})".format(
                 self.__class__.__name__,
                 self.display_name, self.addresses)

    def __str__(self):
        """Return 'display_name: addr, addr;', or the string form of the
        sole address when display_name is None and there is exactly one.
        """
        if self.display_name is None and len(self.addresses) == 1:
            return str(self.addresses[0])
        disp = self.display_name
        if disp is not None and not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        # A non-empty address list is separated from the colon by a space.
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        # See Address.__eq__ for why NotImplemented is returned here.
        if not isinstance(other, Group):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)


# Header Classes #

class BaseHeader(str):

    """Base class for message headers.

    Implements generic behavior and provides tools for subclasses.

    A subclass must define a classmethod named 'parse' that takes an unfolded
    value string and a dictionary as its arguments.  The dictionary will
    contain one key, 'defects', initialized to an empty list.  After the call
    the dictionary must contain two additional keys: parse_tree, set to the
    parse tree obtained from parsing the header, and 'decoded', set to the
    string value of the idealized representation of the data from the value.
    (That is, encoded words are decoded, and values that have canonical
    representations are so represented.)

    The defects key is intended to collect parsing defects, which the message
    parser will subsequently dispose of as appropriate.  The parser should not,
    insofar as practical, raise any errors.  Defects should be added to the
    list instead.  The standard header parsers register defects for RFC
    compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
    errors.

    The parse method may add additional keys to the dictionary.  In this case
    the subclass must define an 'init' method, which will be passed the
    dictionary as its keyword arguments.  The method should use (usually by
    setting them as the value of similarly named attributes) and remove all the
    extra keys added by its parse method, and then use super to call its parent
    class with the remaining arguments and keywords.

    The subclass should also make sure that a 'max_count' attribute is defined
    that is either None or 1. XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        """Parse 'value' with cls.parse and build the header string.

        The string value of the new object is the 'decoded' entry produced
        by the parser; every other entry is forwarded to 'init'.
        """
        kwds = {'defects': []}
        cls.parse(value, kwds)
        if utils._has_surrogates(kwds['decoded']):
            kwds['decoded'] = utils._sanitize(kwds['decoded'])
        self = str.__new__(cls, kwds['decoded'])
        del kwds['decoded']
        self.init(name, **kwds)
        return self

    def init(self, name, *, parse_tree, defects):
        """Store the header name, parse tree and defect list."""
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        """The header name exactly as passed to the constructor."""
        return self._name

    @property
    def defects(self):
        """Tuple of the defects collected while parsing."""
        return tuple(self._defects)

    def __reduce__(self):
        # Pickle by class name and bases: _reconstruct_header rebuilds an
        # equivalent class with type() and restores the string value, and
        # the instance __dict__ is supplied as the state.
        return (
            _reconstruct_header,
            (
                self.__class__.__name__,
                self.__class__.__bases__,
                str(self),
            ),
            self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        """Create an instance from 'value' without re-running the parser."""
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        """
        # At some point we need to put fws here if and only if it was in the
        # source.
        header = parser.Header([
            parser.HeaderLabel([
                parser.ValueTerminal(self.name, 'header-name'),
                parser.ValueTerminal(':', 'header-sep')]),
            ])
        # Only emit the separating space when there is a value to follow it.
        if self._parse_tree:
            header.append(
                parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]))
        header.append(self._parse_tree)
        return header.fold(policy=policy)


def _reconstruct_header(cls_name, bases, value):
    """Unpickling helper: rebuild the header class and wrap 'value' in it."""
    return type(cls_name, bases, {})._reconstruct(value)


class UnstructuredHeader:

    """Header whose value is parsed as unstructured text."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        kwds['parse_tree'] = cls.value_parser(value)
        kwds['decoded'] = str(kwds['parse_tree'])


class UniqueUnstructuredHeader(UnstructuredHeader):

    max_count = 1


class DateHeader:

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Fill 'kwds' from a date string or a datetime object.

        An empty value or a string that cannot be parsed as a date is
        recorded as a defect, with 'datetime' set to None, instead of
        raising.
        """
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        if isinstance(value, str):
            try:
                parsed = utils.parsedate_to_datetime(value)
            except (TypeError, ValueError):
                # Parsers are expected to register defects rather than raise.
                # InvalidDateDefect may be missing from older email.errors,
                # so fall back to the generic HeaderDefect.
                defect_cls = getattr(errors, 'InvalidDateDefect',
                                     errors.HeaderDefect)
                kwds['defects'].append(
                    defect_cls('Invalid date value or format'))
                kwds['datetime'] = None
                # Keep the original text as the string value of the header.
                kwds['decoded'] = value
                kwds['parse_tree'] = parser.TokenList()
                return
            value = parsed
        kwds['datetime'] = value
        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        """The parsed datetime, or None if the value was empty or invalid."""
        return self._datetime


class UniqueDateHeader(DateHeader):

    max_count = 1


class AddressHeader:

    """Header whose value is a list of addresses and/or groups."""

    max_count = None

    @staticmethod
    def value_parser(value):
        address_list, value = parser.get_address_list(value)
        assert not value, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        """Fill 'kwds' from a header string or from Address/Group objects.

        'value' may be a string, a single Address or Group, or an iterable
        of them.  Items without an 'addresses' attribute are wrapped in a
        Group whose display_name is None.
        """
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            kwds['parse_tree'] = address_list = cls.value_parser(value)
            groups = [
                Group(addr.display_name,
                      [Address(mb.display_name or '',
                               mb.local_part or '',
                               mb.domain or '')
                       for mb in addr.all_mailboxes])
                for addr in address_list.addresses]
            defects = list(address_list.all_defects)
        else:
            # Assume it is Address/Group stuff
            if not hasattr(value, '__iter__'):
                value = [value]
            groups = [Group(None, [item]) if not hasattr(item, 'addresses')
                                          else item
                                    for item in value]
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join([str(item) for item in groups])
        # When built from objects there is no parse tree yet; derive one
        # from the decoded text.
        if 'parse_tree' not in kwds:
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        self._groups = tuple(kw.pop('groups'))
        # Flattened address tuple; computed lazily by the 'addresses'
        # property.
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        """Tuple of the Group objects that make up the header value."""
        return self._groups

    @property
    def addresses(self):
        """Tuple of every address in every group, in order."""
        if self._addresses is None:
            self._addresses = tuple(address for group in self._groups
                                            for address in group.addresses)
        return self._addresses


class UniqueAddressHeader(AddressHeader):

    max_count = 1


class SingleAddressHeader(AddressHeader):

    @property
    def address(self):
        """The sole address of the header.

        Raises ValueError if the header does not hold exactly one address.
        """
        if len(self.addresses) != 1:
            raise ValueError(("value of single address header {} is not "
                "a single address").format(self.name))
        return self.addresses[0]


class UniqueSingleAddressHeader(SingleAddressHeader):

    max_count = 1


class MIMEVersionHeader:

    """Header exposing the 'major', 'minor' and 'version' of a MIME-Version
    value.  All three are None when the parse tree has no minor number.
    """

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)
        # Without a minor number, major is reported as None as well.
        kwds['major'] = None if parse_tree.minor is None else parse_tree.major
        kwds['minor'] = parse_tree.minor
        if parse_tree.minor is not None:
            kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])
        else:
            kwds['version'] = None

    def init(self, *args, **kw):
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        return self._major

    @property
    def minor(self):
        return self._minor

    @property
    def version(self):
        return self._version


class ParameterizedMIMEHeader:

    # Mixin that handles the params dict.  Must be subclassed and
    # a property value_parser for the specific header provided.

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)
        if parse_tree.params is None:
            kwds['params'] = {}
        else:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            # Parameter names are lower-cased; values are kept as given
            # (after sanitizing).
            kwds['params'] = {utils._sanitize(name).lower():
                                    utils._sanitize(value)
                               for name, value in parse_tree.params}

    def init(self, *args, **kw):
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        """Read-only mapping view of the header parameters."""
        return MappingProxyType(self._params)


class ContentTypeHeader(ParameterizedMIMEHeader):

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        self._maintype = utils._sanitize(self._parse_tree.maintype)
        self._subtype = utils._sanitize(self._parse_tree.subtype)

    @property
    def maintype(self):
        return self._maintype

    @property
    def subtype(self):
        return self._subtype

    @property
    def content_type(self):
        """The 'maintype/subtype' string."""
        return self.maintype + '/' + self.subtype


class ContentDispositionHeader(ParameterizedMIMEHeader):

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        cd = self._parse_tree.content_disposition
        # The parse tree may report no disposition at all; keep None as is.
        self._content_disposition = cd if cd is None else utils._sanitize(cd)

    @property
    def content_disposition(self):
        return self._content_disposition


class ContentTransferEncodingHeader:

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        self._cte = utils._sanitize(self._parse_tree.cte)

    @property
    def cte(self):
        return self._cte


# The header factory #

_default_header_map = {
    'subject':                      UniqueUnstructuredHeader,
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    }


class HeaderRegistry:

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is the class that will be the last class in the created
        header class's __bases__ list.  default_class is the class that will be
        used if "name" (see __call__) does not appear in the registry.
        use_default_map controls whether or not the default mapping of names to
        specialized classes is copied in to the registry when the factory is
        created.  The default is True.

        """
        self.registry = {}
        self.base_class = base_class
        self.default_class = default_class
        if use_default_map:
            self.registry.update(_default_header_map)

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        The name is stored lower-cased, so lookups are case-insensitive.

        """
        self.registry[name.lower()] = cls

    def __getitem__(self, name):
        """Return a new class combining the class registered for 'name' (or
        default_class) with base_class.
        """
        cls = self.registry.get(name.lower(), self.default_class)
        return type('_'+cls.__name__, (cls, self.base_class), {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        Creates a header instance by creating a specialized class for parsing
        and representing the specified header by combining the factory
        base_class with a specialized class from the registry or the
        default_class, and passing the name and value to the constructed
        class's constructor.

        """
        return self[name](name, value)