1"""Representing and manipulating email headers via custom objects.
2
3This module provides an implementation of the HeaderRegistry API.
4The implementation is designed to flexibly follow RFC5322 rules.
5
6Eventually HeaderRegistry will be a public API, but it isn't yet,
7and will probably change some before that happens.
8
9"""
10from types import MappingProxyType
11
12from email import utils
13from email import errors
14from email import _header_value_parser as parser
15
class Address:

    """A single email address: optional display name plus username@domain.

    The components are captured at construction time and exposed through
    read-only properties.  str() and the addr_spec property apply RFC 5322
    quoting whenever a component contains characters that require it.

    The class defines __eq__ but not __hash__, so instances are unhashable.
    """

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.  In
        addition to specifying the username and domain separately, they may be
        specified together by using the addr_spec keyword *instead of* the
        username and domain keywords.  If an addr_spec string is specified it
        must be properly quoted according to RFC 5322 rules; an error will be
        raised if it is not.

        An Address object has display_name, username, domain, and addr_spec
        attributes, all of which are read-only.  The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        Raises:
            ValueError: if any string argument contains a CR or LF character,
                or if addr_spec cannot be parsed completely.
            TypeError: if addr_spec is combined with username and/or domain.
            Any defect object recorded by the addr_spec parser is raised
            as-is.

        """
        # A bare CR or LF inside an address component would let the caller
        # smuggle extra header lines into a message when the address is
        # serialised, so refuse such values up front.  Only str values are
        # inspected so that other argument types behave as they did before.
        for part in (display_name, username, domain, addr_spec):
            if isinstance(part, str) and ('\r' in part or '\n' in part):
                raise ValueError("invalid arguments; address parts cannot "
                                 "contain CR or LF")
        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            a_s, rest = parser.get_addr_spec(addr_spec)
            if rest:
                # The parser stopped early: part of the input is not a valid
                # addr-spec.
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    a_s, addr_spec))
            if a_s.all_defects:
                raise a_s.all_defects[0]
            username = a_s.local_part
            domain = a_s.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        """The display name given at construction time."""
        return self._display_name

    @property
    def username(self):
        """The local part of the address, unquoted."""
        return self._username

    @property
    def domain(self):
        """The domain part of the address."""
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.

        Returns '<>' when both username and domain are empty, and the bare
        (possibly quoted) username when only the domain is empty.
        """
        lp = self.username
        # Quote the local part if it contains any character that may not
        # appear in a dot-atom.
        if not parser.DOT_ATOM_ENDS.isdisjoint(lp):
            lp = parser.quote_string(lp)
        if self.domain:
            return lp + '@' + self.domain
        if not lp:
            return '<>'
        return lp

    def __repr__(self):
        return "{}(display_name={!r}, username={!r}, domain={!r})".format(
                        self.__class__.__name__,
                        self.display_name, self.username, self.domain)

    def __str__(self):
        """Return 'display_name <addr_spec>', or just addr_spec when there is
        no display name.  The display name is quoted if it contains specials.
        """
        disp = self.display_name
        if not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        if disp:
            # An empty address is rendered as 'name <>' rather than
            # 'name <<>>'.
            addr_spec = '' if self.addr_spec=='<>' else self.addr_spec
            return "{} <{}>".format(disp, addr_spec)
        return self.addr_spec

    def __eq__(self, other):
        """Compare display_name, username and domain with another Address.

        Returns NotImplemented for objects that are not Address instances so
        that Python can try the reflected comparison before settling on
        False.
        """
        if not isinstance(other, Address):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)
105
106
class Group:

    """A named group of addresses, or a wrapper around one bare address."""

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        # Stored as a tuple so the attribute handed out by the property cannot
        # be mutated in place; None or any empty iterable becomes ().
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        """The group's display name, or None for a bare-address wrapper."""
        return self._display_name

    @property
    def addresses(self):
        """Tuple of the member addresses (possibly empty)."""
        return self._addresses

    def __repr__(self):
        # The closing parenthesis was missing from the format string, which
        # produced an unbalanced repr.
        return "{}(display_name={!r}, addresses={!r})".format(
                 self.__class__.__name__,
                 self.display_name, self.addresses)

    def __str__(self):
        """Return 'display_name: addr, addr;', or the lone address itself when
        display_name is None and the group holds exactly one address.
        """
        if self.display_name is None and len(self.addresses)==1:
            return str(self.addresses[0])
        disp = self.display_name
        # Quote the display name if it contains any RFC 5322 special.
        if disp is not None and not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        """Compare display_name and addresses with another Group.

        Returns NotImplemented for objects that are not Group instances so
        that Python can try the reflected comparison before settling on
        False.
        """
        if not isinstance(other, Group):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)
157
158
159# Header Classes #
160
class BaseHeader(str):

    """Common str-based foundation for all message header classes.

    The string value of an instance is the 'decoded' text produced by the
    subclass parser; the header name, parse tree and defects are stored as
    additional attributes.

    Contract for subclasses:

    * Provide a classmethod 'parse(value, kwds)'.  It receives the unfolded
      value string and a dictionary that initially holds a single key,
      'defects', bound to an empty list.  On return the dictionary must also
      hold 'parse_tree' (the tree obtained from parsing the value) and
      'decoded' (the idealized string form of the value: encoded words
      decoded, canonical representations applied where they exist).

    * Parsers should avoid raising, insofar as practical, and append to the
      'defects' list instead; the message parser disposes of those defects
      later.  The standard parsers record defects for RFC compliance issues,
      obsolete RFC syntax and unrecoverable parsing errors.

    * If parse stores extra keys in the dictionary, the subclass must define
      an 'init' method.  It is called with the dictionary as keyword
      arguments, must consume (typically by saving as attributes) and remove
      every extra key, and then delegate to its parent's init via super with
      whatever remains.

    * Define a 'max_count' attribute that is either None or 1.
      XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        parsed = {'defects': []}
        cls.parse(value, parsed)
        # 'decoded' becomes the str value itself; everything else in the
        # dictionary is handed on to init().
        text = parsed.pop('decoded')
        if utils._has_surrogates(text):
            text = utils._sanitize(text)
        instance = str.__new__(cls, text)
        instance.init(name, **parsed)
        return instance

    def init(self, name, *, parse_tree, defects):
        """Store the header name, parse tree and defect list."""
        self._defects = defects
        self._parse_tree = parse_tree
        self._name = name

    @property
    def name(self):
        """The header name this instance was created with."""
        return self._name

    @property
    def defects(self):
        """The collected defects, as a tuple snapshot."""
        return tuple(self._defects)

    def __reduce__(self):
        # The concrete class is not referenced directly: it is rebuilt from
        # its name and bases by _reconstruct_header, and the instance
        # dictionary is passed along as the state.
        header_cls = self.__class__
        rebuild_args = (header_cls.__name__, header_cls.__bases__, str(self))
        return (_reconstruct_header, rebuild_args, self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        # Create the bare str instance without running parse() or init().
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        """
        label = parser.HeaderLabel([
            parser.ValueTerminal(self.name, 'header-name'),
            parser.ValueTerminal(':', 'header-sep')])
        folded = parser.Header([label])
        tree = self._parse_tree
        # At some point we need to put fws here iff it was in the source.
        # For now a single space follows the colon whenever there is a value.
        if tree:
            separator = parser.WhiteSpaceTerminal(' ', 'fws')
            folded.append(parser.CFWSList([separator]))
        folded.append(tree)
        return folded.fold(policy=policy)
259
260
def _reconstruct_header(cls_name, bases, value):
    """Rebuild a header object from the pieces emitted by __reduce__.

    A fresh class named cls_name is created from bases, and its
    _reconstruct classmethod is asked to build the instance from value.
    """
    header_cls = type(cls_name, bases, {})
    return header_cls._reconstruct(value)
263
264
class UnstructuredHeader:

    """Mixin for headers whose value is parsed as unstructured text."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Parse value and store 'parse_tree' and 'decoded' in kwds.

        'decoded' is the string form of the parse tree.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
274
275
class UniqueUnstructuredHeader(UnstructuredHeader):

    """UnstructuredHeader variant whose max_count is 1 instead of None.

    NOTE(review): max_count presumably caps how many times the header may
    appear in one message; the code that enforces it is not in this module.
    """

    max_count = 1
279
280
class DateHeader:

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.

    If the value is empty, or is a string that cannot be parsed as a date,
    a defect is recorded and the datetime attribute is None.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'datetime', 'decoded' and 'parse_tree'.

        value may be a date string or a datetime object.  Problems with the
        value are appended to kwds['defects'] rather than raised:

        * an empty value records HeaderMissingRequiredValue;
        * an unparsable date string records InvalidDateDefect (or the
          generic HeaderDefect when the email.errors module in use does not
          provide InvalidDateDefect).  The raw text is then kept as
          'decoded' and parsed as unstructured text for folding, so the
          original content is not lost.
        """
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        if isinstance(value, str):
            try:
                value = utils.parsedate_to_datetime(value)
            except (TypeError, ValueError):
                # parsedate_to_datetime signals a bad date by raising;
                # header parsers are expected to report defects instead.
                defect_cls = getattr(errors, 'InvalidDateDefect',
                                     errors.HeaderDefect)
                kwds['defects'].append(
                    defect_cls('Invalid date value or format'))
                kwds['datetime'] = None
                kwds['decoded'] = value
                kwds['parse_tree'] = cls.value_parser(value)
                return
        kwds['datetime'] = value
        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        # Consume the extra key added by parse() before delegating upward.
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        """The parsed datetime, or None if the value was empty or invalid."""
        return self._datetime
318
319
class UniqueDateHeader(DateHeader):

    """DateHeader variant whose max_count is 1 instead of None.

    NOTE(review): max_count presumably caps how many times the header may
    appear in one message; the code that enforces it is not in this module.
    """

    max_count = 1
323
324
class AddressHeader:

    """Mixin for headers whose value is a list of addresses and/or groups.

    Adds two read-only attributes: 'groups', a tuple of Group objects, and
    'addresses', the flattened tuple of every address in those groups.
    """

    max_count = None

    @staticmethod
    def value_parser(value):
        """Parse value as an address list and return the parse tree."""
        parsed, leftover = parser.get_address_list(value)
        assert not leftover, 'this should not happen'
        return parsed

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'groups', 'defects', 'decoded' and 'parse_tree'.

        value is either a header string, or a single object or an iterable
        of objects.  Objects that have an 'addresses' attribute are used as
        groups directly; any other object is wrapped in a Group whose
        display_name is None.
        """
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            tree = cls.value_parser(value)
            kwds['parse_tree'] = tree
            groups = [
                Group(entry.display_name,
                      [Address(box.display_name or '',
                               box.local_part or '',
                               box.domain or '')
                       for box in entry.all_mailboxes])
                for entry in tree.addresses]
            defects = list(tree.all_defects)
        else:
            # Assume it is Address/Group stuff
            if not hasattr(value, '__iter__'):
                value = [value]
            groups = []
            for item in value:
                if hasattr(item, 'addresses'):
                    groups.append(item)
                else:
                    groups.append(Group(None, [item]))
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join([str(group) for group in groups])
        # Objects were supplied rather than text: derive the parse tree from
        # the string just produced.
        if 'parse_tree' not in kwds:
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        # Consume the extra key added by parse() before delegating upward.
        self._groups = tuple(kw.pop('groups'))
        # Flattened address tuple; computed on first access.
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        """Tuple of the groups making up the header value."""
        return self._groups

    @property
    def addresses(self):
        """Tuple of all addresses from all groups, in order."""
        if self._addresses is None:
            flattened = []
            for group in self._groups:
                flattened.extend(group.addresses)
            self._addresses = tuple(flattened)
        return self._addresses
378
379
class UniqueAddressHeader(AddressHeader):

    """AddressHeader variant whose max_count is 1 instead of None.

    NOTE(review): max_count presumably caps how many times the header may
    appear in one message; the code that enforces it is not in this module.
    """

    max_count = 1
383
384
class SingleAddressHeader(AddressHeader):

    """AddressHeader that also offers its one address via 'address'."""

    @property
    def address(self):
        """The single address held by this header.

        Raises ValueError if the header does not contain exactly one address.
        """
        found = self.addresses
        if len(found) == 1:
            return found[0]
        raise ValueError(("value of single address header {} is not "
            "a single address").format(self.name))
393
394
class UniqueSingleAddressHeader(SingleAddressHeader):

    """SingleAddressHeader variant with max_count set to 1.

    NOTE(review): max_count presumably caps how many times the header may
    appear in one message; the code that enforces it is not in this module.
    """

    max_count = 1
398
399
class MIMEVersionHeader:

    """Mixin for the MIME-Version header.

    Adds 'major', 'minor' and 'version' attributes.  All three are None
    when the parser could not determine a minor version number.
    """

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse results and the version components."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        minor = tree.minor
        if minor is None:
            # Without a minor number the version is treated as unknown, so
            # the major number is not reported either.
            major = None
            version = None
        else:
            major = tree.major
            version = '{}.{}'.format(major, minor)
        kwds['major'] = major
        kwds['minor'] = minor
        kwds['version'] = version

    def init(self, *args, **kw):
        # Consume the extra keys added by parse() before delegating upward.
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        """Major version number, or None."""
        return self._major

    @property
    def minor(self):
        """Minor version number, or None."""
        return self._minor

    @property
    def version(self):
        """The version as a 'major.minor' string, or None."""
        return self._version
435
436
class ParameterizedMIMEHeader:

    """Mixin that handles the params dict.

    Must be subclassed, and the subclass must provide a value_parser
    attribute for the specific header.
    """

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse results and a 'params' dictionary.

        Parameter names are sanitized and lower-cased; parameter values are
        sanitized.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        collected = {}
        raw_params = tree.params
        if raw_params is not None:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            for key, val in raw_params:
                collected[utils._sanitize(key).lower()] = utils._sanitize(val)
        kwds['params'] = collected

    def init(self, *args, **kw):
        # Consume the extra key added by parse() before delegating upward.
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        """Read-only mapping view of the header parameters."""
        return MappingProxyType(self._params)
464
465
class ContentTypeHeader(ParameterizedMIMEHeader):

    """Content-Type header: adds maintype, subtype and content_type."""

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        # The parent init has stored the parse tree; read the type
        # components from it and sanitize them.
        tree = self._parse_tree
        self._maintype = utils._sanitize(tree.maintype)
        self._subtype = utils._sanitize(tree.subtype)

    @property
    def maintype(self):
        """The sanitized main type taken from the parse tree."""
        return self._maintype

    @property
    def subtype(self):
        """The sanitized subtype taken from the parse tree."""
        return self._subtype

    @property
    def content_type(self):
        """The full type in 'maintype/subtype' form."""
        return '/'.join((self.maintype, self.subtype))
486
487
class ContentDispositionHeader(ParameterizedMIMEHeader):

    """Content-Disposition header: adds the content_disposition attribute."""

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        disposition = self._parse_tree.content_disposition
        # None is kept as None; any other value is sanitized.
        if disposition is not None:
            disposition = utils._sanitize(disposition)
        self._content_disposition = disposition

    @property
    def content_disposition(self):
        """The sanitized disposition value from the parse tree, or None."""
        return self._content_disposition
500
501
class ContentTransferEncodingHeader:

    """Mixin for the Content-Transfer-Encoding header: adds 'cte'."""

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'parse_tree' and 'decoded' and extend 'defects'."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        # The parent init has stored the parse tree; take the encoding
        # name from it and sanitize it.
        encoding = self._parse_tree.cte
        self._cte = utils._sanitize(encoding)

    @property
    def cte(self):
        """The sanitized transfer encoding taken from the parse tree."""
        return self._cte
521
522
523# The header factory #
524
# Maps lower-cased header names to the specialized class that parses them.
# HeaderRegistry copies this table into its own registry when it is created
# with use_default_map true; a name that is absent from the registry falls
# back to the registry's default_class.
_default_header_map = {
    'subject':                      UniqueUnstructuredHeader,
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    }
546
class HeaderRegistry:

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is placed last in the __bases__ of every header class the
        factory creates.  default_class is the specialized class used when
        "name" (see __call__) has no entry in the registry.  use_default_map
        decides whether the registry starts out as a copy of the default
        name-to-class mapping (the default, True) or empty.

        """
        self.base_class = base_class
        self.default_class = default_class
        # Work on a private copy so that map_to_type never alters the shared
        # default table.
        self.registry = dict(_default_header_map) if use_default_map else {}

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        The name is stored lower-cased.

        """
        key = name.lower()
        self.registry[key] = cls

    def __getitem__(self, name):
        # Look the name up case-insensitively, then build a new class that
        # combines the specialized class with the common base class.
        specialized = self.registry.get(name.lower(), self.default_class)
        bases = (specialized, self.base_class)
        return type('_' + specialized.__name__, bases, {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        The class to instantiate is obtained by combining the factory
        base_class with the specialized class registered for the name (or
        default_class when there is none); name and value are then passed to
        that class's constructor.

        """
        header_cls = self[name]
        return header_cls(name, value)
590