1"""Policy framework for the email package.
2
3Allows fine grained feature control of how the package parses and emits data.
4"""
5
6import abc
7from email import header
8from email import charset as _charset
9from email.utils import _has_surrogates
10
# Names exported by a star-import of this module.  _PolicyBase and the
# docstring helpers are deliberately absent from the list.
__all__ = [
    'Policy',
    'Compat32',
    'compat32',
    ]
16
17
18class _PolicyBase:
19
20    """Policy Object basic framework.
21
22    This class is useless unless subclassed.  A subclass should define
23    class attributes with defaults for any values that are to be
24    managed by the Policy object.  The constructor will then allow
25    non-default values to be set for these attributes at instance
26    creation time.  The instance will be callable, taking these same
27    attributes keyword arguments, and returning a new instance
28    identical to the called instance except for those values changed
29    by the keyword arguments.  Instances may be added, yielding new
30    instances with any non-default values from the right hand
31    operand overriding those in the left hand operand.  That is,
32
33        A + B == A(<non-default values of B>)
34
35    The repr of an instance can be used to reconstruct the object
36    if and only if the repr of the values can be used to reconstruct
37    those values.
38
39    """
40
41    def __init__(self, **kw):
42        """Create new Policy, possibly overriding some defaults.
43
44        See class docstring for a list of overridable attributes.
45
46        """
47        for name, value in kw.items():
48            if hasattr(self, name):
49                super(_PolicyBase,self).__setattr__(name, value)
50            else:
51                raise TypeError(
52                    "{!r} is an invalid keyword argument for {}".format(
53                        name, self.__class__.__name__))
54
55    def __repr__(self):
56        args = [ "{}={!r}".format(name, value)
57                 for name, value in self.__dict__.items() ]
58        return "{}({})".format(self.__class__.__name__, ', '.join(args))
59
60    def clone(self, **kw):
61        """Return a new instance with specified attributes changed.
62
63        The new instance has the same attribute values as the current object,
64        except for the changes passed in as keyword arguments.
65
66        """
67        newpolicy = self.__class__.__new__(self.__class__)
68        for attr, value in self.__dict__.items():
69            object.__setattr__(newpolicy, attr, value)
70        for attr, value in kw.items():
71            if not hasattr(self, attr):
72                raise TypeError(
73                    "{!r} is an invalid keyword argument for {}".format(
74                        attr, self.__class__.__name__))
75            object.__setattr__(newpolicy, attr, value)
76        return newpolicy
77
78    def __setattr__(self, name, value):
79        if hasattr(self, name):
80            msg = "{!r} object attribute {!r} is read-only"
81        else:
82            msg = "{!r} object has no attribute {!r}"
83        raise AttributeError(msg.format(self.__class__.__name__, name))
84
85    def __add__(self, other):
86        """Non-default values from right operand override those from left.
87
88        The object returned is a new instance of the subclass.
89
90        """
91        return self.clone(**other.__dict__)
92
93
94def _append_doc(doc, added_doc):
95    doc = doc.rsplit('\n', 1)[0]
96    added_doc = added_doc.split('\n', 1)[1]
97    return doc + '\n' + added_doc
98
99def _extend_docstrings(cls):
100    if cls.__doc__ and cls.__doc__.startswith('+'):
101        cls.__doc__ = _append_doc(cls.__bases__[0].__doc__, cls.__doc__)
102    for name, attr in cls.__dict__.items():
103        if attr.__doc__ and attr.__doc__.startswith('+'):
104            for c in (c for base in cls.__bases__ for c in base.mro()):
105                doc = getattr(getattr(c, name), '__doc__')
106                if doc:
107                    attr.__doc__ = _append_doc(doc, attr.__doc__)
108                    break
109    return cls
110
111
class Policy(_PolicyBase, metaclass=abc.ABCMeta):

    r"""Controls for how messages are interpreted and formatted.

    Most of the classes and many of the methods in the email package accept
    Policy objects as parameters.  A Policy object contains a set of values and
    functions that control how input is interpreted and how output is rendered.
    For example, the parameter 'raise_on_defect' controls whether or not an RFC
    violation results in an error being raised or not, while 'max_line_length'
    controls the maximum length of output lines when a Message is serialized.

    Any valid attribute may be overridden when a Policy is created by passing
    it as a keyword argument to the constructor.  Policy objects are immutable,
    but a new Policy object can be created with only certain values changed by
    calling the clone method of a Policy instance with keyword arguments.
    Policy objects can also be added, producing a new Policy object in which
    the non-default attributes set in the right hand operand overwrite those
    specified in the left operand.

    Settable attributes:

    raise_on_defect     -- If true, then defects should be raised as errors.
                           Default: False.

    linesep             -- string containing the value to use as separation
                           between output lines.  Default '\n'.

    cte_type            -- Type of allowed content transfer encodings

                           7bit  -- ASCII only
                           8bit  -- Content-Transfer-Encoding: 8bit is allowed

                           Default: 8bit.  Also controls the disposition of
                           (RFC invalid) binary data in headers; see the
                           documentation of the fold_binary method.

    max_line_length     -- maximum length of lines, excluding 'linesep',
                           during serialization.  None or 0 means no line
                           wrapping is done.  Default is 78.

    mangle_from_        -- a flag that, when True escapes From_ lines in the
                           body of the message by putting a `>' in front of
                           them. This is used when the message is being
                           serialized by a generator. Default: False (the
                           Compat32 policy overrides this to True).

    message_factory     -- the class to use to create new message objects.
                           If the value is None, the default is Message.

    """

    raise_on_defect = False
    linesep = '\n'
    cte_type = '8bit'
    max_line_length = 78
    mangle_from_ = False
    message_factory = None

    def handle_defect(self, obj, defect):
        """Based on policy, either raise defect or call register_defect.

            handle_defect(obj, defect)

        defect should be a Defect subclass, but in any case must be an
        Exception subclass.  obj is the object on which the defect should be
        registered if it is not raised.  If the raise_on_defect is True, the
        defect is raised as an error, otherwise the object and the defect are
        passed to register_defect.

        This method is intended to be called by parsers that discover defects.
        The email package parsers always call it with Defect instances.

        """
        if self.raise_on_defect:
            raise defect
        self.register_defect(obj, defect)

    def register_defect(self, obj, defect):
        """Record 'defect' on 'obj'.

        Called by handle_defect if raise_on_defect is False.  This method is
        part of the Policy API so that Policy subclasses can implement custom
        defect handling.  The default implementation calls the append method of
        the defects attribute of obj.  The objects used by the email package by
        default that get passed to this method will always have a defects
        attribute with an append method.

        """
        obj.defects.append(defect)

    def header_max_count(self, name):
        """Return the maximum allowed number of headers named 'name'.

        Called when a header is added to a Message object.  If the returned
        value is not 0 or None, and there are already a number of headers with
        the name 'name' equal to the value returned, a ValueError is raised.

        Because the default behavior of Message's __setitem__ is to append the
        value to the list of headers, it is easy to create duplicate headers
        without realizing it.  This method allows certain headers to be limited
        in the number of instances of that header that may be added to a
        Message programmatically.  (The limit is not observed by the parser,
        which will faithfully produce as many headers as exist in the message
        being parsed.)

        The default implementation returns None for all header names.
        """
        return None

    @abc.abstractmethod
    def header_source_parse(self, sourcelines):
        """Given a list of linesep terminated strings constituting the lines of
        a single header, return the (name, value) tuple that should be stored
        in the model.  The input lines should retain their terminating linesep
        characters.  The lines passed in by the email package may contain
        surrogateescaped binary data.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def header_store_parse(self, name, value):
        """Given the header name and the value provided by the application
        program, return the (name, value) that should be stored in the model.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def header_fetch_parse(self, name, value):
        """Given the header name and the value from the model, return the value
        to be returned to the application program that is requesting that
        header.  The value passed in by the email package may contain
        surrogateescaped binary data if the lines were parsed by a BytesParser.
        The returned value should not contain any surrogateescaped data.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def fold(self, name, value):
        """Given the header name and the value from the model, return a string
        containing linesep characters that implement the folding of the header
        according to the policy controls.  The value passed in by the email
        package may contain surrogateescaped binary data if the lines were
        parsed by a BytesParser.  The returned value should not contain any
        surrogateescaped data.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def fold_binary(self, name, value):
        """Given the header name and the value from the model, return binary
        data containing linesep characters that implement the folding of the
        header according to the policy controls.  The value passed in by the
        email package may contain surrogateescaped binary data.

        """
        raise NotImplementedError
269
270
@_extend_docstrings
class Compat32(Policy):

    """+
    This is the backward compatibility policy: it reproduces the behavior
    of version 5.1 of the email package.
    """

    mangle_from_ = True

    def _sanitize_header(self, name, value):
        # Only a str that carries surrogates needs wrapping: it becomes a
        # Header in the unknown-8bit charset so that the bytes are encoded
        # as encoded words.  A value that is not a str is assumed to be a
        # header object already and, like a clean str, is passed through.
        if isinstance(value, str) and _has_surrogates(value):
            return header.Header(value, charset=_charset.UNKNOWN8BIT,
                                 header_name=name)
        return value

    def header_source_parse(self, sourcelines):
        """+
        Everything before the first ':' is the name, and it is returned
        exactly as found.  The value is the rest of the first line with its
        leading spaces and tabs removed, followed by all remaining lines
        joined together, with any trailing carriage return or linefeed
        characters stripped from the end.

        """
        name, remainder = sourcelines[0].split(':', 1)
        first_line = remainder.lstrip(' \t')
        continuation = ''.join(sourcelines[1:])
        return (name, (first_line + continuation).rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        Both the name and the value are handed back exactly as received.
        """
        return (name, value)

    def header_fetch_parse(self, name, value):
        """+
        A value containing binary data is turned into a Header object that
        uses the unknown-8bit charset; any other value is returned as is.
        """
        return self._sanitize_header(name, value)

    def fold(self, name, value):
        """+
        Folding is done by the Header folding algorithm, which keeps the
        line breaks already present in the value and wraps each resulting
        line to max_line_length.  Non-ASCII binary data are CTE encoded
        using the unknown-8bit charset.

        """
        return self._fold(name, value, sanitize=True)

    def fold_binary(self, name, value):
        """+
        Folding is done by the Header folding algorithm, which keeps the
        line breaks already present in the value and wraps each resulting
        line to max_line_length.  When cte_type is 7bit, non-ascii binary
        data is CTE encoded using the unknown-8bit charset.  Otherwise the
        original source header is used, with its existing line breaks
        and/or binary data.

        """
        seven_bit_only = self.cte_type == '7bit'
        text = self._fold(name, value, sanitize=seven_bit_only)
        return text.encode('ascii', 'surrogateescape')

    def _fold(self, name, value, sanitize):
        # Build "<name>: <folded value><linesep>" as a list of pieces.
        pieces = ['%s: ' % name]
        if not isinstance(value, str):
            # Anything that is not a str is taken to be a Header-like object.
            hdr = value
        elif not _has_surrogates(value):
            hdr = header.Header(value, header_name=name)
        elif sanitize:
            hdr = header.Header(value,
                                charset=_charset.UNKNOWN8BIT,
                                header_name=name)
        else:
            # Raw 8bit data of unknown encoding cannot be split safely: an
            # ascii-compatible encoding would survive a normal ascii split,
            # but a multibyte one could be broken in the middle of a
            # character.  Since there is no way to tell, the string is
            # emitted unsplit, accepting that the line may be too long.
            pieces.append(value)
            hdr = None
        if hdr is not None:
            # Header.encode treats maxlinelen=None as "use the default of
            # 78" (the RFC 2822 recommendation), so an unset policy limit
            # is passed as 0 instead.
            limit = self.max_line_length
            if limit is None:
                limit = 0
            pieces.append(hdr.encode(linesep=self.linesep, maxlinelen=limit))
        pieces.append(self.linesep)
        return ''.join(pieces)
372
373
# Module-level Compat32 instance created without any overrides, so every
# setting keeps the value defined on the class.
compat32 = Compat32()
375