1# -*- coding: utf-8 -*-
2import re
3from json import loads
4
5from webtest import forms
6from webtest import utils
7from webtest.compat import print_stderr
8from webtest.compat import splittype
9from webtest.compat import splithost
10from webtest.compat import PY3
11from webtest.compat import urlparse
12from webtest.compat import to_bytes
13
14from six import string_types
15from six import binary_type
16from six import text_type
17
18from bs4 import BeautifulSoup
19
20import webob
21
22
23class TestResponse(webob.Response):
24    """
25    Instances of this class are returned by
26    :class:`~webtest.app.TestApp` methods.
27    """
28
    # Request associated with this response.  Defaults to None here;
    # ``_follow``, ``goto`` and ``lxml`` read ``self.request.url``, so it is
    # presumably assigned by the test app before those are used (confirm).
    request = None
    # Cache behind the ``forms`` property; filled in by ``_parse_forms``.
    _forms_indexed = None
    # Parser name handed to BeautifulSoup (``html``) and to ``forms.Form``.
    parser_features = 'html.parser'
32
33    @property
34    def forms(self):
35        """
36        Returns a dictionary containing all the forms in the pages as
37        :class:`~webtest.forms.Form` objects. Indexes are both in
38        order (from zero) and by form id (if the form is given an id).
39
40        See :doc:`forms` for more info on form objects.
41        """
42        if self._forms_indexed is None:
43            self._parse_forms()
44        return self._forms_indexed
45
46    @property
47    def form(self):
48        """
49        If there is only one form on the page, return it as a
50        :class:`~webtest.forms.Form` object; raise a TypeError is
51        there are no form or multiple forms.
52        """
53        forms_ = self.forms
54        if not forms_:
55            raise TypeError(
56                "You used response.form, but no forms exist")
57        if 1 in forms_:
58            # There is more than one form
59            raise TypeError(
60                "You used response.form, but more than one form exists")
61        return forms_[0]
62
63    @property
64    def testbody(self):
65        self.decode_content()
66        if self.charset:
67            try:
68                return self.text
69            except UnicodeDecodeError:
70                return self.body.decode(self.charset, 'replace')
71        return self.body.decode('ascii', 'replace')
72
    # Matches a markup tag, case-insensitively and across newlines:
    # group 1 is the optional closing slash, group 2 the tag name and
    # group 3 everything up to the closing ``>``.  Not referenced by any
    # method visible in this part of the class.
    _tag_re = re.compile(r'<(/?)([:a-z0-9_\-]*)(.*?)>', re.S | re.I)
74
75    def _parse_forms(self):
76        forms_ = self._forms_indexed = {}
77        form_texts = [str(f) for f in self.html('form')]
78        for i, text in enumerate(form_texts):
79            form = forms.Form(self, text, self.parser_features)
80            forms_[i] = form
81            if form.id:
82                forms_[form.id] = form
83
84    def _follow(self, **kw):
85        location = self.headers['location']
86        abslocation = urlparse.urljoin(self.request.url, location)
87        type_, rest = splittype(abslocation)
88        host, path = splithost(rest)
89        # @@: We should test that it's not a remote redirect
90        return self.test_app.get(abslocation, **kw)
91
92    def follow(self, **kw):
93        """
94        If this response is a redirect, follow that redirect.  It is an
95        error if it is not a redirect response. Any keyword
96        arguments are passed to :class:`webtest.app.TestApp.get`. Returns
97        another :class:`TestResponse` object.
98        """
99        assert 300 <= self.status_int < 400, (
100            "You can only follow redirect responses (not %s)"
101            % self.status)
102        return self._follow(**kw)
103
104    def maybe_follow(self, **kw):
105        """
106        Follow all redirects. If this response is not a redirect, do nothing.
107        Any keyword arguments are passed to :class:`webtest.app.TestApp.get`.
108        Returns another :class:`TestResponse` object.
109        """
110        remaining_redirects = 100  # infinite loops protection
111        response = self
112
113        while 300 <= response.status_int < 400 and remaining_redirects:
114            response = response._follow(**kw)
115            remaining_redirects -= 1
116
117        assert remaining_redirects > 0, "redirects chain looks infinite"
118        return response
119
120    def click(self, description=None, linkid=None, href=None,
121              index=None, verbose=False,
122              extra_environ=None):
123        """
124        Click the link as described.  Each of ``description``,
125        ``linkid``, and ``url`` are *patterns*, meaning that they are
126        either strings (regular expressions), compiled regular
127        expressions (objects with a ``search`` method), or callables
128        returning true or false.
129
130        All the given patterns are ANDed together:
131
132        * ``description`` is a pattern that matches the contents of the
133          anchor (HTML and all -- everything between ``<a...>`` and
134          ``</a>``)
135
136        * ``linkid`` is a pattern that matches the ``id`` attribute of
137          the anchor.  It will receive the empty string if no id is
138          given.
139
140        * ``href`` is a pattern that matches the ``href`` of the anchor;
141          the literal content of that attribute, not the fully qualified
142          attribute.
143
144        If more than one link matches, then the ``index`` link is
145        followed.  If ``index`` is not given and more than one link
146        matches, or if no link matches, then ``IndexError`` will be
147        raised.
148
149        If you give ``verbose`` then messages will be printed about
150        each link, and why it does or doesn't match.  If you use
151        ``app.click(verbose=True)`` you'll see a list of all the
152        links.
153
154        You can use multiple criteria to essentially assert multiple
155        aspects about the link, e.g., where the link's destination is.
156        """
157        found_html, found_desc, found_attrs = self._find_element(
158            tag='a', href_attr='href',
159            href_extract=None,
160            content=description,
161            id=linkid,
162            href_pattern=href,
163            index=index, verbose=verbose)
164        return self.goto(str(found_attrs['uri']), extra_environ=extra_environ)
165
166    def clickbutton(self, description=None, buttonid=None, href=None,
167                    index=None, verbose=False):
168        """
169        Like :meth:`~webtest.response.TestResponse.click`, except looks
170        for link-like buttons.
171        This kind of button should look like
172        ``<button onclick="...location.href='url'...">``.
173        """
174        found_html, found_desc, found_attrs = self._find_element(
175            tag='button', href_attr='onclick',
176            href_extract=re.compile(r"location\.href='(.*?)'"),
177            content=description,
178            id=buttonid,
179            href_pattern=href,
180            index=index, verbose=verbose)
181        return self.goto(str(found_attrs['uri']))
182
183    def _find_element(self, tag, href_attr, href_extract,
184                      content, id,
185                      href_pattern,
186                      index, verbose):
187        content_pat = utils.make_pattern(content)
188        id_pat = utils.make_pattern(id)
189        href_pat = utils.make_pattern(href_pattern)
190
191        def printlog(s):
192            if verbose:
193                print(s)
194
195        found_links = []
196        total_links = 0
197        for element in self.html.find_all(tag):
198            el_html = str(element)
199            el_content = element.decode_contents()
200            attrs = element
201            if verbose:
202                printlog('Element: %r' % el_html)
203            if not attrs.get(href_attr):
204                printlog('  Skipped: no %s attribute' % href_attr)
205                continue
206            el_href = attrs[href_attr]
207            if href_extract:
208                m = href_extract.search(el_href)
209                if not m:
210                    printlog("  Skipped: doesn't match extract pattern")
211                    continue
212                el_href = m.group(1)
213            attrs['uri'] = el_href
214            if el_href.startswith('#'):
215                printlog('  Skipped: only internal fragment href')
216                continue
217            if el_href.startswith('javascript:'):
218                printlog('  Skipped: cannot follow javascript:')
219                continue
220            total_links += 1
221            if content_pat and not content_pat(el_content):
222                printlog("  Skipped: doesn't match description")
223                continue
224            if id_pat and not id_pat(attrs.get('id', '')):
225                printlog("  Skipped: doesn't match id")
226                continue
227            if href_pat and not href_pat(el_href):
228                printlog("  Skipped: doesn't match href")
229                continue
230            printlog("  Accepted")
231            found_links.append((el_html, el_content, attrs))
232        if not found_links:
233            raise IndexError(
234                "No matching elements found (from %s possible)"
235                % total_links)
236        if index is None:
237            if len(found_links) > 1:
238                raise IndexError(
239                    "Multiple links match: %s"
240                    % ', '.join([repr(anc) for anc, d, attr in found_links]))
241            found_link = found_links[0]
242        else:
243            try:
244                found_link = found_links[index]
245            except IndexError:
246                raise IndexError(
247                    "Only %s (out of %s) links match; index %s out of range"
248                    % (len(found_links), total_links, index))
249        return found_link
250
251    def goto(self, href, method='get', **args):
252        """
253        Go to the (potentially relative) link ``href``, using the
254        given method (``'get'`` or ``'post'``) and any extra arguments
255        you want to pass to the :meth:`webtest.app.TestApp.get` or
256        :meth:`webtest.app.TestApp.post` methods.
257
258        All hostnames and schemes will be ignored.
259        """
260        scheme, host, path, query, fragment = urlparse.urlsplit(href)
261        # We
262        scheme = host = fragment = ''
263        href = urlparse.urlunsplit((scheme, host, path, query, fragment))
264        href = urlparse.urljoin(self.request.url, href)
265        method = method.lower()
266        assert method in ('get', 'post'), (
267            'Only "get" or "post" are allowed for method (you gave %r)'
268            % method)
269
270        # encode unicode strings for the outside world
271        if not PY3 and getattr(self, '_use_unicode', False):
272            def to_str(s):
273                if isinstance(s, text_type):
274                    return s.encode(self.charset)
275                return s
276
277            href = to_str(href)
278
279            if 'params' in args:
280                args['params'] = [tuple(map(to_str, p))
281                                  for p in args['params']]
282
283            if 'upload_files' in args:
284                args['upload_files'] = [map(to_str, f)
285                                        for f in args['upload_files']]
286
287            if 'content_type' in args:
288                args['content_type'] = to_str(args['content_type'])
289
290        if method == 'get':
291            method = self.test_app.get
292        else:
293            method = self.test_app.post
294        return method(href, **args)
295
    # Runs of spaces, newlines, carriage returns and tabs; used by
    # ``normal_body`` to collapse each run into a single space.
    _normal_body_regex = re.compile(to_bytes(r'[ \n\r\t]+'))
297
298    @property
299    def normal_body(self):
300        """
301        Return the whitespace-normalized body
302        """
303        if getattr(self, '_normal_body', None) is None:
304            self._normal_body = self._normal_body_regex.sub(b' ', self.body)
305        return self._normal_body
306
    # Text counterpart of ``_normal_body_regex``; used by
    # ``unicode_normal_body`` on the decoded body.
    _unicode_normal_body_regex = re.compile('[ \\n\\r\\t]+')
308
309    @property
310    def unicode_normal_body(self):
311        """
312        Return the whitespace-normalized body, as unicode
313        """
314        if not self.charset:
315            raise AttributeError(
316                ("You cannot access Response.unicode_normal_body "
317                 "unless charset is set"))
318        if getattr(self, '_unicode_normal_body', None) is None:
319            self._unicode_normal_body = self._unicode_normal_body_regex.sub(
320                ' ', self.testbody)
321        return self._unicode_normal_body
322
323    def __contains__(self, s):
324        """
325        A response 'contains' a string if it is present in the body
326        of the response.  Whitespace is normalized when searching
327        for a string.
328        """
329        if not self.charset and isinstance(s, text_type):
330            s = s.encode('utf8')
331        if isinstance(s, binary_type):
332            return s in self.body or s in self.normal_body
333        return s in self.testbody or s in self.unicode_normal_body
334
335    def mustcontain(self, *strings, **kw):
336        """mustcontain(*strings, no=[])
337
338        Assert that the response contains all of the strings passed
339        in as arguments.
340
341        Equivalent to::
342
343            assert string in res
344
345        Can take a `no` keyword argument that can be a string or a
346        list of strings which must not be present in the response.
347        """
348        if 'no' in kw:
349            no = kw['no']
350            del kw['no']
351            if isinstance(no, string_types):
352                no = [no]
353        else:
354            no = []
355        if kw:
356            raise TypeError(
357                "The only keyword argument allowed is 'no'")
358        for s in strings:
359            if not s in self:
360                print_stderr("Actual response (no %r):" % s)
361                print_stderr(str(self))
362                raise IndexError(
363                    "Body does not contain string %r" % s)
364        for no_s in no:
365            if no_s in self:
366                print_stderr("Actual response (has %r)" % no_s)
367                print_stderr(str(self))
368                raise IndexError(
369                    "Body contains bad string %r" % no_s)
370
371    def __str__(self):
372        simple_body = str('\n').join([l for l in self.testbody.splitlines()
373                                     if l.strip()])
374        headers = [(n.title(), v)
375                   for n, v in self.headerlist
376                   if n.lower() != 'content-length']
377        headers.sort()
378        output = str('Response: %s\n%s\n%s') % (
379            self.status,
380            str('\n').join([str('%s: %s') % (n, v) for n, v in headers]),
381            simple_body)
382        if not PY3 and isinstance(output, text_type):
383            output = output.encode(self.charset or 'utf8', 'replace')
384        return output
385
386    def __unicode__(self):
387        output = str(self)
388        if PY3:
389            return output
390        return output.decode(self.charset or 'utf8', 'replace')
391
392    def __repr__(self):
393        # Specifically intended for doctests
394        if self.content_type:
395            ct = ' %s' % self.content_type
396        else:
397            ct = ''
398        if self.body:
399            br = repr(self.body)
400            if len(br) > 18:
401                br = br[:10] + '...' + br[-5:]
402                br += '/%s' % len(self.body)
403            body = ' body=%s' % br
404        else:
405            body = ' no body'
406        if self.location:
407            location = ' location: %s' % self.location
408        else:
409            location = ''
410        return ('<' + self.status + ct + location + body + '>')
411
412    @property
413    def html(self):
414        """
415        Returns the response as a `BeautifulSoup
416        <http://www.crummy.com/software/BeautifulSoup/documentation.html>`_
417        object.
418
419        Only works with HTML responses; other content-types raise
420        AttributeError.
421        """
422        if 'html' not in self.content_type:
423            raise AttributeError(
424                "Not an HTML response body (content-type: %s)"
425                % self.content_type)
426        soup = BeautifulSoup(self.testbody, self.parser_features)
427        return soup
428
429    @property
430    def xml(self):
431        """
432        Returns the response as an `ElementTree
433        <http://python.org/doc/current/lib/module-xml.etree.ElementTree.html>`_
434        object.
435
436        Only works with XML responses; other content-types raise
437        AttributeError
438        """
439        if 'xml' not in self.content_type:
440            raise AttributeError(
441                "Not an XML response body (content-type: %s)"
442                % self.content_type)
443        try:
444            from xml.etree import ElementTree
445        except ImportError:  # pragma: no cover
446            try:
447                import ElementTree
448            except ImportError:
449                try:
450                    from elementtree import ElementTree  # NOQA
451                except ImportError:
452                    raise ImportError(
453                        ("You must have ElementTree installed "
454                         "(or use Python 2.5) to use response.xml"))
455        # ElementTree can't parse unicode => use `body` instead of `testbody`
456        return ElementTree.XML(self.body)
457
458    @property
459    def lxml(self):
460        """
461        Returns the response as an `lxml object
462        <http://codespeak.net/lxml/>`_.  You must have lxml installed
463        to use this.
464
465        If this is an HTML response and you have lxml 2.x installed,
466        then an ``lxml.html.HTML`` object will be returned; if you
467        have an earlier version of lxml then a ``lxml.HTML`` object
468        will be returned.
469        """
470        if 'html' not in self.content_type and \
471           'xml' not in self.content_type:
472            raise AttributeError(
473                "Not an XML or HTML response body (content-type: %s)"
474                % self.content_type)
475        try:
476            from lxml import etree
477        except ImportError:  # pragma: no cover
478            raise ImportError(
479                "You must have lxml installed to use response.lxml")
480        try:
481            from lxml.html import fromstring
482        except ImportError:  # pragma: no cover
483            fromstring = etree.HTML
484        ## FIXME: would be nice to set xml:base, in some fashion
485        if self.content_type == 'text/html':
486            return fromstring(self.testbody, base_url=self.request.url)
487        else:
488            return etree.XML(self.testbody, base_url=self.request.url)
489
490    @property
491    def json(self):
492        """
493        Return the response as a JSON response.  You must have `simplejson
494        <http://goo.gl/B9g6s>`_ installed to use this, or be using a Python
495        version with the json module.
496
497        The content type must be one of json type to use this.
498        """
499        if not self.content_type.endswith(('+json', '/json')):
500            raise AttributeError(
501                "Not a JSON response body (content-type: %s)"
502                % self.content_type)
503        return loads(self.testbody)
504
505    @property
506    def pyquery(self):
507        """
508        Returns the response as a `PyQuery <http://pyquery.org/>`_ object.
509
510        Only works with HTML and XML responses; other content-types raise
511        AttributeError.
512        """
513        if 'html' not in self.content_type and 'xml' not in self.content_type:
514            raise AttributeError(
515                "Not an HTML or XML response body (content-type: %s)"
516                % self.content_type)
517        try:
518            from pyquery import PyQuery
519        except ImportError:  # pragma: no cover
520            raise ImportError(
521                "You must have PyQuery installed to use response.pyquery")
522        d = PyQuery(self.testbody)
523        return d
524
525    def showbrowser(self):
526        """
527        Show this response in a browser window (for debugging purposes,
528        when it's hard to read the HTML).
529        """
530        import webbrowser
531        import tempfile
532        f = tempfile.NamedTemporaryFile(prefix='webtest-page',
533                                        suffix='.html')
534        name = f.name
535        f.close()
536        f = open(name, 'w')
537        if PY3:
538            f.write(self.body.decode(self.charset or 'ascii', 'replace'))
539        else:
540            f.write(self.body)
541        f.close()
542        if name[0] != '/':  # pragma: no cover
543            # windows ...
544            url = 'file:///' + name
545        else:
546            url = 'file://' + name
547        webbrowser.open_new(url)
548