# -*- coding: utf-8 -*-
import re
from json import loads

from webtest import forms
from webtest import utils
from webtest.compat import print_stderr
from webtest.compat import splittype
from webtest.compat import splithost
from webtest.compat import PY3
from webtest.compat import urlparse
from webtest.compat import to_bytes

from six import string_types
from six import binary_type
from six import text_type

from bs4 import BeautifulSoup

import webob


class TestResponse(webob.Response):
    """
    Instances of this class are returned by
    :class:`~webtest.app.TestApp` methods.
    """

    request = None
    _forms_indexed = None
    parser_features = 'html.parser'

    @property
    def forms(self):
        """
        Returns a dictionary containing all the forms in the page as
        :class:`~webtest.forms.Form` objects. Forms are indexed both by
        their order on the page (from zero) and by form id (if the form
        has one).

        See :doc:`forms` for more info on form objects.
        """
        if self._forms_indexed is None:
            self._parse_forms()
        return self._forms_indexed

    @property
    def form(self):
        """
        If there is only one form on the page, return it as a
        :class:`~webtest.forms.Form` object; raise a TypeError if there
        is no form or if there are multiple forms.
        """
        forms_ = self.forms
        if not forms_:
            raise TypeError(
                "You used response.form, but no forms exist")
        if 1 in forms_:
            # There is more than one form
            raise TypeError(
                "You used response.form, but more than one form exists")
        return forms_[0]

    @property
    def testbody(self):
        self.decode_content()
        if self.charset:
            try:
                return self.text
            except UnicodeDecodeError:
                return self.body.decode(self.charset, 'replace')
        return self.body.decode('ascii', 'replace')

    _tag_re = re.compile(r'<(/?)([:a-z0-9_\-]*)(.*?)>', re.S | re.I)

    def _parse_forms(self):
        forms_ = self._forms_indexed = {}
        form_texts = [str(f) for f in self.html('form')]
        for i, text in enumerate(form_texts):
            form = forms.Form(self, text, self.parser_features)
            forms_[i] = form
            if form.id:
                forms_[form.id] = form

    def _follow(self, **kw):
        location = self.headers['location']
        abslocation = urlparse.urljoin(self.request.url, location)
        type_, rest = splittype(abslocation)
        host, path = splithost(rest)
        # @@: We should test that it's not a remote redirect
        return self.test_app.get(abslocation, **kw)

    def follow(self, **kw):
        """
        If this response is a redirect, follow that redirect. It is an
        error if it is not a redirect response. Any keyword arguments
        are passed to :class:`webtest.app.TestApp.get`. Returns another
        :class:`TestResponse` object.
        """
        assert 300 <= self.status_int < 400, (
            "You can only follow redirect responses (not %s)"
            % self.status)
        return self._follow(**kw)

    def maybe_follow(self, **kw):
        """
        Follow all redirects. If this response is not a redirect, do nothing.
        Any keyword arguments are passed to :class:`webtest.app.TestApp.get`.
        Returns another :class:`TestResponse` object.
        """
        remaining_redirects = 100  # protection against infinite redirect loops
        response = self

        while 300 <= response.status_int < 400 and remaining_redirects:
            response = response._follow(**kw)
            remaining_redirects -= 1

        assert remaining_redirects > 0, "redirects chain looks infinite"
        return response
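
    # Illustrative usage sketch (comments only, not part of the API).
    # ``app`` is assumed to be a webtest.TestApp wrapping your WSGI app:
    #
    #     res = app.get('/login')                # a TestResponse
    #     form = res.form                        # the page's only form
    #     form['username'] = 'alice'
    #     res = form.submit().maybe_follow()     # follow any redirects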
109 """ 110 remaining_redirects = 100 # infinite loops protection 111 response = self 112 113 while 300 <= response.status_int < 400 and remaining_redirects: 114 response = response._follow(**kw) 115 remaining_redirects -= 1 116 117 assert remaining_redirects > 0, "redirects chain looks infinite" 118 return response 119 120 def click(self, description=None, linkid=None, href=None, 121 index=None, verbose=False, 122 extra_environ=None): 123 """ 124 Click the link as described. Each of ``description``, 125 ``linkid``, and ``url`` are *patterns*, meaning that they are 126 either strings (regular expressions), compiled regular 127 expressions (objects with a ``search`` method), or callables 128 returning true or false. 129 130 All the given patterns are ANDed together: 131 132 * ``description`` is a pattern that matches the contents of the 133 anchor (HTML and all -- everything between ``<a...>`` and 134 ``</a>``) 135 136 * ``linkid`` is a pattern that matches the ``id`` attribute of 137 the anchor. It will receive the empty string if no id is 138 given. 139 140 * ``href`` is a pattern that matches the ``href`` of the anchor; 141 the literal content of that attribute, not the fully qualified 142 attribute. 143 144 If more than one link matches, then the ``index`` link is 145 followed. If ``index`` is not given and more than one link 146 matches, or if no link matches, then ``IndexError`` will be 147 raised. 148 149 If you give ``verbose`` then messages will be printed about 150 each link, and why it does or doesn't match. If you use 151 ``app.click(verbose=True)`` you'll see a list of all the 152 links. 153 154 You can use multiple criteria to essentially assert multiple 155 aspects about the link, e.g., where the link's destination is. 156 """ 157 found_html, found_desc, found_attrs = self._find_element( 158 tag='a', href_attr='href', 159 href_extract=None, 160 content=description, 161 id=linkid, 162 href_pattern=href, 163 index=index, verbose=verbose) 164 return self.goto(str(found_attrs['uri']), extra_environ=extra_environ) 165 166 def clickbutton(self, description=None, buttonid=None, href=None, 167 index=None, verbose=False): 168 """ 169 Like :meth:`~webtest.response.TestResponse.click`, except looks 170 for link-like buttons. 171 This kind of button should look like 172 ``<button onclick="...location.href='url'...">``. 
173 """ 174 found_html, found_desc, found_attrs = self._find_element( 175 tag='button', href_attr='onclick', 176 href_extract=re.compile(r"location\.href='(.*?)'"), 177 content=description, 178 id=buttonid, 179 href_pattern=href, 180 index=index, verbose=verbose) 181 return self.goto(str(found_attrs['uri'])) 182 183 def _find_element(self, tag, href_attr, href_extract, 184 content, id, 185 href_pattern, 186 index, verbose): 187 content_pat = utils.make_pattern(content) 188 id_pat = utils.make_pattern(id) 189 href_pat = utils.make_pattern(href_pattern) 190 191 def printlog(s): 192 if verbose: 193 print(s) 194 195 found_links = [] 196 total_links = 0 197 for element in self.html.find_all(tag): 198 el_html = str(element) 199 el_content = element.decode_contents() 200 attrs = element 201 if verbose: 202 printlog('Element: %r' % el_html) 203 if not attrs.get(href_attr): 204 printlog(' Skipped: no %s attribute' % href_attr) 205 continue 206 el_href = attrs[href_attr] 207 if href_extract: 208 m = href_extract.search(el_href) 209 if not m: 210 printlog(" Skipped: doesn't match extract pattern") 211 continue 212 el_href = m.group(1) 213 attrs['uri'] = el_href 214 if el_href.startswith('#'): 215 printlog(' Skipped: only internal fragment href') 216 continue 217 if el_href.startswith('javascript:'): 218 printlog(' Skipped: cannot follow javascript:') 219 continue 220 total_links += 1 221 if content_pat and not content_pat(el_content): 222 printlog(" Skipped: doesn't match description") 223 continue 224 if id_pat and not id_pat(attrs.get('id', '')): 225 printlog(" Skipped: doesn't match id") 226 continue 227 if href_pat and not href_pat(el_href): 228 printlog(" Skipped: doesn't match href") 229 continue 230 printlog(" Accepted") 231 found_links.append((el_html, el_content, attrs)) 232 if not found_links: 233 raise IndexError( 234 "No matching elements found (from %s possible)" 235 % total_links) 236 if index is None: 237 if len(found_links) > 1: 238 raise IndexError( 239 "Multiple links match: %s" 240 % ', '.join([repr(anc) for anc, d, attr in found_links])) 241 found_link = found_links[0] 242 else: 243 try: 244 found_link = found_links[index] 245 except IndexError: 246 raise IndexError( 247 "Only %s (out of %s) links match; index %s out of range" 248 % (len(found_links), total_links, index)) 249 return found_link 250 251 def goto(self, href, method='get', **args): 252 """ 253 Go to the (potentially relative) link ``href``, using the 254 given method (``'get'`` or ``'post'``) and any extra arguments 255 you want to pass to the :meth:`webtest.app.TestApp.get` or 256 :meth:`webtest.app.TestApp.post` methods. 257 258 All hostnames and schemes will be ignored. 
259 """ 260 scheme, host, path, query, fragment = urlparse.urlsplit(href) 261 # We 262 scheme = host = fragment = '' 263 href = urlparse.urlunsplit((scheme, host, path, query, fragment)) 264 href = urlparse.urljoin(self.request.url, href) 265 method = method.lower() 266 assert method in ('get', 'post'), ( 267 'Only "get" or "post" are allowed for method (you gave %r)' 268 % method) 269 270 # encode unicode strings for the outside world 271 if not PY3 and getattr(self, '_use_unicode', False): 272 def to_str(s): 273 if isinstance(s, text_type): 274 return s.encode(self.charset) 275 return s 276 277 href = to_str(href) 278 279 if 'params' in args: 280 args['params'] = [tuple(map(to_str, p)) 281 for p in args['params']] 282 283 if 'upload_files' in args: 284 args['upload_files'] = [map(to_str, f) 285 for f in args['upload_files']] 286 287 if 'content_type' in args: 288 args['content_type'] = to_str(args['content_type']) 289 290 if method == 'get': 291 method = self.test_app.get 292 else: 293 method = self.test_app.post 294 return method(href, **args) 295 296 _normal_body_regex = re.compile(to_bytes(r'[ \n\r\t]+')) 297 298 @property 299 def normal_body(self): 300 """ 301 Return the whitespace-normalized body 302 """ 303 if getattr(self, '_normal_body', None) is None: 304 self._normal_body = self._normal_body_regex.sub(b' ', self.body) 305 return self._normal_body 306 307 _unicode_normal_body_regex = re.compile('[ \\n\\r\\t]+') 308 309 @property 310 def unicode_normal_body(self): 311 """ 312 Return the whitespace-normalized body, as unicode 313 """ 314 if not self.charset: 315 raise AttributeError( 316 ("You cannot access Response.unicode_normal_body " 317 "unless charset is set")) 318 if getattr(self, '_unicode_normal_body', None) is None: 319 self._unicode_normal_body = self._unicode_normal_body_regex.sub( 320 ' ', self.testbody) 321 return self._unicode_normal_body 322 323 def __contains__(self, s): 324 """ 325 A response 'contains' a string if it is present in the body 326 of the response. Whitespace is normalized when searching 327 for a string. 328 """ 329 if not self.charset and isinstance(s, text_type): 330 s = s.encode('utf8') 331 if isinstance(s, binary_type): 332 return s in self.body or s in self.normal_body 333 return s in self.testbody or s in self.unicode_normal_body 334 335 def mustcontain(self, *strings, **kw): 336 """mustcontain(*strings, no=[]) 337 338 Assert that the response contains all of the strings passed 339 in as arguments. 340 341 Equivalent to:: 342 343 assert string in res 344 345 Can take a `no` keyword argument that can be a string or a 346 list of strings which must not be present in the response. 
347 """ 348 if 'no' in kw: 349 no = kw['no'] 350 del kw['no'] 351 if isinstance(no, string_types): 352 no = [no] 353 else: 354 no = [] 355 if kw: 356 raise TypeError( 357 "The only keyword argument allowed is 'no'") 358 for s in strings: 359 if not s in self: 360 print_stderr("Actual response (no %r):" % s) 361 print_stderr(str(self)) 362 raise IndexError( 363 "Body does not contain string %r" % s) 364 for no_s in no: 365 if no_s in self: 366 print_stderr("Actual response (has %r)" % no_s) 367 print_stderr(str(self)) 368 raise IndexError( 369 "Body contains bad string %r" % no_s) 370 371 def __str__(self): 372 simple_body = str('\n').join([l for l in self.testbody.splitlines() 373 if l.strip()]) 374 headers = [(n.title(), v) 375 for n, v in self.headerlist 376 if n.lower() != 'content-length'] 377 headers.sort() 378 output = str('Response: %s\n%s\n%s') % ( 379 self.status, 380 str('\n').join([str('%s: %s') % (n, v) for n, v in headers]), 381 simple_body) 382 if not PY3 and isinstance(output, text_type): 383 output = output.encode(self.charset or 'utf8', 'replace') 384 return output 385 386 def __unicode__(self): 387 output = str(self) 388 if PY3: 389 return output 390 return output.decode(self.charset or 'utf8', 'replace') 391 392 def __repr__(self): 393 # Specifically intended for doctests 394 if self.content_type: 395 ct = ' %s' % self.content_type 396 else: 397 ct = '' 398 if self.body: 399 br = repr(self.body) 400 if len(br) > 18: 401 br = br[:10] + '...' + br[-5:] 402 br += '/%s' % len(self.body) 403 body = ' body=%s' % br 404 else: 405 body = ' no body' 406 if self.location: 407 location = ' location: %s' % self.location 408 else: 409 location = '' 410 return ('<' + self.status + ct + location + body + '>') 411 412 @property 413 def html(self): 414 """ 415 Returns the response as a `BeautifulSoup 416 <http://www.crummy.com/software/BeautifulSoup/documentation.html>`_ 417 object. 418 419 Only works with HTML responses; other content-types raise 420 AttributeError. 421 """ 422 if 'html' not in self.content_type: 423 raise AttributeError( 424 "Not an HTML response body (content-type: %s)" 425 % self.content_type) 426 soup = BeautifulSoup(self.testbody, self.parser_features) 427 return soup 428 429 @property 430 def xml(self): 431 """ 432 Returns the response as an `ElementTree 433 <http://python.org/doc/current/lib/module-xml.etree.ElementTree.html>`_ 434 object. 435 436 Only works with XML responses; other content-types raise 437 AttributeError 438 """ 439 if 'xml' not in self.content_type: 440 raise AttributeError( 441 "Not an XML response body (content-type: %s)" 442 % self.content_type) 443 try: 444 from xml.etree import ElementTree 445 except ImportError: # pragma: no cover 446 try: 447 import ElementTree 448 except ImportError: 449 try: 450 from elementtree import ElementTree # NOQA 451 except ImportError: 452 raise ImportError( 453 ("You must have ElementTree installed " 454 "(or use Python 2.5) to use response.xml")) 455 # ElementTree can't parse unicode => use `body` instead of `testbody` 456 return ElementTree.XML(self.body) 457 458 @property 459 def lxml(self): 460 """ 461 Returns the response as an `lxml object 462 <http://codespeak.net/lxml/>`_. You must have lxml installed 463 to use this. 464 465 If this is an HTML response and you have lxml 2.x installed, 466 then an ``lxml.html.HTML`` object will be returned; if you 467 have an earlier version of lxml then a ``lxml.HTML`` object 468 will be returned. 
469 """ 470 if 'html' not in self.content_type and \ 471 'xml' not in self.content_type: 472 raise AttributeError( 473 "Not an XML or HTML response body (content-type: %s)" 474 % self.content_type) 475 try: 476 from lxml import etree 477 except ImportError: # pragma: no cover 478 raise ImportError( 479 "You must have lxml installed to use response.lxml") 480 try: 481 from lxml.html import fromstring 482 except ImportError: # pragma: no cover 483 fromstring = etree.HTML 484 ## FIXME: would be nice to set xml:base, in some fashion 485 if self.content_type == 'text/html': 486 return fromstring(self.testbody, base_url=self.request.url) 487 else: 488 return etree.XML(self.testbody, base_url=self.request.url) 489 490 @property 491 def json(self): 492 """ 493 Return the response as a JSON response. You must have `simplejson 494 <http://goo.gl/B9g6s>`_ installed to use this, or be using a Python 495 version with the json module. 496 497 The content type must be one of json type to use this. 498 """ 499 if not self.content_type.endswith(('+json', '/json')): 500 raise AttributeError( 501 "Not a JSON response body (content-type: %s)" 502 % self.content_type) 503 return loads(self.testbody) 504 505 @property 506 def pyquery(self): 507 """ 508 Returns the response as a `PyQuery <http://pyquery.org/>`_ object. 509 510 Only works with HTML and XML responses; other content-types raise 511 AttributeError. 512 """ 513 if 'html' not in self.content_type and 'xml' not in self.content_type: 514 raise AttributeError( 515 "Not an HTML or XML response body (content-type: %s)" 516 % self.content_type) 517 try: 518 from pyquery import PyQuery 519 except ImportError: # pragma: no cover 520 raise ImportError( 521 "You must have PyQuery installed to use response.pyquery") 522 d = PyQuery(self.testbody) 523 return d 524 525 def showbrowser(self): 526 """ 527 Show this response in a browser window (for debugging purposes, 528 when it's hard to read the HTML). 529 """ 530 import webbrowser 531 import tempfile 532 f = tempfile.NamedTemporaryFile(prefix='webtest-page', 533 suffix='.html') 534 name = f.name 535 f.close() 536 f = open(name, 'w') 537 if PY3: 538 f.write(self.body.decode(self.charset or 'ascii', 'replace')) 539 else: 540 f.write(self.body) 541 f.close() 542 if name[0] != '/': # pragma: no cover 543 # windows ... 544 url = 'file:///' + name 545 else: 546 url = 'file://' + name 547 webbrowser.open_new(url) 548