1"""Regression tests for what was in Python 2's "urllib" module""" 2 3import urllib.parse 4import urllib.request 5import urllib.error 6import http.client 7import email.message 8import io 9import unittest 10from unittest.mock import patch 11from test import support 12import os 13try: 14 import ssl 15except ImportError: 16 ssl = None 17import sys 18import tempfile 19from nturl2path import url2pathname, pathname2url 20 21from base64 import b64encode 22import collections 23 24 25def hexescape(char): 26 """Escape char as RFC 2396 specifies""" 27 hex_repr = hex(ord(char))[2:].upper() 28 if len(hex_repr) == 1: 29 hex_repr = "0%s" % hex_repr 30 return "%" + hex_repr 31 32# Shortcut for testing FancyURLopener 33_urlopener = None 34 35 36def urlopen(url, data=None, proxies=None): 37 """urlopen(url [, data]) -> open file-like object""" 38 global _urlopener 39 if proxies is not None: 40 opener = urllib.request.FancyURLopener(proxies=proxies) 41 elif not _urlopener: 42 opener = FancyURLopener() 43 _urlopener = opener 44 else: 45 opener = _urlopener 46 if data is None: 47 return opener.open(url) 48 else: 49 return opener.open(url, data) 50 51 52def FancyURLopener(): 53 with support.check_warnings( 54 ('FancyURLopener style of invoking requests is deprecated.', 55 DeprecationWarning)): 56 return urllib.request.FancyURLopener() 57 58 59def fakehttp(fakedata): 60 class FakeSocket(io.BytesIO): 61 io_refs = 1 62 63 def sendall(self, data): 64 FakeHTTPConnection.buf = data 65 66 def makefile(self, *args, **kwds): 67 self.io_refs += 1 68 return self 69 70 def read(self, amt=None): 71 if self.closed: 72 return b"" 73 return io.BytesIO.read(self, amt) 74 75 def readline(self, length=None): 76 if self.closed: 77 return b"" 78 return io.BytesIO.readline(self, length) 79 80 def close(self): 81 self.io_refs -= 1 82 if self.io_refs == 0: 83 io.BytesIO.close(self) 84 85 class FakeHTTPConnection(http.client.HTTPConnection): 86 87 # buffer to store data for verification in urlopen tests. 88 buf = None 89 90 def connect(self): 91 self.sock = FakeSocket(self.fakedata) 92 type(self).fakesock = self.sock 93 FakeHTTPConnection.fakedata = fakedata 94 95 return FakeHTTPConnection 96 97 98class FakeHTTPMixin(object): 99 def fakehttp(self, fakedata): 100 self._connection_class = http.client.HTTPConnection 101 http.client.HTTPConnection = fakehttp(fakedata) 102 103 def unfakehttp(self): 104 http.client.HTTPConnection = self._connection_class 105 106 107class FakeFTPMixin(object): 108 def fakeftp(self): 109 class FakeFtpWrapper(object): 110 def __init__(self, user, passwd, host, port, dirs, timeout=None, 111 persistent=True): 112 pass 113 114 def retrfile(self, file, type): 115 return io.BytesIO(), 0 116 117 def close(self): 118 pass 119 120 self._ftpwrapper_class = urllib.request.ftpwrapper 121 urllib.request.ftpwrapper = FakeFtpWrapper 122 123 def unfakeftp(self): 124 urllib.request.ftpwrapper = self._ftpwrapper_class 125 126 127class urlopen_FileTests(unittest.TestCase): 128 """Test urlopen() opening a temporary file. 129 130 Try to test as much functionality as possible so as to cut down on reliance 131 on connecting to the Net for testing. 132 133 """ 134 135 def setUp(self): 136 # Create a temp file to use for testing 137 self.text = bytes("test_urllib: %s\n" % self.__class__.__name__, 138 "ascii") 139 f = open(support.TESTFN, 'wb') 140 try: 141 f.write(self.text) 142 finally: 143 f.close() 144 self.pathname = support.TESTFN 145 self.returned_obj = urlopen("file:%s" % self.pathname) 146 147 def tearDown(self): 148 """Shut down the open object""" 149 self.returned_obj.close() 150 os.remove(support.TESTFN) 151 152 def test_interface(self): 153 # Make sure object returned by urlopen() has the specified methods 154 for attr in ("read", "readline", "readlines", "fileno", 155 "close", "info", "geturl", "getcode", "__iter__"): 156 self.assertTrue(hasattr(self.returned_obj, attr), 157 "object returned by urlopen() lacks %s attribute" % 158 attr) 159 160 def test_read(self): 161 self.assertEqual(self.text, self.returned_obj.read()) 162 163 def test_readline(self): 164 self.assertEqual(self.text, self.returned_obj.readline()) 165 self.assertEqual(b'', self.returned_obj.readline(), 166 "calling readline() after exhausting the file did not" 167 " return an empty string") 168 169 def test_readlines(self): 170 lines_list = self.returned_obj.readlines() 171 self.assertEqual(len(lines_list), 1, 172 "readlines() returned the wrong number of lines") 173 self.assertEqual(lines_list[0], self.text, 174 "readlines() returned improper text") 175 176 def test_fileno(self): 177 file_num = self.returned_obj.fileno() 178 self.assertIsInstance(file_num, int, "fileno() did not return an int") 179 self.assertEqual(os.read(file_num, len(self.text)), self.text, 180 "Reading on the file descriptor returned by fileno() " 181 "did not return the expected text") 182 183 def test_close(self): 184 # Test close() by calling it here and then having it be called again 185 # by the tearDown() method for the test 186 self.returned_obj.close() 187 188 def test_info(self): 189 self.assertIsInstance(self.returned_obj.info(), email.message.Message) 190 191 def test_geturl(self): 192 self.assertEqual(self.returned_obj.geturl(), self.pathname) 193 194 def test_getcode(self): 195 self.assertIsNone(self.returned_obj.getcode()) 196 197 def test_iter(self): 198 # Test iterator 199 # Don't need to count number of iterations since test would fail the 200 # instant it returned anything beyond the first line from the 201 # comparison. 202 # Use the iterator in the usual implicit way to test for ticket #4608. 203 for line in self.returned_obj: 204 self.assertEqual(line, self.text) 205 206 def test_relativelocalfile(self): 207 self.assertRaises(ValueError,urllib.request.urlopen,'./' + self.pathname) 208 209class ProxyTests(unittest.TestCase): 210 211 def setUp(self): 212 # Records changes to env vars 213 self.env = support.EnvironmentVarGuard() 214 # Delete all proxy related env vars 215 for k in list(os.environ): 216 if 'proxy' in k.lower(): 217 self.env.unset(k) 218 219 def tearDown(self): 220 # Restore all proxy related env vars 221 self.env.__exit__() 222 del self.env 223 224 def test_getproxies_environment_keep_no_proxies(self): 225 self.env.set('NO_PROXY', 'localhost') 226 proxies = urllib.request.getproxies_environment() 227 # getproxies_environment use lowered case truncated (no '_proxy') keys 228 self.assertEqual('localhost', proxies['no']) 229 # List of no_proxies with space. 230 self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234') 231 self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com')) 232 self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com:8888')) 233 self.assertTrue(urllib.request.proxy_bypass_environment('newdomain.com:1234')) 234 235 def test_proxy_cgi_ignore(self): 236 try: 237 self.env.set('HTTP_PROXY', 'http://somewhere:3128') 238 proxies = urllib.request.getproxies_environment() 239 self.assertEqual('http://somewhere:3128', proxies['http']) 240 self.env.set('REQUEST_METHOD', 'GET') 241 proxies = urllib.request.getproxies_environment() 242 self.assertNotIn('http', proxies) 243 finally: 244 self.env.unset('REQUEST_METHOD') 245 self.env.unset('HTTP_PROXY') 246 247 def test_proxy_bypass_environment_host_match(self): 248 bypass = urllib.request.proxy_bypass_environment 249 self.env.set('NO_PROXY', 250 'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t') 251 self.assertTrue(bypass('localhost')) 252 self.assertTrue(bypass('LocalHost')) # MixedCase 253 self.assertTrue(bypass('LOCALHOST')) # UPPERCASE 254 self.assertTrue(bypass('newdomain.com:1234')) 255 self.assertTrue(bypass('foo.d.o.t')) # issue 29142 256 self.assertTrue(bypass('anotherdomain.com:8888')) 257 self.assertTrue(bypass('www.newdomain.com:1234')) 258 self.assertFalse(bypass('prelocalhost')) 259 self.assertFalse(bypass('newdomain.com')) # no port 260 self.assertFalse(bypass('newdomain.com:1235')) # wrong port 261 262class ProxyTests_withOrderedEnv(unittest.TestCase): 263 264 def setUp(self): 265 # We need to test conditions, where variable order _is_ significant 266 self._saved_env = os.environ 267 # Monkey patch os.environ, start with empty fake environment 268 os.environ = collections.OrderedDict() 269 270 def tearDown(self): 271 os.environ = self._saved_env 272 273 def test_getproxies_environment_prefer_lowercase(self): 274 # Test lowercase preference with removal 275 os.environ['no_proxy'] = '' 276 os.environ['No_Proxy'] = 'localhost' 277 self.assertFalse(urllib.request.proxy_bypass_environment('localhost')) 278 self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary')) 279 os.environ['http_proxy'] = '' 280 os.environ['HTTP_PROXY'] = 'http://somewhere:3128' 281 proxies = urllib.request.getproxies_environment() 282 self.assertEqual({}, proxies) 283 # Test lowercase preference of proxy bypass and correct matching including ports 284 os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234' 285 os.environ['No_Proxy'] = 'xyz.com' 286 self.assertTrue(urllib.request.proxy_bypass_environment('localhost')) 287 self.assertTrue(urllib.request.proxy_bypass_environment('noproxy.com:5678')) 288 self.assertTrue(urllib.request.proxy_bypass_environment('my.proxy:1234')) 289 self.assertFalse(urllib.request.proxy_bypass_environment('my.proxy')) 290 self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary')) 291 # Test lowercase preference with replacement 292 os.environ['http_proxy'] = 'http://somewhere:3128' 293 os.environ['Http_Proxy'] = 'http://somewhereelse:3128' 294 proxies = urllib.request.getproxies_environment() 295 self.assertEqual('http://somewhere:3128', proxies['http']) 296 297class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin): 298 """Test urlopen() opening a fake http connection.""" 299 300 def check_read(self, ver): 301 self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!") 302 try: 303 fp = urlopen("http://python.org/") 304 self.assertEqual(fp.readline(), b"Hello!") 305 self.assertEqual(fp.readline(), b"") 306 self.assertEqual(fp.geturl(), 'http://python.org/') 307 self.assertEqual(fp.getcode(), 200) 308 finally: 309 self.unfakehttp() 310 311 def test_url_fragment(self): 312 # Issue #11703: geturl() omits fragments in the original URL. 313 url = 'http://docs.python.org/library/urllib.html#OK' 314 self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!") 315 try: 316 fp = urllib.request.urlopen(url) 317 self.assertEqual(fp.geturl(), url) 318 finally: 319 self.unfakehttp() 320 321 def test_willclose(self): 322 self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!") 323 try: 324 resp = urlopen("http://www.python.org") 325 self.assertTrue(resp.fp.will_close) 326 finally: 327 self.unfakehttp() 328 329 def test_read_0_9(self): 330 # "0.9" response accepted (but not "simple responses" without 331 # a status line) 332 self.check_read(b"0.9") 333 334 def test_read_1_0(self): 335 self.check_read(b"1.0") 336 337 def test_read_1_1(self): 338 self.check_read(b"1.1") 339 340 def test_read_bogus(self): 341 # urlopen() should raise OSError for many error codes. 342 self.fakehttp(b'''HTTP/1.1 401 Authentication Required 343Date: Wed, 02 Jan 2008 03:03:54 GMT 344Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e 345Connection: close 346Content-Type: text/html; charset=iso-8859-1 347''') 348 try: 349 self.assertRaises(OSError, urlopen, "http://python.org/") 350 finally: 351 self.unfakehttp() 352 353 def test_invalid_redirect(self): 354 # urlopen() should raise OSError for many error codes. 355 self.fakehttp(b'''HTTP/1.1 302 Found 356Date: Wed, 02 Jan 2008 03:03:54 GMT 357Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e 358Location: file://guidocomputer.athome.com:/python/license 359Connection: close 360Content-Type: text/html; charset=iso-8859-1 361''') 362 try: 363 msg = "Redirection to url 'file:" 364 with self.assertRaisesRegex(urllib.error.HTTPError, msg): 365 urlopen("http://python.org/") 366 finally: 367 self.unfakehttp() 368 369 def test_redirect_limit_independent(self): 370 # Ticket #12923: make sure independent requests each use their 371 # own retry limit. 372 for i in range(FancyURLopener().maxtries): 373 self.fakehttp(b'''HTTP/1.1 302 Found 374Location: file://guidocomputer.athome.com:/python/license 375Connection: close 376''') 377 try: 378 self.assertRaises(urllib.error.HTTPError, urlopen, 379 "http://something") 380 finally: 381 self.unfakehttp() 382 383 def test_empty_socket(self): 384 # urlopen() raises OSError if the underlying socket does not send any 385 # data. (#1680230) 386 self.fakehttp(b'') 387 try: 388 self.assertRaises(OSError, urlopen, "http://something") 389 finally: 390 self.unfakehttp() 391 392 def test_missing_localfile(self): 393 # Test for #10836 394 with self.assertRaises(urllib.error.URLError) as e: 395 urlopen('file://localhost/a/file/which/doesnot/exists.py') 396 self.assertTrue(e.exception.filename) 397 self.assertTrue(e.exception.reason) 398 399 def test_file_notexists(self): 400 fd, tmp_file = tempfile.mkstemp() 401 tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/') 402 try: 403 self.assertTrue(os.path.exists(tmp_file)) 404 with urlopen(tmp_fileurl) as fobj: 405 self.assertTrue(fobj) 406 finally: 407 os.close(fd) 408 os.unlink(tmp_file) 409 self.assertFalse(os.path.exists(tmp_file)) 410 with self.assertRaises(urllib.error.URLError): 411 urlopen(tmp_fileurl) 412 413 def test_ftp_nohost(self): 414 test_ftp_url = 'ftp:///path' 415 with self.assertRaises(urllib.error.URLError) as e: 416 urlopen(test_ftp_url) 417 self.assertFalse(e.exception.filename) 418 self.assertTrue(e.exception.reason) 419 420 def test_ftp_nonexisting(self): 421 with self.assertRaises(urllib.error.URLError) as e: 422 urlopen('ftp://localhost/a/file/which/doesnot/exists.py') 423 self.assertFalse(e.exception.filename) 424 self.assertTrue(e.exception.reason) 425 426 @patch.object(urllib.request, 'MAXFTPCACHE', 0) 427 def test_ftp_cache_pruning(self): 428 self.fakeftp() 429 try: 430 urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, []) 431 urlopen('ftp://localhost') 432 finally: 433 self.unfakeftp() 434 435 436 def test_userpass_inurl(self): 437 self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!") 438 try: 439 fp = urlopen("http://user:pass@python.org/") 440 self.assertEqual(fp.readline(), b"Hello!") 441 self.assertEqual(fp.readline(), b"") 442 self.assertEqual(fp.geturl(), 'http://user:pass@python.org/') 443 self.assertEqual(fp.getcode(), 200) 444 finally: 445 self.unfakehttp() 446 447 def test_userpass_inurl_w_spaces(self): 448 self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!") 449 try: 450 userpass = "a b:c d" 451 url = "http://{}@python.org/".format(userpass) 452 fakehttp_wrapper = http.client.HTTPConnection 453 authorization = ("Authorization: Basic %s\r\n" % 454 b64encode(userpass.encode("ASCII")).decode("ASCII")) 455 fp = urlopen(url) 456 # The authorization header must be in place 457 self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8")) 458 self.assertEqual(fp.readline(), b"Hello!") 459 self.assertEqual(fp.readline(), b"") 460 # the spaces are quoted in URL so no match 461 self.assertNotEqual(fp.geturl(), url) 462 self.assertEqual(fp.getcode(), 200) 463 finally: 464 self.unfakehttp() 465 466 def test_URLopener_deprecation(self): 467 with support.check_warnings(('',DeprecationWarning)): 468 urllib.request.URLopener() 469 470 @unittest.skipUnless(ssl, "ssl module required") 471 def test_cafile_and_context(self): 472 context = ssl.create_default_context() 473 with support.check_warnings(('', DeprecationWarning)): 474 with self.assertRaises(ValueError): 475 urllib.request.urlopen( 476 "https://localhost", cafile="/nonexistent/path", context=context 477 ) 478 479class urlopen_DataTests(unittest.TestCase): 480 """Test urlopen() opening a data URL.""" 481 482 def setUp(self): 483 # text containing URL special- and unicode-characters 484 self.text = "test data URLs :;,%=& \u00f6 \u00c4 " 485 # 2x1 pixel RGB PNG image with one black and one white pixel 486 self.image = ( 487 b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00' 488 b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae' 489 b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00' 490 b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82') 491 492 self.text_url = ( 493 "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3" 494 "D%26%20%C3%B6%20%C3%84%20") 495 self.text_url_base64 = ( 496 "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs" 497 "sJT0mIPYgxCA%3D") 498 # base64 encoded data URL that contains ignorable spaces, 499 # such as "\n", " ", "%0A", and "%20". 500 self.image_url = ( 501 "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n" 502 "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 " 503 "vHgAAAABJRU5ErkJggg%3D%3D%0A%20") 504 505 self.text_url_resp = urllib.request.urlopen(self.text_url) 506 self.text_url_base64_resp = urllib.request.urlopen( 507 self.text_url_base64) 508 self.image_url_resp = urllib.request.urlopen(self.image_url) 509 510 def test_interface(self): 511 # Make sure object returned by urlopen() has the specified methods 512 for attr in ("read", "readline", "readlines", 513 "close", "info", "geturl", "getcode", "__iter__"): 514 self.assertTrue(hasattr(self.text_url_resp, attr), 515 "object returned by urlopen() lacks %s attribute" % 516 attr) 517 518 def test_info(self): 519 self.assertIsInstance(self.text_url_resp.info(), email.message.Message) 520 self.assertEqual(self.text_url_base64_resp.info().get_params(), 521 [('text/plain', ''), ('charset', 'ISO-8859-1')]) 522 self.assertEqual(self.image_url_resp.info()['content-length'], 523 str(len(self.image))) 524 self.assertEqual(urllib.request.urlopen("data:,").info().get_params(), 525 [('text/plain', ''), ('charset', 'US-ASCII')]) 526 527 def test_geturl(self): 528 self.assertEqual(self.text_url_resp.geturl(), self.text_url) 529 self.assertEqual(self.text_url_base64_resp.geturl(), 530 self.text_url_base64) 531 self.assertEqual(self.image_url_resp.geturl(), self.image_url) 532 533 def test_read_text(self): 534 self.assertEqual(self.text_url_resp.read().decode( 535 dict(self.text_url_resp.info().get_params())['charset']), self.text) 536 537 def test_read_text_base64(self): 538 self.assertEqual(self.text_url_base64_resp.read().decode( 539 dict(self.text_url_base64_resp.info().get_params())['charset']), 540 self.text) 541 542 def test_read_image(self): 543 self.assertEqual(self.image_url_resp.read(), self.image) 544 545 def test_missing_comma(self): 546 self.assertRaises(ValueError,urllib.request.urlopen,'data:text/plain') 547 548 def test_invalid_base64_data(self): 549 # missing padding character 550 self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=') 551 552class urlretrieve_FileTests(unittest.TestCase): 553 """Test urllib.urlretrieve() on local files""" 554 555 def setUp(self): 556 # Create a list of temporary files. Each item in the list is a file 557 # name (absolute path or relative to the current working directory). 558 # All files in this list will be deleted in the tearDown method. Note, 559 # this only helps to makes sure temporary files get deleted, but it 560 # does nothing about trying to close files that may still be open. It 561 # is the responsibility of the developer to properly close files even 562 # when exceptional conditions occur. 563 self.tempFiles = [] 564 565 # Create a temporary file. 566 self.registerFileForCleanUp(support.TESTFN) 567 self.text = b'testing urllib.urlretrieve' 568 try: 569 FILE = open(support.TESTFN, 'wb') 570 FILE.write(self.text) 571 FILE.close() 572 finally: 573 try: FILE.close() 574 except: pass 575 576 def tearDown(self): 577 # Delete the temporary files. 578 for each in self.tempFiles: 579 try: os.remove(each) 580 except: pass 581 582 def constructLocalFileUrl(self, filePath): 583 filePath = os.path.abspath(filePath) 584 try: 585 filePath.encode("utf-8") 586 except UnicodeEncodeError: 587 raise unittest.SkipTest("filePath is not encodable to utf8") 588 return "file://%s" % urllib.request.pathname2url(filePath) 589 590 def createNewTempFile(self, data=b""): 591 """Creates a new temporary file containing the specified data, 592 registers the file for deletion during the test fixture tear down, and 593 returns the absolute path of the file.""" 594 595 newFd, newFilePath = tempfile.mkstemp() 596 try: 597 self.registerFileForCleanUp(newFilePath) 598 newFile = os.fdopen(newFd, "wb") 599 newFile.write(data) 600 newFile.close() 601 finally: 602 try: newFile.close() 603 except: pass 604 return newFilePath 605 606 def registerFileForCleanUp(self, fileName): 607 self.tempFiles.append(fileName) 608 609 def test_basic(self): 610 # Make sure that a local file just gets its own location returned and 611 # a headers value is returned. 612 result = urllib.request.urlretrieve("file:%s" % support.TESTFN) 613 self.assertEqual(result[0], support.TESTFN) 614 self.assertIsInstance(result[1], email.message.Message, 615 "did not get an email.message.Message instance " 616 "as second returned value") 617 618 def test_copy(self): 619 # Test that setting the filename argument works. 620 second_temp = "%s.2" % support.TESTFN 621 self.registerFileForCleanUp(second_temp) 622 result = urllib.request.urlretrieve(self.constructLocalFileUrl( 623 support.TESTFN), second_temp) 624 self.assertEqual(second_temp, result[0]) 625 self.assertTrue(os.path.exists(second_temp), "copy of the file was not " 626 "made") 627 FILE = open(second_temp, 'rb') 628 try: 629 text = FILE.read() 630 FILE.close() 631 finally: 632 try: FILE.close() 633 except: pass 634 self.assertEqual(self.text, text) 635 636 def test_reporthook(self): 637 # Make sure that the reporthook works. 638 def hooktester(block_count, block_read_size, file_size, count_holder=[0]): 639 self.assertIsInstance(block_count, int) 640 self.assertIsInstance(block_read_size, int) 641 self.assertIsInstance(file_size, int) 642 self.assertEqual(block_count, count_holder[0]) 643 count_holder[0] = count_holder[0] + 1 644 second_temp = "%s.2" % support.TESTFN 645 self.registerFileForCleanUp(second_temp) 646 urllib.request.urlretrieve( 647 self.constructLocalFileUrl(support.TESTFN), 648 second_temp, hooktester) 649 650 def test_reporthook_0_bytes(self): 651 # Test on zero length file. Should call reporthook only 1 time. 652 report = [] 653 def hooktester(block_count, block_read_size, file_size, _report=report): 654 _report.append((block_count, block_read_size, file_size)) 655 srcFileName = self.createNewTempFile() 656 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName), 657 support.TESTFN, hooktester) 658 self.assertEqual(len(report), 1) 659 self.assertEqual(report[0][2], 0) 660 661 def test_reporthook_5_bytes(self): 662 # Test on 5 byte file. Should call reporthook only 2 times (once when 663 # the "network connection" is established and once when the block is 664 # read). 665 report = [] 666 def hooktester(block_count, block_read_size, file_size, _report=report): 667 _report.append((block_count, block_read_size, file_size)) 668 srcFileName = self.createNewTempFile(b"x" * 5) 669 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName), 670 support.TESTFN, hooktester) 671 self.assertEqual(len(report), 2) 672 self.assertEqual(report[0][2], 5) 673 self.assertEqual(report[1][2], 5) 674 675 def test_reporthook_8193_bytes(self): 676 # Test on 8193 byte file. Should call reporthook only 3 times (once 677 # when the "network connection" is established, once for the next 8192 678 # bytes, and once for the last byte). 679 report = [] 680 def hooktester(block_count, block_read_size, file_size, _report=report): 681 _report.append((block_count, block_read_size, file_size)) 682 srcFileName = self.createNewTempFile(b"x" * 8193) 683 urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName), 684 support.TESTFN, hooktester) 685 self.assertEqual(len(report), 3) 686 self.assertEqual(report[0][2], 8193) 687 self.assertEqual(report[0][1], 8192) 688 self.assertEqual(report[1][1], 8192) 689 self.assertEqual(report[2][1], 8192) 690 691 692class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin): 693 """Test urllib.urlretrieve() using fake http connections""" 694 695 def test_short_content_raises_ContentTooShortError(self): 696 self.fakehttp(b'''HTTP/1.1 200 OK 697Date: Wed, 02 Jan 2008 03:03:54 GMT 698Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e 699Connection: close 700Content-Length: 100 701Content-Type: text/html; charset=iso-8859-1 702 703FF 704''') 705 706 def _reporthook(par1, par2, par3): 707 pass 708 709 with self.assertRaises(urllib.error.ContentTooShortError): 710 try: 711 urllib.request.urlretrieve('http://example.com/', 712 reporthook=_reporthook) 713 finally: 714 self.unfakehttp() 715 716 def test_short_content_raises_ContentTooShortError_without_reporthook(self): 717 self.fakehttp(b'''HTTP/1.1 200 OK 718Date: Wed, 02 Jan 2008 03:03:54 GMT 719Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e 720Connection: close 721Content-Length: 100 722Content-Type: text/html; charset=iso-8859-1 723 724FF 725''') 726 with self.assertRaises(urllib.error.ContentTooShortError): 727 try: 728 urllib.request.urlretrieve('http://example.com/') 729 finally: 730 self.unfakehttp() 731 732 733class QuotingTests(unittest.TestCase): 734 r"""Tests for urllib.quote() and urllib.quote_plus() 735 736 According to RFC 2396 (Uniform Resource Identifiers), to escape a 737 character you write it as '%' + <2 character US-ASCII hex value>. 738 The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a 739 character properly. Case does not matter on the hex letters. 740 741 The various character sets specified are: 742 743 Reserved characters : ";/?:@&=+$," 744 Have special meaning in URIs and must be escaped if not being used for 745 their special meaning 746 Data characters : letters, digits, and "-_.!~*'()" 747 Unreserved and do not need to be escaped; can be, though, if desired 748 Control characters : 0x00 - 0x1F, 0x7F 749 Have no use in URIs so must be escaped 750 space : 0x20 751 Must be escaped 752 Delimiters : '<>#%"' 753 Must be escaped 754 Unwise : "{}|\^[]`" 755 Must be escaped 756 757 """ 758 759 def test_never_quote(self): 760 # Make sure quote() does not quote letters, digits, and "_,.-" 761 do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ", 762 "abcdefghijklmnopqrstuvwxyz", 763 "0123456789", 764 "_.-"]) 765 result = urllib.parse.quote(do_not_quote) 766 self.assertEqual(do_not_quote, result, 767 "using quote(): %r != %r" % (do_not_quote, result)) 768 result = urllib.parse.quote_plus(do_not_quote) 769 self.assertEqual(do_not_quote, result, 770 "using quote_plus(): %r != %r" % (do_not_quote, result)) 771 772 def test_default_safe(self): 773 # Test '/' is default value for 'safe' parameter 774 self.assertEqual(urllib.parse.quote.__defaults__[0], '/') 775 776 def test_safe(self): 777 # Test setting 'safe' parameter does what it should do 778 quote_by_default = "<>" 779 result = urllib.parse.quote(quote_by_default, safe=quote_by_default) 780 self.assertEqual(quote_by_default, result, 781 "using quote(): %r != %r" % (quote_by_default, result)) 782 result = urllib.parse.quote_plus(quote_by_default, 783 safe=quote_by_default) 784 self.assertEqual(quote_by_default, result, 785 "using quote_plus(): %r != %r" % 786 (quote_by_default, result)) 787 # Safe expressed as bytes rather than str 788 result = urllib.parse.quote(quote_by_default, safe=b"<>") 789 self.assertEqual(quote_by_default, result, 790 "using quote(): %r != %r" % (quote_by_default, result)) 791 # "Safe" non-ASCII characters should have no effect 792 # (Since URIs are not allowed to have non-ASCII characters) 793 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc") 794 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="") 795 self.assertEqual(expect, result, 796 "using quote(): %r != %r" % 797 (expect, result)) 798 # Same as above, but using a bytes rather than str 799 result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc") 800 expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="") 801 self.assertEqual(expect, result, 802 "using quote(): %r != %r" % 803 (expect, result)) 804 805 def test_default_quoting(self): 806 # Make sure all characters that should be quoted are by default sans 807 # space (separate test for that). 808 should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F 809 should_quote.append(r'<>#%"{}|\^[]`') 810 should_quote.append(chr(127)) # For 0x7F 811 should_quote = ''.join(should_quote) 812 for char in should_quote: 813 result = urllib.parse.quote(char) 814 self.assertEqual(hexescape(char), result, 815 "using quote(): " 816 "%s should be escaped to %s, not %s" % 817 (char, hexescape(char), result)) 818 result = urllib.parse.quote_plus(char) 819 self.assertEqual(hexescape(char), result, 820 "using quote_plus(): " 821 "%s should be escapes to %s, not %s" % 822 (char, hexescape(char), result)) 823 del should_quote 824 partial_quote = "ab[]cd" 825 expected = "ab%5B%5Dcd" 826 result = urllib.parse.quote(partial_quote) 827 self.assertEqual(expected, result, 828 "using quote(): %r != %r" % (expected, result)) 829 result = urllib.parse.quote_plus(partial_quote) 830 self.assertEqual(expected, result, 831 "using quote_plus(): %r != %r" % (expected, result)) 832 833 def test_quoting_space(self): 834 # Make sure quote() and quote_plus() handle spaces as specified in 835 # their unique way 836 result = urllib.parse.quote(' ') 837 self.assertEqual(result, hexescape(' '), 838 "using quote(): %r != %r" % (result, hexescape(' '))) 839 result = urllib.parse.quote_plus(' ') 840 self.assertEqual(result, '+', 841 "using quote_plus(): %r != +" % result) 842 given = "a b cd e f" 843 expect = given.replace(' ', hexescape(' ')) 844 result = urllib.parse.quote(given) 845 self.assertEqual(expect, result, 846 "using quote(): %r != %r" % (expect, result)) 847 expect = given.replace(' ', '+') 848 result = urllib.parse.quote_plus(given) 849 self.assertEqual(expect, result, 850 "using quote_plus(): %r != %r" % (expect, result)) 851 852 def test_quoting_plus(self): 853 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'), 854 'alpha%2Bbeta+gamma') 855 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'), 856 'alpha+beta+gamma') 857 # Test with bytes 858 self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'), 859 'alpha%2Bbeta+gamma') 860 # Test with safe bytes 861 self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'), 862 'alpha+beta+gamma') 863 864 def test_quote_bytes(self): 865 # Bytes should quote directly to percent-encoded values 866 given = b"\xa2\xd8ab\xff" 867 expect = "%A2%D8ab%FF" 868 result = urllib.parse.quote(given) 869 self.assertEqual(expect, result, 870 "using quote(): %r != %r" % (expect, result)) 871 # Encoding argument should raise type error on bytes input 872 self.assertRaises(TypeError, urllib.parse.quote, given, 873 encoding="latin-1") 874 # quote_from_bytes should work the same 875 result = urllib.parse.quote_from_bytes(given) 876 self.assertEqual(expect, result, 877 "using quote_from_bytes(): %r != %r" 878 % (expect, result)) 879 880 def test_quote_with_unicode(self): 881 # Characters in Latin-1 range, encoded by default in UTF-8 882 given = "\xa2\xd8ab\xff" 883 expect = "%C2%A2%C3%98ab%C3%BF" 884 result = urllib.parse.quote(given) 885 self.assertEqual(expect, result, 886 "using quote(): %r != %r" % (expect, result)) 887 # Characters in Latin-1 range, encoded by with None (default) 888 result = urllib.parse.quote(given, encoding=None, errors=None) 889 self.assertEqual(expect, result, 890 "using quote(): %r != %r" % (expect, result)) 891 # Characters in Latin-1 range, encoded with Latin-1 892 given = "\xa2\xd8ab\xff" 893 expect = "%A2%D8ab%FF" 894 result = urllib.parse.quote(given, encoding="latin-1") 895 self.assertEqual(expect, result, 896 "using quote(): %r != %r" % (expect, result)) 897 # Characters in BMP, encoded by default in UTF-8 898 given = "\u6f22\u5b57" # "Kanji" 899 expect = "%E6%BC%A2%E5%AD%97" 900 result = urllib.parse.quote(given) 901 self.assertEqual(expect, result, 902 "using quote(): %r != %r" % (expect, result)) 903 # Characters in BMP, encoded with Latin-1 904 given = "\u6f22\u5b57" 905 self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given, 906 encoding="latin-1") 907 # Characters in BMP, encoded with Latin-1, with replace error handling 908 given = "\u6f22\u5b57" 909 expect = "%3F%3F" # "??" 910 result = urllib.parse.quote(given, encoding="latin-1", 911 errors="replace") 912 self.assertEqual(expect, result, 913 "using quote(): %r != %r" % (expect, result)) 914 # Characters in BMP, Latin-1, with xmlcharref error handling 915 given = "\u6f22\u5b57" 916 expect = "%26%2328450%3B%26%2323383%3B" # "漢字" 917 result = urllib.parse.quote(given, encoding="latin-1", 918 errors="xmlcharrefreplace") 919 self.assertEqual(expect, result, 920 "using quote(): %r != %r" % (expect, result)) 921 922 def test_quote_plus_with_unicode(self): 923 # Encoding (latin-1) test for quote_plus 924 given = "\xa2\xd8 \xff" 925 expect = "%A2%D8+%FF" 926 result = urllib.parse.quote_plus(given, encoding="latin-1") 927 self.assertEqual(expect, result, 928 "using quote_plus(): %r != %r" % (expect, result)) 929 # Errors test for quote_plus 930 given = "ab\u6f22\u5b57 cd" 931 expect = "ab%3F%3F+cd" 932 result = urllib.parse.quote_plus(given, encoding="latin-1", 933 errors="replace") 934 self.assertEqual(expect, result, 935 "using quote_plus(): %r != %r" % (expect, result)) 936 937 938class UnquotingTests(unittest.TestCase): 939 """Tests for unquote() and unquote_plus() 940 941 See the doc string for quoting_Tests for details on quoting and such. 942 943 """ 944 945 def test_unquoting(self): 946 # Make sure unquoting of all ASCII values works 947 escape_list = [] 948 for num in range(128): 949 given = hexescape(chr(num)) 950 expect = chr(num) 951 result = urllib.parse.unquote(given) 952 self.assertEqual(expect, result, 953 "using unquote(): %r != %r" % (expect, result)) 954 result = urllib.parse.unquote_plus(given) 955 self.assertEqual(expect, result, 956 "using unquote_plus(): %r != %r" % 957 (expect, result)) 958 escape_list.append(given) 959 escape_string = ''.join(escape_list) 960 del escape_list 961 result = urllib.parse.unquote(escape_string) 962 self.assertEqual(result.count('%'), 1, 963 "using unquote(): not all characters escaped: " 964 "%s" % result) 965 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None) 966 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ()) 967 with support.check_warnings(('', BytesWarning), quiet=True): 968 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'') 969 970 def test_unquoting_badpercent(self): 971 # Test unquoting on bad percent-escapes 972 given = '%xab' 973 expect = given 974 result = urllib.parse.unquote(given) 975 self.assertEqual(expect, result, "using unquote(): %r != %r" 976 % (expect, result)) 977 given = '%x' 978 expect = given 979 result = urllib.parse.unquote(given) 980 self.assertEqual(expect, result, "using unquote(): %r != %r" 981 % (expect, result)) 982 given = '%' 983 expect = given 984 result = urllib.parse.unquote(given) 985 self.assertEqual(expect, result, "using unquote(): %r != %r" 986 % (expect, result)) 987 # unquote_to_bytes 988 given = '%xab' 989 expect = bytes(given, 'ascii') 990 result = urllib.parse.unquote_to_bytes(given) 991 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" 992 % (expect, result)) 993 given = '%x' 994 expect = bytes(given, 'ascii') 995 result = urllib.parse.unquote_to_bytes(given) 996 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" 997 % (expect, result)) 998 given = '%' 999 expect = bytes(given, 'ascii') 1000 result = urllib.parse.unquote_to_bytes(given) 1001 self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r" 1002 % (expect, result)) 1003 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None) 1004 self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ()) 1005 1006 def test_unquoting_mixed_case(self): 1007 # Test unquoting on mixed-case hex digits in the percent-escapes 1008 given = '%Ab%eA' 1009 expect = b'\xab\xea' 1010 result = urllib.parse.unquote_to_bytes(given) 1011 self.assertEqual(expect, result, 1012 "using unquote_to_bytes(): %r != %r" 1013 % (expect, result)) 1014 1015 def test_unquoting_parts(self): 1016 # Make sure unquoting works when have non-quoted characters 1017 # interspersed 1018 given = 'ab%sd' % hexescape('c') 1019 expect = "abcd" 1020 result = urllib.parse.unquote(given) 1021 self.assertEqual(expect, result, 1022 "using quote(): %r != %r" % (expect, result)) 1023 result = urllib.parse.unquote_plus(given) 1024 self.assertEqual(expect, result, 1025 "using unquote_plus(): %r != %r" % (expect, result)) 1026 1027 def test_unquoting_plus(self): 1028 # Test difference between unquote() and unquote_plus() 1029 given = "are+there+spaces..." 1030 expect = given 1031 result = urllib.parse.unquote(given) 1032 self.assertEqual(expect, result, 1033 "using unquote(): %r != %r" % (expect, result)) 1034 expect = given.replace('+', ' ') 1035 result = urllib.parse.unquote_plus(given) 1036 self.assertEqual(expect, result, 1037 "using unquote_plus(): %r != %r" % (expect, result)) 1038 1039 def test_unquote_to_bytes(self): 1040 given = 'br%C3%BCckner_sapporo_20050930.doc' 1041 expect = b'br\xc3\xbcckner_sapporo_20050930.doc' 1042 result = urllib.parse.unquote_to_bytes(given) 1043 self.assertEqual(expect, result, 1044 "using unquote_to_bytes(): %r != %r" 1045 % (expect, result)) 1046 # Test on a string with unescaped non-ASCII characters 1047 # (Technically an invalid URI; expect those characters to be UTF-8 1048 # encoded). 1049 result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC") 1050 expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc" 1051 self.assertEqual(expect, result, 1052 "using unquote_to_bytes(): %r != %r" 1053 % (expect, result)) 1054 # Test with a bytes as input 1055 given = b'%A2%D8ab%FF' 1056 expect = b'\xa2\xd8ab\xff' 1057 result = urllib.parse.unquote_to_bytes(given) 1058 self.assertEqual(expect, result, 1059 "using unquote_to_bytes(): %r != %r" 1060 % (expect, result)) 1061 # Test with a bytes as input, with unescaped non-ASCII bytes 1062 # (Technically an invalid URI; expect those bytes to be preserved) 1063 given = b'%A2\xd8ab%FF' 1064 expect = b'\xa2\xd8ab\xff' 1065 result = urllib.parse.unquote_to_bytes(given) 1066 self.assertEqual(expect, result, 1067 "using unquote_to_bytes(): %r != %r" 1068 % (expect, result)) 1069 1070 def test_unquote_with_unicode(self): 1071 # Characters in the Latin-1 range, encoded with UTF-8 1072 given = 'br%C3%BCckner_sapporo_20050930.doc' 1073 expect = 'br\u00fcckner_sapporo_20050930.doc' 1074 result = urllib.parse.unquote(given) 1075 self.assertEqual(expect, result, 1076 "using unquote(): %r != %r" % (expect, result)) 1077 # Characters in the Latin-1 range, encoded with None (default) 1078 result = urllib.parse.unquote(given, encoding=None, errors=None) 1079 self.assertEqual(expect, result, 1080 "using unquote(): %r != %r" % (expect, result)) 1081 1082 # Characters in the Latin-1 range, encoded with Latin-1 1083 result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc', 1084 encoding="latin-1") 1085 expect = 'br\u00fcckner_sapporo_20050930.doc' 1086 self.assertEqual(expect, result, 1087 "using unquote(): %r != %r" % (expect, result)) 1088 1089 # Characters in BMP, encoded with UTF-8 1090 given = "%E6%BC%A2%E5%AD%97" 1091 expect = "\u6f22\u5b57" # "Kanji" 1092 result = urllib.parse.unquote(given) 1093 self.assertEqual(expect, result, 1094 "using unquote(): %r != %r" % (expect, result)) 1095 1096 # Decode with UTF-8, invalid sequence 1097 given = "%F3%B1" 1098 expect = "\ufffd" # Replacement character 1099 result = urllib.parse.unquote(given) 1100 self.assertEqual(expect, result, 1101 "using unquote(): %r != %r" % (expect, result)) 1102 1103 # Decode with UTF-8, invalid sequence, replace errors 1104 result = urllib.parse.unquote(given, errors="replace") 1105 self.assertEqual(expect, result, 1106 "using unquote(): %r != %r" % (expect, result)) 1107 1108 # Decode with UTF-8, invalid sequence, ignoring errors 1109 given = "%F3%B1" 1110 expect = "" 1111 result = urllib.parse.unquote(given, errors="ignore") 1112 self.assertEqual(expect, result, 1113 "using unquote(): %r != %r" % (expect, result)) 1114 1115 # A mix of non-ASCII and percent-encoded characters, UTF-8 1116 result = urllib.parse.unquote("\u6f22%C3%BC") 1117 expect = '\u6f22\u00fc' 1118 self.assertEqual(expect, result, 1119 "using unquote(): %r != %r" % (expect, result)) 1120 1121 # A mix of non-ASCII and percent-encoded characters, Latin-1 1122 # (Note, the string contains non-Latin-1-representable characters) 1123 result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1") 1124 expect = '\u6f22\u00fc' 1125 self.assertEqual(expect, result, 1126 "using unquote(): %r != %r" % (expect, result)) 1127 1128class urlencode_Tests(unittest.TestCase): 1129 """Tests for urlencode()""" 1130 1131 def help_inputtype(self, given, test_type): 1132 """Helper method for testing different input types. 1133 1134 'given' must lead to only the pairs: 1135 * 1st, 1 1136 * 2nd, 2 1137 * 3rd, 3 1138 1139 Test cannot assume anything about order. Docs make no guarantee and 1140 have possible dictionary input. 1141 1142 """ 1143 expect_somewhere = ["1st=1", "2nd=2", "3rd=3"] 1144 result = urllib.parse.urlencode(given) 1145 for expected in expect_somewhere: 1146 self.assertIn(expected, result, 1147 "testing %s: %s not found in %s" % 1148 (test_type, expected, result)) 1149 self.assertEqual(result.count('&'), 2, 1150 "testing %s: expected 2 '&'s; got %s" % 1151 (test_type, result.count('&'))) 1152 amp_location = result.index('&') 1153 on_amp_left = result[amp_location - 1] 1154 on_amp_right = result[amp_location + 1] 1155 self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(), 1156 "testing %s: '&' not located in proper place in %s" % 1157 (test_type, result)) 1158 self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps 1159 "testing %s: " 1160 "unexpected number of characters: %s != %s" % 1161 (test_type, len(result), (5 * 3) + 2)) 1162 1163 def test_using_mapping(self): 1164 # Test passing in a mapping object as an argument. 1165 self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'}, 1166 "using dict as input type") 1167 1168 def test_using_sequence(self): 1169 # Test passing in a sequence of two-item sequences as an argument. 1170 self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')], 1171 "using sequence of two-item tuples as input") 1172 1173 def test_quoting(self): 1174 # Make sure keys and values are quoted using quote_plus() 1175 given = {"&":"="} 1176 expect = "%s=%s" % (hexescape('&'), hexescape('=')) 1177 result = urllib.parse.urlencode(given) 1178 self.assertEqual(expect, result) 1179 given = {"key name":"A bunch of pluses"} 1180 expect = "key+name=A+bunch+of+pluses" 1181 result = urllib.parse.urlencode(given) 1182 self.assertEqual(expect, result) 1183 1184 def test_doseq(self): 1185 # Test that passing True for 'doseq' parameter works correctly 1186 given = {'sequence':['1', '2', '3']} 1187 expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3'])) 1188 result = urllib.parse.urlencode(given) 1189 self.assertEqual(expect, result) 1190 result = urllib.parse.urlencode(given, True) 1191 for value in given["sequence"]: 1192 expect = "sequence=%s" % value 1193 self.assertIn(expect, result) 1194 self.assertEqual(result.count('&'), 2, 1195 "Expected 2 '&'s, got %s" % result.count('&')) 1196 1197 def test_empty_sequence(self): 1198 self.assertEqual("", urllib.parse.urlencode({})) 1199 self.assertEqual("", urllib.parse.urlencode([])) 1200 1201 def test_nonstring_values(self): 1202 self.assertEqual("a=1", urllib.parse.urlencode({"a": 1})) 1203 self.assertEqual("a=None", urllib.parse.urlencode({"a": None})) 1204 1205 def test_nonstring_seq_values(self): 1206 self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True)) 1207 self.assertEqual("a=None&a=a", 1208 urllib.parse.urlencode({"a": [None, "a"]}, True)) 1209 data = collections.OrderedDict([("a", 1), ("b", 1)]) 1210 self.assertEqual("a=a&a=b", 1211 urllib.parse.urlencode({"a": data}, True)) 1212 1213 def test_urlencode_encoding(self): 1214 # ASCII encoding. Expect %3F with errors="replace' 1215 given = (('\u00a0', '\u00c1'),) 1216 expect = '%3F=%3F' 1217 result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace") 1218 self.assertEqual(expect, result) 1219 1220 # Default is UTF-8 encoding. 1221 given = (('\u00a0', '\u00c1'),) 1222 expect = '%C2%A0=%C3%81' 1223 result = urllib.parse.urlencode(given) 1224 self.assertEqual(expect, result) 1225 1226 # Latin-1 encoding. 1227 given = (('\u00a0', '\u00c1'),) 1228 expect = '%A0=%C1' 1229 result = urllib.parse.urlencode(given, encoding="latin-1") 1230 self.assertEqual(expect, result) 1231 1232 def test_urlencode_encoding_doseq(self): 1233 # ASCII Encoding. Expect %3F with errors="replace' 1234 given = (('\u00a0', '\u00c1'),) 1235 expect = '%3F=%3F' 1236 result = urllib.parse.urlencode(given, doseq=True, 1237 encoding="ASCII", errors="replace") 1238 self.assertEqual(expect, result) 1239 1240 # ASCII Encoding. On a sequence of values. 1241 given = (("\u00a0", (1, "\u00c1")),) 1242 expect = '%3F=1&%3F=%3F' 1243 result = urllib.parse.urlencode(given, True, 1244 encoding="ASCII", errors="replace") 1245 self.assertEqual(expect, result) 1246 1247 # Utf-8 1248 given = (("\u00a0", "\u00c1"),) 1249 expect = '%C2%A0=%C3%81' 1250 result = urllib.parse.urlencode(given, True) 1251 self.assertEqual(expect, result) 1252 1253 given = (("\u00a0", (42, "\u00c1")),) 1254 expect = '%C2%A0=42&%C2%A0=%C3%81' 1255 result = urllib.parse.urlencode(given, True) 1256 self.assertEqual(expect, result) 1257 1258 # latin-1 1259 given = (("\u00a0", "\u00c1"),) 1260 expect = '%A0=%C1' 1261 result = urllib.parse.urlencode(given, True, encoding="latin-1") 1262 self.assertEqual(expect, result) 1263 1264 given = (("\u00a0", (42, "\u00c1")),) 1265 expect = '%A0=42&%A0=%C1' 1266 result = urllib.parse.urlencode(given, True, encoding="latin-1") 1267 self.assertEqual(expect, result) 1268 1269 def test_urlencode_bytes(self): 1270 given = ((b'\xa0\x24', b'\xc1\x24'),) 1271 expect = '%A0%24=%C1%24' 1272 result = urllib.parse.urlencode(given) 1273 self.assertEqual(expect, result) 1274 result = urllib.parse.urlencode(given, True) 1275 self.assertEqual(expect, result) 1276 1277 # Sequence of values 1278 given = ((b'\xa0\x24', (42, b'\xc1\x24')),) 1279 expect = '%A0%24=42&%A0%24=%C1%24' 1280 result = urllib.parse.urlencode(given, True) 1281 self.assertEqual(expect, result) 1282 1283 def test_urlencode_encoding_safe_parameter(self): 1284 1285 # Send '$' (\x24) as safe character 1286 # Default utf-8 encoding 1287 1288 given = ((b'\xa0\x24', b'\xc1\x24'),) 1289 result = urllib.parse.urlencode(given, safe=":$") 1290 expect = '%A0$=%C1$' 1291 self.assertEqual(expect, result) 1292 1293 given = ((b'\xa0\x24', b'\xc1\x24'),) 1294 result = urllib.parse.urlencode(given, doseq=True, safe=":$") 1295 expect = '%A0$=%C1$' 1296 self.assertEqual(expect, result) 1297 1298 # Safe parameter in sequence 1299 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),) 1300 expect = '%A0$=%C1$&%A0$=13&%A0$=42' 1301 result = urllib.parse.urlencode(given, True, safe=":$") 1302 self.assertEqual(expect, result) 1303 1304 # Test all above in latin-1 encoding 1305 1306 given = ((b'\xa0\x24', b'\xc1\x24'),) 1307 result = urllib.parse.urlencode(given, safe=":$", 1308 encoding="latin-1") 1309 expect = '%A0$=%C1$' 1310 self.assertEqual(expect, result) 1311 1312 given = ((b'\xa0\x24', b'\xc1\x24'),) 1313 expect = '%A0$=%C1$' 1314 result = urllib.parse.urlencode(given, doseq=True, safe=":$", 1315 encoding="latin-1") 1316 1317 given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),) 1318 expect = '%A0$=%C1$&%A0$=13&%A0$=42' 1319 result = urllib.parse.urlencode(given, True, safe=":$", 1320 encoding="latin-1") 1321 self.assertEqual(expect, result) 1322 1323class Pathname_Tests(unittest.TestCase): 1324 """Test pathname2url() and url2pathname()""" 1325 1326 def test_basic(self): 1327 # Make sure simple tests pass 1328 expected_path = os.path.join("parts", "of", "a", "path") 1329 expected_url = "parts/of/a/path" 1330 result = urllib.request.pathname2url(expected_path) 1331 self.assertEqual(expected_url, result, 1332 "pathname2url() failed; %s != %s" % 1333 (result, expected_url)) 1334 result = urllib.request.url2pathname(expected_url) 1335 self.assertEqual(expected_path, result, 1336 "url2pathame() failed; %s != %s" % 1337 (result, expected_path)) 1338 1339 def test_quoting(self): 1340 # Test automatic quoting and unquoting works for pathnam2url() and 1341 # url2pathname() respectively 1342 given = os.path.join("needs", "quot=ing", "here") 1343 expect = "needs/%s/here" % urllib.parse.quote("quot=ing") 1344 result = urllib.request.pathname2url(given) 1345 self.assertEqual(expect, result, 1346 "pathname2url() failed; %s != %s" % 1347 (expect, result)) 1348 expect = given 1349 result = urllib.request.url2pathname(result) 1350 self.assertEqual(expect, result, 1351 "url2pathname() failed; %s != %s" % 1352 (expect, result)) 1353 given = os.path.join("make sure", "using_quote") 1354 expect = "%s/using_quote" % urllib.parse.quote("make sure") 1355 result = urllib.request.pathname2url(given) 1356 self.assertEqual(expect, result, 1357 "pathname2url() failed; %s != %s" % 1358 (expect, result)) 1359 given = "make+sure/using_unquote" 1360 expect = os.path.join("make+sure", "using_unquote") 1361 result = urllib.request.url2pathname(given) 1362 self.assertEqual(expect, result, 1363 "url2pathname() failed; %s != %s" % 1364 (expect, result)) 1365 1366 @unittest.skipUnless(sys.platform == 'win32', 1367 'test specific to the urllib.url2path function.') 1368 def test_ntpath(self): 1369 given = ('/C:/', '///C:/', '/C|//') 1370 expect = 'C:\\' 1371 for url in given: 1372 result = urllib.request.url2pathname(url) 1373 self.assertEqual(expect, result, 1374 'urllib.request..url2pathname() failed; %s != %s' % 1375 (expect, result)) 1376 given = '///C|/path' 1377 expect = 'C:\\path' 1378 result = urllib.request.url2pathname(given) 1379 self.assertEqual(expect, result, 1380 'urllib.request.url2pathname() failed; %s != %s' % 1381 (expect, result)) 1382 1383class Utility_Tests(unittest.TestCase): 1384 """Testcase to test the various utility functions in the urllib.""" 1385 1386 def test_thishost(self): 1387 """Test the urllib.request.thishost utility function returns a tuple""" 1388 self.assertIsInstance(urllib.request.thishost(), tuple) 1389 1390 1391class URLopener_Tests(unittest.TestCase): 1392 """Testcase to test the open method of URLopener class.""" 1393 1394 def test_quoted_open(self): 1395 class DummyURLopener(urllib.request.URLopener): 1396 def open_spam(self, url): 1397 return url 1398 with support.check_warnings( 1399 ('DummyURLopener style of invoking requests is deprecated.', 1400 DeprecationWarning)): 1401 self.assertEqual(DummyURLopener().open( 1402 'spam://example/ /'),'//example/%20/') 1403 1404 # test the safe characters are not quoted by urlopen 1405 self.assertEqual(DummyURLopener().open( 1406 "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"), 1407 "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/") 1408 1409# Just commented them out. 1410# Can't really tell why keep failing in windows and sparc. 1411# Everywhere else they work ok, but on those machines, sometimes 1412# fail in one of the tests, sometimes in other. I have a linux, and 1413# the tests go ok. 1414# If anybody has one of the problematic environments, please help! 1415# . Facundo 1416# 1417# def server(evt): 1418# import socket, time 1419# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 1420# serv.settimeout(3) 1421# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 1422# serv.bind(("", 9093)) 1423# serv.listen() 1424# try: 1425# conn, addr = serv.accept() 1426# conn.send("1 Hola mundo\n") 1427# cantdata = 0 1428# while cantdata < 13: 1429# data = conn.recv(13-cantdata) 1430# cantdata += len(data) 1431# time.sleep(.3) 1432# conn.send("2 No more lines\n") 1433# conn.close() 1434# except socket.timeout: 1435# pass 1436# finally: 1437# serv.close() 1438# evt.set() 1439# 1440# class FTPWrapperTests(unittest.TestCase): 1441# 1442# def setUp(self): 1443# import ftplib, time, threading 1444# ftplib.FTP.port = 9093 1445# self.evt = threading.Event() 1446# threading.Thread(target=server, args=(self.evt,)).start() 1447# time.sleep(.1) 1448# 1449# def tearDown(self): 1450# self.evt.wait() 1451# 1452# def testBasic(self): 1453# # connects 1454# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, []) 1455# ftp.close() 1456# 1457# def testTimeoutNone(self): 1458# # global default timeout is ignored 1459# import socket 1460# self.assertIsNone(socket.getdefaulttimeout()) 1461# socket.setdefaulttimeout(30) 1462# try: 1463# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, []) 1464# finally: 1465# socket.setdefaulttimeout(None) 1466# self.assertEqual(ftp.ftp.sock.gettimeout(), 30) 1467# ftp.close() 1468# 1469# def testTimeoutDefault(self): 1470# # global default timeout is used 1471# import socket 1472# self.assertIsNone(socket.getdefaulttimeout()) 1473# socket.setdefaulttimeout(30) 1474# try: 1475# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, []) 1476# finally: 1477# socket.setdefaulttimeout(None) 1478# self.assertEqual(ftp.ftp.sock.gettimeout(), 30) 1479# ftp.close() 1480# 1481# def testTimeoutValue(self): 1482# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [], 1483# timeout=30) 1484# self.assertEqual(ftp.ftp.sock.gettimeout(), 30) 1485# ftp.close() 1486 1487 1488class RequestTests(unittest.TestCase): 1489 """Unit tests for urllib.request.Request.""" 1490 1491 def test_default_values(self): 1492 Request = urllib.request.Request 1493 request = Request("http://www.python.org") 1494 self.assertEqual(request.get_method(), 'GET') 1495 request = Request("http://www.python.org", {}) 1496 self.assertEqual(request.get_method(), 'POST') 1497 1498 def test_with_method_arg(self): 1499 Request = urllib.request.Request 1500 request = Request("http://www.python.org", method='HEAD') 1501 self.assertEqual(request.method, 'HEAD') 1502 self.assertEqual(request.get_method(), 'HEAD') 1503 request = Request("http://www.python.org", {}, method='HEAD') 1504 self.assertEqual(request.method, 'HEAD') 1505 self.assertEqual(request.get_method(), 'HEAD') 1506 request = Request("http://www.python.org", method='GET') 1507 self.assertEqual(request.get_method(), 'GET') 1508 request.method = 'HEAD' 1509 self.assertEqual(request.get_method(), 'HEAD') 1510 1511 1512class URL2PathNameTests(unittest.TestCase): 1513 1514 def test_converting_drive_letter(self): 1515 self.assertEqual(url2pathname("///C|"), 'C:') 1516 self.assertEqual(url2pathname("///C:"), 'C:') 1517 self.assertEqual(url2pathname("///C|/"), 'C:\\') 1518 1519 def test_converting_when_no_drive_letter(self): 1520 # cannot end a raw string in \ 1521 self.assertEqual(url2pathname("///C/test/"), r'\\\C\test' '\\') 1522 self.assertEqual(url2pathname("////C/test/"), r'\\C\test' '\\') 1523 1524 def test_simple_compare(self): 1525 self.assertEqual(url2pathname("///C|/foo/bar/spam.foo"), 1526 r'C:\foo\bar\spam.foo') 1527 1528 def test_non_ascii_drive_letter(self): 1529 self.assertRaises(IOError, url2pathname, "///\u00e8|/") 1530 1531 def test_roundtrip_url2pathname(self): 1532 list_of_paths = ['C:', 1533 r'\\\C\test\\', 1534 r'C:\foo\bar\spam.foo' 1535 ] 1536 for path in list_of_paths: 1537 self.assertEqual(url2pathname(pathname2url(path)), path) 1538 1539class PathName2URLTests(unittest.TestCase): 1540 1541 def test_converting_drive_letter(self): 1542 self.assertEqual(pathname2url("C:"), '///C:') 1543 self.assertEqual(pathname2url("C:\\"), '///C:') 1544 1545 def test_converting_when_no_drive_letter(self): 1546 self.assertEqual(pathname2url(r"\\\folder\test" "\\"), 1547 '/////folder/test/') 1548 self.assertEqual(pathname2url(r"\\folder\test" "\\"), 1549 '////folder/test/') 1550 self.assertEqual(pathname2url(r"\folder\test" "\\"), 1551 '/folder/test/') 1552 1553 def test_simple_compare(self): 1554 self.assertEqual(pathname2url(r'C:\foo\bar\spam.foo'), 1555 "///C:/foo/bar/spam.foo" ) 1556 1557 def test_long_drive_letter(self): 1558 self.assertRaises(IOError, pathname2url, "XX:\\") 1559 1560 def test_roundtrip_pathname2url(self): 1561 list_of_paths = ['///C:', 1562 '/////folder/test/', 1563 '///C:/foo/bar/spam.foo'] 1564 for path in list_of_paths: 1565 self.assertEqual(pathname2url(url2pathname(path)), path) 1566 1567if __name__ == '__main__': 1568 unittest.main() 1569