"""Regression tests for urllib"""

import collections
import urllib
import httplib
import io
import unittest
import os
import sys
import mimetools
import tempfile

from test import test_support
from base64 import b64encode


def hexescape(char):
    """Escape char as RFC 2396 specifies: '%' plus two uppercase hex digits."""
    return "%%%02X" % ord(char)


def fakehttp(fakedata):
    """Build an HTTPConnection subclass that serves canned data.

    connect() on the returned class installs an in-memory FakeSocket loaded
    with *fakedata* instead of opening a network socket.  Whatever the client
    sends through sendall() is stored on the class as ``buf`` so tests can
    inspect the outgoing request.
    """
    class FakeSocket(io.BytesIO):
        """In-memory stand-in for a socket, seeded with the fake response."""

        def sendall(self, data):
            # Keep the outgoing request for later verification.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            # The "socket" doubles as its own file object.
            return self

        def read(self, amt=None):
            # Once closed, report end-of-data rather than reading.
            if self.closed:
                return b""
            return io.BytesIO.read(self, amt)

        def readline(self, length=None):
            if self.closed:
                return b""
            return io.BytesIO.readline(self, length)

    class FakeHTTPConnection(httplib.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = ""

        def connect(self):
            self.sock = FakeSocket(self.fakedata)
            # Expose the most recent socket on the class as well.
            self.__class__.fakesock = self.sock
    # Attached after the class statement: inside the class body the name
    # ``fakedata`` would not resolve to the enclosing function's argument.
    FakeHTTPConnection.fakedata = fakedata

    return FakeHTTPConnection


class FakeHTTPMixin(object):
    """Mixin that swaps httplib's connection class for a fake and back."""

    def fakehttp(self, fakedata):
        # Guard against stacking fakes: the real class must be in place.
        assert httplib.HTTP._connection_class == httplib.HTTPConnection

        httplib.HTTP._connection_class = fakehttp(fakedata)

    def unfakehttp(self):
        httplib.HTTP._connection_class = httplib.HTTPConnection


class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        """Setup of a temp file to use for testing"""
        self.text = "test_urllib: %s\n" % self.__class__.__name__
        with open(test_support.TESTFN, 'wb') as temp_file:
            temp_file.write(self.text)
        self.pathname = test_support.TESTFN
        self.returned_obj = urllib.urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()
        os.remove(test_support.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual('', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), mimetools.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertEqual(self.returned_obj.getcode(), None)

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison
        for line in self.returned_obj.__iter__():
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        self.assertRaises(ValueError, urllib.urlopen, './' + self.pathname)


class ProxyTests(unittest.TestCase):
    """Tests for proxy discovery and bypass driven by environment variables."""

    def setUp(self):
        # Records changes to env vars
        self.env = test_support.EnvironmentVarGuard()
        # Delete all proxy related env vars
        for k in os.environ.keys():
            if 'proxy' in k.lower():
                self.env.unset(k)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(urllib.proxy_bypass_environment('anotherdomain.com'))
        self.assertTrue(
            urllib.proxy_bypass_environment('anotherdomain.com:8888'))
        self.assertTrue(urllib.proxy_bypass_environment('newdomain.com:1234'))

    def test_proxy_cgi_ignore(self):
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            proxies = urllib.getproxies_environment()
            self.assertEqual('http://somewhere:3128', proxies['http'])
            # With REQUEST_METHOD set, HTTP_PROXY must no longer be honoured.
            self.env.set('REQUEST_METHOD', 'GET')
            proxies = urllib.getproxies_environment()
            self.assertNotIn('http', proxies)
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('LocalHost'))                 # MixedCase
        self.assertTrue(bypass('LOCALHOST'))                 # UPPERCASE
        self.assertTrue(bypass('newdomain.com:1234'))
        self.assertTrue(bypass('foo.d.o.t'))                 # issue 29142
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('www.newdomain.com:1234'))
        self.assertFalse(bypass('prelocalhost'))
        self.assertFalse(bypass('newdomain.com'))            # no port
        self.assertFalse(bypass('newdomain.com:1235'))       # wrong port


class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Proxy tests that depend on the order of environment variables."""

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant
        self._saved_env = os.environ
        # Monkey patch os.environ, start with empty fake environment
        os.environ = collections.OrderedDict()

    def tearDown(self):
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        # Test lowercase preference with removal
        os.environ['no_proxy'] = ''
        os.environ['No_Proxy'] = 'localhost'
        self.assertFalse(urllib.proxy_bypass_environment('localhost'))
        self.assertFalse(urllib.proxy_bypass_environment('arbitrary'))
        os.environ['http_proxy'] = ''
        os.environ['HTTP_PROXY'] = 'http://somewhere:3128'
        proxies = urllib.getproxies_environment()
        self.assertEqual({}, proxies)
        # Test lowercase preference of proxy bypass and correct matching
        # including ports
        os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        os.environ['No_Proxy'] = 'xyz.com'
        self.assertTrue(urllib.proxy_bypass_environment('localhost'))
        self.assertTrue(urllib.proxy_bypass_environment('noproxy.com:5678'))
        self.assertTrue(urllib.proxy_bypass_environment('my.proxy:1234'))
        self.assertFalse(urllib.proxy_bypass_environment('my.proxy'))
        self.assertFalse(urllib.proxy_bypass_environment('arbitrary'))
        # Test lowercase preference with replacement
        os.environ['http_proxy'] = 'http://somewhere:3128'
        os.environ['Http_Proxy'] = 'http://somewhereelse:3128'
        proxies = urllib.getproxies_environment()
        self.assertEqual('http://somewhere:3128', proxies['http'])


class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urlopen() opening a fake http connection."""

    def test_read(self):
        self.fakehttp('Hello!')
        try:
            fp = urllib.urlopen("http://python.org/")
            self.assertEqual(fp.readline(), 'Hello!')
            self.assertEqual(fp.readline(), '')
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp('Hello!')
        try:
            fp = urllib.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_read_bogus(self):
        # urlopen() should raise IOError for many error codes.
        self.fakehttp('''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(IOError, urllib.urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise IOError when redirected to a file: URL.
        self.fakehttp("""HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file:README
Connection: close
Content-Type: text/html; charset=iso-8859-1
""")
        try:
            msg = "Redirection to url 'file:"
            with self.assertRaisesRegexp(IOError, msg):
                urllib.urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        for i in range(urllib.FancyURLopener().maxtries):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''')
            try:
                self.assertRaises(IOError, urllib.urlopen,
                                  "http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises IOError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp('')
        try:
            self.assertRaises(IOError, urllib.urlopen, 'http://something')
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        self.assertRaises(IOError, urllib.urlopen,
                          'file://localhost/a/missing/file.py')
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        self.assertTrue(os.path.exists(tmp_file))
        try:
            fp = urllib.urlopen(tmp_fileurl)
            fp.close()
        finally:
            os.close(fd)
            os.unlink(tmp_file)

        # The same URL must fail once the file is gone.
        self.assertFalse(os.path.exists(tmp_file))
        self.assertRaises(IOError, urllib.urlopen, tmp_fileurl)

    def test_ftp_nonexisting(self):
        self.assertRaises(IOError, urllib.urlopen,
                          'ftp://localhost/not/existing/file.py')

    def test_userpass_inurl(self):
        self.fakehttp('Hello!')
        try:
            fakehttp_wrapper = httplib.HTTP._connection_class
            fp = urllib.urlopen("http://user:pass@python.org/")
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode('user:pass'))
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf)
            self.assertEqual(fp.readline(), "Hello!")
            self.assertEqual(fp.readline(), "")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_with_spaces_inurl(self):
        self.fakehttp('Hello!')
        try:
            url = "http://a b:c d@python.org/"
            fakehttp_wrapper = httplib.HTTP._connection_class
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode('a b:c d'))
            fp = urllib.urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf)
            self.assertEqual(fp.readline(), "Hello!")
            self.assertEqual(fp.readline(), "")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()


class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to makes sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(test_support.TESTFN)
        self.text = 'testing urllib.urlretrieve'
        with open(test_support.TESTFN, 'wb') as temp_file:
            temp_file.write(self.text)

    def tearDown(self):
        # Delete the temporary files.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                # Best effort: a registered file may never have been created.
                pass

    def constructLocalFileUrl(self, filePath):
        return "file://%s" % urllib.pathname2url(os.path.abspath(filePath))

    def createNewTempFile(self, data=""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        self.registerFileForCleanUp(newFilePath)
        # The with-block closes the descriptor even if the write fails.
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        self.tempFiles.append(fileName)

    def _retrieveWithReport(self, data):
        """Retrieve a fresh temp file holding *data*; return the hook calls.

        Each entry of the returned list is the (count, block_size,
        total_size) tuple passed to the reporthook.
        """
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile(data)
        urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
                           test_support.TESTFN, hooktester)
        return report

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.urlretrieve("file:%s" % test_support.TESTFN)
        self.assertEqual(result[0], test_support.TESTFN)
        self.assertIsInstance(result[1], mimetools.Message,
                              "did not get a mimetools.Message instance as "
                              "second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % test_support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.urlretrieve(self.constructLocalFileUrl(
            test_support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as copied:
            text = copied.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        calls = [0]  # number of hook invocations seen so far

        def hooktester(count, block_size, total_size):
            self.assertIsInstance(count, int)
            self.assertIsInstance(block_size, int)
            self.assertIsInstance(total_size, int)
            self.assertEqual(count, calls[0])
            calls[0] += 1

        second_temp = "%s.2" % test_support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.urlretrieve(self.constructLocalFileUrl(test_support.TESTFN),
                           second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = self._retrieveWithReport("")
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read). Since the block size is 8192 bytes, only one block read is
        # required to read the entire file.
        report = self._retrieveWithReport("x" * 5)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = self._retrieveWithReport("x" * 8193)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 8193)


class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    def test_short_content_raises_ContentTooShortError(self):
        # The body is far shorter than the advertised Content-Length.
        self.fakehttp('''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')

        def _reporthook(par1, par2, par3):
            pass

        try:
            self.assertRaises(urllib.ContentTooShortError, urllib.urlretrieve,
                              'http://example.com', reporthook=_reporthook)
        finally:
            self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        self.fakehttp('''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')
        try:
            self.assertRaises(urllib.ContentTooShortError, urllib.urlretrieve,
                              'http://example.com/')
        finally:
            self.unfakehttp()


class QuotingTests(unittest.TestCase):
    """Tests for urllib.quote() and urllib.quote_plus()

    According to RFC 2396 ("Uniform Resource Identifiers"), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.  The Python
    code of ``'%' + hex(ord(<character>))[2:]`` escapes a character properly.
    Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        result = urllib.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %s != %s" % (do_not_quote, result))
        result = urllib.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote_plus(): %s != %s" % (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.quote.func_defaults[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %s != %s" % (quote_by_default, result))
        result = urllib.quote_plus(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %s != %s" %
                         (quote_by_default, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)]  # For 0x00 - 0x1F
        should_quote.append('<>#%"{}|\\^[]`')
        should_quote.append(chr(127))  # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): %s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escapes to %s, not %s" %
                             (char, hexescape(char), result))
        del should_quote
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %s != %s" % (expected, result))
        result = urllib.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %s != %s" % (expected, result))
        self.assertRaises(TypeError, urllib.quote, None)

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %s != %s" % (result, hexescape(' ')))
        result = urllib.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %s != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %s != %s" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %s != %s" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')


class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %s != %s" % (expect, result))
            result = urllib.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %s != %s" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        del escape_list
        # Only the decoded '%25' may leave a '%' in the result.
        result = urllib.unquote(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters escaped: "
                         "%s" % result)
        result = urllib.unquote_plus(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote_plus(): not all characters escaped: "
                         "%s" % result)

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes
        given = '%xab'
        expect = given
        result = urllib.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        given = '%x'
        expect = given
        result = urllib.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        given = '%'
        expect = given
        result = urllib.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = '\xab\xea'
        result = urllib.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %s != %s" % (expect, result))
        result = urllib.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %s != %s" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        expect = given
        result = urllib.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %s != %s" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %s != %s" % (expect, result))

    def test_unquote_with_unicode(self):
        r = urllib.unquote(u'br%C3%BCckner_sapporo_20050930.doc')
        self.assertEqual(r, u'br\xc3\xbcckner_sapporo_20050930.doc')


class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Helper method for testing different input types.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        Test cannot assume anything about order.  Docs make no guarantee and
        have possible dictionary input.

        """
        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
        result = urllib.urlencode(given)
        for expected in expect_somewhere:
            self.assertIn(expected, result,
                          "testing %s: %s not found in %s" %
                          (test_type, expected, result))
        self.assertEqual(result.count('&'), 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, result.count('&')))
        # The first '&' must sit between a value digit and a key digit.
        amp_location = result.index('&')
        on_amp_left = result[amp_location - 1]
        on_amp_right = result[amp_location + 1]
        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, result))
        self.assertEqual(len(result), (5 * 3) + 2,  # 5 chars per thing and amps
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(result), (5 * 3) + 2))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
                            "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        given = {"&":"="}
        expect = "%s=%s" % (hexescape('&'), hexescape('='))
        result = urllib.urlencode(given)
        self.assertEqual(expect, result)
        given = {"key name":"A bunch of pluses"}
        expect = "key+name=A+bunch+of+pluses"
        result = urllib.urlencode(given)
        self.assertEqual(expect, result)

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        given = {'sequence':['1', '2', '3']}
        expect = "sequence=%s" % urllib.quote_plus(str(['1', '2', '3']))
        result = urllib.urlencode(given)
        self.assertEqual(expect, result)
        result = urllib.urlencode(given, True)
        for value in given["sequence"]:
            expect = "sequence=%s" % value
            self.assertIn(expect, result)
        self.assertEqual(result.count('&'), 2,
                         "Expected 2 '&'s, got %s" % result.count('&'))


class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = urllib.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.quote("quot=ing")
        result = urllib.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        expect = given
        result = urllib.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.quote("make sure")
        result = urllib.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the nturl2path library')
    def test_ntpath(self):
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.url2pathname(url)
            self.assertEqual(expect, result,
                             'nturl2path.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.url2pathname(given)
        self.assertEqual(expect, result,
                         'nturl2path.url2pathname() failed; %s != %s' %
                         (expect, result))


class Utility_Tests(unittest.TestCase):
    """Testcase to test the various utility functions in the urllib."""
    # In Python 3 this test class is moved to test_urlparse.

    def test_splittype(self):
        splittype = urllib.splittype
        self.assertEqual(splittype('type:opaquestring'),
                         ('type', 'opaquestring'))
        self.assertEqual(splittype('opaquestring'), (None, 'opaquestring'))
        self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring'))
        self.assertEqual(splittype('type:'), ('type', ''))
        self.assertEqual(splittype('type:opaque:string'),
                         ('type', 'opaque:string'))

    def test_splithost(self):
        splithost = urllib.splithost
        self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
                         ('www.example.org:80', '/foo/bar/baz.html'))
        self.assertEqual(splithost('//www.example.org:80'),
                         ('www.example.org:80', ''))
        self.assertEqual(splithost('/foo/bar/baz.html'),
                         (None, '/foo/bar/baz.html'))

        # bpo-30500: # starts a fragment.
        self.assertEqual(splithost('//127.0.0.1#@host.com'),
                         ('127.0.0.1', '/#@host.com'))
        self.assertEqual(splithost('//127.0.0.1#@host.com:80'),
                         ('127.0.0.1', '/#@host.com:80'))
        self.assertEqual(splithost('//127.0.0.1:80#@host.com'),
                         ('127.0.0.1:80', '/#@host.com'))

        # Empty host is returned as empty string.
        self.assertEqual(splithost("///file"),
                         ('', '/file'))

        # Trailing semicolon, question mark and hash symbol are kept.
        self.assertEqual(splithost("//example.net/file;"),
                         ('example.net', '/file;'))
        self.assertEqual(splithost("//example.net/file?"),
                         ('example.net', '/file?'))
        self.assertEqual(splithost("//example.net/file#"),
                         ('example.net', '/file#'))

    def test_splituser(self):
        splituser = urllib.splituser
        self.assertEqual(splituser('User:Pass@www.python.org:080'),
                         ('User:Pass', 'www.python.org:080'))
        self.assertEqual(splituser('@www.python.org:080'),
                         ('', 'www.python.org:080'))
        self.assertEqual(splituser('www.python.org:080'),
                         (None, 'www.python.org:080'))
        self.assertEqual(splituser('User:Pass@'),
                         ('User:Pass', ''))
        self.assertEqual(splituser('User@example.com:Pass@www.python.org:080'),
                         ('User@example.com:Pass', 'www.python.org:080'))

    def test_splitpasswd(self):
        # Some of the password examples are not sensible, but it is added to
        # confirming to RFC2617 and addressing issue4675.
        splitpasswd = urllib.splitpasswd
        self.assertEqual(splitpasswd('user:ab'), ('user', 'ab'))
        self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb'))
        self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb'))
        self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb'))
        self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb'))
        self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb'))
        self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b'))
        self.assertEqual(splitpasswd('user:a b'), ('user', 'a b'))
        self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab'))
        self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b'))
        self.assertEqual(splitpasswd('user:'), ('user', ''))
        self.assertEqual(splitpasswd('user'), ('user', None))
        self.assertEqual(splitpasswd(':ab'), ('', 'ab'))

    def test_splitport(self):
        splitport = urllib.splitport
        self.assertEqual(splitport('parrot:88'), ('parrot', '88'))
        self.assertEqual(splitport('parrot'), ('parrot', None))
        self.assertEqual(splitport('parrot:'), ('parrot', None))
        self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None))
        self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None))
        self.assertEqual(splitport('[::1]:88'), ('[::1]', '88'))
        self.assertEqual(splitport('[::1]'), ('[::1]', None))
        self.assertEqual(splitport(':88'), ('', '88'))

    def test_splitnport(self):
        splitnport = urllib.splitnport
        self.assertEqual(splitnport('parrot:88'), ('parrot', 88))
        self.assertEqual(splitnport('parrot'), ('parrot', -1))
        self.assertEqual(splitnport('parrot', 55), ('parrot', 55))
        self.assertEqual(splitnport('parrot:'), ('parrot', -1))
        self.assertEqual(splitnport('parrot:', 55), ('parrot', 55))
        self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1))
        self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55))
        self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))
        self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))

    def test_splitquery(self):
        # Normal cases are exercised by other tests; ensure that we also
        # catch cases with no port specified (testcase ensuring coverage)
        splitquery = urllib.splitquery
        self.assertEqual(splitquery('http://python.org/fake?foo=bar'),
                         ('http://python.org/fake', 'foo=bar'))
        self.assertEqual(splitquery('http://python.org/fake?foo=bar?'),
                         ('http://python.org/fake?foo=bar', ''))
        self.assertEqual(splitquery('http://python.org/fake'),
                         ('http://python.org/fake', None))
        self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar'))

    def test_splittag(self):
        splittag = urllib.splittag
        self.assertEqual(splittag('http://example.com?foo=bar#baz'),
                         ('http://example.com?foo=bar', 'baz'))
        self.assertEqual(splittag('http://example.com?foo=bar#'),
                         ('http://example.com?foo=bar', ''))
        self.assertEqual(splittag('#baz'), ('', 'baz'))
self.assertEqual(splittag('http://example.com?foo=bar'), 976 ('http://example.com?foo=bar', None)) 977 self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'), 978 ('http://example.com?foo=bar#baz', 'boo')) 979 980 def test_splitattr(self): 981 splitattr = urllib.splitattr 982 self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'), 983 ('/path', ['attr1=value1', 'attr2=value2'])) 984 self.assertEqual(splitattr('/path;'), ('/path', [''])) 985 self.assertEqual(splitattr(';attr1=value1;attr2=value2'), 986 ('', ['attr1=value1', 'attr2=value2'])) 987 self.assertEqual(splitattr('/path'), ('/path', [])) 988 989 def test_splitvalue(self): 990 # Normal cases are exercised by other tests; test pathological cases 991 # with no key/value pairs. (testcase ensuring coverage) 992 splitvalue = urllib.splitvalue 993 self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar')) 994 self.assertEqual(splitvalue('foo='), ('foo', '')) 995 self.assertEqual(splitvalue('=bar'), ('', 'bar')) 996 self.assertEqual(splitvalue('foobar'), ('foobar', None)) 997 self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz')) 998 999 def test_toBytes(self): 1000 result = urllib.toBytes(u'http://www.python.org') 1001 self.assertEqual(result, 'http://www.python.org') 1002 self.assertRaises(UnicodeError, urllib.toBytes, 1003 test_support.u(r'http://www.python.org/medi\u00e6val')) 1004 1005 def test_unwrap(self): 1006 url = urllib.unwrap('<URL:type://host/path>') 1007 self.assertEqual(url, 'type://host/path') 1008 1009 1010class URLopener_Tests(unittest.TestCase): 1011 """Testcase to test the open method of URLopener class.""" 1012 1013 def test_quoted_open(self): 1014 class DummyURLopener(urllib.URLopener): 1015 def open_spam(self, url): 1016 return url 1017 1018 self.assertEqual(DummyURLopener().open( 1019 'spam://example/ /'),'//example/%20/') 1020 1021 # test the safe characters are not quoted by urlopen 1022 self.assertEqual(DummyURLopener().open( 1023 
"spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"), 1024 "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/") 1025 1026 1027# Just commented them out. 1028# Can't really tell why keep failing in windows and sparc. 1029# Everywhere else they work ok, but on those machines, sometimes 1030# fail in one of the tests, sometimes in other. I have a linux, and 1031# the tests go ok. 1032# If anybody has one of the problematic environments, please help! 1033# . Facundo 1034# 1035# def server(evt): 1036# import socket, time 1037# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 1038# serv.settimeout(3) 1039# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 1040# serv.bind(("", 9093)) 1041# serv.listen(5) 1042# try: 1043# conn, addr = serv.accept() 1044# conn.send("1 Hola mundo\n") 1045# cantdata = 0 1046# while cantdata < 13: 1047# data = conn.recv(13-cantdata) 1048# cantdata += len(data) 1049# time.sleep(.3) 1050# conn.send("2 No more lines\n") 1051# conn.close() 1052# except socket.timeout: 1053# pass 1054# finally: 1055# serv.close() 1056# evt.set() 1057# 1058# class FTPWrapperTests(unittest.TestCase): 1059# 1060# def setUp(self): 1061# import ftplib, time, threading 1062# ftplib.FTP.port = 9093 1063# self.evt = threading.Event() 1064# threading.Thread(target=server, args=(self.evt,)).start() 1065# time.sleep(.1) 1066# 1067# def tearDown(self): 1068# self.evt.wait() 1069# 1070# def testBasic(self): 1071# # connects 1072# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, []) 1073# ftp.close() 1074# 1075# def testTimeoutNone(self): 1076# # global default timeout is ignored 1077# import socket 1078# self.assertIsNone(socket.getdefaulttimeout()) 1079# socket.setdefaulttimeout(30) 1080# try: 1081# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, []) 1082# finally: 1083# socket.setdefaulttimeout(None) 1084# self.assertEqual(ftp.ftp.sock.gettimeout(), 30) 1085# ftp.close() 1086# 1087# def testTimeoutDefault(self): 1088# # global default timeout is 
used 1089# import socket 1090# self.assertIsNone(socket.getdefaulttimeout()) 1091# socket.setdefaulttimeout(30) 1092# try: 1093# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, []) 1094# finally: 1095# socket.setdefaulttimeout(None) 1096# self.assertEqual(ftp.ftp.sock.gettimeout(), 30) 1097# ftp.close() 1098# 1099# def testTimeoutValue(self): 1100# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [], 1101# timeout=30) 1102# self.assertEqual(ftp.ftp.sock.gettimeout(), 30) 1103# ftp.close() 1104 1105 1106 1107def test_main(): 1108 import warnings 1109 with warnings.catch_warnings(): 1110 warnings.filterwarnings('ignore', ".*urllib\.urlopen.*Python 3.0", 1111 DeprecationWarning) 1112 test_support.run_unittest( 1113 urlopen_FileTests, 1114 urlopen_HttpTests, 1115 urlretrieve_FileTests, 1116 urlretrieve_HttpTests, 1117 ProxyTests, 1118 QuotingTests, 1119 UnquotingTests, 1120 urlencode_Tests, 1121 Pathname_Tests, 1122 Utility_Tests, 1123 URLopener_Tests, 1124 ProxyTests, 1125 ProxyTests_withOrderedEnv, 1126 #FTPWrapperTests, 1127 ) 1128 1129 1130 1131if __name__ == '__main__': 1132 test_main() 1133